aboutsummaryrefslogtreecommitdiffstats
path: root/main/luajit/s390x.patch
diff options
context:
space:
mode:
Diffstat (limited to 'main/luajit/s390x.patch')
-rw-r--r--main/luajit/s390x.patch43692
1 files changed, 43692 insertions, 0 deletions
diff --git a/main/luajit/s390x.patch b/main/luajit/s390x.patch
new file mode 100644
index 00000000000..15f51b8ca05
--- /dev/null
+++ b/main/luajit/s390x.patch
@@ -0,0 +1,43692 @@
+From 8d336e1299c4af83df61aed8f59171b15140f0d9 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 10 Nov 2016 10:33:16 +0530
+Subject: [PATCH 001/260] Create lj_target_s390x.h
+
+Adding file lj_target_s390x.h
+A few ARM-based instructions are replaced with their equivalent s390x instructions
+---
+ src/lj_target_s390x.h | 287 +++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 287 insertions(+)
+ create mode 100644 src/lj_target_s390x.h
+
+diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
+new file mode 100644
+index 000000000..7da2063d2
+--- /dev/null
++++ b/src/lj_target_s390x.h
+@@ -0,0 +1,287 @@
++/*
++** Definitions for S390 CPUs.
++** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++#ifndef _LJ_TARGET_S390_H
++#define _LJ_TARGET_S390_H
++
++/* -- Registers IDs ------------------------------------------------------- */
++
++#define GPRDEF(_) \
++ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
++ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15)
++#if LJ_SOFTFP
++#define FPRDEF(_)
++#else
++#define FPRDEF(_) \
++ _(F0) _(F2) _(F4) _(F6)
++#endif
++#define VRIDDEF(_)
++
++#define RIDENUM(name) RID_##name, /* Expands each register name to an RID_<name> enumerator. */
++
++enum {
++ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
++ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
++ RID_MAX,
++ RID_TMP = RID_R14, /* r14 is the return-address (link) register in the s390x ELF ABI. */
++
++ /* Calling conventions. */
++ RID_RET = RID_R0, /* NOTE(review): value kept from the ARM target; s390x ELF ABI returns in r2 -- confirm. */
++ RID_RETLO = RID_R0,
++ RID_RETHI = RID_R1,
++#if LJ_SOFTFP
++ RID_FPRET = RID_R0,
++#else
++ RID_FPRET = RID_F0, /* First FPR listed in FPRDEF. */
++#endif
++
++ /* These definitions must match with the *.dasc file(s): */
++ RID_BASE = RID_R9, /* Interpreter BASE. */
++ RID_LPC = RID_R6, /* Interpreter PC. */
++ RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */
++ RID_LREG = RID_R8, /* Interpreter L. */
++
++ /* Register ranges [min, max) and number of registers. */
++ RID_MIN_GPR = RID_R0,
++ RID_MAX_GPR = RID_R15+1, /* One past the last GPR listed in GPRDEF. */
++ RID_MIN_FPR = RID_MAX_GPR,
++#if LJ_SOFTFP
++ RID_MAX_FPR = RID_MIN_FPR,
++#else
++ RID_MAX_FPR = RID_F6+1, /* One past the last FPR listed in FPRDEF. */
++#endif
++ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
++ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
++};
++
++#define RID_NUM_KREF RID_NUM_GPR
++#define RID_MIN_KREF RID_R0
++
++/* -- Register sets ------------------------------------------------------- */
++
++/* Allocatable GPRs are R0..R12. NOTE(review): "except sp, lr and pc" is ARM wording -- confirm the reserved set for s390x. */
++#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1))
++#define RSET_GPREVEN \
++ (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \
++ RID2RSET(RID_R8)|RID2RSET(RID_R10))
++#define RSET_GPRODD \
++ (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \
++ RID2RSET(RID_R9)|RID2RSET(RID_R11))
++#if LJ_SOFTFP
++#define RSET_FPR 0
++#else
++#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
++#endif
++#define RSET_ALL (RSET_GPR|RSET_FPR)
++#define RSET_INIT RSET_ALL
++
++/* ABI-specific register sets. NOTE(review): sets and "lr" wording are copied from the ARM target -- confirm against the s390x ELF ABI. */
++#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12))
++#ifdef __APPLE__
++#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9))
++#else
++#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_
++#endif
++#if LJ_SOFTFP
++#define RSET_SCRATCH_FPR 0
++#else
++#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) /* NOTE(review): RID_D0/RID_D7 are not produced by FPRDEF (F0,F2,F4,F6) -- confirm. */
++#endif
++#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
++#define REGARG_FIRSTGPR RID_R0 /* NOTE(review): ARM value; the s390x ELF ABI passes integer args in r2-r6 -- confirm. */
++#define REGARG_LASTGPR RID_R3
++#define REGARG_NUMGPR 4
++#if LJ_ABI_SOFTFP
++#define REGARG_FIRSTFPR 0
++#define REGARG_LASTFPR 0
++#define REGARG_NUMFPR 0
++#else
++#define REGARG_FIRSTFPR RID_D0 /* NOTE(review): RID_D0 is not produced by FPRDEF -- confirm. */
++#define REGARG_LASTFPR RID_D7 /* NOTE(review): RID_D7 is not produced by FPRDEF -- confirm. */
++#define REGARG_NUMFPR 8 /* NOTE(review): FPRDEF lists only four FPRs -- confirm. */
++#endif
++
++/* -- Spill slots --------------------------------------------------------- */
++
++/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
++**
++** SPS_FIXED: Available fixed spill slots in interpreter frame.
++** This definition must match with the *.dasc file(s).
++**
++** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
++*/
++#define SPS_FIXED 2
++#define SPS_FIRST 2
++
++#define SPOFS_TMP 0
++
++#define sps_scale(slot) (4 * (int32_t)(slot)) /* Slot index -> byte offset (4 bytes per slot). */
++#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) /* Subtract SPS_FIXED, then round up to an even count. */
++
++/* -- Exit state ---------------------------------------------------------- */
++
++/* Register and spill-slot snapshot layout. This definition must match with the *.dasc file(s). */
++typedef struct {
++#if !LJ_SOFTFP
++ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
++#endif
++ int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. NOTE(review): 32-bit slots although lj_arch.h sets LJ_ARCH_BITS to 64 for this target -- confirm. */
++ int32_t spill[256]; /* Spill slots. */
++} ExitState;
++
++/* PC after instruction that caused an exit. Used to find the trace number. */
++#define EXITSTATE_PCREG RID_PC /* NOTE(review): RID_PC is not produced by GPRDEF/FPRDEF in this header -- confirm. */
++/* Highest exit + 1 indicates stack check. */
++#define EXITSTATE_CHECKEXIT 1
++
++#define EXITSTUB_SPACING 4 /* NOTE(review): presumably bytes per exit stub, carried over from ARM -- confirm. */
++#define EXITSTUBS_PER_GROUP 32
++
++/* -- Instructions -------------------------------------------------------- */
++
++/* Instruction fields. NOTE(review): ARM field layout; ARMF_CC uses ARMI_CCAL, which the enum below does not define as an enumerator -- confirm. */
++#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28))
++#define ARMF_N(r) ((r) << 16)
++#define ARMF_D(r) ((r) << 12)
++#define ARMF_S(r) ((r) << 8)
++#define ARMF_M(r) (r)
++#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
++#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
++
++typedef enum S390Ins { /* Opcode constants; ARM entries without an s390x mapping are kept as comments for reference. */
++
++ /* Unsupported in S390 */
++ /* ARMI_LDRSB = 0xe01000d0, */
++ /* ARMI_S = 0x000100000, */
++ /* ARMI_LDRD = 0xe00000d0, */
++ /* ARMI_ADC = 0xe0a00000, */
++ /* ARMI_SBC = 0xe0c00000, */
++ /* ARMI_STRB = 0xe4400000, */
++ /* ARMI_STRH = 0xe00000b0, */
++ /* ARMI_STRD = 0xe00000f0, */
++ /* ARMI_BL = 0xeb000000, */
++ /* ARMI_BLX = 0xfa000000, */
++ /* ARMI_BLXr = 0xe12fff30, */
++ /* ARMI_BIC = 0xe1c00000, */
++ /* ARMI_ORR = 0xe1800000, */
++ /* ARMI_LDRB = 0xe4500000, */
++ /* ARMI_MVN = 0xe1e00000, */
++ /* ARMI_LDRSH = 0xe01000f0, */
++ /* ARMI_NOP = 0xe1a00000, */
++ /* ARMI_PUSH = 0xe92d0000, */
++ /* ARMI_RSB = 0xe0600000, */
++ /* ARMI_RSC = 0xe0e00000, */
++ /* ARMI_TEQ = 0xe1300000, */
++ /* ARMI_CCAL = 0xe0000000, */
++ /* ARMI_K12 = 0x02000000, */
++ /* ARMI_KNEG = 0x00200000, */
++ /* ARMI_LS_W = 0x00200000, */
++ /* ARMI_LS_U = 0x00800000, */
++ /* ARMI_LS_P = 0x01000000, */
++ /* ARMI_LS_R = 0x02000000, */
++ /* ARMI_LSX_I = 0x00400000, */
++
++
++ /* ARMI_SUB = 0xe0400000, */
++ /* ARMI_ADD = 0xe0800000, */
++ /* ARMI_AND = 0xe0000000, */
++ /* ARMI_EOR = 0xe0200000, */
++ /* ARMI_MUL = 0xe0000090, */
++ /* ARMI_LDR = 0xe4100000, */
++ /* ARMI_CMP = 0xe1500000, */
++ /* ARMI_LDRH = 0xe01000b0, */
++ /* ARMI_B = 0xea000000, */
++ /* ARMI_MOV = 0xe1a00000, */
++ /* ARMI_STR = 0xe4000000, */
++ /* ARMI_TST = 0xe1100000, */
++ /* ARMI_SMULL = 0xe0c00090, */
++ /* ARMI_CMN = 0xe1700000, */
++ S390I_SR = 0x1B000000,
++ S390I_AR = 0x1A000000,
++ S390I_NR = 0x14000000,
++ S390I_XR = 0x17000000,
++ S390I_MR = 0x1C000000,
++ S390I_LR = 0x18000000,
++ S390I_C = 0x59000000,
++ S390I_LH = 0x48000000,
++ S390I_BASR = 0x0D000000,
++ S390I_MVCL = 0x0e000000,
++ S390I_ST = 0x50000000,
++ S390I_TM = 0x91000000,
++ S390I_MP = 0xbd000090, /* NOTE(review): low byte 0x90 mirrors ARMI_SMULL's layout; verify the MP opcode against the z/Architecture reference. */
++ S390I_CLR = 0x15000000,
++
++ /* ARMv6 */
++ /* ARMI_REV = 0xe6bf0f30, */
++ /* ARMI_SXTB = 0xe6af0070, */
++ /* ARMI_SXTH = 0xe6bf0070, */
++ /* ARMI_UXTB = 0xe6ef0070, */
++ /* ARMI_UXTH = 0xe6ff0070, */
++
++ /* ARMv6T2 */
++ /* ARMI_MOVW = 0xe3000000, */
++ /* ARMI_MOVT = 0xe3400000, */
++
++ /* VFP */
++ ARMI_VMOV_D = 0xeeb00b40,
++ ARMI_VMOV_S = 0xeeb00a40,
++ ARMI_VMOVI_D = 0xeeb00b00,
++
++ ARMI_VMOV_R_S = 0xee100a10,
++ ARMI_VMOV_S_R = 0xee000a10,
++ ARMI_VMOV_RR_D = 0xec500b10,
++ ARMI_VMOV_D_RR = 0xec400b10,
++
++ ARMI_VADD_D = 0xee300b00,
++ ARMI_VSUB_D = 0xee300b40,
++ ARMI_VMUL_D = 0xee200b00,
++ ARMI_VMLA_D = 0xee000b00,
++ ARMI_VMLS_D = 0xee000b40,
++ ARMI_VNMLS_D = 0xee100b00,
++ ARMI_VDIV_D = 0xee800b00,
++
++ ARMI_VABS_D = 0xeeb00bc0,
++ ARMI_VNEG_D = 0xeeb10b40,
++ ARMI_VSQRT_D = 0xeeb10bc0,
++
++ ARMI_VCMP_D = 0xeeb40b40,
++ ARMI_VCMPZ_D = 0xeeb50b40,
++
++ ARMI_VMRS = 0xeef1fa10,
++
++ ARMI_VCVT_S32_F32 = 0xeebd0ac0,
++ ARMI_VCVT_S32_F64 = 0xeebd0bc0,
++ ARMI_VCVT_U32_F32 = 0xeebc0ac0,
++ ARMI_VCVT_U32_F64 = 0xeebc0bc0,
++ ARMI_VCVTR_S32_F32 = 0xeebd0a40,
++ ARMI_VCVTR_S32_F64 = 0xeebd0b40,
++ ARMI_VCVTR_U32_F32 = 0xeebc0a40,
++ ARMI_VCVTR_U32_F64 = 0xeebc0b40,
++ ARMI_VCVT_F32_S32 = 0xeeb80ac0,
++ ARMI_VCVT_F64_S32 = 0xeeb80bc0,
++ ARMI_VCVT_F32_U32 = 0xeeb80a40,
++ ARMI_VCVT_F64_U32 = 0xeeb80b40,
++ ARMI_VCVT_F32_F64 = 0xeeb70bc0,
++ ARMI_VCVT_F64_F32 = 0xeeb70ac0,
++
++ ARMI_VLDR_S = 0xed100a00,
++ ARMI_VLDR_D = 0xed100b00,
++ ARMI_VSTR_S = 0xed000a00,
++ ARMI_VSTR_D = 0xed000b00,
++} S390Ins;
++
++typedef enum S390Shift { /* Shift kinds: logical left, logical right, arithmetic right. */
++ S390SH_SLL, S390SH_SRL, S390SH_SRA
++ /* Adjustment needed for ROR */
++} S390Shift;
++
++/* ARM condition codes. NOTE(review): retained unchanged from the ARM target -- confirm whether s390x code still needs this enum. */
++typedef enum ARMCC {
++ CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
++ CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
++ CC_HS = CC_CS, CC_LO = CC_CC /* Aliases for CC_CS and CC_CC. */
++} ARMCC;
++
++#endif
+
+From 096a33d925ea91bc442cea4f1a8d7c54e81b1f45 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 10 Nov 2016 10:35:35 +0530
+Subject: [PATCH 002/260] Update Makefile
+
+Added condition for s390 in Makefile
+---
+ src/Makefile | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/src/Makefile b/src/Makefile
+index 4e479ae5a..9f7d28ce3 100644
+--- a/src/Makefile
++++ b/src/Makefile
+@@ -238,6 +238,9 @@ else
+ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
+ TARGET_LJARCH= arm
+ else
++ifneq (,$(findstring LJ_TARGET_S390 ,$(TARGET_TESTARCH)))
++ TARGET_LJARCH= s390
++else
+ ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
+ TARGET_LJARCH= arm64
+ else
+
+From be89c18b9827a2cb4ea7807a69e253db075ec7e5 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 10 Nov 2016 10:42:51 +0530
+Subject: [PATCH 003/260] Update lj_arch.h
+
+Added supporting lines for s390
+Lines added using arm lines as reference
+---
+ src/lj_arch.h | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index cc5a0a66d..5155bf691 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -29,6 +29,7 @@
+ #define LUAJIT_ARCH_mips32 6
+ #define LUAJIT_ARCH_MIPS64 7
+ #define LUAJIT_ARCH_mips64 7
++#define LUAJIT_ARCH_S390 8
+
+ /* Target OS. */
+ #define LUAJIT_OS_OTHER 0
+@@ -49,6 +50,8 @@
+ #define LUAJIT_TARGET LUAJIT_ARCH_ARM
+ #elif defined(__aarch64__)
+ #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
++#elif defined(__s390__) || defined(__s390) || defined(__S390__) || defined(__S390) || defined(S390)
++#define LUAJIT_TARGET LUAJIT_ARCH_S390
+ #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
+ #define LUAJIT_TARGET LUAJIT_ARCH_PPC
+ #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
+@@ -230,6 +233,23 @@
+
+ #define LJ_ARCH_VERSION 80
+
++#elif LUAJIT_TARGET == LUAJIT_ARCH_S390
++
++ #define LJ_ARCH_NAME "s390"
++ #define LJ_ARCH_BITS 64
++ #define LJ_ARCH_ENDIAN LUAJIT_BE
++ #if !defined(LJ_ARCH_HASFPU) && __SOFTFP__ /* NOTE(review): this marks an FPU as present when __SOFTFP__ is set -- confirm the intended value. */
++ #define LJ_ARCH_HASFPU 1
++ #endif
++ #define LJ_ABI_EABI 1 /* NOTE(review): presumably copied from the ARM section (see commit message) -- confirm it applies to s390. */
++ #define LJ_TARGET_S390 1
++ #define LJ_TARGET_EHRETREG 0
++ #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
++ #define LJ_TARGET_MASKSHIFT 0
++ #define LJ_TARGET_MASKROT 1
++ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
++ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
++
+ #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
+
+ #ifndef LJ_ARCH_ENDIAN
+@@ -379,6 +399,10 @@
+ #if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
+ #error "Need at least Clang 3.5 or newer"
+ #endif
++#elif LJ_TARGET_S390
++#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
++#error "Need at least GCC 4.2 or newer"
++#endif
+ #else
+ #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
+ #error "Need at least GCC 4.8 or newer"
+
+From f1f03ec44bdcf0228cac0f090c83883a920bfa0c Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Thu, 10 Nov 2016 19:00:41 +0530
+Subject: [PATCH 004/260] Copy of dasm_arm64.lua file, with few changes
+
+Have changed few sections of file, other part is common across architectures
+---
+ dynasm/dasm_s390x.lua | 1177 +++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 1177 insertions(+)
+ create mode 100644 dynasm/dasm_s390x.lua
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+new file mode 100644
+index 000000000..a0a50e1e1
+--- /dev/null
++++ b/dynasm/dasm_s390x.lua
+@@ -0,0 +1,1177 @@
++------------------------------------------------------------------------------
++-- DynASM s390x module.
++--
++-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
++-- See dynasm.lua for full copyright notice.
++------------------------------------------------------------------------------
++
++-- Module information:
++local _info = {
++ arch = "s390x",
++ description = "DynASM s390x module",
++ version = "1.4.0",
++ vernum = 10400,
++ release = "2015-10-18",
++ author = "Mike Pall",
++ license = "MIT",
++}
++
++-- Exported glue functions for the arch-specific module.
++local _M = { _info = _info }
++
++-- Cache library functions.
++local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
++local assert, setmetatable, rawget = assert, setmetatable, rawget
++local _s = string
++local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
++local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
++local concat, sort, insert = table.concat, table.sort, table.insert
++local bit = bit or require("bit")
++local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
++local ror, tohex = bit.ror, bit.tohex
++
++-- Inherited tables and callbacks.
++local g_opt, g_arch
++local wline, werror, wfatal, wwarn
++
++-- Action name list; an action's number is its 0-based position (see map_action below).
++-- CHECK: Keep this in sync with the C code!
++local action_names = {
++ "STOP", "SECTION", "ESC", "REL_EXT",
++ "ALIGN", "REL_LG", "LABEL_LG",
++ "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
++}
++
++-- Maximum number of section buffer positions for dasm_put().
++-- CHECK: Keep this in sync with the C code!
++local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
++
++-- Action name -> action number (0-based, so STOP maps to 0).
++local map_action = {}
++for n,name in ipairs(action_names) do
++ map_action[name] = n-1
++end
++
++-- Action list buffer.
++local actlist = {}
++
++-- Argument list for next dasm_put(). Start with offset 0 into action list.
++local actargs = { 0 }
++
++-- Current number of section buffer positions for dasm_put().
++local secpos = 1
++
++------------------------------------------------------------------------------
++
++-- Dump action names and numbers to the stream `out`, one line per action.
++local function dumpactions(out)
++ out:write("DynASM encoding engine action codes:\n")
++ for n,name in ipairs(action_names) do
++ local num = map_action[name]
++ out:write(format(" %-10s %02X %d\n", name, num, num)) -- Name, then the number in hex and in decimal.
++ end
++ out:write("\n")
++end
++
++-- Write action list buffer as a huge static C array called `name` to the stream `out`.
++local function writeactions(out, name)
++ local nn = #actlist
++ if nn == 0 then nn = 1; actlist[1] = map_action.STOP end -- Empty list: emit a lone STOP in slot 1, which the final write reads.
++ out:write("static const unsigned int ", name, "[", nn, "] = {\n")
++ for i = 1,nn-1 do
++ assert(out:write("0x", tohex(actlist[i]), ",\n"))
++ end
++ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) -- Last element carries no trailing comma.
++end
++
++------------------------------------------------------------------------------
++
++-- Add word to action list. Asserts that n is an integer in [0, 0xffffffff].
++local function wputxw(n)
++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
++ actlist[#actlist+1] = n
++end
++
++-- Add action to list with optional arg. Advance buffer pos, too.
++local function waction(action, val, a, num)
++ local w = assert(map_action[action], "bad action name `"..action.."'")
++ wputxw(w * 0x10000 + (val or 0)) -- Action number times 0x10000, plus val (0 if omitted).
++ if a then actargs[#actargs+1] = a end -- Queue `a` as an argument of the next dasm_put().
++ if a or num then secpos = secpos + (num or 1) end
++end
++
++-- Flush pending actions as one dasm_put() line (intervening C code or buffer pos overflow). A true `term` skips the STOP terminator.
++local function wflush(term)
++ if #actlist == actargs[1] then return end -- Nothing to flush.
++ if not term then waction("STOP") end -- Terminate action list.
++ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
++ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
++ secpos = 1 -- The actionlist offset occupies a buffer position, too.
++end
++
++-- Put escaped word: a word <= 0x000fffff is preceded by an ESC action (presumably so it is not read as an action word).
++local function wputw(n)
++ if n <= 0x000fffff then waction("ESC") end
++ wputxw(n)
++end
++
++-- Reserve position for word. Returns the index of the reserved slot.
++local function wpos()
++ local pos = #actlist+1
++ actlist[pos] = "" -- Placeholder value until wputpos() stores the real word.
++ return pos
++end
++
++-- Store word to reserved position. A word <= 0x000fffff is stored as an ESC action followed by the word.
++local function wputpos(pos, n)
++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
++ if n <= 0x000fffff then
++ insert(actlist, pos+1, n) -- The word itself goes directly after the reserved slot.
++ n = map_action.ESC * 0x10000 -- The reserved slot then receives the ESC action word.
++ end
++ actlist[pos] = n
++end
++
++------------------------------------------------------------------------------
++
++-- Global label name -> global label number. With auto assignment on 1st use.
++local next_global = 20 -- Numbering starts at 20; parse_label() hands out 1-19 for local labels.
++local map_global = setmetatable({}, { __index = function(t, name)
++ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
++ local n = next_global
++ if n > 2047 then werror("too many global labels") end
++ next_global = n + 1
++ t[name] = n -- Cache the number so __index fires only once per name.
++ return n
++end})
++
++-- Dump global labels to the stream `out`, in label-number order. `lvl` is not used here.
++local function dumpglobals(out, lvl)
++ local t = {}
++ for name, n in pairs(map_global) do t[n] = name end -- Invert the map: number -> name.
++ out:write("Global labels:\n")
++ for i=20,next_global-1 do
++ out:write(format(" %s\n", t[i]))
++ end
++ out:write("\n")
++end
++
++-- Write global label enum to `out`: one `prefix`-ed enumerator per label, then `prefix`_MAX.
++local function writeglobals(out, prefix)
++ local t = {}
++ for name, n in pairs(map_global) do t[n] = name end -- Invert the map: number -> name.
++ out:write("enum {\n")
++ for i=20,next_global-1 do
++ out:write(" ", prefix, t[i], ",\n")
++ end
++ out:write(" ", prefix, "_MAX\n};\n")
++end
++
++-- Write global label names to `out` as a C string array called `name`, ended by a null pointer.
++local function writeglobalnames(out, name)
++ local t = {}
++ for name, n in pairs(map_global) do t[n] = name end -- Loop-local `name` shadows the parameter only inside this loop.
++ out:write("static const char *const ", name, "[] = {\n")
++ for i=20,next_global-1 do
++ out:write(" \"", t[i], "\",\n")
++ end
++ out:write(" (const char *)0\n};\n")
++end
++
++------------------------------------------------------------------------------
++
++-- Extern label name -> extern label number. With auto assignment on 1st use.
++local next_extern = 0
++local map_extern_ = {} -- Reverse map: extern label number -> name.
++local map_extern = setmetatable({}, { __index = function(t, name)
++ -- No restrictions on the name for now.
++ local n = next_extern
++ if n > 2047 then werror("too many extern labels") end
++ next_extern = n + 1
++ t[name] = n
++ map_extern_[n] = name
++ return n
++end})
++
++-- Dump extern labels to the stream `out`, in number order. `lvl` is not used here.
++local function dumpexterns(out, lvl)
++ out:write("Extern labels:\n")
++ for i=0,next_extern-1 do
++ out:write(format(" %s\n", map_extern_[i]))
++ end
++ out:write("\n")
++end
++
++-- Write extern label names to `out` as a C string array called `name`, ended by a null pointer.
++local function writeexternnames(out, name)
++ out:write("static const char *const ", name, "[] = {\n")
++ for i=0,next_extern-1 do
++ out:write(" \"", map_extern_[i], "\",\n")
++ end
++ out:write(" (const char *)0\n};\n")
++end
++
++------------------------------------------------------------------------------
++
++-- Arch-specific maps.
++-- TODO: add s390x related register names
++-- Ext. register name -> int. name.
++--local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
++local map_archdef = {}
++
++-- Int. register name -> ext. name.
++-- local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
++local map_reg_rev = {}
++
++local map_type = {} -- Type name -> { ctype, reg }
++local ctypenum = 0 -- Type number (for Dt... macros).
++
++-- Reverse defines for registers: return map_reg_rev[s] if present, else s unchanged.
++function _M.revdef(s)
++ return map_reg_rev[s] or s
++end
++-- NOTE(review): the shift, extend and condition maps below are unverified for s390x (author's note: "not sure of these").
++local map_shift = { lsl = 0, lsr = 1, asr = 2, }
++
++local map_extend = {
++ uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
++ sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
++}
++
++local map_cond = {
++ eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
++ hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
++ hs = 2, lo = 3, -- Aliases of cs and cc.
++}
++
++------------------------------------------------------------------------------
++
++local parse_reg_type
++
++
++local function parse_gpr(expr) -- Parse "rN" or "type:rN"; returns the register number (0-15) and the type entry, if any.
++ local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
++ local tp = map_type[tname or expr]
++ if tp then
++ local reg = ovreg or tp.reg -- An explicit ":rN" override wins over the type's default register.
++ if not reg then
++ werror("type `"..(tname or expr).."' needs a register override")
++ end
++ expr = reg
++ end
++ local r = match(expr, "^r([1-3]?[0-9])$")
++ if r then
++ r = tonumber(r)
++ if r <= 15 then return r, tp end -- s390x has 16 GPRs (r0-r15); r16 and above are rejected below.
++ end
++ werror("bad register name `"..expr.."'")
++end
++
++local function parse_fpr(expr) -- Parse "fN"; returns the floating-point register number (0-15).
++ local r = match(expr, "^f([1-3]?[0-9])$")
++ if r then
++ r = tonumber(r)
++ if r <= 15 then return r end -- s390x has 16 FPRs (f0-f15); f16 and above are rejected below.
++ end
++ werror("bad register name `"..expr.."'")
++end
++
++
++
++
++
++local function parse_reg_base(expr) -- Parse a base register; returns its number shifted left by 5, plus the type entry. NOTE(review): parse_reg is not defined in the visible part of this file (only parse_gpr/parse_fpr) -- confirm.
++ if expr == "sp" then return 0x3e0 end -- 0x3e0 equals shl(31, 5). NOTE(review): looks like an ARM64 leftover -- confirm.
++ local base, tp = parse_reg(expr)
++ if parse_reg_type ~= "x" then werror("bad register type") end -- NOTE(review): nothing visible here ever assigns "x" to parse_reg_type -- confirm.
++ parse_reg_type = false
++ return shl(base, 5), tp
++end
++
++local parse_ctx = {} -- Environment table for chunks compiled by loadenv (empty at this point).
++
++local loadenv = setfenv and function(s) -- Compile string s with parse_ctx as its environment; this variant is chosen when setfenv exists.
++ local code = loadstring(s, "")
++ if code then setfenv(code, parse_ctx) end
++ return code
++end or function(s) -- Fallback when setfenv is absent: pass the environment to load() directly.
++ return load(s, "", nil, parse_ctx)
++end
++
++-- Try to parse simple arithmetic, too, since some basic ops are aliases. Returns a value, or nil if n cannot be evaluated.
++local function parse_number(n)
++ local x = tonumber(n)
++ if x then return x end -- Plain numeric literal.
++ local code = loadenv("return "..n) -- Otherwise evaluate n as an expression inside parse_ctx.
++ if code then
++ local ok, y = pcall(code)
++ if ok then return y end
++ end
++ return nil
++end
++
++local function parse_imm(imm, bits, shift, scale, signed) -- Parse "#expr": the value shifted right by `scale` must fit in `bits` bits; the result is shifted left by `shift`.
++ imm = match(imm, "^#(.*)$")
++ if not imm then werror("expected immediate operand") end
++ local n = parse_number(imm)
++ if n then
++ local m = sar(n, scale)
++ if shl(m, scale) == n then -- Accept only values that survive the scale round trip unchanged.
++ if signed then
++ local s = sar(m, bits-1)
++ if s == 0 then return shl(m, shift)
++ elseif s == -1 then return shl(m + shl(1, bits), shift) end -- Negative value: add 2^bits before placing it in the field.
++ else
++ if sar(m, bits) == 0 then return shl(m, shift) end
++ end
++ end
++ werror("out of range immediate `"..imm.."'")
++ else
++ waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) -- Not a constant: emit an IMM action with the expression as its argument.
++ return 0
++ end
++end
++
++local function parse_imm12(imm) -- Parse "#expr" as a 12-bit immediate, optionally pre-shifted by 12. NOTE(review): encoding carried over from dasm_arm64.lua -- confirm for s390x.
++ imm = match(imm, "^#(.*)$")
++ if not imm then werror("expected immediate operand") end
++ local n = parse_number(imm)
++ if n then
++ if shr(n, 12) == 0 then
++ return shl(n, 10)
++ elseif band(n, 0xff000fff) == 0 then -- Only bits 12..23 are set.
++ return shr(n, 2) + 0x00400000
++ end
++ werror("out of range immediate `"..imm.."'")
++ else
++ waction("IMM12", 0, imm) -- Not a constant: emit an IMM12 action with the expression as its argument.
++ return 0
++ end
++end
++
++local function parse_imm13(imm) -- Parse "#expr" as a repeating bit-pattern immediate. NOTE(review): encoding carried over from dasm_arm64.lua -- confirm for s390x.
++ imm = match(imm, "^#(.*)$")
++ if not imm then werror("expected immediate operand") end
++ local n = parse_number(imm)
++ local r64 = parse_reg_type == "x"
++ if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
++ local inv = false
++ if band(n, 1) == 1 then n = bit.bnot(n); inv = true end -- Work on the complement when bit 0 is set.
++ local t = {}
++ for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end -- Collect the 32 bits, least significant first.
++ local b = table.concat(t)
++ b = b..(r64 and (inv and "1" or "0"):rep(32) or b) -- Extend the bit string to 64 characters.
++ local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
++ if p0 then
++ local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
++ if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then -- w must be a power of two and the pattern must repeat every w bits.
++ local s = band(-2*w, 0x3f) - 1
++ if w == 64 then s = s + 0x1000 end
++ if inv then
++ return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
++ else
++ return shl(w-#p0, 16) + shl(s+#p1, 10)
++ end
++ end
++ end
++ werror("out of range immediate `"..imm.."'")
++ elseif r64 then
++ waction("IMM13X", 0, format("(unsigned int)(%s)", imm)) -- First argument: the expression cast to unsigned int.
++ actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm) -- Second argument: the expression shifted right by 32.
++ return 0
++ else
++ waction("IMM13W", 0, imm)
++ return 0
++ end
++end
++
++local function parse_imm6(imm) -- Parse "#expr" as a value in 0..63. NOTE(review): encoding carried over from dasm_arm64.lua -- confirm for s390x.
++ imm = match(imm, "^#(.*)$")
++ if not imm then werror("expected immediate operand") end
++ local n = parse_number(imm)
++ if n then
++ if n >= 0 and n <= 63 then
++ return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0) -- Low 5 bits go to bit 19; values >= 32 also set bit 31.
++ end
++ werror("out of range immediate `"..imm.."'")
++ else
++ waction("IMM6", 0, imm)
++ return 0
++ end
++end
++
++local function parse_imm_load(imm, scale) -- Encode a load/store offset (imm comes without the leading "#"). NOTE(review): encoding carried over from dasm_arm64.lua -- confirm for s390x.
++ local n = parse_number(imm)
++ if n then
++ local m = sar(n, scale)
++ if shl(m, scale) == n and m >= 0 and m < 0x1000 then
++ return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
++ elseif n >= -256 and n < 256 then
++ return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
++ end
++ werror("out of range immediate `"..imm.."'")
++ else
++ waction("IMML", 0, imm) -- Not a constant: emit an IMML action with the expression as its argument.
++ return 0
++ end
++end
++
++local function parse_fpimm(imm) -- Parse "#expr" as a small floating-point constant (sign, 3-bit exponent, 4-bit mantissa). NOTE(review): encoding carried over from dasm_arm64.lua -- confirm for s390x.
++ imm = match(imm, "^#(.*)$")
++ if not imm then werror("expected immediate operand") end
++ local n = parse_number(imm)
++ if n then
++ local m, e = math.frexp(n)
++ local s, e2 = 0, band(e-2, 7)
++ if m < 0 then m = -m; s = 0x00100000 end -- Record the sign and continue with the magnitude.
++ m = m*32-16
++ if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then -- Mantissa must fit 4 bits; exponent must round-trip through 3 signed bits.
++ return s + shl(e2, 17) + shl(m, 13)
++ end
++ werror("out of range immediate `"..imm.."'")
++ else
++ werror("NYI fpimm action")
++ end
++end
++
++local function parse_shift(expr) -- Parse "<shift> #amount", where <shift> is a key of map_shift.
++ local s, s2 = match(expr, "^(%S+)%s*(.*)$")
++ s = map_shift[s]
++ if not s then werror("expected shift operand") end
++ return parse_imm(s2, 6, 10, 0, false) + shl(s, 22) -- 6-bit unsigned amount at bit 10, shift kind at bit 22.
++end
++
++local function parse_lslx16(expr) -- Parse "lsl #n"; n may be 0 or 16, plus 32 or 48 when parse_reg_type is "x".
++ local n = match(expr, "^lsl%s*#(%d+)$")
++ n = tonumber(n)
++ if not n then werror("expected shift operand") end
++ if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then -- Reject any bit outside the permitted shift amounts.
++ werror("bad shift amount")
++ end
++ return shl(n, 17)
++end
++
++local function parse_extend(expr) -- Parse "<extend> [#amount]", where <extend> is "lsl" or a key of map_extend.
++ local s, s2 = match(expr, "^(%S+)%s*(.*)$")
++ if s == "lsl" then
++ s = parse_reg_type == "x" and 3 or 2 -- "lsl" uses the same codes as uxtx (3) or uxtw (2) in map_extend.
++ else
++ s = map_extend[s]
++ end
++ if not s then werror("expected extend operand") end
++ return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13) -- Optional 3-bit amount at bit 10, extend kind at bit 13.
++end
++
++local function parse_cond(expr, inv) -- Look up a condition name in map_cond, XOR it with `inv`, and place it at bit 12.
++ local c = map_cond[expr]
++ if not c then werror("expected condition operand") end
++ return shl(bit.bxor(c, inv), 12)
++end
++
++local function parse_load(params, nparams, n, op) -- Parse the address operand(s) starting at params[n] and merge the encoding into op. NOTE(review): addressing forms carried over from dasm_arm64.lua -- confirm for s390x.
++ if params[n+2] then werror("too many operands") end
++ local pn, p2 = params[n], params[n+1]
++ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") -- p1: text inside the brackets; wb: "!" if one follows the bracket.
++ if not p1 then
++ if not p2 then
++ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
++ if reg and tailr ~= "" then
++ local base, tp = parse_reg_base(reg)
++ if tp then
++ waction("IMML", 0, format(tp.ctypefmt, tailr)) -- Typed register: the offset is built from the type's ctypefmt.
++ return op + base
++ end
++ end
++ end
++ werror("expected address operand")
++ end
++ local scale = shr(op, 30) -- The top two bits of op are used as the offset scale.
++ if p2 then
++ if wb == "!" then werror("bad use of '!'") end
++ op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
++ elseif wb == "!" then
++ local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
++ if not p1a then werror("bad use of '!'") end
++ op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
++ else
++ local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
++ op = op + parse_reg_base(p1a)
++ if p2a ~= "" then
++ local imm = match(p2a, "^,%s*#(.*)$")
++ if imm then
++ op = op + parse_imm_load(imm, scale)
++ else
++ local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$") -- Index register, optional extend/shift name, optional amount.
++ op = op + shl(parse_reg(p2b), 16) + 0x00200800
++ if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
++ werror("bad index register type")
++ end
++ if p3b == "" then
++ if parse_reg_type ~= "x" then werror("bad index register type") end
++ op = op + 0x6000
++ else
++ if p3s == "" or p3s == "#0" then
++ elseif p3s == "#"..scale then
++ op = op + 0x1000
++ else
++ werror("bad scale")
++ end
++ if parse_reg_type == "x" then
++ if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
++ elseif p3b == "sxtx" then op = op + 0xe000
++ else
++ werror("bad extend/shift specifier")
++ end
++ else
++ if p3b == "uxtw" then op = op + 0x4000
++ elseif p3b == "sxtw" then op = op + 0xc000
++ else
++ werror("bad extend/shift specifier")
++ end
++ end
++ end
++ end
++ else
++ if wb == "!" then werror("bad use of '!'") end -- NOTE(review): wb == "!" is already handled by the elseif above, so this check looks unreachable.
++ op = op + 0x01000000
++ end
++ end
++ return op
++end
++
++local function parse_load_pair(params, nparams, n, op) -- Parse the address operand(s) of a pair load/store starting at params[n] and merge the encoding into op. NOTE(review): carried over from dasm_arm64.lua -- confirm for s390x.
++ if params[n+2] then werror("too many operands") end
++ local pn, p2 = params[n], params[n+1]
++ local scale = shr(op, 30) == 0 and 2 or 3 -- Offset scale chosen from the top two bits of op.
++ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
++ if not p1 then
++ if not p2 then
++ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
++ if reg and tailr ~= "" then
++ local base, tp = parse_reg_base(reg)
++ if tp then
++ waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr)) -- Same IMM packing as parse_imm(..., 7, 15, scale, true).
++ return op + base + 0x01000000
++ end
++ end
++ end
++ werror("expected address operand")
++ end
++ if p2 then
++ if wb == "!" then werror("bad use of '!'") end
++ op = op + 0x00800000
++ else
++ local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
++ if p1a then p1, p2 = p1a, p2a else p2 = "#0" end -- No offset inside the brackets: default to #0.
++ op = op + (wb == "!" and 0x01800000 or 0x01000000)
++ end
++ return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
++end
++
++local function parse_label(label, def) -- Classify a label; returns a kind ("PC", "LG" or "EXT"), a number, and for PC labels the expression. `def` is true for definitions.
++ local prefix = sub(label, 1, 2)
++ -- =>label (pc label reference)
++ if prefix == "=>" then
++ return "PC", 0, sub(label, 3)
++ end
++ -- ->name (global label reference)
++ if prefix == "->" then
++ return "LG", map_global[sub(label, 3)]
++ end
++ if def then
++ -- [1-9] (local label definition)
++ if match(label, "^[1-9]$") then
++ return "LG", 10+tonumber(label)
++ end
++ else
++ -- [<>][1-9] (local label reference)
++ local dir, lnum = match(label, "^([<>])([1-9])$")
++ if dir then -- Fwd: 1-9, Bkwd: 11-19.
++ return "LG", lnum + (dir == ">" and 0 or 10) -- lnum is a digit string; the addition converts it to a number.
++ end
++ -- extern label (extern label reference)
++ local extname = match(label, "^extern%s+(%S+)$")
++ if extname then
++ return "EXT", map_extern[extname]
++ end
++ end
++ werror("bad label `"..label.."'")
++end
++
++local function branch_type(op) -- Map an opcode word to a branch-kind code by its bit pattern. NOTE(review): patterns and mnemonics are from dasm_arm64.lua -- confirm for s390x.
++ if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL
++ elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
++ band(op, 0x3b000000) == 0x18000000 then
++ return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
++ elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ
++ elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR
++ elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP (one-argument band normalizes the constant like the left side).
++ else
++ assert(false, "unknown branch type")
++ end
++end
++
++------------------------------------------------------------------------------
++
++local map_op, op_template
++
++local function op_alias(opname, f) -- Build an op handler that rewrites params with f, then assembles them using the template of `opname`.
++ return function(params, nparams)
++ if not params then return "-> "..opname:sub(1, -3) end -- No params: return a description string (opname minus its last two characters).
++ f(params, nparams)
++ op_template(params, map_op[opname], nparams)
++ end
++end
++
++local function alias_bfx(p) -- Rewrite p[4] in place to "#(p3)+(p4)-1", dropping the first character of p[3] and p[4].
++ p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1"
++end
++
++local function alias_bfiz(p) -- Rewrite p[3] to "#-(p3)%32" (or %64) and p[4] to "#(p4)-1" in place. NOTE(review): relies on parse_reg, which is not defined in the visible part of this file -- confirm.
++ parse_reg(p[1])
++ if parse_reg_type == "w" then
++ p[3] = "#-("..p[3]:sub(2)..")%32"
++ p[4] = "#("..p[4]:sub(2)..")-1"
++ else
++ p[3] = "#-("..p[3]:sub(2)..")%64"
++ p[4] = "#("..p[4]:sub(2)..")-1"
++ end
++end
++
++local alias_lslimm = op_alias("ubfm_4", function(p) -- Alias handler: rewrites the shift amount p[3] into p[3]/p[4] for the "ubfm_4" template. NOTE(review): ARM64 alias using the undefined parse_reg -- confirm.
++ parse_reg(p[1])
++ local sh = p[3]:sub(2) -- Shift amount text without its first character.
++ if parse_reg_type == "w" then
++ p[3] = "#-("..sh..")%32"
++ p[4] = "#31-("..sh..")"
++ else
++ p[3] = "#-("..sh..")%64"
++ p[4] = "#63-("..sh..")"
++ end
++end)
++
++-- Template strings for s390x instructions.
++map_op = {
++ -- Basic data processing instructions.
++ add_2 = "00000000005a0000RX-a|00000000001aRR|00000000b9f80000RRF-a|00000000e35a0000RXY-a|00000000e3080000RXY-a",
++
++-- and has several possible ways, need to find one, currently added two type of
++ and_2 = "0000000000540000RX-a|00000000140000RR|00000000b9f4RRF-a|00000000e3540000RXY-a|00000000b9800000RRE| 00000000b9e40000RRF-a",
++ and_c = "0000000000d40000SS-a",
++ and_i = "0000000000940000SI|00000000eb540000SIY",
++
++and_2 = "0000000000540000RX-a|0000000000140000RR|00000000b9f40000RRF-a|00000000e3540000RXY-a",
++ and_3 = "00000000e3800000RXY-a|00000000b9800000RRE|00000000b9e40000RRF-a",
++ and_c = "0000000000d40000SS-a",
++ and_i = "0000000000940000SI",
++ and_i4 = "00000000eb540000SIY"
++ and_i3 = "000000000a540000RI-a|000000000a550000RI-a|000000000c0a0000RIL-a|000000000a560000RI-a|000000000a570000RI-a|000000000c0bRIL-a"
++ --branch related instrcutions
++ bal = "0000000000450000RX-a",
++ balr = "0000000000050000RR",
++ bas = "00000000004d0000RX-a",
++ basr = "00000000000d0000RR",
++ bassm = "00000000000c0000RR",
++ bsm = "00000000000b0000RR",
++ bc = "0000000000470000Rx-b",
++ bcr = "00000000000070000RR",
++ bct = "0000000000460000RX-a",
++ bctr = "0000000000060000RR",
++ bctg = "00000000e3460000RXY-a",
++ bctgr = "00000000b9460000RRE",
++ bxh = "0000000000860000RS-a",
++ bxhg = "00000000eb440000RSY-a",
++ bxle = "0000000000870000RS-a",
++ bxleg = "00000000eb450000RSY-a",
++ bras = "000000000a750000RI-b",
++ brasl = "000000000c050000RIL-b",
++ brc = "000000000a740000RI-c",
++ brcl = "000000000c040000RIL-c",
++ brct = "000000000a760000RI-b",
++ brctg = "000000000a770000RI-b",
++ brctg = "00000000occ60000RIL-b",
++ brxh = "0000000000840000RSI",
++ brxhg = "00000000ec440000RIE-e",
++ brxle = "0000000000850000RSI",
++ brxlg = "00000000ec450000RIE-e",
++
++ ----subtraction (basic operation)
++ sub = "00000000005b0000RX-a"
++ sr = "00000000001b0000RR"
++ srk = "00000000b9f90000RRF-a"
++ sy = "00000000e35b0000RXY-a"
++ sg = "00000000e3090000RXY-a"
++ sgr = "00000000b9090000RRE"
++ sgrk = "00000000b9e90000RRF-a"
++ sgf = "00000000e3190000RXY-a"
++ sgfr = "00000000b9190000RRE"
++ sh = "00000000004b0000RX-a"
++ shy = "00000000e37b0000RXY-a"
++ shhhr = "00000000b9c90000RRF-a"
++ shhlr = "00000000b9d90000RX-a"
++ sl = "00000000005f0000RX-a"
++ slr = "00000000001f0000RR"
++ slrk = "00000000b9f80000RR"
++ sly = "00000000e35f0000RXY-a",
++ slg = "00000000e30b0000RXY-a",
++ slgr = "00000000b9080000RRE",
++ slgrk = "00000000b9eb0000RRF-a",
++ slgf = "00000000e3180000RXY-a",
++ slgfr = "00000000b91b0000RRE",
++ slhhhr = "00000000b9cb0000RRF-a",
++ slhhlr = "00000000b9db0000RRF-a",
++ slfi = "000000000c250000RIL-a",
++ slgfi = "000000000c240000RIL-a",
++ slb = "00000000e3990000RXY-a",
++ slbr = "00000000b9990000RRE" ,
++ slbg = "00000000e3890000RXY-a",
++ slbgr = "00000000b9890000RXY-a",
++
++ cmp_2 = "0000000000590000RX-a|0000000000190000RR|00000000e3590000RXY-a",
++ cmp_3 = "00000000e3200000RXY-a|00000000b9200000RRE|00000000e3300000RXY-a| 00000000b9300000RRE",
++
++ div_2 = "00000000005d0000RX-a|00000000001d0000RR|00000000e3970000RXY-a|00000000b9970000RRE",
++ div_3 ="00000000e3870000RXY-a|00000000b9870000RRE",
++ div_sing ="00000000e30d0000RXY-a|00000000b90d0000RRE|00000000e31d0000RXY-a|00000000b91d0000RRE",
++
++ eor_2 = "0000000000570000RX-a|0000000000170000RR|00000000b9f70000RRF-a|00000000e3570000RXY-a",
++ eor_3 = "00000000e3820000RXY-a|00000000b9820000RRE|00000000b9e70000RRF-a|
++ eor_c = "0000000000d70000SS-a",
++ eor_i = "0000000000970000SI| 00000000eb570000|000000000c060000a|000000000c070000RIL-a",
++
++ -- load instruction to be added and the following instructions need to be changed (are not s390x related)
++
++ neg_2 = "4b0003e0DMg",
++ neg_3 = "4b0003e0DMSg",
++ negs_2 = "6b0003e0DMg",
++ negs_3 = "6b0003e0DMSg",
++ adc_3 = "1a000000DNMg",
++ adcs_3 = "3a000000DNMg",
++ sbc_3 = "5a000000DNMg",
++ sbcs_3 = "7a000000DNMg",
++ ngc_2 = "5a0003e0DMg",
++ ngcs_2 = "7a0003e0DMg",
++ and_3 = "0a000000DNMg|12000000pDNig",
++ and_4 = "0a000000DNMSg",
++ orr_3 = "2a000000DNMg|32000000pDNig",
++ orr_4 = "2a000000DNMSg",
++ eor_3 = "4a000000DNMg|52000000pDNig",
++ eor_4 = "4a000000DNMSg",
++ ands_3 = "6a000000DNMg|72000000DNig",
++ ands_4 = "6a000000DNMSg",
++ tst_2 = "6a00001fNMg|7200001fNig",
++ tst_3 = "6a00001fNMSg",
++ bic_3 = "0a200000DNMg",
++ bic_4 = "0a200000DNMSg",
++ orn_3 = "2a200000DNMg",
++ orn_4 = "2a200000DNMSg",
++ eon_3 = "4a200000DNMg",
++ eon_4 = "4a200000DNMSg",
++ bics_3 = "6a200000DNMg",
++ bics_4 = "6a200000DNMSg",
++ movn_2 = "12800000DWg",
++ movn_3 = "12800000DWRg",
++ movz_2 = "52800000DWg",
++ movz_3 = "52800000DWRg",
++ movk_2 = "72800000DWg",
++ movk_3 = "72800000DWRg",
++ -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
++ mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
++ mov_3 = "2a0003e0DMSg",
++ mvn_2 = "2a2003e0DMg",
++ mvn_3 = "2a2003e0DMSg",
++ adr_2 = "10000000DBx",
++ adrp_2 = "90000000DBx",
++ csel_4 = "1a800000DNMCg",
++ csinc_4 = "1a800400DNMCg",
++ csinv_4 = "5a800000DNMCg",
++ csneg_4 = "5a800400DNMCg",
++ cset_2 = "1a9f07e0Dcg",
++ csetm_2 = "5a9f03e0Dcg",
++ cinc_3 = "1a800400DNmcg",
++ cinv_3 = "5a800000DNmcg",
++ cneg_3 = "5a800400DNmcg",
++ ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
++ ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
++ madd_4 = "1b000000DNMAg",
++ msub_4 = "1b008000DNMAg",
++ mul_3 = "1b007c00DNMg",
++ mneg_3 = "1b00fc00DNMg",
++ smaddl_4 = "9b200000DxNMwAx",
++ smsubl_4 = "9b208000DxNMwAx",
++ smull_3 = "9b207c00DxNMw",
++ smnegl_3 = "9b20fc00DxNMw",
++ smulh_3 = "9b407c00DNMx",
++ umaddl_4 = "9ba00000DxNMwAx",
++ umsubl_4 = "9ba08000DxNMwAx",
++ umull_3 = "9ba07c00DxNMw",
++ umnegl_3 = "9ba0fc00DxNMw",
++ umulh_3 = "9bc07c00DNMx",
++ udiv_3 = "1ac00800DNMg",
++ sdiv_3 = "1ac00c00DNMg",
++ -- Bit operations.
++ sbfm_4 = "13000000DN12w|93400000DN12x",
++ bfm_4 = "33000000DN12w|b3400000DN12x",
++ ubfm_4 = "53000000DN12w|d3400000DN12x",
++ extr_4 = "13800000DNM2w|93c00000DNM2x",
++ sxtb_2 = "13001c00DNw|93401c00DNx",
++ sxth_2 = "13003c00DNw|93403c00DNx",
++ sxtw_2 = "93407c00DxNw",
++ uxtb_2 = "53001c00DNw",
++ uxth_2 = "53003c00DNw",
++ sbfx_4 = op_alias("sbfm_4", alias_bfx),
++ bfxil_4 = op_alias("bfm_4", alias_bfx),
++ ubfx_4 = op_alias("ubfm_4", alias_bfx),
++ sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
++ bfi_4 = op_alias("bfm_4", alias_bfiz),
++ ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
++ lsl_3 = function(params, nparams)
++ if params and params[3]:byte() == 35 then
++ return alias_lslimm(params, nparams)
++ else
++ return op_template(params, "1ac02000DNMg", nparams)
++ end
++ end,
++ lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
++ asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
++ ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
++ clz_2 = "5ac01000DNg",
++ cls_2 = "5ac01400DNg",
++ rbit_2 = "5ac00000DNg",
++ rev_2 = "5ac00800DNw|dac00c00DNx",
++ rev16_2 = "5ac00400DNg",
++ rev32_2 = "dac00800DNx",
++ -- Loads and stores.
++ ["strb_*"] = "38000000DwL",
++ ["ldrb_*"] = "38400000DwL",
++ ["ldrsb_*"] = "38c00000DwL|38800000DxL",
++ ["strh_*"] = "78000000DwL",
++ ["ldrh_*"] = "78400000DwL",
++ ["ldrsh_*"] = "78c00000DwL|78800000DxL",
++ ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
++ ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
++ ["ldrsw_*"] = "98000000DxB|b8800000DxL",
++ -- NOTE: ldur etc. are handled by ldr et al.
++ ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
++ ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
++ ["ldpsw_*"] = "68400000DAxP",
++ -- Branches.
++ b_1 = "14000000B",
++ bl_1 = "94000000B",
++ blr_1 = "d63f0000Nx",
++ br_1 = "d61f0000Nx",
++ ret_0 = "d65f03c0",
++ ret_1 = "d65f0000Nx",
++ -- b.cond is added below.
++ cbz_2 = "34000000DBg",
++ cbnz_2 = "35000000DBg",
++ tbz_3 = "36000000DTBw|36000000DTBx",
++ tbnz_3 = "37000000DTBw|37000000DTBx",
++ -- Miscellaneous instructions.
++ -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
++ -- TODO: sys, sysl, ic, dc, at, tlbi
++ -- TODO: hint, yield, wfe, wfi, sev, sevl
++ -- TODO: clrex, dsb, dmb, isb
++ nop_0 = "d503201f",
++ brk_0 = "d4200000",
++ brk_1 = "d4200000W",
++ -- Floating point instructions.
++ fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
++ fabs_2 = "1e20c000DNf",
++ fneg_2 = "1e214000DNf",
++ fsqrt_2 = "1e21c000DNf",
++ fcvt_2 = "1e22c000DdNs|1e624000DsNd",
++ -- TODO: half-precision and fixed-point conversions.
++ fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
++ fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
++ fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
++ fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
++ fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
++ fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
++ fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
++ fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
++ fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
++ fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
++ scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
++ ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
++ frintn_2 = "1e244000DNf",
++ frintp_2 = "1e24c000DNf",
++ frintm_2 = "1e254000DNf",
++ frintz_2 = "1e25c000DNf",
++ frinta_2 = "1e264000DNf",
++ frintx_2 = "1e274000DNf",
++ frinti_2 = "1e27c000DNf",
++ fadd_3 = "1e202800DNMf",
++ fsub_3 = "1e203800DNMf",
++ fmul_3 = "1e200800DNMf",
++ fnmul_3 = "1e208800DNMf",
++ fdiv_3 = "1e201800DNMf",
++ fmadd_4 = "1f000000DNMAf",
++ fmsub_4 = "1f008000DNMAf",
++ fnmadd_4 = "1f200000DNMAf",
++ fnmsub_4 = "1f208000DNMAf",
++ fmax_3 = "1e204800DNMf",
++ fmaxnm_3 = "1e206800DNMf",
++ fmin_3 = "1e205800DNMf",
++ fminnm_3 = "1e207800DNMf",
++ fcmp_2 = "1e202000NMf|1e202008NZf",
++ fcmpe_2 = "1e202010NMf|1e202018NZf",
++ fccmp_4 = "1e200400NMVCf",
++ fccmpe_4 = "1e200410NMVCf",
++ fcsel_4 = "1e200c00DNMCf",
++ -- TODO: crc32*, aes*, sha*, pmull
++ -- TODO: SIMD instructions.
++}
++for cond,c in pairs(map_cond) do -- Adds one "b<cond>_1" template per entry of map_cond.
++ map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
++end
++------------------------------------------------------------------------------
++-- Handle opcodes defined with template strings.
++local function parse_template(params, template, nparams, pos) -- Encodes one template alternative for the given operands and stores the opcode at pos.
++ local op = tonumber(sub(template, 1, 12), 16) -- Opcode = first 12 hex digits; digits 13-16 are trailing zero padding and are ignored.
++ -- Example: "00000000005a0000" yields 0x5a (decimal 90).
++ local n,rs = 1,26 -- n indexes the next operand in params; rs is not used in this function.
++
++ parse_reg_type = false
++ -- Process each format character after the 16 hex digits (for "RX-a": first "R", then "X", then "-", then "a").
++ for p in gmatch(sub(template, 17), ".") do
++ local q = params[n]
++ if p == "R" then
++ op = op + parse_reg(q); n = n + 1
++ elseif p == "N" then
++ op = op + shl(parse_reg(q), 5); n = n + 1
++ elseif p == "M" then
++ op = op + shl(parse_reg(q), 16); n = n + 1
++ elseif p == "A" then
++ op = op + shl(parse_reg(q), 10); n = n + 1
++ elseif p == "m" then
++ op = op + shl(parse_reg(params[n-1]), 16) -- Re-uses the previous operand; n is not advanced.
++ elseif p == "p" then
++ if q == "sp" then params[n] = "@x31" end
++ elseif p == "g" then
++ if parse_reg_type == "x" then
++ op = op + 0x80000000
++ elseif parse_reg_type ~= "w" then
++ werror("bad register type")
++ end
++ parse_reg_type = false
++ elseif p == "f" then
++ if parse_reg_type == "d" then
++ op = op + 0x00400000
++ elseif parse_reg_type ~= "s" then
++ werror("bad register type")
++ end
++ parse_reg_type = false
++ elseif p == "x" or p == "w" or p == "d" or p == "s" then
++ if parse_reg_type ~= p then
++ werror("register size mismatch")
++ end
++ parse_reg_type = false
++ elseif p == "L" then
++ op = parse_load(params, nparams, n, op)
++ elseif p == "P" then
++ op = parse_load_pair(params, nparams, n, op)
++ elseif p == "B" then
++ local mode, v, s = parse_label(q, false); n = n + 1
++ local m = branch_type(op)
++ waction("REL_"..mode, v+m, s, 1)
++ elseif p == "I" then
++ op = op + parse_imm12(q); n = n + 1
++ elseif p == "i" then
++ op = op + parse_imm13(q); n = n + 1
++ elseif p == "W" then
++ op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
++ elseif p == "T" then
++ op = op + parse_imm6(q); n = n + 1
++ elseif p == "1" then
++ op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
++ elseif p == "2" then
++ op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
++ elseif p == "5" then
++ op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
++ elseif p == "V" then
++ op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
++ elseif p == "F" then
++ op = op + parse_fpimm(q); n = n + 1
++ elseif p == "Z" then
++ if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
++ n = n + 1
++ elseif p == "S" then
++ op = op + parse_shift(q); n = n + 1
++ elseif p == "X" then
++ op = op + parse_extend(q); n = n + 1
++ elseif p == "R" then -- NOTE(review): unreachable, "R" is already handled by the first branch of this chain.
++ op = op + parse_lslx16(q); n = n + 1
++ elseif p == "C" then
++ op = op + parse_cond(q, 0); n = n + 1
++ elseif p == "c" then
++ op = op + parse_cond(q, 1); n = n + 1
++ else
++ assert(false) -- Any other format character (including "D", "-", "Y", "a") ends up here.
++ end
++ end
++ wputpos(pos, op)
++end
++function op_template(params, template, nparams) -- Tries each "|"-separated alternative of template until one encodes the operands.
++ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end -- Help mode: the template with every run of 8 hex digits removed.
++ -- Limit number of section buffer positions used by a single dasm_put().
++ -- A single opcode needs a maximum of 3 positions.
++ if secpos+3 > maxsecpos then wflush() end
++ local pos = wpos()
++ local lpos, apos, spos = #actlist, #actargs, secpos -- Snapshot taken so a failed alternative can be rolled back.
++ local ok, err
++ for t in gmatch(template, "[^|]+") do
++ ok, err = pcall(parse_template, params, t, nparams, pos)
++ if ok then return end
++ secpos = spos -- Roll back: restore secpos and drop up to 3 entries appended to actlist/actargs.
++ actlist[lpos+1] = nil
++ actlist[lpos+2] = nil
++ actlist[lpos+3] = nil
++ actargs[apos+1] = nil
++ actargs[apos+2] = nil
++ actargs[apos+3] = nil
++ end
++ error(err, 0) -- No alternative matched: re-raise the error of the last attempt.
++end
++map_op[".template__"] = op_template
++------------------------------------------------------------------------------
++-- Pseudo-opcode to mark the position where the action list is to be emitted.
++map_op[".actionlist_1"] = function(params)
++  if not params then return "cvar" end
++  local cvar = params[1] -- Taken verbatim; the name is not syntax-checked.
++  wline(function(sink) writeactions(sink, cvar) end)
++end
++-- Pseudo-opcode to mark the position where the global enum is to be emitted.
++map_op[".globals_1"] = function(params)
++  if not params then return "prefix" end
++  local enumprefix = params[1] -- Taken verbatim; the prefix is not syntax-checked.
++  wline(function(sink) writeglobals(sink, enumprefix) end)
++end
++-- Pseudo-opcode to mark the position where the global names are to be emitted.
++map_op[".globalnames_1"] = function(params)
++  if not params then return "cvar" end
++  local cvar = params[1] -- Taken verbatim; the name is not syntax-checked.
++  wline(function(sink) writeglobalnames(sink, cvar) end)
++end
++-- Pseudo-opcode to mark the position where the extern names are to be emitted.
++map_op[".externnames_1"] = function(params)
++  if not params then return "cvar" end
++  local cvar = params[1] -- Taken verbatim; the name is not syntax-checked.
++  wline(function(sink) writeexternnames(sink, cvar) end)
++end
++------------------------------------------------------------------------------
++-- Label pseudo-opcode (converted from trailing colon form).
++map_op[".label_1"] = function(params)
++  if not params then return "[1-9] | ->global | =>pcexpr" end
++  if secpos+1 > maxsecpos then wflush() end
++  local kind, num, sym = parse_label(params[1], true)
++  if kind == "EXT" then werror("bad label definition") end -- An extern label cannot be defined here.
++  waction("LABEL_"..kind, num, sym, 1)
++end
++------------------------------------------------------------------------------
++-- Pseudo-opcodes for data storage.
++map_op[".long_*"] = function(params)
++  if not params then return "imm..." end
++  for _,text in ipairs(params) do
++    local word = tonumber(text)
++    if not word then werror("bad immediate `"..text.."'") end
++    if word < 0 then word = word + 2^32 end -- Negative values are shifted up by 2^32.
++    wputw(word)
++    if secpos+2 > maxsecpos then wflush() end
++  end
++end
++-- Alignment pseudo-opcode.
++map_op[".align_1"] = function(params)
++  if not params then return "numpow2" end
++  if secpos+1 > maxsecpos then wflush() end
++  local align = tonumber(params[1])
++  -- Accept exactly the powers of two 2, 4, ..., 256; anything else
++  -- (including a non-numeric operand) is rejected below.
++  local pow2 = 2
++  for _ = 1, 8 do
++    if align == pow2 then
++      waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
++      return
++    end
++    pow2 = pow2 * 2
++  end
++  -- No power of two matched.
++  werror("bad alignment")
++end
++------------------------------------------------------------------------------
++-- Pseudo-opcode for (primitive) type definitions (map to C types).
++map_op[".type_3"] = function(params, nparams)
++  if not params then
++    return nparams == 2 and "name, ctype" or "name, ctype, reg"
++  end
++  local tname, ctype, reg = params[1], params[2], params[3]
++  -- The type name must be an identifier and must not be registered yet.
++  if not match(tname, "^[%a_][%w_]*$") then
++    werror("bad type name `"..tname.."'")
++  end
++  if map_type[tname] then
++    werror("duplicate type `"..tname.."'")
++  end
++  -- Add #type to defines. A bit unclean to put it in map_archdef.
++  map_archdef["#"..tname] = "sizeof("..ctype..")"
++  -- Add new type and emit shortcut define.
++  local idx = ctypenum + 1
++  map_type[tname] = {
++    ctype = ctype,
++    ctypefmt = format("Dt%X(%%s)", idx),
++    reg = reg,
++  }
++  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", idx, ctype))
++  ctypenum = idx
++end
++map_op[".type_2"] = map_op[".type_3"]
++-- Dump type definitions.
++local function dumptypes(out, lvl)
++  -- Writes one line per registered type (name, C type, optional register), sorted by name.
++  local names = {}
++  for tname in pairs(map_type) do names[#names+1] = tname end
++  sort(names)
++  out:write("Type definitions:\n")
++  for _,tname in ipairs(names) do
++    local info = map_type[tname]
++    out:write(format(" %-20s %-20s %s\n", tname, info.ctype, info.reg or ""))
++  end
++  out:write("\n")
++end
++------------------------------------------------------------------------------
++-- Set the current section.
++function _M.section(num) -- Emits a SECTION action for section number num, then flushes.
++ waction("SECTION", num)
++ wflush(true) -- SECTION is a terminal action.
++end
++------------------------------------------------------------------------------
++-- Dump architecture description.
++function _M.dumparch(out) -- Writes the arch/version/release banner from _info, then the action dump.
++ out:write(format("DynASM %s version %s, released %s\n\n",
++ _info.arch, _info.version, _info.release))
++ dumpactions(out)
++end
++-- Dump all user defined elements.
++function _M.dumpdef(out, lvl) -- Dumps types, then globals, then externs to out.
++ dumptypes(out, lvl)
++ dumpglobals(out, lvl)
++ dumpexterns(out, lvl)
++end
++------------------------------------------------------------------------------
++-- Pass callbacks from/to the DynASM core.
++function _M.passcb(wl, we, wf, ww) -- Stores the four core callbacks and hands back this module's wflush.
++ wline, werror, wfatal, wwarn = wl, we, wf, ww
++ return wflush
++end
++-- Setup the arch-specific module.
++function _M.setup(arch, opt) -- Remembers the arch and option values in g_arch / g_opt.
++ g_arch, g_opt = arch, opt
++end
++-- Merge the core maps and the arch-specific maps.
++function _M.mergemaps(map_coreop, map_def) -- Chains the maps via __index metatables and returns map_op, map_def.
++ setmetatable(map_op, { __index = map_coreop }) -- Opcode lookups missing in map_op fall back to the core opcodes.
++ setmetatable(map_def, { __index = map_archdef }) -- Define lookups missing in map_def fall back to map_archdef.
++ return map_op, map_def
++end
++return _M
++------------------------------------------------------------------------------
+
+From 9583ba36deddbff4a8ffd734896c1b9787b1d2c8 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Thu, 10 Nov 2016 19:00:51 +0530
+Subject: [PATCH 005/260] Created s390x header file
+
+Currently copy of ppc.h, which is same as arm64.h, and added the architecture definition
+---
+ dynasm/dasm_s390x.h | 418 ++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 418 insertions(+)
+ create mode 100644 dynasm/dasm_s390x.h
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+new file mode 100644
+index 000000000..577920ac9
+--- /dev/null
++++ b/dynasm/dasm_s390x.h
+@@ -0,0 +1,418 @@
++/*
++** DynASM s390x encoding engine.
++** Copyright (C) 2005-2016 Mike Pall. All rights reserved.
++** Released under the MIT license. See dynasm.lua for full copyright notice.
++*/
++
++#include <stddef.h>
++#include <stdarg.h>
++#include <string.h>
++#include <stdlib.h>
++
++#define DASM_ARCH "s390"
++
++#ifndef DASM_EXTERN
++#define DASM_EXTERN(a,b,c,d) 0
++#endif
++
++/* Action definitions. */
++enum {
++ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
++ /* The following actions need a buffer position. */
++ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
++ /* The following actions also have an argument. */
++ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
++ DASM__MAX
++};
++
++/* Maximum number of section buffer positions for a single dasm_put() call. */
++#define DASM_MAXSECPOS 25
++
++/* DynASM encoder status codes. Action list offset or number are or'ed in. */
++#define DASM_S_OK 0x00000000
++#define DASM_S_NOMEM 0x01000000
++#define DASM_S_PHASE 0x02000000
++#define DASM_S_MATCH_SEC 0x03000000
++#define DASM_S_RANGE_I 0x11000000
++#define DASM_S_RANGE_SEC 0x12000000
++#define DASM_S_RANGE_LG 0x13000000
++#define DASM_S_RANGE_PC 0x14000000
++#define DASM_S_RANGE_REL 0x15000000
++#define DASM_S_UNDEF_LG 0x21000000
++#define DASM_S_UNDEF_PC 0x22000000
++
++/* Macros to convert positions (8 bit section + 24 bit index). */
++#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
++#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
++#define DASM_SEC2POS(sec) ((sec)<<24)
++#define DASM_POS2SEC(pos) ((pos)>>24)
++#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
++
++/* Action list type. */
++typedef const unsigned int *dasm_ActList;
++
++/* Per-section structure. */
++typedef struct dasm_Section {
++ int *rbuf; /* Biased buffer pointer (negative section bias). */
++ int *buf; /* True buffer pointer. */
++ size_t bsize; /* Buffer size in bytes. */
++ int pos; /* Biased buffer position. */
++ int epos; /* End of biased buffer position - max single put. */
++ int ofs; /* Byte offset into section. */
++} dasm_Section;
++
++/* Core structure holding the DynASM encoding state. */
++struct dasm_State {
++ size_t psize; /* Allocated size of this structure. */
++ dasm_ActList actionlist; /* Current actionlist pointer. */
++ int *lglabels; /* Local/global chain/pos ptrs. */
++ size_t lgsize; /* Size of lglabels in bytes. */
++ int *pclabels; /* PC label chains/pos ptrs. */
++ size_t pcsize; /* Size of pclabels in bytes. */
++ void **globals; /* Array of globals (bias -10). */
++ dasm_Section *section; /* Pointer to active section. */
++ size_t codesize; /* Total size of all code sections. */
++ int maxsection; /* 0 <= sectionidx < maxsection. */
++ int status; /* Status code. */
++ dasm_Section sections[1]; /* All sections. Alloc-extended. */
++};
++
++/* The size of the core structure depends on the max. number of sections. */
++#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
++
++
++/* Initialize DynASM state. */
++void dasm_init(Dst_DECL, int maxsection)
++{
++  dasm_State *D;
++  size_t psz = 0;
++  int secnum = 0;
++  Dst_REF = NULL;
++  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
++  D = Dst_REF;
++  D->psize = psz;
++  D->lglabels = D->pclabels = NULL;  /* Label arrays are allocated later. */
++  D->lgsize = D->pcsize = 0;
++  D->globals = NULL;
++  D->maxsection = maxsection;
++  while (secnum < maxsection) {
++    dasm_Section *sec = &D->sections[secnum];
++    sec->buf = NULL;  /* Need this for pass3. */
++    sec->rbuf = sec->buf - DASM_SEC2POS(secnum);
++    sec->bsize = 0;
++    sec->epos = 0;  /* Wrong, but is recalculated after resize. */
++    secnum++;
++  }
++}
++
++/* Free DynASM state. */
++void dasm_free(Dst_DECL)
++{
++  dasm_State *D = Dst_REF;
++  dasm_Section *sec = D->sections, *end = D->sections + D->maxsection;
++  for (; sec < end; sec++) {  /* Release every section buffer that was allocated. */
++    if (sec->buf) DASM_M_FREE(Dst, sec->buf, sec->bsize);
++  }
++  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
++  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
++  DASM_M_FREE(Dst, D, D->psize);  /* Finally the state structure itself. */
++}
++
++/* Setup global label array. Must be called before dasm_setup(). */
++void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
++{
++ dasm_State *D = Dst_REF;
++ D->globals = gl - 10; /* Negative bias to compensate for locals. */
++ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); /* Room for 10 local slots plus maxgl globals. */
++}
++
++/* Grow PC label array. Can be called after dasm_setup(), too. */
++void dasm_growpc(Dst_DECL, unsigned int maxpc)
++{
++  dasm_State *D = Dst_REF;
++  size_t oldsz = D->pcsize;  /* Byte size before growing. */
++  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
++  memset((char *)D->pclabels + oldsz, 0, D->pcsize - oldsz);  /* Zero only the newly added tail. */
++}
++
++/* Setup encoder. */
++void dasm_setup(Dst_DECL, const void *actionlist)
++{
++  dasm_State *D = Dst_REF;
++  int secnum = D->maxsection;
++  D->actionlist = (dasm_ActList)actionlist;
++  D->status = DASM_S_OK;
++  D->section = &D->sections[0];
++  memset((void *)D->lglabels, 0, D->lgsize);
++  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
++  while (secnum-- > 0) {  /* Rewind every section to its biased start position. */
++    D->sections[secnum].pos = DASM_SEC2POS(secnum);
++    D->sections[secnum].ofs = 0;
++  }
++}
++
++
++#ifdef DASM_CHECKS /* With checks: CK/CKPL store an error status in D and return from the enclosing function. */
++#define CK(x, st) \
++ do { if (!(x)) { \
++ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
++#define CKPL(kind, st) \
++ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
++ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
++#else /* Without checks: both macros expand to nothing. */
++#define CK(x, st) ((void)0)
++#define CKPL(kind, st) ((void)0)
++#endif
++
++/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
++void dasm_put(Dst_DECL, int start, ...)
++{
++ va_list ap;
++ dasm_State *D = Dst_REF;
++ dasm_ActList p = D->actionlist + start;
++ dasm_Section *sec = D->section;
++ int pos = sec->pos, ofs = sec->ofs;
++ int *b;
++
++ if (pos >= sec->epos) { /* Section buffer exhausted: grow it and recompute the biased pointers. */
++ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
++ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
++ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
++ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
++ }
++
++ b = sec->rbuf;
++ b[pos++] = start; /* Record the action list start offset; passes 2 and 3 re-walk the list from it. */
++
++ va_start(ap, start);
++ while (1) {
++ unsigned int ins = *p++;
++ unsigned int action = (ins >> 16);
++ if (action >= DASM__MAX) { /* Not an action: a plain instruction word, counted as 4 bytes. */
++ ofs += 4;
++ } else {
++ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; /* Actions from DASM_REL_PC on consume one int argument. */
++ switch (action) {
++ case DASM_STOP: goto stop;
++ case DASM_SECTION:
++ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
++ D->section = &D->sections[n]; goto stop;
++ case DASM_ESC: p++; ofs += 4; break;
++ case DASM_REL_EXT: break;
++ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
++ case DASM_REL_LG:
++ n = (ins & 2047) - 10; pl = D->lglabels + n;
++ /* Bkwd rel or global. */
++ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
++ pl += 10; n = *pl;
++ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
++ goto linkrel;
++ case DASM_REL_PC:
++ pl = D->pclabels + n; CKPL(pc, PC);
++ putrel:
++ n = *pl;
++ if (n < 0) { /* Label exists. Get label pos and store it. */
++ b[pos] = -n;
++ } else {
++ linkrel:
++ b[pos] = n; /* Else link to rel chain, anchored at label. */
++ *pl = pos;
++ }
++ pos++;
++ break;
++ case DASM_LABEL_LG:
++ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
++ case DASM_LABEL_PC:
++ pl = D->pclabels + n; CKPL(pc, PC);
++ putlabel:
++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
++ }
++ *pl = -pos; /* Label exists now. */
++ b[pos++] = ofs; /* Store pass1 offset estimate. */
++ break;
++ case DASM_IMM:
++#ifdef DASM_CHECKS
++ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
++#endif
++ n >>= ((ins>>10)&31);
++#ifdef DASM_CHECKS
++ if (ins & 0x8000)
++ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
++ else
++ CK((n>>((ins>>5)&31)) == 0, RANGE_I);
++#endif
++ b[pos++] = n;
++ break;
++ case DASM_IMMSH:
++ CK((n >> 6) == 0, RANGE_I);
++ b[pos++] = n;
++ break;
++ }
++ }
++ }
++stop:
++ va_end(ap);
++ sec->pos = pos; /* Write the advanced buffer position and byte offset back to the section. */
++ sec->ofs = ofs;
++}
++#undef CK
++
++/* Pass 2: Link sections, shrink aligns, fix label offsets. */
++int dasm_link(Dst_DECL, size_t *szp) /* Stores the total code size in *szp; returns a DASM_S_* status. */
++{
++ dasm_State *D = Dst_REF;
++ int secnum;
++ int ofs = 0;
++
++#ifdef DASM_CHECKS
++ *szp = 0;
++ if (D->status != DASM_S_OK) return D->status;
++ {
++ int pc;
++ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
++ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; /* A positive entry is a still-open reference chain. */
++ }
++#endif
++
++ { /* Handle globals not defined in this translation unit. */
++ int idx;
++ for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
++ int n = D->lglabels[idx];
++ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
++ }
++ }
++
++ /* Combine all code sections. No support for data sections (yet). */
++ for (secnum = 0; secnum < D->maxsection; secnum++) {
++ dasm_Section *sec = D->sections + secnum;
++ int *b = sec->rbuf;
++ int pos = DASM_SEC2POS(secnum);
++ int lastpos = sec->pos;
++
++ while (pos != lastpos) {
++ dasm_ActList p = D->actionlist + b[pos++]; /* Each dasm_put() record starts with its action list offset. */
++ while (1) {
++ unsigned int ins = *p++;
++ unsigned int action = (ins >> 16);
++ switch (action) {
++ case DASM_STOP: case DASM_SECTION: goto stop;
++ case DASM_ESC: p++; break;
++ case DASM_REL_EXT: break;
++ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
++ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
++ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; /* Adjust the pass 1 offset estimate by the running ofs. */
++ case DASM_IMM: case DASM_IMMSH: pos++; break;
++ }
++ }
++ stop: (void)0;
++ }
++ ofs += sec->ofs; /* Next section starts right after current section. */
++ }
++
++ D->codesize = ofs; /* Total size of all code sections */
++ *szp = ofs;
++ return DASM_S_OK;
++}
++
++#ifdef DASM_CHECKS
++#define CK(x, st) \
++  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
++#else
++#define CK(x, st)	((void)0)
++#endif
++#define DASM_NOP4	0x47000000u	/* s390x "bc 0,0": a 4-byte no-op, used as alignment padding. */
++/* Pass 3: Encode sections into buffer. Returns DASM_S_OK or an error status. */
++int dasm_encode(Dst_DECL, void *buffer)
++{
++  dasm_State *D = Dst_REF;
++  char *base = (char *)buffer;
++  unsigned int *cp = (unsigned int *)buffer;
++  int secnum;
++
++  /* Encode all code sections. No support for data sections (yet). */
++  for (secnum = 0; secnum < D->maxsection; secnum++) {
++    dasm_Section *sec = D->sections + secnum;
++    int *b = sec->buf;
++    int *endb = sec->rbuf + sec->pos;
++
++    while (b != endb) {
++      dasm_ActList p = D->actionlist + *b++;
++      while (1) {
++	unsigned int ins = *p++;
++	unsigned int action = (ins >> 16);
++	int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
++	switch (action) {
++	case DASM_STOP: case DASM_SECTION: goto stop;
++	case DASM_ESC: *cp++ = *p++; break;
++	case DASM_REL_EXT:
++	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
++	  goto patchrel;
++	case DASM_ALIGN:  /* Pad with no-ops; 0x60000000 (the PPC nop) would be a store (STD) on s390x. */
++	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = DASM_NOP4;
++	  break;
++	case DASM_REL_LG:
++	  CK(n >= 0, UNDEF_LG);  /* Falls through to the PC-relative handling. */
++	case DASM_REL_PC:
++	  CK(n >= 0, UNDEF_PC);
++	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
++	patchrel:  /* NOTE(review): range check and masks below are still the PPC branch encoding. */
++	  CK((n & 3) == 0 &&
++	      (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
++	       ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
++	  cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
++	  break;
++	case DASM_LABEL_LG:  /* Labels numbered 20 and up are globals: publish their address. */
++	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
++	  break;
++	case DASM_LABEL_PC: break;
++	case DASM_IMM:
++	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
++	  break;
++	case DASM_IMMSH:
++	  cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
++	  break;
++	default: *cp++ = ins; break;  /* Plain instruction word: copy it out. */
++	}
++      }
++      stop: (void)0;
++    }
++  }
++
++  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
++    return DASM_S_PHASE;
++  return DASM_S_OK;
++}
++#undef CK
++
++/* Get PC label offset. */
++int dasm_getpclabel(Dst_DECL, unsigned int pc)
++{
++  dasm_State *D = Dst_REF;
++  int pos;
++  if (pc*sizeof(int) >= D->pcsize) return -2;  /* Out of range. */
++  pos = D->pclabels[pc];
++  if (pos < 0) return *DASM_POS2PTR(D, -pos);  /* Defined: value stored at the label position. */
++  if (pos > 0) return -1;  /* Undefined. */
++  return -2;  /* Unused. */
++}
++
++#ifdef DASM_CHECKS
++/* Optional sanity checker to call between isolated encoding steps. */
++int dasm_checkstep(Dst_DECL, int secmatch)
++{
++  dasm_State *D = Dst_REF;
++  int i = 1;
++  /* Local labels 1..9: a positive entry is reported as undefined, others are reset. */
++  while (D->status == DASM_S_OK && i <= 9) {
++    if (D->lglabels[i] > 0) D->status = DASM_S_UNDEF_LG|i;
++    else D->lglabels[i] = 0;
++    i++;
++  }
++  if (D->status == DASM_S_OK && secmatch >= 0 &&
++      D->section != &D->sections[secmatch])
++    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
++  return D->status;
++}
++#endif
+
+From 633376f0cace757869a4b055c6b6f5a7070d5169 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Fri, 11 Nov 2016 12:04:51 +0530
+Subject: [PATCH 006/260] Update lj_arch.h
+
+changed S390 to S390x
+---
+ src/lj_arch.h | 20 +++++---------------
+ 1 file changed, 5 insertions(+), 15 deletions(-)
+
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index 5155bf691..2638a9412 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -29,7 +29,7 @@
+ #define LUAJIT_ARCH_mips32 6
+ #define LUAJIT_ARCH_MIPS64 7
+ #define LUAJIT_ARCH_mips64 7
+-#define LUAJIT_ARCH_S390 8
++#define LUAJIT_ARCH_S390x 8
+
+ /* Target OS. */
+ #define LUAJIT_OS_OTHER 0
+@@ -50,8 +50,8 @@
+ #define LUAJIT_TARGET LUAJIT_ARCH_ARM
+ #elif defined(__aarch64__)
+ #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
+-#elif defined(__s390__) || defined(__s390) || defined(__S390__) || defined(__S390) || defined(S390)
+-#define LUAJIT_TARGET LUAJIT_ARCH_S390
++#elif defined(__s390x__) || defined(__s390x) || defined(__S390x__) || defined(__S390x) || defined(S390x)
++#define LUAJIT_TARGET LUAJIT_ARCH_S390x
+ #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
+ #define LUAJIT_TARGET LUAJIT_ARCH_PPC
+ #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
+@@ -235,20 +235,10 @@
+
+ #elif LUAJIT_TARGET == LUAJIT_ARCH_S390
+
+- #define LJ_ARCH_NAME "s390"
++ #define LJ_ARCH_NAME "s390x"
+ #define LJ_ARCH_BITS 64
+ #define LJ_ARCH_ENDIAN LUAJIT_BE
+- #if !defined(LJ_ARCH_HASFPU) && __SOFTFP__
+- #define LJ_ARCH_HASFPU 1
+- #endif
+- #define LJ_ABI_EABI 1
+ #define LJ_TARGET_S390 1
+- #define LJ_TARGET_EHRETREG 0
+- #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
+- #define LJ_TARGET_MASKSHIFT 0
+- #define LJ_TARGET_MASKROT 1
+- #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
+- #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+
+ #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
+
+@@ -399,7 +389,7 @@
+ #if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
+ #error "Need at least Clang 3.5 or newer"
+ #endif
+-#elif LJ_TARGET_S390
++#elif LJ_TARGET_S390x
+ #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
+ #error "Need at least GCC 4.2 or newer"
+ #endif
+
+From d093cff04ade9b61a9a8e926387f81b40a3bebd7 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Fri, 11 Nov 2016 12:08:47 +0530
+Subject: [PATCH 007/260] Update Makefile
+
+changed S390 to S390x
+---
+ src/Makefile | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/Makefile b/src/Makefile
+index 9f7d28ce3..40cd10159 100644
+--- a/src/Makefile
++++ b/src/Makefile
+@@ -238,8 +238,8 @@ else
+ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
+ TARGET_LJARCH= arm
+ else
+-ifneq (,$(findstring LJ_TARGET_S390 ,$(TARGET_TESTARCH)))
+- TARGET_LJARCH= s390
++ifneq (,$(findstring LJ_TARGET_S390x ,$(TARGET_TESTARCH)))
++ TARGET_LJARCH= s390x
+ else
+ ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
+ TARGET_LJARCH= arm64
+
+From 9c3c87bc61d7aac050c4f9b0a43ec53e55e96590 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Fri, 11 Nov 2016 12:13:30 +0530
+Subject: [PATCH 009/260] Update lj_target_s390x.h
+
+Removed the ARM instructions that had not been replaced.
+Changed S390 to S390x.
+---
+ src/lj_target_s390x.h | 129 ++++--------------------------------------
+ 1 file changed, 12 insertions(+), 117 deletions(-)
+
+diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
+index 7da2063d2..27bb34963 100644
+--- a/src/lj_target_s390x.h
++++ b/src/lj_target_s390x.h
+@@ -10,12 +10,15 @@
+
+ #define GPRDEF(_) \
+ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
+- _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(15) \
++ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _R(15) \
+ #if LJ_SOFTFP
+ #define FPRDEF(_)
+ #else
+ #define FPRDEF(_) \
+- _(F0) _(F2) _(F4) _(F6)
++ _(F0) _(F1) _(F2) _(F3) \
++ _(F4) _(F5) _(F6) _(F7) \
++ _(F8) _(F9) _(F10) _(F11) \
++ _(F12) _(F13) _(F14) _(F15)
+ #endif
+ #define VRIDDEF(_)
+
+@@ -150,54 +153,7 @@ typedef struct {
+ #define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
+ #define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
+
+-typedef enum S390Ins {
+-
+- // Unsupported in S390
+- #ARMI_LDRSB = 0xe01000d0,
+- #ARMI_S = 0x000100000,
+- #ARMI_LDRD = 0xe00000d0,
+- #ARMI_ADC = 0xe0a00000,
+- #ARMI_SBC = 0xe0c00000,
+- #ARMI_STRB = 0xe4400000,
+- #ARMI_STRH = 0xe00000b0,
+- #ARMI_STRD = 0xe00000f0,
+- #ARMI_BL = 0xeb000000,
+- #ARMI_BLX = 0xfa000000,
+- #ARMI_BLXr = 0xe12fff30,
+- #ARMI_BIC = 0xe1c00000,
+- #ARMI_ORR = 0xe1800000,
+- #ARMI_LDRB = 0xe4500000,
+- #ARMI_MVN = 0xe1e00000,
+- #ARMI_LDRSH = 0xe01000f0,
+- #ARMI_NOP = 0xe1a00000,
+- #ARMI_PUSH = 0xe92d0000,
+- #ARMI_RSB = 0xe0600000,
+- #ARMI_RSC = 0xe0e00000,
+- #ARMI_TEQ = 0xe1300000,
+- #ARMI_CCAL = 0xe0000000,
+- #ARMI_K12 = 0x02000000,
+- #ARMI_KNEG = 0x00200000,
+- #ARMI_LS_W = 0x00200000,
+- #ARMI_LS_U = 0x00800000,
+- #ARMI_LS_P = 0x01000000,
+- #ARMI_LS_R = 0x02000000,
+- #ARMI_LSX_I = 0x00400000,
+-
+-
+- #ARMI_SUB = 0xe0400000,
+- #ARMI_ADD = 0xe0800000,
+- #ARMI_AND = 0xe0000000,
+- #ARMI_EOR = 0xe0200000,
+- #ARMI_MUL = 0xe0000090,
+- #ARMI_LDR = 0xe4100000,
+- #ARMI_CMP = 0xe1500000,
+- #ARMI_LDRH = 0xe01000b0,
+- #ARMI_B = 0xea000000,
+- #ARMI_MOV = 0xe1a00000,
+- #ARMI_STR = 0xe4000000,
+- #ARMI_TST = 0xe1100000,
+- #ARMI_SMULL = 0xe0c00090,
+- #ARMI_CMN = 0xe1700000,
++typedef enum S390xIns {
+ S390I_SR = 0x1B000000,
+ S390I_AR = 0x1A000000,
+ S390I_NR = 0x14000000,
+@@ -212,76 +168,15 @@ typedef enum S390Ins {
+ S390I_TM = 0x91000000,
+ S390I_MP = 0xbd000090,
+ S390I_CLR = 0x15000000,
++} S390xIns;
+
+- /* ARMv6 */
+- #ARMI_REV = 0xe6bf0f30,
+- #ARMI_SXTB = 0xe6af0070,
+- #ARMI_SXTH = 0xe6bf0070,
+- #ARMI_UXTB = 0xe6ef0070,
+- #ARMI_UXTH = 0xe6ff0070,
+-
+- /* ARMv6T2 */
+- #ARMI_MOVW = 0xe3000000,
+- #ARMI_MOVT = 0xe3400000,
+-
+- /* VFP */
+- ARMI_VMOV_D = 0xeeb00b40,
+- ARMI_VMOV_S = 0xeeb00a40,
+- ARMI_VMOVI_D = 0xeeb00b00,
+-
+- ARMI_VMOV_R_S = 0xee100a10,
+- ARMI_VMOV_S_R = 0xee000a10,
+- ARMI_VMOV_RR_D = 0xec500b10,
+- ARMI_VMOV_D_RR = 0xec400b10,
+-
+- ARMI_VADD_D = 0xee300b00,
+- ARMI_VSUB_D = 0xee300b40,
+- ARMI_VMUL_D = 0xee200b00,
+- ARMI_VMLA_D = 0xee000b00,
+- ARMI_VMLS_D = 0xee000b40,
+- ARMI_VNMLS_D = 0xee100b00,
+- ARMI_VDIV_D = 0xee800b00,
+-
+- ARMI_VABS_D = 0xeeb00bc0,
+- ARMI_VNEG_D = 0xeeb10b40,
+- ARMI_VSQRT_D = 0xeeb10bc0,
+-
+- ARMI_VCMP_D = 0xeeb40b40,
+- ARMI_VCMPZ_D = 0xeeb50b40,
+-
+- ARMI_VMRS = 0xeef1fa10,
+-
+- ARMI_VCVT_S32_F32 = 0xeebd0ac0,
+- ARMI_VCVT_S32_F64 = 0xeebd0bc0,
+- ARMI_VCVT_U32_F32 = 0xeebc0ac0,
+- ARMI_VCVT_U32_F64 = 0xeebc0bc0,
+- ARMI_VCVTR_S32_F32 = 0xeebd0a40,
+- ARMI_VCVTR_S32_F64 = 0xeebd0b40,
+- ARMI_VCVTR_U32_F32 = 0xeebc0a40,
+- ARMI_VCVTR_U32_F64 = 0xeebc0b40,
+- ARMI_VCVT_F32_S32 = 0xeeb80ac0,
+- ARMI_VCVT_F64_S32 = 0xeeb80bc0,
+- ARMI_VCVT_F32_U32 = 0xeeb80a40,
+- ARMI_VCVT_F64_U32 = 0xeeb80b40,
+- ARMI_VCVT_F32_F64 = 0xeeb70bc0,
+- ARMI_VCVT_F64_F32 = 0xeeb70ac0,
+-
+- ARMI_VLDR_S = 0xed100a00,
+- ARMI_VLDR_D = 0xed100b00,
+- ARMI_VSTR_S = 0xed000a00,
+- ARMI_VSTR_D = 0xed000b00,
+-} S390Ins;
+-
+-typedef enum S390Shift {
++typedef enum S390xShift {
+ S390SH_SLL, S390SH_SRL, S390SH_SRA
+- # Adjustment needed for ROR
+-} S390Shift;
++} S390xShift;
+
+ /* ARM condition codes. */
+-typedef enum ARMCC {
+- CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
+- CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
+- CC_HS = CC_CS, CC_LO = CC_CC
+-} ARMCC;
++typedef enum S390xCC {
++
++} S390xCC;
+
+ #endif
+
+From 96afe3e331fc6aef12d3479ad2e8ae495bb7fc12 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Tue, 15 Nov 2016 10:42:11 +0530
+Subject: [PATCH 010/260] Create vm_s390x.dasc
+
+Created the vm_s390x.dasc file.
+It is a copy of vm_x86.dasc.
+Work to make it specific to s390x is in progress.
+---
+ src/vm_s390x.dasc | 5779 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 5779 insertions(+)
+ create mode 100644 src/vm_s390x.dasc
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+new file mode 100644
+index 000000000..d7d618d3b
+--- /dev/null
++++ b/src/vm_s390x.dasc
+@@ -0,0 +1,5779 @@
++|// Low-level VM code for x86 CPUs.
++|// Bytecode interpreter, fast functions and helper functions.
++|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
++|
++|.if P64
++|.arch x64
++|.else
++|.arch x86
++|.endif
++|.section code_op, code_sub
++|
++|.actionlist build_actionlist
++|.globals GLOB_
++|.globalnames globnames
++|.externnames extnames
++|
++|//-----------------------------------------------------------------------
++|
++|.if P64
++|.define X64, 1
++|.if WIN
++|.define X64WIN, 1
++|.endif
++|.endif
++|
++|// Fixed register assignments for the interpreter.
++|// This is very fragile and has many dependencies. Caveat emptor.
++|.define BASE, edx // Not C callee-save, refetched anyway.
++|.if not X64
++|.define KBASE, edi // Must be C callee-save.
++|.define KBASEa, KBASE
++|.define PC, esi // Must be C callee-save.
++|.define PCa, PC
++|.define DISPATCH, ebx // Must be C callee-save.
++|.elif X64WIN
++|.define KBASE, edi // Must be C callee-save.
++|.define KBASEa, rdi
++|.define PC, esi // Must be C callee-save.
++|.define PCa, rsi
++|.define DISPATCH, ebx // Must be C callee-save.
++|.else
++|.define KBASE, r15d // Must be C callee-save.
++|.define KBASEa, r15
++|.define PC, ebx // Must be C callee-save.
++|.define PCa, rbx
++|.define DISPATCH, r14d // Must be C callee-save.
++|.endif
++|
++|.define RA, ecx
++|.define RAH, ch
++|.define RAL, cl
++|.define RB, ebp // Must be ebp (C callee-save).
++|.define RC, eax // Must be eax.
++|.define RCW, ax
++|.define RCH, ah
++|.define RCL, al
++|.define OP, RB
++|.define RD, RC
++|.define RDW, RCW
++|.define RDL, RCL
++|.if X64
++|.define RAa, rcx
++|.define RBa, rbp
++|.define RCa, rax
++|.define RDa, rax
++|.else
++|.define RAa, RA
++|.define RBa, RB
++|.define RCa, RC
++|.define RDa, RD
++|.endif
++|
++|.if not X64
++|.define FCARG1, ecx // x86 fastcall arguments.
++|.define FCARG2, edx
++|.elif X64WIN
++|.define CARG1, rcx // x64/WIN64 C call arguments.
++|.define CARG2, rdx
++|.define CARG3, r8
++|.define CARG4, r9
++|.define CARG1d, ecx
++|.define CARG2d, edx
++|.define CARG3d, r8d
++|.define CARG4d, r9d
++|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall.
++|.define FCARG2, CARG2d
++|.else
++|.define CARG1, rdi // x64/POSIX C call arguments.
++|.define CARG2, rsi
++|.define CARG3, rdx
++|.define CARG4, rcx
++|.define CARG5, r8
++|.define CARG6, r9
++|.define CARG1d, edi
++|.define CARG2d, esi
++|.define CARG3d, edx
++|.define CARG4d, ecx
++|.define CARG5d, r8d
++|.define CARG6d, r9d
++|.define FCARG1, CARG1d // Simulate x86 fastcall.
++|.define FCARG2, CARG2d
++|.endif
++|
++|// Type definitions. Some of these are only used for documentation.
++|.type L, lua_State
++|.type GL, global_State
++|.type TVALUE, TValue
++|.type GCOBJ, GCobj
++|.type STR, GCstr
++|.type TAB, GCtab
++|.type LFUNC, GCfuncL
++|.type CFUNC, GCfuncC
++|.type PROTO, GCproto
++|.type UPVAL, GCupval
++|.type NODE, Node
++|.type NARGS, int
++|.type TRACE, GCtrace
++|.type SBUF, SBuf
++|
++|// Stack layout while in interpreter. Must match with lj_frame.h.
++|//-----------------------------------------------------------------------
++|.if not X64 // x86 stack layout.
++|
++|.if WIN
++|
++|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
++|.macro saveregs_
++| push edi; push esi; push ebx
++| push extern lj_err_unwind_win
++| fs; push dword [0]
++| fs; mov [0], esp
++| sub esp, CFRAME_SPACE
++|.endmacro
++|.macro restoreregs
++| add esp, CFRAME_SPACE
++| fs; pop dword [0]
++| pop edi // Short for esp += 4.
++| pop ebx; pop esi; pop edi; pop ebp
++|.endmacro
++|
++|.else
++|
++|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
++|.macro saveregs_
++| push edi; push esi; push ebx
++| sub esp, CFRAME_SPACE
++|.endmacro
++|.macro restoreregs
++| add esp, CFRAME_SPACE
++| pop ebx; pop esi; pop edi; pop ebp
++|.endmacro
++|
++|.endif
++|
++|.macro saveregs
++| push ebp; saveregs_
++|.endmacro
++|
++|.if WIN
++|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
++|.define SAVE_NRES, aword [esp+aword*18]
++|.define SAVE_CFRAME, aword [esp+aword*17]
++|.define SAVE_L, aword [esp+aword*16]
++|//----- 16 byte aligned, ^^^ arguments from C caller
++|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
++|.define SAVE_R4, aword [esp+aword*14]
++|.define SAVE_R3, aword [esp+aword*13]
++|.define SAVE_R2, aword [esp+aword*12]
++|//----- 16 byte aligned
++|.define SAVE_R1, aword [esp+aword*11]
++|.define SEH_FUNC, aword [esp+aword*10]
++|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
++|.define UNUSED2, aword [esp+aword*8]
++|//----- 16 byte aligned
++|.define UNUSED1, aword [esp+aword*7]
++|.define SAVE_PC, aword [esp+aword*6]
++|.define TMP2, aword [esp+aword*5]
++|.define TMP1, aword [esp+aword*4]
++|//----- 16 byte aligned
++|.define ARG4, aword [esp+aword*3]
++|.define ARG3, aword [esp+aword*2]
++|.define ARG2, aword [esp+aword*1]
++|.define ARG1, aword [esp] //<-- esp while in interpreter.
++|//----- 16 byte aligned, ^^^ arguments for C callee
++|.else
++|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
++|.define SAVE_NRES, aword [esp+aword*14]
++|.define SAVE_CFRAME, aword [esp+aword*13]
++|.define SAVE_L, aword [esp+aword*12]
++|//----- 16 byte aligned, ^^^ arguments from C caller
++|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
++|.define SAVE_R4, aword [esp+aword*10]
++|.define SAVE_R3, aword [esp+aword*9]
++|.define SAVE_R2, aword [esp+aword*8]
++|//----- 16 byte aligned
++|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
++|.define SAVE_PC, aword [esp+aword*6]
++|.define TMP2, aword [esp+aword*5]
++|.define TMP1, aword [esp+aword*4]
++|//----- 16 byte aligned
++|.define ARG4, aword [esp+aword*3]
++|.define ARG3, aword [esp+aword*2]
++|.define ARG2, aword [esp+aword*1]
++|.define ARG1, aword [esp] //<-- esp while in interpreter.
++|//----- 16 byte aligned, ^^^ arguments for C callee
++|.endif
++|
++|// FPARGx overlaps ARGx and ARG(x+1) on x86.
++|.define FPARG3, qword [esp+qword*1]
++|.define FPARG1, qword [esp]
++|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
++|.define TMPQ, qword [esp+aword*4]
++|.define TMP3, ARG4
++|.define ARG5, TMP1
++|.define TMPa, TMP1
++|.define MULTRES, TMP2
++|
++|// Arguments for vm_call and vm_pcall.
++|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME!
++|
++|// Arguments for vm_cpcall.
++|.define INARG_CP_CALL, SAVE_ERRF
++|.define INARG_CP_UD, SAVE_NRES
++|.define INARG_CP_FUNC, SAVE_CFRAME
++|
++|//-----------------------------------------------------------------------
++|.elif X64WIN // x64/Windows stack layout
++|
++|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
++|.macro saveregs_
++| push rdi; push rsi; push rbx
++| sub rsp, CFRAME_SPACE
++|.endmacro
++|.macro saveregs
++| push rbp; saveregs_
++|.endmacro
++|.macro restoreregs
++| add rsp, CFRAME_SPACE
++| pop rbx; pop rsi; pop rdi; pop rbp
++|.endmacro
++|
++|.define SAVE_CFRAME, aword [rsp+aword*13]
++|.define SAVE_PC, dword [rsp+dword*25]
++|.define SAVE_L, dword [rsp+dword*24]
++|.define SAVE_ERRF, dword [rsp+dword*23]
++|.define SAVE_NRES, dword [rsp+dword*22]
++|.define TMP2, dword [rsp+dword*21]
++|.define TMP1, dword [rsp+dword*20]
++|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
++|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
++|.define SAVE_R4, aword [rsp+aword*8]
++|.define SAVE_R3, aword [rsp+aword*7]
++|.define SAVE_R2, aword [rsp+aword*6]
++|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
++|.define ARG5, aword [rsp+aword*4]
++|.define CSAVE_4, aword [rsp+aword*3]
++|.define CSAVE_3, aword [rsp+aword*2]
++|.define CSAVE_2, aword [rsp+aword*1]
++|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
++|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
++|
++|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
++|.define TMPQ, qword [rsp+aword*10]
++|.define MULTRES, TMP2
++|.define TMPa, ARG5
++|.define ARG5d, dword [rsp+aword*4]
++|.define TMP3, ARG5d
++|
++|//-----------------------------------------------------------------------
++|.else // x64/POSIX stack layout
++|
++|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
++|.macro saveregs_
++| push rbx; push r15; push r14
++|.if NO_UNWIND
++| push r13; push r12
++|.endif
++| sub rsp, CFRAME_SPACE
++|.endmacro
++|.macro saveregs
++| push rbp; saveregs_
++|.endmacro
++|.macro restoreregs
++| add rsp, CFRAME_SPACE
++|.if NO_UNWIND
++| pop r12; pop r13
++|.endif
++| pop r14; pop r15; pop rbx; pop rbp
++|.endmacro
++|
++|//----- 16 byte aligned,
++|.if NO_UNWIND
++|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
++|.define SAVE_R4, aword [rsp+aword*10]
++|.define SAVE_R3, aword [rsp+aword*9]
++|.define SAVE_R2, aword [rsp+aword*8]
++|.define SAVE_R1, aword [rsp+aword*7]
++|.define SAVE_RU2, aword [rsp+aword*6]
++|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
++|.else
++|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
++|.define SAVE_R4, aword [rsp+aword*8]
++|.define SAVE_R3, aword [rsp+aword*7]
++|.define SAVE_R2, aword [rsp+aword*6]
++|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
++|.endif
++|.define SAVE_CFRAME, aword [rsp+aword*4]
++|.define SAVE_PC, dword [rsp+dword*7]
++|.define SAVE_L, dword [rsp+dword*6]
++|.define SAVE_ERRF, dword [rsp+dword*5]
++|.define SAVE_NRES, dword [rsp+dword*4]
++|.define TMPa, aword [rsp+aword*1]
++|.define TMP2, dword [rsp+dword*1]
++|.define TMP1, dword [rsp] //<-- rsp while in interpreter.
++|//----- 16 byte aligned
++|
++|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
++|.define TMPQ, qword [rsp]
++|.define TMP3, dword [rsp+aword*1]
++|.define MULTRES, TMP2
++|
++|.endif
++|
++|//-----------------------------------------------------------------------
++|
++|// Instruction headers.
++|.macro ins_A; .endmacro
++|.macro ins_AD; .endmacro
++|.macro ins_AJ; .endmacro
++|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
++|.macro ins_AB_; movzx RB, RCH; .endmacro
++|.macro ins_A_C; movzx RC, RCL; .endmacro
++|.macro ins_AND; not RDa; .endmacro
++|
++|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
++|.macro ins_NEXT
++| mov RC, [PC] // Load the 32-bit instruction word at PC.
++| movzx RA, RCH // RA = bits 8..15 of the word.
++| movzx OP, RCL // OP = bits 0..7 of the word (OP aliases RB).
++| add PC, 4 // Step PC past the word just loaded.
++| shr RC, 16 // RC (also named RD) = upper 16 bits of the word.
++|.if X64
++| jmp aword [DISPATCH+OP*8] // Indirect jump through 8-byte table entries.
++|.else
++| jmp aword [DISPATCH+OP*4] // Indirect jump through 4-byte table entries.
++|.endif
++|.endmacro
++|
++|// Instruction footer.
++|.if 1
++| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
++| .define ins_next, ins_NEXT
++| .define ins_next_, ins_NEXT
++|.else
++| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
++| // Affects only certain kinds of benchmarks (and only with -j off).
++| // Around 10%-30% slower on Core2, a lot more slower on P4.
++| .macro ins_next
++| jmp ->ins_next
++| .endmacro
++| .macro ins_next_
++| ->ins_next:
++| ins_NEXT
++| .endmacro
++|.endif
++|
++|// Call decode and dispatch. RC/RD is not written here, so RD = nargs+1 is still set at the jump.
++|.macro ins_callt
++| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
++| mov PC, LFUNC:RB->pc // PC = the pc field of the function in RB.
++| mov RA, [PC] // Load the 32-bit instruction word at PC.
++| movzx OP, RAL // OP = bits 0..7; this overwrites RB because OP aliases RB.
++| movzx RA, RAH // RA = bits 8..15 of the word.
++| add PC, 4 // Step PC past the word just loaded.
++|.if X64
++| jmp aword [DISPATCH+OP*8] // Indirect jump through 8-byte table entries.
++|.else
++| jmp aword [DISPATCH+OP*4] // Indirect jump through 4-byte table entries.
++|.endif
++|.endmacro
++|
++|.macro ins_call // Store the current PC at [BASE-4], then dispatch via ins_callt.
++| // BASE = new base, RB = LFUNC, RD = nargs+1
++| mov [BASE-4], PC // Establishes the [BASE-4] = PC precondition listed in ins_callt.
++| ins_callt
++|.endmacro
++|
++|//-----------------------------------------------------------------------
++|
++|// Macros to test operand types. checktp only sets flags; the others also branch to target.
++|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
++|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro // Branches when the dword is unsigned >= LJ_TISNUM.
++|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro // Branches when the dword differs from LJ_TISNUM.
++|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro // Branches when the dword differs from LJ_TSTR.
++|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro // Branches when the dword differs from LJ_TTAB.
++|
++|// These operands must be used with movzx.
++|.define PC_OP, byte [PC-4]
++|.define PC_RA, byte [PC-3]
++|.define PC_RB, byte [PC-1]
++|.define PC_RC, byte [PC-2]
++|.define PC_RD, word [PC-2]
++|
++|.macro branchPC, reg // Adjust PC by a biased offset held in reg.
++| lea PC, [PC+reg*4-BCBIAS_J*4] // PC += (reg - BCBIAS_J) * 4; lea does not modify the flags.
++|.endmacro
++|
++|// Assumes DISPATCH is relative to GL.
++#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
++#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
++|
++#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
++|
++|// Decrement hashed hotcount and trigger trace recorder if zero.
++|.macro hotloop, reg // Overwrites reg.
++| mov reg, PC
++| shr reg, 1
++| and reg, HOTCOUNT_PCMASK // reg = (PC >> 1) & HOTCOUNT_PCMASK, used unscaled as an offset below.
++| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP // 16-bit counter -= HOTCOUNT_LOOP.
++| jb ->vm_hotloop // Taken when the subtraction borrows, i.e. the counter was below HOTCOUNT_LOOP.
++|.endmacro
++|
++|.macro hotcall, reg // Same as hotloop, but with HOTCOUNT_CALL and ->vm_hotcall. Overwrites reg.
++| mov reg, PC
++| shr reg, 1
++| and reg, HOTCOUNT_PCMASK // reg = (PC >> 1) & HOTCOUNT_PCMASK, used unscaled as an offset below.
++| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL // 16-bit counter -= HOTCOUNT_CALL.
++| jb ->vm_hotcall // Taken when the subtraction borrows, i.e. the counter was below HOTCOUNT_CALL.
++|.endmacro
++|
++|// Set current VM state. The argument is the suffix of an LJ_VMST_ constant (e.g. INTERP, C).
++|.macro set_vmstate, st
++| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st // Stores the bitwise complement; ".." pastes st onto LJ_VMST_.
++|.endmacro
++|
++|// x87 compares.
++|.macro fcomparepp // Compare and pop st0 >< st1.
++| fucomip st1
++| fpop
++|.endmacro
++|
++|.macro fpop1; fstp st1; .endmacro
++|
++|// Synthesize SSE FP constants. Each macro takes the destination reg and a scratch register tmp.
++|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
++|.if X64
++| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
++|.else
++| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1 // All ones, then each qword shifted right by 1 clears its top bit; tmp is unused here.
++|.endif
++|.endmacro
++|
++|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
++|.if X64
++| mov64 tmp, U64x(val,00000000); movd reg, tmp
++|.else
++| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51 // 0x51 moves dword 0 (val) into dword 1; the remaining dwords become zero.
++|.endif
++|.endmacro
++|
++|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
++| sseconst_hi reg, tmp, 80000000
++|.endmacro
++|.macro sseconst_1, reg, tmp // Synthesize 1.0.
++| sseconst_hi reg, tmp, 3ff00000
++|.endmacro
++|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
++| sseconst_hi reg, tmp, bff00000
++|.endmacro
++|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
++| sseconst_hi reg, tmp, 43300000
++|.endmacro
++|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
++| sseconst_hi reg, tmp, 43380000
++|.endmacro
++|
++|// Move table write barrier back. Overwrites reg.
++|.macro barrierback, tab, reg
++| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
++| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = previous gc.grayagain value.
++| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new gc.grayagain value.
++| mov tab->gclist, reg // Chain the previous value behind tab via its gclist field.
++|.endmacro
++|
++|//-----------------------------------------------------------------------
++
++/* Generate subroutines used by opcodes and other parts of the VM. */
++/* The .code_sub section should be last to help static branch prediction. */
++static void build_subroutines(BuildCtx *ctx)
++{
++ |.code_sub
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Return handling ----------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_returnp:
++ | test PC, FRAME_P
++ | jz ->cont_dispatch
++ |
++ | // Return from pcall or xpcall fast func.
++ | and PC, -8
++ | sub BASE, PC // Restore caller base.
++ | lea RAa, [RA+PC-8] // Rebase RA and prepend one result.
++ | mov PC, [BASE-4] // Fetch PC of previous frame.
++ | // Prepending may overwrite the pcall frame, so do it at the end.
++ | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
++ |
++ |->vm_returnc:
++ | add RD, 1 // RD = nresults+1
++ | jz ->vm_unwind_yield
++ | mov MULTRES, RD
++ | test PC, FRAME_TYPE
++ | jz ->BC_RET_Z // Handle regular return to Lua.
++ |
++ |->vm_return:
++ | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
++ | xor PC, FRAME_C
++ | test PC, FRAME_TYPE
++ | jnz ->vm_returnp
++ |
++ | // Return to C.
++ | set_vmstate C
++ | and PC, -8
++ | sub PC, BASE
++ | neg PC // Previous base = BASE - delta.
++ |
++ | sub RD, 1
++ | jz >2
++ |1: // Move results down.
++ |.if X64
++ | mov RBa, [BASE+RA]
++ | mov [BASE-8], RBa
++ |.else
++ | mov RB, [BASE+RA]
++ | mov [BASE-8], RB
++ | mov RB, [BASE+RA+4]
++ | mov [BASE-4], RB
++ |.endif
++ | add BASE, 8
++ | sub RD, 1
++ | jnz <1
++ |2:
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, PC
++ |3:
++ | mov RD, MULTRES
++ | mov RA, SAVE_NRES // RA = wanted nresults+1
++ |4:
++ | cmp RA, RD
++ | jne >6 // More/less results wanted?
++ |5:
++ | sub BASE, 8
++ | mov L:RB->top, BASE
++ |
++ |->vm_leave_cp:
++ | mov RAa, SAVE_CFRAME // Restore previous C frame.
++ | mov L:RB->cframe, RAa
++ | xor eax, eax // Ok return status for vm_pcall.
++ |
++ |->vm_leave_unw:
++ | restoreregs
++ | ret
++ |
++ |6:
++ | jb >7 // Less results wanted?
++ | // More results wanted. Check stack size and fill up results with nil.
++ | cmp BASE, L:RB->maxstack
++ | ja >8
++ | mov dword [BASE-4], LJ_TNIL
++ | add BASE, 8
++ | add RD, 1
++ | jmp <4
++ |
++ |7: // Less results wanted.
++ | test RA, RA
++ | jz <5 // But check for LUA_MULTRET+1.
++ | sub RA, RD // Negative result!
++ | lea BASE, [BASE+RA*8] // Correct top.
++ | jmp <5
++ |
++ |8: // Corner case: need to grow stack for filling up results.
++ | // This can happen if:
++ | // - A C function grows the stack (a lot).
++ | // - The GC shrinks the stack in between.
++ | // - A return back from a lua_call() with (high) nresults adjustment.
++ | mov L:RB->top, BASE // Save current top held in BASE (yes).
++ | mov MULTRES, RD // Need to fill only remainder with nil.
++ | mov FCARG2, RA
++ | mov FCARG1, L:RB
++ | call extern lj_state_growstack@8 // (lua_State *L, int n)
++ | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
++ | jmp <3
++ |
++ |->vm_unwind_yield:
++ | mov al, LUA_YIELD
++ | jmp ->vm_unwind_c_eh
++ |
++ |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall.
++ | // (void *cframe, int errcode)
++ |.if X64
++ | mov eax, CARG2d // Error return status for vm_pcall.
++ | mov rsp, CARG1
++ |.else
++ | mov eax, FCARG2 // Error return status for vm_pcall.
++ | mov esp, FCARG1
++ |.if WIN
++ | lea FCARG1, SEH_NEXT
++ | fs; mov [0], FCARG1
++ |.endif
++ |.endif
++ |->vm_unwind_c_eh: // Landing pad for external unwinder.
++ | mov L:RB, SAVE_L
++ | mov GL:RB, L:RB->glref
++ | mov dword GL:RB->vmstate, ~LJ_VMST_C
++ | jmp ->vm_leave_unw
++ |
++ |->vm_unwind_rethrow:
++ |.if X64 and not X64WIN
++ | mov FCARG1, SAVE_L
++ | mov FCARG2, eax
++ | restoreregs
++ | jmp extern lj_err_throw@8 // (lua_State *L, int errcode)
++ |.endif
++ |
++ |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall.
++ | // (void *cframe)
++ |.if X64
++ | and CARG1, CFRAME_RAWMASK
++ | mov rsp, CARG1
++ |.else
++ | and FCARG1, CFRAME_RAWMASK
++ | mov esp, FCARG1
++ |.if WIN
++ | lea FCARG1, SEH_NEXT
++ | fs; mov [0], FCARG1
++ |.endif
++ |.endif
++ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
++ | mov L:RB, SAVE_L
++ | mov RAa, -8 // Results start at BASE+RA = BASE-8.
++ | mov RD, 1+1 // Really 1+2 results, incr. later.
++ | mov BASE, L:RB->base
++ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
++ | add DISPATCH, GG_G2DISP
++ | mov PC, [BASE-4] // Fetch PC of previous frame.
++ | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
++ | set_vmstate INTERP
++ | jmp ->vm_returnc // Increments RD/MULTRES and returns.
++ |
++ |.if WIN and not X64
++ |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
++ | // (void *cframe, void *excptrec, void *unwinder, int errcode)
++ | mov [esp], FCARG1 // Return value for RtlUnwind.
++ | push FCARG2 // Exception record for RtlUnwind.
++ | push 0 // Ignored by RtlUnwind.
++ | push dword [FCARG1+CFRAME_OFS_SEH]
++ | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
++ | mov FCARG1, eax
++ | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
++ | ret // Jump to unwinder.
++ |.endif
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Grow stack for calls -----------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_growstack_c: // Grow stack for C function.
++ | mov FCARG2, LUA_MINSTACK
++ | jmp >2
++ |
++ |->vm_growstack_v: // Grow stack for vararg Lua function.
++ | sub RD, 8
++ | jmp >1
++ |
++ |->vm_growstack_f: // Grow stack for fixarg Lua function.
++ | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
++ | lea RD, [BASE+NARGS:RD*8-8]
++ |1:
++ | movzx RA, byte [PC-4+PC2PROTO(framesize)]
++ | add PC, 4 // Must point after first instruction.
++ | mov L:RB->base, BASE
++ | mov L:RB->top, RD
++ | mov SAVE_PC, PC
++ | mov FCARG2, RA
++ |2:
++ | // RB = L, L->base = new base, L->top = top
++ | mov FCARG1, L:RB
++ | call extern lj_state_growstack@8 // (lua_State *L, int n)
++ | mov BASE, L:RB->base
++ | mov RD, L:RB->top
++ | mov LFUNC:RB, [BASE-8]
++ | sub RD, BASE
++ | shr RD, 3
++ | add NARGS:RD, 1
++ | // BASE = new base, RB = LFUNC, RD = nargs+1
++ | ins_callt // Just retry the call.
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Entry points into the assembler VM ---------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_resume: // Setup C frame and resume thread.
++ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
++ | saveregs
++ |.if X64
++ | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
++ | mov SAVE_L, CARG1d
++ | mov RA, CARG2d
++ |.else
++ | mov L:RB, SAVE_L
++ | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
++ |.endif
++ | mov PC, FRAME_CP
++ | xor RD, RD
++ | lea KBASEa, [esp+CFRAME_RESUME]
++ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
++ | add DISPATCH, GG_G2DISP
++ | mov SAVE_PC, RD // Any value outside of bytecode is ok.
++ | mov SAVE_CFRAME, RDa
++ |.if X64
++ | mov SAVE_NRES, RD
++ | mov SAVE_ERRF, RD
++ |.endif
++ | mov L:RB->cframe, KBASEa
++ | cmp byte L:RB->status, RDL
++ | je >2 // Initial resume (like a call).
++ |
++ | // Resume after yield (like a return).
++ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
++ | set_vmstate INTERP
++ | mov byte L:RB->status, RDL
++ | mov BASE, L:RB->base
++ | mov RD, L:RB->top
++ | sub RD, RA
++ | shr RD, 3
++ | add RD, 1 // RD = nresults+1
++ | sub RA, BASE // RA = resultofs
++ | mov PC, [BASE-4]
++ | mov MULTRES, RD
++ | test PC, FRAME_TYPE
++ | jz ->BC_RET_Z
++ | jmp ->vm_return
++ |
++ |->vm_pcall: // Setup protected C frame and enter VM.
++ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
++ | saveregs
++ | mov PC, FRAME_CP
++ |.if X64
++ | mov SAVE_ERRF, CARG4d // Store the 4th argument (ef) in its save slot.
++ |.endif
++ | jmp >1 // Share the remaining setup with vm_call.
++ |
++ |->vm_call: // Setup C frame and enter VM.
++ | // (lua_State *L, TValue *base, int nres1)
++ | saveregs
++ | mov PC, FRAME_C
++ |
++ |1: // Entry point for vm_pcall above (PC = ftype).
++ |.if X64
++ | mov SAVE_NRES, CARG3d // Store the 3rd argument (nres1) in its save slot.
++ | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
++ | mov SAVE_L, CARG1d
++ | mov RA, CARG2d // RA = base argument.
++ |.else
++ | mov L:RB, SAVE_L
++ | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
++ |.endif
++ |
++ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
++ | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
++ | mov SAVE_CFRAME, KBASEa
++ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
++ | add DISPATCH, GG_G2DISP
++ |.if X64
++ | mov L:RB->cframe, rsp // L->cframe = current stack pointer.
++ |.else
++ | mov L:RB->cframe, esp // L->cframe = current stack pointer.
++ |.endif
++ |
++ |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
++ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
++ | set_vmstate INTERP
++ | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
++ | add PC, RA
++ | sub PC, BASE // PC = frame delta + frame type
++ |
++ | mov RD, L:RB->top
++ | sub RD, RA // Byte distance from RA to L->top.
++ | shr NARGS:RD, 3 // Divide by 8 to get a slot count.
++ | add NARGS:RD, 1 // RD = nargs+1
++ |
++ |->vm_call_dispatch:
++ | mov LFUNC:RB, [RA-8] // Load the word at RA-8 as the function object.
++ | cmp dword [RA-4], LJ_TFUNC // The word at RA-4 must equal LJ_TFUNC.
++ | jne ->vmeta_call // Ensure KBASE defined and != BASE.
++ |
++ |->vm_call_dispatch_f:
++ | mov BASE, RA
++ | ins_call
++ | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
++ |
++ |->vm_cpcall: // Setup protected C frame, call C.
++ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
++ | saveregs
++ |.if X64
++ | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
++ | mov SAVE_L, CARG1d
++ |.else
++ | mov L:RB, SAVE_L
++ | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
++ | mov RC, INARG_CP_UD // Get args before they are overwritten.
++ | mov RA, INARG_CP_FUNC
++ | mov BASE, INARG_CP_CALL // BASE temporarily holds the call target (see 'call BASE').
++ |.endif
++ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
++ |
++ | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
++ | sub KBASE, L:RB->top
++ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
++ | mov SAVE_ERRF, 0 // No error function.
++ | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
++ | add DISPATCH, GG_G2DISP
++ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
++ |
++ |.if X64
++ | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
++ | mov SAVE_CFRAME, KBASEa
++ | mov L:RB->cframe, rsp
++ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
++ |
++ | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
++ |.else
++ | mov ARG3, RC // Have to copy args downwards.
++ | mov ARG2, RA
++ | mov ARG1, L:RB
++ |
++ | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
++ | mov SAVE_CFRAME, KBASE
++ | mov L:RB->cframe, esp
++ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
++ |
++ | call BASE // (lua_State *L, lua_CFunction func, void *ud)
++ |.endif
++ | // TValue * (new base) or NULL returned in eax (RC).
++ | test RC, RC
++ | jz ->vm_leave_cp // No base? Just remove C frame.
++ | mov RA, RC // RA = returned base, as label 2 expects.
++ | mov PC, FRAME_CP // PC = frame type, as label 2 expects.
++ | jmp <2 // Else continue with the call.
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Metamethod handling ------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |//-- Continuation dispatch ----------------------------------------------
++ |
++ |->cont_dispatch: // Dispatch to the continuation stored below the meta base.
++ | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
++ | add RA, BASE // RA = address of the results.
++ | and PC, -8 // Clear the low 3 bits of PC, leaving the frame delta.
++ | mov RB, BASE // RB = meta base.
++ | sub BASE, PC // Restore caller BASE.
++ | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
++ | mov RC, RA // ... in [RC]
++ | mov PC, [RB-12] // Restore PC from [cont|PC].
++ |.if X64
++ | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug.
++ |.if FFI
++ | cmp RA, 1
++ | jbe >1 // cont is 0 or 1: special cases at label 1.
++ |.endif
++ | lea KBASEa, qword [=>0]
++ | add RAa, KBASEa // Add the address of =>0 to the stored 32 bit value.
++ |.else
++ | mov RA, dword [RB-16] // RA = continuation word from the frame.
++ |.if FFI
++ | cmp RA, 1
++ | jbe >1 // cont is 0 or 1: special cases at label 1.
++ |.endif
++ |.endif
++ | mov LFUNC:KBASE, [BASE-8]
++ | mov KBASE, LFUNC:KBASE->pc
++ | mov KBASE, [KBASE+PC2PROTO(k)] // Reload KBASE via the caller function's pc field.
++ | // BASE = base, RC = result, RB = meta base
++ | jmp RAa // Jump to continuation.
++ |
++ |.if FFI
++ |1:
++ | je ->cont_ffi_callback // cont = 1: return from FFI callback.
++ | // cont = 0: Tail call from C function.
++ | sub RB, BASE
++ | shr RB, 3 // (meta base - base) / 8.
++ | lea RD, [RB-1]
++ | jmp ->vm_call_tail
++ |.endif
++ |
++ |->cont_cat: // BASE = base, RC = result, RB = mbase
++ | movzx RA, PC_RB // RA = RB operand of the instruction.
++ | sub RB, 16
++ | lea RA, [BASE+RA*8] // Address of the slot named by that operand.
++ | sub RA, RB
++ | je ->cont_ra // Slot address equals mbase-16: store the result via cont_ra.
++ | neg RA
++ | shr RA, 3 // RA = ((mbase-16) - slot address) / 8.
++ |.if X64WIN
++ | mov CARG3d, RA
++ | mov L:CARG1d, SAVE_L
++ | mov L:CARG1d->base, BASE
++ | mov RCa, [RC]
++ | mov [RB], RCa // Copy the 8 byte result to [mbase-16].
++ | mov CARG2d, RB
++ |.elif X64
++ | mov L:CARG1d, SAVE_L
++ | mov L:CARG1d->base, BASE
++ | mov CARG3d, RA
++ | mov RAa, [RC]
++ | mov [RB], RAa // Copy the 8 byte result to [mbase-16].
++ | mov CARG2d, RB
++ |.else
++ | mov ARG3, RA
++ | mov RA, [RC+4]
++ | mov RC, [RC]
++ | mov [RB+4], RA // Copy the result to [mbase-16] as two 32 bit words.
++ | mov [RB], RC
++ | mov ARG2, RB
++ |.endif
++ | jmp ->BC_CAT_Z // Continue at BC_CAT_Z with the arguments set up above.
++ |
++ |//-- Table indexing metamethods -----------------------------------------
++ |
++ |->vmeta_tgets: // Table get with a string key: build a temp. TValue for the key.
++ | mov TMP1, RC // RC = GCstr *
++ | mov TMP2, LJ_TSTR
++ | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
++ | cmp PC_OP, BC_GGET
++ | jne >1 // Not BC_GGET: reload the table operand at label 1.
++ | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
++ | mov [RA], TAB:RB // RB = GCtab *
++ | mov dword [RA+4], LJ_TTAB
++ | mov RB, RA // RB = address of g->tmptv.
++ | jmp >2 // Skip the operand reload.
++ |
++ |->vmeta_tgetb: // Table get with the RC operand itself as the key.
++ | movzx RC, PC_RC
++ |.if DUALNUM
++ | mov TMP2, LJ_TISNUM
++ | mov TMP1, RC
++ |.else
++ | cvtsi2sd xmm0, RC // Convert the integer key to a double.
++ | movsd TMPQ, xmm0
++ |.endif
++ | lea RCa, TMPQ // Store temp. TValue in TMPQ.
++ | jmp >1
++ |
++ |->vmeta_tgetv: // Table get with the key in a stack slot.
++ | movzx RC, PC_RC // Reload TValue *k from RC.
++ | lea RC, [BASE+RC*8]
++ |1:
++ | movzx RB, PC_RB // Reload TValue *t from RB.
++ | lea RB, [BASE+RB*8]
++ |2:
++ |.if X64
++ | mov L:CARG1d, SAVE_L
++ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
++ | mov CARG2d, RB
++ | mov CARG3, RCa // May be 64 bit ptr to stack.
++ | mov L:RB, L:CARG1d // Keep L in RB for use after the call.
++ |.else
++ | mov ARG2, RB
++ | mov L:RB, SAVE_L
++ | mov ARG3, RC
++ | mov ARG1, L:RB
++ | mov L:RB->base, BASE
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
++ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
++ | mov BASE, L:RB->base // Reload BASE from L->base after the call.
++ | test RC, RC
++ | jz >3 // NULL: call the metamethod at label 3.
++ |->cont_ra: // BASE = base, RC = result
++ | movzx RA, PC_RA
++ |.if X64
++ | mov RBa, [RC]
++ | mov [BASE+RA*8], RBa // Copy the 8 byte result to slot RA.
++ |.else
++ | mov RB, [RC+4]
++ | mov RC, [RC]
++ | mov [BASE+RA*8+4], RB // Copy the result to slot RA as two 32 bit words.
++ | mov [BASE+RA*8], RC
++ |.endif
++ | ins_next
++ |
++ |3: // Call __index metamethod.
++ | // BASE = base, L->top = new base, stack = cont/func/t/k
++ | mov RA, L:RB->top
++ | mov [RA-12], PC // [cont|PC]
++ | lea PC, [RA+FRAME_CONT]
++ | sub PC, BASE // PC = (new base - base) + FRAME_CONT.
++ | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
++ | mov NARGS:RD, 2+1 // 2 args for func(t, k).
++ | jmp ->vm_call_dispatch_f
++ |
++ |->vmeta_tgetr: // Look up an integer key via lj_tab_getinth; nil if not found.
++ | mov FCARG1, TAB:RB
++ | mov RB, BASE // Save BASE.
++ | mov FCARG2, RC // Caveat: FCARG2 == BASE
++ | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
++ | // cTValue * or NULL returned in eax (RC).
++ | movzx RA, PC_RA
++ | mov BASE, RB // Restore BASE.
++ | test RC, RC
++ | jnz ->BC_TGETR_Z // Non-NULL: continue at BC_TGETR_Z with RC = slot pointer.
++ | mov dword [BASE+RA*8+4], LJ_TNIL // NULL: set the tag of slot RA to nil.
++ | jmp ->BC_TGETR2_Z
++ |
++ |//-----------------------------------------------------------------------
++ |
++ |->vmeta_tsets: // Table set with a string key: build a temp. TValue for the key.
++ | mov TMP1, RC // RC = GCstr *
++ | mov TMP2, LJ_TSTR
++ | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
++ | cmp PC_OP, BC_GSET
++ | jne >1 // Not BC_GSET: reload the table operand at label 1.
++ | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
++ | mov [RA], TAB:RB // RB = GCtab *
++ | mov dword [RA+4], LJ_TTAB
++ | mov RB, RA // RB = address of g->tmptv.
++ | jmp >2 // Skip the operand reload.
++ |
++ |->vmeta_tsetb: // Table set with the RC operand itself as the key.
++ | movzx RC, PC_RC
++ |.if DUALNUM
++ | mov TMP2, LJ_TISNUM
++ | mov TMP1, RC
++ |.else
++ | cvtsi2sd xmm0, RC // Convert the integer key to a double.
++ | movsd TMPQ, xmm0
++ |.endif
++ | lea RCa, TMPQ // Store temp. TValue in TMPQ.
++ | jmp >1
++ |
++ |->vmeta_tsetv: // Table set with the key in a stack slot.
++ | movzx RC, PC_RC // Reload TValue *k from RC.
++ | lea RC, [BASE+RC*8]
++ |1:
++ | movzx RB, PC_RB // Reload TValue *t from RB.
++ | lea RB, [BASE+RB*8]
++ |2:
++ |.if X64
++ | mov L:CARG1d, SAVE_L
++ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
++ | mov CARG2d, RB
++ | mov CARG3, RCa // May be 64 bit ptr to stack.
++ | mov L:RB, L:CARG1d // Keep L in RB for use after the call.
++ |.else
++ | mov ARG2, RB
++ | mov L:RB, SAVE_L
++ | mov ARG3, RC
++ | mov ARG1, L:RB
++ | mov L:RB->base, BASE
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
++ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
++ | mov BASE, L:RB->base // Reload BASE from L->base after the call.
++ | test RC, RC
++ | jz >3 // NULL: call the metamethod at label 3.
++ | // NOBARRIER: lj_meta_tset ensures the table is not black.
++ | movzx RA, PC_RA
++ |.if X64
++ | mov RBa, [BASE+RA*8]
++ | mov [RC], RBa // Copy the 8 byte value from slot RA to the returned slot.
++ |.else
++ | mov RB, [BASE+RA*8+4]
++ | mov RA, [BASE+RA*8]
++ | mov [RC+4], RB // Copy the value from slot RA as two 32 bit words.
++ | mov [RC], RA
++ |.endif
++ |->cont_nop: // BASE = base, (RC = result)
++ | ins_next
++ |
++ |3: // Call __newindex metamethod.
++ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
++ | mov RA, L:RB->top
++ | mov [RA-12], PC // [cont|PC]
++ | movzx RC, PC_RA
++ | // Copy value to third argument.
++ |.if X64
++ | mov RBa, [BASE+RC*8]
++ | mov [RA+16], RBa
++ |.else
++ | mov RB, [BASE+RC*8+4]
++ | mov RC, [BASE+RC*8]
++ | mov [RA+20], RB
++ | mov [RA+16], RC
++ |.endif
++ | lea PC, [RA+FRAME_CONT]
++ | sub PC, BASE // PC = (new base - base) + FRAME_CONT.
++ | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
++ | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
++ | jmp ->vm_call_dispatch_f
++ |
++ |->vmeta_tsetr: // Get the slot for an integer key via lj_tab_setinth.
++ |.if X64WIN
++ | mov L:CARG1d, SAVE_L
++ | mov CARG3d, RC
++ | mov L:CARG1d->base, BASE
++ | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
++ |.elif X64
++ | mov L:CARG1d, SAVE_L
++ | mov CARG2d, TAB:RB
++ | mov L:CARG1d->base, BASE
++ | mov RB, BASE // Save BASE.
++ | mov CARG3d, RC // Caveat: CARG3d == BASE.
++ |.else
++ | mov L:RA, SAVE_L
++ | mov ARG2, TAB:RB
++ | mov RB, BASE // Save BASE.
++ | mov ARG3, RC
++ | mov ARG1, L:RA
++ | mov L:RA->base, BASE
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
++ | // TValue * returned in eax (RC).
++ | movzx RA, PC_RA
++ | mov BASE, RB // Restore BASE.
++ | jmp ->BC_TSETR_Z // Continue at BC_TSETR_Z with RC = slot pointer.
++ |
++ |//-- Comparison metamethods ---------------------------------------------
++ |
++ |->vmeta_comp: // Call lj_meta_comp and branch on its result.
++ |.if X64
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE.
++ |.if X64WIN
++ | lea CARG3d, [BASE+RD*8]
++ | lea CARG2d, [BASE+RA*8]
++ |.else
++ | lea CARG2d, [BASE+RA*8]
++ | lea CARG3d, [BASE+RD*8]
++ |.endif
++ | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA.
++ | movzx CARG4d, PC_OP
++ |.else
++ | movzx RB, PC_OP
++ | lea RD, [BASE+RD*8]
++ | lea RA, [BASE+RA*8]
++ | mov ARG4, RB
++ | mov L:RB, SAVE_L
++ | mov ARG3, RD
++ | mov ARG2, RA
++ | mov ARG1, L:RB
++ | mov L:RB->base, BASE
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
++ | // 0/1 or TValue * (metamethod) returned in eax (RC).
++ |3:
++ | mov BASE, L:RB->base // Reload BASE from L->base after the call.
++ | cmp RC, 1
++ | ja ->vmeta_binop // RC > 1 (unsigned): it is a TValue *, call the metamethod.
++ |4:
++ | lea PC, [PC+4] // lea keeps the flags of the preceding cmp intact.
++ | jb >6 // Compared value was below: skip the branch.
++ |5:
++ | movzx RD, PC_RD
++ | branchPC RD
++ |6:
++ | ins_next
++ |
++ |->cont_condt: // BASE = base, RC = result
++ | add PC, 4
++ | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true.
++ | jb <5 // Tag below LJ_TISTRUECOND: take the branch at label 5.
++ | jmp <6 // Otherwise fall through to the next instruction.
++ |
++ |->cont_condf: // BASE = base, RC = result
++ | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false.
++ | jmp <4 // Label 4 advances PC and tests these flags with jb.
++ |
++ |->vmeta_equal: // Call lj_meta_equal; result handled at label 3 of vmeta_comp.
++ | sub PC, 4 // Step PC back by one instruction (4 bytes).
++ |.if X64WIN
++ | mov CARG3d, RD
++ | mov CARG4d, RB
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
++ | mov CARG2d, RA
++ | mov CARG1d, L:RB // Caveat: CARG1d == RA.
++ |.elif X64
++ | mov CARG2d, RA
++ | mov CARG4d, RB // Caveat: CARG4d == RA.
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE // Caveat: CARG3d == BASE.
++ | mov CARG3d, RD
++ | mov CARG1d, L:RB
++ |.else
++ | mov ARG4, RB
++ | mov L:RB, SAVE_L
++ | mov ARG3, RD
++ | mov ARG2, RA
++ | mov ARG1, L:RB
++ | mov L:RB->base, BASE
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
++ | // 0/1 or TValue * (metamethod) returned in eax (RC).
++ | jmp <3 // Shared result handling (label 3 above).
++ |
++ |->vmeta_equal_cd: // Only has a body when FFI is enabled.
++ |.if FFI
++ | sub PC, 4 // Step PC back by one instruction (4 bytes).
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE
++ | mov FCARG1, L:RB
++ | mov FCARG2, dword [PC-4] // Pass the instruction word at PC-4.
++ | mov SAVE_PC, PC
++ | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins)
++ | // 0/1 or TValue * (metamethod) returned in eax (RC).
++ | jmp <3 // Shared result handling (nearest preceding label 3).
++ |.endif
++ |
++ |->vmeta_istype: // Call lj_meta_istype, then continue with the next instruction.
++ |.if X64
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
++ | mov CARG2d, RA
++ | movzx CARG3d, PC_RD
++ | mov L:CARG1d, L:RB
++ |.else
++ | movzx RD, PC_RD
++ | mov ARG2, RA
++ | mov L:RB, SAVE_L
++ | mov ARG3, RD
++ | mov ARG1, L:RB
++ | mov L:RB->base, BASE
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
++ | mov BASE, L:RB->base // Reload BASE from L->base after the call.
++ | jmp <6 // ins_next at the nearest preceding label 6.
++ |
++ |//-- Arithmetic metamethods ---------------------------------------------
++ |
++ |->vmeta_arith_vno: // As vmeta_arith_vn, but reloads the RB operand under DUALNUM.
++ |.if DUALNUM
++ | movzx RB, PC_RB
++ |.endif
++ |->vmeta_arith_vn: // RC operand indexes KBASE, RB operand indexes BASE.
++ | lea RC, [KBASE+RC*8]
++ | jmp >1
++ |
++ |->vmeta_arith_nvo: // As vmeta_arith_nv, but reloads the RC operand under DUALNUM.
++ |.if DUALNUM
++ | movzx RC, PC_RC
++ |.endif
++ |->vmeta_arith_nv: // Same addresses as vn, then the two operands are swapped.
++ | lea RC, [KBASE+RC*8]
++ | lea RB, [BASE+RB*8]
++ | xchg RB, RC
++ | jmp >2
++ |
++ |->vmeta_unm: // Unary case: both operand pointers refer to slot RD.
++ | lea RC, [BASE+RD*8]
++ | mov RB, RC
++ | jmp >2
++ |
++ |->vmeta_arith_vvo: // As vmeta_arith_vv, but reloads the RB operand under DUALNUM.
++ |.if DUALNUM
++ | movzx RB, PC_RB
++ |.endif
++ |->vmeta_arith_vv: // Both operands index BASE.
++ | lea RC, [BASE+RC*8]
++ |1:
++ | lea RB, [BASE+RB*8]
++ |2:
++ | lea RA, [BASE+RA*8] // RA = address of the destination slot.
++ |.if X64WIN
++ | mov CARG3d, RB
++ | mov CARG4d, RC
++ | movzx RC, PC_OP
++ | mov ARG5d, RC
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
++ | mov CARG2d, RA
++ | mov CARG1d, L:RB // Caveat: CARG1d == RA.
++ |.elif X64
++ | movzx CARG5d, PC_OP
++ | mov CARG2d, RA
++ | mov CARG4d, RC // Caveat: CARG4d == RA.
++ | mov L:CARG1d, SAVE_L
++ | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE.
++ | mov CARG3d, RB
++ | mov L:RB, L:CARG1d // Keep L in RB for use after the call.
++ |.else
++ | mov ARG3, RB
++ | mov L:RB, SAVE_L
++ | mov ARG4, RC
++ | movzx RC, PC_OP
++ | mov ARG2, RA
++ | mov ARG5, RC
++ | mov ARG1, L:RB
++ | mov L:RB->base, BASE
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
++ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
++ | mov BASE, L:RB->base // Reload BASE from L->base after the call.
++ | test RC, RC
++ | jz ->cont_nop // NULL: finished, continue with the next instruction.
++ |
++ | // Call metamethod for binary op.
++ |->vmeta_binop:
++ | // BASE = base, RC = new base, stack = cont/func/o1/o2
++ | mov RA, RC
++ | sub RC, BASE
++ | mov [RA-12], PC // [cont|PC]
++ | lea PC, [RC+FRAME_CONT] // PC = (new base - base) + FRAME_CONT.
++ | mov NARGS:RD, 2+1 // 2 args for func(o1, o2).
++ | jmp ->vm_call_dispatch
++ |
++ |->vmeta_len: // Call lj_meta_len for the operand in slot RD.
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE
++ | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE
++ | mov L:FCARG1, L:RB
++ | mov SAVE_PC, PC
++ | call extern lj_meta_len@8 // (lua_State *L, TValue *o)
++ | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
++ | mov BASE, L:RB->base // Reload BASE from L->base after the call.
++#if LJ_52
++ | test RC, RC
++ | jne ->vmeta_binop // Binop call for compatibility.
++ | movzx RD, PC_RD
++ | mov TAB:FCARG1, [BASE+RD*8] // NULL: reload the operand from slot RD and retry.
++ | jmp ->BC_LEN_Z
++#else
++ | jmp ->vmeta_binop // Binop call for compatibility.
++#endif
++ |
++ |//-- Call metamethod ----------------------------------------------------
++ |
++ |->vmeta_call_ra: // Entry with RA as a slot number: convert to a new base address.
++ | lea RA, [BASE+RA*8+8]
++ |->vmeta_call: // Resolve and call __call metamethod.
++ | // BASE = old base, RA = new base, RC = nargs+1, PC = return
++ | mov TMP2, RA // Save RA, RC for us.
++ | mov TMP1, NARGS:RD
++ | sub RA, 8 // RA = new base - 8, passed as the func argument.
++ |.if X64
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
++ | mov CARG2d, RA
++ | lea CARG3d, [RA+NARGS:RD*8] // top argument = RA + RD*8.
++ | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
++ |.else
++ | lea RC, [RA+NARGS:RD*8] // top argument = RA + RD*8.
++ | mov L:RB, SAVE_L
++ | mov ARG2, RA
++ | mov ARG3, RC
++ | mov ARG1, L:RB
++ | mov L:RB->base, BASE // This is the callers base!
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
++ | mov BASE, L:RB->base
++ | mov RA, TMP2 // Restore the values saved before the call.
++ | mov NARGS:RD, TMP1
++ | mov LFUNC:RB, [RA-8] // Load the function from the slot below the new base.
++ | add NARGS:RD, 1 // One more argument than before the call.
++ | // This is fragile. L->base must not move, KBASE must always be defined.
++ | cmp KBASE, BASE // Continue with CALLT if flag set.
++ | je ->BC_CALLT_Z
++ | mov BASE, RA
++ | ins_call // Otherwise call resolved metamethod.
++ |
++ |//-- Argument coercion for 'for' statement ------------------------------
++ |
++ |->vmeta_for: // Call lj_meta_for, then re-dispatch the instruction at PC-4.
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE
++ | mov FCARG2, RA // Caveat: FCARG2 == BASE
++ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
++ | mov SAVE_PC, PC
++ | call extern lj_meta_for@8 // (lua_State *L, TValue *base)
++ | mov BASE, L:RB->base // Reload BASE from L->base after the call.
++ | mov RC, [PC-4] // Reload the instruction word at PC-4.
++ | movzx RA, RCH // RA = second byte of the instruction word.
++ | movzx OP, RCL // OP = low byte of the instruction word.
++ | shr RC, 16 // RC = upper 16 bits of the instruction word.
++ |.if X64
++ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
++ |.else
++ | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI.
++ |.endif
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Fast functions -----------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |.macro .ffunc, name // Define the entry label ff_<name> with no argument check.
++ |->ff_ .. name:
++ |.endmacro
++ |
++ |.macro .ffunc_1, name // Entry label plus a check on the argument count in RD.
++ |->ff_ .. name:
++ | cmp NARGS:RD, 1+1; jb ->fff_fallback // Fallback if RD < 1+1.
++ |.endmacro
++ |
++ |.macro .ffunc_2, name // Entry label plus a check on the argument count in RD.
++ |->ff_ .. name:
++ | cmp NARGS:RD, 2+1; jb ->fff_fallback // Fallback if RD < 2+1.
++ |.endmacro
++ |
++ |.macro .ffunc_nsse, name, op // One checked argument, loaded into xmm0 using 'op'.
++ | .ffunc_1 name
++ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback // Fallback if the tag at [BASE+4] is >= LJ_TISNUM.
++ | op xmm0, qword [BASE]
++ |.endmacro
++ |
++ |.macro .ffunc_nsse, name // Single-parameter form: uses movsd as the load op.
++ | .ffunc_nsse name, movsd
++ |.endmacro
++ |
++ |.macro .ffunc_nnsse, name // Two checked arguments, loaded into xmm0 and xmm1.
++ | .ffunc_2 name
++ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
++ | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
++ | movsd xmm0, qword [BASE]
++ | movsd xmm1, qword [BASE+8]
++ |.endmacro
++ |
++ |.macro .ffunc_nnr, name // Two checked arguments, pushed on the x87 stack.
++ | .ffunc_2 name
++ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
++ | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
++ | fld qword [BASE+8] // Second argument is loaded first ...
++ | fld qword [BASE] // ... so the first argument ends up in st0.
++ |.endmacro
++ |
++ |// Inlined GC threshold check. Caveat: uses label 1.
++ |.macro ffgccheck
++ | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
++ | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
++ | jb >1 // gc.total below gc.threshold: skip the GC step.
++ | call ->fff_gcstep
++ |1:
++ |.endmacro
++ |
++ |//-- Base library: checks -----------------------------------------------
++ |
++ |.ffunc_1 assert // Fast path: return all arguments when the first one is true.
++ | mov RB, [BASE+4] // RB = tag word of the first argument.
++ | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback // Fallback if the tag is >= LJ_TISTRUECOND.
++ | mov PC, [BASE-4] // Load PC from the word just below BASE.
++ | mov MULTRES, RD // Remember nargs+1 across the copy loop.
++ | mov [BASE-4], RB // Move the first argument down to [BASE-8].
++ | mov RB, [BASE]
++ | mov [BASE-8], RB
++ | sub RD, 2
++ | jz >2 // RD was 2: nothing more to move.
++ | mov RA, BASE
++ |1: // Move each remaining argument down by one 8 byte slot.
++ | add RA, 8
++ |.if X64
++ | mov RBa, [RA]
++ | mov [RA-8], RBa
++ |.else
++ | mov RB, [RA+4]
++ | mov [RA-4], RB
++ | mov RB, [RA]
++ | mov [RA-8], RB
++ |.endif
++ | sub RD, 1
++ | jnz <1
++ |2:
++ | mov RD, MULTRES // Restore the count saved above.
++ | jmp ->fff_res_
++ |
++ |.ffunc_1 type // Return a string selected from the function's upvalues by the tag.
++ | mov RB, [BASE+4] // RB = tag word of the first argument.
++ |.if X64
++ | mov RA, RB
++ | sar RA, 15
++ | cmp RA, -2
++ | je >3 // Tag >> 15 == -2: handled as lightuserdata at label 3.
++ |.endif
++ | mov RC, ~LJ_TNUMX
++ | not RB
++ | cmp RC, RB
++ | cmova RC, RB // RC = unsigned minimum of ~LJ_TNUMX and ~tag.
++ |2:
++ | mov CFUNC:RB, [BASE-8] // RB = the function object stored at BASE-8.
++ | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] // Index its upvalue array by RC.
++ | mov PC, [BASE-4]
++ | mov dword [BASE-4], LJ_TSTR // Store the result string at [BASE-8].
++ | mov [BASE-8], STR:RC
++ | jmp ->fff_res1
++ |.if X64
++ |3:
++ | mov RC, ~LJ_TLIGHTUD
++ | jmp <2
++ |.endif
++ |
++ |//-- Base library: getters and setters ---------------------------------
++ |
++ |.ffunc_1 getmetatable // Return mt.__metatable if present and non-nil, else mt or nil.
++ | mov RB, [BASE+4] // RB = tag word of the first argument.
++ | mov PC, [BASE-4]
++ | cmp RB, LJ_TTAB; jne >6 // Not a table: check other types at label 6.
++ |1: // Field metatable must be at same offset for GCtab and GCudata!
++ | mov TAB:RB, [BASE]
++ | mov TAB:RB, TAB:RB->metatable
++ |2:
++ | test TAB:RB, TAB:RB
++ | mov dword [BASE-4], LJ_TNIL // mov leaves the flags of the test intact.
++ | jz ->fff_res1 // No metatable: return nil.
++ | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)]
++ | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
++ | mov [BASE-8], TAB:RB
++ | mov RA, TAB:RB->hmask
++ | and RA, STR:RC->hash
++ | imul RA, #NODE
++ | add NODE:RA, TAB:RB->node // RA = node[hash & hmask], start of the chain.
++ |3: // Rearranged logic, because we expect _not_ to find the key.
++ | cmp dword NODE:RA->key.it, LJ_TSTR
++ | jne >4
++ | cmp dword NODE:RA->key.gcr, STR:RC
++ | je >5 // Key matches the string in RC: found.
++ |4:
++ | mov NODE:RA, NODE:RA->next // Follow the chain.
++ | test NODE:RA, NODE:RA
++ | jnz <3
++ | jmp ->fff_res1 // Not found, keep default result.
++ |5:
++ | mov RB, [RA+4] // RB = tag word of the node's value.
++ | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
++ | mov RC, [RA]
++ | mov [BASE-4], RB // Return value of mt.__metatable.
++ | mov [BASE-8], RC
++ | jmp ->fff_res1
++ |
++ |6:
++ | cmp RB, LJ_TUDATA; je <1 // Userdata: same metatable load as for tables.
++ |.if X64
++ | cmp RB, LJ_TNUMX; ja >8 // Tag above LJ_TNUMX (unsigned): use it as is.
++ | cmp RB, LJ_TISNUM; jbe >7 // Tag <= LJ_TISNUM: use LJ_TNUMX (label 7).
++ | mov RB, LJ_TLIGHTUD // Remaining tags use LJ_TLIGHTUD.
++ | jmp >8
++ |7:
++ |.else
++ | cmp RB, LJ_TISNUM; ja >8 // Tag above LJ_TISNUM (unsigned): use it as is.
++ |.endif
++ | mov RB, LJ_TNUMX
++ |8:
++ | not RB
++ | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] // Index the gcroot entries from GCROOT_BASEMT by ~tag.
++ | jmp <2
++ |
++ |.ffunc_2 setmetatable // Fast path only; everything else goes to fff_fallback.
++ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // First argument must be a table.
++ | // Fast path: no mt for table yet and not clearing the mt.
++ | mov TAB:RB, [BASE]
++ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
++ | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback // Second argument must be a table.
++ | mov TAB:RC, [BASE+8]
++ | mov TAB:RB->metatable, TAB:RC
++ | mov PC, [BASE-4]
++ | mov dword [BASE-4], LJ_TTAB // Return original table.
++ | mov [BASE-8], TAB:RB
++ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
++ | jz >1 // Not black: no barrier needed.
++ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
++ | barrierback TAB:RB, RC
++ |1:
++ | jmp ->fff_res1
++ |
++ |.ffunc_2 rawget // Call lj_tab_get(L, t, key) and return a copy of the slot.
++ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // First argument must be a table.
++ |.if X64WIN
++ | mov RB, BASE // Save BASE.
++ | lea CARG3d, [BASE+8]
++ | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
++ | mov CARG1d, SAVE_L
++ |.elif X64
++ | mov RB, BASE // Save BASE.
++ | mov CARG2d, [BASE]
++ | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
++ | mov CARG1d, SAVE_L
++ |.else
++ | mov TAB:RD, [BASE]
++ | mov L:RB, SAVE_L
++ | mov ARG2, TAB:RD
++ | mov ARG1, L:RB
++ | mov RB, BASE // Save BASE.
++ | add BASE, 8 // BASE temporarily points at the key (second argument).
++ | mov ARG3, BASE
++ |.endif
++ | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
++ | // cTValue * returned in eax (RD).
++ | mov BASE, RB // Restore BASE.
++ | // Copy table slot.
++ |.if X64
++ | mov RBa, [RD]
++ | mov PC, [BASE-4] // Load PC before the result overwrites [BASE-8].
++ | mov [BASE-8], RBa
++ |.else
++ | mov RB, [RD]
++ | mov RD, [RD+4]
++ | mov PC, [BASE-4] // Load PC before the result overwrites [BASE-4].
++ | mov [BASE-8], RB
++ | mov [BASE-4], RD
++ |.endif
++ | jmp ->fff_res1
++ |
++ |//-- Base library: conversions ------------------------------------------
++ |
++ |.ffunc tonumber
++ | // Only handles the number case inline (without a base argument).
++ | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
++ | cmp dword [BASE+4], LJ_TISNUM
++ |.if DUALNUM
++ | jne >1 // Tag differs from LJ_TISNUM: check further at label 1.
++ | mov RB, dword [BASE]; jmp ->fff_resi // Tag equals LJ_TISNUM: return the 32 bit value.
++ |1:
++ | ja ->fff_fallback // Tag above LJ_TISNUM (unsigned): fallback.
++ |.else
++ | jae ->fff_fallback // Tag >= LJ_TISNUM (unsigned): fallback.
++ |.endif
++ | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 // Return the argument's 8 byte value unchanged.
++ |
++ |.ffunc_1 tostring
++ | // Only handles the string or number case inline.
++ | mov PC, [BASE-4]
++ | cmp dword [BASE+4], LJ_TSTR; jne >3 // Not a string: try the number case at label 3.
++ | // A __tostring method in the string base metatable is ignored.
++ | mov STR:RD, [BASE]
++ |2:
++ | mov dword [BASE-4], LJ_TSTR // Store the string in RD as the result at [BASE-8].
++ | mov [BASE-8], STR:RD
++ | jmp ->fff_res1
++ |3: // Handle numbers inline, unless a number base metatable is present.
++ | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback // Tag above LJ_TISNUM (unsigned): fallback.
++ | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
++ | jne ->fff_fallback
++ | ffgccheck // Caveat: uses label 1.
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE // Add frame since C call can throw.
++ | mov SAVE_PC, PC // Redundant (but a defined value).
++ |.if X64 and not X64WIN
++ | mov FCARG2, BASE // Otherwise: FCARG2 == BASE
++ |.endif
++ | mov L:FCARG1, L:RB
++ |.if DUALNUM
++ | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
++ |.else
++ | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
++ |.endif
++ | // GCstr returned in eax (RD).
++ | mov BASE, L:RB->base // Reload BASE from L->base after the call.
++ | jmp <2 // Store the returned string as the result.
++ |
++ |//-- Base library: iterators -------------------------------------------
++ |
++ |.ffunc_1 next // Call lj_tab_next and return the key/value pair or nil.
++ | je >2 // Missing 2nd arg?
++ |1:
++ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // First argument must be a table.
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE // Add frame since C call can throw.
++ | mov L:RB->top, BASE // Dummy frame length is ok.
++ | mov PC, [BASE-4]
++ |.if X64WIN
++ | lea CARG3d, [BASE+8]
++ | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
++ | mov CARG1d, L:RB
++ |.elif X64
++ | mov CARG2d, [BASE]
++ | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
++ | mov CARG1d, L:RB
++ |.else
++ | mov TAB:RD, [BASE]
++ | mov ARG2, TAB:RD
++ | mov ARG1, L:RB
++ | add BASE, 8 // BASE temporarily points at the key; reloaded after the call.
++ | mov ARG3, BASE
++ |.endif
++ | mov SAVE_PC, PC // Needed for ITERN fallback.
++ | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
++ | // Flag returned in eax (RD).
++ | mov BASE, L:RB->base
++ | test RD, RD; jz >3 // End of traversal?
++ | // Copy key and value to results.
++ |.if X64
++ | mov RBa, [BASE+8]
++ | mov RDa, [BASE+16]
++ | mov [BASE-8], RBa
++ | mov [BASE], RDa
++ |.else
++ | mov RB, [BASE+8]
++ | mov RD, [BASE+12]
++ | mov [BASE-8], RB
++ | mov [BASE-4], RD
++ | mov RB, [BASE+16]
++ | mov RD, [BASE+20]
++ | mov [BASE], RB
++ | mov [BASE+4], RD
++ |.endif
++ |->fff_res2: // Shared exit: return two results.
++ | mov RD, 1+2
++ | jmp ->fff_res
++ |2: // Set missing 2nd arg to nil.
++ | mov dword [BASE+12], LJ_TNIL
++ | jmp <1
++ |3: // End of traversal: return nil.
++ | mov dword [BASE-4], LJ_TNIL
++ | jmp ->fff_res1
++ |
++ |.ffunc_1 pairs // Return upvalue[0] of this function, the table and nil.
++ | mov TAB:RB, [BASE]
++ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // First argument must be a table.
++#if LJ_52
++ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback // Tables with a metatable go to the fallback.
++#endif
++ | mov CFUNC:RB, [BASE-8] // RB = the function object stored at BASE-8.
++ | mov CFUNC:RD, CFUNC:RB->upvalue[0]
++ | mov PC, [BASE-4]
++ | mov dword [BASE-4], LJ_TFUNC // First result: the function from upvalue[0].
++ | mov [BASE-8], CFUNC:RD
++ | mov dword [BASE+12], LJ_TNIL // Third result: nil (the table stays in place at [BASE]).
++ | mov RD, 1+3
++ | jmp ->fff_res
++ |
++ |.ffunc_2 ipairs_aux // Increment the index and return it with the table's value, if not nil.
++ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // First argument must be a table.
++ | cmp dword [BASE+12], LJ_TISNUM
++ |.if DUALNUM
++ | jne ->fff_fallback // Second tag must equal LJ_TISNUM.
++ |.else
++ | jae ->fff_fallback // Second tag must be below LJ_TISNUM.
++ |.endif
++ | mov PC, [BASE-4]
++ |.if DUALNUM
++ | mov RD, dword [BASE+8]
++ | add RD, 1 // RD = index + 1.
++ | mov dword [BASE-4], LJ_TISNUM // First result: the incremented index.
++ | mov dword [BASE-8], RD
++ |.else
++ | movsd xmm0, qword [BASE+8]
++ | sseconst_1 xmm1, RBa
++ | addsd xmm0, xmm1
++ | cvttsd2si RD, xmm0 // RD = truncated integer value of the sum.
++ | movsd qword [BASE-8], xmm0 // First result: the sum as a double.
++ |.endif
++ | mov TAB:RB, [BASE]
++ | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
++ | shl RD, 3
++ | add RD, TAB:RB->array // RD = address of array[index].
++ |1:
++ | cmp dword [RD+4], LJ_TNIL; je ->fff_res0 // Nil value: return no results.
++ | // Copy array slot.
++ |.if X64
++ | mov RBa, [RD]
++ | mov [BASE], RBa
++ |.else
++ | mov RB, [RD]
++ | mov RD, [RD+4]
++ | mov [BASE], RB
++ | mov [BASE+4], RD
++ |.endif
++ | jmp ->fff_res2
++ |2: // Check for empty hash part first. Otherwise call C function.
++ | cmp dword TAB:RB->hmask, 0; je ->fff_res0
++ | mov FCARG1, TAB:RB
++ | mov RB, BASE // Save BASE.
++ | mov FCARG2, RD // Caveat: FCARG2 == BASE
++ | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
++ | // cTValue * or NULL returned in eax (RD).
++ | mov BASE, RB // Restore BASE.
++ | test RD, RD
++ | jnz <1 // Non-NULL: check and copy the slot at label 1.
++ |->fff_res0: // Shared exit: return no results.
++ | mov RD, 1+0
++ | jmp ->fff_res
++ |
++ |.ffunc_1 ipairs // Return upvalue[0] of this function, the table and zero.
++ | mov TAB:RB, [BASE]
++ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // First argument must be a table.
++#if LJ_52
++ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback // Tables with a metatable go to the fallback.
++#endif
++ | mov CFUNC:RB, [BASE-8] // RB = the function object stored at BASE-8.
++ | mov CFUNC:RD, CFUNC:RB->upvalue[0]
++ | mov PC, [BASE-4]
++ | mov dword [BASE-4], LJ_TFUNC // First result: the function from upvalue[0].
++ | mov [BASE-8], CFUNC:RD
++ |.if DUALNUM
++ | mov dword [BASE+12], LJ_TISNUM // Third result: integer 0.
++ | mov dword [BASE+8], 0
++ |.else
++ | xorps xmm0, xmm0
++ | movsd qword [BASE+8], xmm0 // Third result: all-zero bits (0.0 as a double).
++ |.endif
++ | mov RD, 1+3
++ | jmp ->fff_res
++ |
++ |//-- Base library: catch errors ----------------------------------------
++ |
++ |.ffunc_1 pcall // Call the first argument through vm_call_dispatch with a FRAME_PCALL frame.
++ | lea RA, [BASE+8] // RA = new base, one slot above BASE.
++ | sub NARGS:RD, 1 // One argument (the function) is consumed.
++ | mov PC, 8+FRAME_PCALL // Frame delta 8 plus frame type.
++ |1:
++ | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)]
++ | shr RB, HOOK_ACTIVE_SHIFT
++ | and RB, 1 // RB = the single hookmask bit at HOOK_ACTIVE_SHIFT.
++ | add PC, RB // Remember active hook before pcall.
++ | jmp ->vm_call_dispatch
++ |
++ |.ffunc_2 xpcall
++ | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback // Second argument must be a function.
++ | mov RB, [BASE+4] // Swap function and traceback.
++ | mov [BASE+12], RB
++ | mov dword [BASE+4], LJ_TFUNC
++ | mov LFUNC:RB, [BASE]
++ | mov PC, [BASE+8] // PC is used as scratch for the swap here.
++ | mov [BASE+8], LFUNC:RB
++ | mov [BASE], PC
++ | lea RA, [BASE+16] // RA = new base, two slots above BASE.
++ | sub NARGS:RD, 2 // Two arguments are consumed.
++ | mov PC, 16+FRAME_PCALL // Frame delta 16 plus frame type.
++ | jmp <1 // Share the hook handling and dispatch with pcall.
++ |
++ |//-- Coroutine library --------------------------------------------------
++ |
++ |.macro coroutine_resume_wrap, resume // resume=1: ff_coroutine_resume, resume=0: ff_coroutine_wrap_aux.
++ |.if resume
++ |.ffunc_1 coroutine_resume
++ | mov L:RB, [BASE] // Coroutine = first argument.
++ |.else
++ |.ffunc coroutine_wrap_aux
++ | mov CFUNC:RB, [BASE-8]
++ | mov L:RB, CFUNC:RB->upvalue[0].gcr // Coroutine = upvalue[0] of this function.
++ |.endif
++ | mov PC, [BASE-4]
++ | mov SAVE_PC, PC
++ |.if X64
++ | mov TMP1, L:RB // Keep the coroutine pointer across vm_resume.
++ |.else
++ | mov ARG1, L:RB // Keep the coroutine pointer (also the first call argument).
++ |.endif
++ |.if resume
++ | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback // First argument must be a thread.
++ |.endif
++ | cmp aword L:RB->cframe, 0; jne ->fff_fallback // Coroutine must have no cframe set.
++ | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback // Status above LUA_YIELD: fallback.
++ | mov RA, L:RB->top // mov leaves the flags of the status cmp intact.
++ | je >1 // Status != LUA_YIELD (i.e. 0)?
++ | cmp RA, L:RB->base // Check for presence of initial func.
++ | je ->fff_fallback
++ |1:
++ |.if resume
++ | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
++ |.else
++ | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
++ |.endif
++ | cmp PC, L:RB->maxstack; ja ->fff_fallback
++ | mov L:RB->top, PC // New coroutine top = end of the moved arguments.
++ |
++ | mov L:RB, SAVE_L // From here on RB = the current (calling) state.
++ | mov L:RB->base, BASE
++ |.if resume
++ | add BASE, 8 // Keep resumed thread in stack for GC.
++ |.endif
++ | mov L:RB->top, BASE
++ |.if resume
++ | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
++ |.else
++ | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
++ |.endif
++ | sub RBa, PCa // Relative to PC.
++ |
++ | cmp PC, RA
++ | je >3 // Nothing to move.
++ |2: // Move args to coroutine.
++ |.if X64
++ | mov RCa, [PC+RB]
++ | mov [PC-8], RCa
++ |.else
++ | mov RC, [PC+RB+4]
++ | mov [PC-4], RC
++ | mov RC, [PC+RB]
++ | mov [PC-8], RC
++ |.endif
++ | sub PC, 8 // Copy downwards, one 8 byte slot per iteration.
++ | cmp PC, RA
++ | jne <2
++ |3:
++ |.if X64
++ | mov CARG2d, RA
++ | mov CARG1d, TMP1
++ |.else
++ | mov ARG2, RA
++ | xor RA, RA
++ | mov ARG4, RA // Pass 0 for the last two arguments.
++ | mov ARG3, RA
++ |.endif
++ | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
++ |
++ | mov L:RB, SAVE_L
++ |.if X64
++ | mov L:PC, TMP1 // PC = coroutine pointer saved before the call.
++ |.else
++ | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
++ |.endif
++ | mov BASE, L:RB->base
++ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
++ | set_vmstate INTERP
++ |
++ | cmp eax, LUA_YIELD
++ | ja >8 // Return value above LUA_YIELD: error handling at label 8.
++ |4:
++ | mov RA, L:PC->base
++ | mov KBASE, L:PC->top
++ | mov L:PC->top, RA // Clear coroutine stack.
++ | mov PC, KBASE
++ | sub PC, RA // PC = size of the results in bytes.
++ | je >6 // No results?
++ | lea RD, [BASE+PC]
++ | shr PC, 3 // PC = number of result slots.
++ | cmp RD, L:RB->maxstack
++ | ja >9 // Need to grow stack?
++ |
++ | mov RB, BASE
++ | sub RBa, RAa // RB = BASE - source, so [RA+RB] is the destination.
++ |5: // Move results from coroutine.
++ |.if X64
++ | mov RDa, [RA]
++ | mov [RA+RB], RDa
++ |.else
++ | mov RD, [RA]
++ | mov [RA+RB], RD
++ | mov RD, [RA+4]
++ | mov [RA+RB+4], RD
++ |.endif
++ | add RA, 8
++ | cmp RA, KBASE
++ | jne <5
++ |6:
++ |.if resume
++ | lea RD, [PC+2] // nresults+1 = 1 + true + results.
++ | mov dword [BASE-4], LJ_TTRUE // Prepend true to results.
++ |.else
++ | lea RD, [PC+1] // nresults+1 = 1 + results.
++ |.endif
++ |7:
++ | mov PC, SAVE_PC
++ | mov MULTRES, RD
++ |.if resume
++ | mov RAa, -8 // Results start one slot below BASE (the prepended status).
++ |.else
++ | xor RA, RA // Results start at BASE.
++ |.endif
++ | test PC, FRAME_TYPE
++ | jz ->BC_RET_Z // No FRAME_TYPE bits set: continue at BC_RET_Z.
++ | jmp ->vm_return
++ |
++ |8: // Coroutine returned with error (at co->top-1).
++ |.if resume
++ | mov dword [BASE-4], LJ_TFALSE // Prepend false to results.
++ | mov RA, L:PC->top
++ | sub RA, 8
++ | mov L:PC->top, RA // Clear error from coroutine stack.
++ | // Copy error message.
++ |.if X64
++ | mov RDa, [RA]
++ | mov [BASE], RDa
++ |.else
++ | mov RD, [RA]
++ | mov [BASE], RD
++ | mov RD, [RA+4]
++ | mov [BASE+4], RD
++ |.endif
++ | mov RD, 1+2 // nresults+1 = 1 + false + error.
++ | jmp <7
++ |.else
++ | mov FCARG2, L:PC
++ | mov FCARG1, L:RB
++ | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co)
++ | // Error function does not return.
++ |.endif
++ |
++ |9: // Handle stack expansion on return from yield.
++ |.if X64
++ | mov L:RA, TMP1
++ |.else
++ | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
++ |.endif
++ | mov L:RA->top, KBASE // Undo coroutine stack clearing.
++ | mov FCARG2, PC // PC = number of result slots (see the shr above).
++ | mov FCARG1, L:RB
++ | call extern lj_state_growstack@8 // (lua_State *L, int n)
++ |.if X64
++ | mov L:PC, TMP1 // Reload the coroutine pointer into PC for label 4.
++ |.else
++ | mov L:PC, ARG1
++ |.endif
++ | mov BASE, L:RB->base
++ | jmp <4 // Retry the stack move.
++ |.endmacro
++ |
++ | coroutine_resume_wrap 1 // coroutine.resume
++ | coroutine_resume_wrap 0 // coroutine.wrap
++ |
++ |.ffunc coroutine_yield // Fast function for coroutine.yield.
++ | mov L:RB, SAVE_L
++ | test aword L:RB->cframe, CFRAME_RESUME
++ | jz ->fff_fallback // CFRAME_RESUME bit not set in L->cframe: use the fallback.
++ | mov L:RB->base, BASE
++ | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + NARGS*8 - 8, stored as the new L->top.
++ | mov L:RB->top, RD
++ | xor RD, RD
++ | mov aword L:RB->cframe, RDa // Clear L->cframe.
++ | mov al, LUA_YIELD
++ | mov byte L:RB->status, al // L->status = LUA_YIELD.
++ | jmp ->vm_leave_unw
++ |
++ |//-- Math library -------------------------------------------------------
++ |
++ |.if not DUALNUM
++ |->fff_resi: // Dummy.
++ |.endif
++ |
++ |->fff_resn: // Return the value on top of the x87 stack as the single result.
++ | mov PC, [BASE-4] // Fetch PC first: the 8-byte store below overwrites [BASE-4].
++ | fstp qword [BASE-8] // Store st0 into the result slot and pop it.
++ | jmp ->fff_res1
++ |
++ | .ffunc_1 math_abs
++ |.if DUALNUM
++ | cmp dword [BASE+4], LJ_TISNUM; jne >2 // Arg 1 not tagged as an integer?
++ | mov RB, dword [BASE]
++ | cmp RB, 0; jns ->fff_resi // Non-negative integer: return it unchanged.
++ | neg RB; js >1 // Still negative after negation: result does not fit in an integer.
++ |->fff_resbit:
++ |->fff_resi: // Return the integer in RB as the single result.
++ | mov PC, [BASE-4]
++ | mov dword [BASE-4], LJ_TISNUM
++ | mov dword [BASE-8], RB
++ | jmp ->fff_res1
++ |1: // Return 2^31 as a number.
++ | mov PC, [BASE-4]
++ | mov dword [BASE-4], 0x41e00000 // 2^31.
++ | mov dword [BASE-8], 0
++ | jmp ->fff_res1
++ |2:
++ | ja ->fff_fallback // Tag above LJ_TISNUM: take the fallback.
++ |.else
++ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
++ |.endif
++ | movsd xmm0, qword [BASE]
++ | sseconst_abs xmm1, RDa
++ | andps xmm0, xmm1 // xmm0 = |x|
++ |->fff_resxmm0: // Return xmm0 as the single result.
++ | mov PC, [BASE-4]
++ | movsd qword [BASE-8], xmm0
++ | // fallthrough
++ |
++ |->fff_res1:
++ | mov RD, 1+1 // nresults+1 = 1 + one result.
++ |->fff_res: // Common return path; RD holds nresults+1.
++ | mov MULTRES, RD
++ |->fff_res_:
++ | test PC, FRAME_TYPE
++ | jnz >7
++ |5:
++ | cmp PC_RB, RDL // More results expected?
++ | ja >6
++ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
++ | movzx RA, PC_RA
++ | not RAa // Note: ~RA = -(RA+1)
++ | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
++ | ins_next
++ |
++ |6: // Fill up results with nil.
++ | mov dword [BASE+RD*8-12], LJ_TNIL
++ | add RD, 1
++ | jmp <5
++ |
++ |7: // Non-standard return case.
++ | mov RAa, -8 // Results start at BASE+RA = BASE-8.
++ | jmp ->vm_return
++ |
++ |.if X64
++ |.define fff_resfp, fff_resxmm0 // X64: FP results take the xmm0 return path.
++ |.else
++ |.define fff_resfp, fff_resn // x86: FP results take the x87 (fstp) return path.
++ |.endif
++ |
++ |.macro math_round, func
++ | .ffunc math_ .. func
++ |.if DUALNUM
++ | cmp dword [BASE+4], LJ_TISNUM; jne >1
++ | mov RB, dword [BASE]; jmp ->fff_resi // Integer argument: return it unchanged.
++ |1:
++ | ja ->fff_fallback
++ |.else
++ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
++ |.endif
++ | movsd xmm0, qword [BASE]
++ | call ->vm_ .. func .. _sse // Rounds xmm0 in place (vm_floor_sse or vm_ceil_sse).
++ |.if DUALNUM
++ | cvttsd2si RB, xmm0
++ | cmp RB, 0x80000000 // 0x80000000 is also the cvttsd2si result for out-of-range or NaN input.
++ | jne ->fff_resi // Anything else fits: return it as an integer.
++ | cvtsi2sd xmm1, RB
++ | ucomisd xmm0, xmm1 // Was the rounded value exactly -2^31?
++ | jp ->fff_resxmm0 // Unordered (NaN): return as a number.
++ | je ->fff_resi
++ |.endif
++ | jmp ->fff_resxmm0
++ |.endmacro
++ |
++ | math_round floor
++ | math_round ceil
++ |
++ |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 // Result is returned via the xmm0 path.
++ |
++ |.ffunc math_log
++ | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
++ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
++ | movsd xmm0, qword [BASE]
++ |.if not X64
++ | movsd FPARG1, xmm0 // x86: also store the argument in FPARG1 for the C call.
++ |.endif
++ | mov RB, BASE // Keep BASE in RB across the C call.
++ | call extern log
++ | mov BASE, RB // Restore BASE.
++ | jmp ->fff_resfp
++ |
++ |.macro math_extern, func // Fast function wrapping the one-argument C function func.
++ | .ffunc_nsse math_ .. func
++ |.if not X64
++ | movsd FPARG1, xmm0 // x86: also store the argument in FPARG1 for the C call.
++ |.endif
++ | mov RB, BASE // Keep BASE in RB across the C call.
++ | call extern func
++ | mov BASE, RB // Restore BASE.
++ | jmp ->fff_resfp
++ |.endmacro
++ |
++ |.macro math_extern2, func // Fast function wrapping the two-argument C function func.
++ | .ffunc_nnsse math_ .. func
++ |.if not X64
++ | movsd FPARG1, xmm0 // x86: first argument goes to FPARG1.
++ | movsd FPARG3, xmm1 // x86: second argument goes to FPARG3.
++ |.endif
++ | mov RB, BASE // Keep BASE in RB across the C call.
++ | call extern func
++ | mov BASE, RB // Restore BASE.
++ | jmp ->fff_resfp
++ |.endmacro
++ |
++ | math_extern log10
++ | math_extern exp
++ | math_extern sin
++ | math_extern cos
++ | math_extern tan
++ | math_extern asin
++ | math_extern acos
++ | math_extern atan
++ | math_extern sinh
++ | math_extern cosh
++ | math_extern tanh
++ | math_extern2 pow
++ | math_extern2 atan2
++ | math_extern2 fmod
++ |
++ |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn // x87 path: fff_resn stores st0 as the result.
++ |
++ |.ffunc_1 math_frexp
++ | mov RB, [BASE+4] // RB = high 32 bits of arg 1.
++ | cmp RB, LJ_TISNUM; jae ->fff_fallback
++ | mov PC, [BASE-4]
++ | mov RC, [BASE] // RC = low 32 bits of arg 1.
++ | mov [BASE-4], RB; mov [BASE-8], RC // Copy the argument into the first result slot.
++ | shl RB, 1; cmp RB, 0xffe00000; jae >3 // Sign shifted out; exponent field all ones?
++ | or RC, RB; jz >3 // All remaining bits zero (+-0)?
++ | mov RC, 1022 // Exponent bias to subtract below.
++ | cmp RB, 0x00200000; jb >4 // Exponent field zero (denormal)?
++ |1:
++ | shr RB, 21; sub RB, RC // Extract and unbias exponent.
++ | cvtsi2sd xmm0, RB
++ | mov RB, [BASE-4]
++ | and RB, 0x800fffff // Mask off exponent.
++ | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
++ | mov [BASE-4], RB
++ |2:
++ | movsd qword [BASE], xmm0 // Second result: the exponent.
++ | mov RD, 1+2 // nresults+1 = 1 + two results.
++ | jmp ->fff_res
++ |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
++ | xorps xmm0, xmm0; jmp <2
++ |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
++ | movsd xmm0, qword [BASE]
++ | sseconst_hi xmm1, RBa, 43500000 // 2^54.
++ | mulsd xmm0, xmm1
++ | movsd qword [BASE-8], xmm0 // The scaled value replaces the first result.
++ | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 // Bias 1022 + 54 undoes the scaling.
++ |
++ |.ffunc_nsse math_modf
++ | mov RB, [BASE+4] // RB = high 32 bits of arg 1.
++ | mov PC, [BASE-4]
++ | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
++ | movaps xmm4, xmm0 // Keep a copy of x.
++ | call ->vm_trunc_sse // xmm0 = integral part.
++ | subsd xmm4, xmm0 // xmm4 = x - integral part = fraction.
++ |1:
++ | movsd qword [BASE-8], xmm0 // First result: integral part.
++ | movsd qword [BASE], xmm4 // Second result: fraction.
++ | mov RC, [BASE-4]; mov RB, [BASE+4]
++ | xor RC, RB; js >3 // Need to adjust sign?
++ |2:
++ | mov RD, 1+2 // nresults+1 = 1 + two results.
++ | jmp ->fff_res
++ |3:
++ | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
++ | jmp <2
++ |4:
++ | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
++ |
++ |.macro math_minmax, name, cmovop, sseop // cmovop folds integers, sseop folds numbers.
++ | .ffunc name
++ | mov RA, 2 // RA = 1-based index of the next argument to fold in.
++ | cmp dword [BASE+4], LJ_TISNUM
++ |.if DUALNUM
++ | jne >4
++ | mov RB, dword [BASE]
++ |1: // Handle integers.
++ | cmp RA, RD; jae ->fff_resi // All arguments consumed (RD = nargs+1)?
++ | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3
++ | cmp RB, dword [BASE+RA*8-8]
++ | cmovop RB, dword [BASE+RA*8-8] // Take the argument if it wins the comparison.
++ | add RA, 1
++ | jmp <1
++ |3:
++ | ja ->fff_fallback
++ | // Convert intermediate result to number and continue below.
++ | cvtsi2sd xmm0, RB
++ | jmp >6
++ |4:
++ | ja ->fff_fallback
++ |.else
++ | jae ->fff_fallback
++ |.endif
++ |
++ | movsd xmm0, qword [BASE]
++ |5: // Handle numbers or integers.
++ | cmp RA, RD; jae ->fff_resxmm0 // All arguments consumed?
++ | cmp dword [BASE+RA*8-4], LJ_TISNUM
++ |.if DUALNUM
++ | jb >6
++ | ja ->fff_fallback
++ | cvtsi2sd xmm1, dword [BASE+RA*8-8] // Integer argument: convert it to a number.
++ | jmp >7
++ |.else
++ | jae ->fff_fallback
++ |.endif
++ |6:
++ | movsd xmm1, qword [BASE+RA*8-8]
++ |7:
++ | sseop xmm0, xmm1
++ | add RA, 1
++ | jmp <5
++ |.endmacro
++ |
++ | math_minmax math_min, cmovg, minsd
++ | math_minmax math_max, cmovl, maxsd
++ |
++ |//-- String library -----------------------------------------------------
++ |
++ |.ffunc string_byte // Only handle the 1-arg case here.
++ | cmp NARGS:RD, 1+1; jne ->fff_fallback
++ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
++ | mov STR:RB, [BASE]
++ | mov PC, [BASE-4]
++ | cmp dword STR:RB->len, 1
++ | jb ->fff_res0 // Return no results for empty string.
++ | movzx RB, byte STR:RB[1] // Zero-extended byte; presumably the first character after the string header.
++ |.if DUALNUM
++ | jmp ->fff_resi
++ |.else
++ | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
++ |.endif
++ |
++ |.ffunc string_char // Only handle the 1-arg case here.
++ | ffgccheck
++ | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
++ | cmp dword [BASE+4], LJ_TISNUM
++ |.if DUALNUM
++ | jne ->fff_fallback
++ | mov RB, dword [BASE]
++ | cmp RB, 255; ja ->fff_fallback // Unsigned compare: also rejects negative values.
++ | mov TMP2, RB // Spill the character code to a stack slot.
++ |.else
++ | jae ->fff_fallback
++ | cvttsd2si RB, qword [BASE]
++ | cmp RB, 255; ja ->fff_fallback // Unsigned compare: also rejects negative values.
++ | mov TMP2, RB // Spill the character code to a stack slot.
++ |.endif
++ |.if X64
++ | mov TMP3, 1 // Length = 1.
++ |.else
++ | mov ARG3, 1 // Length = 1.
++ |.endif
++ | lea RDa, TMP2 // Points to stack. Little-endian.
++ |->fff_newstr: // Create a string from pointer RD with length TMP3 (X64) or ARG3 (x86).
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE
++ |.if X64
++ | mov CARG3d, TMP3 // Zero-extended to size_t.
++ | mov CARG2, RDa // May be 64 bit ptr to stack.
++ | mov CARG1d, L:RB
++ |.else
++ | mov ARG2, RD
++ | mov ARG1, L:RB
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_str_new // (lua_State *L, char *str, size_t l)
++ |->fff_resstr:
++ | // GCstr * returned in eax (RD).
++ | mov BASE, L:RB->base
++ | mov PC, [BASE-4]
++ | mov dword [BASE-4], LJ_TSTR // Tag the result slot as a string.
++ | mov [BASE-8], STR:RD
++ | jmp ->fff_res1
++ |
++ |.ffunc string_sub
++ | ffgccheck
++ | mov TMP2, -1 // Default end = -1, used when only two args are given.
++ | cmp NARGS:RD, 1+2; jb ->fff_fallback // Fewer than two arguments?
++ | jna >1 // Exactly two arguments: keep the default end.
++ | cmp dword [BASE+20], LJ_TISNUM // Type tag of arg 3 (end).
++ |.if DUALNUM
++ | jne ->fff_fallback
++ | mov RB, dword [BASE+16]
++ | mov TMP2, RB
++ |.else
++ | jae ->fff_fallback
++ | cvttsd2si RB, qword [BASE+16]
++ | mov TMP2, RB
++ |.endif
++ |1:
++ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
++ | cmp dword [BASE+12], LJ_TISNUM // Type tag of arg 2 (start).
++ |.if DUALNUM
++ | jne ->fff_fallback
++ |.else
++ | jae ->fff_fallback
++ |.endif
++ | mov STR:RB, [BASE]
++ | mov TMP3, STR:RB // Save the string pointer; RB is reused for the length.
++ | mov RB, STR:RB->len
++ |.if DUALNUM
++ | mov RA, dword [BASE+8]
++ |.else
++ | cvttsd2si RA, qword [BASE+8]
++ |.endif
++ | mov RC, TMP2 // RC = end.
++ | cmp RB, RC // len < end? (unsigned compare)
++ | jb >5
++ |2:
++ | test RA, RA // start <= 0?
++ | jle >7
++ |3:
++ | mov STR:RB, TMP3
++ | sub RC, RA // start > end?
++ | jl ->fff_emptystr
++ | lea RB, [STR:RB+RA+#STR-1] // RB = string pointer + #STR + (start-1).
++ | add RC, 1 // RC = end - start + 1 = length.
++ |4:
++ |.if X64
++ | mov TMP3, RC
++ |.else
++ | mov ARG3, RC
++ |.endif
++ | mov RD, RB
++ | jmp ->fff_newstr
++ |
++ |5: // Negative end or overflow.
++ | jl >6 // Signed view of the same compare: len < end means a too large positive end.
++ | lea RC, [RC+RB+1] // end = end+(len+1)
++ | jmp <2
++ |6: // Overflow.
++ | mov RC, RB // end = len
++ | jmp <2
++ |
++ |7: // Negative start or underflow.
++ | je >8 // start == 0 (flags still from the test at label 2).
++ | add RA, RB // start = start+(len+1)
++ | add RA, 1
++ | jg <3 // start > 0?
++ |8: // Underflow.
++ | mov RA, 1 // start = 1
++ | jmp <3
++ |
++ |->fff_emptystr: // Range underflow.
++ | xor RC, RC // Zero length. Any ptr in RB is ok.
++ | jmp <4
++ |
++ |.macro ffstring_op, name // Fast function string.<name> built on lj_buf_putstr_<name>.
++ | .ffunc_1 string_ .. name
++ | ffgccheck
++ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
++ | mov L:RB, SAVE_L
++ | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] // FCARG1 = address of the global tmpbuf.
++ | mov L:RB->base, BASE
++ | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
++ | mov RC, SBUF:FCARG1->b
++ | mov SBUF:FCARG1->L, L:RB
++ | mov SBUF:FCARG1->p, RC // Reset the buffer: p = b.
++ | mov SAVE_PC, PC
++ | call extern lj_buf_putstr_ .. name .. @8
++ | mov FCARG1, eax // Pass the returned value on to lj_buf_tostr.
++ | call extern lj_buf_tostr@4
++ | jmp ->fff_resstr
++ |.endmacro
++ |
++ |ffstring_op reverse
++ |ffstring_op lower
++ |ffstring_op upper
++ |
++ |//-- Bit library --------------------------------------------------------
++ |
++ |.macro .ffunc_bit, name, kind, fdef // Prologue: leaves arg 1 as an integer in RB.
++ | fdef name
++ |.if kind == 2
++ | sseconst_tobit xmm1, RBa // kind 2: load the tobit constant up front; xmm1 is reused by the caller's loop.
++ |.endif
++ | cmp dword [BASE+4], LJ_TISNUM
++ |.if DUALNUM
++ | jne >1
++ | mov RB, dword [BASE]
++ |.if kind > 0
++ | jmp >2 // Integer argument: continue with the code following the macro.
++ |.else
++ | jmp ->fff_resbit // kind 0: integer argument is returned as is.
++ |.endif
++ |1:
++ | ja ->fff_fallback
++ |.else
++ | jae ->fff_fallback
++ |.endif
++ | movsd xmm0, qword [BASE]
++ |.if kind < 2
++ | sseconst_tobit xmm1, RBa
++ |.endif
++ | addsd xmm0, xmm1 // Add the tobit constant; the integer is then read from the low 32 bits.
++ | movd RB, xmm0
++ |2:
++ |.endmacro
++ |
++ |.macro .ffunc_bit, name, kind // Two-argument form: uses .ffunc_1 as fdef.
++ | .ffunc_bit name, kind, .ffunc_1
++ |.endmacro
++ |
++ |.ffunc_bit bit_tobit, 0
++ | jmp ->fff_resbit
++ |
++ |.macro .ffunc_bit_op, name, ins // Folds all arguments into RB using instruction ins.
++ | .ffunc_bit name, 2
++ | mov TMP2, NARGS:RD // Save for fallback.
++ | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument.
++ |1:
++ | cmp RD, BASE
++ | jbe ->fff_resbit // Back at arg 1 (already in RB): done.
++ | cmp dword [RD+4], LJ_TISNUM
++ |.if DUALNUM
++ | jne >2
++ | ins RB, dword [RD]
++ | sub RD, 8 // Step down to the previous argument.
++ | jmp <1
++ |2:
++ | ja ->fff_fallback_bit_op
++ |.else
++ | jae ->fff_fallback_bit_op
++ |.endif
++ | movsd xmm0, qword [RD]
++ | addsd xmm0, xmm1 // xmm1 still holds the tobit constant loaded by .ffunc_bit (kind 2).
++ | movd RA, xmm0
++ | ins RB, RA
++ | sub RD, 8 // Step down to the previous argument.
++ | jmp <1
++ |.endmacro
++ |
++ |.ffunc_bit_op bit_band, and
++ |.ffunc_bit_op bit_bor, or
++ |.ffunc_bit_op bit_bxor, xor
++ |
++ |.ffunc_bit bit_bswap, 1
++ | bswap RB
++ | jmp ->fff_resbit
++ |
++ |.ffunc_bit bit_bnot, 1
++ | not RB
++ |.if DUALNUM
++ | jmp ->fff_resbit
++ |.else
++ |->fff_resbit: // Non-DUALNUM: return the integer in RB converted to a number.
++ | cvtsi2sd xmm0, RB
++ | jmp ->fff_resxmm0
++ |.endif
++ |
++ |->fff_fallback_bit_op: // Fallback for .ffunc_bit_op, which has overwritten RD.
++ | mov NARGS:RD, TMP2 // Restore for fallback
++ | jmp ->fff_fallback
++ |
++ |.macro .ffunc_bit_sh, name, ins // Shift/rotate arg 1 (RB) by arg 2 (RA) using ins.
++ |.if DUALNUM
++ | .ffunc_bit name, 1, .ffunc_2
++ | // Note: no inline conversion from number for 2nd argument!
++ | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
++ | mov RA, dword [BASE+8] // RA = shift count.
++ |.else
++ | .ffunc_nnsse name
++ | sseconst_tobit xmm2, RBa
++ | addsd xmm0, xmm2 // Convert both arguments with the tobit constant.
++ | addsd xmm1, xmm2
++ | movd RB, xmm0
++ | movd RA, xmm1
++ |.endif
++ | ins RB, cl // Assumes RA is ecx.
++ | jmp ->fff_resbit
++ |.endmacro
++ |
++ |.ffunc_bit_sh bit_lshift, shl
++ |.ffunc_bit_sh bit_rshift, shr
++ |.ffunc_bit_sh bit_arshift, sar
++ |.ffunc_bit_sh bit_rol, rol
++ |.ffunc_bit_sh bit_ror, ror
++ |
++ |//-----------------------------------------------------------------------
++ |
++ |->fff_fallback_2:
++ | mov NARGS:RD, 1+2 // Other args are ignored, anyway.
++ | jmp ->fff_fallback
++ |->fff_fallback_1:
++ | mov NARGS:RD, 1+1 // Other args are ignored, anyway.
++ |->fff_fallback: // Call fast function fallback handler.
++ | // BASE = new base, RD = nargs+1
++ | mov L:RB, SAVE_L
++ | mov PC, [BASE-4] // Fallback may overwrite PC.
++ | mov SAVE_PC, PC // Redundant (but a defined value).
++ | mov L:RB->base, BASE
++ | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, the new L->top.
++ | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
++ | mov L:RB->top, RD
++ | mov CFUNC:RD, [BASE-8] // Function object is read from the slot below BASE.
++ | cmp RA, L:RB->maxstack
++ | ja >5 // Need to grow stack.
++ |.if X64
++ | mov CARG1d, L:RB
++ |.else
++ | mov ARG1, L:RB
++ |.endif
++ | call aword CFUNC:RD->f // (lua_State *L)
++ | mov BASE, L:RB->base
++ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
++ | test RD, RD; jg ->fff_res // Returned nresults+1?
++ |1:
++ | mov RA, L:RB->top
++ | sub RA, BASE
++ | shr RA, 3 // RA = (top - base) / 8.
++ | test RD, RD
++ | lea NARGS:RD, [RA+1] // lea leaves the flags of the test above intact.
++ | mov LFUNC:RB, [BASE-8]
++ | jne ->vm_call_tail // Returned -1?
++ | ins_callt // Returned 0: retry fast path.
++ |
++ |// Reconstruct previous base for vmeta_call during tailcall.
++ |->vm_call_tail:
++ | mov RA, BASE
++ | test PC, FRAME_TYPE
++ | jnz >3
++ | movzx RB, PC_RA
++ | not RBa // Note: ~RB = -(RB+1)
++ | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8
++ | jmp ->vm_call_dispatch // Resolve again for tailcall.
++ |3:
++ | mov RB, PC
++ | and RB, -8 // Clear the low three bits of PC; the rest is subtracted from BASE.
++ | sub BASE, RB
++ | jmp ->vm_call_dispatch // Resolve again for tailcall.
++ |
++ |5: // Grow stack for fallback handler.
++ | mov FCARG2, LUA_MINSTACK
++ | mov FCARG1, L:RB
++ | call extern lj_state_growstack@8 // (lua_State *L, int n)
++ | mov BASE, L:RB->base
++ | xor RD, RD // Simulate a return 0.
++ | jmp <1 // Dumb retry (goes through ff first).
++ |
++ |->fff_gcstep: // Call GC step function.
++ | // BASE = new base, RD = nargs+1
++ | pop RBa // Must keep stack at same level.
++ | mov TMPa, RBa // Save return address
++ | mov L:RB, SAVE_L
++ | mov SAVE_PC, PC // Redundant (but a defined value).
++ | mov L:RB->base, BASE
++ | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, the new L->top.
++ | mov FCARG1, L:RB
++ | mov L:RB->top, RD
++ | call extern lj_gc_step@4 // (lua_State *L)
++ | mov BASE, L:RB->base
++ | mov RD, L:RB->top
++ | sub RD, BASE
++ | shr RD, 3
++ | add NARGS:RD, 1 // Recompute RD = nargs+1 from top - base.
++ | mov RBa, TMPa
++ | push RBa // Restore return address.
++ | ret
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Special dispatch targets -------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->vm_record: // Dispatch target for recording phase.
++ |.if JIT
++ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
++ | test RDL, HOOK_VMEVENT // No recording while in vmevent.
++ | jnz >5
++ | // Decrement the hookcount for consistency, but always do the call.
++ | test RDL, HOOK_ACTIVE
++ | jnz >1
++ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
++ | jz >1
++ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
++ | jmp >1
++ |.endif
++ |
++ |->vm_rethook: // Dispatch target for return hooks.
++ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
++ | test RDL, HOOK_ACTIVE // Hook already active?
++ | jnz >5
++ | jmp >1
++ |
++ |->vm_inshook: // Dispatch target for instr/line hooks.
++ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
++ | test RDL, HOOK_ACTIVE // Hook already active?
++ | jnz >5
++ |
++ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
++ | jz >5 // Neither line nor count hook set: just re-dispatch.
++ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
++ | jz >1 // Count reached zero: make the call.
++ | test RDL, LUA_MASKLINE
++ | jz >5 // No line hook either: just re-dispatch.
++ |1:
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE
++ | mov FCARG2, PC // Caveat: FCARG2 == BASE
++ | mov FCARG1, L:RB
++ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
++ | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
++ |3:
++ | mov BASE, L:RB->base // Reload BASE after the call.
++ |4:
++ | movzx RA, PC_RA
++ |5:
++ | movzx OP, PC_OP
++ | movzx RD, PC_RD
++ |.if X64
++ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
++ |.else
++ | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins.
++ |.endif
++ |
++ |->cont_hook: // Continue from hook yield.
++ | add PC, 4
++ | mov RA, [RB-24]
++ | mov MULTRES, RA // Restore MULTRES for *M ins.
++ | jmp <4 // Label 4 above: reload RA and re-dispatch.
++ |
++ |->vm_hotloop: // Hot loop counter underflow.
++ |.if JIT
++ | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L).
++ | mov RB, LFUNC:RB->pc
++ | movzx RD, byte [RB+PC2PROTO(framesize)]
++ | lea RD, [BASE+RD*8] // RD = BASE + framesize*8, stored as L->top below.
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE
++ | mov L:RB->top, RD
++ | mov FCARG2, PC
++ | lea FCARG1, [DISPATCH+GG_DISP2J]
++ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
++ | mov SAVE_PC, PC
++ | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
++ | jmp <3 // Label 3 in vm_inshook: reload BASE and re-dispatch.
++ |.endif
++ |
++ |->vm_callhook: // Dispatch target for call hooks.
++ | mov SAVE_PC, PC
++ |.if JIT
++ | jmp >1 // Skip the hot call marker below.
++ |.endif
++ |
++ |->vm_hotcall: // Hot call counter underflow.
++ |.if JIT
++ | mov SAVE_PC, PC
++ | or PC, 1 // Marker for hot call.
++ |1:
++ |.endif
++ | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, the new L->top.
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE
++ | mov L:RB->top, RD
++ | mov FCARG2, PC
++ | mov FCARG1, L:RB
++ | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc)
++ | // ASMFunction returned in eax/rax (RDa).
++ | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
++ |.if JIT
++ | and PC, -2 // Clear the hot call marker bit.
++ |.endif
++ | mov BASE, L:RB->base
++ | mov RAa, RDa // Park the returned function pointer while RD and RB are reused.
++ | mov RD, L:RB->top
++ | sub RD, BASE
++ | mov RBa, RAa
++ | movzx RA, PC_RA
++ | shr RD, 3
++ | add NARGS:RD, 1 // RD = nargs+1 recomputed from top - base.
++ | jmp RBa // Jump to the returned ASMFunction.
++ |
++ |->cont_stitch: // Trace stitching.
++ |.if JIT
++ | // BASE = base, RC = result, RB = mbase
++ | mov TRACE:RA, [RB-24] // Save previous trace.
++ | mov TMP1, TRACE:RA
++ | mov TMP3, DISPATCH // Need one more register.
++ | mov DISPATCH, MULTRES // DISPATCH temporarily serves as the result counter.
++ | movzx RA, PC_RA
++ | lea RA, [BASE+RA*8] // Call base.
++ | sub DISPATCH, 1
++ | jz >2 // No results to move?
++ |1: // Move results down.
++ |.if X64
++ | mov RBa, [RC]
++ | mov [RA], RBa
++ |.else
++ | mov RB, [RC]
++ | mov [RA], RB
++ | mov RB, [RC+4]
++ | mov [RA+4], RB
++ |.endif
++ | add RC, 8
++ | add RA, 8
++ | sub DISPATCH, 1
++ | jnz <1
++ |2:
++ | movzx RC, PC_RA
++ | movzx RB, PC_RB
++ | add RC, RB
++ | lea RC, [BASE+RC*8-8] // RC = BASE + (RA + RB - 1)*8.
++ |3:
++ | cmp RC, RA
++ | ja >9 // More results wanted?
++ |
++ | mov DISPATCH, TMP3 // Restore DISPATCH.
++ | mov TRACE:RD, TMP1 // Get previous trace.
++ | movzx RB, word TRACE:RD->traceno
++ | movzx RD, word TRACE:RD->link
++ | cmp RD, RB
++ | je ->cont_nop // Blacklisted.
++ | test RD, RD
++ | jne =>BC_JLOOP // Jump to stitched trace.
++ |
++ | // Stitch a new trace to the previous trace.
++ | mov [DISPATCH+DISPATCH_J(exitno)], RB
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE
++ | mov FCARG2, PC
++ | lea FCARG1, [DISPATCH+GG_DISP2J]
++ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
++ | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
++ | mov BASE, L:RB->base
++ | jmp ->cont_nop
++ |
++ |9: // Fill up results with nil.
++ | mov dword [RA+4], LJ_TNIL
++ | add RA, 8
++ | jmp <3
++ |.endif
++ |
++ |->vm_profhook: // Dispatch target for profiler hook.
++#if LJ_HASPROFILE
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE
++ | mov FCARG2, PC // Caveat: FCARG2 == BASE
++ | mov FCARG1, L:RB
++ | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
++ | mov BASE, L:RB->base // Reload BASE after the call.
++ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
++ | sub PC, 4
++ | jmp ->cont_nop
++#endif
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Trace exit handler -------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |// Called from an exit stub with the exit number on the stack.
++ |// The 16 bit exit number is stored with two (sign-extended) push imm8.
++ |->vm_exit_handler:
++ |.if JIT
++ |.if X64
++ | push r13; push r12
++ | push r11; push r10; push r9; push r8
++ | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp // rbp = entry rsp + 16 (9 pushes * 8 + 16), i.e. above both exit number slots.
++ | push rbx; push rdx; push rcx; push rax
++ | movzx RC, byte [rbp-8] // Reconstruct exit number.
++ | mov RCH, byte [rbp-16]
++ | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse the two exit number slots to save r15/r14.
++ |.else
++ | push ebp; lea ebp, [esp+12]; push ebp // ebp = entry esp + 8, i.e. above both exit number slots.
++ | push ebx; push edx; push ecx; push eax
++ | movzx RC, byte [ebp-4] // Reconstruct exit number.
++ | mov RCH, byte [ebp-8]
++ | mov [ebp-4], edi; mov [ebp-8], esi // Reuse the two exit number slots to save edi/esi.
++ |.endif
++ | // Caveat: DISPATCH is ebx.
++ | mov DISPATCH, [ebp]
++ | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
++ | set_vmstate EXIT
++ | mov [DISPATCH+DISPATCH_J(exitno)], RC
++ | mov [DISPATCH+DISPATCH_J(parent)], RA
++ |.if X64
++ |.if X64WIN
++ | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
++ |.else
++ | sub rsp, 16*8 // Room for SSE regs.
++ |.endif
++ | add rbp, -128 // rbp now points just below the 16 saved GPR slots (16 * 8 bytes).
++ | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
++ | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
++ | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
++ | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
++ | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
++ | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
++ | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
++ | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
++ |.else
++ | sub esp, 8*8+16 // Room for SSE regs + args.
++ | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
++ | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
++ | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
++ | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
++ |.endif
++ | // Caveat: RB is ebp.
++ | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
++ | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
++ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
++ | mov L:RB->base, BASE
++ |.if X64WIN
++ | lea CARG2, [rsp+4*8] // ExitState starts above the 4*8 byte save area.
++ |.elif X64
++ | mov CARG2, rsp // ExitState starts at the current stack pointer.
++ |.else
++ | lea FCARG2, [esp+16] // ExitState starts above the 16 bytes reserved for args.
++ |.endif
++ | lea FCARG1, [DISPATCH+GG_DISP2J]
++ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
++ | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
++ | // MULTRES or negated error code returned in eax (RD).
++ | mov RAa, L:RB->cframe
++ | and RAa, CFRAME_RAWMASK
++ |.if X64WIN
++ | // Reposition stack later.
++ |.elif X64
++ | mov rsp, RAa // Reposition stack to C frame.
++ |.else
++ | mov esp, RAa // Reposition stack to C frame.
++ |.endif
++ | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
++ | mov BASE, L:RB->base
++ | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC.
++ |.if X64
++ | jmp >1 // Enter vm_exit_interp at its label 1, past the stack repositioning.
++ |.endif
++ |.endif
++ |->vm_exit_interp:
++ | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
++ |.if JIT
++ |.if X64
++ | // Restore additional callee-save registers only used in compiled code.
++ |.if X64WIN
++ | lea RAa, [rsp+9*16+4*8]
++ |1:
++ | movdqa xmm15, [RAa-9*16]
++ | movdqa xmm14, [RAa-8*16]
++ | movdqa xmm13, [RAa-7*16]
++ | movdqa xmm12, [RAa-6*16]
++ | movdqa xmm11, [RAa-5*16]
++ | movdqa xmm10, [RAa-4*16]
++ | movdqa xmm9, [RAa-3*16]
++ | movdqa xmm8, [RAa-2*16]
++ | movdqa xmm7, [RAa-1*16]
++ | mov rsp, RAa // Reposition stack to C frame.
++ | movdqa xmm6, [RAa]
++ | mov r15, CSAVE_3
++ | mov r14, CSAVE_4
++ |.else
++ | add rsp, 16 // Reposition stack to C frame.
++ |1:
++ |.endif
++ | mov r13, TMPa
++ | mov r12, TMPQ
++ |.endif
++ | test RD, RD; js >9 // Check for error from exit.
++ | mov L:RB, SAVE_L
++ | mov MULTRES, RD
++ | mov LFUNC:KBASE, [BASE-8]
++ | mov KBASE, LFUNC:KBASE->pc
++ | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = k field of the current function's prototype.
++ | mov L:RB->base, BASE
++ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
++ | set_vmstate INTERP
++ | // Modified copy of ins_next which handles function header dispatch, too.
++ | mov RC, [PC]
++ | movzx RA, RCH
++ | movzx OP, RCL
++ | add PC, 4
++ | shr RC, 16
++ | cmp OP, BC_FUNCF // Function header?
++ | jb >3
++ | cmp OP, BC_FUNCC+2 // Fast function?
++ | jae >4
++ |2:
++ | mov RC, MULTRES // RC/RD holds nres+1.
++ |3:
++ |.if X64
++ | jmp aword [DISPATCH+OP*8]
++ |.else
++ | jmp aword [DISPATCH+OP*4]
++ |.endif
++ |
++ |4: // Check frame below fast function.
++ | mov RC, [BASE-4]
++ | test RC, FRAME_TYPE
++ | jnz <2 // Trace stitching continuation?
++ | // Otherwise set KBASE for Lua function below fast function.
++ | movzx RC, byte [RC-3]
++ | not RCa // ~RC = -(RC+1), as in the BASE adjustment elsewhere in this file.
++ | mov LFUNC:KBASE, [BASE+RC*8-8]
++ | mov KBASE, LFUNC:KBASE->pc
++ | mov KBASE, [KBASE+PC2PROTO(k)]
++ | jmp <2
++ |
++ |9: // Rethrow error from the right C frame.
++ | neg RD // Turn the negated error code back into a positive one.
++ | mov FCARG1, L:RB
++ | mov FCARG2, RD
++ | call extern lj_err_throw@8 // (lua_State *L, int errcode)
++ |.endif
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Math helper functions ----------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |// FP value rounding. Called by math.floor/math.ceil fast functions
++ |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
++ |.macro vm_round, name, mode, cond // mode: 0 = floor, 1 = ceil, 2 = trunc.
++ |->name:
++ |.if not X64 and cond
++ | movsd xmm0, qword [esp+4] // x86 wrapper: load the argument from the stack.
++ | call ->name .. _sse
++ | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
++ | fld qword [esp+4] // Return the result on the x87 stack.
++ | ret
++ |.endif
++ |
++ |->name .. _sse:
++ | sseconst_abs xmm2, RDa
++ | sseconst_2p52 xmm3, RDa
++ | movaps xmm1, xmm0
++ | andpd xmm1, xmm2 // |x|
++ | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
++ | jbe >1
++ | andnpd xmm2, xmm0 // Isolate sign bit.
++ |.if mode == 2 // trunc(x)?
++ | movaps xmm0, xmm1
++ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
++ | subsd xmm1, xmm3
++ | sseconst_1 xmm3, RDa
++ | cmpsd xmm0, xmm1, 1 // |x| < result?
++ | andpd xmm0, xmm3
++ | subsd xmm1, xmm0 // If yes, subtract 1.
++ | orpd xmm1, xmm2 // Merge sign bit back in.
++ |.else
++ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
++ | subsd xmm1, xmm3
++ | orpd xmm1, xmm2 // Merge sign bit back in.
++ | .if mode == 1 // ceil(x)?
++ | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0.
++ | cmpsd xmm0, xmm1, 6 // x > result?
++ | .else // floor(x)?
++ | sseconst_1 xmm2, RDa
++ | cmpsd xmm0, xmm1, 1 // x < result?
++ | .endif
++ | andpd xmm0, xmm2
++ | subsd xmm1, xmm0 // If yes, subtract +-1.
++ |.endif
++ | movaps xmm0, xmm1
++ |1:
++ | ret
++ |.endmacro
++ |
++ | vm_round vm_floor, 0, 1
++ | vm_round vm_ceil, 1, JIT
++ | vm_round vm_trunc, 2, JIT
++ |
++ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
++ |->vm_mod:
++ |// Args in xmm0/xmm1, return value in xmm0.
++ |// Caveat: xmm0-xmm5 and RC (eax) modified!
++ | movaps xmm5, xmm0 // Keep x.
++ | divsd xmm0, xmm1 // xmm0 = x/y
++ | sseconst_abs xmm2, RDa
++ | sseconst_2p52 xmm3, RDa
++ | movaps xmm4, xmm0
++ | andpd xmm4, xmm2 // |x/y|
++ | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
++ | jbe >1
++ | andnpd xmm2, xmm0 // Isolate sign bit.
++ | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
++ | subsd xmm4, xmm3
++ | orpd xmm4, xmm2 // Merge sign bit back in.
++ | sseconst_1 xmm2, RDa
++ | cmpsd xmm0, xmm4, 1 // x/y < result?
++ | andpd xmm0, xmm2
++ | subsd xmm4, xmm0 // If yes, subtract 1.0.
++ | movaps xmm0, xmm5
++ | mulsd xmm1, xmm4 // y * floor(x/y)
++ | subsd xmm0, xmm1 // x - y * floor(x/y)
++ | ret
++ |1:
++ | mulsd xmm1, xmm0 // y * (x/y), used unrounded in this case.
++ | movaps xmm0, xmm5
++ | subsd xmm0, xmm1
++ | ret
++ |
++ |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
++ |->vm_powi_sse:
++ | cmp eax, 1; jle >6 // i<=1?
++ | // Now 1 < (unsigned)i <= 0x80000000.
++ |1: // Handle leading zeros.
++ | test eax, 1; jnz >2
++ | mulsd xmm0, xmm0 // Low bit clear: square x and halve i.
++ | shr eax, 1
++ | jmp <1
++ |2:
++ | shr eax, 1; jz >5 // Only the low bit was set: xmm0 is the result.
++ | movaps xmm1, xmm0 // xmm1 accumulates the product of the selected powers.
++ |3: // Handle trailing bits.
++ | mulsd xmm0, xmm0
++ | shr eax, 1; jz >4
++ | jnc <3 // Shifted-out bit clear: just square again.
++ | mulsd xmm1, xmm0
++ | jmp <3
++ |4:
++ | mulsd xmm0, xmm1
++ |5:
++ | ret
++ |6:
++ | je <5 // x^1 ==> x
++ | jb >7 // x^0 ==> 1
++ | neg eax // Negative exponent: compute x^(-i), then take the reciprocal.
++ | call <1
++ | sseconst_1 xmm1, RDa
++ | divsd xmm1, xmm0
++ | movaps xmm0, xmm1
++ | ret
++ |7:
++ | sseconst_1 xmm0, RDa
++ | ret
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Miscellaneous functions --------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
++ |->vm_cpuid:
++ |.if X64
++ | mov eax, CARG1d // eax = function number f.
++ | .if X64WIN; push rsi; mov rsi, CARG2; .endif
++ | push rbx // Preserve rbx; cpuid overwrites ebx.
++ | xor ecx, ecx
++ | cpuid
++ | mov [rsi], eax // res[0..3] = eax, ebx, ecx, edx.
++ | mov [rsi+4], ebx
++ | mov [rsi+8], ecx
++ | mov [rsi+12], edx
++ | pop rbx
++ | .if X64WIN; pop rsi; .endif
++ | ret
++ |.else
++ | pushfd
++ | pop edx
++ | mov ecx, edx // ecx = original flags.
++ | xor edx, 0x00200000 // Toggle ID bit in flags.
++ | push edx
++ | popfd
++ | pushfd
++ | pop edx // edx = flags as read back after the toggle attempt.
++ | xor eax, eax // Zero means no features supported.
++ | cmp ecx, edx
++ | jz >1 // No ID toggle means no CPUID support.
++ | mov eax, [esp+4] // Argument 1 is function number.
++ | push edi
++ | push ebx
++ | xor ecx, ecx
++ | cpuid
++ | mov edi, [esp+16] // Argument 2 is result area.
++ | mov [edi], eax // res[0..3] = eax, ebx, ecx, edx.
++ | mov [edi+4], ebx
++ | mov [edi+8], ecx
++ | mov [edi+12], edx
++ | pop ebx
++ | pop edi
++ |1:
++ | ret
++ |.endif
++ |
++ |//-----------------------------------------------------------------------
++ |//-- Assertions ---------------------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |->assert_bad_for_arg_type:
++#ifdef LUA_USE_ASSERT
++ | int3 // Breakpoint trap, emitted only when LUA_USE_ASSERT is defined.
++#endif
++ | int3 // Breakpoint trap, always emitted.
++ |
++ |//-----------------------------------------------------------------------
++ |//-- FFI helper functions -----------------------------------------------
++ |//-----------------------------------------------------------------------
++ |
++ |// Handler for callback functions. Callback slot number in ah/al.
++ |->vm_ffi_callback:
++ |.if FFI
++ |.type CTSTATE, CTState, PC
++ |.if not X64
++ | sub esp, 16 // Leave room for SAVE_ERRF etc.
++ |.endif
++ | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
++ | lea DISPATCH, [ebp+GG_G2DISP]
++ | mov CTSTATE, GL:ebp->ctype_state
++ | movzx eax, ax // Zero-extend the 16 bit slot number.
++ | mov CTSTATE->cb.slot, eax
++ |.if X64
++ | mov CTSTATE->cb.gpr[0], CARG1 // Save the incoming register arguments in the callback state.
++ | mov CTSTATE->cb.gpr[1], CARG2
++ | mov CTSTATE->cb.gpr[2], CARG3
++ | mov CTSTATE->cb.gpr[3], CARG4
++ | movsd qword CTSTATE->cb.fpr[0], xmm0
++ | movsd qword CTSTATE->cb.fpr[1], xmm1
++ | movsd qword CTSTATE->cb.fpr[2], xmm2
++ | movsd qword CTSTATE->cb.fpr[3], xmm3
++ |.if X64WIN
++ | lea rax, [rsp+CFRAME_SIZE+4*8]
++ |.else
++ | lea rax, [rsp+CFRAME_SIZE]
++ | mov CTSTATE->cb.gpr[4], CARG5
++ | mov CTSTATE->cb.gpr[5], CARG6
++ | movsd qword CTSTATE->cb.fpr[4], xmm4
++ | movsd qword CTSTATE->cb.fpr[5], xmm5
++ | movsd qword CTSTATE->cb.fpr[6], xmm6
++ | movsd qword CTSTATE->cb.fpr[7], xmm7
++ |.endif
++ | mov CTSTATE->cb.stack, rax // Record the stack address computed above.
++ | mov CARG2, rsp
++ |.else
++ | lea eax, [esp+CFRAME_SIZE+16]
++ | mov CTSTATE->cb.gpr[0], FCARG1
++ | mov CTSTATE->cb.gpr[1], FCARG2
++ | mov CTSTATE->cb.stack, eax // Record the stack address computed above.
++ | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp.
++ | mov FCARG2, [esp+CFRAME_SIZE+8]
++ | mov SAVE_RET, FCARG1
++ | mov SAVE_R4, FCARG2
++ | mov FCARG2, esp
++ |.endif
++ | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
++ | mov FCARG1, CTSTATE
++ | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf)
++ | // lua_State * returned in eax (RD).
++ | set_vmstate INTERP
++ | mov BASE, L:RD->base
++ | mov RD, L:RD->top
++ | sub RD, BASE
++ | mov LFUNC:RB, [BASE-8]
++ | shr RD, 3
++ | add RD, 1 // RD = (top - base)/8 + 1 = nargs+1.
++ | ins_callt
++ |.endif
++ |
++ |->cont_ffi_callback: // Return from FFI callback. Calls lj_ccallback_leave, then loads the C return registers from CTState.cb.
++ |.if FFI
++ | mov L:RA, SAVE_L
++ | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
++ | mov aword CTSTATE->L, L:RAa // cts->L = L.
++ | mov L:RA->base, BASE // Write base and top back into L before calling out.
++ | mov L:RA->top, RB
++ | mov FCARG1, CTSTATE
++ | mov FCARG2, RC
++ | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o)
++ |.if X64
++ | mov rax, CTSTATE->cb.gpr[0] // Integer return register.
++ | movsd xmm0, qword CTSTATE->cb.fpr[0] // FP return register.
++ | jmp ->vm_leave_unw
++ |.else
++ | mov L:RB, SAVE_L
++ | mov eax, CTSTATE->cb.gpr[0]
++ | mov edx, CTSTATE->cb.gpr[1]
++ | cmp dword CTSTATE->cb.gpr[2], 1 // gpr[2] selects the x87 result: 0 = none, 1 = float, above 1 = double.
++ | jb >7
++ | je >6
++ | fld qword CTSTATE->cb.fpr[0].d
++ | jmp >7
++ |6:
++ | fld dword CTSTATE->cb.fpr[0].f
++ |7:
++ | mov ecx, L:RB->top
++ | movzx ecx, word [ecx+6] // Get stack adjustment and copy up.
++ | mov SAVE_L, ecx // Must be one slot above SAVE_RET
++ | restoreregs
++ | pop ecx // Move return addr from SAVE_RET.
++ | add esp, [esp] // Adjust stack.
++ | add esp, 16 // Presumably undoes the sub esp, 16 done on entry to vm_ffi_callback.
++ | push ecx // Put the return address back on top for ret.
++ | ret
++ |.endif
++ |.endif
++ |
++ |->vm_ffi_call@4: // Call C function via FFI. Argument is a CCallState pointer; results are written back into it.
++ | // Caveat: needs special frame unwinding, see below.
++ |.if FFI
++ |.if X64
++ | .type CCSTATE, CCallState, rbx
++ | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 // Frame setup; rbx is saved, then holds the CCallState pointer.
++ |.else
++ | .type CCSTATE, CCallState, ebx
++ | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1 // Frame setup; ebx is saved, then holds the CCallState pointer.
++ |.endif
++ |
++ | // Readjust stack.
++ |.if X64
++ | mov eax, CCSTATE->spadj // 32 bit load, zero-extended into rax.
++ | sub rsp, rax
++ |.else
++ | sub esp, CCSTATE->spadj
++ |.if WIN
++ | mov CCSTATE->spadj, esp // Remember esp; it is subtracted from again after the call.
++ |.endif
++ |.endif
++ |
++ | // Copy stack slots.
++ | movzx ecx, byte CCSTATE->nsp
++ | sub ecx, 1 // ecx = index of the last slot.
++ | js >2 // nsp == 0: nothing to copy.
++ |1:
++ |.if X64
++ | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
++ | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
++ |.else
++ | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
++ | mov [esp+ecx*4], eax
++ |.endif
++ | sub ecx, 1
++ | jns <1 // Loop from the last slot down to slot 0.
++ |2:
++ |
++ |.if X64
++ | movzx eax, byte CCSTATE->nfpr // eax = nfpr; presumably still live at the call (SysV passes the vector register count in al).
++ | mov CARG1, CCSTATE->gpr[0]
++ | mov CARG2, CCSTATE->gpr[1]
++ | mov CARG3, CCSTATE->gpr[2]
++ | mov CARG4, CCSTATE->gpr[3]
++ |.if not X64WIN
++ | mov CARG5, CCSTATE->gpr[4]
++ | mov CARG6, CCSTATE->gpr[5]
++ |.endif
++ | test eax, eax; jz >5 // nfpr == 0: skip loading the xmm registers.
++ | movaps xmm0, CCSTATE->fpr[0]
++ | movaps xmm1, CCSTATE->fpr[1]
++ | movaps xmm2, CCSTATE->fpr[2]
++ | movaps xmm3, CCSTATE->fpr[3]
++ |.if not X64WIN
++ | cmp eax, 4; jbe >5 // nfpr <= 4: skip xmm4-xmm7.
++ | movaps xmm4, CCSTATE->fpr[4]
++ | movaps xmm5, CCSTATE->fpr[5]
++ | movaps xmm6, CCSTATE->fpr[6]
++ | movaps xmm7, CCSTATE->fpr[7]
++ |.endif
++ |5:
++ |.else
++ | mov FCARG1, CCSTATE->gpr[0]
++ | mov FCARG2, CCSTATE->gpr[1]
++ |.endif
++ |
++ | call aword CCSTATE->func
++ |
++ |.if X64
++ | mov CCSTATE->gpr[0], rax // Store the return registers back into the CCallState.
++ | movaps CCSTATE->fpr[0], xmm0
++ |.if not X64WIN
++ | mov CCSTATE->gpr[1], rdx
++ | movaps CCSTATE->fpr[1], xmm1
++ |.endif
++ |.else
++ | mov CCSTATE->gpr[0], eax
++ | mov CCSTATE->gpr[1], edx
++ | cmp byte CCSTATE->resx87, 1 // resx87: 0 = no x87 result, 1 = float, above 1 = double.
++ | jb >7
++ | je >6
++ | fstp qword CCSTATE->fpr[0].d[0]
++ | jmp >7
++ |6:
++ | fstp dword CCSTATE->fpr[0].f[0]
++ |7:
++ |.if WIN
++ | sub CCSTATE->spadj, esp // spadj = esp saved before the call minus esp now.
++ |.endif
++ |.endif
++ |
++ |.if X64
++ | mov rbx, [rbp-8]; leave; ret // Reload rbx pushed in the prologue, then tear down the frame.
++ |.else
++ | mov ebx, [ebp-4]; leave; ret // Reload ebx pushed in the prologue, then tear down the frame.
++ |.endif
++ |.endif
++ |// Note: vm_ffi_call must be the last function in this object file!
++ |
++ |//-----------------------------------------------------------------------
++}
++
++/* Generate the code for a single instruction. */
++static void build_ins(BuildCtx *ctx, BCOp op, int defop)
++{
++ int vk = 0;
++ |// Note: aligning all instructions does not pay off.
++ |=>defop:
++
++ switch (op) {
++
++ /* -- Comparison ops ---------------------------------------------------- */
++
++ /* Remember: all ops branch for a true comparison, fall through otherwise. */
++
++ |.macro jmp_comp, lt, ge, le, gt, target
++ ||switch (op) {
++ ||case BC_ISLT:
++ | lt target
++ ||break;
++ ||case BC_ISGE:
++ | ge target
++ ||break;
++ ||case BC_ISLE:
++ | le target
++ ||break;
++ ||case BC_ISGT:
++ | gt target
++ ||break;
++ ||default: break; /* Shut up GCC. */
++ ||}
++ |.endmacro
++
++ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
++ | // RA = src1, RD = src2, JMP with RD = target
++ | ins_AD
++ |.if DUALNUM
++ | checkint RA, >7
++ | checkint RD, >8
++ | mov RB, dword [BASE+RA*8]
++ | add PC, 4
++ | cmp RB, dword [BASE+RD*8]
++ | jmp_comp jge, jl, jg, jle, >9
++ |6:
++ | movzx RD, PC_RD
++ | branchPC RD
++ |9:
++ | ins_next
++ |
++ |7: // RA is not an integer.
++ | ja ->vmeta_comp
++ | // RA is a number.
++ | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
++ | // RA is a number, RD is an integer.
++ | cvtsi2sd xmm0, dword [BASE+RD*8]
++ | jmp >2
++ |
++ |8: // RA is an integer, RD is not an integer.
++ | ja ->vmeta_comp
++ | // RA is an integer, RD is a number.
++ | cvtsi2sd xmm1, dword [BASE+RA*8]
++ | movsd xmm0, qword [BASE+RD*8]
++ | add PC, 4
++ | ucomisd xmm0, xmm1
++ | jmp_comp jbe, ja, jb, jae, <9
++ | jmp <6
++ |.else
++ | checknum RA, ->vmeta_comp
++ | checknum RD, ->vmeta_comp
++ |.endif
++ |1:
++ | movsd xmm0, qword [BASE+RD*8]
++ |2:
++ | add PC, 4
++ | ucomisd xmm0, qword [BASE+RA*8]
++ |3:
++ | // Unordered: all of ZF CF PF set, ordered: PF clear.
++ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
++ |.if DUALNUM
++ | jmp_comp jbe, ja, jb, jae, <9
++ | jmp <6
++ |.else
++ | jmp_comp jbe, ja, jb, jae, >1
++ | movzx RD, PC_RD
++ | branchPC RD
++ |1:
++ | ins_next
++ |.endif
++ break;
++
++ case BC_ISEQV: case BC_ISNEV:
++ vk = op == BC_ISEQV;
++ | ins_AD // RA = src1, RD = src2, JMP with RD = target
++ | mov RB, [BASE+RD*8+4]
++ | add PC, 4
++ |.if DUALNUM
++ | cmp RB, LJ_TISNUM; jne >7
++ | checkint RA, >8
++ | mov RB, dword [BASE+RD*8]
++ | cmp RB, dword [BASE+RA*8]
++ if (vk) {
++ | jne >9
++ } else {
++ | je >9
++ }
++ | movzx RD, PC_RD
++ | branchPC RD
++ |9:
++ | ins_next
++ |
++ |7: // RD is not an integer.
++ | ja >5
++ | // RD is a number.
++ | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
++ | // RD is a number, RA is an integer.
++ | cvtsi2sd xmm0, dword [BASE+RA*8]
++ | jmp >2
++ |
++ |8: // RD is an integer, RA is not an integer.
++ | ja >5
++ | // RD is an integer, RA is a number.
++ | cvtsi2sd xmm0, dword [BASE+RD*8]
++ | ucomisd xmm0, qword [BASE+RA*8]
++ | jmp >4
++ |
++ |.else
++ | cmp RB, LJ_TISNUM; jae >5
++ | checknum RA, >5
++ |.endif
++ |1:
++ | movsd xmm0, qword [BASE+RA*8]
++ |2:
++ | ucomisd xmm0, qword [BASE+RD*8]
++ |4:
++ iseqne_fp:
++ if (vk) {
++ | jp >2 // Unordered means not equal.
++ | jne >2
++ } else {
++ | jp >2 // Unordered means not equal.
++ | je >1
++ }
++ iseqne_end:
++ if (vk) {
++ |1: // EQ: Branch to the target.
++ | movzx RD, PC_RD
++ | branchPC RD
++ |2: // NE: Fallthrough to next instruction.
++ |.if not FFI
++ |3:
++ |.endif
++ } else {
++ |.if not FFI
++ |3:
++ |.endif
++ |2: // NE: Branch to the target.
++ | movzx RD, PC_RD
++ | branchPC RD
++ |1: // EQ: Fallthrough to next instruction.
++ }
++ if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
++ op == BC_ISEQN || op == BC_ISNEN)) {
++ | jmp <9
++ } else {
++ | ins_next
++ }
++ |
++ if (op == BC_ISEQV || op == BC_ISNEV) {
++ |5: // Either or both types are not numbers.
++ |.if FFI
++ | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
++ | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd
++ |.endif
++ | checktp RA, RB // Compare types.
++ | jne <2 // Not the same type?
++ | cmp RB, LJ_TISPRI
++ | jae <1 // Same type and primitive type?
++ |
++ | // Same types and not a primitive type. Compare GCobj or pvalue.
++ | mov RA, [BASE+RA*8]
++ | mov RD, [BASE+RD*8]
++ | cmp RA, RD
++ | je <1 // Same GCobjs or pvalues?
++ | cmp RB, LJ_TISTABUD
++ | ja <2 // Different objects and not table/ud?
++ |.if X64
++ | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata.
++ | jb <2
++ |.endif
++ |
++ | // Different tables or userdatas. Need to check __eq metamethod.
++ | // Field metatable must be at same offset for GCtab and GCudata!
++ | mov TAB:RB, TAB:RA->metatable
++ | test TAB:RB, TAB:RB
++ | jz <2 // No metatable?
++ | test byte TAB:RB->nomm, 1<<MM_eq
++ | jnz <2 // Or 'no __eq' flag set?
++ if (vk) {
++ | xor RB, RB // ne = 0
++ } else {
++ | mov RB, 1 // ne = 1
++ }
++ | jmp ->vmeta_equal // Handle __eq metamethod.
++ } else {
++ |.if FFI
++ |3:
++ | cmp RB, LJ_TCDATA
++ if (LJ_DUALNUM && vk) {
++ | jne <9
++ } else {
++ | jne <2
++ }
++ | jmp ->vmeta_equal_cd
++ |.endif
++ }
++ break;
++ case BC_ISEQS: case BC_ISNES:
++ vk = op == BC_ISEQS;
++ | ins_AND // RA = src, RD = str const, JMP with RD = target
++ | mov RB, [BASE+RA*8+4]
++ | add PC, 4
++ | cmp RB, LJ_TSTR; jne >3
++ | mov RA, [BASE+RA*8]
++ | cmp RA, [KBASE+RD*4]
++ iseqne_test:
++ if (vk) {
++ | jne >2
++ } else {
++ | je >1
++ }
++ goto iseqne_end;
++ case BC_ISEQN: case BC_ISNEN:
++ vk = op == BC_ISEQN;
++ | ins_AD // RA = src, RD = num const, JMP with RD = target
++ | mov RB, [BASE+RA*8+4]
++ | add PC, 4
++ |.if DUALNUM
++ | cmp RB, LJ_TISNUM; jne >7
++ | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8
++ | mov RB, dword [KBASE+RD*8]
++ | cmp RB, dword [BASE+RA*8]
++ if (vk) {
++ | jne >9
++ } else {
++ | je >9
++ }
++ | movzx RD, PC_RD
++ | branchPC RD
++ |9:
++ | ins_next
++ |
++ |7: // RA is not an integer.
++ | ja >3
++ | // RA is a number.
++ | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
++ | // RA is a number, RD is an integer.
++ | cvtsi2sd xmm0, dword [KBASE+RD*8]
++ | jmp >2
++ |
++ |8: // RA is an integer, RD is a number.
++ | cvtsi2sd xmm0, dword [BASE+RA*8]
++ | ucomisd xmm0, qword [KBASE+RD*8]
++ | jmp >4
++ |.else
++ | cmp RB, LJ_TISNUM; jae >3
++ |.endif
++ |1:
++ | movsd xmm0, qword [KBASE+RD*8]
++ |2:
++ | ucomisd xmm0, qword [BASE+RA*8]
++ |4:
++ goto iseqne_fp;
++ case BC_ISEQP: case BC_ISNEP:
++ vk = op == BC_ISEQP;
++ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
++ | mov RB, [BASE+RA*8+4]
++ | add PC, 4
++ | cmp RB, RD
++ if (!LJ_HASFFI) goto iseqne_test;
++ if (vk) {
++ | jne >3
++ | movzx RD, PC_RD
++ | branchPC RD
++ |2:
++ | ins_next
++ |3:
++ | cmp RB, LJ_TCDATA; jne <2
++ | jmp ->vmeta_equal_cd
++ } else {
++ | je >2
++ | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
++ | movzx RD, PC_RD
++ | branchPC RD
++ |2:
++ | ins_next
++ }
++ break;
++
++ /* -- Unary test and copy ops ------------------------------------------- */
++
++ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: /* Conditional branch on the type tag of src; ISTC/ISFC also copy src to dst. */
++ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
++ | mov RB, [BASE+RD*8+4] // RB = type tag word of slot RD.
++ | add PC, 4 // Advance PC by 4 bytes.
++ | cmp RB, LJ_TISTRUECOND
++ if (op == BC_IST || op == BC_ISTC) { /* IST/ISTC: no branch when tag >= LJ_TISTRUECOND (unsigned). */
++ | jae >1
++ } else { /* ISF/ISFC: no branch when tag < LJ_TISTRUECOND (unsigned). */
++ | jb >1
++ }
++ if (op == BC_ISTC || op == BC_ISFC) { /* Copy variants: store tag, then value, into slot RA before branching. */
++ | mov [BASE+RA*8+4], RB
++ | mov RB, [BASE+RD*8]
++ | mov [BASE+RA*8], RB
++ }
++ | movzx RD, PC_RD // Fetch the jump target operand.
++ | branchPC RD
++ |1: // Fallthrough to the next instruction.
++ | ins_next
++ break;
++
++ case BC_ISTYPE:
++ | ins_AD // RA = src, RD = -type
++ | add RD, [BASE+RA*8+4]
++ | jne ->vmeta_istype
++ | ins_next
++ break;
++ case BC_ISNUM:
++ | ins_AD // RA = src, RD = -(TISNUM-1)
++ | checknum RA, ->vmeta_istype
++ | ins_next
++ break;
++
++ /* -- Unary ops --------------------------------------------------------- */
++
++ case BC_MOV:
++ | ins_AD // RA = dst, RD = src
++ |.if X64
++ | mov RBa, [BASE+RD*8]
++ | mov [BASE+RA*8], RBa
++ |.else
++ | mov RB, [BASE+RD*8+4]
++ | mov RD, [BASE+RD*8]
++ | mov [BASE+RA*8+4], RB
++ | mov [BASE+RA*8], RD
++ |.endif
++ | ins_next_
++ break;
++ case BC_NOT: /* Writes only the type tag of dst: LJ_TTRUE plus the carry produced by checktp. */
++ | ins_AD // RA = dst, RD = src
++ | xor RB, RB // RB = 0.
++ | checktp RD, LJ_TISTRUECOND // Macro defined outside this chunk; presumably compares the tag of slot RD and sets CF.
++ | adc RB, LJ_TTRUE // RB = 0 + LJ_TTRUE + CF.
++ | mov [BASE+RA*8+4], RB // Only the tag word of dst is written.
++ | ins_next
++ break;
++ case BC_UNM: /* Unary minus: integer negate under DUALNUM, otherwise XOR of the double with the sseconst_sign constant. */
++ | ins_AD // RA = dst, RD = src
++ |.if DUALNUM
++ | checkint RD, >5
++ | mov RB, [BASE+RD*8]
++ | neg RB
++ | jo >4 // Negation overflowed: store a double instead (label 4).
++ | mov dword [BASE+RA*8+4], LJ_TISNUM
++ | mov dword [BASE+RA*8], RB
++ |9:
++ | ins_next
++ |4:
++ | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31.
++ | mov dword [BASE+RA*8], 0 // Low word of the double 2^31.
++ | jmp <9
++ |5:
++ | ja ->vmeta_unm // Uses the flags left by checkint; otherwise falls through to the FP path.
++ |.else
++ | checknum RD, ->vmeta_unm
++ |.endif
++ | movsd xmm0, qword [BASE+RD*8]
++ | sseconst_sign xmm1, RDa // Macro defined outside this chunk; presumably loads the sign-bit mask.
++ | xorps xmm0, xmm1
++ | movsd qword [BASE+RA*8], xmm0
++ |.if DUALNUM
++ | jmp <9 // Reuse the ins_next emitted at label 9.
++ |.else
++ | ins_next
++ |.endif
++ break;
++ case BC_LEN: /* Length operator: strings read STR->len inline, tables call lj_tab_len, everything else goes to vmeta_len. */
++ | ins_AD // RA = dst, RD = src
++ | checkstr RD, >2
++ | mov STR:RD, [BASE+RD*8]
++ |.if DUALNUM
++ | mov RD, dword STR:RD->len
++ |1: // Shared store path; the table path jumps back here with the length in RD.
++ | mov dword [BASE+RA*8+4], LJ_TISNUM
++ | mov dword [BASE+RA*8], RD
++ |.else
++ | xorps xmm0, xmm0 // Clear xmm0 before the partial write by cvtsi2sd.
++ | cvtsi2sd xmm0, dword STR:RD->len
++ |1: // Shared store path; the table path jumps back here with the length in xmm0.
++ | movsd qword [BASE+RA*8], xmm0
++ |.endif
++ | ins_next
++ |2: // Not a string.
++ | checktab RD, ->vmeta_len
++ | mov TAB:FCARG1, [BASE+RD*8]
++#if LJ_52
++ | mov TAB:RB, TAB:FCARG1->metatable
++ | cmp TAB:RB, 0
++ | jnz >9 // Table has a metatable: check for __len at label 9.
++ |3:
++#endif
++ |->BC_LEN_Z:
++ | mov RB, BASE // Save BASE.
++ | call extern lj_tab_len@4 // (GCtab *t)
++ | // Length of table returned in eax (RD).
++ |.if DUALNUM
++ | // Nothing to do.
++ |.else
++ | cvtsi2sd xmm0, RD
++ |.endif
++ | mov BASE, RB // Restore BASE.
++ | movzx RA, PC_RA // Refetch RA after the call.
++ | jmp <1
++#if LJ_52
++ |9: // Check for __len.
++ | test byte TAB:RB->nomm, 1<<MM_len
++ | jnz <3 // Flag set: take the plain lj_tab_len path.
++ | jmp ->vmeta_len // 'no __len' flag NOT set: check.
++#endif
++ break;
++
++ /* -- Binary ops -------------------------------------------------------- */
++
++ |.macro ins_arithpre, sseins, ssereg
++ | ins_ABC
++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
++ ||switch (vk) {
++ ||case 0:
++ | checknum RB, ->vmeta_arith_vn
++ | .if DUALNUM
++ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
++ | .endif
++ | movsd xmm0, qword [BASE+RB*8]
++ | sseins ssereg, qword [KBASE+RC*8]
++ || break;
++ ||case 1:
++ | checknum RB, ->vmeta_arith_nv
++ | .if DUALNUM
++ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
++ | .endif
++ | movsd xmm0, qword [KBASE+RC*8]
++ | sseins ssereg, qword [BASE+RB*8]
++ || break;
++ ||default:
++ | checknum RB, ->vmeta_arith_vv
++ | checknum RC, ->vmeta_arith_vv
++ | movsd xmm0, qword [BASE+RB*8]
++ | sseins ssereg, qword [BASE+RC*8]
++ || break;
++ ||}
++ |.endmacro
++ |
++ |.macro ins_arithdn, intins
++ | ins_ABC
++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
++ ||switch (vk) {
++ ||case 0:
++ | checkint RB, ->vmeta_arith_vn
++ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn
++ | mov RB, [BASE+RB*8]
++ | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno
++ || break;
++ ||case 1:
++ | checkint RB, ->vmeta_arith_nv
++ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv
++ | mov RC, [KBASE+RC*8]
++ | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo
++ || break;
++ ||default:
++ | checkint RB, ->vmeta_arith_vv
++ | checkint RC, ->vmeta_arith_vv
++ | mov RB, [BASE+RB*8]
++ | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo
++ || break;
++ ||}
++ | mov dword [BASE+RA*8+4], LJ_TISNUM
++ ||if (vk == 1) {
++ | mov dword [BASE+RA*8], RC
++ ||} else {
++ | mov dword [BASE+RA*8], RB
++ ||}
++ | ins_next
++ |.endmacro
++ |
++ |.macro ins_arithpost
++ | movsd qword [BASE+RA*8], xmm0
++ |.endmacro
++ |
++ |.macro ins_arith, sseins
++ | ins_arithpre sseins, xmm0
++ | ins_arithpost
++ | ins_next
++ |.endmacro
++ |
++ |.macro ins_arith, intins, sseins
++ |.if DUALNUM
++ | ins_arithdn intins
++ |.else
++ | ins_arith, sseins
++ |.endif
++ |.endmacro
++
++ | // RA = dst, RB = src1 or num const, RC = src2 or num const
++ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
++ | ins_arith add, addsd
++ break;
++ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
++ | ins_arith sub, subsd
++ break;
++ case BC_MULVN: case BC_MULNV: case BC_MULVV:
++ | ins_arith imul, mulsd
++ break;
++ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
++ | ins_arith divsd
++ break;
++ case BC_MODVN:
++ | ins_arithpre movsd, xmm1
++ |->BC_MODVN_Z:
++ | call ->vm_mod
++ | ins_arithpost
++ | ins_next
++ break;
++ case BC_MODNV: case BC_MODVV:
++ | ins_arithpre movsd, xmm1
++ | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
++ break;
++ case BC_POW:
++ | ins_arithpre movsd, xmm1
++ | mov RB, BASE
++ |.if not X64
++ | movsd FPARG1, xmm0
++ | movsd FPARG3, xmm1
++ |.endif
++ | call extern pow
++ | movzx RA, PC_RA
++ | mov BASE, RB
++ |.if X64
++ | ins_arithpost
++ |.else
++ | fstp qword [BASE+RA*8]
++ |.endif
++ | ins_next
++ break;
++
++ case BC_CAT:
++ | ins_ABC // RA = dst, RB = src_start, RC = src_end
++ |.if X64
++ | mov L:CARG1d, SAVE_L
++ | mov L:CARG1d->base, BASE
++ | lea CARG2d, [BASE+RC*8]
++ | mov CARG3d, RC
++ | sub CARG3d, RB
++ |->BC_CAT_Z:
++ | mov L:RB, L:CARG1d
++ |.else
++ | lea RA, [BASE+RC*8]
++ | sub RC, RB
++ | mov ARG2, RA
++ | mov ARG3, RC
++ |->BC_CAT_Z:
++ | mov L:RB, SAVE_L
++ | mov ARG1, L:RB
++ | mov L:RB->base, BASE
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
++ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
++ | mov BASE, L:RB->base
++ | test RC, RC
++ | jnz ->vmeta_binop
++ | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
++ | movzx RA, PC_RA
++ |.if X64
++ | mov RCa, [BASE+RB*8]
++ | mov [BASE+RA*8], RCa
++ |.else
++ | mov RC, [BASE+RB*8+4]
++ | mov RB, [BASE+RB*8]
++ | mov [BASE+RA*8+4], RC
++ | mov [BASE+RA*8], RB
++ |.endif
++ | ins_next
++ break;
++
++ /* -- Constant ops ------------------------------------------------------ */
++
++ case BC_KSTR:
++ | ins_AND // RA = dst, RD = str const (~)
++ | mov RD, [KBASE+RD*4]
++ | mov dword [BASE+RA*8+4], LJ_TSTR
++ | mov [BASE+RA*8], RD
++ | ins_next
++ break;
++ case BC_KCDATA:
++ |.if FFI
++ | ins_AND // RA = dst, RD = cdata const (~)
++ | mov RD, [KBASE+RD*4]
++ | mov dword [BASE+RA*8+4], LJ_TCDATA
++ | mov [BASE+RA*8], RD
++ | ins_next
++ |.endif
++ break;
++ case BC_KSHORT:
++ | ins_AD // RA = dst, RD = signed int16 literal
++ |.if DUALNUM
++ | movsx RD, RDW
++ | mov dword [BASE+RA*8+4], LJ_TISNUM
++ | mov dword [BASE+RA*8], RD
++ |.else
++ | movsx RD, RDW // Sign-extend literal.
++ | cvtsi2sd xmm0, RD
++ | movsd qword [BASE+RA*8], xmm0
++ |.endif
++ | ins_next
++ break;
++ case BC_KNUM:
++ | ins_AD // RA = dst, RD = num const
++ | movsd xmm0, qword [KBASE+RD*8]
++ | movsd qword [BASE+RA*8], xmm0
++ | ins_next
++ break;
++ case BC_KPRI:
++ | ins_AND // RA = dst, RD = primitive type (~)
++ | mov [BASE+RA*8+4], RD
++ | ins_next
++ break;
++ case BC_KNIL: /* Writes the LJ_TNIL tag into the type word of slots RA through RD (at least two slots). */
++ | ins_AD // RA = dst_start, RD = dst_end
++ | lea RA, [BASE+RA*8+12] // RA = address of the tag word of slot dst_start+1.
++ | lea RD, [BASE+RD*8+4] // RD = address of the tag word of slot dst_end.
++ | mov RB, LJ_TNIL
++ | mov [RA-8], RB // Sets minimum 2 slots.
++ |1:
++ | mov [RA], RB
++ | add RA, 8 // Next slot.
++ | cmp RA, RD
++ | jbe <1 // Continue while RA <= RD (unsigned).
++ | ins_next
++ break;
++
++ /* -- Upvalue and function ops ------------------------------------------ */
++
++ case BC_UGET:
++ | ins_AD // RA = dst, RD = upvalue #
++ | mov LFUNC:RB, [BASE-8]
++ | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
++ | mov RB, UPVAL:RB->v
++ |.if X64
++ | mov RDa, [RB]
++ | mov [BASE+RA*8], RDa
++ |.else
++ | mov RD, [RB+4]
++ | mov RB, [RB]
++ | mov [BASE+RA*8+4], RD
++ | mov [BASE+RA*8], RB
++ |.endif
++ | ins_next
++ break;
++ case BC_USETV: /* Stores slot RD into upvalue RA, then runs the GC write barrier check for closed upvalues. */
++#define TV2MARKOFS \
++ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) /* Offset from field tv to field marked. */
++ | ins_AD // RA = upvalue #, RD = src
++ | mov LFUNC:RB, [BASE-8]
++ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
++ | cmp byte UPVAL:RB->closed, 0 // Flags are consumed by the jz below; the mov instructions in between do not change them.
++ | mov RB, UPVAL:RB->v
++ | mov RA, [BASE+RD*8] // RA = value word of src.
++ | mov RD, [BASE+RD*8+4] // RD = tag word of src.
++ | mov [RB], RA
++ | mov [RB+4], RD
++ | jz >1 // closed == 0: no barrier check.
++ | // Check barrier for closed upvalue.
++ | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
++ | jnz >2
++ |1:
++ | ins_next
++ |
++ |2: // Upvalue is black. Check if new value is collectable and white.
++ | sub RD, LJ_TISGCV
++ | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
++ | jbe <1
++ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
++ | jz <1
++ | // Crossed a write barrier. Move the barrier forward.
++ |.if X64 and not X64WIN
++ | mov FCARG2, RB // Second argument (tv).
++ | mov RB, BASE // Save BASE.
++ |.else
++ | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
++ |.endif
++ | lea GL:FCARG1, [DISPATCH+GG_DISP2G] // First argument (g) = DISPATCH + GG_DISP2G.
++ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
++ | mov BASE, RB // Restore BASE.
++ | jmp <1
++ break;
++#undef TV2MARKOFS
++ case BC_USETS:
++ | ins_AND // RA = upvalue #, RD = str const (~)
++ | mov LFUNC:RB, [BASE-8]
++ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
++ | mov GCOBJ:RA, [KBASE+RD*4]
++ | mov RD, UPVAL:RB->v
++ | mov [RD], GCOBJ:RA
++ | mov dword [RD+4], LJ_TSTR
++ | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
++ | jnz >2
++ |1:
++ | ins_next
++ |
++ |2: // Check if string is white and ensure upvalue is closed.
++ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
++ | jz <1
++ | cmp byte UPVAL:RB->closed, 0
++ | jz <1
++ | // Crossed a write barrier. Move the barrier forward.
++ | mov RB, BASE // Save BASE (FCARG2 == BASE).
++ | mov FCARG2, RD
++ | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
++ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
++ | mov BASE, RB // Restore BASE.
++ | jmp <1
++ break;
++ case BC_USETN:
++ | ins_AD // RA = upvalue #, RD = num const
++ | mov LFUNC:RB, [BASE-8]
++ | movsd xmm0, qword [KBASE+RD*8]
++ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
++ | mov RA, UPVAL:RB->v
++ | movsd qword [RA], xmm0
++ | ins_next
++ break;
++ case BC_USETP:
++ | ins_AND // RA = upvalue #, RD = primitive type (~)
++ | mov LFUNC:RB, [BASE-8]
++ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
++ | mov RA, UPVAL:RB->v
++ | mov [RA+4], RD
++ | ins_next
++ break;
++ case BC_UCLO:
++ | ins_AD // RA = level, RD = target
++ | branchPC RD // Do this first to free RD.
++ | mov L:RB, SAVE_L
++ | cmp dword L:RB->openupval, 0
++ | je >1
++ | mov L:RB->base, BASE
++ | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
++ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
++ | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
++ | mov BASE, L:RB->base
++ |1:
++ | ins_next
++ break;
++
++ case BC_FNEW:
++ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
++ |.if X64
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
++ | mov CARG3d, [BASE-8]
++ | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *.
++ | mov CARG1d, L:RB
++ |.else
++ | mov LFUNC:RA, [BASE-8]
++ | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
++ | mov L:RB, SAVE_L
++ | mov ARG3, LFUNC:RA
++ | mov ARG2, PROTO:RD
++ | mov ARG1, L:RB
++ | mov L:RB->base, BASE
++ |.endif
++ | mov SAVE_PC, PC
++ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
++ | call extern lj_func_newL_gc
++ | // GCfuncL * returned in eax (RC).
++ | mov BASE, L:RB->base
++ | movzx RA, PC_RA
++ | mov [BASE+RA*8], LFUNC:RC
++ | mov dword [BASE+RA*8+4], LJ_TFUNC
++ | ins_next
++ break;
++
++ /* -- Table ops --------------------------------------------------------- */
++
++ case BC_TNEW: /* Creates a table via lj_tab_new, after a GC step when gc.total >= gc.threshold. */
++ | ins_AD // RA = dst, RD = hbits|asize
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE
++ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
++ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
++ | mov SAVE_PC, PC // mov leaves the flags of the cmp intact.
++ | jae >5 // gc.total >= gc.threshold: run a GC step first.
++ |1:
++ |.if X64
++ | mov CARG3d, RD
++ | and RD, 0x7ff // RD = low 11 bits (asize).
++ | shr CARG3d, 11 // Third argument = remaining high bits (hbits).
++ |.else
++ | mov RA, RD
++ | and RD, 0x7ff // RD = low 11 bits (asize).
++ | shr RA, 11 // RA = remaining high bits (hbits).
++ | mov ARG3, RA
++ |.endif
++ | cmp RD, 0x7ff
++ | je >3
++ |2:
++ |.if X64
++ | mov L:CARG1d, L:RB
++ | mov CARG2d, RD
++ |.else
++ | mov ARG1, L:RB
++ | mov ARG2, RD
++ |.endif
++ | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
++ | // Table * returned in eax (RC).
++ | mov BASE, L:RB->base // Reload BASE from L after the call.
++ | movzx RA, PC_RA // Refetch RA after the call.
++ | mov [BASE+RA*8], TAB:RC
++ | mov dword [BASE+RA*8+4], LJ_TTAB
++ | ins_next
++ |3: // Turn 0x7ff into 0x801.
++ | mov RD, 0x801
++ | jmp <2
++ |5:
++ | mov L:FCARG1, L:RB
++ | call extern lj_gc_step_fixtop@4 // (lua_State *L)
++ | movzx RD, PC_RD // Refetch RD after the call.
++ | jmp <1
++ break;
++ case BC_TDUP:
++ | ins_AND // RA = dst, RD = table const (~) (holding template table)
++ | mov L:RB, SAVE_L
++ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
++ | mov SAVE_PC, PC
++ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
++ | mov L:RB->base, BASE
++ | jae >3
++ |2:
++ | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
++ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
++ | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
++ | // Table * returned in eax (RC).
++ | mov BASE, L:RB->base
++ | movzx RA, PC_RA
++ | mov [BASE+RA*8], TAB:RC
++ | mov dword [BASE+RA*8+4], LJ_TTAB
++ | ins_next
++ |3:
++ | mov L:FCARG1, L:RB
++ | call extern lj_gc_step_fixtop@4 // (lua_State *L)
++ | movzx RD, PC_RD // Need to reload RD.
++ | not RDa
++ | jmp <2
++ break;
++
++ case BC_GGET:
++ | ins_AND // RA = dst, RD = str const (~)
++ | mov LFUNC:RB, [BASE-8]
++ | mov TAB:RB, LFUNC:RB->env
++ | mov STR:RC, [KBASE+RD*4]
++ | jmp ->BC_TGETS_Z
++ break;
++ case BC_GSET:
++ | ins_AND // RA = src, RD = str const (~)
++ | mov LFUNC:RB, [BASE-8]
++ | mov TAB:RB, LFUNC:RB->env
++ | mov STR:RC, [KBASE+RD*4]
++ | jmp ->BC_TSETS_Z
++ break;
++
++ case BC_TGETV: /* Indexed load: inline path for array-part hits, string keys go to BC_TGETS_Z, other cases to vmeta_tgetv. */
++ | ins_ABC // RA = dst, RB = table, RC = key
++ | checktab RB, ->vmeta_tgetv
++ | mov TAB:RB, [BASE+RB*8]
++ |
++ | // Integer key?
++ |.if DUALNUM
++ | checkint RC, >5
++ | mov RC, dword [BASE+RC*8]
++ |.else
++ | // Convert number to int and back and compare.
++ | checknum RC, >5
++ | movsd xmm0, qword [BASE+RC*8]
++ | cvttsd2si RC, xmm0
++ | cvtsi2sd xmm1, RC
++ | ucomisd xmm0, xmm1
++ | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
++ |.endif
++ | cmp RC, TAB:RB->asize // Takes care of unordered, too.
++ | jae ->vmeta_tgetv // Not in array part? Use fallback.
++ | shl RC, 3 // RC = key * 8.
++ | add RC, TAB:RB->array // RC = address of the array slot.
++ | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
++ | je >2 // Slot holds nil: consider __index.
++ | // Get array slot.
++ |.if X64
++ | mov RBa, [RC]
++ | mov [BASE+RA*8], RBa
++ |.else
++ | mov RB, [RC]
++ | mov RC, [RC+4]
++ | mov [BASE+RA*8], RB
++ | mov [BASE+RA*8+4], RC
++ |.endif
++ |1:
++ | ins_next
++ |
++ |2: // Check for __index if table value is nil.
++ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
++ | jz >3 // No metatable: result is nil.
++ | mov TAB:RA, TAB:RB->metatable
++ | test byte TAB:RA->nomm, 1<<MM_index
++ | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
++ | movzx RA, PC_RA // Restore RA.
++ |3:
++ | mov dword [BASE+RA*8+4], LJ_TNIL // Store nil: only the tag word is written.
++ | jmp <1
++ |
++ |5: // String key?
++ | checkstr RC, ->vmeta_tgetv
++ | mov STR:RC, [BASE+RC*8]
++ | jmp ->BC_TGETS_Z // Continue with the shared string-key lookup.
++ break;
++ case BC_TGETS: /* Load with a constant string key: walks the hash chain starting at node[hash & hmask]. */
++ | ins_ABC // RA = dst, RB = table, RC = str const (~)
++ | not RCa // Complement RC; the operand is documented above as a (~) constant index.
++ | mov STR:RC, [KBASE+RC*4]
++ | checktab RB, ->vmeta_tgets
++ | mov TAB:RB, [BASE+RB*8]
++ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
++ | mov RA, TAB:RB->hmask
++ | and RA, STR:RC->hash // RA = hash & hmask.
++ | imul RA, #NODE // Scale by #NODE (presumably the size of a Node).
++ | add NODE:RA, TAB:RB->node // RA = address of the first node to test.
++ |1:
++ | cmp dword NODE:RA->key.it, LJ_TSTR
++ | jne >4
++ | cmp dword NODE:RA->key.gcr, STR:RC // Keys match when the string pointers are equal.
++ | jne >4
++ | // Ok, key found. Assumes: offsetof(Node, val) == 0
++ | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
++ | je >5 // Key found, but nil value?
++ | movzx RC, PC_RA // RC = dst operand; RA currently holds the node pointer.
++ | // Get node value.
++ |.if X64
++ | mov RBa, [RA]
++ | mov [BASE+RC*8], RBa
++ |.else
++ | mov RB, [RA]
++ | mov RA, [RA+4]
++ | mov [BASE+RC*8], RB
++ | mov [BASE+RC*8+4], RA
++ |.endif
++ |2:
++ | ins_next
++ |
++ |3: // Result is nil.
++ | movzx RC, PC_RA
++ | mov dword [BASE+RC*8+4], LJ_TNIL
++ | jmp <2
++ |
++ |4: // Follow hash chain.
++ | mov NODE:RA, NODE:RA->next
++ | test NODE:RA, NODE:RA
++ | jnz <1
++ | // End of hash chain: key not found, nil result. Falls through to the __index check at label 5.
++ |
++ |5: // Check for __index if table value is nil.
++ | mov TAB:RA, TAB:RB->metatable
++ | test TAB:RA, TAB:RA
++ | jz <3 // No metatable: done.
++ | test byte TAB:RA->nomm, 1<<MM_index
++ | jnz <3 // 'no __index' flag set: done.
++ | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
++ break;
++ case BC_TGETB:
++ | ins_ABC // RA = dst, RB = table, RC = byte literal
++ | checktab RB, ->vmeta_tgetb
++ | mov TAB:RB, [BASE+RB*8]
++ | cmp RC, TAB:RB->asize
++ | jae ->vmeta_tgetb
++ | shl RC, 3
++ | add RC, TAB:RB->array
++ | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
++ | je >2
++ | // Get array slot.
++ |.if X64
++ | mov RBa, [RC]
++ | mov [BASE+RA*8], RBa
++ |.else
++ | mov RB, [RC]
++ | mov RC, [RC+4]
++ | mov [BASE+RA*8], RB
++ | mov [BASE+RA*8+4], RC
++ |.endif
++ |1:
++ | ins_next
++ |
++ |2: // Check for __index if table value is nil.
++ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
++ | jz >3
++ | mov TAB:RA, TAB:RB->metatable
++ | test byte TAB:RA->nomm, 1<<MM_index
++ | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
++ | movzx RA, PC_RA // Restore RA.
++ |3:
++ | mov dword [BASE+RA*8+4], LJ_TNIL
++ | jmp <1
++ break;
++ case BC_TGETR:
++ | ins_ABC // RA = dst, RB = table, RC = key
++ | mov TAB:RB, [BASE+RB*8]
++ |.if DUALNUM
++ | mov RC, dword [BASE+RC*8]
++ |.else
++ | cvttsd2si RC, qword [BASE+RC*8]
++ |.endif
++ | cmp RC, TAB:RB->asize
++ | jae ->vmeta_tgetr // Not in array part? Use fallback.
++ | shl RC, 3
++ | add RC, TAB:RB->array
++ | // Get array slot.
++ |->BC_TGETR_Z:
++ |.if X64
++ | mov RBa, [RC]
++ | mov [BASE+RA*8], RBa
++ |.else
++ | mov RB, [RC]
++ | mov RC, [RC+4]
++ | mov [BASE+RA*8], RB
++ | mov [BASE+RA*8+4], RC
++ |.endif
++ |->BC_TGETR2_Z:
++ | ins_next
++ break;
++
++ case BC_TSETV:
++ | ins_ABC // RA = src, RB = table, RC = key
++ | checktab RB, ->vmeta_tsetv
++ | mov TAB:RB, [BASE+RB*8]
++ |
++ | // Integer key?
++ |.if DUALNUM
++ | checkint RC, >5
++ | mov RC, dword [BASE+RC*8]
++ |.else
++ | // Convert number to int and back and compare.
++ | checknum RC, >5
++ | movsd xmm0, qword [BASE+RC*8]
++ | cvttsd2si RC, xmm0
++ | cvtsi2sd xmm1, RC
++ | ucomisd xmm0, xmm1
++ | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
++ |.endif
++ | cmp RC, TAB:RB->asize // Takes care of unordered, too.
++ | jae ->vmeta_tsetv
++ | shl RC, 3
++ | add RC, TAB:RB->array
++ | cmp dword [RC+4], LJ_TNIL
++ | je >3 // Previous value is nil?
++ |1:
++ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
++ | jnz >7
++ |2: // Set array slot.
++ |.if X64
++ | mov RBa, [BASE+RA*8]
++ | mov [RC], RBa
++ |.else
++ | mov RB, [BASE+RA*8+4]
++ | mov RA, [BASE+RA*8]
++ | mov [RC+4], RB
++ | mov [RC], RA
++ |.endif
++ | ins_next
++ |
++ |3: // Check for __newindex if previous value is nil.
++ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
++ | jz <1
++ | mov TAB:RA, TAB:RB->metatable
++ | test byte TAB:RA->nomm, 1<<MM_newindex
++ | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
++ | movzx RA, PC_RA // Restore RA.
++ | jmp <1
++ |
++ |5: // String key?
++ | checkstr RC, ->vmeta_tsetv
++ | mov STR:RC, [BASE+RC*8]
++ | jmp ->BC_TSETS_Z
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, RA
++ | movzx RA, PC_RA // Restore RA.
++ | jmp <2
++ break;
++ case BC_TSETS:
++ | ins_ABC // RA = src, RB = table, RC = str const (~)
++ | not RCa
++ | mov STR:RC, [KBASE+RC*4]
++ | checktab RB, ->vmeta_tsets
++ | mov TAB:RB, [BASE+RB*8]
++ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
++ | mov RA, TAB:RB->hmask
++ | and RA, STR:RC->hash
++ | imul RA, #NODE
++ | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
++ | add NODE:RA, TAB:RB->node
++ |1:
++ | cmp dword NODE:RA->key.it, LJ_TSTR
++ | jne >5
++ | cmp dword NODE:RA->key.gcr, STR:RC
++ | jne >5
++ | // Ok, key found. Assumes: offsetof(Node, val) == 0
++ | cmp dword [RA+4], LJ_TNIL
++ | je >4 // Previous value is nil?
++ |2:
++ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
++ | jnz >7
++ |3: // Set node value.
++ | movzx RC, PC_RA
++ |.if X64
++ | mov RBa, [BASE+RC*8]
++ | mov [RA], RBa
++ |.else
++ | mov RB, [BASE+RC*8+4]
++ | mov RC, [BASE+RC*8]
++ | mov [RA+4], RB
++ | mov [RA], RC
++ |.endif
++ | ins_next
++ |
++ |4: // Check for __newindex if previous value is nil.
++ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
++ | jz <2
++ | mov TMP1, RA // Save RA.
++ | mov TAB:RA, TAB:RB->metatable
++ | test byte TAB:RA->nomm, 1<<MM_newindex
++ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
++ | mov RA, TMP1 // Restore RA.
++ | jmp <2
++ |
++ |5: // Follow hash chain.
++ | mov NODE:RA, NODE:RA->next
++ | test NODE:RA, NODE:RA
++ | jnz <1
++ | // End of hash chain: key not found, add a new one.
++ |
++ | // But check for __newindex first.
++ | mov TAB:RA, TAB:RB->metatable
++ | test TAB:RA, TAB:RA
++ | jz >6 // No metatable: continue.
++ | test byte TAB:RA->nomm, 1<<MM_newindex
++ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
++ |6:
++ | mov TMP1, STR:RC
++ | mov TMP2, LJ_TSTR
++ | mov TMP3, TAB:RB // Save TAB:RB for us.
++ |.if X64
++ | mov L:CARG1d, SAVE_L
++ | mov L:CARG1d->base, BASE
++ | lea CARG3, TMP1
++ | mov CARG2d, TAB:RB
++ | mov L:RB, L:CARG1d
++ |.else
++ | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
++ | mov ARG2, TAB:RB
++ | mov L:RB, SAVE_L
++ | mov ARG3, RC
++ | mov ARG1, L:RB
++ | mov L:RB->base, BASE
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
++ | // Handles write barrier for the new key. TValue * returned in eax (RC).
++ | mov BASE, L:RB->base
++ | mov TAB:RB, TMP3 // Need TAB:RB for barrier.
++ | mov RA, eax
++ | jmp <2 // Must check write barrier for value.
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, RC // Destroys STR:RC.
++ | jmp <3
++ break;
++ case BC_TSETB:
++ | ins_ABC // RA = src, RB = table, RC = byte literal
++ | checktab RB, ->vmeta_tsetb
++ | mov TAB:RB, [BASE+RB*8]
++ | cmp RC, TAB:RB->asize
++ | jae ->vmeta_tsetb
++ | shl RC, 3
++ | add RC, TAB:RB->array
++ | cmp dword [RC+4], LJ_TNIL
++ | je >3 // Previous value is nil?
++ |1:
++ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
++ | jnz >7
++ |2: // Set array slot.
++ |.if X64
++ | mov RAa, [BASE+RA*8]
++ | mov [RC], RAa
++ |.else
++ | mov RB, [BASE+RA*8+4]
++ | mov RA, [BASE+RA*8]
++ | mov [RC+4], RB
++ | mov [RC], RA
++ |.endif
++ | ins_next
++ |
++ |3: // Check for __newindex if previous value is nil.
++ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
++ | jz <1
++ | mov TAB:RA, TAB:RB->metatable
++ | test byte TAB:RA->nomm, 1<<MM_newindex
++ | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
++ | movzx RA, PC_RA // Restore RA.
++ | jmp <1
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, RA
++ | movzx RA, PC_RA // Restore RA.
++ | jmp <2
++ break;
++ case BC_TSETR:
++ | ins_ABC // RA = src, RB = table, RC = key
++ | mov TAB:RB, [BASE+RB*8]
++ |.if DUALNUM
++ | mov RC, dword [BASE+RC*8]
++ |.else
++ | cvttsd2si RC, qword [BASE+RC*8]
++ |.endif
++ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
++ | jnz >7
++ |2:
++ | cmp RC, TAB:RB->asize
++ | jae ->vmeta_tsetr
++ | shl RC, 3
++ | add RC, TAB:RB->array
++ | // Set array slot.
++ |->BC_TSETR_Z:
++ |.if X64
++ | mov RBa, [BASE+RA*8]
++ | mov [RC], RBa
++ |.else
++ | mov RB, [BASE+RA*8+4]
++ | mov RA, [BASE+RA*8]
++ | mov [RC+4], RB
++ | mov [RC], RA
++ |.endif
++ | ins_next
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, RA
++ | movzx RA, PC_RA // Restore RA.
++ | jmp <2
++ break;
++
++ case BC_TSETM:
++ | ins_AD // RA = base (table at base-1), RD = num const (start index)
++ | mov TMP1, KBASE // Need one more free register.
++ | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word.
++ |1:
++ | lea RA, [BASE+RA*8]
++ | mov TAB:RB, [RA-8] // Guaranteed to be a table.
++ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
++ | jnz >7
++ |2:
++ | mov RD, MULTRES
++ | sub RD, 1
++ | jz >4 // Nothing to copy?
++ | add RD, KBASE // Compute needed size.
++ | cmp RD, TAB:RB->asize
++ | ja >5 // Doesn't fit into array part?
++ | sub RD, KBASE
++ | shl KBASE, 3
++ | add KBASE, TAB:RB->array
++ |3: // Copy result slots to table.
++ |.if X64
++ | mov RBa, [RA]
++ | add RA, 8
++ | mov [KBASE], RBa
++ |.else
++ | mov RB, [RA]
++ | mov [KBASE], RB
++ | mov RB, [RA+4]
++ | add RA, 8
++ | mov [KBASE+4], RB
++ |.endif
++ | add KBASE, 8
++ | sub RD, 1
++ | jnz <3
++ |4:
++ | mov KBASE, TMP1
++ | ins_next
++ |
++ |5: // Need to resize array part.
++ |.if X64
++ | mov L:CARG1d, SAVE_L
++ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
++ | mov CARG2d, TAB:RB
++ | mov CARG3d, RD
++ | mov L:RB, L:CARG1d
++ |.else
++ | mov ARG2, TAB:RB
++ | mov L:RB, SAVE_L
++ | mov L:RB->base, BASE
++ | mov ARG3, RD
++ | mov ARG1, L:RB
++ |.endif
++ | mov SAVE_PC, PC
++ | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
++ | mov BASE, L:RB->base
++ | movzx RA, PC_RA // Restore RA.
++ | jmp <1 // Retry.
++ |
++ |7: // Possible table write barrier for any value. Skip valiswhite check.
++ | barrierback TAB:RB, RD
++ | jmp <2
++ break;
++
++ /* -- Calls and vararg handling ----------------------------------------- */
++
++ case BC_CALL: case BC_CALLM:
++ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
++ if (op == BC_CALLM) {
++ | add NARGS:RD, MULTRES
++ }
++ | cmp dword [BASE+RA*8+4], LJ_TFUNC
++ | mov LFUNC:RB, [BASE+RA*8]
++ | jne ->vmeta_call_ra
++ | lea BASE, [BASE+RA*8+8]
++ | ins_call
++ break;
++
++ case BC_CALLMT:
++ | ins_AD // RA = base, RD = extra_nargs
++ | add NARGS:RD, MULTRES
++ | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
++ break;
++ case BC_CALLT:
++ | ins_AD // RA = base, RD = nargs+1
++ | lea RA, [BASE+RA*8+8]
++ | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
++ | mov LFUNC:RB, [RA-8]
++ | cmp dword [RA-4], LJ_TFUNC
++ | jne ->vmeta_call
++ |->BC_CALLT_Z:
++ | mov PC, [BASE-4]
++ | test PC, FRAME_TYPE
++ | jnz >7
++ |1:
++ | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
++ | mov MULTRES, NARGS:RD
++ | sub NARGS:RD, 1
++ | jz >3
++ |2: // Move args down.
++ |.if X64
++ | mov RBa, [RA]
++ | add RA, 8
++ | mov [KBASE], RBa
++ |.else
++ | mov RB, [RA]
++ | mov [KBASE], RB
++ | mov RB, [RA+4]
++ | add RA, 8
++ | mov [KBASE+4], RB
++ |.endif
++ | add KBASE, 8
++ | sub NARGS:RD, 1
++ | jnz <2
++ |
++ | mov LFUNC:RB, [BASE-8]
++ |3:
++ | mov NARGS:RD, MULTRES
++ | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
++ | ja >5
++ |4:
++ | ins_callt
++ |
++ |5: // Tailcall to a fast function.
++ | test PC, FRAME_TYPE // Lua frame below?
++ | jnz <4
++ | movzx RA, PC_RA
++ | not RAa
++ | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE.
++ | mov KBASE, LFUNC:KBASE->pc
++ | mov KBASE, [KBASE+PC2PROTO(k)]
++ | jmp <4
++ |
++ |7: // Tailcall from a vararg function.
++ | sub PC, FRAME_VARG
++ | test PC, FRAME_TYPEP
++ | jnz >8 // Vararg frame below?
++ | sub BASE, PC // Need to relocate BASE/KBASE down.
++ | mov KBASE, BASE
++ | mov PC, [BASE-4]
++ | jmp <1
++ |8:
++ | add PC, FRAME_VARG
++ | jmp <1
++ break;
++
++ case BC_ITERC:
++ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
++ | lea RA, [BASE+RA*8+8] // fb = base+1
++ |.if X64
++ | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3].
++ | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2].
++ | mov [RA], RBa
++ | mov [RA+8], RCa
++ |.else
++ | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
++ | mov RC, [RA-20]
++ | mov [RA], RB
++ | mov [RA+4], RC
++ | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
++ | mov RC, [RA-12]
++ | mov [RA+8], RB
++ | mov [RA+12], RC
++ |.endif
++ | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
++ | mov RC, [RA-28]
++ | mov [RA-8], LFUNC:RB
++ | mov [RA-4], RC
++ | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
++ | mov NARGS:RD, 2+1
++ | jne ->vmeta_call
++ | mov BASE, RA
++ | ins_call
++ break;
++
++ case BC_ITERN:
++ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
++ |.if JIT
++ | // NYI: add hotloop, record BC_ITERN.
++ |.endif
++ | mov TMP1, KBASE // Need two more free registers.
++ | mov TMP2, DISPATCH
++ | mov TAB:RB, [BASE+RA*8-16]
++ | mov RC, [BASE+RA*8-8] // Get index from control var.
++ | mov DISPATCH, TAB:RB->asize
++ | add PC, 4
++ | mov KBASE, TAB:RB->array
++ |1: // Traverse array part.
++ | cmp RC, DISPATCH; jae >5 // Index points after array part?
++ | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4
++ |.if DUALNUM
++ | mov dword [BASE+RA*8+4], LJ_TISNUM
++ | mov dword [BASE+RA*8], RC
++ |.else
++ | cvtsi2sd xmm0, RC
++ |.endif
++ | // Copy array slot to returned value.
++ |.if X64
++ | mov RBa, [KBASE+RC*8]
++ | mov [BASE+RA*8+8], RBa
++ |.else
++ | mov RB, [KBASE+RC*8+4]
++ | mov [BASE+RA*8+12], RB
++ | mov RB, [KBASE+RC*8]
++ | mov [BASE+RA*8+8], RB
++ |.endif
++ | add RC, 1
++ | // Return array index as a numeric key.
++ |.if DUALNUM
++ | // See above.
++ |.else
++ | movsd qword [BASE+RA*8], xmm0
++ |.endif
++ | mov [BASE+RA*8-8], RC // Update control var.
++ |2:
++ | movzx RD, PC_RD // Get target from ITERL.
++ | branchPC RD
++ |3:
++ | mov DISPATCH, TMP2
++ | mov KBASE, TMP1
++ | ins_next
++ |
++ |4: // Skip holes in array part.
++ | add RC, 1
++ | jmp <1
++ |
++ |5: // Traverse hash part.
++ | sub RC, DISPATCH
++ |6:
++ | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
++ | imul KBASE, RC, #NODE
++ | add NODE:KBASE, TAB:RB->node
++ | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7
++ | lea DISPATCH, [RC+DISPATCH+1]
++ | // Copy key and value from hash slot.
++ |.if X64
++ | mov RBa, NODE:KBASE->key
++ | mov RCa, NODE:KBASE->val
++ | mov [BASE+RA*8], RBa
++ | mov [BASE+RA*8+8], RCa
++ |.else
++ | mov RB, NODE:KBASE->key.gcr
++ | mov RC, NODE:KBASE->key.it
++ | mov [BASE+RA*8], RB
++ | mov [BASE+RA*8+4], RC
++ | mov RB, NODE:KBASE->val.gcr
++ | mov RC, NODE:KBASE->val.it
++ | mov [BASE+RA*8+8], RB
++ | mov [BASE+RA*8+12], RC
++ |.endif
++ | mov [BASE+RA*8-8], DISPATCH
++ | jmp <2
++ |
++ |7: // Skip holes in hash part.
++ | add RC, 1
++ | jmp <6
++ break;
++
++ case BC_ISNEXT:
++ | ins_AD // RA = base, RD = target (points to ITERN)
++ | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5
++ | mov CFUNC:RB, [BASE+RA*8-24]
++ | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5
++ | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5
++ | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
++ | branchPC RD
++ | mov dword [BASE+RA*8-8], 0 // Initialize control var.
++ | mov dword [BASE+RA*8-4], 0xfffe7fff
++ |1:
++ | ins_next
++ |5: // Despecialize bytecode if any of the checks fail.
++ | mov PC_OP, BC_JMP
++ | branchPC RD
++ | mov byte [PC], BC_ITERC
++ | jmp <1
++ break;
++
++ case BC_VARG:
++ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
++ | mov TMP1, KBASE // Need one more free register.
++ | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
++ | lea RA, [BASE+RA*8]
++ | sub KBASE, [BASE-4]
++ | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
++ | test RB, RB
++ | jz >5 // Copy all varargs?
++ | lea RB, [RA+RB*8-8]
++ | cmp KBASE, BASE // No vararg slots?
++ | jnb >2
++ |1: // Copy vararg slots to destination slots.
++ |.if X64
++ | mov RCa, [KBASE-8]
++ | add KBASE, 8
++ | mov [RA], RCa
++ |.else
++ | mov RC, [KBASE-8]
++ | mov [RA], RC
++ | mov RC, [KBASE-4]
++ | add KBASE, 8
++ | mov [RA+4], RC
++ |.endif
++ | add RA, 8
++ | cmp RA, RB // All destination slots filled?
++ | jnb >3
++ | cmp KBASE, BASE // No more vararg slots?
++ | jb <1
++ |2: // Fill up remainder with nil.
++ | mov dword [RA+4], LJ_TNIL
++ | add RA, 8
++ | cmp RA, RB
++ | jb <2
++ |3:
++ | mov KBASE, TMP1
++ | ins_next
++ |
++ |5: // Copy all varargs.
++ | mov MULTRES, 1 // MULTRES = 0+1
++ | mov RC, BASE
++ | sub RC, KBASE
++ | jbe <3 // No vararg slots?
++ | mov RB, RC
++ | shr RB, 3
++ | add RB, 1
++ | mov MULTRES, RB // MULTRES = #varargs+1
++ | mov L:RB, SAVE_L
++ | add RC, RA
++ | cmp RC, L:RB->maxstack
++ | ja >7 // Need to grow stack?
++ |6: // Copy all vararg slots.
++ |.if X64
++ | mov RCa, [KBASE-8]
++ | add KBASE, 8
++ | mov [RA], RCa
++ |.else
++ | mov RC, [KBASE-8]
++ | mov [RA], RC
++ | mov RC, [KBASE-4]
++ | add KBASE, 8
++ | mov [RA+4], RC
++ |.endif
++ | add RA, 8
++ | cmp KBASE, BASE // No more vararg slots?
++ | jb <6
++ | jmp <3
++ |
++ |7: // Grow stack for varargs.
++ | mov L:RB->base, BASE
++ | mov L:RB->top, RA
++ | mov SAVE_PC, PC
++ | sub KBASE, BASE // Need delta, because BASE may change.
++ | mov FCARG2, MULTRES
++ | sub FCARG2, 1
++ | mov FCARG1, L:RB
++ | call extern lj_state_growstack@8 // (lua_State *L, int n)
++ | mov BASE, L:RB->base
++ | mov RA, L:RB->top
++ | add KBASE, BASE
++ | jmp <6
++ break;
++
++ /* -- Returns ----------------------------------------------------------- */
++
++ case BC_RETM:
++ | ins_AD // RA = results, RD = extra_nresults
++ | add RD, MULTRES // MULTRES >=1, so RD >=1.
++ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
++ break;
++
++ case BC_RET: case BC_RET0: case BC_RET1:
++ | ins_AD // RA = results, RD = nresults+1
++ if (op != BC_RET0) {
++ | shl RA, 3
++ }
++ |1:
++ | mov PC, [BASE-4]
++ | mov MULTRES, RD // Save nresults+1.
++ | test PC, FRAME_TYPE // Check frame type marker.
++ | jnz >7 // Not returning to a fixarg Lua func?
++ switch (op) {
++ case BC_RET:
++ |->BC_RET_Z:
++ | mov KBASE, BASE // Use KBASE for result move.
++ | sub RD, 1
++ | jz >3
++ |2: // Move results down.
++ |.if X64
++ | mov RBa, [KBASE+RA]
++ | mov [KBASE-8], RBa
++ |.else
++ | mov RB, [KBASE+RA]
++ | mov [KBASE-8], RB
++ | mov RB, [KBASE+RA+4]
++ | mov [KBASE-4], RB
++ |.endif
++ | add KBASE, 8
++ | sub RD, 1
++ | jnz <2
++ |3:
++ | mov RD, MULTRES // Note: MULTRES may be >255.
++ | movzx RB, PC_RB // So cannot compare with RDL!
++ |5:
++ | cmp RB, RD // More results expected?
++ | ja >6
++ break;
++ case BC_RET1:
++ |.if X64
++ | mov RBa, [BASE+RA]
++ | mov [BASE-8], RBa
++ |.else
++ | mov RB, [BASE+RA+4]
++ | mov [BASE-4], RB
++ | mov RB, [BASE+RA]
++ | mov [BASE-8], RB
++ |.endif
++ /* fallthrough */
++ case BC_RET0:
++ |5:
++ | cmp PC_RB, RDL // More results expected?
++ | ja >6
++ default:
++ break;
++ }
++ | movzx RA, PC_RA
++ | not RAa // Note: ~RA = -(RA+1)
++ | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
++ | mov LFUNC:KBASE, [BASE-8]
++ | mov KBASE, LFUNC:KBASE->pc
++ | mov KBASE, [KBASE+PC2PROTO(k)]
++ | ins_next
++ |
++ |6: // Fill up results with nil.
++ if (op == BC_RET) {
++ | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
++ | add KBASE, 8
++ } else {
++ | mov dword [BASE+RD*8-12], LJ_TNIL
++ }
++ | add RD, 1
++ | jmp <5
++ |
++ |7: // Non-standard return case.
++ | lea RB, [PC-FRAME_VARG]
++ | test RB, FRAME_TYPEP
++ | jnz ->vm_return
++ | // Return from vararg function: relocate BASE down and RA up.
++ | sub BASE, RB
++ if (op != BC_RET0) {
++ | add RA, RB
++ }
++ | jmp <1
++ break;
++
++ /* -- Loops and branches ------------------------------------------------ */
++
++ |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4]
++ |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12]
++ |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20]
++ |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28]
++
++ case BC_FORL:
++ |.if JIT
++ | hotloop RB
++ |.endif
++ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
++ break;
++
++ case BC_JFORI:
++ case BC_JFORL:
++#if !LJ_HASJIT
++ break;
++#endif
++ case BC_FORI:
++ case BC_IFORL:
++ vk = (op == BC_IFORL || op == BC_JFORL);
++ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
++ | lea RA, [BASE+RA*8]
++ if (LJ_DUALNUM) {
++ | cmp FOR_TIDX, LJ_TISNUM; jne >9
++ if (!vk) {
++ | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for
++ | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for
++ | mov RB, dword FOR_IDX
++ | cmp dword FOR_STEP, 0; jl >5
++ } else {
++#ifdef LUA_USE_ASSERT
++ | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type
++ | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type
++#endif
++ | mov RB, dword FOR_STEP
++ | test RB, RB; js >5
++ | add RB, dword FOR_IDX; jo >1
++ | mov dword FOR_IDX, RB
++ }
++ | cmp RB, dword FOR_STOP
++ | mov FOR_TEXT, LJ_TISNUM
++ | mov dword FOR_EXT, RB
++ if (op == BC_FORI) {
++ | jle >7
++ |1:
++ |6:
++ | branchPC RD
++ } else if (op == BC_JFORI) {
++ | branchPC RD
++ | movzx RD, PC_RD
++ | jle =>BC_JLOOP
++ |1:
++ |6:
++ } else if (op == BC_IFORL) {
++ | jg >7
++ |6:
++ | branchPC RD
++ |1:
++ } else {
++ | jle =>BC_JLOOP
++ |1:
++ |6:
++ }
++ |7:
++ | ins_next
++ |
++ |5: // Invert check for negative step.
++ if (vk) {
++ | add RB, dword FOR_IDX; jo <1
++ | mov dword FOR_IDX, RB
++ }
++ | cmp RB, dword FOR_STOP
++ | mov FOR_TEXT, LJ_TISNUM
++ | mov dword FOR_EXT, RB
++ if (op == BC_FORI) {
++ | jge <7
++ } else if (op == BC_JFORI) {
++ | branchPC RD
++ | movzx RD, PC_RD
++ | jge =>BC_JLOOP
++ } else if (op == BC_IFORL) {
++ | jl <7
++ } else {
++ | jge =>BC_JLOOP
++ }
++ | jmp <6
++ |9: // Fallback to FP variant.
++ } else if (!vk) {
++ | cmp FOR_TIDX, LJ_TISNUM
++ }
++ if (!vk) {
++ | jae ->vmeta_for
++ | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for
++ } else {
++#ifdef LUA_USE_ASSERT
++ | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type
++ | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type
++#endif
++ }
++ | mov RB, FOR_TSTEP // Load type/hiword of for step.
++ if (!vk) {
++ | cmp RB, LJ_TISNUM; jae ->vmeta_for
++ }
++ | movsd xmm0, qword FOR_IDX
++ | movsd xmm1, qword FOR_STOP
++ if (vk) {
++ | addsd xmm0, qword FOR_STEP
++ | movsd qword FOR_IDX, xmm0
++ | test RB, RB; js >3
++ } else {
++ | jl >3
++ }
++ | ucomisd xmm1, xmm0
++ |1:
++ | movsd qword FOR_EXT, xmm0
++ if (op == BC_FORI) {
++ |.if DUALNUM
++ | jnb <7
++ |.else
++ | jnb >2
++ | branchPC RD
++ |.endif
++ } else if (op == BC_JFORI) {
++ | branchPC RD
++ | movzx RD, PC_RD
++ | jnb =>BC_JLOOP
++ } else if (op == BC_IFORL) {
++ |.if DUALNUM
++ | jb <7
++ |.else
++ | jb >2
++ | branchPC RD
++ |.endif
++ } else {
++ | jnb =>BC_JLOOP
++ }
++ |.if DUALNUM
++ | jmp <6
++ |.else
++ |2:
++ | ins_next
++ |.endif
++ |
++ |3: // Invert comparison if step is negative.
++ | ucomisd xmm0, xmm1
++ | jmp <1
++ break;
++
++ case BC_ITERL:
++ |.if JIT
++ | hotloop RB
++ |.endif
++ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
++ break;
++
++ case BC_JITERL:
++#if !LJ_HASJIT
++ break;
++#endif
++ case BC_IITERL:
++ | ins_AJ // RA = base, RD = target
++ | lea RA, [BASE+RA*8]
++ | mov RB, [RA+4]
++ | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
++ if (op == BC_JITERL) {
++ | mov [RA-4], RB
++ | mov RB, [RA]
++ | mov [RA-8], RB
++ | jmp =>BC_JLOOP
++ } else {
++ | branchPC RD // Otherwise save control var + branch.
++ | mov RD, [RA]
++ | mov [RA-4], RB
++ | mov [RA-8], RD
++ }
++ |1:
++ | ins_next
++ break;
++
++ case BC_LOOP:
++ | ins_A // RA = base, RD = target (loop extent)
++ | // Note: RA/RD is only used by trace recorder to determine scope/extent
++ | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
++ |.if JIT
++ | hotloop RB
++ |.endif
++ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
++ break;
++
++ case BC_ILOOP:
++ | ins_A // RA = base, RD = target (loop extent)
++ | ins_next
++ break;
++
++ case BC_JLOOP:
++ |.if JIT
++ | ins_AD // RA = base (ignored), RD = traceno
++ | mov RA, [DISPATCH+DISPATCH_J(trace)]
++ | mov TRACE:RD, [RA+RD*4]
++ | mov RDa, TRACE:RD->mcode
++ | mov L:RB, SAVE_L
++ | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
++ | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
++ | // Save additional callee-save registers only used in compiled code.
++ |.if X64WIN
++ | mov TMPQ, r12
++ | mov TMPa, r13
++ | mov CSAVE_4, r14
++ | mov CSAVE_3, r15
++ | mov RAa, rsp
++ | sub rsp, 9*16+4*8
++ | movdqa [RAa], xmm6
++ | movdqa [RAa-1*16], xmm7
++ | movdqa [RAa-2*16], xmm8
++ | movdqa [RAa-3*16], xmm9
++ | movdqa [RAa-4*16], xmm10
++ | movdqa [RAa-5*16], xmm11
++ | movdqa [RAa-6*16], xmm12
++ | movdqa [RAa-7*16], xmm13
++ | movdqa [RAa-8*16], xmm14
++ | movdqa [RAa-9*16], xmm15
++ |.elif X64
++ | mov TMPQ, r12
++ | mov TMPa, r13
++ | sub rsp, 16
++ |.endif
++ | jmp RDa
++ |.endif
++ break;
++
++ case BC_JMP:
++ | ins_AJ // RA = unused, RD = target
++ | branchPC RD
++ | ins_next
++ break;
++
++ /* -- Function headers -------------------------------------------------- */
++
++ /*
++ ** Reminder: A function may be called with func/args above L->maxstack,
++ ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
++ ** too. This means all FUNC* ops (including fast functions) must check
++ ** for stack overflow _before_ adding more slots!
++ */
++
++ case BC_FUNCF:
++ |.if JIT
++ | hotcall RB
++ |.endif
++ case BC_FUNCV: /* NYI: compiled vararg functions. */
++ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
++ break;
++
++ case BC_JFUNCF:
++#if !LJ_HASJIT
++ break;
++#endif
++ case BC_IFUNCF:
++ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
++ | mov KBASE, [PC-4+PC2PROTO(k)]
++ | mov L:RB, SAVE_L
++ | lea RA, [BASE+RA*8] // Top of frame.
++ | cmp RA, L:RB->maxstack
++ | ja ->vm_growstack_f
++ | movzx RA, byte [PC-4+PC2PROTO(numparams)]
++ | cmp NARGS:RD, RA // Check for missing parameters.
++ | jbe >3
++ |2:
++ if (op == BC_JFUNCF) {
++ | movzx RD, PC_RD
++ | jmp =>BC_JLOOP
++ } else {
++ | ins_next
++ }
++ |
++ |3: // Clear missing parameters.
++ | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL
++ | add NARGS:RD, 1
++ | cmp NARGS:RD, RA
++ | jbe <3
++ | jmp <2
++ break;
++
++ case BC_JFUNCV:
++#if !LJ_HASJIT
++ break;
++#endif
++ | int3 // NYI: compiled vararg functions
++ break; /* NYI: compiled vararg functions. */
++
++ case BC_IFUNCV:
++ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
++ | lea RB, [NARGS:RD*8+FRAME_VARG]
++ | lea RD, [BASE+NARGS:RD*8]
++ | mov LFUNC:KBASE, [BASE-8]
++ | mov [RD-4], RB // Store delta + FRAME_VARG.
++ | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC.
++ | mov L:RB, SAVE_L
++ | lea RA, [RD+RA*8]
++ | cmp RA, L:RB->maxstack
++ | ja ->vm_growstack_v // Need to grow stack.
++ | mov RA, BASE
++ | mov BASE, RD
++ | movzx RB, byte [PC-4+PC2PROTO(numparams)]
++ | test RB, RB
++ | jz >2
++ |1: // Copy fixarg slots up to new frame.
++ | add RA, 8
++ | cmp RA, BASE
++ | jnb >3 // Less args than parameters?
++ | mov KBASE, [RA-8]
++ | mov [RD], KBASE
++ | mov KBASE, [RA-4]
++ | mov [RD+4], KBASE
++ | add RD, 8
++ | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
++ | sub RB, 1
++ | jnz <1
++ |2:
++ if (op == BC_JFUNCV) {
++ | movzx RD, PC_RD
++ | jmp =>BC_JLOOP
++ } else {
++ | mov KBASE, [PC-4+PC2PROTO(k)]
++ | ins_next
++ }
++ |
++ |3: // Clear missing parameters.
++ | mov dword [RD+4], LJ_TNIL
++ | add RD, 8
++ | sub RB, 1
++ | jnz <3
++ | jmp <2
++ break;
++
++ case BC_FUNCC:
++ case BC_FUNCCW:
++ | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
++ | mov CFUNC:RB, [BASE-8]
++ | mov KBASEa, CFUNC:RB->f
++ | mov L:RB, SAVE_L
++ | lea RD, [BASE+NARGS:RD*8-8]
++ | mov L:RB->base, BASE
++ | lea RA, [RD+8*LUA_MINSTACK]
++ | cmp RA, L:RB->maxstack
++ | mov L:RB->top, RD
++ if (op == BC_FUNCC) {
++ |.if X64
++ | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
++ |.else
++ | mov ARG1, L:RB
++ |.endif
++ } else {
++ |.if X64
++ | mov CARG2, KBASEa
++ | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
++ |.else
++ | mov ARG2, KBASEa
++ | mov ARG1, L:RB
++ |.endif
++ }
++ | ja ->vm_growstack_c // Need to grow stack.
++ | set_vmstate C
++ if (op == BC_FUNCC) {
++ | call KBASEa // (lua_State *L)
++ } else {
++ | // (lua_State *L, lua_CFunction f)
++ | call aword [DISPATCH+DISPATCH_GL(wrapf)]
++ }
++ | // nresults returned in eax (RD).
++ | mov BASE, L:RB->base
++ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
++ | set_vmstate INTERP
++ | lea RA, [BASE+RD*8]
++ | neg RA
++ | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
++ | mov PC, [BASE-4] // Fetch PC of caller.
++ | jmp ->vm_returnc
++ break;
++
++ /* ---------------------------------------------------------------------- */
++
++ default:
++ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
++ exit(2);
++ break;
++ }
++}
++
++static int build_backend(BuildCtx *ctx) /* Emits the subroutines, then the code for every bytecode op; returns BC__MAX. */
++{
++ int op;
++ dasm_growpc(Dst, BC__MAX); /* Presumably sizes the DynASM pc-label table so the =>BC_xxx targets used by build_ins() resolve -- confirm against the DynASM API. */
++ build_subroutines(ctx); /* Subroutines are generated before any per-opcode code. */
++ |.code_op
++ for (op = 0; op < BC__MAX; op++) /* Ascending opcode order; build_ins() receives op both as BCOp and as plain int. */
++ build_ins(ctx, (BCOp)op, op);
++ return BC__MAX;
++}
++
++/* Emit pseudo frame-info for all assembler functions. */
++static void emit_asm_debug(BuildCtx *ctx)
++{
++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
++#if LJ_64
++#define SZPTR "8"
++#define BSZPTR "3"
++#define REG_SP "0x7"
++#define REG_RA "0x10"
++#else
++#define SZPTR "4"
++#define BSZPTR "2"
++#define REG_SP "0x4"
++#define REG_RA "0x8"
++#endif
++ switch (ctx->mode) {
++ case BUILD_elfasm:
++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
++ fprintf(ctx->fp,
++ ".Lframe0:\n"
++ "\t.long .LECIE0-.LSCIE0\n"
++ ".LSCIE0:\n"
++ "\t.long 0xffffffff\n"
++ "\t.byte 0x1\n"
++ "\t.string \"\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -" SZPTR "\n"
++ "\t.byte " REG_RA "\n"
++ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
++ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
++ "\t.align " SZPTR "\n"
++ ".LECIE0:\n\n");
++ fprintf(ctx->fp,
++ ".LSFDE0:\n"
++ "\t.long .LEFDE0-.LASFDE0\n"
++ ".LASFDE0:\n"
++ "\t.long .Lframe0\n"
++#if LJ_64
++ "\t.quad .Lbegin\n"
++ "\t.quad %d\n"
++ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
++ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
++ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
++ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
++ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
++#if LJ_NO_UNWIND
++ "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
++ "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
++#endif
++#else
++ "\t.long .Lbegin\n"
++ "\t.long %d\n"
++ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
++ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
++ "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
++ "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
++ "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
++#endif
++ "\t.align " SZPTR "\n"
++ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
++#if LJ_HASFFI
++ fprintf(ctx->fp,
++ ".LSFDE1:\n"
++ "\t.long .LEFDE1-.LASFDE1\n"
++ ".LASFDE1:\n"
++ "\t.long .Lframe0\n"
++#if LJ_64
++ "\t.quad lj_vm_ffi_call\n"
++ "\t.quad %d\n"
++ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
++ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
++ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
++ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
++#else
++ "\t.long lj_vm_ffi_call\n"
++ "\t.long %d\n"
++ "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
++ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
++ "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
++ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
++#endif
++ "\t.align " SZPTR "\n"
++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
++#endif
++#if !LJ_NO_UNWIND
++#if (defined(__sun__) && defined(__svr4__))
++#if LJ_64
++ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
++#else
++ fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
++#endif
++#else
++ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
++#endif
++ fprintf(ctx->fp,
++ ".Lframe1:\n"
++ "\t.long .LECIE1-.LSCIE1\n"
++ ".LSCIE1:\n"
++ "\t.long 0\n"
++ "\t.byte 0x1\n"
++ "\t.string \"zPR\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -" SZPTR "\n"
++ "\t.byte " REG_RA "\n"
++ "\t.uleb128 6\n" /* augmentation length */
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.long lj_err_unwind_dwarf-.\n"
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
++ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
++ "\t.align " SZPTR "\n"
++ ".LECIE1:\n\n");
++ fprintf(ctx->fp,
++ ".LSFDE2:\n"
++ "\t.long .LEFDE2-.LASFDE2\n"
++ ".LASFDE2:\n"
++ "\t.long .LASFDE2-.Lframe1\n"
++ "\t.long .Lbegin-.\n"
++ "\t.long %d\n"
++ "\t.uleb128 0\n" /* augmentation length */
++ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
++#if LJ_64
++ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
++ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
++ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
++ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
++#else
++ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
++ "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
++ "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
++ "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
++#endif
++ "\t.align " SZPTR "\n"
++ ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
++#if LJ_HASFFI
++ fprintf(ctx->fp,
++ ".Lframe2:\n"
++ "\t.long .LECIE2-.LSCIE2\n"
++ ".LSCIE2:\n"
++ "\t.long 0\n"
++ "\t.byte 0x1\n"
++ "\t.string \"zR\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -" SZPTR "\n"
++ "\t.byte " REG_RA "\n"
++ "\t.uleb128 1\n" /* augmentation length */
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
++ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
++ "\t.align " SZPTR "\n"
++ ".LECIE2:\n\n");
++ fprintf(ctx->fp,
++ ".LSFDE3:\n"
++ "\t.long .LEFDE3-.LASFDE3\n"
++ ".LASFDE3:\n"
++ "\t.long .LASFDE3-.Lframe2\n"
++ "\t.long lj_vm_ffi_call-.\n"
++ "\t.long %d\n"
++ "\t.uleb128 0\n" /* augmentation length */
++#if LJ_64
++ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
++ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
++ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
++ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
++#else
++ "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
++ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
++ "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
++ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
++#endif
++ "\t.align " SZPTR "\n"
++ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
++#endif
++#endif
++ break;
++#if !LJ_NO_UNWIND
++ /* Mental note: never let Apple design an assembler.
++ ** Or a linker. Or a plastic case. But I digress.
++ */
++ case BUILD_machasm: {
++#if LJ_HASFFI
++ int fcsize = 0;
++#endif
++ int i;
++ fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
++ fprintf(ctx->fp,
++ "EH_frame1:\n"
++ "\t.set L$set$x,LECIEX-LSCIEX\n"
++ "\t.long L$set$x\n"
++ "LSCIEX:\n"
++ "\t.long 0\n"
++ "\t.byte 0x1\n"
++ "\t.ascii \"zPR\\0\"\n"
++ "\t.byte 0x1\n"
++ "\t.byte 128-" SZPTR "\n"
++ "\t.byte " REG_RA "\n"
++ "\t.byte 6\n" /* augmentation length */
++ "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
++#if LJ_64
++ "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
++#else
++ "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */
++#endif
++ "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
++ "\t.align " BSZPTR "\n"
++ "LECIEX:\n\n");
++ for (i = 0; i < ctx->nsym; i++) {
++ const char *name = ctx->sym[i].name;
++ int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
++ if (size == 0) continue;
++#if LJ_HASFFI
++ if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
++#endif
++ fprintf(ctx->fp,
++ "%s.eh:\n"
++ "LSFDE%d:\n"
++ "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
++ "\t.long L$set$%d\n"
++ "LASFDE%d:\n"
++ "\t.long LASFDE%d-EH_frame1\n"
++ "\t.long %s-.\n"
++ "\t.long %d\n"
++ "\t.byte 0\n" /* augmentation length */
++ "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
++#if LJ_64
++ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
++ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
++ "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
++ "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
++#else
++ "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
++ "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */
++ "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */
++ "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */
++#endif
++ "\t.align " BSZPTR "\n"
++ "LEFDE%d:\n\n",
++ name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
++ }
++#if LJ_HASFFI
++ if (fcsize) {
++ fprintf(ctx->fp,
++ "EH_frame2:\n"
++ "\t.set L$set$y,LECIEY-LSCIEY\n"
++ "\t.long L$set$y\n"
++ "LSCIEY:\n"
++ "\t.long 0\n"
++ "\t.byte 0x1\n"
++ "\t.ascii \"zR\\0\"\n"
++ "\t.byte 0x1\n"
++ "\t.byte 128-" SZPTR "\n"
++ "\t.byte " REG_RA "\n"
++ "\t.byte 1\n" /* augmentation length */
++#if LJ_64
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
++#else
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */
++#endif
++ "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
++ "\t.align " BSZPTR "\n"
++ "LECIEY:\n\n");
++ fprintf(ctx->fp,
++ "_lj_vm_ffi_call.eh:\n"
++ "LSFDEY:\n"
++ "\t.set L$set$yy,LEFDEY-LASFDEY\n"
++ "\t.long L$set$yy\n"
++ "LASFDEY:\n"
++ "\t.long LASFDEY-EH_frame2\n"
++ "\t.long _lj_vm_ffi_call-.\n"
++ "\t.long %d\n"
++ "\t.byte 0\n" /* augmentation length */
++#if LJ_64
++ "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
++ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
++ "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
++ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
++#else
++ "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */
++ "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
++ "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */
++ "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */
++#endif
++ "\t.align " BSZPTR "\n"
++ "LEFDEY:\n\n", fcsize);
++ }
++#endif
++#if !LJ_64
++ fprintf(ctx->fp,
++ "\t.non_lazy_symbol_pointer\n"
++ "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
++ ".indirect_symbol _lj_err_unwind_dwarf\n"
++ ".long 0\n\n");
++ fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
++ {
++ const char *const *xn;
++ for (xn = ctx->extnames; *xn; xn++)
++ if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
++ fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
++ }
++#endif
++ fprintf(ctx->fp, ".subsections_via_symbols\n");
++ }
++ break;
++#endif
++ default: /* Difficult for other modes. */
++ break;
++ }
++}
+
+From 2315613b3835fc0b89601a1fc2e72fffa1857b35 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 15 Nov 2016 13:50:15 -0500
+Subject: [PATCH 011/260] Fix some s390x declarations.
+
+s/S390x/S390X/
+---
+ src/Makefile | 3 ++-
+ src/lj_arch.h | 29 +++++++++++++++++++----------
+ 3 files changed, 22 insertions(+), 12 deletions(-)
+
+diff --git a/src/Makefile b/src/Makefile
+index 40cd10159..1450adc03 100644
+--- a/src/Makefile
++++ b/src/Makefile
+@@ -238,7 +238,7 @@ else
+ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
+ TARGET_LJARCH= arm
+ else
+-ifneq (,$(findstring LJ_TARGET_S390x ,$(TARGET_TESTARCH)))
++ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH)))
+ TARGET_LJARCH= s390x
+ else
+ ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
+@@ -269,6 +269,7 @@ endif
+ endif
+ endif
+ endif
++endif
+
+ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
+ TARGET_SYS= PS3
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index 2638a9412..f699e90c3 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -29,7 +29,8 @@
+ #define LUAJIT_ARCH_mips32 6
+ #define LUAJIT_ARCH_MIPS64 7
+ #define LUAJIT_ARCH_mips64 7
+-#define LUAJIT_ARCH_S390x 8
++#define LUAJIT_ARCH_S390X 8
++#define LUAJIT_ARCH_s390x 8
+
+ /* Target OS. */
+ #define LUAJIT_OS_OTHER 0
+@@ -50,8 +51,8 @@
+ #define LUAJIT_TARGET LUAJIT_ARCH_ARM
+ #elif defined(__aarch64__)
+ #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
+-#elif defined(__s390x__) || defined(__s390x) || defined(__S390x__) || defined(__S390x) || defined(S390x)
+-#define LUAJIT_TARGET LUAJIT_ARCH_S390x
++#elif defined(__s390x__) || defined(__s390x)
++#define LUAJIT_TARGET LUAJIT_ARCH_S390X
+ #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
+ #define LUAJIT_TARGET LUAJIT_ARCH_PPC
+ #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
+@@ -233,13 +234,6 @@
+
+ #define LJ_ARCH_VERSION 80
+
+-#elif LUAJIT_TARGET == LUAJIT_ARCH_S390
+-
+- #define LJ_ARCH_NAME "s390x"
+- #define LJ_ARCH_BITS 64
+- #define LJ_ARCH_ENDIAN LUAJIT_BE
+- #define LJ_TARGET_S390 1
+-
+ #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
+
+ #ifndef LJ_ARCH_ENDIAN
+@@ -362,6 +356,21 @@
+ #define LJ_ARCH_VERSION 10
+ #endif
+
++#elif LUAJIT_TARGET == LUAJIT_ARCH_S390X
++
++#define LJ_ARCH_NAME "s390x"
++#define LJ_ARCH_BITS 64
++#define LJ_ARCH_ENDIAN LUAJIT_BE
++#define LJ_TARGET_S390X 1
++#define LJ_TARGET_EHRETREG 0
++#define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */
++#define LJ_TARGET_MASKSHIFT 1
++#define LJ_TARGET_MASKROT 1
++#define LJ_TARGET_UNALIGNED 1
++#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
++#define LJ_TARGET_GC64 1
++#define LJ_ARCH_NOJIT 1 /* NYI */
++
+ #else
+ #error "No target architecture defined"
+ #endif
+
+From 71d40ba670d0d28be3b473a1e3042e82bf1ce9c1 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 15 Nov 2016 14:39:34 -0500
+Subject: [PATCH 012/260] Add some s390x C calling convention constants.
+
+Guesses for now based on the ELF ABI supplement for zSeries.
+---
+ src/host/buildvm.c | 2 ++
+ src/lj_ccall.h | 11 +++++++++++
+ 2 files changed, 13 insertions(+)
+
+diff --git a/src/host/buildvm.c b/src/host/buildvm.c
+index 57b4dc973..ad2a8171d 100644
+--- a/src/host/buildvm.c
++++ b/src/host/buildvm.c
+@@ -65,6 +65,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
+ #include "../dynasm/dasm_ppc.h"
+ #elif LJ_TARGET_MIPS
+ #include "../dynasm/dasm_mips.h"
++#elif LJ_TARGET_S390X
++#include "../dynasm/dasm_s390x.h"
+ #else
+ #error "No support for this architecture (yet)"
+ #endif
+diff --git a/src/lj_ccall.h b/src/lj_ccall.h
+index d97227a6b..2a10a5e88 100644
+--- a/src/lj_ccall.h
++++ b/src/lj_ccall.h
+@@ -126,6 +126,17 @@ typedef union FPRArg {
+ struct { LJ_ENDIAN_LOHI(float f; , float g;) };
+ } FPRArg;
+
++#elif LJ_TARGET_S390X
++
++#define CCALL_NARG_GPR 5 /* GPR 2,3,4,5,6 */
++#define CCALL_NARG_FPR 4 /* FPR 0,2,4,8 */
++#define CCALL_NRET_GPR 1 /* GPR 2 */
++#define CCALL_NRET_FPR 1 /* FPR 0 */
++#define CCALL_SPS_EXTRA 20 /* 160-byte callee save area (not sure if this is the right place) */
++#define CCALL_SPS_FREE 0
++
++typedef intptr_t GPRArg;
++typedef double FPRArg;
+ #else
+ #error "Missing calling convention definitions for this architecture"
+ #endif
+
+From 820fa8a0495b69090ef84d32822adb8a24aa42f0 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 15 Nov 2016 14:53:00 -0500
+Subject: [PATCH 013/260] Delete gcc version check for now.
+
+Stick to the default until we know what we actually need.
+---
+ src/lj_arch.h | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index f699e90c3..31503e83e 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -398,10 +398,6 @@
+ #if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
+ #error "Need at least Clang 3.5 or newer"
+ #endif
+-#elif LJ_TARGET_S390x
+-#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
+-#error "Need at least GCC 4.2 or newer"
+-#endif
+ #else
+ #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
+ #error "Need at least GCC 4.8 or newer"
+
+From 3e472eb2615737916f0fb4a2a59b36cae73f3934 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 16 Nov 2016 10:31:34 +0530
+Subject: [PATCH 014/260] Update lj_arch.h
+
+Added missing elif condition for s390x for GCC dependency
+---
+ src/lj_arch.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index 31503e83e..7f24386e1 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -393,6 +393,10 @@
+ #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
+ #error "Need at least GCC 4.2 or newer"
+ #endif
++#elif LJ_TARGET_S390x
++#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
++#error "Need at least GCC 4.2 or newer"
++#endif
+ #elif LJ_TARGET_ARM64
+ #if __clang__
+ #if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
+
+From 31fb648a50a3cb854b9c4ff771b8b6e34da85163 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 16 Nov 2016 10:32:53 +0530
+Subject: [PATCH 015/260] Update lj_arch.h
+
+Removing the GCC check for now; missed Michael's comment earlier.
+---
+ src/lj_arch.h | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index 7f24386e1..31503e83e 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -393,10 +393,6 @@
+ #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
+ #error "Need at least GCC 4.2 or newer"
+ #endif
+-#elif LJ_TARGET_S390x
+-#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
+-#error "Need at least GCC 4.2 or newer"
+-#endif
+ #elif LJ_TARGET_ARM64
+ #if __clang__
+ #if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
+
+From 498f028e69d81bfe6718dc24f71c93ae58130a23 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 16 Nov 2016 11:50:46 +0530
+Subject: [PATCH 016/260] Update lj_target_s390x.h
+
+changed instruction opcode to 64bit
+---
+ src/lj_target_s390x.h | 30 +++++++++++++++---------------
+ 1 file changed, 15 insertions(+), 15 deletions(-)
+
+diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
+index 27bb34963..551bb7d46 100644
+--- a/src/lj_target_s390x.h
++++ b/src/lj_target_s390x.h
+@@ -154,27 +154,27 @@ typedef struct {
+ #define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
+
+ typedef enum S390xIns {
+- S390I_SR = 0x1B000000,
+- S390I_AR = 0x1A000000,
+- S390I_NR = 0x14000000,
+- S390I_XR = 0x17000000,
+- S390I_MR = 0x1C000000,
+- S390I_LR = 0x18000000,
+- S390I_C = 0x59000000,
+- S390I_LH = 0x48000000,
+- S390I_BASR = 0x0D000000,
+- S390I_MVCL = 0x0e000000,
+- S390I_ST = 0x50000000,
+- S390I_TM = 0x91000000,
+- S390I_MP = 0xbd000090,
+- S390I_CLR = 0x15000000,
++ S390I_SR = 0x1B00000000000000,
++ S390I_AR = 0x1A00000000000000,
++ S390I_NR = 0x1400000000000000,
++ S390I_XR = 0x1700000000000000,
++ S390I_MR = 0x1C00000000000000,
++ S390I_LR = 0x1800000000000000,
++ S390I_C = 0x5900000000000000,
++ S390I_LH = 0x4800000000000000,
++ S390I_BASR = 0x0D00000000000000,
++ S390I_MVCL = 0x0e00000000000000,
++ S390I_ST = 0x5000000000000000,
++ S390I_TM = 0x9100000000000000,
++ S390I_MP = 0xbd00009000000000,
++ S390I_CLR = 0x1500000000000000,
+ } S390xIns;
+
+ typedef enum S390xShift {
+ S390SH_SLL, S390SH_SRL, S390SH_SRA
+ } S390xShift;
+
+-/* ARM condition codes. */
++/* S390x condition codes. */
+ typedef enum S390xCC {
+
+ } S390xCC;
+
+From dcb977d1db91ea6600faf173cbd79df3aaff7c2e Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 16 Nov 2016 14:44:12 +0530
+Subject: [PATCH 017/260] Changed the encoding for add,and,branch instructions
+
+---
+ dynasm/dasm_s390x.lua | 116 +++++++++++++++++++++++++++---------------
+ 1 file changed, 76 insertions(+), 40 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index a0a50e1e1..3542e7ee0 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -653,47 +653,83 @@ end)
+ -- Template strings for ARM instructions.
+ map_op = {
+ -- Basic data processing instructions.
+- add_2 = "00000000005a0000RX-a|00000000001aRR|00000000b9f80000RRF-a|00000000e35a0000RXY-a|00000000e3080000RXY-a",
+-
+--- and has several possible ways, need to find one, currently added two type of
+- and_2 = "0000000000540000RX-a|00000000140000RR|00000000b9f4RRF-a|00000000e3540000RXY-a|00000000b9800000RRE| 00000000b9e40000RRF-a",
+- and_c = "0000000000d40000SS-a",
+- and_i = "0000000000940000SI|00000000eb540000SIY",
++ --add
++ ar = "0000000000001a00", --RR
++ ay = "0000e3000000005a", --RXY-a
++ ag = "0000e30000000008",
++ agr = "00000000b9080000", --RRE
++ agf = "0000e30000000018",
++ agfr = "00000000b9180000",
++ agbr = "00000000b34a0000",
++ adbr = "00000000b31a0000",
++ aebr = "00000000b30a0000",
++ ah = "000000004a000000", --RXa
++ ahy = "0000e3000000007a",
++ afi = "0000c20900000000", --RIL-a --pls check if this is correct
++ agfi = "0000c20800000000",
++ aih = "0000cc0800000000",
++ al = "000000005e000000",
++ alr = "0000000000001e00",
++ aly = "0000e3000000005e", -- RXY-a
++ alg = "0000e3000000000a",
++ algr = "00000000b90a0000",
++ algf = "0000e3000000001a",
++ algfr = "00000000b91a0000",
++ alfi = "0000c20b00000000",
++ algfi = "0000c20a00000000",
++ alc = "0000e30000000098",
++ alcr = "00000000b9980000", -- RRE
++ alcg = "0000e30000000088",
++ alcgr = "00000000b9880000",
++ alsih = "0000cc0a00000000",
++ alsihn ="0000cc0b00000000",
++ axr = "0000000000003600", -- RR
++ ad = "000000006a000000", -- Rx-a
++ adr = "0000000000002a00",
++ ae = "000000007a000000",
++ aer = "0000000000003a00",
++ aw = "000000006e000000",
++ awr = "0000000000002e00",
++ au = "000000007e000000",
++ aur = "0000000000003e00",
++
++-- and
++ n = "0000000054000000",
++ nr = "0000000000001400",
++ ny = "0000e30000000054", -- RXY-a
++ ng = "0000e30000000080",
++ ngr = "00000000b9800000",
++ nihf = "0000c00a00000000", --RIL-a
++ nihl = "0000c00b00000000",
+
+-and_2 = "0000000000540000RX-a|0000000000140000RR|00000000b9f40000RRF-a|00000000e3540000RXY-a",
+- and_3 = "00000000e3800000RXY-a|00000000b9800000RRE|00000000b9e40000RRF-a",
+- and_c = "0000000000d40000SS-a",
+- and_i = "0000000000940000SI",
+- and_i4 = "00000000eb540000SIY"
+- and_i3 = "000000000a540000RI-a|000000000a550000RI-a|000000000c0a0000RIL-a|000000000a560000RI-a|000000000a570000RI-a|000000000c0bRIL-a"
+- --branch related instrcutions
+- bal = "0000000000450000RX-a",
+- balr = "0000000000050000RR",
+- bas = "00000000004d0000RX-a",
+- basr = "00000000000d0000RR",
+- bassm = "00000000000c0000RR",
+- bsm = "00000000000b0000RR",
+- bc = "0000000000470000Rx-b",
+- bcr = "00000000000070000RR",
+- bct = "0000000000460000RX-a",
+- bctr = "0000000000060000RR",
+- bctg = "00000000e3460000RXY-a",
+- bctgr = "00000000b9460000RRE",
+- bxh = "0000000000860000RS-a",
+- bxhg = "00000000eb440000RSY-a",
+- bxle = "0000000000870000RS-a",
+- bxleg = "00000000eb450000RSY-a",
+- bras = "000000000a750000RI-b",
+- brasl = "000000000c050000RIL-b",
+- brc = "000000000a740000RI-c",
+- brcl = "000000000c040000RIL-c",
+- brct = "000000000a760000RI-b",
+- brctg = "000000000a770000RI-b",
+- brctg = "00000000occ60000RIL-b",
+- brxh = "0000000000840000RSI",
+- brxhg = "00000000ec440000RIE-e",
+- brxle = "0000000000850000RSI",
+- brxlg = "00000000ec450000RIE-e",
++ --branch related instrcutions
++ bal = "0000000045000000", --RX-a
++ balr = "0000000000005000", --RR
++ bas = "000000004d000000",
++ basr = "0000000000000d00", -- this has leading zero in the instrcution opcode: 0d, need to take into consideration
++ bassm = "0000000000000c00",
++ bsm = "0000000000000b00",
++ bc = "0000000047000000",
++ bcr = "0000000000000700",
++ bct = "0000000046000000",
++ bctr = "0000000000000600",
++ bctg = "0000e30000000046",
++ bctgr = "00000000b9460000",
++ bxh = "0000000086000000", --RS-a
++ bxhg = "0000eb0000000044",
++ bxle = "0000000087000000",
++ bxleg = "0000eb0000000045", -- RSY-a
++ --bras = "000000000a750000RI-b",
++ brasl = "0000c00500000000", --RIL-b
++ --brc = "000000000a740000RI-c",
++ brcl = "0000c00400000000", --RIL-c
++ --brct = "000000000a760000RI-b",
++ --brctg = "000000000a770000RI-b",
++ brcth = "0000cc0600000000",
++ --brxh = "0000000000840000RSI",
++ --brxhg = "00000000ec440000RIE-e",
++ --brxle = "0000000000850000RSI",
++ --brxlg = "00000000ec450000RIE-e",
+
+ ----subtraction (basic operation)
+ sub = "00000000005b0000RX-a"
+
+From 777b0671d3d496be7f2f53449943c40de0f6da9c Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 16 Nov 2016 15:09:59 +0530
+Subject: [PATCH 018/260] Update lj_target_s390x.h
+
+Added s390x specific condition codes
+---
+ src/lj_target_s390x.h | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
+index 551bb7d46..4e35891a3 100644
+--- a/src/lj_target_s390x.h
++++ b/src/lj_target_s390x.h
+@@ -176,7 +176,14 @@ typedef enum S390xShift {
+
+ /* S390x condition codes. */
+ typedef enum S390xCC {
+-
++ /* Z- Zero , LZ - Less thena Zero , GZ - Greater than Zero
++ O - Overflow , NZ - Not Zero , ZC - Zero with carry
++ NZC - No Zero with carry , ZNC - Zero with No Carry
++ EQ - Equal , NE - Not Equal , LO - Loq , HI - High
++ */
++ CC_Z , CC_LZ , CC_GZ , CC_O ,
++ CC_NZ , CC_ZC , CC_NZC ,
++ CC_ZNC , CC_EQ , CC_NE , CC_LO , CC_HI
+ } S390xCC;
+
+ #endif
+
+From 547b158ba4a82907db676460acaa2a7ba89680b4 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 16 Nov 2016 15:34:32 +0530
+Subject: [PATCH 019/260] Update vm_s390x.dasc
+
+Changed the declared arch from x86 to S390x.
+Removed some x86-specific code.
+---
+ src/vm_s390x.dasc | 195 +---------------------------------------------
+ 1 file changed, 3 insertions(+), 192 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index d7d618d3b..7f12f625f 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1,12 +1,9 @@
+-|// Low-level VM code for x86 CPUs.
++|// Low-level VM code for S390x CPUs.
+ |// Bytecode interpreter, fast functions and helper functions.
+ |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+ |
+-|.if P64
+-|.arch x64
+-|.else
+-|.arch x86
+-|.endif
++
++|.arch S390x
+ |.section code_op, code_sub
+ |
+ |.actionlist build_actionlist
+@@ -16,13 +13,6 @@
+ |
+ |//-----------------------------------------------------------------------
+ |
+-|.if P64
+-|.define X64, 1
+-|.if WIN
+-|.define X64WIN, 1
+-|.endif
+-|.endif
+-|
+ |// Fixed register assignments for the interpreter.
+ |// This is very fragile and has many dependencies. Caveat emptor.
+ |.define BASE, edx // Not C callee-save, refetched anyway.
+@@ -119,10 +109,6 @@
+ |
+ |// Stack layout while in interpreter. Must match with lj_frame.h.
+ |//-----------------------------------------------------------------------
+-|.if not X64 // x86 stack layout.
+-|
+-|.if WIN
+-|
+ |.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
+ |.macro saveregs_
+ | push edi; push esi; push ebx
+@@ -138,51 +124,9 @@
+ | pop ebx; pop esi; pop edi; pop ebp
+ |.endmacro
+ |
+-|.else
+-|
+-|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
+-|.macro saveregs_
+-| push edi; push esi; push ebx
+-| sub esp, CFRAME_SPACE
+-|.endmacro
+-|.macro restoreregs
+-| add esp, CFRAME_SPACE
+-| pop ebx; pop esi; pop edi; pop ebp
+-|.endmacro
+-|
+-|.endif
+-|
+ |.macro saveregs
+ | push ebp; saveregs_
+ |.endmacro
+-|
+-|.if WIN
+-|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
+-|.define SAVE_NRES, aword [esp+aword*18]
+-|.define SAVE_CFRAME, aword [esp+aword*17]
+-|.define SAVE_L, aword [esp+aword*16]
+-|//----- 16 byte aligned, ^^^ arguments from C caller
+-|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
+-|.define SAVE_R4, aword [esp+aword*14]
+-|.define SAVE_R3, aword [esp+aword*13]
+-|.define SAVE_R2, aword [esp+aword*12]
+-|//----- 16 byte aligned
+-|.define SAVE_R1, aword [esp+aword*11]
+-|.define SEH_FUNC, aword [esp+aword*10]
+-|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
+-|.define UNUSED2, aword [esp+aword*8]
+-|//----- 16 byte aligned
+-|.define UNUSED1, aword [esp+aword*7]
+-|.define SAVE_PC, aword [esp+aword*6]
+-|.define TMP2, aword [esp+aword*5]
+-|.define TMP1, aword [esp+aword*4]
+-|//----- 16 byte aligned
+-|.define ARG4, aword [esp+aword*3]
+-|.define ARG3, aword [esp+aword*2]
+-|.define ARG2, aword [esp+aword*1]
+-|.define ARG1, aword [esp] //<-- esp while in interpreter.
+-|//----- 16 byte aligned, ^^^ arguments for C callee
+-|.else
+ |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
+ |.define SAVE_NRES, aword [esp+aword*14]
+ |.define SAVE_CFRAME, aword [esp+aword*13]
+@@ -203,7 +147,6 @@
+ |.define ARG2, aword [esp+aword*1]
+ |.define ARG1, aword [esp] //<-- esp while in interpreter.
+ |//----- 16 byte aligned, ^^^ arguments for C callee
+-|.endif
+ |
+ |// FPARGx overlaps ARGx and ARG(x+1) on x86.
+ |.define FPARG3, qword [esp+qword*1]
+@@ -215,112 +158,6 @@
+ |.define TMPa, TMP1
+ |.define MULTRES, TMP2
+ |
+-|// Arguments for vm_call and vm_pcall.
+-|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME!
+-|
+-|// Arguments for vm_cpcall.
+-|.define INARG_CP_CALL, SAVE_ERRF
+-|.define INARG_CP_UD, SAVE_NRES
+-|.define INARG_CP_FUNC, SAVE_CFRAME
+-|
+-|//-----------------------------------------------------------------------
+-|.elif X64WIN // x64/Windows stack layout
+-|
+-|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
+-|.macro saveregs_
+-| push rdi; push rsi; push rbx
+-| sub rsp, CFRAME_SPACE
+-|.endmacro
+-|.macro saveregs
+-| push rbp; saveregs_
+-|.endmacro
+-|.macro restoreregs
+-| add rsp, CFRAME_SPACE
+-| pop rbx; pop rsi; pop rdi; pop rbp
+-|.endmacro
+-|
+-|.define SAVE_CFRAME, aword [rsp+aword*13]
+-|.define SAVE_PC, dword [rsp+dword*25]
+-|.define SAVE_L, dword [rsp+dword*24]
+-|.define SAVE_ERRF, dword [rsp+dword*23]
+-|.define SAVE_NRES, dword [rsp+dword*22]
+-|.define TMP2, dword [rsp+dword*21]
+-|.define TMP1, dword [rsp+dword*20]
+-|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
+-|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
+-|.define SAVE_R4, aword [rsp+aword*8]
+-|.define SAVE_R3, aword [rsp+aword*7]
+-|.define SAVE_R2, aword [rsp+aword*6]
+-|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
+-|.define ARG5, aword [rsp+aword*4]
+-|.define CSAVE_4, aword [rsp+aword*3]
+-|.define CSAVE_3, aword [rsp+aword*2]
+-|.define CSAVE_2, aword [rsp+aword*1]
+-|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
+-|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
+-|
+-|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
+-|.define TMPQ, qword [rsp+aword*10]
+-|.define MULTRES, TMP2
+-|.define TMPa, ARG5
+-|.define ARG5d, dword [rsp+aword*4]
+-|.define TMP3, ARG5d
+-|
+-|//-----------------------------------------------------------------------
+-|.else // x64/POSIX stack layout
+-|
+-|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
+-|.macro saveregs_
+-| push rbx; push r15; push r14
+-|.if NO_UNWIND
+-| push r13; push r12
+-|.endif
+-| sub rsp, CFRAME_SPACE
+-|.endmacro
+-|.macro saveregs
+-| push rbp; saveregs_
+-|.endmacro
+-|.macro restoreregs
+-| add rsp, CFRAME_SPACE
+-|.if NO_UNWIND
+-| pop r12; pop r13
+-|.endif
+-| pop r14; pop r15; pop rbx; pop rbp
+-|.endmacro
+-|
+-|//----- 16 byte aligned,
+-|.if NO_UNWIND
+-|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
+-|.define SAVE_R4, aword [rsp+aword*10]
+-|.define SAVE_R3, aword [rsp+aword*9]
+-|.define SAVE_R2, aword [rsp+aword*8]
+-|.define SAVE_R1, aword [rsp+aword*7]
+-|.define SAVE_RU2, aword [rsp+aword*6]
+-|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
+-|.else
+-|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
+-|.define SAVE_R4, aword [rsp+aword*8]
+-|.define SAVE_R3, aword [rsp+aword*7]
+-|.define SAVE_R2, aword [rsp+aword*6]
+-|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
+-|.endif
+-|.define SAVE_CFRAME, aword [rsp+aword*4]
+-|.define SAVE_PC, dword [rsp+dword*7]
+-|.define SAVE_L, dword [rsp+dword*6]
+-|.define SAVE_ERRF, dword [rsp+dword*5]
+-|.define SAVE_NRES, dword [rsp+dword*4]
+-|.define TMPa, aword [rsp+aword*1]
+-|.define TMP2, dword [rsp+dword*1]
+-|.define TMP1, dword [rsp] //<-- rsp while in interpreter.
+-|//----- 16 byte aligned
+-|
+-|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
+-|.define TMPQ, qword [rsp]
+-|.define TMP3, dword [rsp+aword*1]
+-|.define MULTRES, TMP2
+-|
+-|.endif
+-|
+ |//-----------------------------------------------------------------------
+ |
+ |// Instruction headers.
+@@ -339,11 +176,6 @@
+ | movzx OP, RCL
+ | add PC, 4
+ | shr RC, 16
+-|.if X64
+-| jmp aword [DISPATCH+OP*8]
+-|.else
+-| jmp aword [DISPATCH+OP*4]
+-|.endif
+ |.endmacro
+ |
+ |// Instruction footer.
+@@ -433,30 +265,9 @@
+ | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
+ |.endmacro
+ |
+-|// x87 compares.
+-|.macro fcomparepp // Compare and pop st0 >< st1.
+-| fucomip st1
+-| fpop
+-|.endmacro
+ |
+ |.macro fpop1; fstp st1; .endmacro
+ |
+-|// Synthesize SSE FP constants.
+-|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
+-|.if X64
+-| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
+-|.else
+-| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1
+-|.endif
+-|.endmacro
+-|
+-|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
+-|.if X64
+-| mov64 tmp, U64x(val,00000000); movd reg, tmp
+-|.else
+-| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51
+-|.endif
+-|.endmacro
+ |
+ |.macro sseconst_sign, reg, tmp // Synthesize sign mask.
+ | sseconst_hi reg, tmp, 80000000
+
+From c36895a872079cdbb683a470edfa137b6ff50928 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 16 Nov 2016 17:19:10 +0530
+Subject: [PATCH 020/260] Update lj_frame.h
+
+Added CFrame definitions for S390X.
+Values are left unassigned because I am unsure of the correct values.
+---
+ src/lj_frame.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/src/lj_frame.h b/src/lj_frame.h
+index d8d8cff29..017bdaf9e 100644
+--- a/src/lj_frame.h
++++ b/src/lj_frame.h
+@@ -200,6 +200,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
+ #define CFRAME_OFS_MULTRES 192
+ #define CFRAME_SIZE 208
+ #define CFRAME_SHIFT_MULTRES 3
++#elif LJ_TARGET_S390X
++#define CFRAME_OFS_ERRF
++#define CFRAME_OFS_NRES
++#define CFRAME_OFS_PREV
++#define CFRAME_OFS_L
++#define CFRAME_OFS_PC
++#define CFRAME_OFS_MULTRES
++#define CFRAME_SIZE
++#define CFRAME_SHIFT_MULTRES
+ #elif LJ_TARGET_PPC
+ #if LJ_TARGET_XBOX360
+ #define CFRAME_OFS_ERRF 424
+
+From 4aa1099d6a5484f78cc6336e0987f1d541685228 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 17 Nov 2016 14:58:17 +0530
+Subject: [PATCH 021/260] Update vm_s390x.dasc
+
+Referred to the ARM dasc file and created placeholder slots that still have to be filled in with s390x registers and instructions.
+---
+ src/vm_s390x.dasc | 219 +++++++++++++++++++++-------------------------
+ 1 file changed, 101 insertions(+), 118 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 7f12f625f..ff599470b 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -15,81 +15,85 @@
+ |
+ |// Fixed register assignments for the interpreter.
+ |// This is very fragile and has many dependencies. Caveat emptor.
+-|.define BASE, edx // Not C callee-save, refetched anyway.
+-|.if not X64
+-|.define KBASE, edi // Must be C callee-save.
+-|.define KBASEa, KBASE
+-|.define PC, esi // Must be C callee-save.
+-|.define PCa, PC
+-|.define DISPATCH, ebx // Must be C callee-save.
+-|.elif X64WIN
+-|.define KBASE, edi // Must be C callee-save.
+-|.define KBASEa, rdi
+-|.define PC, esi // Must be C callee-save.
+-|.define PCa, rsi
+-|.define DISPATCH, ebx // Must be C callee-save.
+-|.else
+-|.define KBASE, r15d // Must be C callee-save.
+-|.define KBASEa, r15
+-|.define PC, ebx // Must be C callee-save.
+-|.define PCa, rbx
+-|.define DISPATCH, r14d // Must be C callee-save.
+-|.endif
++.define BASE, // Base of current Lua stack frame.
++|.define KBASE, // Constants of current Lua function.
++|.define PC, // Next PC.
++|.define GLREG, // Global state.
++|.define LREG, // Register holding lua_State (also in SAVE_L).
++|.define TISNUM, // Constant LJ_TISNUM << 47.
++|.define TISNUMhi, // Constant LJ_TISNUM << 15.
++|.define TISNIL, // Constant -1LL.
++|.define fp, // Yes, we have to maintain a frame pointer.
+ |
+-|.define RA, ecx
+-|.define RAH, ch
+-|.define RAL, cl
+-|.define RB, ebp // Must be ebp (C callee-save).
+-|.define RC, eax // Must be eax.
+-|.define RCW, ax
+-|.define RCH, ah
+-|.define RCL, al
+-|.define OP, RB
+-|.define RD, RC
+-|.define RDW, RCW
+-|.define RDL, RCL
+-|.if X64
+-|.define RAa, rcx
+-|.define RBa, rbp
+-|.define RCa, rax
+-|.define RDa, rax
+-|.else
+-|.define RAa, RA
+-|.define RBa, RB
+-|.define RCa, RC
+-|.define RDa, RD
+-|.endif
++|// The following temporaries are not saved across C calls, except for RA/RC.
++|.define RA,
++|.define RC,
++|.define RB,
++|.define RAw,
++|.define RCw,
++|.define RBw,
++|.define INS,
++|.define INSw,
++|.define ITYPE,
++|.define TMP0,
++|.define TMP1,
++|.define TMP2,
++|.define TMP3,
++|.define TMP0w,
++|.define TMP1w,
++|.define TMP2w,
++|.define TMP3w,
+ |
+-|.if not X64
+-|.define FCARG1, ecx // x86 fastcall arguments.
+-|.define FCARG2, edx
+-|.elif X64WIN
+-|.define CARG1, rcx // x64/WIN64 C call arguments.
+-|.define CARG2, rdx
+-|.define CARG3, r8
+-|.define CARG4, r9
+-|.define CARG1d, ecx
+-|.define CARG2d, edx
+-|.define CARG3d, r8d
+-|.define CARG4d, r9d
+-|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall.
+-|.define FCARG2, CARG2d
+-|.else
+-|.define CARG1, rdi // x64/POSIX C call arguments.
+-|.define CARG2, rsi
+-|.define CARG3, rdx
+-|.define CARG4, rcx
+-|.define CARG5, r8
+-|.define CARG6, r9
+-|.define CARG1d, edi
+-|.define CARG2d, esi
+-|.define CARG3d, edx
+-|.define CARG4d, ecx
+-|.define CARG5d, r8d
+-|.define CARG6d, r9d
+-|.define FCARG1, CARG1d // Simulate x86 fastcall.
+-|.define FCARG2, CARG2d
+-|.endif
++|// Calling conventions. Also used as temporaries.
++|.define CARG1,
++|.define CARG2,
++|.define CARG3,
++|.define CARG4,
++|.define CARG5,
++|.define CARG1w,
++|.define CARG2w,
++|.define CARG3w,
++|.define CARG4w,
++|.define CARG5w,
++|
++|.define FARG1,
++|.define FARG2,
++|
++|.define CRET1,
++|.define CRET1w,
++|// Stack layout while in interpreter. Must match with lj_frame.h.
++|
++|.define CFRAME_SPACE, 208
++|//----- 16 byte aligned, <-- sp entering interpreter
++|// Unused [sp, #204] // 32 bit values
++|.define SAVE_NRES,
++|.define SAVE_ERRF,
++|.define SAVE_MULTRES,
++|.define TMPD,
++|.define SAVE_L,
++|.define SAVE_PC,
++|.define SAVE_CFRAME,
++|.define SAVE_FPR_,
++|.define SAVE_GPR_,
++|.define SAVE_LR,
++|.define SAVE_FP,
++|//----- 16 byte aligned, <-- sp while in interpreter.
++|
++|.define TMPDofs,
++|
++|.macro save_, gpr1, gpr2, fpr1, fpr2
++]
++|.endmacro
++|.macro rest_, gpr1, gpr2, fpr1, fpr2
++]
++|.endmacro
++|
++|.macro saveregs
++
++|.endmacro
++|.macro restoreregs
++
++|.endmacro
+ |
+ |// Type definitions. Some of these are only used for documentation.
+ |.type L, lua_State
+@@ -111,22 +115,16 @@
+ |//-----------------------------------------------------------------------
+ |.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
+ |.macro saveregs_
+-| push edi; push esi; push ebx
+-| push extern lj_err_unwind_win
+-| fs; push dword [0]
+-| fs; mov [0], esp
+-| sub esp, CFRAME_SPACE
++
+ |.endmacro
+ |.macro restoreregs
+-| add esp, CFRAME_SPACE
+-| fs; pop dword [0]
+-| pop edi // Short for esp += 4.
+-| pop ebx; pop esi; pop edi; pop ebp
++
+ |.endmacro
+ |
+ |.macro saveregs
+-| push ebp; saveregs_
++
+ |.endmacro
++
+ |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
+ |.define SAVE_NRES, aword [esp+aword*14]
+ |.define SAVE_CFRAME, aword [esp+aword*13]
+@@ -164,18 +162,14 @@
+ |.macro ins_A; .endmacro
+ |.macro ins_AD; .endmacro
+ |.macro ins_AJ; .endmacro
+-|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
+-|.macro ins_AB_; movzx RB, RCH; .endmacro
+-|.macro ins_A_C; movzx RC, RCL; .endmacro
+-|.macro ins_AND; not RDa; .endmacro
++|.macro ins_ABC; .endmacro
++|.macro ins_AB_; .endmacro
++|.macro ins_A_C; .endmacro
++|.macro ins_AND; .endmacro
+ |
+ |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
+ |.macro ins_NEXT
+-| mov RC, [PC]
+-| movzx RA, RCH
+-| movzx OP, RCL
+-| add PC, 4
+-| shr RC, 16
++
+ |.endmacro
+ |
+ |// Instruction footer.
+@@ -220,11 +214,11 @@
+ |//-----------------------------------------------------------------------
+ |
+ |// Macros to test operand types.
+-|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
+-|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro
+-|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro
+-|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
+-|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
++|.macro checktp, .endmacro
++|.macro checknum, .endmacro
++|.macro checkint, .endmacro
++|.macro checkstr, .endmacro
++|.macro checktab, .endmacro
+ |
+ |// These operands must be used with movzx.
+ |.define PC_OP, byte [PC-4]
+@@ -234,7 +228,7 @@
+ |.define PC_RD, word [PC-2]
+ |
+ |.macro branchPC, reg
+-| lea PC, [PC+reg*4-BCBIAS_J*4]
++
+ |.endmacro
+ |
+ |// Assumes DISPATCH is relative to GL.
+@@ -245,24 +239,16 @@
+ |
+ |// Decrement hashed hotcount and trigger trace recorder if zero.
+ |.macro hotloop, reg
+-| mov reg, PC
+-| shr reg, 1
+-| and reg, HOTCOUNT_PCMASK
+-| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
+-| jb ->vm_hotloop
++
+ |.endmacro
+ |
+ |.macro hotcall, reg
+-| mov reg, PC
+-| shr reg, 1
+-| and reg, HOTCOUNT_PCMASK
+-| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
+-| jb ->vm_hotcall
++
+ |.endmacro
+ |
+ |// Set current VM state.
+ |.macro set_vmstate, st
+-| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
++
+ |.endmacro
+ |
+ |
+@@ -270,27 +256,24 @@
+ |
+ |
+ |.macro sseconst_sign, reg, tmp // Synthesize sign mask.
+-| sseconst_hi reg, tmp, 80000000
++|
+ |.endmacro
+ |.macro sseconst_1, reg, tmp // Synthesize 1.0.
+-| sseconst_hi reg, tmp, 3ff00000
++|
+ |.endmacro
+ |.macro sseconst_m1, reg, tmp // Synthesize -1.0.
+-| sseconst_hi reg, tmp, bff00000
++|
+ |.endmacro
+ |.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
+-| sseconst_hi reg, tmp, 43300000
++|
+ |.endmacro
+ |.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
+-| sseconst_hi reg, tmp, 43380000
++|
+ |.endmacro
+ |
+ |// Move table write barrier back. Overwrites reg.
+ |.macro barrierback, tab, reg
+-| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
+-| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]
+-| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab
+-| mov tab->gclist, reg
++
+ |.endmacro
+ |
+ |//-----------------------------------------------------------------------
+
+From 3ac644ecee044a90b806cc87626c3370471b14da Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Fri, 18 Nov 2016 17:09:20 +0530
+Subject: [PATCH 022/260] Update vm_s390x.dasc
+
+Assigned general-purpose registers to the existing register macros.
+---
+ src/vm_s390x.dasc | 19 ++++++++++---------
+ 1 file changed, 10 insertions(+), 9 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index ff599470b..656ed051f 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -15,15 +15,15 @@
+ |
+ |// Fixed register assignments for the interpreter.
+ |// This is very fragile and has many dependencies. Caveat emptor.
+-.define BASE, // Base of current Lua stack frame.
+-|.define KBASE, // Constants of current Lua function.
+-|.define PC, // Next PC.
+-|.define GLREG, // Global state.
+-|.define LREG, // Register holding lua_State (also in SAVE_L).
+-|.define TISNUM, // Constant LJ_TISNUM << 47.
+-|.define TISNUMhi, // Constant LJ_TISNUM << 15.
+-|.define TISNIL, // Constant -1LL.
+-|.define fp, // Yes, we have to maintain a frame pointer.
++|.define BASE, gr0 // Base of current Lua stack frame.
++|.define KBASE, gr1 // Constants of current Lua function.
++|.define PC, gr14 // Next PC.
++|.define GLREG, gr2 // Global state.
++|.define LREG, gr3 // Register holding lua_State (also in SAVE_L).
++|.define TISNUM, gr4 // Constant LJ_TISNUM << 47.
++|.define TISNUMhi, gr5 // Constant LJ_TISNUM << 15.
++|.define TISNIL, gr6 // Constant -1LL.
++|.define fp, gr7 // Yes, we have to maintain a frame pointer.
+ |
+ |// The following temporaries are not saved across C calls, except for RA/RC.
+ |.define RA,
+@@ -66,6 +66,7 @@
+ |.define CFRAME_SPACE, 208
+ |//----- 16 byte aligned, <-- sp entering interpreter
+ |// Unused [sp, #204] // 32 bit values
++|
+ |.define SAVE_NRES,
+ |.define SAVE_ERRF,
+ |.define SAVE_MULTRES,
+
+From e90d985d080eeede2aa623888fc13f4c8c3edd0e Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 18 Nov 2016 13:06:31 -0500
+Subject: [PATCH 023/260] Look for s390x file rather than S390x file.
+
+---
+ src/vm_s390x.dasc | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 656ed051f..4b5ae2ad8 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1,9 +1,9 @@
+-|// Low-level VM code for S390x CPUs.
++|// Low-level VM code for IBM z/Architecture (s390x) CPUs.
+ |// Bytecode interpreter, fast functions and helper functions.
+ |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+ |
+
+-|.arch S390x
++|.arch s390x
+ |.section code_op, code_sub
+ |
+ |.actionlist build_actionlist
+
+From 31c0e6016a144aefefcb9ab4642aebb0b0633694 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Tue, 22 Nov 2016 10:20:56 +0530
+Subject: [PATCH 024/260] Update vm_s390x.dasc
+
+Added definitions for the savereg and restreg macros.
+They use the Store (STG) and Load (LG) instructions
+to store and load register contents to and from memory.
+---
+ src/vm_s390x.dasc | 36 +++++++++++++++++-------------------
+ 1 file changed, 17 insertions(+), 19 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 4b5ae2ad8..a9a383556 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -15,15 +15,15 @@
+ |
+ |// Fixed register assignments for the interpreter.
+ |// This is very fragile and has many dependencies. Caveat emptor.
+-|.define BASE, gr0 // Base of current Lua stack frame.
+-|.define KBASE, gr1 // Constants of current Lua function.
+-|.define PC, gr14 // Next PC.
+-|.define GLREG, gr2 // Global state.
+-|.define LREG, gr3 // Register holding lua_State (also in SAVE_L).
+-|.define TISNUM, gr4 // Constant LJ_TISNUM << 47.
+-|.define TISNUMhi, gr5 // Constant LJ_TISNUM << 15.
+-|.define TISNIL, gr6 // Constant -1LL.
+-|.define fp, gr7 // Yes, we have to maintain a frame pointer.
++|.define BASE, gr0
++|.define KBASE, gr1
++|.define PC, gr14
++|.define GLREG, gr2
++|.define LREG, gr3
++|.define TISNUM, gr4
++|.define TISNUMhi, gr5
++|.define TISNIL, gr6
++|.define fp, gr7
+ |
+ |// The following temporaries are not saved across C calls, except for RA/RC.
+ |.define RA,
+@@ -82,18 +82,16 @@
+ |
+ |.define TMPDofs,
+ |
+-|.macro save_, gpr1, gpr2, fpr1, fpr2
+-]
+-|.endmacro
+-|.macro rest_, gpr1, gpr2, fpr1, fpr2
+-]
++|.macro savereg arg1 arg2 arg3
++| STG arg1; // Store 64bit content
++| STG arg2; // Store 64bit content
++| STG arg3; // Store 64bit content
+ |.endmacro
+ |
+-|.macro saveregs
+-
+-|.endmacro
+-|.macro restoreregs
+-
++|.macro restreg arg1 arg2 arg3
++| LG arg1; // Load 64 bit content
++| LG arg2; // Load 64 bit content
++| LG arg3; // Load 64 bit content
+ |.endmacro
+ |
+ |// Type definitions. Some of these are only used for documentation.
+
+From d50f8aa92b1022f4b4be40d8e4ff6badb0b1f336 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 22 Nov 2016 11:48:56 -0500
+Subject: [PATCH 025/260] Add preliminary frame offsets.
+
+These are educated guesses at this point. We might need more stack space because
+we don't have many free registers available.
+---
+ src/lj_asm.c | 2 +
+ src/lj_frame.h | 18 +++----
+ src/vm_s390x.dasc | 118 +++++++++++++++++++++++-----------------------
+ 3 files changed, 70 insertions(+), 68 deletions(-)
+
+diff --git a/src/lj_asm.c b/src/lj_asm.c
+index 7ce589248..d427fa5b2 100644
+--- a/src/lj_asm.c
++++ b/src/lj_asm.c
+@@ -1567,6 +1567,8 @@ static void asm_loop(ASMState *as)
+ #include "lj_asm_ppc.h"
+ #elif LJ_TARGET_MIPS
+ #include "lj_asm_mips.h"
++#elif LJ_TARGET_S390X
++#include "lj_asm_s390x.h"
+ #else
+ #error "Missing assembler for target CPU"
+ #endif
+diff --git a/src/lj_frame.h b/src/lj_frame.h
+index 017bdaf9e..65affb5da 100644
+--- a/src/lj_frame.h
++++ b/src/lj_frame.h
+@@ -200,15 +200,6 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
+ #define CFRAME_OFS_MULTRES 192
+ #define CFRAME_SIZE 208
+ #define CFRAME_SHIFT_MULTRES 3
+-#elif LJ_TARGET_S390X
+-#define CFRAME_OFS_ERRF
+-#define CFRAME_OFS_NRES
+-#define CFRAME_OFS_PREV
+-#define CFRAME_OFS_L
+-#define CFRAME_OFS_PC
+-#define CFRAME_OFS_MULTRES
+-#define CFRAME_SIZE
+-#define CFRAME_SHIFT_MULTRES
+ #elif LJ_TARGET_PPC
+ #if LJ_TARGET_XBOX360
+ #define CFRAME_OFS_ERRF 424
+@@ -273,6 +264,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
+ #endif
+ #define CFRAME_OFS_MULTRES 0
+ #define CFRAME_SHIFT_MULTRES 3
++#elif LJ_TARGET_S390X
++#define CFRAME_OFS_ERRF 216
++#define CFRAME_OFS_NRES 208
++#define CFRAME_OFS_PREV 200
++#define CFRAME_OFS_L 192
++#define CFRAME_OFS_PC 168
++#define CFRAME_OFS_MULTRES 160
++#define CFRAME_SIZE 172
++#define CFRAME_SHIFT_MULTRES 3
+ #else
+ #error "Missing CFRAME_* definitions for this architecture"
+ #endif
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index a9a383556..dc30593e5 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2,7 +2,22 @@
+ |// Bytecode interpreter, fast functions and helper functions.
+ |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+ |
+-
++|// ELF ABI registers:
++|// r0,r1 | | volatile |
++|// r2 | parameter and return value | volatile |
++|// r3-r5 | parameter | volatile |
++|// r6 | parameter | saved |
++|// r7-r11 | | saved |
++|// r12 | GOT pointer (needed?) | saved |
++|// r13 | literal pool (needed?) | saved |
++|// r14 | return address | volatile |
++|// r15 | stack pointer | saved |
++|// f0,f2,f4,f6 | parameter and return value | volatile |
++|// f1,f3,f5,f7 | | volatile |
++|// f8-f15 | | saved |
++|// ar0,ar1 | TLS | volatile |
++|// ar2-ar15 | | volatile |
++|
+ |.arch s390x
+ |.section code_op, code_sub
+ |
+@@ -13,72 +28,57 @@
+ |
+ |//-----------------------------------------------------------------------
+ |
+-|// Fixed register assignments for the interpreter.
+-|// This is very fragile and has many dependencies. Caveat emptor.
+-|.define BASE, gr0
+-|.define KBASE, gr1
+-|.define PC, gr14
+-|.define GLREG, gr2
+-|.define LREG, gr3
+-|.define TISNUM, gr4
+-|.define TISNUMhi, gr5
+-|.define TISNIL, gr6
+-|.define fp, gr7
++|// Fixed register assignments for the interpreter, callee-saved.
++|.define BASE, r7 // Base of current Lua stack frame.
++|.define KBASE, r8 // Constants of current Lua function.
++|.define PC, r9 // Next PC.
++|.define GLREG, r10 // Global state.
++|.define LREG, r11 // Register holding lua_State (also in SAVE_L).
+ |
+-|// The following temporaries are not saved across C calls, except for RA/RC.
+-|.define RA,
+-|.define RC,
+-|.define RB,
+-|.define RAw,
+-|.define RCw,
+-|.define RBw,
+-|.define INS,
+-|.define INSw,
+-|.define ITYPE,
+-|.define TMP0,
+-|.define TMP1,
+-|.define TMP2,
+-|.define TMP3,
+-|.define TMP0w,
+-|.define TMP1w,
+-|.define TMP2w,
+-|.define TMP3w,
++|// The following temporaries are not saved across C calls, except for RD.
++|.define RA, r0 // Cannot be dereferenced.
++|.define RB, r1
++|.define RC, r5 // Overlaps CARG4.
++|.define RD, r6 // Overlaps CARG5. Callee-saved.
+ |
+ |// Calling conventions. Also used as temporaries.
+-|.define CARG1,
+-|.define CARG2,
+-|.define CARG3,
+-|.define CARG4,
+-|.define CARG5,
+-|.define CARG1w,
+-|.define CARG2w,
+-|.define CARG3w,
+-|.define CARG4w,
+-|.define CARG5w,
++|.define CARG1, r2
++|.define CARG2, r3
++|.define CARG3, r4
++|.define CARG4, r5
++|.define CARG5, r6
++|
++|.define FARG1, f0
++|.define FARG2, f2
++|.define FARG3, f4
++|.define FARG4, f6
+ |
+-|.define FARG1,
+-|.define FARG2,
++|.define CRET1, r2
++|
++|.define SP, r15
+ |
+-|.define CRET1,
+-|.define CRET1w,
+ |// Stack layout while in interpreter. Must match with lj_frame.h.
++|.define CFRAME_SPACE, 176 // Delta for SP, 8 byte aligned.
++|
++|// Register save area.
++|.define SAVE_FPR6, 328(SP)
++|.define SAVE_FPR4, 320(SP)
++|.define SAVE_FPR2, 312(SP)
++|.define SAVE_FPR0, 304(SP)
++|.define SAVE_GPRS, 224(SP) // Save area for r6-r15 (10*8 bytes).
+ |
+-|.define CFRAME_SPACE, 208
+-|//----- 16 byte aligned, <-- sp entering interpreter
+-|// Unused [sp, #204] // 32 bit values
++|// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
++|.define SAVE_ERRF, 216(SP) // Argument 4, in r5.
++|.define SAVE_NRES, 208(SP) // Argument 3, in r4.
++|.define SAVE_CFRAME, 200(SP) // Argument 2, in r3.
++|.define SAVE_L, 192(SP) // Argument 1, in r2.
++|.define RESERVED, 184(SP) // Reserved for compiler use.
++|.define BACKCHAIN, 176(SP) // <- SP entering interpreter.
++|.define SAVE_PC, 168(SP)
++|.define SAVE_MULTRES, 160(SP)
+ |
+-|.define SAVE_NRES,
+-|.define SAVE_ERRF,
+-|.define SAVE_MULTRES,
+-|.define TMPD,
+-|.define SAVE_L,
+-|.define SAVE_PC,
+-|.define SAVE_CFRAME,
+-|.define SAVE_FPR_,
+-|.define SAVE_GPR_,
+-|.define SAVE_LR,
+-|.define SAVE_FP,
+-|//----- 16 byte aligned, <-- sp while in interpreter.
++|// Callee save area (allocated by interpreter).
++|.define CALLEESAVE 000(SP) // <- SP in interpreter.
+ |
+ |.define TMPDofs,
+ |
+
+From d505a0e0bae07611f6361af6823e072e7f8b9b84 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 22 Nov 2016 13:47:35 -0500
+Subject: [PATCH 026/260] Cleanup.
+
+---
+ src/vm_s390x.dasc | 47 -----------------------------------------------
+ 1 file changed, 47 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index dc30593e5..44c056d36 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -80,8 +80,6 @@
+ |// Callee save area (allocated by interpreter).
+ |.define CALLEESAVE 000(SP) // <- SP in interpreter.
+ |
+-|.define TMPDofs,
+-|
+ |.macro savereg arg1 arg2 arg3
+ | STG arg1; // Store 64bit content
+ | STG arg2; // Store 64bit content
+@@ -110,51 +108,6 @@
+ |.type TRACE, GCtrace
+ |.type SBUF, SBuf
+ |
+-|// Stack layout while in interpreter. Must match with lj_frame.h.
+-|//-----------------------------------------------------------------------
+-|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
+-|.macro saveregs_
+-
+-|.endmacro
+-|.macro restoreregs
+-
+-|.endmacro
+-|
+-|.macro saveregs
+-
+-|.endmacro
+-
+-|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
+-|.define SAVE_NRES, aword [esp+aword*14]
+-|.define SAVE_CFRAME, aword [esp+aword*13]
+-|.define SAVE_L, aword [esp+aword*12]
+-|//----- 16 byte aligned, ^^^ arguments from C caller
+-|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
+-|.define SAVE_R4, aword [esp+aword*10]
+-|.define SAVE_R3, aword [esp+aword*9]
+-|.define SAVE_R2, aword [esp+aword*8]
+-|//----- 16 byte aligned
+-|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
+-|.define SAVE_PC, aword [esp+aword*6]
+-|.define TMP2, aword [esp+aword*5]
+-|.define TMP1, aword [esp+aword*4]
+-|//----- 16 byte aligned
+-|.define ARG4, aword [esp+aword*3]
+-|.define ARG3, aword [esp+aword*2]
+-|.define ARG2, aword [esp+aword*1]
+-|.define ARG1, aword [esp] //<-- esp while in interpreter.
+-|//----- 16 byte aligned, ^^^ arguments for C callee
+-|
+-|// FPARGx overlaps ARGx and ARG(x+1) on x86.
+-|.define FPARG3, qword [esp+qword*1]
+-|.define FPARG1, qword [esp]
+-|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
+-|.define TMPQ, qword [esp+aword*4]
+-|.define TMP3, ARG4
+-|.define ARG5, TMP1
+-|.define TMPa, TMP1
+-|.define MULTRES, TMP2
+-|
+ |//-----------------------------------------------------------------------
+ |
+ |// Instruction headers.
+
+From 5a69b4638a5eb960e9cd6f95611d5d5b6fdcd21d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 22 Nov 2016 13:58:10 -0500
+Subject: [PATCH 027/260] Fixup the save/restore register macros.
+
+I believe these macros obey the C calling convention, so we need to
+allocate our stack frame and save all callee-save registers. We
+can tune it later if it turns out we don't need all the registers.
+---
+ src/vm_s390x.dasc | 23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 44c056d36..49ea335a3 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -80,16 +80,23 @@
+ |// Callee save area (allocated by interpreter).
+ |.define CALLEESAVE 000(SP) // <- SP in interpreter.
+ |
+-|.macro savereg arg1 arg2 arg3
+-| STG arg1; // Store 64bit content
+-| STG arg2; // Store 64bit content
+-| STG arg3; // Store 64bit content
++|.macro saveregs
++| lay SP, -CFRAME_SPACE(SP) // Allocate stack frame.
++| stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
++| std f0, SAVE_FPR0
++| std f2, SAVE_FPR2
++| std f4, SAVE_FPR4
++| std f6, SAVE_FPR6
+ |.endmacro
+ |
+-|.macro restreg arg1 arg2 arg3
+-| LG arg1; // Load 64 bit content
+-| LG arg2; // Load 64 bit content
+-| LG arg3; // Load 64 bit content
++|.macro restoreregs
++| la SP, CFRAME_SPACE(SP) // De-allocate stack frame.
++| lmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
++| ld f0, SAVE_FPR0
++| ld f2, SAVE_FPR2
++| ld f4, SAVE_FPR4
++| ld f6, SAVE_FPR6
++|// br r14 to return?
+ |.endmacro
+ |
+ |// Type definitions. Some of these are only used for documentation.
+
+From dbf789536cfea0b3ac0a1f0a16a807b807735837 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 23 Nov 2016 17:30:10 -0500
+Subject: [PATCH 028/260] Fix stack frame layout.
+
+f8-f15 are callee-saved (not f0,f2,f4 and f6). There isn't space
+for them in the caller's stack frame so we need to increase the
+size of the interpreter's stack frame.
+---
+ src/lj_frame.h | 10 ++++-----
+ src/vm_s390x.dasc | 57 +++++++++++++++++++++++++++++------------------
+ 2 files changed, 40 insertions(+), 27 deletions(-)
+
+diff --git a/src/lj_frame.h b/src/lj_frame.h
+index 65affb5da..0b90f1421 100644
+--- a/src/lj_frame.h
++++ b/src/lj_frame.h
+@@ -265,13 +265,13 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
+ #define CFRAME_OFS_MULTRES 0
+ #define CFRAME_SHIFT_MULTRES 3
+ #elif LJ_TARGET_S390X
+-#define CFRAME_OFS_ERRF 216
+-#define CFRAME_OFS_NRES 208
+-#define CFRAME_OFS_PREV 200
+-#define CFRAME_OFS_L 192
++#define CFRAME_OFS_ERRF 280
++#define CFRAME_OFS_NRES 272
++#define CFRAME_OFS_PREV 264
++#define CFRAME_OFS_L 256
+ #define CFRAME_OFS_PC 168
+ #define CFRAME_OFS_MULTRES 160
+-#define CFRAME_SIZE 172
++#define CFRAME_SIZE 240
+ #define CFRAME_SHIFT_MULTRES 3
+ #else
+ #error "Missing CFRAME_* definitions for this architecture"
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 49ea335a3..f54711177 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -58,22 +58,28 @@
+ |.define SP, r15
+ |
+ |// Stack layout while in interpreter. Must match with lj_frame.h.
+-|.define CFRAME_SPACE, 176 // Delta for SP, 8 byte aligned.
++|.define CFRAME_SPACE, 240 // Delta for SP, 8 byte aligned.
+ |
+ |// Register save area.
+-|.define SAVE_FPR6, 328(SP)
+-|.define SAVE_FPR4, 320(SP)
+-|.define SAVE_FPR2, 312(SP)
+-|.define SAVE_FPR0, 304(SP)
+-|.define SAVE_GPRS, 224(SP) // Save area for r6-r15 (10*8 bytes).
++|.define SAVE_GPRS, 288(SP) // Save area for r6-r15 (10*8 bytes).
+ |
+ |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
+-|.define SAVE_ERRF, 216(SP) // Argument 4, in r5.
+-|.define SAVE_NRES, 208(SP) // Argument 3, in r4.
+-|.define SAVE_CFRAME, 200(SP) // Argument 2, in r3.
+-|.define SAVE_L, 192(SP) // Argument 1, in r2.
+-|.define RESERVED, 184(SP) // Reserved for compiler use.
+-|.define BACKCHAIN, 176(SP) // <- SP entering interpreter.
++|.define SAVE_ERRF, 280(SP) // Argument 4, in r5.
++|.define SAVE_NRES, 272(SP) // Argument 3, in r4.
++|.define SAVE_CFRAME, 264(SP) // Argument 2, in r3.
++|.define SAVE_L, 256(SP) // Argument 1, in r2.
++|.define RESERVED, 248(SP) // Reserved for compiler use.
++|.define BACKCHAIN, 240(SP) // <- SP entering interpreter.
++|
++|// Interpreter stack frame.
++|.define SAVE_FPR15, 232(SP)
++|.define SAVE_FPR14, 224(SP)
++|.define SAVE_FPR13, 216(SP)
++|.define SAVE_FPR12, 208(SP)
++|.define SAVE_FPR11, 200(SP)
++|.define SAVE_FPR10, 192(SP)
++|.define SAVE_FPR9, 184(SP)
++|.define SAVE_FPR8, 176(SP)
+ |.define SAVE_PC, 168(SP)
+ |.define SAVE_MULTRES, 160(SP)
+ |
+@@ -83,19 +89,26 @@
+ |.macro saveregs
+ | lay SP, -CFRAME_SPACE(SP) // Allocate stack frame.
+ | stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
+-| std f0, SAVE_FPR0
+-| std f2, SAVE_FPR2
+-| std f4, SAVE_FPR4
+-| std f6, SAVE_FPR6
++| std f8, SAVE_FPR8 // f8-f15 are callee-saved.
++| std f9, SAVE_FPR9
++| std f10, SAVE_FPR10
++| std f11, SAVE_FPR11
++| std f12, SAVE_FPR12
++| std f13, SAVE_FPR13
++| std f14, SAVE_FPR14
++| std f15, SAVE_FPR15
+ |.endmacro
+ |
+ |.macro restoreregs
+-| la SP, CFRAME_SPACE(SP) // De-allocate stack frame.
+-| lmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
+-| ld f0, SAVE_FPR0
+-| ld f2, SAVE_FPR2
+-| ld f4, SAVE_FPR4
+-| ld f6, SAVE_FPR6
++| ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
++| ld f9, SAVE_FPR9
++| ld f10, SAVE_FPR10
++| ld f11, SAVE_FPR11
++| ld f12, SAVE_FPR12
++| ld f13, SAVE_FPR13
++| ld f14, SAVE_FPR14
++| ld f15, SAVE_FPR15
++| lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
+ |// br r14 to return?
+ |.endmacro
+ |
+
+From 5887962b0e956264f91357f168db7a182aff0cba Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 23 Nov 2016 18:02:00 -0500
+Subject: [PATCH 029/260] Add assembly for decoding instructions.
+
+Still guessing at this point. This code will need to be changed.
+---
+ src/vm_s390x.dasc | 31 +++++++++++++++++++++++--------
+ 1 file changed, 23 insertions(+), 8 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index f54711177..f6f1adb1d 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1,4 +1,4 @@
+-|// Low-level VM code for IBM z/Architecture (s390x) CPUs.
++|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode.
+ |// Bytecode interpreter, fast functions and helper functions.
+ |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+ |
+@@ -32,7 +32,7 @@
+ |.define BASE, r7 // Base of current Lua stack frame.
+ |.define KBASE, r8 // Constants of current Lua function.
+ |.define PC, r9 // Next PC.
+-|.define GLREG, r10 // Global state.
++|.define DISPATCH, r10 // Opcode dispatch table.
+ |.define LREG, r11 // Register holding lua_State (also in SAVE_L).
+ |
+ |// The following temporaries are not saved across C calls, except for RD.
+@@ -56,6 +56,8 @@
+ |.define CRET1, r2
+ |
+ |.define SP, r15
++|.define OP, r2
++|.define TMP1, r3
+ |
+ |// Stack layout while in interpreter. Must match with lj_frame.h.
+ |.define CFRAME_SPACE, 240 // Delta for SP, 8 byte aligned.
+@@ -134,14 +136,29 @@
+ |.macro ins_A; .endmacro
+ |.macro ins_AD; .endmacro
+ |.macro ins_AJ; .endmacro
+-|.macro ins_ABC; .endmacro
+-|.macro ins_AB_; .endmacro
++|.macro ins_ABC; .endmacro
++|.macro ins_AB_; .endmacro
+ |.macro ins_A_C; .endmacro
+ |.macro ins_AND; .endmacro
+ |
+-|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
++|// Instruction decode+dispatch.
++| // TODO: tune this, right now we always decode RA-D even if they aren't used.
+ |.macro ins_NEXT
+-
++| l RD, (PC)
++| // 32 63
++| // [ B | C | A | OP ]
++| // [ D | A | OP ]
++| llhr RA, RD
++| srl RA, #8
++| llcr OP, RD
++| srl RD, #16
++| lr RB, RD
++| srl RB, #8
++| llcr RC, RD
++| la PC, 4(PC)
++| llgfr TMP1, OP
++| sll TMP1, #3 // TMP1=OP*8
++| b 0(TMP1, DISPATCH)
+ |.endmacro
+ |
+ |// Instruction footer.
+@@ -151,8 +168,6 @@
+ | .define ins_next_, ins_NEXT
+ |.else
+ | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+-| // Affects only certain kinds of benchmarks (and only with -j off).
+-| // Around 10%-30% slower on Core2, a lot more slower on P4.
+ | .macro ins_next
+ | jmp ->ins_next
+ | .endmacro
+
+From 372f721e60691cbc22f0ca98edb4c7510ff35110 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 24 Nov 2016 11:25:07 +0530
+Subject: [PATCH 030/260] Update vm_s390x.dasc
+
+used MOVE LONG EXTENDED in place of mov and
+MOVE LONG instead of movzx
+---
+ src/vm_s390x.dasc | 15 +++++----------
+ 1 file changed, 5 insertions(+), 10 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index f6f1adb1d..3758ee31e 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -180,21 +180,16 @@
+ |// Call decode and dispatch.
+ |.macro ins_callt
+ | // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
+-| mov PC, LFUNC:RB->pc
+-| mov RA, [PC]
+-| movzx OP, RAL
+-| movzx RA, RAH
++| mvcle PC, LFUNC:RB->pc
++| mvcle RA, [PC]
++| movcl OP, RAL
++| movcl RA, RAH
+ | add PC, 4
+-|.if X64
+-| jmp aword [DISPATCH+OP*8]
+-|.else
+-| jmp aword [DISPATCH+OP*4]
+-|.endif
+ |.endmacro
+ |
+ |.macro ins_call
+ | // BASE = new base, RB = LFUNC, RD = nargs+1
+-| mov [BASE-4], PC
++| mvcle [BASE-4], PC
+ | ins_callt
+ |.endmacro
+ |
+
+From 4ea7607e02a74aad1a7102e4df1a464cb18d037a Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 24 Nov 2016 14:02:50 +0530
+Subject: [PATCH 031/260] Update vm_s390x.dasc
+
+Added instructions to macros, referring to the x86 macro definitions.
+For macro ins_AND, did not find an equivalent s390x replacement instruction for 'not', hence the place is currently marked as '???'.
+
+'???' has to be replaced with the s390x complement instruction.
+---
+ src/vm_s390x.dasc | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 3758ee31e..b2640e809 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -136,10 +136,10 @@
+ |.macro ins_A; .endmacro
+ |.macro ins_AD; .endmacro
+ |.macro ins_AJ; .endmacro
+-|.macro ins_ABC; .endmacro
+-|.macro ins_AB_; .endmacro
+-|.macro ins_A_C; .endmacro
+-|.macro ins_AND; .endmacro
++|.macro ins_ABC; mvcl RB, RCH; mvcl RC, RCL; .endmacro
++|.macro ins_AB_; mvcl RB, RCH; .endmacro
++|.macro ins_A_C; mvcl RC, RCL; .endmacro
++|.macro ins_AND; ??? RD; .endmacro
+ |
+ |// Instruction decode+dispatch.
+ | // TODO: tune this, right now we always decode RA-D even if they aren't used.
+
+From 3288e547bf6961fe04d79bd2e4f8daff819a47f0 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 24 Nov 2016 14:58:52 +0530
+Subject: [PATCH 032/260] Update vm_s390x.dasc
+
+Added definitions to the macros that test operand types, referring to the x86 definitions.
+No JUMP instruction was found for s390x, so BRANCH RELATIVE ON CONDITION (brc) is used instead.
+Not sure how the condition will be checked; this needs to be discussed.
+---
+ src/vm_s390x.dasc | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index b2640e809..72fe5d26f 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -196,11 +196,11 @@
+ |//-----------------------------------------------------------------------
+ |
+ |// Macros to test operand types.
+-|.macro checktp, .endmacro
+-|.macro checknum, .endmacro
+-|.macro checkint, .endmacro
+-|.macro checkstr, .endmacro
+-|.macro checktab, .endmacro
++|.macro checktp, reg, tp; CG dword [BASE+reg*8+4], tp; .endmacro
++|.macro checknum, reg, target; checktp reg, LJ_TISNUM; brc target; .endmacro // condition to chk is result is above or equal
++|.macro checkint, reg, target; checktp reg, LJ_TISNUM; brc target; .endmacro // condition to chk is result is not equal
++|.macro checkstr, reg, target; checktp reg, LJ_TSTR; brc target; .endmacro // condition to chk is result is nto equal
++|.macro checktab, reg, target; checktp reg, LJ_TTAB; brc target; .endmacro // condition to chk is result is nto equal
+ |
+ |// These operands must be used with movzx.
+ |.define PC_OP, byte [PC-4]
+
+From 9b01b4dc6f1663aa9117b4903c4a22822e1bdf34 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Fri, 25 Nov 2016 19:44:04 +0530
+Subject: [PATCH 033/260] Added s390x instructions with their encoding
+
+---
+ dynasm/dasm_s390x.lua | 953 +++++++++++++++++++++++++-----------------
+ 1 file changed, 575 insertions(+), 378 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 3542e7ee0..e39a27f1b 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -652,311 +652,565 @@ end)
+
+ -- Template strings for ARM instructions.
+ map_op = {
+- -- Basic data processing instructions.
+- --add
+- ar = "0000000000001a00", --RR
+- ay = "0000e3000000005a", --RXY-a
+- ag = "0000e30000000008",
+- agr = "00000000b9080000", --RRE
+- agf = "0000e30000000018",
+- agfr = "00000000b9180000",
+- agbr = "00000000b34a0000",
+- adbr = "00000000b31a0000",
+- aebr = "00000000b30a0000",
+- ah = "000000004a000000", --RXa
+- ahy = "0000e3000000007a",
+- afi = "0000c20900000000", --RIL-a --pls check if this is correct
+- agfi = "0000c20800000000",
+- aih = "0000cc0800000000",
+- al = "000000005e000000",
+- alr = "0000000000001e00",
+- aly = "0000e3000000005e", -- RXY-a
+- alg = "0000e3000000000a",
+- algr = "00000000b90a0000",
+- algf = "0000e3000000001a",
+- algfr = "00000000b91a0000",
+- alfi = "0000c20b00000000",
+- algfi = "0000c20a00000000",
+- alc = "0000e30000000098",
+- alcr = "00000000b9980000", -- RRE
+- alcg = "0000e30000000088",
+- alcgr = "00000000b9880000",
+- alsih = "0000cc0a00000000",
+- alsihn ="0000cc0b00000000",
+- axr = "0000000000003600", -- RR
+- ad = "000000006a000000", -- Rx-a
+- adr = "0000000000002a00",
+- ae = "000000007a000000",
+- aer = "0000000000003a00",
+- aw = "000000006e000000",
+- awr = "0000000000002e00",
+- au = "000000007e000000",
+- aur = "0000000000003e00",
+-
+--- and
+- n = "0000000054000000",
+- nr = "0000000000001400",
+- ny = "0000e30000000054", -- RXY-a
+- ng = "0000e30000000080",
+- ngr = "00000000b9800000",
+- nihf = "0000c00a00000000", --RIL-a
+- nihl = "0000c00b00000000",
+-
+- --branch related instrcutions
+- bal = "0000000045000000", --RX-a
+- balr = "0000000000005000", --RR
+- bas = "000000004d000000",
+- basr = "0000000000000d00", -- this has leading zero in the instrcution opcode: 0d, need to take into consideration
+- bassm = "0000000000000c00",
+- bsm = "0000000000000b00",
+- bc = "0000000047000000",
+- bcr = "0000000000000700",
+- bct = "0000000046000000",
+- bctr = "0000000000000600",
+- bctg = "0000e30000000046",
+- bctgr = "00000000b9460000",
+- bxh = "0000000086000000", --RS-a
+- bxhg = "0000eb0000000044",
+- bxle = "0000000087000000",
+- bxleg = "0000eb0000000045", -- RSY-a
+- --bras = "000000000a750000RI-b",
+- brasl = "0000c00500000000", --RIL-b
+- --brc = "000000000a740000RI-c",
+- brcl = "0000c00400000000", --RIL-c
+- --brct = "000000000a760000RI-b",
+- --brctg = "000000000a770000RI-b",
+- brcth = "0000cc0600000000",
+- --brxh = "0000000000840000RSI",
+- --brxhg = "00000000ec440000RIE-e",
+- --brxle = "0000000000850000RSI",
+- --brxlg = "00000000ec450000RIE-e",
+-
+- ----subtraction (basic operation)
+- sub = "00000000005b0000RX-a"
+- sr = "00000000001b0000RR"
+- srk = "00000000b9f90000RRF-a"
+- sy = "00000000e35b0000RXY-a"
+- sg = "00000000e3090000RXY-a"
+- sgr = "00000000b9090000RRE"
+- sgrk = "00000000b9e90000RRF-a"
+- sgf = "00000000e3190000RXY-a"
+- sgfr = "00000000b9190000RRE"
+- sh = "00000000004b0000RX-a"
+- shy = "00000000e37b0000RXY-a"
+- shhhr = "00000000b9c90000RRF-a"
+- shhlr = "00000000b9d90000RX-a"
+- sl = "00000000005f0000RX-a"
+- slr = "00000000001f0000RR"
+- slrk = "00000000b9f80000RR"
+- sly = "00000000e35f0000RXY-a",
+- slg = "00000000e30b0000RXY-a",
+- slgr = "00000000b9080000RRE",
+- slgrk = "00000000b9eb0000RRF-a",
+- slgf = "00000000e3180000RXY-a",
+- slgfr = "00000000b91b0000RRE",
+- slhhhr = "00000000b9cb0000RRF-a",
+- slhhlr = "00000000b9db0000RRF-a",
+- slfi = "000000000c250000RIL-a",
+- slgfi = "000000000c240000RIL-a",
+- slb = "00000000e3990000RXY-a",
+- slbr = "00000000b9990000RRE" ,
+- slbg = "00000000e3890000RXY-a",
+- slbgr = "00000000b9890000RXY-a",
+-
+- cmp_2 = "0000000000590000RX-a|0000000000190000RR|00000000e3590000RXY-a",
+- cmp_3 = "00000000e3200000RXY-a|00000000b9200000RRE|00000000e3300000RXY-a| 00000000b9300000RRE",
+-
+- div_2 = "00000000005d0000RX-a|00000000001d0000RR|00000000e3970000RXY-a|00000000b9970000RRE",
+- div_3 ="00000000e3870000RXY-a|00000000b9870000RRE",
+- div_sing ="00000000e30d0000RXY-a|00000000b90d0000RRE|00000000e31d0000RXY-a|00000000b91d0000RRE",
+-
+- eor_2 = "0000000000570000RX-a|0000000000170000RR|00000000b9f70000RRF-a|00000000e3570000RXY-a",
+- eor_3 = "00000000e3820000RXY-a|00000000b9820000RRE|00000000b9e70000RRF-a|
+- eor_c = "0000000000d70000SS-a",
+- eor_i = "0000000000970000SI| 00000000eb570000|000000000c060000a|000000000c070000RIL-a",
+-
+- -- load instruction to be added and the following instructions need to be changed (are not s390x related)
+-
+- neg_2 = "4b0003e0DMg",
+- neg_3 = "4b0003e0DMSg",
+- negs_2 = "6b0003e0DMg",
+- negs_3 = "6b0003e0DMSg",
+- adc_3 = "1a000000DNMg",
+- adcs_3 = "3a000000DNMg",
+- sbc_3 = "5a000000DNMg",
+- sbcs_3 = "7a000000DNMg",
+- ngc_2 = "5a0003e0DMg",
+- ngcs_2 = "7a0003e0DMg",
+- and_3 = "0a000000DNMg|12000000pDNig",
+- and_4 = "0a000000DNMSg",
+- orr_3 = "2a000000DNMg|32000000pDNig",
+- orr_4 = "2a000000DNMSg",
+- eor_3 = "4a000000DNMg|52000000pDNig",
+- eor_4 = "4a000000DNMSg",
+- ands_3 = "6a000000DNMg|72000000DNig",
+- ands_4 = "6a000000DNMSg",
+- tst_2 = "6a00001fNMg|7200001fNig",
+- tst_3 = "6a00001fNMSg",
+- bic_3 = "0a200000DNMg",
+- bic_4 = "0a200000DNMSg",
+- orn_3 = "2a200000DNMg",
+- orn_4 = "2a200000DNMSg",
+- eon_3 = "4a200000DNMg",
+- eon_4 = "4a200000DNMSg",
+- bics_3 = "6a200000DNMg",
+- bics_4 = "6a200000DNMSg",
+- movn_2 = "12800000DWg",
+- movn_3 = "12800000DWRg",
+- movz_2 = "52800000DWg",
+- movz_3 = "52800000DWRg",
+- movk_2 = "72800000DWg",
+- movk_3 = "72800000DWRg",
+- -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
+- mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
+- mov_3 = "2a0003e0DMSg",
+- mvn_2 = "2a2003e0DMg",
+- mvn_3 = "2a2003e0DMSg",
+- adr_2 = "10000000DBx",
+- adrp_2 = "90000000DBx",
+- csel_4 = "1a800000DNMCg",
+- csinc_4 = "1a800400DNMCg",
+- csinv_4 = "5a800000DNMCg",
+- csneg_4 = "5a800400DNMCg",
+- cset_2 = "1a9f07e0Dcg",
+- csetm_2 = "5a9f03e0Dcg",
+- cinc_3 = "1a800400DNmcg",
+- cinv_3 = "5a800000DNmcg",
+- cneg_3 = "5a800400DNmcg",
+- ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
+- ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
+- madd_4 = "1b000000DNMAg",
+- msub_4 = "1b008000DNMAg",
+- mul_3 = "1b007c00DNMg",
+- mneg_3 = "1b00fc00DNMg",
+- smaddl_4 = "9b200000DxNMwAx",
+- smsubl_4 = "9b208000DxNMwAx",
+- smull_3 = "9b207c00DxNMw",
+- smnegl_3 = "9b20fc00DxNMw",
+- smulh_3 = "9b407c00DNMx",
+- umaddl_4 = "9ba00000DxNMwAx",
+- umsubl_4 = "9ba08000DxNMwAx",
+- umull_3 = "9ba07c00DxNMw",
+- umnegl_3 = "9ba0fc00DxNMw",
+- umulh_3 = "9bc07c00DNMx",
+- udiv_3 = "1ac00800DNMg",
+- sdiv_3 = "1ac00c00DNMg",
+- -- Bit operations.
+- sbfm_4 = "13000000DN12w|93400000DN12x",
+- bfm_4 = "33000000DN12w|b3400000DN12x",
+- ubfm_4 = "53000000DN12w|d3400000DN12x",
+- extr_4 = "13800000DNM2w|93c00000DNM2x",
+- sxtb_2 = "13001c00DNw|93401c00DNx",
+- sxth_2 = "13003c00DNw|93403c00DNx",
+- sxtw_2 = "93407c00DxNw",
+- uxtb_2 = "53001c00DNw",
+- uxth_2 = "53003c00DNw",
+- sbfx_4 = op_alias("sbfm_4", alias_bfx),
+- bfxil_4 = op_alias("bfm_4", alias_bfx),
+- ubfx_4 = op_alias("ubfm_4", alias_bfx),
+- sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
+- bfi_4 = op_alias("bfm_4", alias_bfiz),
+- ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
+- lsl_3 = function(params, nparams)
+- if params and params[3]:byte() == 35 then
+- return alias_lslimm(params, nparams)
+- else
+- return op_template(params, "1ac02000DNMg", nparams)
+- end
+- end,
+- lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
+- asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
+- ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
+- clz_2 = "5ac01000DNg",
+- cls_2 = "5ac01400DNg",
+- rbit_2 = "5ac00000DNg",
+- rev_2 = "5ac00800DNw|dac00c00DNx",
+- rev16_2 = "5ac00400DNg",
+- rev32_2 = "dac00800DNx",
+- -- Loads and stores.
+- ["strb_*"] = "38000000DwL",
+- ["ldrb_*"] = "38400000DwL",
+- ["ldrsb_*"] = "38c00000DwL|38800000DxL",
+- ["strh_*"] = "78000000DwL",
+- ["ldrh_*"] = "78400000DwL",
+- ["ldrsh_*"] = "78c00000DwL|78800000DxL",
+- ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
+- ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
+- ["ldrsw_*"] = "98000000DxB|b8800000DxL",
+- -- NOTE: ldur etc. are handled by ldr et al.
+- ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
+- ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
+- ["ldpsw_*"] = "68400000DAxP",
+- -- Branches.
+- b_1 = "14000000B",
+- bl_1 = "94000000B",
+- blr_1 = "d63f0000Nx",
+- br_1 = "d61f0000Nx",
+- ret_0 = "d65f03c0",
+- ret_1 = "d65f0000Nx",
+- -- b.cond is added below.
+- cbz_2 = "34000000DBg",
+- cbnz_2 = "35000000DBg",
+- tbz_3 = "36000000DTBw|36000000DTBx",
+- tbnz_3 = "37000000DTBw|37000000DTBx",
+- -- Miscellaneous instructions.
+- -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
+- -- TODO: sys, sysl, ic, dc, at, tlbi
+- -- TODO: hint, yield, wfe, wfi, sev, sevl
+- -- TODO: clrex, dsb, dmb, isb
+- nop_0 = "d503201f",
+- brk_0 = "d4200000",
+- brk_1 = "d4200000W",
+- -- Floating point instructions.
+- fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
+- fabs_2 = "1e20c000DNf",
+- fneg_2 = "1e214000DNf",
+- fsqrt_2 = "1e21c000DNf",
+- fcvt_2 = "1e22c000DdNs|1e624000DsNd",
+- -- TODO: half-precision and fixed-point conversions.
+- fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
+- fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
+- fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
+- fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
+- fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
+- fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
+- fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
+- fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
+- fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
+- fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
+- scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
+- ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
+- frintn_2 = "1e244000DNf",
+- frintp_2 = "1e24c000DNf",
+- frintm_2 = "1e254000DNf",
+- frintz_2 = "1e25c000DNf",
+- frinta_2 = "1e264000DNf",
+- frintx_2 = "1e274000DNf",
+- frinti_2 = "1e27c000DNf",
+- fadd_3 = "1e202800DNMf",
+- fsub_3 = "1e203800DNMf",
+- fmul_3 = "1e200800DNMf",
+- fnmul_3 = "1e208800DNMf",
+- fdiv_3 = "1e201800DNMf",
+- fmadd_4 = "1f000000DNMAf",
+- fmsub_4 = "1f008000DNMAf",
+- fnmadd_4 = "1f200000DNMAf",
+- fnmsub_4 = "1f208000DNMAf",
+- fmax_3 = "1e204800DNMf",
+- fmaxnm_3 = "1e206800DNMf",
+- fmin_3 = "1e205800DNMf",
+- fminnm_3 = "1e207800DNMf",
+- fcmp_2 = "1e202000NMf|1e202008NZf",
+- fcmpe_2 = "1e202010NMf|1e202018NZf",
+- fccmp_4 = "1e200400NMVCf",
+- fccmpe_4 = "1e200410NMVCf",
+- fcsel_4 = "1e200c00DNMCf",
+- -- TODO: crc32*, aes*, sha*, pmull
+- -- TODO: SIMD instructions.
++ a = "000000005a000000j",
++ar = "0000000000001a00g",
++ay = "0000e3000000005ak",
++ag = "0000e30000000008k",
++agr = "00000000b9080000h",
++agf = "0000e30000000018k",
++agfr = "00000000b9180000h",
++axbr = "00000000b34a0000h",
++adbr = "00000000b31a0000h",
++aebr = "00000000b30a0000h",
++ah = "000000004a000000j",
++ahy = "0000e3000000007ak",
++afi = "0000c20900000000l",
++agfi = "0000c20800000000l",
++aih = "0000cc0800000000l",
++al = "000000005e000000j",
++alr = "0000000000001e00g",
++aly = "0000e3000000005ek",
++alg = "0000e3000000000ak",
++algr = "00000000b90a0000h",
++algf = "0000e3000000001ak",
++algfr = "00000000b91a0000h",
++alfi = "0000c20b00000000l",
++algfi = "0000c20a00000000l",
++alc = "0000e30000000098k",
++alcr = "00000000b9980000h",
++alcg = "0000e30000000088k",
++alcgr = "00000000b9880000h",
++alsih = "0000cc0a00000000l",
++alsihn = "0000cc0b00000000l",
++axr = "0000000000003600g",
++ad = "000000006a000000j",
++adr = "0000000000002a00g",
++ae = "000000007a000000j",
++aer = "0000000000003a00g",
++aw = "000000006e000000j",
++awr = "0000000000002e00g",
++au = "000000007e000000j",
++aur = "0000000000003e00g",
++n = "0000000054000000j",
++nr = "0000000000001400g",
++ny = "0000e30000000054k",
++ng = "0000e30000000080k",
++ngr = "00000000b9800000h",
++nihf = "0000c00a00000000l",
++nilf = "0000c00b00000000l",
++bal = "0000000045000000j",
++balr = "000000000000500g",
++bas = "000000004d000000j",
++basr = "0000000000000d00g",
++bassm = "0000000000000c00g",
++bsa = "00000000b25a0000h",
++bsm = "0000000000000b00g",
++bakr = "00000000b2400000h",
++bsg = "00000000b2580000h",
++bc = "0000000047000000j",
++bcr = "000000000000700g",
++bct = "0000000046000000j",
++bctr = "000000000000600g",
++bctg = "0000e30000000046k",
++bctgr = "00000000b9460000h",
++bxh = "0000000086000000m",
++bxhg = "0000eb0000000044n",
++bxle = "0000000087000000m",
++bxleg = "0000eb0000000045n",
++brasl = "0000c00500000000l",
++brcl = "0000c00400000000l",
++brcth = "0000cc0600000000l",
++cksm = "00000000b2410000h",
++km = "00000000b92e0000h",
++kmf = "00000000b92a0000h",
++kmc = "00000000b92f0000h",
++kmo = "00000000b92b0000h",
++c = "0000000059000000j",
++cr = "0000000000001900g",
++cy = "0000e30000000059k",
++cg = "0000e30000000020k",
++cgr = "00000000b9200000h",
++cgf = "0000e30000000030k",
++cgfr = "00000000b9300000h",
++cxbr = "00000000b3490000h",
++cxtr = "00000000b3ec0000h",
++cxr = "00000000b3690000h",
++cdbr = "00000000b3190000h",
++cdtr = "00000000b3e40000h",
++cd = "0000000069000000j",
++cdr = "0000000000002900g",
++cebr = "00000000b3090000h",
++ce = "0000000079000000j",
++cer = "0000000000003900g",
++kxbr = "00000000b3480000h",
++kxtr = "00000000b3e80000h",
++kdbr = "00000000b3180000h",
++kdtr = "00000000b3e00000h",
++kebr = "00000000b3080000h",
++cs = "00000000ba000000m",
++csy = "0000eb0000000014n",
++csg = "0000eb0000000030n",
++csp = "00000000b2500000h",
++cspg = "00000000b98a0000h",
++cextr = "00000000b3fc0000h",
++cedtr = "00000000b3f40000h",
++cds = "00000000bb000000m",
++cdsy = "0000eb0000000031n",
++cdsg = "0000eb000000003en",
++ch = "0000000049000000j",
++chy = "0000e30000000079k",
++cgh = "0000e30000000034k",
++chrl = "0000c60500000000l",
++cghrl = "0000c60400000000l",
++chf = "0000e300000000cdk",
++chhr = "00000000b9cd0000h",
++chlr = "00000000b9dd0000h",
++cfi = "0000c20d00000000l",
++cgfi = "0000c20c00000000l",
++cih = "0000cc0d00000000l",
++cl = "0000000055000000j",
++clr = "0000000000001500g",
++cly = "0000e30000000055k",
++clg = "0000e30000000021k",
++clgr = "00000000b9210000h",
++clgf = "0000e30000000031k",
++clgfr = "00000000b9310000h",
++clmh = "0000eb0000000020n",
++clm = "00000000bd000000m",
++clmy = "0000eb0000000021n",
++clhf = "0000e300000000cfk",
++clhhr = "00000000b9cf0000h",
++clhlr = "00000000b9df0000h",
++clfi = "0000c20f00000000l",
++clgfi = "0000c20e00000000l",
++clih = "0000cc0f00000000l",
++clcl = "0000000000000f00g",
++clcle = "00000000a9000000m",
++clclu = "0000eb000000008fn",
++clrl = "0000c60f00000000l",
++clhrl = "0000c60700000000l",
++clgrl = "0000c60a00000000l",
++clghrl = "0000c60600000000l",
++clgfrl = "0000c60e00000000l",
++clst = "00000000b25d0000h",
++crl = "0000c60d00000000l",
++cgrl = "0000c60800000000l",
++cgfrl = "0000c60c00000000l",
++ cuse = "00000000b2570000h",
++cmpsc = "00000000b2630000h",
++kimd = "00000000b93e0000h",
++klmd = "00000000b93f0000h",
++kmac = "00000000b91e0000h",
++thdr = "00000000b3590000h",
++thder = "00000000b3580000h",
++cxfbr = "00000000b3960000h",
++cxftr = "00000000b9590000h",
++cxfr = "00000000b3b60000h",
++cdfbr = "00000000b3950000h",
++cdftr = "00000000b9510000h",
++cdfr = "00000000b3b50000h",
++cefbr = "00000000b3940000h",
++cefr = "00000000b3b40000h",
++cxgbr = "00000000b3a60000h",
++cxgtr = "00000000b3f90000h",
++cxgr = "00000000b3c60000h",
++cdgbr = "00000000b3a50000h",
++cdgtr = "00000000b3f10000h",
++cdgr = "00000000b3c50000h",
++cegbr = "00000000b3a40000h",
++cegr = "00000000b3c40000h",
++cxstr = "00000000b3fb0000h",
++cdstr = "00000000b3f30000h",
++cxutr = "00000000b3fa0000h",
++cdutr = "00000000b3f20000h",
++cvb = "000000004f000000j",
++cvby = "0000e30000000006k",
++cvbg = "0000e3000000000ek",
++cvd = "000000004e000000j",
++cvdy = "0000e30000000026k",
++cvdg = "0000e3000000002ek",
++cuxtr = "00000000b3ea0000h",
++cudtr = "00000000b3e20000h",
++cu42 = "00000000b9b30000h",
++cu41 = "00000000b9b20000h",
++cpya = "00000000b24d0000h",
++d = "000000005d000000j",
++dr = "0000000000001d00g",
++dxbr = "00000000b34d0000h",
++dxr = "00000000b22d0000h",
++ddbr = "00000000b31d0000h",
++dd = "000000006d000000j",
++ddr = "0000000000002d00g",
++debr = "00000000b30d0000h",
++de = "000000007d000000j",
++der = "0000000000003d00g",
++dl = "0000e30000000097k",
++dlr = "00000000b9970000h",
++dlg = "0000e30000000087k",
++dlgr = "00000000b9870000h",
++dsg = "0000e3000000000dk",
++dsgr = "00000000b90d0000h",
++dsgf = "0000e3000000001dk",
++dsgfr = "00000000b91d0000h",
++x = "0000000057000000j",
++xr = "0000000000001700g",
++xy = "0000e30000000057k",
++xg = "0000e30000000082k",
++xgr = "00000000b9820000h",
++xihf = "0000c00600000000l",
++xilf = "0000c00700000000l",
++ex = "0000000044000000j",
++exrl = "0000c60000000000l",
++ear = "00000000b24f0000h",
++esea = "00000000b99d0000h",
++eextr = "00000000b3ed0000h",
++eedtr = "00000000b3e50000h",
++ecag = "0000eb000000004cn",
++efpc = "00000000b38c0000h",
++epar = "00000000b2260000h",
++epair = "00000000b99a0000h",
++epsw = "00000000b98d0000h",
++esar = "00000000b2270000h",
++esair = "00000000b99b0000h",
++esxtr = "00000000b3ef0000h",
++esdtr = "00000000b3e70000h",
++ereg = "00000000b2490000h",
++eregg = "00000000b90e0000h",
++esta = "00000000b24a0000h",
++flogr = "00000000b9830000h",
++hdr = "0000000000002400g",
++her = "0000000000003400g",
++iac = "00000000b2240000h",
++ic = "0000000043000000j",
++icy = "0000e30000000073k",
++icmh = "0000eb0000000080n",
++icm = "00000000bf000000m",
++icmy = "0000eb0000000081n",
++iihf = "0000c00800000000l",
++iilf = "0000c00900000000l",
++ipm = "00000000b2220000h",
++iske = "00000000b2290000h",
++ivsk = "00000000b2230000h",
++l = "0000000058000000j",
++lr = "0000000000001800g",
++ly = "0000e30000000058k",
++lg = "0000e30000000004k",
++lgr = "00000000b9040000h",
++lgf = "0000e30000000014k",
++lgfr = "00000000b9140000h",
++lxr = "00000000b3650000h",
++ld = "0000000068000000j",
++ldr = "0000000000002800g",
++ldy = "0000ed0000000065k",
++le = "0000000078000000j",
++ler = "0000000000003800g",
++ ley = "0000ed0000000064k",
++lam = "000000009a000000m",
++lamy = "0000eb000000009an",
++la = "0000000041000000j",
++lay = "0000e30000000071k",
++lae = "0000000051000000j",
++laey = "0000e30000000075k",
++larl = "0000c00000000000l",
++laa = "0000eb00000000f8n",
++laag = "0000eb00000000e8n",
++laal = "0000eb00000000fan",
++laalg = "0000eb00000000ean",
++lan = "0000eb00000000f4n",
++lang = "0000eb00000000e4n",
++lax = "0000eb00000000f7n",
++laxg = "0000eb00000000e7n",
++lao = "0000eb00000000f6n",
++laog = "0000eb00000000e6n",
++lt = "0000e30000000012k",
++ltr = "0000000000001200g",
++ltg = "0000e30000000002k",
++ltgr = "00000000b9020000h",
++ltgf = "0000e30000000032k",
++ltgfr = "00000000b9120000h",
++ltxbr = "00000000b3420000h",
++ltxtr = "00000000b3de0000h",
++ltxr = "00000000b3620000h",
++ltdbr = "00000000b3120000h",
++ltdtr = "00000000b3d60000h",
++ltdr = "0000000000002200g",
++ltebr = "00000000b3020000h",
++lter = "0000000000003200g",
++lb = "0000e30000000076k",
++lbr = "00000000b9260000h",
++lgb = "0000e30000000077k",
++lgbr = "00000000b9060000h",
++ lbh = "0000e300000000c0k",
++lcr = "0000000000001300g",
++lcgr = "00000000b9030000h",
++lcgfr = "00000000b9130000h",
++lcxbr = "00000000b3430000h",
++lcxr = "00000000b3630000h",
++lcdbr = "00000000b3130000h",
++lcdr = "0000000000002300g",
++lcdfr = "00000000b3730000h",
++lcebr = "00000000b3030000h",
++lcer = "0000000000003300g",
++lctl = "00000000b7000000m",
++lctlg = "0000eb000000002fn",
++fixr = "00000000b3670000h",
++fidr = "00000000b37f0000h",
++fier = "00000000b3770000h",
++ldgr = "00000000b3c10000h",
++lgdr = "00000000b3cd0000h",
++lh = "0000000048000000j",
++lhr = "00000000b9270000h",
++lhy = "0000e30000000078k",
++lgh = "0000e30000000015k",
++lghr = "00000000b9070000h",
++lhh = "0000e300000000c4k",
++lhrl = "0000c40500000000l",
++lghrl = "0000c40400000000l",
++lfh = "0000e300000000cak",
++lgfi = "0000c00100000000l",
++lxdbr = "00000000b3050000h",
++lxdr = "00000000b3250000h",
++lxebr = "00000000b3060000h",
++lxer = "00000000b3260000h",
++ldebr = "00000000b3040000h",
++lder = "00000000b3240000h",
++llgf = "0000e30000000016k",
++llgfr = "00000000b9160000h",
++llc = "0000e30000000094k",
++llcr = "00000000b9940000h",
++llgc = "0000e30000000090k",
++llgcr = "00000000b9840000h",
++llch = "0000e300000000c2k",
++llh = "0000e30000000095k",
++llhr = "00000000b9950000h",
++llgh = "0000e30000000091k",
++llghr = "00000000b9850000h",
++llhh = "0000e300000000c6k",
++llhrl = "0000c40200000000l",
++llghrl = "0000c40600000000l",
++llihf = "0000c00e00000000l",
++llilf = "0000c00f00000000l",
++llgfrl = "0000c40e00000000l",
++llgt = "0000e30000000017k",
++llgtr = "00000000b9170000h",
++lm = "0000000098000000m",
++lmy = "0000eb0000000098n",
++lmg = "0000eb0000000004n",
++lmh = "0000eb0000000096n",
++lnr = "0000000000001100g",
++lngr = "00000000b9010000h",
++lngfr = "00000000b9110000h",
++lnxbr = "00000000b3410000h",
++lnxr = "00000000b3610000h",
++lndbr = "00000000b3110000h",
++lndr = "0000000000002100g",
++lndfr = "00000000b3710000h",
++lnebr = "00000000b3010000h",
++lner = "0000000000003100g",
++loc = "0000eb00000000f2n",
++locg = "0000eb00000000e2n",
++lpq = "0000e3000000008fk",
++lpr = "0000000000001000g",
++lpgr = "00000000b9000000h",
++lpgfr = "00000000b9100000h",
++lpxbr = "00000000b3400000h",
++lpxr = "00000000b3600000h",
++lpdbr = "00000000b3100000h",
++lpdr = "0000000000002000g",
++lpdfr = "00000000b3700000h",
++lpebr = "00000000b3000000h",
++lper = "0000000000003000g",
++lra = "00000000b1000000j",
++lray = "0000e30000000013k",
++lrag = "0000e30000000003k",
++lrl = "0000c40d00000000l",
++lgrl = "0000c40800000000l",
++lgfrl = "0000c40c00000000l",
++lrvh = "0000e3000000001fk",
++lrv = "0000e3000000001ek",
++lrvr = "00000000b91f0000h",
++lrvg = "0000e3000000000fk",
++lrvgr = "00000000b90f0000h",
++ldxbr = "00000000b3450000h",
++ldxr = "0000000000002500g",
++lrdr = "0000000000002500g",
++lexbr = "00000000b3460000h",
++lexr = "00000000b3660000h",
++ledbr = "00000000b3440000h",
++ledr = "0000000000003500g",
++lrer = "0000000000003500g",
++lura = "00000000b24b0000h",
++lurag = "00000000b9050000h",
++lzxr = "00000000b3760000h",
++lzdr = "00000000b3750000h",
++lzer = "00000000b3740000h",
++msta = "00000000b2470000h",
++mvcl = "0000000000000e00g",
++mvcle = "00000000a8000000m",
++mvclu = "0000eb000000008en",
++mvpg = "00000000b2540000h",
++mvst = "00000000b2550000h",
++m = "000000005c000000j",
++mfy = "0000e3000000005ck",
++mr = "0000000000001c00g",
++mxbr = "00000000b34c0000h",
++mxr = "0000000000002600g",
++mdbr = "00000000b31c0000h",
++md = "000000006c000000j",
++mdr = "0000000000002c00g",
++mxdbr = "00000000b3070000h",
++mxd = "0000000067000000j",
++mxdr = "0000000000002700g",
++meebr = "00000000b3170000h",
++meer = "00000000b3370000h",
++mdebr = "00000000b30c0000h",
++mde = "000000007c000000j",
++mder = "0000000000003c00g",
++me = "000000007c000000j",
++mer = "0000000000003c00g",
++mh = "000000004c000000j",
++mhy = "0000e3000000007ck",
++mlg = "0000e30000000086k",
++mlgr = "00000000b9860000h",
++ml = "0000e30000000096k",
++mlr = "00000000b9960000h",
++ms = "0000000071000000j",
++msr = "00000000b2520000h",
++msy = "0000e30000000051k",
++msg = "0000e3000000000ck",
++msgr = "00000000b90c0000h",
++msgf = "0000e3000000001ck",
++msgfr = "00000000b91c0000h",
++msfi = "0000c20100000000l",
++msgfi = "0000c20000000000l",
++o = "0000000056000000j",
++or = "0000000000001600g",
++oy = "0000e30000000056k",
++og = "0000e30000000081k",
++ogr = "00000000b9810000h",
++oihf = "0000c00c00000000l",
++oilf = "0000c00d00000000l",
++pgin = "00000000b22e0000h",
++pgout = "00000000b22f0000h",
++pcc = "00000000b92c0000h",
++pckmo = "00000000b9280000h",
++pfmf = "00000000b9af0000h",
++ptf = "00000000b9a20000h",
++popcnt = "00000000b9e10000h",
++pfd = "0000e30000000036k",
++pfdrl = "0000c60200000000l",
++pt = "00000000b2280000h",
++pti = "00000000b99e0000h",
++palb = "00000000b2480000h",
++rrbe = "00000000b22a0000h",
++rrbm = "00000000b9ae0000h",
++rll = "0000eb000000001dn",
++rllg = "0000eb000000001cn",
++srst = "00000000b25e0000h",
++srstu = "00000000b9be0000h",
++sar = "00000000b24e0000h",
++sfpc = "00000000b3840000h",
++sfasr = "00000000b3850000h",
++spm = "000000000000400g",
++ssar = "00000000b2250000h",
++ssair = "00000000b99f0000h",
++slda = "000000008f000000m",
++sldl = "000000008d000000m",
++sla = "000000008b000000m",
++slak = "0000eb00000000ddn",
++slag = "0000eb000000000bn",
++sll = "0000000089000000m",
++sllk = "0000eb00000000dfn",
++sllg = "0000eb000000000dn",
++srda = "000000008e000000m",
++srdl = "000000008c000000m",
++sra = "000000008a000000m",
++srak = "0000eb00000000dcn",
++srag = "0000eb000000000an",
++srl = "0000000088000000m",
++srlk = "0000eb00000000den",
++srlg = "0000eb000000000cn",
++sqxbr = "00000000b3160000h",
++sqxr = "00000000b3360000h",
++sqdbr = "00000000b3150000h",
++sqdr = "00000000b2440000h",
++sqebr = "00000000b3140000h",
++sqer = "00000000b2450000h",
++st = "0000000050000000j",
++sty = "0000e30000000050k",
++stg = "0000e30000000024k",
++std = "0000000060000000j",
++stdy = "0000ed0000000067k",
++ste = "0000000070000000j",
++stey = "0000ed0000000066k",
++stam = "000000009b000000m",
++stamy = "0000eb000000009bn",
++stc = "0000000042000000j",
++stcy = "0000e30000000072k",
++stch = "0000e300000000c3k",
++stcmh = "0000eb000000002cn",
++stcm = "00000000be000000m",
++stcmy = "0000eb000000002dn",
++stctl = "00000000b6000000m",
++stctg = "0000eb0000000025n",
++sth = "0000000040000000j",
++sthy = "0000e30000000070k",
++sthh = "0000e300000000c7k",
++sthrl = "0000c40700000000l",
++stfh = "0000e300000000cbk",
++stm = "0000000090000000m",
++stmy = "0000eb0000000090n",
++stmg = "0000eb0000000024n",
++stmh = "0000eb0000000026n",
++stoc = "0000eb00000000f3n",
++stocg = "0000eb00000000e3n",
++stpq = "0000e3000000008ek",
++strl = "0000c40f00000000l",
++stgrl = "0000c40b00000000l",
++strvh = "0000e3000000003fk",
++strv = "0000e3000000003ek",
++strvg = "0000e3000000002fk",
++stura = "00000000b2460000h",
++sturg = "00000000b9250000h",
++s = "000000005b000000j",
++sr = "0000000000001b00g",
++sy = "0000e3000000005bk",
++sg = "0000e30000000009k",
++sgr = "00000000b9090000h",
++sgf = "0000e30000000019k",
++sgfr = "00000000b9190000h",
++sxbr = "00000000b34b0000h",
++sdbr = "00000000b31b0000h",
++sebr = "00000000b30b0000h",
++sh = "000000004b000000j",
++shy = "0000e3000000007bk",
++sl = "000000005f000000j",
++slr = "0000000000001f00g",
++sly = "0000e3000000005fk",
++slg = "0000e3000000000bk",
++slgr = "00000000b90b0000h",
++slgf = "0000e3000000001bk",
++slgfr = "00000000b91b0000h",
++slfi = "0000c20500000000l",
++slgfi = "0000c20400000000l",
++slb = "0000e30000000099k",
++slbr = "00000000b9990000h",
++slbg = "0000e30000000089k",
++slbgr = "00000000b9890000h",
++sxr = "0000000000003700g",
++sd = "000000006b000000j",
++sdr = "0000000000002b00g",
++se = "000000007b000000j",
++ser = "0000000000003b00g",
++su = "000000007f000000j",
++sur = "0000000000003f00g",
++sw = "000000006f000000j",
++swr = "0000000000002f00g",
++tar = "00000000b24c0000h",
++tb = "00000000b22c0000h",
++trace = "0000000099000000m",
++tracg = "0000eb000000000fn",
++tre = "00000000b2a50000h",
+ }
+ for cond,c in pairs(map_cond) do
+ map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
+@@ -964,87 +1218,30 @@ end
+ ------------------------------------------------------------------------------
+ -- Handle opcodes defined with template strings.
+ local function parse_template(params, template, nparams, pos)
+- local op = tonumber(sub(template, 1, 12), 16) -- 13-16 ignored since those are trailing zeros added after the instruction
++ local op = tonumber(sub(template, 1, 16), 16) --
+ -- 00000000005a0000 converts to 90
+ local n,rs = 1,26
+
+ parse_reg_type = false
+ -- Process each character. (if its RX-a==> 1st iteration gets R, 2nd==X and so on)
+ for p in gmatch(sub(template, 17), ".") do
+- local q = params[n]
+- if p == "R" then
+- op = op + parse_reg(q); n = n + 1
+- elseif p == "N" then
+- op = op + shl(parse_reg(q), 5); n = n + 1
+- elseif p == "M" then
+- op = op + shl(parse_reg(q), 16); n = n + 1
+- elseif p == "A" then
+- op = op + shl(parse_reg(q), 10); n = n + 1
++ local pr1,pr2,pr3
++ if p == "g" then
++ pr1,pr2=param[n],param[n+1]
++ op = op + parse_reg(pr1)+parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
++ elseif p == "h" then
++
++ elseif p == "j" then
++
++ elseif p == "k" then
++
++ elseif p == "l" then
++
+ elseif p == "m" then
+- op = op + shl(parse_reg(params[n-1]), 16)
+- elseif p == "p" then
+- if q == "sp" then params[n] = "@x31" end
+- elseif p == "g" then
+- if parse_reg_type == "x" then
+- op = op + 0x80000000
+- elseif parse_reg_type ~= "w" then
+- werror("bad register type")
+- end
+- parse_reg_type = false
+- elseif p == "f" then
+- if parse_reg_type == "d" then
+- op = op + 0x00400000
+- elseif parse_reg_type ~= "s" then
+- werror("bad register type")
+- end
+- parse_reg_type = false
+- elseif p == "x" or p == "w" or p == "d" or p == "s" then
+- if parse_reg_type ~= p then
+- werror("register size mismatch")
++
++ elseif p == "n" then
++
+ end
+- parse_reg_type = false
+- elseif p == "L" then
+- op = parse_load(params, nparams, n, op)
+- elseif p == "P" then
+- op = parse_load_pair(params, nparams, n, op)
+- elseif p == "B" then
+- local mode, v, s = parse_label(q, false); n = n + 1
+- local m = branch_type(op)
+- waction("REL_"..mode, v+m, s, 1)
+- elseif p == "I" then
+- op = op + parse_imm12(q); n = n + 1
+- elseif p == "i" then
+- op = op + parse_imm13(q); n = n + 1
+- elseif p == "W" then
+- op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
+- elseif p == "T" then
+- op = op + parse_imm6(q); n = n + 1
+- elseif p == "1" then
+- op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
+- elseif p == "2" then
+- op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
+- elseif p == "5" then
+- op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
+- elseif p == "V" then
+- op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
+- elseif p == "F" then
+- op = op + parse_fpimm(q); n = n + 1
+- elseif p == "Z" then
+- if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
+- n = n + 1
+- elseif p == "S" then
+- op = op + parse_shift(q); n = n + 1
+- elseif p == "X" then
+- op = op + parse_extend(q); n = n + 1
+- elseif p == "R" then
+- op = op + parse_lslx16(q); n = n + 1
+- elseif p == "C" then
+- op = op + parse_cond(q, 0); n = n + 1
+- elseif p == "c" then
+- op = op + parse_cond(q, 1); n = n + 1
+- else
+- assert(false)
+- end
+ end
+ wputpos(pos, op)
+ end
+
+From cac5d4f2fbebccf57dc34e443fbd7d55db77b242 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 25 Nov 2016 16:38:32 -0500
+Subject: [PATCH 034/260] Add extended mnemonics for branches.
+
+---
+ dynasm/dasm_s390x.lua | 22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index e39a27f1b..76fe281e0 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -239,9 +239,10 @@ local map_extend = {
+ }
+
+ local map_cond = {
+- eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
+- hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
+- hs = 2, lo = 3,
++ o = 1, h = 2, hle = 3, l = 4,
++ nhe = 5, lh = 6, ne = 7, e = 8,
++ nlh = 9, he = 10, nl = 11, le = 12,
++ nh = 13, no = 14, [""] = 15,
+ }
+
+ ------------------------------------------------------------------------------
+@@ -650,7 +651,7 @@ local alias_lslimm = op_alias("ubfm_4", function(p)
+ end
+ end)
+
+--- Template strings for ARM instructions.
++-- Template strings for s390x instructions.
+ map_op = {
+ a = "000000005a000000j",
+ ar = "0000000000001a00g",
+@@ -1084,7 +1085,7 @@ msgfr = "00000000b91c0000h",
+ msfi = "0000c20100000000l",
+ msgfi = "0000c20000000000l",
+ o = "0000000056000000j",
+-or = "0000000000001600g",
++["or"] = "0000000000001600g",
+ oy = "0000e30000000056k",
+ og = "0000e30000000081k",
+ ogr = "00000000b9810000h",
+@@ -1213,7 +1214,16 @@ tracg = "0000eb000000000fn",
+ tre = "00000000b2a50000h",
+ }
+ for cond,c in pairs(map_cond) do
+- map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
++ -- Extended mnemonics for branches.
++ -- TODO: replace 'B' with correct encoding.
++ -- brc
++ map_op["j"..cond.."_1"] = "00000000"..tohex(0xa7040000+shl(c, 20)).."B"
++ -- brcl
++ map_op["jg"..cond.."_1"] = tohex(0xc004+shl(c, 4)).."00000000".."B"
++ -- bc
++ map_op["b"..cond.."_1"] = "00000000"..tohex(0x47000000+shl(c, 20)).."B"
++ -- bcr
++ map_op["b"..cond.."r_1"] = "00000000"..tohex(0x0700+shl(c, 4)).."B"
+ end
+ ------------------------------------------------------------------------------
+ -- Handle opcodes defined with template strings.
+
+From 203006579f28d0aa0a0108845fd589f44a00e164 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Mon, 28 Nov 2016 13:32:30 +0530
+Subject: [PATCH 035/260] Removed the extra check in parse_reg
+
+The extra check for the register is currently ignored so that we can see what value the encode function returns. It is still to be worked out how this value is used later, after decoding.
+---
+ dynasm/dasm_s390x.lua | 10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 76fe281e0..340ad24c9 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -251,15 +251,7 @@ local parse_reg_type
+
+
+ local function parse_gpr(expr)
+- local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
+- local tp = map_type[tname or expr]
+- if tp then
+- local reg = ovreg or tp.reg
+- if not reg then
+- werror("type `"..(tname or expr).."' needs a register override")
+- end
+- expr = reg
+- end
++ -- assuming we get r0-r31 for now
+ local r = match(expr, "^r([1-3]?[0-9])$")
+ if r then
+ r = tonumber(r)
+
+From 538a4afee2995fee75ec97faddbb5c7cb16b4432 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Mon, 28 Nov 2016 15:29:58 +0530
+Subject: [PATCH 036/260] Updated size of the instruction word
+
+We can discuss whether the instruction word needs to be 6 bytes or 8 bytes long; this is not clear enough to me yet either.
+---
+ dynasm/dasm_s390x.lua | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 340ad24c9..2965034a4 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -88,7 +88,7 @@ end
+
+ -- Add word to action list.
+ local function wputxw(n)
+- assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
++ assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of range") -- s390x inst can be 6 bytes
+ actlist[#actlist+1] = n
+ end
+
+@@ -109,7 +109,7 @@ local function wflush(term)
+ secpos = 1 -- The actionlist offset occupies a buffer position, too.
+ end
+
+--- Put escaped word.
++-- Put escaped word. --Need to check this as well, not sure how it will work on s390x
+ local function wputw(n)
+ if n <= 0x000fffff then waction("ESC") end
+ wputxw(n)
+@@ -122,9 +122,9 @@ local function wpos()
+ return pos
+ end
+
+--- Store word to reserved position.
++-- Store word to reserved position. -- added 2 bytes more since s390x has 6 bytes inst as well
+ local function wputpos(pos, n)
+- assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
++ assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of range")
+ if n <= 0x000fffff then
+ insert(actlist, pos+1, n)
+ n = map_action.ESC * 0x10000
+@@ -278,7 +278,7 @@ local function parse_reg_base(expr)
+ local base, tp = parse_reg(expr)
+ if parse_reg_type ~= "x" then werror("bad register type") end
+ parse_reg_type = false
+- return shl(base, 5), tp
++ return shl(base, 5), tp -- why is it shifted not able to make out
+ end
+
+ local parse_ctx = {}
+
+From 7a49be07be90657e5fe2f3e960d85736d927d2f8 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Tue, 29 Nov 2016 19:00:28 +0530
+Subject: [PATCH 037/260] Added the required character for encoding
+
+I have added the number (the `_N` suffix on each mnemonic) depending on the number of operands; please check the ones which access memory.
+Also, for base register and displacement, should I assume that they will be passed in the same order as expected? Since I don't have any means to see the output, I am a bit confused about those addressing modes.
+Since we decided to test RR first, that's in progress, but I would like to add the others as well.
+---
+ dynasm/dasm_s390x.lua | 1130 +++++++++++++++++++++--------------------
+ 1 file changed, 567 insertions(+), 563 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 2965034a4..f1d492c12 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -645,565 +645,565 @@ end)
+
+ -- Template strings for s390x instructions.
+ map_op = {
+- a = "000000005a000000j",
+-ar = "0000000000001a00g",
+-ay = "0000e3000000005ak",
+-ag = "0000e30000000008k",
+-agr = "00000000b9080000h",
+-agf = "0000e30000000018k",
+-agfr = "00000000b9180000h",
+-axbr = "00000000b34a0000h",
+-adbr = "00000000b31a0000h",
+-aebr = "00000000b30a0000h",
+-ah = "000000004a000000j",
+-ahy = "0000e3000000007ak",
+-afi = "0000c20900000000l",
+-agfi = "0000c20800000000l",
+-aih = "0000cc0800000000l",
+-al = "000000005e000000j",
+-alr = "0000000000001e00g",
+-aly = "0000e3000000005ek",
+-alg = "0000e3000000000ak",
+-algr = "00000000b90a0000h",
+-algf = "0000e3000000001ak",
+-algfr = "00000000b91a0000h",
+-alfi = "0000c20b00000000l",
+-algfi = "0000c20a00000000l",
+-alc = "0000e30000000098k",
+-alcr = "00000000b9980000h",
+-alcg = "0000e30000000088k",
+-alcgr = "00000000b9880000h",
+-alsih = "0000cc0a00000000l",
+-alsihn = "0000cc0b00000000l",
+-axr = "0000000000003600g",
+-ad = "000000006a000000j",
+-adr = "0000000000002a00g",
+-ae = "000000007a000000j",
+-aer = "0000000000003a00g",
+-aw = "000000006e000000j",
+-awr = "0000000000002e00g",
+-au = "000000007e000000j",
+-aur = "0000000000003e00g",
+-n = "0000000054000000j",
+-nr = "0000000000001400g",
+-ny = "0000e30000000054k",
+-ng = "0000e30000000080k",
+-ngr = "00000000b9800000h",
+-nihf = "0000c00a00000000l",
+-nilf = "0000c00b00000000l",
+-bal = "0000000045000000j",
+-balr = "000000000000500g",
+-bas = "000000004d000000j",
+-basr = "0000000000000d00g",
+-bassm = "0000000000000c00g",
+-bsa = "00000000b25a0000h",
+-bsm = "0000000000000b00g",
+-bakr = "00000000b2400000h",
+-bsg = "00000000b2580000h",
+-bc = "0000000047000000j",
+-bcr = "000000000000700g",
+-bct = "0000000046000000j",
+-bctr = "000000000000600g",
+-bctg = "0000e30000000046k",
+-bctgr = "00000000b9460000h",
+-bxh = "0000000086000000m",
+-bxhg = "0000eb0000000044n",
+-bxle = "0000000087000000m",
+-bxleg = "0000eb0000000045n",
+-brasl = "0000c00500000000l",
+-brcl = "0000c00400000000l",
+-brcth = "0000cc0600000000l",
+-cksm = "00000000b2410000h",
+-km = "00000000b92e0000h",
+-kmf = "00000000b92a0000h",
+-kmc = "00000000b92f0000h",
+-kmo = "00000000b92b0000h",
+-c = "0000000059000000j",
+-cr = "0000000000001900g",
+-cy = "0000e30000000059k",
+-cg = "0000e30000000020k",
+-cgr = "00000000b9200000h",
+-cgf = "0000e30000000030k",
+-cgfr = "00000000b9300000h",
+-cxbr = "00000000b3490000h",
+-cxtr = "00000000b3ec0000h",
+-cxr = "00000000b3690000h",
+-cdbr = "00000000b3190000h",
+-cdtr = "00000000b3e40000h",
+-cd = "0000000069000000j",
+-cdr = "0000000000002900g",
+-cebr = "00000000b3090000h",
+-ce = "0000000079000000j",
+-cer = "0000000000003900g",
+-kxbr = "00000000b3480000h",
+-kxtr = "00000000b3e80000h",
+-kdbr = "00000000b3180000h",
+-kdtr = "00000000b3e00000h",
+-kebr = "00000000b3080000h",
+-cs = "00000000ba000000m",
+-csy = "0000eb0000000014n",
+-csg = "0000eb0000000030n",
+-csp = "00000000b2500000h",
+-cspg = "00000000b98a0000h",
+-cextr = "00000000b3fc0000h",
+-cedtr = "00000000b3f40000h",
+-cds = "00000000bb000000m",
+-cdsy = "0000eb0000000031n",
+-cdsg = "0000eb000000003en",
+-ch = "0000000049000000j",
+-chy = "0000e30000000079k",
+-cgh = "0000e30000000034k",
+-chrl = "0000c60500000000l",
+-cghrl = "0000c60400000000l",
+-chf = "0000e300000000cdk",
+-chhr = "00000000b9cd0000h",
+-chlr = "00000000b9dd0000h",
+-cfi = "0000c20d00000000l",
+-cgfi = "0000c20c00000000l",
+-cih = "0000cc0d00000000l",
+-cl = "0000000055000000j",
+-clr = "0000000000001500g",
+-cly = "0000e30000000055k",
+-clg = "0000e30000000021k",
+-clgr = "00000000b9210000h",
+-clgf = "0000e30000000031k",
+-clgfr = "00000000b9310000h",
+-clmh = "0000eb0000000020n",
+-clm = "00000000bd000000m",
+-clmy = "0000eb0000000021n",
+-clhf = "0000e300000000cfk",
+-clhhr = "00000000b9cf0000h",
+-clhlr = "00000000b9df0000h",
+-clfi = "0000c20f00000000l",
+-clgfi = "0000c20e00000000l",
+-clih = "0000cc0f00000000l",
+-clcl = "0000000000000f00g",
+-clcle = "00000000a9000000m",
+-clclu = "0000eb000000008fn",
+-clrl = "0000c60f00000000l",
+-clhrl = "0000c60700000000l",
+-clgrl = "0000c60a00000000l",
+-clghrl = "0000c60600000000l",
+-clgfrl = "0000c60e00000000l",
+-clst = "00000000b25d0000h",
+-crl = "0000c60d00000000l",
+-cgrl = "0000c60800000000l",
+-cgfrl = "0000c60c00000000l",
+- cuse = "00000000b2570000h",
+-cmpsc = "00000000b2630000h",
+-kimd = "00000000b93e0000h",
+-klmd = "00000000b93f0000h",
+-kmac = "00000000b91e0000h",
+-thdr = "00000000b3590000h",
+-thder = "00000000b3580000h",
+-cxfbr = "00000000b3960000h",
+-cxftr = "00000000b9590000h",
+-cxfr = "00000000b3b60000h",
+-cdfbr = "00000000b3950000h",
+-cdftr = "00000000b9510000h",
+-cdfr = "00000000b3b50000h",
+-cefbr = "00000000b3940000h",
+-cefr = "00000000b3b40000h",
+-cxgbr = "00000000b3a60000h",
+-cxgtr = "00000000b3f90000h",
+-cxgr = "00000000b3c60000h",
+-cdgbr = "00000000b3a50000h",
+-cdgtr = "00000000b3f10000h",
+-cdgr = "00000000b3c50000h",
+-cegbr = "00000000b3a40000h",
+-cegr = "00000000b3c40000h",
+-cxstr = "00000000b3fb0000h",
+-cdstr = "00000000b3f30000h",
+-cxutr = "00000000b3fa0000h",
+-cdutr = "00000000b3f20000h",
+-cvb = "000000004f000000j",
+-cvby = "0000e30000000006k",
+-cvbg = "0000e3000000000ek",
+-cvd = "000000004e000000j",
+-cvdy = "0000e30000000026k",
+-cvdg = "0000e3000000002ek",
+-cuxtr = "00000000b3ea0000h",
+-cudtr = "00000000b3e20000h",
+-cu42 = "00000000b9b30000h",
+-cu41 = "00000000b9b20000h",
+-cpya = "00000000b24d0000h",
+-d = "000000005d000000j",
+-dr = "0000000000001d00g",
+-dxbr = "00000000b34d0000h",
+-dxr = "00000000b22d0000h",
+-ddbr = "00000000b31d0000h",
+-dd = "000000006d000000j",
+-ddr = "0000000000002d00g",
+-debr = "00000000b30d0000h",
+-de = "000000007d000000j",
+-der = "0000000000003d00g",
+-dl = "0000e30000000097k",
+-dlr = "00000000b9970000h",
+-dlg = "0000e30000000087k",
+-dlgr = "00000000b9870000h",
+-dsg = "0000e3000000000dk",
+-dsgr = "00000000b90d0000h",
+-dsgf = "0000e3000000001dk",
+-dsgfr = "00000000b91d0000h",
+-x = "0000000057000000j",
+-xr = "0000000000001700g",
+-xy = "0000e30000000057k",
+-xg = "0000e30000000082k",
+-xgr = "00000000b9820000h",
+-xihf = "0000c00600000000l",
+-xilf = "0000c00700000000l",
+-ex = "0000000044000000j",
+-exrl = "0000c60000000000l",
+-ear = "00000000b24f0000h",
+-esea = "00000000b99d0000h",
+-eextr = "00000000b3ed0000h",
+-eedtr = "00000000b3e50000h",
+-ecag = "0000eb000000004cn",
+-efpc = "00000000b38c0000h",
+-epar = "00000000b2260000h",
+-epair = "00000000b99a0000h",
+-epsw = "00000000b98d0000h",
+-esar = "00000000b2270000h",
+-esair = "00000000b99b0000h",
+-esxtr = "00000000b3ef0000h",
+-esdtr = "00000000b3e70000h",
+-ereg = "00000000b2490000h",
+-eregg = "00000000b90e0000h",
+-esta = "00000000b24a0000h",
+-flogr = "00000000b9830000h",
+-hdr = "0000000000002400g",
+-her = "0000000000003400g",
+-iac = "00000000b2240000h",
+-ic = "0000000043000000j",
+-icy = "0000e30000000073k",
+-icmh = "0000eb0000000080n",
+-icm = "00000000bf000000m",
+-icmy = "0000eb0000000081n",
+-iihf = "0000c00800000000l",
+-iilf = "0000c00900000000l",
+-ipm = "00000000b2220000h",
+-iske = "00000000b2290000h",
+-ivsk = "00000000b2230000h",
+-l = "0000000058000000j",
+-lr = "0000000000001800g",
+-ly = "0000e30000000058k",
+-lg = "0000e30000000004k",
+-lgr = "00000000b9040000h",
+-lgf = "0000e30000000014k",
+-lgfr = "00000000b9140000h",
+-lxr = "00000000b3650000h",
+-ld = "0000000068000000j",
+-ldr = "0000000000002800g",
+-ldy = "0000ed0000000065k",
+-le = "0000000078000000j",
+-ler = "0000000000003800g",
+- ley = "0000ed0000000064k",
+-lam = "000000009a000000m",
+-lamy = "0000eb000000009an",
+-la = "0000000041000000j",
+-lay = "0000e30000000071k",
+-lae = "0000000051000000j",
+-laey = "0000e30000000075k",
+-larl = "0000c00000000000l",
+-laa = "0000eb00000000f8n",
+-laag = "0000eb00000000e8n",
+-laal = "0000eb00000000fan",
+-laalg = "0000eb00000000ean",
+-lan = "0000eb00000000f4n",
+-lang = "0000eb00000000e4n",
+-lax = "0000eb00000000f7n",
+-laxg = "0000eb00000000e7n",
+-lao = "0000eb00000000f6n",
+-laog = "0000eb00000000e6n",
+-lt = "0000e30000000012k",
+-ltr = "0000000000001200g",
+-ltg = "0000e30000000002k",
+-ltgr = "00000000b9020000h",
+-ltgf = "0000e30000000032k",
+-ltgfr = "00000000b9120000h",
+-ltxbr = "00000000b3420000h",
+-ltxtr = "00000000b3de0000h",
+-ltxr = "00000000b3620000h",
+-ltdbr = "00000000b3120000h",
+-ltdtr = "00000000b3d60000h",
+-ltdr = "0000000000002200g",
+-ltebr = "00000000b3020000h",
+-lter = "0000000000003200g",
+-lb = "0000e30000000076k",
+-lbr = "00000000b9260000h",
+-lgb = "0000e30000000077k",
+-lgbr = "00000000b9060000h",
+- lbh = "0000e300000000c0k",
+-lcr = "0000000000001300g",
+-lcgr = "00000000b9030000h",
+-lcgfr = "00000000b9130000h",
+-lcxbr = "00000000b3430000h",
+-lcxr = "00000000b3630000h",
+-lcdbr = "00000000b3130000h",
+-lcdr = "0000000000002300g",
+-lcdfr = "00000000b3730000h",
+-lcebr = "00000000b3030000h",
+-lcer = "0000000000003300g",
+-lctl = "00000000b7000000m",
+-lctlg = "0000eb000000002fn",
+-fixr = "00000000b3670000h",
+-fidr = "00000000b37f0000h",
+-fier = "00000000b3770000h",
+-ldgr = "00000000b3c10000h",
+-lgdr = "00000000b3cd0000h",
+-lh = "0000000048000000j",
+-lhr = "00000000b9270000h",
+-lhy = "0000e30000000078k",
+-lgh = "0000e30000000015k",
+-lghr = "00000000b9070000h",
+-lhh = "0000e300000000c4k",
+-lhrl = "0000c40500000000l",
+-lghrl = "0000c40400000000l",
+-lfh = "0000e300000000cak",
+-lgfi = "0000c00100000000l",
+-lxdbr = "00000000b3050000h",
+-lxdr = "00000000b3250000h",
+-lxebr = "00000000b3060000h",
+-lxer = "00000000b3260000h",
+-ldebr = "00000000b3040000h",
+-lder = "00000000b3240000h",
+-llgf = "0000e30000000016k",
+-llgfr = "00000000b9160000h",
+-llc = "0000e30000000094k",
+-llcr = "00000000b9940000h",
+-llgc = "0000e30000000090k",
+-llgcr = "00000000b9840000h",
+-llch = "0000e300000000c2k",
+-llh = "0000e30000000095k",
+-llhr = "00000000b9950000h",
+-llgh = "0000e30000000091k",
+-llghr = "00000000b9850000h",
+-llhh = "0000e300000000c6k",
+-llhrl = "0000c40200000000l",
+-llghrl = "0000c40600000000l",
+-llihf = "0000c00e00000000l",
+-llilf = "0000c00f00000000l",
+-llgfrl = "0000c40e00000000l",
+-llgt = "0000e30000000017k",
+-llgtr = "00000000b9170000h",
+-lm = "0000000098000000m",
+-lmy = "0000eb0000000098n",
+-lmg = "0000eb0000000004n",
+-lmh = "0000eb0000000096n",
+-lnr = "0000000000001100g",
+-lngr = "00000000b9010000h",
+-lngfr = "00000000b9110000h",
+-lnxbr = "00000000b3410000h",
+-lnxr = "00000000b3610000h",
+-lndbr = "00000000b3110000h",
+-lndr = "0000000000002100g",
+-lndfr = "00000000b3710000h",
+-lnebr = "00000000b3010000h",
+-lner = "0000000000003100g",
+-loc = "0000eb00000000f2n",
+-locg = "0000eb00000000e2n",
+-lpq = "0000e3000000008fk",
+-lpr = "0000000000001000g",
+-lpgr = "00000000b9000000h",
+-lpgfr = "00000000b9100000h",
+-lpxbr = "00000000b3400000h",
+-lpxr = "00000000b3600000h",
+-lpdbr = "00000000b3100000h",
+-lpdr = "0000000000002000g",
+-lpdfr = "00000000b3700000h",
+-lpebr = "00000000b3000000h",
+-lper = "0000000000003000g",
+-lra = "00000000b1000000j",
+-lray = "0000e30000000013k",
+-lrag = "0000e30000000003k",
+-lrl = "0000c40d00000000l",
+-lgrl = "0000c40800000000l",
+-lgfrl = "0000c40c00000000l",
+-lrvh = "0000e3000000001fk",
+-lrv = "0000e3000000001ek",
+-lrvr = "00000000b91f0000h",
+-lrvg = "0000e3000000000fk",
+-lrvgr = "00000000b90f0000h",
+-ldxbr = "00000000b3450000h",
+-ldxr = "0000000000002500g",
+-lrdr = "0000000000002500g",
+-lexbr = "00000000b3460000h",
+-lexr = "00000000b3660000h",
+-ledbr = "00000000b3440000h",
+-ledr = "0000000000003500g",
+-lrer = "0000000000003500g",
+-lura = "00000000b24b0000h",
+-lurag = "00000000b9050000h",
+-lzxr = "00000000b3760000h",
+-lzdr = "00000000b3750000h",
+-lzer = "00000000b3740000h",
+-msta = "00000000b2470000h",
+-mvcl = "0000000000000e00g",
+-mvcle = "00000000a8000000m",
+-mvclu = "0000eb000000008en",
+-mvpg = "00000000b2540000h",
+-mvst = "00000000b2550000h",
+-m = "000000005c000000j",
+-mfy = "0000e3000000005ck",
+-mr = "0000000000001c00g",
+-mxbr = "00000000b34c0000h",
+-mxr = "0000000000002600g",
+-mdbr = "00000000b31c0000h",
+-md = "000000006c000000j",
+-mdr = "0000000000002c00g",
+-mxdbr = "00000000b3070000h",
+-mxd = "0000000067000000j",
+-mxdr = "0000000000002700g",
+-meebr = "00000000b3170000h",
+-meer = "00000000b3370000h",
+-mdebr = "00000000b30c0000h",
+-mde = "000000007c000000j",
+-mder = "0000000000003c00g",
+-me = "000000007c000000j",
+-mer = "0000000000003c00g",
+-mh = "000000004c000000j",
+-mhy = "0000e3000000007ck",
+-mlg = "0000e30000000086k",
+-mlgr = "00000000b9860000h",
+-ml = "0000e30000000096k",
+-mlr = "00000000b9960000h",
+-ms = "0000000071000000j",
+-msr = "00000000b2520000h",
+-msy = "0000e30000000051k",
+-msg = "0000e3000000000ck",
+-msgr = "00000000b90c0000h",
+-msgf = "0000e3000000001ck",
+-msgfr = "00000000b91c0000h",
+-msfi = "0000c20100000000l",
+-msgfi = "0000c20000000000l",
+-o = "0000000056000000j",
+-["or"] = "0000000000001600g",
+-oy = "0000e30000000056k",
+-og = "0000e30000000081k",
+-ogr = "00000000b9810000h",
+-oihf = "0000c00c00000000l",
+-oilf = "0000c00d00000000l",
+-pgin = "00000000b22e0000h",
+-pgout = "00000000b22f0000h",
+-pcc = "00000000b92c0000h",
+-pckmo = "00000000b9280000h",
+-pfmf = "00000000b9af0000h",
+-ptf = "00000000b9a20000h",
+-popcnt = "00000000b9e10000h",
+-pfd = "0000e30000000036k",
+-pfdrl = "0000c60200000000l",
+-pt = "00000000b2280000h",
+-pti = "00000000b99e0000h",
+-palb = "00000000b2480000h",
+-rrbe = "00000000b22a0000h",
+-rrbm = "00000000b9ae0000h",
+-rll = "0000eb000000001dn",
+-rllg = "0000eb000000001cn",
+-srst = "00000000b25e0000h",
+-srstu = "00000000b9be0000h",
+-sar = "00000000b24e0000h",
+-sfpc = "00000000b3840000h",
+-sfasr = "00000000b3850000h",
+-spm = "000000000000400g",
+-ssar = "00000000b2250000h",
+-ssair = "00000000b99f0000h",
+-slda = "000000008f000000m",
+-sldl = "000000008d000000m",
+-sla = "000000008b000000m",
+-slak = "0000eb00000000ddn",
+-slag = "0000eb000000000bn",
+-sll = "0000000089000000m",
+-sllk = "0000eb00000000dfn",
+-sllg = "0000eb000000000dn",
+-srda = "000000008e000000m",
+-srdl = "000000008c000000m",
+-sra = "000000008a000000m",
+-srak = "0000eb00000000dcn",
+-srag = "0000eb000000000an",
+-srl = "0000000088000000m",
+-srlk = "0000eb00000000den",
+-srlg = "0000eb000000000cn",
+-sqxbr = "00000000b3160000h",
+-sqxr = "00000000b3360000h",
+-sqdbr = "00000000b3150000h",
+-sqdr = "00000000b2440000h",
+-sqebr = "00000000b3140000h",
+-sqer = "00000000b2450000h",
+-st = "0000000050000000j",
+-sty = "0000e30000000050k",
+-stg = "0000e30000000024k",
+-std = "0000000060000000j",
+-stdy = "0000ed0000000067k",
+-ste = "0000000070000000j",
+-stey = "0000ed0000000066k",
+-stam = "000000009b000000m",
+-stamy = "0000eb000000009bn",
+-stc = "0000000042000000j",
+-stcy = "0000e30000000072k",
+-stch = "0000e300000000c3k",
+-stcmh = "0000eb000000002cn",
+-stcm = "00000000be000000m",
+-stcmy = "0000eb000000002dn",
+-stctl = "00000000b6000000m",
+-stctg = "0000eb0000000025n",
+-sth = "0000000040000000j",
+-sthy = "0000e30000000070k",
+-sthh = "0000e300000000c7k",
+-sthrl = "0000c40700000000l",
+-stfh = "0000e300000000cbk",
+-stm = "0000000090000000m",
+-stmy = "0000eb0000000090n",
+-stmg = "0000eb0000000024n",
+-stmh = "0000eb0000000026n",
+-stoc = "0000eb00000000f3n",
+-stocg = "0000eb00000000e3n",
+-stpq = "0000e3000000008ek",
+-strl = "0000c40f00000000l",
+-stgrl = "0000c40b00000000l",
+-strvh = "0000e3000000003fk",
+-strv = "0000e3000000003ek",
+-strvg = "0000e3000000002fk",
+-stura = "00000000b2460000h",
+-sturg = "00000000b9250000h",
+-s = "000000005b000000j",
+-sr = "0000000000001b00g",
+-sy = "0000e3000000005bk",
+-sg = "0000e30000000009k",
+-sgr = "00000000b9090000h",
+-sgf = "0000e30000000019k",
+-sgfr = "00000000b9190000h",
+-sxbr = "00000000b34b0000h",
+-sdbr = "00000000b31b0000h",
+-sebr = "00000000b30b0000h",
+-sh = "000000004b000000j",
+-shy = "0000e3000000007bk",
+-sl = "000000005f000000j",
+-slr = "0000000000001f00g",
+-sly = "0000e3000000005fk",
+-slg = "0000e3000000000bk",
+-slgr = "00000000b90b0000h",
+-slgf = "0000e3000000001bk",
+-slgfr = "00000000b91b0000h",
+-slfi = "0000c20500000000l",
+-slgfi = "0000c20400000000l",
+-slb = "0000e30000000099k",
+-slbr = "00000000b9990000h",
+-slbg = "0000e30000000089k",
+-slbgr = "00000000b9890000h",
+-sxr = "0000000000003700g",
+-sd = "000000006b000000j",
+-sdr = "0000000000002b00g",
+-se = "000000007b000000j",
+-ser = "0000000000003b00g",
+-su = "000000007f000000j",
+-sur = "0000000000003f00g",
+-sw = "000000006f000000j",
+-swr = "0000000000002f00g",
+-tar = "00000000b24c0000h",
+-tb = "00000000b22c0000h",
+-trace = "0000000099000000m",
+-tracg = "0000eb000000000fn",
+-tre = "00000000b2a50000h",
++a_4 = "000000005a000000j",
++ar_2 = "0000000000001a00g",
++ay_5 = "0000e3000000005al",
++ag_5 = "0000e30000000008l",
++agr_2 = "00000000b9080000h",
++agf_5 = "0000e30000000018l",
++agfr_2 = "00000000b9180000h",
++axbr_2 = "00000000b34a0000h",
++adbr_2 = "00000000b31a0000h",
++aebr_2 = "00000000b30a0000h",
++ah_4 = "000000004a000000j",
++ahy_5 = "0000e3000000007al",
++afi_3 = "0000c20900000000n",
++agfi_3 = "0000c20800000000n",
++aih_3 = "0000cc0800000000n",
++al_4 = "000000005e000000j",
++alr_2 = "0000000000001e00g",
++aly_5 = "0000e3000000005el",
++alg_5 = "0000e3000000000al",
++algr_2 = "00000000b90a0000h",
++algf_5 = "0000e3000000001al",
++algfr_2 = "00000000b91a0000h",
++alfi_3 = "0000c20b00000000n",
++algfi_3 = "0000c20a00000000n",
++alc_5 = "0000e30000000098l",
++alcr_2 = "00000000b9980000h",
++alcg_5 = "0000e30000000088l",
++alcgr_2 = "00000000b9880000h",
++alsih_3 = "0000cc0a00000000n",
++alsihn_3 = "0000cc0b00000000n",
++axr_2 = "0000000000003600g",
++ad_4 = "000000006a000000j",
++adr_2 = "0000000000002a00g",
++ae_4 = "000000007a000000j",
++aer_2 = "0000000000003a00g",
++aw_4 = "000000006e000000j",
++awr_2 = "0000000000002e00g",
++au_4 = "000000007e000000j",
++aur_2 = "0000000000003e00g",
++n_4 = "0000000054000000j",
++nr_2 = "0000000000001400g",
++ny_5 = "0000e30000000054l",
++ng_5 = "0000e30000000080l",
++ngr_2 = "00000000b9800000h",
++nihf_3 = "0000c00a00000000n",
++nilf_3 = "0000c00b00000000n",
++bal_4 = "0000000045000000j",
++balr_2 = "000000000000500g",
++bas_4 = "000000004d000000j",
++basr_2 = "0000000000000d00g",
++bassm_2 = "0000000000000c00g",
++bsa_2 = "00000000b25a0000h",
++bsm_2 = "0000000000000b00g",
++bakr_2 = "00000000b2400000h",
++bsg_2 = "00000000b2580000h",
++bc_4 = "0000000047000000k",
++bcr_2 = "000000000000700g",
++bct_4 = "0000000046000000j",
++bctr_2 = "000000000000600g",
++bctg_5 = "0000e30000000046l",
++bctgr_2 = "00000000b9460000h",
++bxh_4 = "0000000086000000q",
++bxhg_5 = "0000eb0000000044s",
++bxle_4 = "0000000087000000q",
++bxleg_5 = "0000eb0000000045s",
++brasl_3 = "0000c00500000000o",
++brcl_3 = "0000c00400000000p",
++brcth_3 = "0000cc0600000000o",
++cksm_2 = "00000000b2410000h",
++km_2 = "00000000b92e0000h",
++kmf_2 = "00000000b92a0000h",
++kmc_2 = "00000000b92f0000h",
++kmo_2 = "00000000b92b0000h",
++c_4 = "0000000059000000j",
++cr_2 = "0000000000001900g",
++cy_5 = "0000e30000000059l",
++cg_5 = "0000e30000000020l",
++cgr_2 = "00000000b9200000h",
++cgf_5 = "0000e30000000030l",
++cgfr_2 = "00000000b9300000h",
++cxbr_2 = "00000000b3490000h",
++cxtr_2 = "00000000b3ec0000h",
++cxr_2 = "00000000b3690000h",
++cdbr_2 = "00000000b3190000h",
++cdtr_2 = "00000000b3e40000h",
++cd_4 = "0000000069000000j",
++cdr_2 = "0000000000002900g",
++cebr_2 = "00000000b3090000h",
++ce_4 = "0000000079000000j",
++cer_2 = "0000000000003900g",
++kxbr_2 = "00000000b3480000h",
++kxtr_2 = "00000000b3e80000h",
++kdbr_2 = "00000000b3180000h",
++kdtr_2 = "00000000b3e00000h",
++kebr_2 = "00000000b3080000h",
++cs_4 = "00000000ba000000q",
++csy_5 = "0000eb0000000014s",
++csg_5 = "0000eb0000000030s",
++csp_2 = "00000000b2500000h",
++cspg_2 = "00000000b98a0000h",
++cextr_2 = "00000000b3fc0000h",
++cedtr_2 = "00000000b3f40000h",
++cds_4 = "00000000bb000000q",
++cdsy_5 = "0000eb0000000031s",
++cdsg_5 = "0000eb000000003es",
++ch_4 = "0000000049000000j",
++chy_5 = "0000e30000000079l",
++cgh_5 = "0000e30000000034l",
++chrl_3 = "0000c60500000000o",
++cghrl_3 = "0000c60400000000o",
++chf_5 = "0000e300000000cdl",
++chhr_2 = "00000000b9cd0000h",
++chlr_2 = "00000000b9dd0000h",
++cfi_3 = "0000c20d00000000n",
++cgfi_3 = "0000c20c00000000n",
++cih_3 = "0000cc0d00000000n",
++cl_4 = "0000000055000000j",
++clr_2 = "0000000000001500g",
++cly_5 = "0000e30000000055l",
++clg_5 = "0000e30000000021l",
++clgr_2 = "00000000b9210000h",
++clgf_5 = "0000e30000000031l",
++clgfr_2 = "00000000b9310000h",
++clmh_5 = "0000eb0000000020t",
++clm_4 = "00000000bd000000r",
++clmy_5 = "0000eb0000000021t",
++clhf_5 = "0000e300000000cfl",
++clhhr_2 = "00000000b9cf0000h",
++clhlr_2 = "00000000b9df0000h",
++clfi_3 = "0000c20f00000000n",
++clgfi_3 = "0000c20e00000000n",
++clih_3 = "0000cc0f00000000n",
++clcl_2 = "0000000000000f00g",
++clcle_4 = "00000000a9000000q",
++clclu_5 = "0000eb000000008fs",
++clrl_3 = "0000c60f00000000o",
++clhrl_3 = "0000c60700000000o",
++clgrl_3 = "0000c60a00000000o",
++clghrl_3 = "0000c60600000000o",
++clgfrl_3 = "0000c60e00000000o",
++clst_2 = "00000000b25d0000h",
++crl_3 = "0000c60d00000000o",
++cgrl_3 = "0000c60800000000o",
++cgfrl_3 = "0000c60c00000000o",
++cuse_2 = "00000000b2570000h",
++cmpsc_2 = "00000000b2630000h",
++kimd_2 = "00000000b93e0000h",
++klmd_2 = "00000000b93f0000h",
++kmac_2 = "00000000b91e0000h",
++thdr_2 = "00000000b3590000h",
++thder_2 = "00000000b3580000h",
++cxfbr_2 = "00000000b3960000h",
++cxftr_2 = "00000000b9590000h",
++cxfr_2 = "00000000b3b60000h",
++cdfbr_2 = "00000000b3950000h",
++cdftr_2 = "00000000b9510000h",
++cdfr_2 = "00000000b3b50000h",
++cefbr_2 = "00000000b3940000h",
++cefr_2 = "00000000b3b40000h",
++cxgbr_2 = "00000000b3a60000h",
++cxgtr_2 = "00000000b3f90000h",
++cxgr_2 = "00000000b3c60000h",
++cdgbr_2 = "00000000b3a50000h",
++cdgtr_2 = "00000000b3f10000h",
++cdgr_2 = "00000000b3c50000h",
++cegbr_2 = "00000000b3a40000h",
++cegr_2 = "00000000b3c40000h",
++cxstr_2 = "00000000b3fb0000h",
++cdstr_2 = "00000000b3f30000h",
++cxutr_2 = "00000000b3fa0000h",
++cdutr_2 = "00000000b3f20000h",
++cvb_4 = "000000004f000000j",
++cvby_5 = "0000e30000000006l",
++cvbg_5 = "0000e3000000000el",
++cvd_4 = "000000004e000000j",
++cvdy_5 = "0000e30000000026l",
++cvdg_5 = "0000e3000000002el",
++cuxtr_2 = "00000000b3ea0000h",
++cudtr_2 = "00000000b3e20000h",
++cu42_2 = "00000000b9b30000h",
++cu41_2 = "00000000b9b20000h",
++cpya_2 = "00000000b24d0000h",
++d_4 = "000000005d000000j",
++dr_2 = "0000000000001d00g",
++dxbr_2 = "00000000b34d0000h",
++dxr_2 = "00000000b22d0000h",
++ddbr_2 = "00000000b31d0000h",
++dd_4 = "000000006d000000j",
++ddr_2 = "0000000000002d00g",
++debr_2 = "00000000b30d0000h",
++de_4 = "000000007d000000j",
++der_2 = "0000000000003d00g",
++dl_5 = "0000e30000000097l",
++dlr_2 = "00000000b9970000h",
++dlg_5 = "0000e30000000087l",
++dlgr_2 = "00000000b9870000h",
++dsg_5 = "0000e3000000000dl",
++dsgr_2 = "00000000b90d0000h",
++dsgf_5 = "0000e3000000001dl",
++dsgfr_2 = "00000000b91d0000h",
++x_4 = "0000000057000000j",
++xr_2 = "0000000000001700g",
++xy_5 = "0000e30000000057l",
++xg_5 = "0000e30000000082l",
++xgr_2 = "00000000b9820000h",
++xihf_3 = "0000c00600000000n",
++xilf_3 = "0000c00700000000n",
++ex_4 = "0000000044000000j",
++exrl_3 = "0000c60000000000o",
++ear_2 = "00000000b24f0000h",
++esea_2 = "00000000b99d0000h",
++eextr_2 = "00000000b3ed0000h",
++eedtr_2 = "00000000b3e50000h",
++ecag_5 = "0000eb000000004cs",
++efpc_2 = "00000000b38c0000h",
++epar_2 = "00000000b2260000h",
++epair_2 = "00000000b99a0000h",
++epsw_2 = "00000000b98d0000h",
++esar_2 = "00000000b2270000h",
++esair_2 = "00000000b99b0000h",
++esxtr_2 = "00000000b3ef0000h",
++esdtr_2 = "00000000b3e70000h",
++ereg_2 = "00000000b2490000h",
++eregg_2 = "00000000b90e0000h",
++esta_2 = "00000000b24a0000h",
++flogr_2 = "00000000b9830000h",
++hdr_2 = "0000000000002400g",
++her_2 = "0000000000003400g",
++iac_2 = "00000000b2240000h",
++ic_4 = "0000000043000000j",
++icy_5 = "0000e30000000073l",
++icmh_5 = "0000eb0000000080t",
++icm_4 = "00000000bf000000r",
++icmy_5 = "0000eb0000000081t",
++iihf_3 = "0000c00800000000n",
++iilf_3 = "0000c00900000000n",
++ipm_2 = "00000000b2220000h",
++iske_2 = "00000000b2290000h",
++ivsk_2 = "00000000b2230000h",
++l_4 = "0000000058000000j",
++lr_2 = "0000000000001800g",
++ly_5 = "0000e30000000058l",
++lg_5 = "0000e30000000004l",
++lgr_2 = "00000000b9040000h",
++lgf_5 = "0000e30000000014l",
++lgfr_2 = "00000000b9140000h",
++lxr_2 = "00000000b3650000h",
++ld_4 = "0000000068000000j",
++ldr_2 = "0000000000002800g",
++ldy_5 = "0000ed0000000065l",
++le_4 = "0000000078000000j",
++ler_2 = "0000000000003800g",
++ley_5 = "0000ed0000000064l",
++lam_4 = "000000009a000000q",
++lamy_5 = "0000eb000000009as",
++la_4 = "0000000041000000j",
++lay_5 = "0000e30000000071l",
++lae_4 = "0000000051000000j",
++laey_5 = "0000e30000000075l",
++larl_3 = "0000c00000000000o",
++laa_5 = "0000eb00000000f8s",
++laag_5 = "0000eb00000000e8s",
++laal_5 = "0000eb00000000fas",
++laalg_5 = "0000eb00000000eas",
++lan_5 = "0000eb00000000f4s",
++lang_5 = "0000eb00000000e4s",
++lax_5 = "0000eb00000000f7s",
++laxg_5 = "0000eb00000000e7s",
++lao_5 = "0000eb00000000f6s",
++laog_5 = "0000eb00000000e6s",
++lt_5 = "0000e30000000012l",
++ltr_2 = "0000000000001200g",
++ltg_5 = "0000e30000000002l",
++ltgr_2 = "00000000b9020000h",
++ltgf_5 = "0000e30000000032l",
++ltgfr_2 = "00000000b9120000h",
++ltxbr_2 = "00000000b3420000h",
++ltxtr_2 = "00000000b3de0000h",
++ltxr_2 = "00000000b3620000h",
++ltdbr_2 = "00000000b3120000h",
++ltdtr_2 = "00000000b3d60000h",
++ltdr_2 = "0000000000002200g",
++ltebr_2 = "00000000b3020000h",
++lter_2 = "0000000000003200g",
++lb_5 = "0000e30000000076l",
++lbr_2 = "00000000b9260000h",
++lgb_5 = "0000e30000000077l",
++lgbr_2 = "00000000b9060000h",
++lbh_5 = "0000e300000000c0l",
++lcr_2 = "0000000000001300g",
++lcgr_2 = "00000000b9030000h",
++lcgfr_2 = "00000000b9130000h",
++lcxbr_2 = "00000000b3430000h",
++lcxr_2 = "00000000b3630000h",
++lcdbr_2 = "00000000b3130000h",
++lcdr_2 = "0000000000002300g",
++lcdfr_2 = "00000000b3730000h",
++lcebr_2 = "00000000b3030000h",
++lcer_2 = "0000000000003300g",
++lctl_4 = "00000000b7000000q",
++lctlg_5 = "0000eb000000002fs",
++fixr_2 = "00000000b3670000h",
++fidr_2 = "00000000b37f0000h",
++fier_2 = "00000000b3770000h",
++ldgr_2 = "00000000b3c10000h",
++lgdr_2 = "00000000b3cd0000h",
++lh_4 = "0000000048000000j",
++lhr_2 = "00000000b9270000h",
++lhy_5 = "0000e30000000078l",
++lgh_5 = "0000e30000000015l",
++lghr_2 = "00000000b9070000h",
++lhh_5 = "0000e300000000c4l",
++lhrl_3 = "0000c40500000000o",
++lghrl_3 = "0000c40400000000o",
++lfh_5 = "0000e300000000cal",
++lgfi_3 = "0000c00100000000n",
++lxdbr_2 = "00000000b3050000h",
++lxdr_2 = "00000000b3250000h",
++lxebr_2 = "00000000b3060000h",
++lxer_2 = "00000000b3260000h",
++ldebr_2 = "00000000b3040000h",
++lder_2 = "00000000b3240000h",
++llgf_5 = "0000e30000000016l",
++llgfr_2 = "00000000b9160000h",
++llc_5 = "0000e30000000094l",
++llcr_2 = "00000000b9940000h",
++llgc_5 = "0000e30000000090l",
++llgcr_2 = "00000000b9840000h",
++llch_5 = "0000e300000000c2l",
++llh_5 = "0000e30000000095l",
++llhr_2 = "00000000b9950000h",
++llgh_5 = "0000e30000000091l",
++llghr_2 = "00000000b9850000h",
++llhh_5 = "0000e300000000c6l",
++llhrl_3 = "0000c40200000000o",
++llghrl_3 = "0000c40600000000o",
++llihf_3 = "0000c00e00000000n",
++llilf_3 = "0000c00f00000000n",
++llgfrl_3 = "0000c40e00000000o",
++llgt_5 = "0000e30000000017l",
++llgtr_2 = "00000000b9170000h",
++lm_4 = "0000000098000000q",
++lmy_5 = "0000eb0000000098s",
++lmg_5 = "0000eb0000000004s",
++lmh_5 = "0000eb0000000096s",
++lnr_2 = "0000000000001100g",
++lngr_2 = "00000000b9010000h",
++lngfr_2 = "00000000b9110000h",
++lnxbr_2 = "00000000b3410000h",
++lnxr_2 = "00000000b3610000h",
++lndbr_2 = "00000000b3110000h",
++lndr_2 = "0000000000002100g",
++lndfr_2 = "00000000b3710000h",
++lnebr_2 = "00000000b3010000h",
++lner_2 = "0000000000003100g",
++loc_5 = "0000eb00000000f2t",
++locg_5 = "0000eb00000000e2t",
++lpq_5 = "0000e3000000008fl",
++lpr_2 = "0000000000001000g",
++lpgr_2 = "00000000b9000000h",
++lpgfr_2 = "00000000b9100000h",
++lpxbr_2 = "00000000b3400000h",
++lpxr_2 = "00000000b3600000h",
++lpdbr_2 = "00000000b3100000h",
++lpdr_2 = "0000000000002000g",
++lpdfr_2 = "00000000b3700000h",
++lpebr_2 = "00000000b3000000h",
++lper_2 = "0000000000003000g",
++lra_4 = "00000000b1000000j",
++lray_5 = "0000e30000000013l",
++lrag_5 = "0000e30000000003l",
++lrl_3 = "0000c40d00000000o",
++lgrl_3 = "0000c40800000000o",
++lgfrl_3 = "0000c40c00000000o",
++lrvh_5 = "0000e3000000001fl",
++lrv_5 = "0000e3000000001el",
++lrvr_2 = "00000000b91f0000h",
++lrvg_5 = "0000e3000000000fl",
++lrvgr_2 = "00000000b90f0000h",
++ldxbr_2 = "00000000b3450000h",
++ldxr_2 = "0000000000002500g",
++lrdr_2 = "0000000000002500g",
++lexbr_2 = "00000000b3460000h",
++lexr_2 = "00000000b3660000h",
++ledbr_2 = "00000000b3440000h",
++ledr_2 = "0000000000003500g",
++lrer_2 = "0000000000003500g",
++lura_2 = "00000000b24b0000h",
++lurag_2 = "00000000b9050000h",
++lzxr_2 = "00000000b3760000h",
++lzdr_2 = "00000000b3750000h",
++lzer_2 = "00000000b3740000h",
++msta_2 = "00000000b2470000h",
++mvcl_2 = "0000000000000e00g",
++mvcle_4 = "00000000a8000000q",
++mvclu_5 = "0000eb000000008es",
++mvpg_2 = "00000000b2540000h",
++mvst_2 = "00000000b2550000h",
++m_4 = "000000005c000000j",
++mfy_5 = "0000e3000000005cl",
++mr_2 = "0000000000001c00g",
++mxbr_2 = "00000000b34c0000h",
++mxr_2 = "0000000000002600g",
++mdbr_2 = "00000000b31c0000h",
++md_4 = "000000006c000000j",
++mdr_2 = "0000000000002c00g",
++mxdbr_2 = "00000000b3070000h",
++mxd_4 = "0000000067000000j",
++mxdr_2 = "0000000000002700g",
++meebr_2 = "00000000b3170000h",
++meer_2 = "00000000b3370000h",
++mdebr_2 = "00000000b30c0000h",
++mde_4 = "000000007c000000j",
++mder_2 = "0000000000003c00g",
++me_4 = "000000007c000000j",
++mer_2 = "0000000000003c00g",
++mh_4 = "000000004c000000j",
++mhy_5 = "0000e3000000007cl",
++mlg_5 = "0000e30000000086l",
++mlgr_2 = "00000000b9860000h",
++ml_5 = "0000e30000000096l",
++mlr_2 = "00000000b9960000h",
++ms_4 = "0000000071000000j",
++msr_2 = "00000000b2520000h",
++msy_5 = "0000e30000000051l",
++msg_5 = "0000e3000000000cl",
++msgr_2 = "00000000b90c0000h",
++msgf_5 = "0000e3000000001cl",
++msgfr_2 = "00000000b91c0000h",
++msfi_3 = "0000c20100000000n",
++msgfi_3 = "0000c20000000000n",
++o_4 = "0000000056000000j",
++["or_2"] = "0000000000001600g",
++oy_5 = "0000e30000000056l",
++og_5 = "0000e30000000081l",
++ogr_2 = "00000000b9810000h",
++oihf_3 = "0000c00c00000000n",
++oilf_3 = "0000c00d00000000n",
++pgin_2 = "00000000b22e0000h",
++pgout_2 = "00000000b22f0000h",
++pcc_2 = "00000000b92c0000h",
++pckmo_2 = "00000000b9280000h",
++pfmf_2 = "00000000b9af0000h",
++ptf_2 = "00000000b9a20000h",
++popcnt_2 = "00000000b9e10000h",
++pfd_5 = "0000e30000000036m",
++pfdrl_3 = "0000c60200000000p",
++pt_2 = "00000000b2280000h",
++pti_2 = "00000000b99e0000h",
++palb_2 = "00000000b2480000h",
++rrbe_2 = "00000000b22a0000h",
++rrbm_2 = "00000000b9ae0000h",
++rll_5 = "0000eb000000001ds",
++rllg_5 = "0000eb000000001cs",
++srst_2 = "00000000b25e0000h",
++srstu_2 = "00000000b9be0000h",
++sar_2 = "00000000b24e0000h",
++sfpc_2 = "00000000b3840000h",
++sfasr_2 = "00000000b3850000h",
++spm_2 = "000000000000400g",
++ssar_2 = "00000000b2250000h",
++ssair_2 = "00000000b99f0000h",
++slda_4 = "000000008f000000q",
++sldl_4 = "000000008d000000q",
++sla_4 = "000000008b000000q",
++slak_5 = "0000eb00000000dds",
++slag_5 = "0000eb000000000bs",
++sll_4 = "0000000089000000q",
++sllk_5 = "0000eb00000000dfs",
++sllg_5 = "0000eb000000000ds",
++srda_4 = "000000008e000000q",
++srdl_4 = "000000008c000000q",
++sra_4 = "000000008a000000q",
++srak_5 = "0000eb00000000dcs",
++srag_5 = "0000eb000000000as",
++srl_4 = "0000000088000000q",
++srlk_5 = "0000eb00000000des",
++srlg_5 = "0000eb000000000cs",
++sqxbr_2 = "00000000b3160000h",
++sqxr_2 = "00000000b3360000h",
++sqdbr_2 = "00000000b3150000h",
++sqdr_2 = "00000000b2440000h",
++sqebr_2 = "00000000b3140000h",
++sqer_2 = "00000000b2450000h",
++st_4 = "0000000050000000j",
++sty_5 = "0000e30000000050l",
++stg_5 = "0000e30000000024l",
++std_4 = "0000000060000000j",
++stdy_5 = "0000ed0000000067l",
++ste_4 = "0000000070000000j",
++stey_5 = "0000ed0000000066l",
++stam_4 = "000000009b000000q",
++stamy_5 = "0000eb000000009bs",
++stc_4 = "0000000042000000j",
++stcy_5 = "0000e30000000072l",
++stch_5 = "0000e300000000c3l",
++stcmh_5 = "0000eb000000002ct",
++stcm_4 = "00000000be000000r",
++stcmy_5 = "0000eb000000002dt",
++stctl_4 = "00000000b6000000q",
++stctg_5 = "0000eb0000000025s",
++sth_4 = "0000000040000000j",
++sthy_5 = "0000e30000000070l",
++sthh_5 = "0000e300000000c7l",
++sthrl_3 = "0000c40700000000o",
++stfh_5 = "0000e300000000cbl",
++stm_4 = "0000000090000000q",
++stmy_5 = "0000eb0000000090s",
++stmg_5 = "0000eb0000000024s",
++stmh_5 = "0000eb0000000026s",
++stoc_5 = "0000eb00000000f3t",
++stocg_5 = "0000eb00000000e3t",
++stpq_5 = "0000e3000000008el",
++strl_3 = "0000c40f00000000o",
++stgrl_3 = "0000c40b00000000o",
++strvh_5 = "0000e3000000003fl",
++strv_5 = "0000e3000000003el",
++strvg_5 = "0000e3000000002fl",
++stura_2 = "00000000b2460000h",
++sturg_2 = "00000000b9250000h",
++s_4 = "000000005b000000j",
++sr_2 = "0000000000001b00g",
++sy_5 = "0000e3000000005bl",
++sg_5 = "0000e30000000009l",
++sgr_2 = "00000000b9090000h",
++sgf_5 = "0000e30000000019l",
++sgfr_2 = "00000000b9190000h",
++sxbr_2 = "00000000b34b0000h",
++sdbr_2 = "00000000b31b0000h",
++sebr_2 = "00000000b30b0000h",
++sh_4 = "000000004b000000j",
++shy_5 = "0000e3000000007bl",
++sl_4 = "000000005f000000j",
++slr_2 = "0000000000001f00g",
++sly_5 = "0000e3000000005fl",
++slg_5 = "0000e3000000000bl",
++slgr_2 = "00000000b90b0000h",
++slgf_5 = "0000e3000000001bl",
++slgfr_2 = "00000000b91b0000h",
++slfi_3 = "0000c20500000000n",
++slgfi_3 = "0000c20400000000n",
++slb_5 = "0000e30000000099l",
++slbr_2 = "00000000b9990000h",
++slbg_5 = "0000e30000000089l",
++slbgr_2 = "00000000b9890000h",
++sxr_2 = "0000000000003700g",
++sd_4 = "000000006b000000j",
++sdr_2 = "0000000000002b00g",
++se_4 = "000000007b000000j",
++ser_2 = "0000000000003b00g",
++su_4 = "000000007f000000j",
++sur_2 = "0000000000003f00g",
++sw_4 = "000000006f000000j",
++swr_2 = "0000000000002f00g",
++tar_2 = "00000000b24c0000h",
++tb_2 = "00000000b22c0000h",
++trace_4 = "0000000099000000q",
++tracg_5 = "0000eb000000000fs",
++tre_2 = "00000000b2a50000h",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1230,13 +1230,17 @@ local function parse_template(params, template, nparams, pos)
+ local pr1,pr2,pr3
+ if p == "g" then
+ pr1,pr2=param[n],param[n+1]
+- op = op + parse_reg(pr1)+parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
++ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
+ elseif p == "h" then
+-
++ pr1,pr2=param[n],param[n+1]
++ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
+ elseif p == "j" then
+-
++ op = op + shl(parse_reg(param[1],24) + shl(parse_reg(param[2],20) + shl(parse_reg(param[3]),16) + parse_number(param[4])
++ -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is satisfied
++
+ elseif p == "k" then
+-
++ op = op + shl(parse_reg(param[1],40) + shl(parse_reg(param[2],36) + shl(parse_reg(param[3]),32) + parse_number(param[4]) parse_number(param[5])
++ -- assuming params are passed as (R1,X2,B2,DL2,DH2)
+ elseif p == "l" then
+
+ elseif p == "m" then
+
+From 0e3241180f4e5e54a45e147bbedfa022d4bccb58 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 29 Nov 2016 13:45:59 -0500
+Subject: [PATCH 038/260] Various cleanup of dasm_s390x.lua
+
+ - Fix syntax errors
+ - Fix whitespace (use two-space indentation to match surrounding code)
+---
+ dynasm/dasm_s390x.lua | 23 +++++++++++------------
+ 1 file changed, 11 insertions(+), 12 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index f1d492c12..2ae9e5944 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1225,29 +1225,28 @@ local function parse_template(params, template, nparams, pos)
+ local n,rs = 1,26
+
+ parse_reg_type = false
+- -- Process each character. (if its RX-a==> 1st iteration gets R, 2nd==X and so on)
++ -- Process each character.
+ for p in gmatch(sub(template, 17), ".") do
+ local pr1,pr2,pr3
+ if p == "g" then
+- pr1,pr2=param[n],param[n+1]
+- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
++ pr1,pr2=param[n],param[n+1]
++ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
+ elseif p == "h" then
+- pr1,pr2=param[n],param[n+1]
+- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
++ pr1,pr2=param[n],param[n+1]
++ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
+ elseif p == "j" then
+- op = op + shl(parse_reg(param[1],24) + shl(parse_reg(param[2],20) + shl(parse_reg(param[3]),16) + parse_number(param[4])
+- -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is satisfied
+-
++ op = op + shl(parse_reg(param[1]),24) + shl(parse_reg(param[2]),20) + shl(parse_reg(param[3]),16) + parse_number(param[4])
++ -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is satisfied
+ elseif p == "k" then
+- op = op + shl(parse_reg(param[1],40) + shl(parse_reg(param[2],36) + shl(parse_reg(param[3]),32) + parse_number(param[4]) parse_number(param[5])
+- -- assuming params are passed as (R1,X2,B2,DL2,DH2)
++ op = op + shl(parse_reg(param[1]),40) + shl(parse_reg(param[2]),36) + shl(parse_reg(param[3]),32) + parse_number(param[4]) + parse_number(param[5])
++ -- assuming params are passed as (R1,X2,B2,DL2,DH2)
+ elseif p == "l" then
+
+ elseif p == "m" then
+
+ elseif p == "n" then
+-
+- end
++
++ end
+ end
+ wputpos(pos, op)
+ end
+
+From 89ca41cca5537f4b4fe7d2802997f1a8626ddc22 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 29 Nov 2016 13:59:37 -0500
+Subject: [PATCH 039/260] Add sp -> r15 mapping and don't special case or_2
+
+It's convenient for sp to be a pseudonym for r15 (the stack pointer).
+'or_2' doesn't need to be special cased ('or' did because it is a
+keyword).
+---
+ dynasm/dasm_s390x.lua | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 2ae9e5944..0ec789334 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -214,15 +214,12 @@ end
+ ------------------------------------------------------------------------------
+
+ -- Arch-specific maps.
+--- TODO: add s390x related register names
+ -- Ext. register name -> int. name.
+---local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
+-local map_archdef = {}
++local map_archdef = { sp = "r15" }
+
+ -- Int. register name -> ext. name.
+--- local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
+-local map_reg_rev = {}
+-
++local map_reg_rev = { r15 = "sp" }
++
+ local map_type = {} -- Type name -> { ctype, reg }
+ local ctypenum = 0 -- Type number (for Dt... macros).
+
+@@ -1077,7 +1074,7 @@ msgfr_2 = "00000000b91c0000h",
+ msfi_3 = "0000c20100000000n",
+ msgfi_3 = "0000c20000000000n",
+ o_4 = "0000000056000000j",
+-["or_2"] = "0000000000001600g",
++or_2 = "0000000000001600g",
+ oy_5 = "0000e30000000056l",
+ og_5 = "0000e30000000081l",
+ ogr_2 = "00000000b9810000h",
+
+From 36479af87a0cd75781b5626152da70ab9f7b2f0a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 29 Nov 2016 15:24:11 -0500
+Subject: [PATCH 040/260] Add stubs for parsing memory operands and delete
+ unwanted code.
+
+Each memory operand will be a single parameter so we also need
+to update the instruction encoding nargs field.
+---
+ dynasm/dasm_s390x.h | 2 +-
+ dynasm/dasm_s390x.lua | 333 ++++--------------------------------------
+ 2 files changed, 30 insertions(+), 305 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 577920ac9..b327e7a60 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -21,7 +21,7 @@ enum {
+ /* The following actions need a buffer position. */
+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+- DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
++ DASM_REL_PC, DASM_LABEL_PC, DASM_DISP12, DASM_DISP20, DASM_IMM16, DASM_IMM32,
+ DASM__MAX
+ };
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 0ec789334..556f7fe4d 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -39,7 +39,7 @@ local wline, werror, wfatal, wwarn
+ local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+- "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
++ "REL_PC", "LABEL_PC", "DISP12", "DISP20", "IMM16", "IMM32",
+ }
+
+ -- Maximum number of section buffer positions for dasm_put().
+@@ -227,13 +227,6 @@ local ctypenum = 0 -- Type number (for Dt... macros).
+ function _M.revdef(s)
+ return map_reg_rev[s] or s
+ end
+--- not sure of these
+-local map_shift = { lsl = 0, lsr = 1, asr = 2, }
+-
+-local map_extend = {
+- uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
+- sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
+-}
+
+ local map_cond = {
+ o = 1, h = 2, hle = 3, l = 4,
+@@ -246,13 +239,11 @@ local map_cond = {
+
+ local parse_reg_type
+
+-
+ local function parse_gpr(expr)
+- -- assuming we get r0-r31 for now
+ local r = match(expr, "^r([1-3]?[0-9])$")
+ if r then
+ r = tonumber(r)
+- if r <= 31 then return r, tp end
++ if r <= 15 then return r, tp end
+ end
+ werror("bad register name `"..expr.."'")
+ end
+@@ -261,23 +252,11 @@ local function parse_fpr(expr)
+ local r = match(expr, "^f([1-3]?[0-9])$")
+ if r then
+ r = tonumber(r)
+- if r <= 31 then return r end
++ if r <= 15 then return r end
+ end
+ werror("bad register name `"..expr.."'")
+ end
+
+-
+-
+-
+-
+-local function parse_reg_base(expr)
+- if expr == "sp" then return 0x3e0 end
+- local base, tp = parse_reg(expr)
+- if parse_reg_type ~= "x" then werror("bad register type") end
+- parse_reg_type = false
+- return shl(base, 5), tp -- why is it shifted not able to make out
+-end
+-
+ local parse_ctx = {}
+
+ local loadenv = setfenv and function(s)
+@@ -300,262 +279,35 @@ local function parse_number(n)
+ return nil
+ end
+
+-local function parse_imm(imm, bits, shift, scale, signed)
+- imm = match(imm, "^#(.*)$")
+- if not imm then werror("expected immediate operand") end
+- local n = parse_number(imm)
+- if n then
+- local m = sar(n, scale)
+- if shl(m, scale) == n then
+- if signed then
+- local s = sar(m, bits-1)
+- if s == 0 then return shl(m, shift)
+- elseif s == -1 then return shl(m + shl(1, bits), shift) end
+- else
+- if sar(m, bits) == 0 then return shl(m, shift) end
+- end
+- end
+- werror("out of range immediate `"..imm.."'")
+- else
+- waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+- return 0
+- end
+-end
+-
+-local function parse_imm12(imm)
+- imm = match(imm, "^#(.*)$")
+- if not imm then werror("expected immediate operand") end
+- local n = parse_number(imm)
+- if n then
+- if shr(n, 12) == 0 then
+- return shl(n, 10)
+- elseif band(n, 0xff000fff) == 0 then
+- return shr(n, 2) + 0x00400000
+- end
+- werror("out of range immediate `"..imm.."'")
+- else
+- waction("IMM12", 0, imm)
+- return 0
+- end
+-end
+-
+-local function parse_imm13(imm)
+- imm = match(imm, "^#(.*)$")
+- if not imm then werror("expected immediate operand") end
+- local n = parse_number(imm)
+- local r64 = parse_reg_type == "x"
+- if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
+- local inv = false
+- if band(n, 1) == 1 then n = bit.bnot(n); inv = true end
+- local t = {}
+- for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end
+- local b = table.concat(t)
+- b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
+- local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
+- if p0 then
+- local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
+- if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
+- local s = band(-2*w, 0x3f) - 1
+- if w == 64 then s = s + 0x1000 end
+- if inv then
+- return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
+- else
+- return shl(w-#p0, 16) + shl(s+#p1, 10)
+- end
+- end
+- end
+- werror("out of range immediate `"..imm.."'")
+- elseif r64 then
+- waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
+- actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
+- return 0
+- else
+- waction("IMM13W", 0, imm)
+- return 0
+- end
+-end
+-
+-local function parse_imm6(imm)
+- imm = match(imm, "^#(.*)$")
+- if not imm then werror("expected immediate operand") end
+- local n = parse_number(imm)
+- if n then
+- if n >= 0 and n <= 63 then
+- return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0)
+- end
+- werror("out of range immediate `"..imm.."'")
+- else
+- waction("IMM6", 0, imm)
+- return 0
+- end
+-end
+-
+-local function parse_imm_load(imm, scale)
+- local n = parse_number(imm)
+- if n then
+- local m = sar(n, scale)
+- if shl(m, scale) == n and m >= 0 and m < 0x1000 then
+- return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
+- elseif n >= -256 and n < 256 then
+- return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
+- end
+- werror("out of range immediate `"..imm.."'")
+- else
+- waction("IMML", 0, imm)
+- return 0
+- end
+-end
+-
+-local function parse_fpimm(imm)
+- imm = match(imm, "^#(.*)$")
+- if not imm then werror("expected immediate operand") end
+- local n = parse_number(imm)
+- if n then
+- local m, e = math.frexp(n)
+- local s, e2 = 0, band(e-2, 7)
+- if m < 0 then m = -m; s = 0x00100000 end
+- m = m*32-16
+- if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then
+- return s + shl(e2, 17) + shl(m, 13)
+- end
+- werror("out of range immediate `"..imm.."'")
+- else
+- werror("NYI fpimm action")
+- end
+-end
+-
+-local function parse_shift(expr)
+- local s, s2 = match(expr, "^(%S+)%s*(.*)$")
+- s = map_shift[s]
+- if not s then werror("expected shift operand") end
+- return parse_imm(s2, 6, 10, 0, false) + shl(s, 22)
+-end
+-
+-local function parse_lslx16(expr)
+- local n = match(expr, "^lsl%s*#(%d+)$")
+- n = tonumber(n)
+- if not n then werror("expected shift operand") end
+- if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then
+- werror("bad shift amount")
+- end
+- return shl(n, 17)
+-end
+-
+-local function parse_extend(expr)
+- local s, s2 = match(expr, "^(%S+)%s*(.*)$")
+- if s == "lsl" then
+- s = parse_reg_type == "x" and 3 or 2
+- else
+- s = map_extend[s]
+- end
+- if not s then werror("expected extend operand") end
+- return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13)
++-- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
++-- Encoded as: bddd
++local function parse_mem_b(arg)
++ werror("parse_mem_b: not implemented")
++ return nil
+ end
+
+-local function parse_cond(expr, inv)
+- local c = map_cond[expr]
+- if not c then werror("expected condition operand") end
+- return shl(bit.bxor(c, inv), 12)
++-- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
++-- are GPRs.
++-- Encoded as: xbddd
++local function parse_mem_bx(arg)
++ werror("parse_mem_bx: not implemented")
++ return nil
+ end
+
+-local function parse_load(params, nparams, n, op)
+- if params[n+2] then werror("too many operands") end
+- local pn, p2 = params[n], params[n+1]
+- local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+- if not p1 then
+- if not p2 then
+- local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+- if reg and tailr ~= "" then
+- local base, tp = parse_reg_base(reg)
+- if tp then
+- waction("IMML", 0, format(tp.ctypefmt, tailr))
+- return op + base
+- end
+- end
+- end
+- werror("expected address operand")
+- end
+- local scale = shr(op, 30)
+- if p2 then
+- if wb == "!" then werror("bad use of '!'") end
+- op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
+- elseif wb == "!" then
+- local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+- if not p1a then werror("bad use of '!'") end
+- op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
+- else
+- local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
+- op = op + parse_reg_base(p1a)
+- if p2a ~= "" then
+- local imm = match(p2a, "^,%s*#(.*)$")
+- if imm then
+- op = op + parse_imm_load(imm, scale)
+- else
+- local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
+- op = op + shl(parse_reg(p2b), 16) + 0x00200800
+- if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
+- werror("bad index register type")
+- end
+- if p3b == "" then
+- if parse_reg_type ~= "x" then werror("bad index register type") end
+- op = op + 0x6000
+- else
+- if p3s == "" or p3s == "#0" then
+- elseif p3s == "#"..scale then
+- op = op + 0x1000
+- else
+- werror("bad scale")
+- end
+- if parse_reg_type == "x" then
+- if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
+- elseif p3b == "sxtx" then op = op + 0xe000
+- else
+- werror("bad extend/shift specifier")
+- end
+- else
+- if p3b == "uxtw" then op = op + 0x4000
+- elseif p3b == "sxtw" then op = op + 0xc000
+- else
+- werror("bad extend/shift specifier")
+- end
+- end
+- end
+- end
+- else
+- if wb == "!" then werror("bad use of '!'") end
+- op = op + 0x01000000
+- end
+- end
+- return op
++-- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
++-- b is a GPR.
++-- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits).
++local function parse_mem_by(arg)
++ werror("parse_mem_by: not implemented")
++ return nil
+ end
+
+-local function parse_load_pair(params, nparams, n, op)
+- if params[n+2] then werror("too many operands") end
+- local pn, p2 = params[n], params[n+1]
+- local scale = shr(op, 30) == 0 and 2 or 3
+- local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+- if not p1 then
+- if not p2 then
+- local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+- if reg and tailr ~= "" then
+- local base, tp = parse_reg_base(reg)
+- if tp then
+- waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
+- return op + base + 0x01000000
+- end
+- end
+- end
+- werror("expected address operand")
+- end
+- if p2 then
+- if wb == "!" then werror("bad use of '!'") end
+- op = op + 0x00800000
+- else
+- local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+- if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
+- op = op + (wb == "!" and 0x01800000 or 0x01000000)
+- end
+- return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
++-- Parse memory operand of the form d(x, b) where -(2^20)/2 <= d < (2^20)/2
++-- and b and x are GPRs.
++-- Encoded as: xblllhh (ls are the low-bits of d, and hs are the high bits).
++local function parse_mem_bxy(arg)
++ werror("parse_mem_bxy: not implemented")
++ return nil
+ end
+
+ local function parse_label(label, def)
+@@ -613,33 +365,6 @@ local function op_alias(opname, f)
+ end
+ end
+
+-local function alias_bfx(p)
+- p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1"
+-end
+-
+-local function alias_bfiz(p)
+- parse_reg(p[1])
+- if parse_reg_type == "w" then
+- p[3] = "#-("..p[3]:sub(2)..")%32"
+- p[4] = "#("..p[4]:sub(2)..")-1"
+- else
+- p[3] = "#-("..p[3]:sub(2)..")%64"
+- p[4] = "#("..p[4]:sub(2)..")-1"
+- end
+-end
+-
+-local alias_lslimm = op_alias("ubfm_4", function(p)
+- parse_reg(p[1])
+- local sh = p[3]:sub(2)
+- if parse_reg_type == "w" then
+- p[3] = "#-("..sh..")%32"
+- p[4] = "#31-("..sh..")"
+- else
+- p[3] = "#-("..sh..")%64"
+- p[4] = "#63-("..sh..")"
+- end
+-end)
+-
+ -- Template strings for s390x instructions.
+ map_op = {
+ a_4 = "000000005a000000j",
+@@ -1226,11 +951,11 @@ local function parse_template(params, template, nparams, pos)
+ for p in gmatch(sub(template, 17), ".") do
+ local pr1,pr2,pr3
+ if p == "g" then
+- pr1,pr2=param[n],param[n+1]
++ pr1,pr2=params[n],params[n+1]
+ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
+ elseif p == "h" then
+- pr1,pr2=param[n],param[n+1]
+- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
++ pr1,pr2=params[n],params[n+1]
++ op = op + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
+ elseif p == "j" then
+ op = op + shl(parse_reg(param[1]),24) + shl(parse_reg(param[2]),20) + shl(parse_reg(param[3]),16) + parse_number(param[4])
+ -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is satisfied
+
+From d97dea2e3fc4ed351a45e056137ad7fae7a59547 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 29 Nov 2016 16:29:42 -0500
+Subject: [PATCH 041/260] Add a description of how immediate actions should be
+ encoded.
+
+Also sets the action list type to unsigned short (uint16_t) which
+I think is the most appropriate type for s390x (x86 uses uint8_t
+and other platforms use uint32_t).
+---
+ dynasm/dasm_s390x.h | 4 ++--
+ dynasm/dasm_s390x.lua | 13 +++++++++++++
+ 2 files changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index b327e7a60..254db8b87 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -9,7 +9,7 @@
+ #include <string.h>
+ #include <stdlib.h>
+
+-#define DASM_ARCH "s390"
++#define DASM_ARCH "s390x"
+
+ #ifndef DASM_EXTERN
+ #define DASM_EXTERN(a,b,c,d) 0
+@@ -49,7 +49,7 @@ enum {
+ #define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+ /* Action list type. */
+-typedef const unsigned int *dasm_ActList;
++typedef const unsigned short *dasm_ActList;
+
+ /* Per-section structure. */
+ typedef struct dasm_Section {
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 556f7fe4d..c73e317e3 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -970,6 +970,19 @@ local function parse_template(params, template, nparams, pos)
+
+ end
+ end
++
++ -- TODO
++ -- 12-bit displacements (DISP12) and 16-bit immediates (IMM16) can be put at
++ -- one of two locations relative to the end of the instruction.
++ -- To make decoding easier we should insert the actions for these immediately
++ -- after the halfword they modify.
++ -- For example, take the instruction ahik, which is laid out as follows (each
++ -- char is 4 bits):
++ -- o = op code, r = register, i = immediate
++ -- oorr iiii 00oo
++ -- This should be emitted as oorr, followed by the immediate action, followed by
++ -- 00oo.
++
+ wputpos(pos, op)
+ end
+ function op_template(params, template, nparams)
+
+From 000b1a84f099ff001fa8dd6c5e2ff32b115975ed Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 29 Nov 2016 18:06:59 -0500
+Subject: [PATCH 042/260] Breakup instructions and action list into halfword
+ chunks.
+
+This should allow us to encode the instructions relatively naturally
+and efficiently. For now I've escaped halfwords with a value <=
+the maximum action. This means that 0 is escaped which probably
+isn't ideal, so we may want to revisit that decision at some point.
+---
+ dynasm/dasm_s390x.lua | 100 ++++++++++++++++++++++--------------------
+ 1 file changed, 53 insertions(+), 47 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index c73e317e3..ef7f35e51 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -48,8 +48,10 @@ local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+ -- Action name -> action number.
+ local map_action = {}
++local max_action = 0
+ for n,name in ipairs(action_names) do
+ map_action[name] = n-1
++ max_action = n
+ end
+
+ -- Action list buffer.
+@@ -77,25 +79,35 @@ end
+ local function writeactions(out, name)
+ local nn = #actlist
+ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
+- out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+- for i = 1,nn-1 do
+- assert(out:write("0x", tohex(actlist[i]), ",\n"))
++ out:write("static const unsigned short ", name, "[", nn, "] = {")
++ local esc = false -- also need to escape for action arguments
++ for i = 1,nn do
++ assert(out:write("\n 0x", sub(tohex(actlist[i]), 5, 8)))
++ if i ~= nn then assert(out:write(",")) end
++ local name = action_names[actlist[i]+1]
++ if not esc and name then
++ assert(out:write(" /* ", name, " */"))
++ esc = name == "ESC" or name == "SECTION"
++ else
++ esc = false
++ end
+ end
+- assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
++ assert(out:write("\n};\n\n"))
+ end
+
+ ------------------------------------------------------------------------------
+
+--- Add word to action list.
+-local function wputxw(n)
+- assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of range") -- s390x inst can be 6 bytes
++-- Add halfword to action list.
++local function wputxhw(n)
++ assert(n >= 0 and n <= 0xffff, "halfword out of range")
+ actlist[#actlist+1] = n
+ end
+
+ -- Add action to list with optional arg. Advance buffer pos, too.
+ local function waction(action, val, a, num)
+ local w = assert(map_action[action], "bad action name `"..action.."'")
+- wputxw(w * 0x10000 + (val or 0))
++ wputxhw(w)
++ if val then wputxhw(val) end -- Not sure about this, do we always have one arg?
+ if a then actargs[#actargs+1] = a end
+ if a or num then secpos = secpos + (num or 1) end
+ end
+@@ -109,29 +121,19 @@ local function wflush(term)
+ secpos = 1 -- The actionlist offset occupies a buffer position, too.
+ end
+
+--- Put escaped word. --Need to check this as well, not sure how it will work on s390x
+-local function wputw(n)
+- if n <= 0x000fffff then waction("ESC") end
+- wputxw(n)
++-- Put escaped halfword.
++local function wputhw(n)
++ if n <= max_action then waction("ESC") end
++ wputxhw(n)
+ end
+
+--- Reserve position for word.
++-- Reserve position for halfword.
+ local function wpos()
+ local pos = #actlist+1
+ actlist[pos] = ""
+ return pos
+ end
+
+--- Store word to reserved position. -- added 2 bytes more since s390x has 6 bytes inst as well
+-local function wputpos(pos, n)
+- assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of range")
+- if n <= 0x000fffff then
+- insert(actlist, pos+1, n)
+- n = map_action.ESC * 0x10000
+- end
+- actlist[pos] = n
+-end
+-
+ ------------------------------------------------------------------------------
+
+ -- Global label name -> global label number. With auto assignment on 1st use.
+@@ -942,26 +944,44 @@ end
+ ------------------------------------------------------------------------------
+ -- Handle opcodes defined with template strings.
+ local function parse_template(params, template, nparams, pos)
+- local op = tonumber(sub(template, 1, 16), 16) --
+- -- 00000000005a0000 converts to 90
++ -- Read the template in 16-bit chunks.
++ -- Leading halfword zeroes should not be written out.
++ local op0 = tonumber(sub(template, 5, 8), 16)
++ local op1 = tonumber(sub(template, 9, 12), 16)
++ local op2 = tonumber(sub(template, 13, 16), 16)
++
+ local n,rs = 1,26
+
+ parse_reg_type = false
+ -- Process each character.
++ -- TODO
++ -- 12-bit displacements (DISP12) and 16-bit immediates (IMM16) can be put at
++ -- one of two locations relative to the end of the instruction.
++ -- To make decoding easier we should insert the actions for these immediately
++ -- after the halfword they modify.
++ -- For example, take the instruction ahik, which is laid out as follows (each
++ -- char is 4 bits):
++ -- o = op code, r = register, i = immediate
++ -- oorr iiii 00oo
++ -- This should be emitted as oorr, followed by the immediate action, followed by
++ -- 00oo.
+ for p in gmatch(sub(template, 17), ".") do
+- local pr1,pr2,pr3
++ local pr1,pr2,pr3
+ if p == "g" then
+ pr1,pr2=params[n],params[n+1]
+- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
++ op2 = op2 + shl(parse_reg(pr1),4) + parse_reg(pr2)
++ wputhw(op2)
+ elseif p == "h" then
+ pr1,pr2=params[n],params[n+1]
+- op = op + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
++ op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
++ wputhw(op1); wputhw(op2)
+ elseif p == "j" then
+- op = op + shl(parse_reg(param[1]),24) + shl(parse_reg(param[2]),20) + shl(parse_reg(param[3]),16) + parse_number(param[4])
+- -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is satisfied
++ op1 = op1 + shl(parse_reg(param[1], 8))
++ wputhw(op1); wputhw(op2)
++ -- TODO: parse param[2] using parse_mem_bx, need to put x into op1, b and d
++ -- into op2, emitting an action for the DISP12 afterwards if necessary.
+ elseif p == "k" then
+- op = op + shl(parse_reg(param[1]),40) + shl(parse_reg(param[2]),36) + shl(parse_reg(param[3]),32) + parse_number(param[4]) + parse_number(param[5])
+- -- assuming params are passed as (R1,X2,B2,DL2,DH2)
++
+ elseif p == "l" then
+
+ elseif p == "m" then
+@@ -971,30 +991,16 @@ local function parse_template(params, template, nparams, pos)
+ end
+ end
+
+- -- TODO
+- -- 12-bit displacements (DISP12) and 16-bit immediates (IMM16) can be put at
+- -- one of two locations relative to the end of the instruction.
+- -- To make decoding easier we should insert the actions for these immediately
+- -- after the halfword they modify.
+- -- For example, take the instruction ahik, which is laid out as follows (each
+- -- char is 4 bits):
+- -- o = op code, r = register, i = immediate
+- -- oorr iiii 00oo
+- -- This should be emitted as oorr, followed by the immediate action, followed by
+- -- 00oo.
+-
+- wputpos(pos, op)
+ end
+ function op_template(params, template, nparams)
+ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
+ -- Limit number of section buffer positions used by a single dasm_put().
+ -- A single opcode needs a maximum of 3 positions.
+ if secpos+3 > maxsecpos then wflush() end
+- local pos = wpos()
+ local lpos, apos, spos = #actlist, #actargs, secpos
+ local ok, err
+ for t in gmatch(template, "[^|]+") do
+- ok, err = pcall(parse_template, params, t, nparams, pos)
++ ok, err = pcall(parse_template, params, t, nparams)
+ if ok then return end
+ secpos = spos
+ actlist[lpos+1] = nil
+
+From cf225d27cc56c31487638579857594b8dae4783b Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 30 Nov 2016 14:11:01 -0500
+Subject: [PATCH 043/260] Fix C code in header file and handle br template.
+
+This means that code like this can now be generated on s390x:
+
+| ar r2, r3
+| br r14
+
+Still need to add support for immediates, memory, labels, other
+instructions and so on.
+---
+ dynasm/dasm_s390x.h | 56 +++++++++++++++++++------------------------
+ dynasm/dasm_s390x.lua | 13 ++++++----
+ 2 files changed, 33 insertions(+), 36 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 254db8b87..837a2ed0d 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -186,10 +186,10 @@ void dasm_put(Dst_DECL, int start, ...)
+
+ va_start(ap, start);
+ while (1) {
+- unsigned int ins = *p++;
+- unsigned int action = (ins >> 16);
++ unsigned short ins = *p++;
++ unsigned short action = ins;
+ if (action >= DASM__MAX) {
+- ofs += 4;
++ ofs += 2;
+ } else {
+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+ switch (action) {
+@@ -231,22 +231,11 @@ void dasm_put(Dst_DECL, int start, ...)
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+- case DASM_IMM:
+-#ifdef DASM_CHECKS
+- CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+-#endif
+- n >>= ((ins>>10)&31);
+-#ifdef DASM_CHECKS
+- if (ins & 0x8000)
+- CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+- else
+- CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+-#endif
+- b[pos++] = n;
+- break;
+- case DASM_IMMSH:
+- CK((n >> 6) == 0, RANGE_I);
+- b[pos++] = n;
++ case DASM_IMM16:
++ case DASM_IMM32:
++ case DASM_DISP20:
++ case DASM_DISP12:
++ fprintf(stderr, "not implemented\n");
+ break;
+ }
+ }
+@@ -294,8 +283,8 @@ int dasm_link(Dst_DECL, size_t *szp)
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
+ while (1) {
+- unsigned int ins = *p++;
+- unsigned int action = (ins >> 16);
++ unsigned short ins = *p++;
++ unsigned short action = ins;
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+ case DASM_ESC: p++; break;
+@@ -303,7 +292,12 @@ int dasm_link(Dst_DECL, size_t *szp)
+ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+- case DASM_IMM: case DASM_IMMSH: pos++; break;
++ case DASM_IMM16:
++ case DASM_IMM32:
++ case DASM_DISP20:
++ case DASM_DISP12:
++ fprintf(stderr, "not implemented\n");
++ break;
+ }
+ }
+ stop: (void)0;
+@@ -328,7 +322,7 @@ int dasm_encode(Dst_DECL, void *buffer)
+ {
+ dasm_State *D = Dst_REF;
+ char *base = (char *)buffer;
+- unsigned int *cp = (unsigned int *)buffer;
++ unsigned short *cp = (unsigned short *)buffer;
+ int secnum;
+
+ /* Encode all code sections. No support for data sections (yet). */
+@@ -340,8 +334,8 @@ int dasm_encode(Dst_DECL, void *buffer)
+ while (b != endb) {
+ dasm_ActList p = D->actionlist + *b++;
+ while (1) {
+- unsigned int ins = *p++;
+- unsigned int action = (ins >> 16);
++ unsigned short ins = *p++;
++ unsigned short action = ins;
+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+@@ -350,7 +344,7 @@ int dasm_encode(Dst_DECL, void *buffer)
+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
+ goto patchrel;
+ case DASM_ALIGN:
+- ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
++ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x0707;
+ break;
+ case DASM_REL_LG:
+ CK(n >= 0, UNDEF_LG);
+@@ -367,11 +361,11 @@ int dasm_encode(Dst_DECL, void *buffer)
+ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+ break;
+ case DASM_LABEL_PC: break;
+- case DASM_IMM:
+- cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+- break;
+- case DASM_IMMSH:
+- cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
++ case DASM_IMM16:
++ case DASM_IMM32:
++ case DASM_DISP20:
++ case DASM_DISP12:
++ fprintf(stderr, "not implemented\n");
+ break;
+ default: *cp++ = ins; break;
+ }
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index ef7f35e51..52acbdbd6 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -933,13 +933,13 @@ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+ -- TODO: replace 'B' with correct encoding.
+ -- brc
+- map_op["j"..cond.."_1"] = "00000000"..tohex(0xa7040000+shl(c, 20)).."B"
++ map_op["j"..cond.."_1"] = "00000000"..tohex(0xa7040000+shl(c, 20)).."w"
+ -- brcl
+- map_op["jg"..cond.."_1"] = tohex(0xc004+shl(c, 4)).."00000000".."B"
++ map_op["jg"..cond.."_1"] = tohex(0xc004+shl(c, 4)).."00000000".."x"
+ -- bc
+- map_op["b"..cond.."_1"] = "00000000"..tohex(0x47000000+shl(c, 20)).."B"
++ map_op["b"..cond.."_1"] = "00000000"..tohex(0x47000000+shl(c, 20)).."y"
+ -- bcr
+- map_op["b"..cond.."r_1"] = "00000000"..tohex(0x0700+shl(c, 4)).."B"
++ map_op["b"..cond.."r_1"] = "00000000"..tohex(0x0700+shl(c, 4)).."z"
+ end
+ ------------------------------------------------------------------------------
+ -- Handle opcodes defined with template strings.
+@@ -969,7 +969,7 @@ local function parse_template(params, template, nparams, pos)
+ local pr1,pr2,pr3
+ if p == "g" then
+ pr1,pr2=params[n],params[n+1]
+- op2 = op2 + shl(parse_reg(pr1),4) + parse_reg(pr2)
++ op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
+ wputhw(op2)
+ elseif p == "h" then
+ pr1,pr2=params[n],params[n+1]
+@@ -988,6 +988,9 @@ local function parse_template(params, template, nparams, pos)
+
+ elseif p == "n" then
+
++ elseif p == "z" then
++ op2 = op2 + parse_gpr(params[1])
++ wputhw(op2)
+ end
+ end
+
+
+From a34bcf9ef464b9e599efca9bb762b181f8c18c3d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 30 Nov 2016 16:05:36 -0500
+Subject: [PATCH 044/260] Add initial support for D(X,B) memory operands
+ (12-bit only).
+
+Most RX instructions don't specify the correct number of operands
+so this won't work on many yet. It also won't yet emit an action
+if D is a variable rather than a constant.
+---
+ dynasm/dasm_s390x.lua | 60 ++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 54 insertions(+), 6 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 52acbdbd6..eac9d6032 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -281,6 +281,32 @@ local function parse_number(n)
+ return nil
+ end
+
++local function is_uint12(num)
++ return 0 <= num and num < 4096
++end
++
++local function is_int20(num)
++ return -shl(1, 19) <= num and num < shl(1, 19)
++end
++
++-- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
++-- If x is not specified then it is 0.
++local function split_memop(arg)
++ local reg = "r[0-1]?[0-9]"
++ local d, x, b = match(arg, "^(.*)%(("..reg.."), ("..reg..")%)$")
++ if d then
++ return d, parse_gpr(x), parse_gpr(b)
++ end
++ local d, b = match(arg, "^(.*)%(("..reg..")%)$")
++ if d then
++ return d, 0, parse_gpr(b)
++ end
++ -- TODO: handle values without registers?
++ -- TODO: handle registers without a displacement?
++ werror("bad memory operand: "..arg)
++ return nil
++end
++
+ -- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
+ -- Encoded as: bddd
+ local function parse_mem_b(arg)
+@@ -292,6 +318,17 @@ end
+ -- are GPRs.
+ -- Encoded as: xbddd
+ local function parse_mem_bx(arg)
++ local d, x, b = split_memop(arg)
++ local dval = tonumber(d)
++ if dval then
++ if not is_uint12(dval) then
++ werror("displacement out of range: ", dval)
++ end
++ return dval, x, b, nil
++ end
++ -- TODO: handle d being a symbol.
++ -- Action is currently the final return value (the caller needs to add it
++ -- to the action list at a later point).
+ werror("parse_mem_bx: not implemented")
+ return nil
+ end
+@@ -369,7 +406,7 @@ end
+
+ -- Template strings for s390x instructions.
+ map_op = {
+-a_4 = "000000005a000000j",
++a_2 = "000000005a000000j",
+ ar_2 = "0000000000001a00g",
+ ay_5 = "0000e3000000005al",
+ ag_5 = "0000e30000000008l",
+@@ -853,7 +890,7 @@ sqdbr_2 = "00000000b3150000h",
+ sqdr_2 = "00000000b2440000h",
+ sqebr_2 = "00000000b3140000h",
+ sqer_2 = "00000000b2450000h",
+-st_4 = "0000000050000000j",
++st_2 = "0000000050000000j",
+ sty_5 = "0000e30000000050l",
+ stg_5 = "0000e30000000024l",
+ std_4 = "0000000060000000j",
+@@ -976,10 +1013,13 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
+ wputhw(op1); wputhw(op2)
+ elseif p == "j" then
+- op1 = op1 + shl(parse_reg(param[1], 8))
+- wputhw(op1); wputhw(op2)
+- -- TODO: parse param[2] using parse_mem_bx, need to put x into op1, b and d
+- -- into op2, emitting an action for the DISP12 afterwards if necessary.
++ local d, x, b, a = parse_mem_bx(params[2])
++ op1 = op1 + shl(parse_gpr(params[1]), 4) + x
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1); wputhw(op2);
++ if a then
++ werror("disp12 actions not yet implemented")
++ end
+ elseif p == "k" then
+
+ elseif p == "l" then
+@@ -988,6 +1028,14 @@ local function parse_template(params, template, nparams, pos)
+
+ elseif p == "n" then
+
++ elseif p == "y" then
++ local d, x, b, a = parse_mem_bx(params[1])
++ op1 = op1 + x
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1); wputhw(op2);
++ if a then
++ werror("disp12 actions not yet implemented")
++ end
+ elseif p == "z" then
+ op2 = op2 + parse_gpr(params[1])
+ wputhw(op2)
+
+From 575c9075448b26316195c26dbcc600656ad44849 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 30 Nov 2016 17:07:17 -0500
+Subject: [PATCH 045/260] Minor cleanup of regular expressions.
+
+---
+ dynasm/dasm_s390x.lua | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index eac9d6032..c15719b73 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -242,7 +242,7 @@ local map_cond = {
+ local parse_reg_type
+
+ local function parse_gpr(expr)
+- local r = match(expr, "^r([1-3]?[0-9])$")
++ local r = match(expr, "^r(1?[0-9])$")
+ if r then
+ r = tonumber(r)
+ if r <= 15 then return r, tp end
+@@ -251,7 +251,7 @@ local function parse_gpr(expr)
+ end
+
+ local function parse_fpr(expr)
+- local r = match(expr, "^f([1-3]?[0-9])$")
++ local r = match(expr, "^f(1?[0-9])$")
+ if r then
+ r = tonumber(r)
+ if r <= 15 then return r end
+@@ -292,7 +292,7 @@ end
+ -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
+ -- If x is not specified then it is 0.
+ local function split_memop(arg)
+- local reg = "r[0-1]?[0-9]"
++ local reg = "r1?[0-9]"
+ local d, x, b = match(arg, "^(.*)%(("..reg.."), ("..reg..")%)$")
+ if d then
+ return d, parse_gpr(x), parse_gpr(b)
+
+From dd6448ff1e7242c3ad4a1f21823143cfb104349d Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Thu, 1 Dec 2016 15:15:06 +0530
+Subject: [PATCH 046/260] Changed the templates based on number of arguments
+
+Changed the templates based on the number of parameters passed; mainly the memory and immediate ones are modified.
+---
+ dynasm/dasm_s390x.lua | 552 +++++++++++++++++++++---------------------
+ 1 file changed, 276 insertions(+), 276 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index c15719b73..467e21828 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -408,148 +408,148 @@ end
+ map_op = {
+ a_2 = "000000005a000000j",
+ ar_2 = "0000000000001a00g",
+-ay_5 = "0000e3000000005al",
+-ag_5 = "0000e30000000008l",
++ay_2 = "0000e3000000005al",
++ag_2 = "0000e30000000008l",
+ agr_2 = "00000000b9080000h",
+-agf_5 = "0000e30000000018l",
++agf_2 = "0000e30000000018l",
+ agfr_2 = "00000000b9180000h",
+ axbr_2 = "00000000b34a0000h",
+ adbr_2 = "00000000b31a0000h",
+ aebr_2 = "00000000b30a0000h",
+-ah_4 = "000000004a000000j",
+-ahy_5 = "0000e3000000007al",
+-afi_3 = "0000c20900000000n",
+-agfi_3 = "0000c20800000000n",
+-aih_3 = "0000cc0800000000n",
+-al_4 = "000000005e000000j",
++ah_2 = "000000004a000000j",
++ahy_2 = "0000e3000000007al",
++afi_2 = "0000c20900000000n",
++agfi_2 = "0000c20800000000n",
++aih_2 = "0000cc0800000000n",
++al_2 = "000000005e000000j",
+ alr_2 = "0000000000001e00g",
+-aly_5 = "0000e3000000005el",
+-alg_5 = "0000e3000000000al",
++aly_2 = "0000e3000000005el",
++alg_2 = "0000e3000000000al",
+ algr_2 = "00000000b90a0000h",
+-algf_5 = "0000e3000000001al",
++algf_2 = "0000e3000000001al",
+ algfr_2 = "00000000b91a0000h",
+-alfi_3 = "0000c20b00000000n",
+-algfi_3 = "0000c20a00000000n",
+-alc_5 = "0000e30000000098l",
++alfi_2 = "0000c20b00000000n",
++algfi_2 = "0000c20a00000000n",
++alc_2 = "0000e30000000098l",
+ alcr_2 = "00000000b9980000h",
+-alcg_5 = "0000e30000000088l",
++alcg_2 = "0000e30000000088l",
+ alcgr_2 = "00000000b9880000h",
+-alsih_3 = "0000cc0a00000000n",
+-alsihn_3 = "0000cc0b00000000n",
++alsih_2 = "0000cc0a00000000n",
++alsihn_2 = "0000cc0b00000000n",
+ axr_2 = "0000000000003600g",
+-ad_4 = "000000006a000000j",
++ad_2 = "000000006a000000j",
+ adr_2 = "0000000000002a00g",
+-ae_4 = "000000007a000000j",
++ae_2 = "000000007a000000j",
+ aer_2 = "0000000000003a00g",
+-aw_4 = "000000006e000000j",
++aw_2 = "000000006e000000j",
+ awr_2 = "0000000000002e00g",
+-au_4 = "000000007e000000j",
++au_2 = "000000007e000000j",
+ aur_2 = "0000000000003e00g",
+-n_4 = "0000000054000000j",
++n_2 = "0000000054000000j",
+ nr_2 = "0000000000001400g",
+-ny_5 = "0000e30000000054l",
+-ng_5 = "0000e30000000080l",
++ny_2 = "0000e30000000054l",
++ng_2 = "0000e30000000080l",
+ ngr_2 = "00000000b9800000h",
+-nihf_3 = "0000c00a00000000n",
+-nilf_3 = "0000c00b00000000n",
+-bal_4 = "0000000045000000j",
++nihf_2 = "0000c00a00000000n",
++nilf_2 = "0000c00b00000000n",
++bal_2 = "0000000045000000j",
+ balr_2 = "000000000000500g",
+-bas_4 = "000000004d000000j",
++bas_2 = "000000004d000000j",
+ basr_2 = "0000000000000d00g",
+ bassm_2 = "0000000000000c00g",
+ bsa_2 = "00000000b25a0000h",
+ bsm_2 = "0000000000000b00g",
+ bakr_2 = "00000000b2400000h",
+ bsg_2 = "00000000b2580000h",
+-bc_4 = "0000000047000000k",
++bc_2 = "0000000047000000k",
+ bcr_2 = "000000000000700g",
+-bct_4 = "0000000046000000j",
++bct_2 = "0000000046000000j",
+ bctr_2 = "000000000000600g",
+-bctg_5 = "0000e30000000046l",
++bctg_2 = "0000e30000000046l",
+ bctgr_2 = "00000000b9460000h",
+-bxh_4 = "0000000086000000q",
+-bxhg_5 = "0000eb0000000044s",
+-bxle_4 = "0000000087000000q",
+-bxleg_5 = "0000eb0000000045s",
+-brasl_3 = "0000c00500000000o",
+-brcl_3 = "0000c00400000000p",
+-brcth_3 = "0000cc0600000000o",
++bxh_3 = "0000000086000000q",
++bxhg_3 = "0000eb0000000044s",
++bxle_3 = "0000000087000000q",
++bxleg_3 = "0000eb0000000045s",
++brasl_2 = "0000c00500000000o",
++brcl_2 = "0000c00400000000p",
++brcth_2 = "0000cc0600000000o",
+ cksm_2 = "00000000b2410000h",
+ km_2 = "00000000b92e0000h",
+ kmf_2 = "00000000b92a0000h",
+ kmc_2 = "00000000b92f0000h",
+ kmo_2 = "00000000b92b0000h",
+-c_4 = "0000000059000000j",
++c_2 = "0000000059000000j",
+ cr_2 = "0000000000001900g",
+-cy_5 = "0000e30000000059l",
+-cg_5 = "0000e30000000020l",
++cy_2 = "0000e30000000059l",
++cg_2 = "0000e30000000020l",
+ cgr_2 = "00000000b9200000h",
+-cgf_5 = "0000e30000000030l",
++cgf_2 = "0000e30000000030l",
+ cgfr_2 = "00000000b9300000h",
+ cxbr_2 = "00000000b3490000h",
+ cxtr_2 = "00000000b3ec0000h",
+ cxr_2 = "00000000b3690000h",
+ cdbr_2 = "00000000b3190000h",
+ cdtr_2 = "00000000b3e40000h",
+-cd_4 = "0000000069000000j",
++cd_2 = "0000000069000000j",
+ cdr_2 = "0000000000002900g",
+ cebr_2 = "00000000b3090000h",
+-ce_4 = "0000000079000000j",
++ce_2 = "0000000079000000j",
+ cer_2 = "0000000000003900g",
+ kxbr_2 = "00000000b3480000h",
+ kxtr_2 = "00000000b3e80000h",
+ kdbr_2 = "00000000b3180000h",
+ kdtr_2 = "00000000b3e00000h",
+ kebr_2 = "00000000b3080000h",
+-cs_4 = "00000000ba000000q",
+-csy_5 = "0000eb0000000014s",
+-csg_5 = "0000eb0000000030s",
++cs_3 = "00000000ba000000q",
++csy_3 = "0000eb0000000014s",
++csg_3 = "0000eb0000000030s",
+ csp_2 = "00000000b2500000h",
+ cspg_2 = "00000000b98a0000h",
+ cextr_2 = "00000000b3fc0000h",
+ cedtr_2 = "00000000b3f40000h",
+-cds_4 = "00000000bb000000q",
+-cdsy_5 = "0000eb0000000031s",
+-cdsg_5 = "0000eb000000003es",
+-ch_4 = "0000000049000000j",
+-chy_5 = "0000e30000000079l",
+-cgh_5 = "0000e30000000034l",
+-chrl_3 = "0000c60500000000o",
+-cghrl_3 = "0000c60400000000o",
+-chf_5 = "0000e300000000cdl",
++cds_3 = "00000000bb000000q",
++cdsy_3 = "0000eb0000000031s",
++cdsg_3 = "0000eb000000003es",
++ch_2 = "0000000049000000j",
++chy_2 = "0000e30000000079l",
++cgh_2 = "0000e30000000034l",
++chrl_2 = "0000c60500000000o",
++cghrl_2 = "0000c60400000000o",
++chf_2 = "0000e300000000cdl",
+ chhr_2 = "00000000b9cd0000h",
+ chlr_2 = "00000000b9dd0000h",
+-cfi_3 = "0000c20d00000000n",
+-cgfi_3 = "0000c20c00000000n",
+-cih_3 = "0000cc0d00000000n",
+-cl_4 = "0000000055000000j",
++cfi_2 = "0000c20d00000000n",
++cgfi_2 = "0000c20c00000000n",
++cih_2 = "0000cc0d00000000n",
++cl_2 = "0000000055000000j",
+ clr_2 = "0000000000001500g",
+-cly_5 = "0000e30000000055l",
+-clg_5 = "0000e30000000021l",
++cly_2 = "0000e30000000055l",
++clg_2 = "0000e30000000021l",
+ clgr_2 = "00000000b9210000h",
+-clgf_5 = "0000e30000000031l",
++clgf_2 = "0000e30000000031l",
+ clgfr_2 = "00000000b9310000h",
+-clmh_5 = "0000eb0000000020t",
+-clm_4 = "00000000bd000000r",
+-clmy_5 = "0000eb0000000021t",
+-clhf_5 = "0000e300000000cfl",
++clmh_3 = "0000eb0000000020t",
++clm_3 = "00000000bd000000r",
++clmy_3 = "0000eb0000000021t",
++clhf_2 = "0000e300000000cfl",
+ clhhr_2 = "00000000b9cf0000h",
+ clhlr_2 = "00000000b9df0000h",
+-clfi_3 = "0000c20f00000000n",
+-clgfi_3 = "0000c20e00000000n",
+-clih_3 = "0000cc0f00000000n",
++clfi_2 = "0000c20f00000000n",
++clgfi_2 = "0000c20e00000000n",
++clih_2 = "0000cc0f00000000n",
+ clcl_2 = "0000000000000f00g",
+-clcle_4 = "00000000a9000000q",
+-clclu_5 = "0000eb000000008fs",
+-clrl_3 = "0000c60f00000000o",
+-clhrl_3 = "0000c60700000000o",
+-clgrl_3 = "0000c60a00000000o",
+-clghrl_3 = "0000c60600000000o",
+-clgfrl_3 = "0000c60e00000000o",
++clcle_3 = "00000000a9000000q",
++clclu_3 = "0000eb000000008fs",
++clrl_2 = "0000c60f00000000o",
++clhrl_2 = "0000c60700000000o",
++clgrl_2 = "0000c60a00000000o",
++clghrl_2 = "0000c60600000000o",
++clgfrl_2 = "0000c60e00000000o",
+ clst_2 = "00000000b25d0000h",
+-crl_3 = "0000c60d00000000o",
+-cgrl_3 = "0000c60800000000o",
+-cgfrl_3 = "0000c60c00000000o",
++crl_2 = "0000c60d00000000o",
++cgrl_2 = "0000c60800000000o",
++cgfrl_2 = "0000c60c00000000o",
+ cuse_2 = "00000000b2570000h",
+ cmpsc_2 = "00000000b2630000h",
+ kimd_2 = "00000000b93e0000h",
+@@ -577,49 +577,49 @@ cxstr_2 = "00000000b3fb0000h",
+ cdstr_2 = "00000000b3f30000h",
+ cxutr_2 = "00000000b3fa0000h",
+ cdutr_2 = "00000000b3f20000h",
+-cvb_4 = "000000004f000000j",
+-cvby_5 = "0000e30000000006l",
+-cvbg_5 = "0000e3000000000el",
+-cvd_4 = "000000004e000000j",
+-cvdy_5 = "0000e30000000026l",
+-cvdg_5 = "0000e3000000002el",
++cvb_2 = "000000004f000000j",
++cvby_2 = "0000e30000000006l",
++cvbg_2 = "0000e3000000000el",
++cvd_2 = "000000004e000000j",
++cvdy_2 = "0000e30000000026l",
++cvdg_2 = "0000e3000000002el",
+ cuxtr_2 = "00000000b3ea0000h",
+ cudtr_2 = "00000000b3e20000h",
+ cu42_2 = "00000000b9b30000h",
+ cu41_2 = "00000000b9b20000h",
+ cpya_2 = "00000000b24d0000h",
+-d_4 = "000000005d000000j",
++d_2 = "000000005d000000j",
+ dr_2 = "0000000000001d00g",
+ dxbr_2 = "00000000b34d0000h",
+ dxr_2 = "00000000b22d0000h",
+ ddbr_2 = "00000000b31d0000h",
+-dd_4 = "000000006d000000j",
++dd_2 = "000000006d000000j",
+ ddr_2 = "0000000000002d00g",
+ debr_2 = "00000000b30d0000h",
+-de_4 = "000000007d000000j",
++de_2 = "000000007d000000j",
+ der_2 = "0000000000003d00g",
+-dl_5 = "0000e30000000097l",
++dl_2 = "0000e30000000097l",
+ dlr_2 = "00000000b9970000h",
+-dlg_5 = "0000e30000000087l",
++dlg_2 = "0000e30000000087l",
+ dlgr_2 = "00000000b9870000h",
+-dsg_5 = "0000e3000000000dl",
++dsg_2 = "0000e3000000000dl",
+ dsgr_2 = "00000000b90d0000h",
+-dsgf_5 = "0000e3000000001dl",
++dsgf_2 = "0000e3000000001dl",
+ dsgfr_2 = "00000000b91d0000h",
+-x_4 = "0000000057000000j",
++x_2 = "0000000057000000j",
+ xr_2 = "0000000000001700g",
+-xy_5 = "0000e30000000057l",
+-xg_5 = "0000e30000000082l",
++xy_2 = "0000e30000000057l",
++xg_2 = "0000e30000000082l",
+ xgr_2 = "00000000b9820000h",
+-xihf_3 = "0000c00600000000n",
+-xilf_3 = "0000c00700000000n",
+-ex_4 = "0000000044000000j",
+-exrl_3 = "0000c60000000000o",
++xihf_2 = "0000c00600000000n",
++xilf_2 = "0000c00700000000n",
++ex_2 = "0000000044000000j",
++exrl_2 = "0000c60000000000o",
+ ear_2 = "00000000b24f0000h",
+ esea_2 = "00000000b99d0000h",
+ eextr_2 = "00000000b3ed0000h",
+ eedtr_2 = "00000000b3e50000h",
+-ecag_5 = "0000eb000000004cs",
++ecag_3 = "0000eb000000004cs",
+ efpc_2 = "00000000b38c0000h",
+ epar_2 = "00000000b2260000h",
+ epair_2 = "00000000b99a0000h",
+@@ -635,52 +635,52 @@ flogr_2 = "00000000b9830000h",
+ hdr_2 = "0000000000002400g",
+ her_2 = "0000000000003400g",
+ iac_2 = "00000000b2240000h",
+-ic_4 = "0000000043000000j",
+-icy_5 = "0000e30000000073l",
+-icmh_5 = "0000eb0000000080t",
+-icm_4 = "00000000bf000000r",
+-icmy_5 = "0000eb0000000081t",
+-iihf_3 = "0000c00800000000n",
+-iilf_3 = "0000c00900000000n",
++ic_2 = "0000000043000000j",
++icy_2 = "0000e30000000073l",
++icmh_3 = "0000eb0000000080t",
++icm_3 = "00000000bf000000r",
++icmy_3 = "0000eb0000000081t",
++iihf_2 = "0000c00800000000n",
++iilf_2 = "0000c00900000000n",
+ ipm_2 = "00000000b2220000h",
+ iske_2 = "00000000b2290000h",
+ ivsk_2 = "00000000b2230000h",
+-l_4 = "0000000058000000j",
++l_2 = "0000000058000000j",
+ lr_2 = "0000000000001800g",
+-ly_5 = "0000e30000000058l",
+-lg_5 = "0000e30000000004l",
++ly_2 = "0000e30000000058l",
++lg_2 = "0000e30000000004l",
+ lgr_2 = "00000000b9040000h",
+-lgf_5 = "0000e30000000014l",
++lgf_2 = "0000e30000000014l",
+ lgfr_2 = "00000000b9140000h",
+ lxr_2 = "00000000b3650000h",
+-ld_4 = "0000000068000000j",
++ld_2 = "0000000068000000j",
+ ldr_2 = "0000000000002800g",
+-ldy_5 = "0000ed0000000065l",
+-le_4 = "0000000078000000j",
++ldy_2 = "0000ed0000000065l",
++le_2 = "0000000078000000j",
+ ler_2 = "0000000000003800g",
+-ley_5 = "0000ed0000000064l",
+-lam_4 = "000000009a000000q",
+-lamy_5 = "0000eb000000009as",
+-la_4 = "0000000041000000j",
+-lay_5 = "0000e30000000071l",
+-lae_4 = "0000000051000000j",
+-laey_5 = "0000e30000000075l",
+-larl_3 = "0000c00000000000o",
+-laa_5 = "0000eb00000000f8s",
+-laag_5 = "0000eb00000000e8s",
+-laal_5 = "0000eb00000000fas",
+-laalg_5 = "0000eb00000000eas",
+-lan_5 = "0000eb00000000f4s",
+-lang_5 = "0000eb00000000e4s",
+-lax_5 = "0000eb00000000f7s",
+-laxg_5 = "0000eb00000000e7s",
+-lao_5 = "0000eb00000000f6s",
+-laog_5 = "0000eb00000000e6s",
+-lt_5 = "0000e30000000012l",
++ley_2 = "0000ed0000000064l",
++lam_3 = "000000009a000000q",
++lamy_3 = "0000eb000000009as",
++la_2 = "0000000041000000j",
++lay_2 = "0000e30000000071l",
++lae_2 = "0000000051000000j",
++laey_2 = "0000e30000000075l",
++larl_2 = "0000c00000000000o",
++laa_3 = "0000eb00000000f8s",
++laag_3 = "0000eb00000000e8s",
++laal_3 = "0000eb00000000fas",
++laalg_3 = "0000eb00000000eas",
++lan_3 = "0000eb00000000f4s",
++lang_3 = "0000eb00000000e4s",
++lax_3 = "0000eb00000000f7s",
++laxg_3 = "0000eb00000000e7s",
++lao_3 = "0000eb00000000f6s",
++laog_3 = "0000eb00000000e6s",
++lt_2 = "0000e30000000012l",
+ ltr_2 = "0000000000001200g",
+-ltg_5 = "0000e30000000002l",
++ltg_2 = "0000e30000000002l",
+ ltgr_2 = "00000000b9020000h",
+-ltgf_5 = "0000e30000000032l",
++ltgf_2 = "0000e30000000032l",
+ ltgfr_2 = "00000000b9120000h",
+ ltxbr_2 = "00000000b3420000h",
+ ltxtr_2 = "00000000b3de0000h",
+@@ -690,11 +690,11 @@ ltdtr_2 = "00000000b3d60000h",
+ ltdr_2 = "0000000000002200g",
+ ltebr_2 = "00000000b3020000h",
+ lter_2 = "0000000000003200g",
+-lb_5 = "0000e30000000076l",
++lb_2 = "0000e30000000076l",
+ lbr_2 = "00000000b9260000h",
+-lgb_5 = "0000e30000000077l",
++lgb_2 = "0000e30000000077l",
+ lgbr_2 = "00000000b9060000h",
+-lbh_5 = "0000e300000000c0l",
++lbh_2 = "0000e300000000c0l",
+ lcr_2 = "0000000000001300g",
+ lcgr_2 = "00000000b9030000h",
+ lcgfr_2 = "00000000b9130000h",
+@@ -705,52 +705,52 @@ lcdr_2 = "0000000000002300g",
+ lcdfr_2 = "00000000b3730000h",
+ lcebr_2 = "00000000b3030000h",
+ lcer_2 = "0000000000003300g",
+-lctl_4 = "00000000b7000000q",
+-lctlg_5 = "0000eb000000002fs",
++lctl_3 = "00000000b7000000q",
++lctlg_3 = "0000eb000000002fs",
+ fixr_2 = "00000000b3670000h",
+ fidr_2 = "00000000b37f0000h",
+ fier_2 = "00000000b3770000h",
+ ldgr_2 = "00000000b3c10000h",
+ lgdr_2 = "00000000b3cd0000h",
+-lh_4 = "0000000048000000j",
++lh_2 = "0000000048000000j",
+ lhr_2 = "00000000b9270000h",
+-lhy_5 = "0000e30000000078l",
+-lgh_5 = "0000e30000000015l",
++lhy_2 = "0000e30000000078l",
++lgh_2 = "0000e30000000015l",
+ lghr_2 = "00000000b9070000h",
+-lhh_5 = "0000e300000000c4l",
+-lhrl_3 = "0000c40500000000o",
+-lghrl_3 = "0000c40400000000o",
+-lfh_5 = "0000e300000000cal",
+-lgfi_3 = "0000c00100000000n",
++lhh_2 = "0000e300000000c4l",
++lhrl_2 = "0000c40500000000o",
++lghrl_2 = "0000c40400000000o",
++lfh_2 = "0000e300000000cal",
++lgfi_2 = "0000c00100000000n",
+ lxdbr_2 = "00000000b3050000h",
+ lxdr_2 = "00000000b3250000h",
+ lxebr_2 = "00000000b3060000h",
+ lxer_2 = "00000000b3260000h",
+ ldebr_2 = "00000000b3040000h",
+ lder_2 = "00000000b3240000h",
+-llgf_5 = "0000e30000000016l",
++llgf_2 = "0000e30000000016l",
+ llgfr_2 = "00000000b9160000h",
+-llc_5 = "0000e30000000094l",
++llc_2 = "0000e30000000094l",
+ llcr_2 = "00000000b9940000h",
+-llgc_5 = "0000e30000000090l",
++llgc_2 = "0000e30000000090l",
+ llgcr_2 = "00000000b9840000h",
+-llch_5 = "0000e300000000c2l",
+-llh_5 = "0000e30000000095l",
++llch_2 = "0000e300000000c2l",
++llh_2 = "0000e30000000095l",
+ llhr_2 = "00000000b9950000h",
+-llgh_5 = "0000e30000000091l",
++llgh_2 = "0000e30000000091l",
+ llghr_2 = "00000000b9850000h",
+-llhh_5 = "0000e300000000c6l",
+-llhrl_3 = "0000c40200000000o",
+-llghrl_3 = "0000c40600000000o",
+-llihf_3 = "0000c00e00000000n",
+-llilf_3 = "0000c00f00000000n",
+-llgfrl_3 = "0000c40e00000000o",
+-llgt_5 = "0000e30000000017l",
++llhh_2 = "0000e300000000c6l",
++llhrl_2 = "0000c40200000000o",
++llghrl_2 = "0000c40600000000o",
++llihf_2 = "0000c00e00000000n",
++llilf_2 = "0000c00f00000000n",
++llgfrl_2 = "0000c40e00000000o",
++llgt_2 = "0000e30000000017l",
+ llgtr_2 = "00000000b9170000h",
+-lm_4 = "0000000098000000q",
+-lmy_5 = "0000eb0000000098s",
+-lmg_5 = "0000eb0000000004s",
+-lmh_5 = "0000eb0000000096s",
++lm_3 = "0000000098000000q",
++lmy_3 = "0000eb0000000098s",
++lmg_3 = "0000eb0000000004s",
++lmh_3 = "0000eb0000000096s",
+ lnr_2 = "0000000000001100g",
+ lngr_2 = "00000000b9010000h",
+ lngfr_2 = "00000000b9110000h",
+@@ -761,9 +761,9 @@ lndr_2 = "0000000000002100g",
+ lndfr_2 = "00000000b3710000h",
+ lnebr_2 = "00000000b3010000h",
+ lner_2 = "0000000000003100g",
+-loc_5 = "0000eb00000000f2t",
+-locg_5 = "0000eb00000000e2t",
+-lpq_5 = "0000e3000000008fl",
++loc_3 = "0000eb00000000f2t",
++locg_3 = "0000eb00000000e2t",
++lpq_2 = "0000e3000000008fl",
+ lpr_2 = "0000000000001000g",
+ lpgr_2 = "00000000b9000000h",
+ lpgfr_2 = "00000000b9100000h",
+@@ -774,16 +774,16 @@ lpdr_2 = "0000000000002000g",
+ lpdfr_2 = "00000000b3700000h",
+ lpebr_2 = "00000000b3000000h",
+ lper_2 = "0000000000003000g",
+-lra_4 = "00000000b1000000j",
+-lray_5 = "0000e30000000013l",
+-lrag_5 = "0000e30000000003l",
+-lrl_3 = "0000c40d00000000o",
+-lgrl_3 = "0000c40800000000o",
+-lgfrl_3 = "0000c40c00000000o",
+-lrvh_5 = "0000e3000000001fl",
+-lrv_5 = "0000e3000000001el",
++lra_2 = "00000000b1000000j",
++lray_2 = "0000e30000000013l",
++lrag_2 = "0000e30000000003l",
++lrl_2 = "0000c40d00000000o",
++lgrl_2 = "0000c40800000000o",
++lgfrl_2 = "0000c40c00000000o",
++lrvh_2 = "0000e3000000001fl",
++lrv_2 = "0000e3000000001el",
+ lrvr_2 = "00000000b91f0000h",
+-lrvg_5 = "0000e3000000000fl",
++lrvg_2 = "0000e3000000000fl",
+ lrvgr_2 = "00000000b90f0000h",
+ ldxbr_2 = "00000000b3450000h",
+ ldxr_2 = "0000000000002500g",
+@@ -800,50 +800,50 @@ lzdr_2 = "00000000b3750000h",
+ lzer_2 = "00000000b3740000h",
+ msta_2 = "00000000b2470000h",
+ mvcl_2 = "0000000000000e00g",
+-mvcle_4 = "00000000a8000000q",
+-mvclu_5 = "0000eb000000008es",
++mvcle_3 = "00000000a8000000q",
++mvclu_3 = "0000eb000000008es",
+ mvpg_2 = "00000000b2540000h",
+ mvst_2 = "00000000b2550000h",
+-m_4 = "000000005c000000j",
+-mfy_5 = "0000e3000000005cl",
++m_2 = "000000005c000000j",
++mfy_2 = "0000e3000000005cl",
+ mr_2 = "0000000000001c00g",
+ mxbr_2 = "00000000b34c0000h",
+ mxr_2 = "0000000000002600g",
+ mdbr_2 = "00000000b31c0000h",
+-md_4 = "000000006c000000j",
++md_2 = "000000006c000000j",
+ mdr_2 = "0000000000002c00g",
+ mxdbr_2 = "00000000b3070000h",
+-mxd_4 = "0000000067000000j",
++mxd_2 = "0000000067000000j",
+ mxdr_2 = "0000000000002700g",
+ meebr_2 = "00000000b3170000h",
+ meer_2 = "00000000b3370000h",
+ mdebr_2 = "00000000b30c0000h",
+-mde_4 = "000000007c000000j",
++mde_2 = "000000007c000000j",
+ mder_2 = "0000000000003c00g",
+-me_4 = "000000007c000000j",
++me_2 = "000000007c000000j",
+ mer_2 = "0000000000003c00g",
+-mh_4 = "000000004c000000j",
+-mhy_5 = "0000e3000000007cl",
+-mlg_5 = "0000e30000000086l",
++mh_2 = "000000004c000000j",
++mhy_2 = "0000e3000000007cl",
++mlg_2 = "0000e30000000086l",
+ mlgr_2 = "00000000b9860000h",
+-ml_5 = "0000e30000000096l",
++ml_2 = "0000e30000000096l",
+ mlr_2 = "00000000b9960000h",
+-ms_4 = "0000000071000000j",
++ms_2 = "0000000071000000j",
+ msr_2 = "00000000b2520000h",
+-msy_5 = "0000e30000000051l",
+-msg_5 = "0000e3000000000cl",
++msy_2 = "0000e30000000051l",
++msg_2 = "0000e3000000000cl",
+ msgr_2 = "00000000b90c0000h",
+-msgf_5 = "0000e3000000001cl",
++msgf_2 = "0000e3000000001cl",
+ msgfr_2 = "00000000b91c0000h",
+-msfi_3 = "0000c20100000000n",
+-msgfi_3 = "0000c20000000000n",
+-o_4 = "0000000056000000j",
+-or_2 = "0000000000001600g",
+-oy_5 = "0000e30000000056l",
+-og_5 = "0000e30000000081l",
++msfi_2 = "0000c20100000000n",
++msgfi_2 = "0000c20000000000n",
++o_2 = "0000000056000000j",
++or_2 = "0000000000001600g",
++oy_2 = "0000e30000000056l",
++og_2 = "0000e30000000081l",
+ ogr_2 = "00000000b9810000h",
+-oihf_3 = "0000c00c00000000n",
+-oilf_3 = "0000c00d00000000n",
++oihf_2 = "0000c00c00000000n",
++oilf_2 = "0000c00d00000000n",
+ pgin_2 = "00000000b22e0000h",
+ pgout_2 = "00000000b22f0000h",
+ pcc_2 = "00000000b92c0000h",
+@@ -851,15 +851,15 @@ pckmo_2 = "00000000b9280000h",
+ pfmf_2 = "00000000b9af0000h",
+ ptf_2 = "00000000b9a20000h",
+ popcnt_2 = "00000000b9e10000h",
+-pfd_5 = "0000e30000000036m",
+-pfdrl_3 = "0000c60200000000p",
++pfd_2 = "0000e30000000036m",
++pfdrl_2 = "0000c60200000000p",
+ pt_2 = "00000000b2280000h",
+ pti_2 = "00000000b99e0000h",
+ palb_2 = "00000000b2480000h",
+ rrbe_2 = "00000000b22a0000h",
+ rrbm_2 = "00000000b9ae0000h",
+-rll_5 = "0000eb000000001ds",
+-rllg_5 = "0000eb000000001cs",
++rll_3 = "0000eb000000001ds",
++rllg_3 = "0000eb000000001cs",
+ srst_2 = "00000000b25e0000h",
+ srstu_2 = "00000000b9be0000h",
+ sar_2 = "00000000b24e0000h",
+@@ -868,22 +868,22 @@ sfasr_2 = "00000000b3850000h",
+ spm_2 = "000000000000400g",
+ ssar_2 = "00000000b2250000h",
+ ssair_2 = "00000000b99f0000h",
+-slda_4 = "000000008f000000q",
+-sldl_4 = "000000008d000000q",
+-sla_4 = "000000008b000000q",
+-slak_5 = "0000eb00000000dds",
+-slag_5 = "0000eb000000000bs",
+-sll_4 = "0000000089000000q",
+-sllk_5 = "0000eb00000000dfs",
+-sllg_5 = "0000eb000000000ds",
+-srda_4 = "000000008e000000q",
+-srdl_4 = "000000008c000000q",
+-sra_4 = "000000008a000000q",
+-srak_5 = "0000eb00000000dcs",
+-srag_5 = "0000eb000000000as",
+-srl_4 = "0000000088000000q",
+-srlk_5 = "0000eb00000000des",
+-srlg_5 = "0000eb000000000cs",
++slda_3 = "000000008f000000q",
++sldl_3 = "000000008d000000q",
++sla_3 = "000000008b000000q",
++slak_3 = "0000eb00000000dds",
++slag_3 = "0000eb000000000bs",
++sll_3 = "0000000089000000q",
++sllk_3 = "0000eb00000000dfs",
++sllg_3 = "0000eb000000000ds",
++srda_3 = "000000008e000000q",
++srdl_3 = "000000008c000000q",
++sra_3 = "000000008a000000q",
++srak_3 = "0000eb00000000dcs",
++srag_3 = "0000eb000000000as",
++srl_3 = "0000000088000000q",
++srlk_3 = "0000eb00000000des",
++srlg_3 = "0000eb000000000cs",
+ sqxbr_2 = "00000000b3160000h",
+ sqxr_2 = "00000000b3360000h",
+ sqdbr_2 = "00000000b3150000h",
+@@ -891,79 +891,79 @@ sqdr_2 = "00000000b2440000h",
+ sqebr_2 = "00000000b3140000h",
+ sqer_2 = "00000000b2450000h",
+ st_2 = "0000000050000000j",
+-sty_5 = "0000e30000000050l",
+-stg_5 = "0000e30000000024l",
+-std_4 = "0000000060000000j",
+-stdy_5 = "0000ed0000000067l",
+-ste_4 = "0000000070000000j",
+-stey_5 = "0000ed0000000066l",
+-stam_4 = "000000009b000000q",
+-stamy_5 = "0000eb000000009bs",
+-stc_4 = "0000000042000000j",
+-stcy_5 = "0000e30000000072l",
+-stch_5 = "0000e300000000c3l",
+-stcmh_5 = "0000eb000000002ct",
+-stcm_4 = "00000000be000000r",
+-stcmy_5 = "0000eb000000002dt",
+-stctl_4 = "00000000b6000000q",
+-stctg_5 = "0000eb0000000025s",
+-sth_4 = "0000000040000000j",
+-sthy_5 = "0000e30000000070l",
+-sthh_5 = "0000e300000000c7l",
+-sthrl_3 = "0000c40700000000o",
+-stfh_5 = "0000e300000000cbl",
+-stm_4 = "0000000090000000q",
+-stmy_5 = "0000eb0000000090s",
+-stmg_5 = "0000eb0000000024s",
+-stmh_5 = "0000eb0000000026s",
+-stoc_5 = "0000eb00000000f3t",
+-stocg_5 = "0000eb00000000e3t",
+-stpq_5 = "0000e3000000008el",
+-strl_3 = "0000c40f00000000o",
+-stgrl_3 = "0000c40b00000000o",
+-strvh_5 = "0000e3000000003fl",
+-strv_5 = "0000e3000000003el",
+-strvg_5 = "0000e3000000002fl",
++sty_2 = "0000e30000000050l",
++stg_2 = "0000e30000000024l",
++std_2 = "0000000060000000j",
++stdy_2 = "0000ed0000000067l",
++ste_2 = "0000000070000000j",
++stey_2 = "0000ed0000000066l",
++stam_3 = "000000009b000000q",
++stamy_3 = "0000eb000000009bs",
++stc_2 = "0000000042000000j",
++stcy_2 = "0000e30000000072l",
++stch_2 = "0000e300000000c3l",
++stcmh_3 = "0000eb000000002ct",
++stcm_3 = "00000000be000000r",
++stcmy_3 = "0000eb000000002dt",
++stctl_3 = "00000000b6000000q",
++stctg_3 = "0000eb0000000025s",
++sth_2 = "0000000040000000j",
++sthy_2 = "0000e30000000070l",
++sthh_2 = "0000e300000000c7l",
++sthrl_2 = "0000c40700000000o",
++stfh_2 = "0000e300000000cbl",
++stm_3 = "0000000090000000q",
++stmy_3 = "0000eb0000000090s",
++stmg_3 = "0000eb0000000024s",
++stmh_3 = "0000eb0000000026s",
++stoc_3 = "0000eb00000000f3t",
++stocg_3 = "0000eb00000000e3t",
++stpq_2 = "0000e3000000008el",
++strl_2 = "0000c40f00000000o",
++stgrl_2 = "0000c40b00000000o",
++strvh_2 = "0000e3000000003fl",
++strv_2 = "0000e3000000003el",
++strvg_2 = "0000e3000000002fl",
+ stura_2 = "00000000b2460000h",
+ sturg_2 = "00000000b9250000h",
+-s_4 = "000000005b000000j",
++s_2 = "000000005b000000j",
+ sr_2 = "0000000000001b00g",
+-sy_5 = "0000e3000000005bl",
+-sg_5 = "0000e30000000009l",
++sy_2 = "0000e3000000005bl",
++sg_2 = "0000e30000000009l",
+ sgr_2 = "00000000b9090000h",
+-sgf_5 = "0000e30000000019l",
++sgf_2 = "0000e30000000019l",
+ sgfr_2 = "00000000b9190000h",
+ sxbr_2 = "00000000b34b0000h",
+ sdbr_2 = "00000000b31b0000h",
+ sebr_2 = "00000000b30b0000h",
+-sh_4 = "000000004b000000j",
+-shy_5 = "0000e3000000007bl",
+-sl_4 = "000000005f000000j",
++sh_2 = "000000004b000000j",
++shy_2 = "0000e3000000007bl",
++sl_2 = "000000005f000000j",
+ slr_2 = "0000000000001f00g",
+-sly_5 = "0000e3000000005fl",
+-slg_5 = "0000e3000000000bl",
++sly_2 = "0000e3000000005fl",
++slg_2 = "0000e3000000000bl",
+ slgr_2 = "00000000b90b0000h",
+-slgf_5 = "0000e3000000001bl",
++slgf_2 = "0000e3000000001bl",
+ slgfr_2 = "00000000b91b0000h",
+-slfi_3 = "0000c20500000000n",
+-slgfi_3 = "0000c20400000000n",
+-slb_5 = "0000e30000000099l",
++slfi_2 = "0000c20500000000n",
++slgfi_2 = "0000c20400000000n",
++slb_2 = "0000e30000000099l",
+ slbr_2 = "00000000b9990000h",
+-slbg_5 = "0000e30000000089l",
++slbg_2 = "0000e30000000089l",
+ slbgr_2 = "00000000b9890000h",
+ sxr_2 = "0000000000003700g",
+-sd_4 = "000000006b000000j",
++sd_2 = "000000006b000000j",
+ sdr_2 = "0000000000002b00g",
+-se_4 = "000000007b000000j",
++se_2 = "000000007b000000j",
+ ser_2 = "0000000000003b00g",
+-su_4 = "000000007f000000j",
++su_2 = "000000007f000000j",
+ sur_2 = "0000000000003f00g",
+-sw_4 = "000000006f000000j",
++sw_2 = "000000006f000000j",
+ swr_2 = "0000000000002f00g",
+ tar_2 = "00000000b24c0000h",
+ tb_2 = "00000000b22c0000h",
+-trace_4 = "0000000099000000q",
+-tracg_5 = "0000eb000000000fs",
++trace_3 = "0000000099000000q",
++tracg_3 = "0000eb000000000fs",
+ tre_2 = "00000000b2a50000h",
+ }
+ for cond,c in pairs(map_cond) do
+
+From c71a6189bb91c9ebcffc2d26192cf9f899832f5e Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 1 Dec 2016 14:42:42 -0500
+Subject: [PATCH 047/260] Fix indentation.
+
+I miss gofmt.
+---
+ dynasm/dasm_s390x.lua | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 467e21828..2ee949300 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1029,16 +1029,16 @@ local function parse_template(params, template, nparams, pos)
+ elseif p == "n" then
+
+ elseif p == "y" then
+- local d, x, b, a = parse_mem_bx(params[1])
+- op1 = op1 + x
+- op2 = op2 + shl(b, 12) + d
+- wputhw(op1); wputhw(op2);
+- if a then
+- werror("disp12 actions not yet implemented")
+- end
++ local d, x, b, a = parse_mem_bx(params[1])
++ op1 = op1 + x
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1); wputhw(op2);
++ if a then
++ werror("disp12 actions not yet implemented")
++ end
+ elseif p == "z" then
+- op2 = op2 + parse_gpr(params[1])
+- wputhw(op2)
++ op2 = op2 + parse_gpr(params[1])
++ wputhw(op2)
+ end
+ end
+
+
+From 77f283c328b45f65656075443757522a860a9910 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 1 Dec 2016 17:09:45 -0500
+Subject: [PATCH 048/260] Allow symbols to be used for 12-bit displacements.
+
+The parse_mem_bx function now returns a function to call to add an
+action to the action list to handle the evaluation of the
+displacement. This allows us to delay adding said action until
+after we have emitted the actions for the instruction encodings
+themselves.
+
+Code like this should now work:
+
+int x = 24
+| st r1, x(sp)
+---
+ dynasm/dasm_s390x.h | 10 +++++++---
+ dynasm/dasm_s390x.lua | 19 ++++++++++---------
+ 2 files changed, 17 insertions(+), 12 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 837a2ed0d..8b43a78cd 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -234,8 +234,10 @@ void dasm_put(Dst_DECL, int start, ...)
+ case DASM_IMM16:
+ case DASM_IMM32:
+ case DASM_DISP20:
+- case DASM_DISP12:
+ fprintf(stderr, "not implemented\n");
++ case DASM_DISP12:
++ CK((n>>12) == 0, RANGE_I);
++ b[pos++] = n;
+ break;
+ }
+ }
+@@ -296,7 +298,7 @@ int dasm_link(Dst_DECL, size_t *szp)
+ case DASM_IMM32:
+ case DASM_DISP20:
+ case DASM_DISP12:
+- fprintf(stderr, "not implemented\n");
++ pos++;
+ break;
+ }
+ }
+@@ -364,8 +366,10 @@ int dasm_encode(Dst_DECL, void *buffer)
+ case DASM_IMM16:
+ case DASM_IMM32:
+ case DASM_DISP20:
+- case DASM_DISP12:
+ fprintf(stderr, "not implemented\n");
++ break;
++ case DASM_DISP12:
++ cp[-1] |= n&0xfff;
+ break;
+ default: *cp++ = ins; break;
+ }
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 2ee949300..b3061653a 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -316,6 +316,8 @@ end
+
+ -- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
+ -- are GPRs.
++-- If the fourth return value is not-nil then it needs to be called to
++-- insert an action.
+ -- Encoded as: xbddd
+ local function parse_mem_bx(arg)
+ local d, x, b = split_memop(arg)
+@@ -326,11 +328,10 @@ local function parse_mem_bx(arg)
+ end
+ return dval, x, b, nil
+ end
+- -- TODO: handle d being a symbol.
+- -- Action is currently the final return value (the caller needs to add it
+- -- to the action list at a later point).
+- werror("parse_mem_bx: not implemented")
+- return nil
++ if match(d, "^[rf]1?[0-9]?") then
++ werror("expected immediate operand, got register")
++ end
++ return 0, x, b, function() waction("DISP12", nil, d) end
+ end
+
+ -- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
+@@ -1018,7 +1019,7 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2);
+ if a then
+- werror("disp12 actions not yet implemented")
++ a()
+ end
+ elseif p == "k" then
+
+@@ -1034,7 +1035,7 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2);
+ if a then
+- werror("disp12 actions not yet implemented")
++ a()
+ end
+ elseif p == "z" then
+ op2 = op2 + parse_gpr(params[1])
+@@ -1046,8 +1047,8 @@ end
+ function op_template(params, template, nparams)
+ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
+ -- Limit number of section buffer positions used by a single dasm_put().
+- -- A single opcode needs a maximum of 3 positions.
+- if secpos+3 > maxsecpos then wflush() end
++ -- A single opcode needs a maximum of 5 positions.
++ if secpos+5 > maxsecpos then wflush() end
+ local lpos, apos, spos = #actlist, #actargs, secpos
+ local ok, err
+ for t in gmatch(template, "[^|]+") do
+
+From 6ae327df75bef4bcaba56f24639b09d9b2645982 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 1 Dec 2016 19:25:32 -0500
+Subject: [PATCH 049/260] Add support for RXY instructions (20-bit
+ displacements).
+
+---
+ dynasm/dasm_s390x.h | 10 ++++++++--
+ dynasm/dasm_s390x.lua | 23 ++++++++++++++++++++---
+ 2 files changed, 28 insertions(+), 5 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 8b43a78cd..66dfd79a1 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -233,8 +233,11 @@ void dasm_put(Dst_DECL, int start, ...)
+ break;
+ case DASM_IMM16:
+ case DASM_IMM32:
+- case DASM_DISP20:
+ fprintf(stderr, "not implemented\n");
++ case DASM_DISP20:
++ CK(-(1<<19) <= n && n < (1<<19), RANGE_I);
++ b[pos++] = n;
++ break;
+ case DASM_DISP12:
+ CK((n>>12) == 0, RANGE_I);
+ b[pos++] = n;
+@@ -365,9 +368,12 @@ int dasm_encode(Dst_DECL, void *buffer)
+ case DASM_LABEL_PC: break;
+ case DASM_IMM16:
+ case DASM_IMM32:
+- case DASM_DISP20:
+ fprintf(stderr, "not implemented\n");
+ break;
++ case DASM_DISP20:
++ cp[-2] |= n&0xfff;
++ cp[-1] |= (n>>4)&0xff00;
++ break;
+ case DASM_DISP12:
+ cp[-1] |= n&0xfff;
+ break;
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index b3061653a..6900944b0 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -346,8 +346,18 @@ end
+ -- and b and x are GPRs.
+ -- Encoded as: xblllhh (ls are the low-bits of d, and hs are the high bits).
+ local function parse_mem_bxy(arg)
+- werror("parse_mem_bxy: not implemented")
+- return nil
++ local d, x, b = split_memop(arg)
++ local dval = tonumber(d)
++ if dval then
++ if not is_int20(dval) then
++ werror("displacement out of range: ", dval)
++ end
++ return dval, x, b, nil
++ end
++ if match(d, "^[rf]1?[0-9]?") then
++ werror("expected immediate operand, got register")
++ end
++ return 0, x, b, function() waction("DISP20", nil, d) end
+ end
+
+ local function parse_label(label, def)
+@@ -1024,7 +1034,14 @@ local function parse_template(params, template, nparams, pos)
+ elseif p == "k" then
+
+ elseif p == "l" then
+-
++ local d, x, b, a = parse_mem_bxy(params[2])
++ op0 = op0 + shl(parse_gpr(params[1]), 4) + x
++ op1 = op1 + shl(b, 12) + band(d, 0xfff)
++ op2 = op2 + band(shr(d, 4), 0xff00)
++ wputhw(op0); wputhw(op1); wputhw(op2)
++ if a then
++ a()
++ end
+ elseif p == "m" then
+
+ elseif p == "n" then
+
+From 1b7ded54749bf76a4dc44ff3da309da131925b4f Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 1 Dec 2016 19:45:06 -0500
+Subject: [PATCH 050/260] Add support for RS-a and RSY-a instructions like stm
+ and stmg.
+
+---
+ dynasm/dasm_s390x.lua | 57 ++++++++++++++++++++++++++-----------------
+ 1 file changed, 35 insertions(+), 22 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 6900944b0..039681b49 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -307,13 +307,6 @@ local function split_memop(arg)
+ return nil
+ end
+
+--- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
+--- Encoded as: bddd
+-local function parse_mem_b(arg)
+- werror("parse_mem_b: not implemented")
+- return nil
+-end
+-
+ -- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
+ -- are GPRs.
+ -- If the fourth return value is not-nil then it needs to be called to
+@@ -334,12 +327,14 @@ local function parse_mem_bx(arg)
+ return 0, x, b, function() waction("DISP12", nil, d) end
+ end
+
+--- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
+--- b is a GPR.
+--- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits).
+-local function parse_mem_by(arg)
+- werror("parse_mem_by: not implemented")
+- return nil
++-- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
++-- Encoded as: bddd
++local function parse_mem_b(arg)
++ local d, x, b, a = parse_mem_bx(arg)
++ if x ~= 0 then
++ werror("unexpected index register")
++ end
++ return d, b, a
+ end
+
+ -- Parse memory operand of the form d(x, b) where -(2^20)/2 <= d < (2^20)/2
+@@ -360,6 +355,17 @@ local function parse_mem_bxy(arg)
+ return 0, x, b, function() waction("DISP20", nil, d) end
+ end
+
++-- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
++-- b is a GPR.
++-- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits).
++local function parse_mem_by(arg)
++ local d, x, b, a = parse_mem_bxy(arg)
++ if x ~= 0 then
++ werror("unexpected index register")
++ end
++ return d, b, a
++end
++
+ local function parse_label(label, def)
+ local prefix = sub(label, 1, 2)
+ -- =>label (pc label reference)
+@@ -1028,9 +1034,7 @@ local function parse_template(params, template, nparams, pos)
+ op1 = op1 + shl(parse_gpr(params[1]), 4) + x
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2);
+- if a then
+- a()
+- end
++ if a then a() end
+ elseif p == "k" then
+
+ elseif p == "l" then
+@@ -1039,21 +1043,30 @@ local function parse_template(params, template, nparams, pos)
+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
+ op2 = op2 + band(shr(d, 4), 0xff00)
+ wputhw(op0); wputhw(op1); wputhw(op2)
+- if a then
+- a()
+- end
++ if a then a() end
+ elseif p == "m" then
+
+ elseif p == "n" then
+
++ elseif p == "q" then
++ local d, b, a = parse_mem_b(params[3])
++ op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1); wputhw(op2)
++ if a then a() end
++ elseif p == "s" then
++ local d, b, a = parse_mem_by(params[3])
++ op0 = op0 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
++ op1 = op1 + shl(b, 12) + band(d, 0xfff)
++ op2 = op2 + band(shr(d, 4), 0xff00)
++ wputhw(op0); wputhw(op1); wputhw(op2)
++ if a then a() end
+ elseif p == "y" then
+ local d, x, b, a = parse_mem_bx(params[1])
+ op1 = op1 + x
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2);
+- if a then
+- a()
+- end
++ if a then a() end
+ elseif p == "z" then
+ op2 = op2 + parse_gpr(params[1])
+ wputhw(op2)
+
+From 4c7e494e0aac66011bfb149c0a71c8acd0ae9c4d Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Fri, 2 Dec 2016 12:55:43 +0530
+Subject: [PATCH 051/260] Added support for Immediate addressing mode
+
+Add support for the immediate addressing mode. Still need to check how the 32-bit value is returned; for now this follows the method used for displacements.
+---
+ dynasm/dasm_s390x.lua | 16 +++++++++++++++-
+ 1 file changed, 15 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 039681b49..c2c5a79a3 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -289,6 +289,10 @@ local function is_int20(num)
+ return -shl(1, 19) <= num and num < shl(1, 19)
+ end
+
++local function is_int32(num)
++ return -shl(1,31) <= num and num <shl(1,31)
++end
++
+ -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
+ -- If x is not specified then it is 0.
+ local function split_memop(arg)
+@@ -366,6 +370,14 @@ local function parse_mem_by(arg)
+ return d, b, a
+ end
+
++local function parse_imm(arg)
++ local imm_val = tonumber(arg,16)
++ if not is_int32(imm_val) then
++ werror("Immediate value out of range: ", imm_val)
++ end
++ return imm_val
++end
++
+ local function parse_label(label, def)
+ local prefix = sub(label, 1, 2)
+ -- =>label (pc label reference)
+@@ -1047,7 +1059,9 @@ local function parse_template(params, template, nparams, pos)
+ elseif p == "m" then
+
+ elseif p == "n" then
+-
++ op0 = op0 + shl(parse_gpr(params[1], 4)
++ local imm = parse_imm(param[2])
++ wputhw(op0); waction("IMM32", nil, imm)
+ elseif p == "q" then
+ local d, b, a = parse_mem_b(params[3])
+ op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
+
+From 45553891da72cccd6c5504b91ffed0045beef885 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Fri, 2 Dec 2016 14:13:55 +0530
+Subject: [PATCH 052/260] Minor change , missed out brace
+
+---
+ dynasm/dasm_s390x.lua | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index c2c5a79a3..536f51724 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1059,7 +1059,7 @@ local function parse_template(params, template, nparams, pos)
+ elseif p == "m" then
+
+ elseif p == "n" then
+- op0 = op0 + shl(parse_gpr(params[1], 4)
++ op0 = op0 + shl(parse_gpr(params[1]), 4)
+ local imm = parse_imm(param[2])
+ wputhw(op0); waction("IMM32", nil, imm)
+ elseif p == "q" then
+
+From 1d960f228643d223a67069786cecdd71b49dcd6d Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Fri, 2 Dec 2016 15:21:18 +0530
+Subject: [PATCH 053/260] Create test_z_inst.c
+
+Added examples folder
+Added test code to test basic instructions like add, sub and msr.
+This code is in the process of further expansion and tuning.
+---
+ dynasm/Examples/test_z_inst.c | 80 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 80 insertions(+)
+ create mode 100644 dynasm/Examples/test_z_inst.c
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+new file mode 100644
+index 000000000..314ea0c34
+--- /dev/null
++++ b/dynasm/Examples/test_z_inst.c
+@@ -0,0 +1,80 @@
++#include <assert.h>
++#include <stdio.h>
++#include <sys/mman.h>
++
++#include "../dynasm/dasm_proto.h"
++#include "../dynasm/dasm_s390x.h"
++
++//DynASM directives.
++ |.arch s390x
++ |.actionlist actions
++
++/* Instructio modes
++ mode 0 : RR Mode
++ mode 1 : I Mode
++*/
++
++void *jitcode(dasm_State **state);
++void add(dasm_State * , int);
++void sub(dasm_State * , int);
++void mul(dasm_State * , int);
++
++void *jitcode(dasm_State **state)
++{
++ size_t size;
++ int dasm_status = dasm_link(state, &size);
++ assert(dasm_status == DASM_S_OK);
++
++ void *ret = (int *)calloc(10,sizeof(int));
++ dasm_encode(state, ret);
++ dasm_free(state);
++
++ return (int *)ret;
++}
++
++void add(dasm_State *state)
++{
++ dasm_State ** Dst = &state;
++
++ | ar r2,r3
++ | br r14
++}
++
++void sub(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ | sr r2,r3
++ | br r14
++}
++
++void mul(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ | msr r2 , r3
++ | br r14
++}
++
++void main(int argc, char *argv[])
++{
++ dasm_State *state;
++ dasm_State **Dst = &state;
++ int num1 , num2;
++ int *ret;
++ size_t size;
++
++ int* (*fptr)(int , int) = jitcode(&state);
++
++ num1 = atoi(argv[1]);
++ num2 = atoi(argv[2]);
++
++ dasm_init(&state, 1);
++ dasm_setup(&state, actions);
++
++ /* Call respective test function */
++ sub(state);
++
++ ret = fptr(num1 , num2);
++ printf("The value is %d\n" ,ret);
++}
+
+From f0cc29436c7bab9027352edfd03ccaba0af9f8a9 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Fri, 2 Dec 2016 15:46:45 +0530
+Subject: [PATCH 054/260] Update test_z_inst.c
+
+Added functionality to test different modes of the same instruction type.
+---
+ dynasm/Examples/test_z_inst.c | 36 +++++++++++++++++++++++++++--------
+ 1 file changed, 28 insertions(+), 8 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 314ea0c34..65ca39ac2 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -32,15 +32,35 @@ void *jitcode(dasm_State **state)
+ return (int *)ret;
+ }
+
+-void add(dasm_State *state)
++void add(dasm_State *state , int mode)
+ {
+ dasm_State ** Dst = &state;
+-
+- | ar r2,r3
+- | br r14
++
++ switch(mode)
++ {
++ /* Case RR instruction mode */
++ case 0:
++ {
++ | ar r2,r3
++ | br r14
++ break;
++ }
++ /* Case RIL instruction mode */
++ case 1:
++ {
++ | ar r2,0x16
++ | br r14
++ break;
++ }
++ default:
++ {
++ printf( " Mode not recognised \n ");
++ break;
++ }
++ }
+ }
+
+-void sub(dasm_State *state)
++void sub(dasm_State *state , int mode)
+ {
+ dasm_State **Dst = &state;
+
+@@ -48,7 +68,7 @@ void sub(dasm_State *state)
+ | br r14
+ }
+
+-void mul(dasm_State *state)
++void mul(dasm_State *state, int mode)
+ {
+ dasm_State **Dst = &state;
+
+@@ -73,8 +93,8 @@ void main(int argc, char *argv[])
+ dasm_setup(&state, actions);
+
+ /* Call respective test function */
+- sub(state);
++ add(state , 0);
+
+ ret = fptr(num1 , num2);
+- printf("The value is %d\n" ,ret);
++ printf("Result is %d\n" ,ret);
+ }
+
+From b97a7f7b44fd0643d2032cd66362b1476e5eabb6 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Fri, 2 Dec 2016 17:19:29 +0530
+Subject: [PATCH 055/260] Minor cleanup and modified 32 bit signed check
+
+Modified 32 bit signed check for the immediate value
+---
+ dynasm/dasm_s390x.lua | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 536f51724..76d770e79 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -290,7 +290,7 @@ local function is_int20(num)
+ end
+
+ local function is_int32(num)
+- return -shl(1,31) <= num and num <shl(1,31)
++ return -2147483648 <= num and num < 2147483648
+ end
+
+ -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
+@@ -1060,7 +1060,7 @@ local function parse_template(params, template, nparams, pos)
+
+ elseif p == "n" then
+ op0 = op0 + shl(parse_gpr(params[1]), 4)
+- local imm = parse_imm(param[2])
++ local imm = parse_imm(params[2])
+ wputhw(op0); waction("IMM32", nil, imm)
+ elseif p == "q" then
+ local d, b, a = parse_mem_b(params[3])
+
+From f0dd40dc50b626bf58acdba70b65572c11485dce Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Fri, 2 Dec 2016 17:37:20 +0530
+Subject: [PATCH 056/260] Adding support for Immediate add mode
+
+The masking in immediate mode might not be correct. I understand that for the 20-bit displacement you masked 12 bits and then 8 bits to get the displacement in place (cp[-2] |= n&0xfff; cp[-1] |= (n>>4)&0xff00;). In my case, however, I need all 32 bits, so I am not sure how to go about it. Currently I have just used "n", since there is no point in ANDing it with 0xffff, but I am getting a core dump. Please let me know your comments on this.
+---
+ dynasm/dasm_s390x.h | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 66dfd79a1..6314ff805 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -233,7 +233,10 @@ void dasm_put(Dst_DECL, int start, ...)
+ break;
+ case DASM_IMM16:
+ case DASM_IMM32:
+- fprintf(stderr, "not implemented\n");
++ CK((n>>32) == 0, RANGE_I);
++ b[pos++]=n;
++ break;
++ //fprintf(stderr, "not implemented\n");
+ case DASM_DISP20:
+ CK(-(1<<19) <= n && n < (1<<19), RANGE_I);
+ b[pos++] = n;
+@@ -368,7 +371,9 @@ int dasm_encode(Dst_DECL, void *buffer)
+ case DASM_LABEL_PC: break;
+ case DASM_IMM16:
+ case DASM_IMM32:
+- fprintf(stderr, "not implemented\n");
++ //pintf(stderr, "not implemented\n");
++ cp[-1] |= n
++ cp[-2] |= n
+ break;
+ case DASM_DISP20:
+ cp[-2] |= n&0xfff;
+
+From 3d5c692e13c38ea1d09e562e3f8dd5e6157dd217 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Fri, 2 Dec 2016 17:39:00 +0530
+Subject: [PATCH 057/260] Minor change: Cleanup
+
+---
+ dynasm/dasm_s390x.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 6314ff805..ccfe98f12 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -373,7 +373,7 @@ int dasm_encode(Dst_DECL, void *buffer)
+ case DASM_IMM32:
+ //pintf(stderr, "not implemented\n");
+ cp[-1] |= n
+- cp[-2] |= n
++ cp[-2] |= (n >>4)
+ break;
+ case DASM_DISP20:
+ cp[-2] |= n&0xfff;
+
+From 621ae87058da541506e0b43240352e09d76ddd60 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 2 Dec 2016 13:06:03 -0500
+Subject: [PATCH 058/260] Cleanup and fix compilation.
+
+---
+ dynasm/dasm_s390x.h | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index ccfe98f12..d3039815a 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -232,11 +232,14 @@ void dasm_put(Dst_DECL, int start, ...)
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM16:
++ ofs += 2;
++ fprintf(stderr, "DASM_IMM16 not implemented\n");
++ break;
+ case DASM_IMM32:
++ ofs += 4;
+ CK((n>>32) == 0, RANGE_I);
+ b[pos++]=n;
+ break;
+- //fprintf(stderr, "not implemented\n");
+ case DASM_DISP20:
+ CK(-(1<<19) <= n && n < (1<<19), RANGE_I);
+ b[pos++] = n;
+@@ -370,10 +373,11 @@ int dasm_encode(Dst_DECL, void *buffer)
+ break;
+ case DASM_LABEL_PC: break;
+ case DASM_IMM16:
++ fprintf(stderr, "DASM_IMM16 not implemented\n");
++ break;
+ case DASM_IMM32:
+- //pintf(stderr, "not implemented\n");
+- cp[-1] |= n
+- cp[-2] |= (n >>4)
++ *cp++ = n >> 16;
++ *cp++ = n;
+ break;
+ case DASM_DISP20:
+ cp[-2] |= n&0xfff;
+
+From fc2b633532dc80058d18af3a14ebf3931982a7ff Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 2 Dec 2016 13:41:45 -0500
+Subject: [PATCH 059/260] Auto-format dasm_s390x.h.
+
+I did this mostly to get rid of the annoying tabs/spaces mix in this
+file. It has the side effect of forcing newlines before statements
+which I think is a better style (and not particularly inconsistent
+with the original which used both styles). Other than that I've tried
+to match the original style as closely as possible.
+
+Generated with this command:
+
+indent -i2 -brs -cli0 -br -ce -npcs -nbc -di1 -npsl -ncs dasm_s390x.h
+---
+ dynasm/dasm_s390x.h | 269 +++++++++++++++++++++++++++-----------------
+ 1 file changed, 168 insertions(+), 101 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index d3039815a..a5daaa354 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -21,7 +21,9 @@ enum {
+ /* The following actions need a buffer position. */
+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+- DASM_REL_PC, DASM_LABEL_PC, DASM_DISP12, DASM_DISP20, DASM_IMM16, DASM_IMM32,
++ DASM_REL_PC, DASM_LABEL_PC,
++ DASM_DISP12, DASM_DISP20,
++ DASM_IMM16, DASM_IMM32,
+ DASM__MAX
+ };
+
+@@ -53,12 +55,12 @@ typedef const unsigned short *dasm_ActList;
+
+ /* Per-section structure. */
+ typedef struct dasm_Section {
+- int *rbuf; /* Biased buffer pointer (negative section bias). */
+- int *buf; /* True buffer pointer. */
+- size_t bsize; /* Buffer size in bytes. */
+- int pos; /* Biased buffer position. */
+- int epos; /* End of biased buffer position - max single put. */
+- int ofs; /* Byte offset into section. */
++ int *rbuf; /* Biased buffer pointer (negative section bias). */
++ int *buf; /* True buffer pointer. */
++ size_t bsize; /* Buffer size in bytes. */
++ int pos; /* Biased buffer position. */
++ int epos; /* End of biased buffer position - max single put. */
++ int ofs; /* Byte offset into section. */
+ } dasm_Section;
+
+ /* Core structure holding the DynASM encoding state. */
+@@ -98,10 +100,10 @@ void dasm_init(Dst_DECL, int maxsection)
+ D->globals = NULL;
+ D->maxsection = maxsection;
+ for (i = 0; i < maxsection; i++) {
+- D->sections[i].buf = NULL; /* Need this for pass3. */
++ D->sections[i].buf = NULL; /* Need this for pass3. */
+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+ D->sections[i].bsize = 0;
+- D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
++ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
+ }
+ }
+
+@@ -113,8 +115,10 @@ void dasm_free(Dst_DECL)
+ for (i = 0; i < D->maxsection; i++)
+ if (D->sections[i].buf)
+ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+- if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+- if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
++ if (D->pclabels)
++ DASM_M_FREE(Dst, D->pclabels, D->pcsize);
++ if (D->lglabels)
++ DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+ DASM_M_FREE(Dst, D, D->psize);
+ }
+
+@@ -122,8 +126,8 @@ void dasm_free(Dst_DECL)
+ void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+ {
+ dasm_State *D = Dst_REF;
+- D->globals = gl - 10; /* Negative bias to compensate for locals. */
+- DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
++ D->globals = gl - 10; /* Negative bias to compensate for locals. */
++ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl) * sizeof(int));
+ }
+
+ /* Grow PC label array. Can be called after dasm_setup(), too. */
+@@ -131,8 +135,8 @@ void dasm_growpc(Dst_DECL, unsigned int maxpc)
+ {
+ dasm_State *D = Dst_REF;
+ size_t osz = D->pcsize;
+- DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+- memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
++ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc * sizeof(int));
++ memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz);
+ }
+
+ /* Setup encoder. */
+@@ -140,11 +144,12 @@ void dasm_setup(Dst_DECL, const void *actionlist)
+ {
+ dasm_State *D = Dst_REF;
+ int i;
+- D->actionlist = (dasm_ActList)actionlist;
++ D->actionlist = (dasm_ActList) actionlist;
+ D->status = DASM_S_OK;
+ D->section = &D->sections[0];
+ memset((void *)D->lglabels, 0, D->lgsize);
+- if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
++ if (D->pclabels)
++ memset((void *)D->pclabels, 0, D->pcsize);
+ for (i = 0; i < D->maxsection; i++) {
+ D->sections[i].pos = DASM_SEC2POS(i);
+ D->sections[i].ofs = 0;
+@@ -176,9 +181,10 @@ void dasm_put(Dst_DECL, int start, ...)
+
+ if (pos >= sec->epos) {
+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+- sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
++ sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+- sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
++ sec->epos =
++ (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos);
+ }
+
+ b = sec->rbuf;
+@@ -193,60 +199,84 @@ void dasm_put(Dst_DECL, int start, ...)
+ } else {
+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+ switch (action) {
+- case DASM_STOP: goto stop;
++ case DASM_STOP:
++ goto stop;
+ case DASM_SECTION:
+- n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+- D->section = &D->sections[n]; goto stop;
+- case DASM_ESC: p++; ofs += 4; break;
+- case DASM_REL_EXT: break;
+- case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
++ n = (ins & 255);
++ CK(n < D->maxsection, RANGE_SEC);
++ D->section = &D->sections[n];
++ goto stop;
++ case DASM_ESC:
++ p++;
++ ofs += 4;
++ break;
++ case DASM_REL_EXT:
++ break;
++ case DASM_ALIGN:
++ ofs += (ins & 255);
++ b[pos++] = ofs;
++ break;
+ case DASM_REL_LG:
+- n = (ins & 2047) - 10; pl = D->lglabels + n;
++ n = (ins & 2047) - 10;
++ pl = D->lglabels + n;
+ /* Bkwd rel or global. */
+- if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+- pl += 10; n = *pl;
+- if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
++ if (n >= 0) {
++ CK(n >= 10 || *pl < 0, RANGE_LG);
++ CKPL(lg, LG);
++ goto putrel;
++ }
++ pl += 10;
++ n = *pl;
++ if (n < 0)
++ n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
+- pl = D->pclabels + n; CKPL(pc, PC);
++ pl = D->pclabels + n;
++ CKPL(pc, PC);
+ putrel:
+ n = *pl;
+- if (n < 0) { /* Label exists. Get label pos and store it. */
++ if (n < 0) { /* Label exists. Get label pos and store it. */
+ b[pos] = -n;
+ } else {
+- linkrel:
+- b[pos] = n; /* Else link to rel chain, anchored at label. */
++ linkrel:
++ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+- pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
++ pl = D->lglabels + (ins & 2047) - 10;
++ CKPL(lg, LG);
++ goto putlabel;
+ case DASM_LABEL_PC:
+- pl = D->pclabels + n; CKPL(pc, PC);
++ pl = D->pclabels + n;
++ CKPL(pc, PC);
+ putlabel:
+- n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+- while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
++ while (n > 0) {
++ int *pb = DASM_POS2PTR(D, n);
++ n = *pb;
++ *pb = pos;
+ }
+- *pl = -pos; /* Label exists now. */
+- b[pos++] = ofs; /* Store pass1 offset estimate. */
++ *pl = -pos; /* Label exists now. */
++ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM16:
+- ofs += 2;
+- fprintf(stderr, "DASM_IMM16 not implemented\n");
+- break;
++ ofs += 2;
++ fprintf(stderr, "DASM_IMM16 not implemented\n");
++ break;
+ case DASM_IMM32:
+- ofs += 4;
+- CK((n>>32) == 0, RANGE_I);
+- b[pos++]=n;
+- break;
++ ofs += 4;
++ CK((n >> 32) == 0, RANGE_I);
++ b[pos++] = n;
++ break;
+ case DASM_DISP20:
+- CK(-(1<<19) <= n && n < (1<<19), RANGE_I);
+- b[pos++] = n;
+- break;
++ CK(-(1 << 19) <= n && n < (1 << 19), RANGE_I);
++ b[pos++] = n;
++ break;
+ case DASM_DISP12:
+- CK((n>>12) == 0, RANGE_I);
+- b[pos++] = n;
++ CK((n >> 12) == 0, RANGE_I);
++ b[pos++] = n;
+ break;
+ }
+ }
+@@ -256,10 +286,11 @@ void dasm_put(Dst_DECL, int start, ...)
+ sec->pos = pos;
+ sec->ofs = ofs;
+ }
++
+ #undef CK
+
+ /* Pass 2: Link sections, shrink aligns, fix label offsets. */
+-int dasm_link(Dst_DECL, size_t *szp)
++int dasm_link(Dst_DECL, size_t * szp)
+ {
+ dasm_State *D = Dst_REF;
+ int secnum;
+@@ -267,20 +298,26 @@ int dasm_link(Dst_DECL, size_t *szp)
+
+ #ifdef DASM_CHECKS
+ *szp = 0;
+- if (D->status != DASM_S_OK) return D->status;
++ if (D->status != DASM_S_OK)
++ return D->status;
+ {
+ int pc;
+- for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+- if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
++ for (pc = 0; pc * sizeof(int) < D->pcsize; pc++)
++ if (D->pclabels[pc] > 0)
++ return DASM_S_UNDEF_PC | pc;
+ }
+ #endif
+
+- { /* Handle globals not defined in this translation unit. */
++ { /* Handle globals not defined in this translation unit. */
+ int idx;
+- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
++ for (idx = 20; idx * sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+- while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
++ while (n > 0) {
++ int *pb = DASM_POS2PTR(D, n);
++ n = *pb;
++ *pb = -idx;
++ }
+ }
+ }
+
+@@ -297,26 +334,39 @@ int dasm_link(Dst_DECL, size_t *szp)
+ unsigned short ins = *p++;
+ unsigned short action = ins;
+ switch (action) {
+- case DASM_STOP: case DASM_SECTION: goto stop;
+- case DASM_ESC: p++; break;
+- case DASM_REL_EXT: break;
+- case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+- case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+- case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+- case DASM_IMM16:
+- case DASM_IMM32:
+- case DASM_DISP20:
+- case DASM_DISP12:
+- pos++;
++ case DASM_STOP:
++ case DASM_SECTION:
++ goto stop;
++ case DASM_ESC:
++ p++;
++ break;
++ case DASM_REL_EXT:
++ break;
++ case DASM_ALIGN:
++ ofs -= (b[pos++] + ofs) & (ins & 255);
++ break;
++ case DASM_REL_LG:
++ case DASM_REL_PC:
++ pos++;
++ break;
++ case DASM_LABEL_LG:
++ case DASM_LABEL_PC:
++ b[pos++] += ofs;
++ break;
++ case DASM_IMM16:
++ case DASM_IMM32:
++ case DASM_DISP20:
++ case DASM_DISP12:
++ pos++;
+ break;
+ }
+ }
+- stop: (void)0;
++ stop:(void)0;
+ }
+- ofs += sec->ofs; /* Next section starts right after current section. */
++ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+- D->codesize = ofs; /* Total size of all code sections */
++ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+ }
+@@ -349,13 +399,19 @@ int dasm_encode(Dst_DECL, void *buffer)
+ unsigned short action = ins;
+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+ switch (action) {
+- case DASM_STOP: case DASM_SECTION: goto stop;
+- case DASM_ESC: *cp++ = *p++; break;
++ case DASM_STOP:
++ case DASM_SECTION:
++ goto stop;
++ case DASM_ESC:
++ *cp++ = *p++;
++ break;
+ case DASM_REL_EXT:
+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
+ goto patchrel;
+ case DASM_ALIGN:
+- ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x0707;
++ ins &= 255;
++ while ((((char *)cp - base) & ins))
++ *cp++ = 0x0707;
+ break;
+ case DASM_REL_LG:
+ CK(n >= 0, UNDEF_LG);
+@@ -364,51 +420,59 @@ int dasm_encode(Dst_DECL, void *buffer)
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+ patchrel:
+ CK((n & 3) == 0 &&
+- (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
+- ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
+- cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
++ (((n + 4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
++ ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
++ cp[-1] |= ((n + 4) & ((ins & 2048) ? 0x0000fffc : 0x03fffffc));
+ break;
+ case DASM_LABEL_LG:
+- ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
++ ins &= 2047;
++ if (ins >= 20)
++ D->globals[ins - 10] = (void *)(base + n);
++ break;
++ case DASM_LABEL_PC:
+ break;
+- case DASM_LABEL_PC: break;
+- case DASM_IMM16:
+- fprintf(stderr, "DASM_IMM16 not implemented\n");
+- break;
+- case DASM_IMM32:
++ case DASM_IMM16:
++ fprintf(stderr, "DASM_IMM16 not implemented\n");
++ break;
++ case DASM_IMM32:
+ *cp++ = n >> 16;
+- *cp++ = n;
+- break;
+- case DASM_DISP20:
+- cp[-2] |= n&0xfff;
+- cp[-1] |= (n>>4)&0xff00;
+- break;
+- case DASM_DISP12:
+- cp[-1] |= n&0xfff;
++ *cp++ = n;
++ break;
++ case DASM_DISP20:
++ cp[-2] |= n & 0xfff;
++ cp[-1] |= (n >> 4) & 0xff00;
++ break;
++ case DASM_DISP12:
++ cp[-1] |= n & 0xfff;
++ break;
++ default:
++ *cp++ = ins;
+ break;
+- default: *cp++ = ins; break;
+ }
+ }
+- stop: (void)0;
++ stop:(void)0;
+ }
+ }
+
+- if (base + D->codesize != (char *)cp) /* Check for phase errors. */
++ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
+ return DASM_S_PHASE;
+ return DASM_S_OK;
+ }
++
+ #undef CK
+
+ /* Get PC label offset. */
+ int dasm_getpclabel(Dst_DECL, unsigned int pc)
+ {
+ dasm_State *D = Dst_REF;
+- if (pc*sizeof(int) < D->pcsize) {
++ if (pc * sizeof(int) < D->pcsize) {
+ int pos = D->pclabels[pc];
+- if (pos < 0) return *DASM_POS2PTR(D, -pos);
+- if (pos > 0) return -1; /* Undefined. */
++ if (pos < 0)
++ return *DASM_POS2PTR(D, -pos);
++ if (pos > 0)
++ return -1; /* Undefined. */
+ }
+- return -2; /* Unused or out of range. */
++ return -2; /* Unused or out of range. */
+ }
+
+ #ifdef DASM_CHECKS
+@@ -419,13 +483,16 @@ int dasm_checkstep(Dst_DECL, int secmatch)
+ if (D->status == DASM_S_OK) {
+ int i;
+ for (i = 1; i <= 9; i++) {
+- if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
++ if (D->lglabels[i] > 0) {
++ D->status = DASM_S_UNDEF_LG | i;
++ break;
++ }
+ D->lglabels[i] = 0;
+ }
+ }
+ if (D->status == DASM_S_OK && secmatch >= 0 &&
+ D->section != &D->sections[secmatch])
+- D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
++ D->status = DASM_S_MATCH_SEC | (D->section - D->sections);
+ return D->status;
+ }
+ #endif
+
+From 2324be897e2cd55e9bf30a1be5ab67074e455c81 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 2 Dec 2016 13:50:09 -0500
+Subject: [PATCH 060/260] Reduce indentation level of big switch statement.
+
+A style thing. I find it easier to read this way.
+
+i.e. do:
+
+while(1) {
+ if (blah) {
+ ...
+ continue;
+ }
+ ... // big switch statement
+}
+
+instead of:
+
+while(1) {
+ if (blah) {
+ ...
+ } else {
+ ... // big switch statement
+ }
+}
+---
+ dynasm/dasm_s390x.h | 163 ++++++++++++++++++++++----------------------
+ 1 file changed, 82 insertions(+), 81 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index a5daaa354..d8566034d 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -196,89 +196,90 @@ void dasm_put(Dst_DECL, int start, ...)
+ unsigned short action = ins;
+ if (action >= DASM__MAX) {
+ ofs += 2;
+- } else {
+- int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+- switch (action) {
+- case DASM_STOP:
+- goto stop;
+- case DASM_SECTION:
+- n = (ins & 255);
+- CK(n < D->maxsection, RANGE_SEC);
+- D->section = &D->sections[n];
+- goto stop;
+- case DASM_ESC:
+- p++;
+- ofs += 4;
+- break;
+- case DASM_REL_EXT:
+- break;
+- case DASM_ALIGN:
+- ofs += (ins & 255);
+- b[pos++] = ofs;
+- break;
+- case DASM_REL_LG:
+- n = (ins & 2047) - 10;
+- pl = D->lglabels + n;
+- /* Bkwd rel or global. */
+- if (n >= 0) {
+- CK(n >= 10 || *pl < 0, RANGE_LG);
+- CKPL(lg, LG);
+- goto putrel;
+- }
+- pl += 10;
+- n = *pl;
+- if (n < 0)
+- n = 0; /* Start new chain for fwd rel if label exists. */
+- goto linkrel;
+- case DASM_REL_PC:
+- pl = D->pclabels + n;
+- CKPL(pc, PC);
+- putrel:
+- n = *pl;
+- if (n < 0) { /* Label exists. Get label pos and store it. */
+- b[pos] = -n;
+- } else {
+- linkrel:
+- b[pos] = n; /* Else link to rel chain, anchored at label. */
+- *pl = pos;
+- }
+- pos++;
+- break;
+- case DASM_LABEL_LG:
+- pl = D->lglabels + (ins & 2047) - 10;
++ continue;
++ }
++
++ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
++ switch (action) {
++ case DASM_STOP:
++ goto stop;
++ case DASM_SECTION:
++ n = (ins & 255);
++ CK(n < D->maxsection, RANGE_SEC);
++ D->section = &D->sections[n];
++ goto stop;
++ case DASM_ESC:
++ p++;
++ ofs += 4;
++ break;
++ case DASM_REL_EXT:
++ break;
++ case DASM_ALIGN:
++ ofs += (ins & 255);
++ b[pos++] = ofs;
++ break;
++ case DASM_REL_LG:
++ n = (ins & 2047) - 10;
++ pl = D->lglabels + n;
++ /* Bkwd rel or global. */
++ if (n >= 0) {
++ CK(n >= 10 || *pl < 0, RANGE_LG);
+ CKPL(lg, LG);
+- goto putlabel;
+- case DASM_LABEL_PC:
+- pl = D->pclabels + n;
+- CKPL(pc, PC);
+- putlabel:
+- n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+- while (n > 0) {
+- int *pb = DASM_POS2PTR(D, n);
+- n = *pb;
+- *pb = pos;
+- }
+- *pl = -pos; /* Label exists now. */
+- b[pos++] = ofs; /* Store pass1 offset estimate. */
+- break;
+- case DASM_IMM16:
+- ofs += 2;
+- fprintf(stderr, "DASM_IMM16 not implemented\n");
+- break;
+- case DASM_IMM32:
+- ofs += 4;
+- CK((n >> 32) == 0, RANGE_I);
+- b[pos++] = n;
+- break;
+- case DASM_DISP20:
+- CK(-(1 << 19) <= n && n < (1 << 19), RANGE_I);
+- b[pos++] = n;
+- break;
+- case DASM_DISP12:
+- CK((n >> 12) == 0, RANGE_I);
+- b[pos++] = n;
+- break;
++ goto putrel;
++ }
++ pl += 10;
++ n = *pl;
++ if (n < 0)
++ n = 0; /* Start new chain for fwd rel if label exists. */
++ goto linkrel;
++ case DASM_REL_PC:
++ pl = D->pclabels + n;
++ CKPL(pc, PC);
++ putrel:
++ n = *pl;
++ if (n < 0) { /* Label exists. Get label pos and store it. */
++ b[pos] = -n;
++ } else {
++ linkrel:
++ b[pos] = n; /* Else link to rel chain, anchored at label. */
++ *pl = pos;
+ }
++ pos++;
++ break;
++ case DASM_LABEL_LG:
++ pl = D->lglabels + (ins & 2047) - 10;
++ CKPL(lg, LG);
++ goto putlabel;
++ case DASM_LABEL_PC:
++ pl = D->pclabels + n;
++ CKPL(pc, PC);
++ putlabel:
++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
++ while (n > 0) {
++ int *pb = DASM_POS2PTR(D, n);
++ n = *pb;
++ *pb = pos;
++ }
++ *pl = -pos; /* Label exists now. */
++ b[pos++] = ofs; /* Store pass1 offset estimate. */
++ break;
++ case DASM_IMM16:
++ ofs += 2;
++ fprintf(stderr, "DASM_IMM16 not implemented\n");
++ break;
++ case DASM_IMM32:
++ ofs += 4;
++ CK((n >> 32) == 0, RANGE_I);
++ b[pos++] = n;
++ break;
++ case DASM_DISP20:
++ CK(-(1 << 19) <= n && n < (1 << 19), RANGE_I);
++ b[pos++] = n;
++ break;
++ case DASM_DISP12:
++ CK((n >> 12) == 0, RANGE_I);
++ b[pos++] = n;
++ break;
+ }
+ }
+ stop:
+
+From 7181c391bd50cbe91b0a8ba8783efa716354c77d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 2 Dec 2016 14:20:59 -0500
+Subject: [PATCH 061/260] Add C code to handle IMM16.
+
+---
+ dynasm/dasm_s390x.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index d8566034d..12b8b2bd9 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -264,12 +264,12 @@ void dasm_put(Dst_DECL, int start, ...)
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM16:
++ CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
+ ofs += 2;
+- fprintf(stderr, "DASM_IMM16 not implemented\n");
++ b[pos++] = n;
+ break;
+ case DASM_IMM32:
+ ofs += 4;
+- CK((n >> 32) == 0, RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_DISP20:
+@@ -433,7 +433,7 @@ int dasm_encode(Dst_DECL, void *buffer)
+ case DASM_LABEL_PC:
+ break;
+ case DASM_IMM16:
+- fprintf(stderr, "DASM_IMM16 not implemented\n");
++ *cp++ = n;
+ break;
+ case DASM_IMM32:
+ *cp++ = n >> 16;
+
+From 3ec573e750fca863ee46c699431852155761b507 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 2 Dec 2016 14:35:33 -0500
+Subject: [PATCH 062/260] Add support for .align directive.
+
+---
+ dynasm/dasm_s390x.h | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 12b8b2bd9..d505b381c 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -215,7 +215,7 @@ void dasm_put(Dst_DECL, int start, ...)
+ case DASM_REL_EXT:
+ break;
+ case DASM_ALIGN:
+- ofs += (ins & 255);
++ ofs += *p++;
+ b[pos++] = ofs;
+ break;
+ case DASM_REL_LG:
+@@ -344,7 +344,7 @@ int dasm_link(Dst_DECL, size_t * szp)
+ case DASM_REL_EXT:
+ break;
+ case DASM_ALIGN:
+- ofs -= (b[pos++] + ofs) & (ins & 255);
++ ofs -= (b[pos++] + ofs) & *p++;
+ break;
+ case DASM_REL_LG:
+ case DASM_REL_PC:
+@@ -410,9 +410,10 @@ int dasm_encode(Dst_DECL, void *buffer)
+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
+ goto patchrel;
+ case DASM_ALIGN:
+- ins &= 255;
++ ins = *p++;
++ /* TODO: emit 4-byte noprs instead of 2-byte nops where possible. */
+ while ((((char *)cp - base) & ins))
+- *cp++ = 0x0707;
++ *cp++ = 0x0700; /* nop */
+ break;
+ case DASM_REL_LG:
+ CK(n >= 0, UNDEF_LG);
+
+From 1dd736f09abe20053a751b0eb0c7d615226d1052 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 2 Dec 2016 15:14:37 -0500
+Subject: [PATCH 063/260] Auto-format dasm_s390x.h (again).
+
+This time explicitly ban tabs.
+
+indent -i2 -brs -cli0 -br -ce -npcs -nbc -di1 -npsl -ncs -nut dasm_s390x.h
+---
+ dynasm/dasm_s390x.h | 262 ++++++++++++++++++++++----------------------
+ 1 file changed, 131 insertions(+), 131 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index d505b381c..8cd2fc25e 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -55,28 +55,28 @@ typedef const unsigned short *dasm_ActList;
+
+ /* Per-section structure. */
+ typedef struct dasm_Section {
+- int *rbuf; /* Biased buffer pointer (negative section bias). */
+- int *buf; /* True buffer pointer. */
+- size_t bsize; /* Buffer size in bytes. */
+- int pos; /* Biased buffer position. */
+- int epos; /* End of biased buffer position - max single put. */
+- int ofs; /* Byte offset into section. */
++ int *rbuf; /* Biased buffer pointer (negative section bias). */
++ int *buf; /* True buffer pointer. */
++ size_t bsize; /* Buffer size in bytes. */
++ int pos; /* Biased buffer position. */
++ int epos; /* End of biased buffer position - max single put. */
++ int ofs; /* Byte offset into section. */
+ } dasm_Section;
+
+ /* Core structure holding the DynASM encoding state. */
+ struct dasm_State {
+- size_t psize; /* Allocated size of this structure. */
+- dasm_ActList actionlist; /* Current actionlist pointer. */
+- int *lglabels; /* Local/global chain/pos ptrs. */
++ size_t psize; /* Allocated size of this structure. */
++ dasm_ActList actionlist; /* Current actionlist pointer. */
++ int *lglabels; /* Local/global chain/pos ptrs. */
+ size_t lgsize;
+- int *pclabels; /* PC label chains/pos ptrs. */
++ int *pclabels; /* PC label chains/pos ptrs. */
+ size_t pcsize;
+- void **globals; /* Array of globals (bias -10). */
+- dasm_Section *section; /* Pointer to active section. */
+- size_t codesize; /* Total size of all code sections. */
+- int maxsection; /* 0 <= sectionidx < maxsection. */
+- int status; /* Status code. */
+- dasm_Section sections[1]; /* All sections. Alloc-extended. */
++ void **globals; /* Array of globals (bias -10). */
++ dasm_Section *section; /* Pointer to active section. */
++ size_t codesize; /* Total size of all code sections. */
++ int maxsection; /* 0 <= sectionidx < maxsection. */
++ int status; /* Status code. */
++ dasm_Section sections[1]; /* All sections. Alloc-extended. */
+ };
+
+ /* The size of the core structure depends on the max. number of sections. */
+@@ -100,10 +100,10 @@ void dasm_init(Dst_DECL, int maxsection)
+ D->globals = NULL;
+ D->maxsection = maxsection;
+ for (i = 0; i < maxsection; i++) {
+- D->sections[i].buf = NULL; /* Need this for pass3. */
++ D->sections[i].buf = NULL; /* Need this for pass3. */
+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+ D->sections[i].bsize = 0;
+- D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
++ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
+ }
+ }
+
+@@ -126,7 +126,7 @@ void dasm_free(Dst_DECL)
+ void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+ {
+ dasm_State *D = Dst_REF;
+- D->globals = gl - 10; /* Negative bias to compensate for locals. */
++ D->globals = gl - 10; /* Negative bias to compensate for locals. */
+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl) * sizeof(int));
+ }
+
+@@ -181,7 +181,7 @@ void dasm_put(Dst_DECL, int start, ...)
+
+ if (pos >= sec->epos) {
+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+- sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int));
++ sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+ sec->epos =
+ (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos);
+@@ -223,26 +223,26 @@ void dasm_put(Dst_DECL, int start, ...)
+ pl = D->lglabels + n;
+ /* Bkwd rel or global. */
+ if (n >= 0) {
+- CK(n >= 10 || *pl < 0, RANGE_LG);
+- CKPL(lg, LG);
+- goto putrel;
++ CK(n >= 10 || *pl < 0, RANGE_LG);
++ CKPL(lg, LG);
++ goto putrel;
+ }
+ pl += 10;
+ n = *pl;
+ if (n < 0)
+- n = 0; /* Start new chain for fwd rel if label exists. */
++ n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
+ pl = D->pclabels + n;
+ CKPL(pc, PC);
+ putrel:
+ n = *pl;
+- if (n < 0) { /* Label exists. Get label pos and store it. */
+- b[pos] = -n;
++ if (n < 0) { /* Label exists. Get label pos and store it. */
++ b[pos] = -n;
+ } else {
+ linkrel:
+- b[pos] = n; /* Else link to rel chain, anchored at label. */
+- *pl = pos;
++ b[pos] = n; /* Else link to rel chain, anchored at label. */
++ *pl = pos;
+ }
+ pos++;
+ break;
+@@ -254,17 +254,17 @@ void dasm_put(Dst_DECL, int start, ...)
+ pl = D->pclabels + n;
+ CKPL(pc, PC);
+ putlabel:
+- n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+ while (n > 0) {
+- int *pb = DASM_POS2PTR(D, n);
+- n = *pb;
+- *pb = pos;
++ int *pb = DASM_POS2PTR(D, n);
++ n = *pb;
++ *pb = pos;
+ }
+- *pl = -pos; /* Label exists now. */
+- b[pos++] = ofs; /* Store pass1 offset estimate. */
++ *pl = -pos; /* Label exists now. */
++ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM16:
+- CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
++ CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
+ ofs += 2;
+ b[pos++] = n;
+ break;
+@@ -305,19 +305,19 @@ int dasm_link(Dst_DECL, size_t * szp)
+ int pc;
+ for (pc = 0; pc * sizeof(int) < D->pcsize; pc++)
+ if (D->pclabels[pc] > 0)
+- return DASM_S_UNDEF_PC | pc;
++ return DASM_S_UNDEF_PC | pc;
+ }
+ #endif
+
+- { /* Handle globals not defined in this translation unit. */
++ { /* Handle globals not defined in this translation unit. */
+ int idx;
+ for (idx = 20; idx * sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) {
+- int *pb = DASM_POS2PTR(D, n);
+- n = *pb;
+- *pb = -idx;
++ int *pb = DASM_POS2PTR(D, n);
++ n = *pb;
++ *pb = -idx;
+ }
+ }
+ }
+@@ -332,42 +332,42 @@ int dasm_link(Dst_DECL, size_t * szp)
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
+ while (1) {
+- unsigned short ins = *p++;
+- unsigned short action = ins;
+- switch (action) {
+- case DASM_STOP:
+- case DASM_SECTION:
+- goto stop;
+- case DASM_ESC:
+- p++;
+- break;
+- case DASM_REL_EXT:
+- break;
+- case DASM_ALIGN:
+- ofs -= (b[pos++] + ofs) & *p++;
+- break;
+- case DASM_REL_LG:
+- case DASM_REL_PC:
+- pos++;
+- break;
+- case DASM_LABEL_LG:
+- case DASM_LABEL_PC:
+- b[pos++] += ofs;
+- break;
+- case DASM_IMM16:
+- case DASM_IMM32:
+- case DASM_DISP20:
+- case DASM_DISP12:
+- pos++;
+- break;
+- }
++ unsigned short ins = *p++;
++ unsigned short action = ins;
++ switch (action) {
++ case DASM_STOP:
++ case DASM_SECTION:
++ goto stop;
++ case DASM_ESC:
++ p++;
++ break;
++ case DASM_REL_EXT:
++ break;
++ case DASM_ALIGN:
++ ofs -= (b[pos++] + ofs) & *p++;
++ break;
++ case DASM_REL_LG:
++ case DASM_REL_PC:
++ pos++;
++ break;
++ case DASM_LABEL_LG:
++ case DASM_LABEL_PC:
++ b[pos++] += ofs;
++ break;
++ case DASM_IMM16:
++ case DASM_IMM32:
++ case DASM_DISP20:
++ case DASM_DISP12:
++ pos++;
++ break;
++ }
+ }
+ stop:(void)0;
+ }
+- ofs += sec->ofs; /* Next section starts right after current section. */
++ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+- D->codesize = ofs; /* Total size of all code sections */
++ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+ }
+@@ -396,67 +396,67 @@ int dasm_encode(Dst_DECL, void *buffer)
+ while (b != endb) {
+ dasm_ActList p = D->actionlist + *b++;
+ while (1) {
+- unsigned short ins = *p++;
+- unsigned short action = ins;
+- int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+- switch (action) {
+- case DASM_STOP:
+- case DASM_SECTION:
+- goto stop;
+- case DASM_ESC:
+- *cp++ = *p++;
+- break;
+- case DASM_REL_EXT:
+- n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
+- goto patchrel;
+- case DASM_ALIGN:
+- ins = *p++;
+- /* TODO: emit 4-byte noprs instead of 2-byte nops where possible. */
+- while ((((char *)cp - base) & ins))
+- *cp++ = 0x0700; /* nop */
+- break;
+- case DASM_REL_LG:
+- CK(n >= 0, UNDEF_LG);
+- case DASM_REL_PC:
+- CK(n >= 0, UNDEF_PC);
+- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+- patchrel:
+- CK((n & 3) == 0 &&
+- (((n + 4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
+- ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
+- cp[-1] |= ((n + 4) & ((ins & 2048) ? 0x0000fffc : 0x03fffffc));
+- break;
+- case DASM_LABEL_LG:
+- ins &= 2047;
+- if (ins >= 20)
+- D->globals[ins - 10] = (void *)(base + n);
+- break;
+- case DASM_LABEL_PC:
+- break;
+- case DASM_IMM16:
+- *cp++ = n;
+- break;
+- case DASM_IMM32:
+- *cp++ = n >> 16;
+- *cp++ = n;
+- break;
+- case DASM_DISP20:
+- cp[-2] |= n & 0xfff;
+- cp[-1] |= (n >> 4) & 0xff00;
+- break;
+- case DASM_DISP12:
+- cp[-1] |= n & 0xfff;
+- break;
+- default:
+- *cp++ = ins;
+- break;
+- }
++ unsigned short ins = *p++;
++ unsigned short action = ins;
++ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
++ switch (action) {
++ case DASM_STOP:
++ case DASM_SECTION:
++ goto stop;
++ case DASM_ESC:
++ *cp++ = *p++;
++ break;
++ case DASM_REL_EXT:
++ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
++ goto patchrel;
++ case DASM_ALIGN:
++ ins = *p++;
++ /* TODO: emit 4-byte noprs instead of 2-byte nops where possible. */
++ while ((((char *)cp - base) & ins))
++ *cp++ = 0x0700; /* nop */
++ break;
++ case DASM_REL_LG:
++ CK(n >= 0, UNDEF_LG);
++ case DASM_REL_PC:
++ CK(n >= 0, UNDEF_PC);
++ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
++ patchrel:
++ CK((n & 3) == 0 &&
++ (((n + 4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
++ ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
++ cp[-1] |= ((n + 4) & ((ins & 2048) ? 0x0000fffc : 0x03fffffc));
++ break;
++ case DASM_LABEL_LG:
++ ins &= 2047;
++ if (ins >= 20)
++ D->globals[ins - 10] = (void *)(base + n);
++ break;
++ case DASM_LABEL_PC:
++ break;
++ case DASM_IMM16:
++ *cp++ = n;
++ break;
++ case DASM_IMM32:
++ *cp++ = n >> 16;
++ *cp++ = n;
++ break;
++ case DASM_DISP20:
++ cp[-2] |= n & 0xfff;
++ cp[-1] |= (n >> 4) & 0xff00;
++ break;
++ case DASM_DISP12:
++ cp[-1] |= n & 0xfff;
++ break;
++ default:
++ *cp++ = ins;
++ break;
++ }
+ }
+ stop:(void)0;
+ }
+ }
+
+- if (base + D->codesize != (char *)cp) /* Check for phase errors. */
++ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
+ return DASM_S_PHASE;
+ return DASM_S_OK;
+ }
+@@ -472,9 +472,9 @@ int dasm_getpclabel(Dst_DECL, unsigned int pc)
+ if (pos < 0)
+ return *DASM_POS2PTR(D, -pos);
+ if (pos > 0)
+- return -1; /* Undefined. */
++ return -1; /* Undefined. */
+ }
+- return -2; /* Unused or out of range. */
++ return -2; /* Unused or out of range. */
+ }
+
+ #ifdef DASM_CHECKS
+@@ -486,8 +486,8 @@ int dasm_checkstep(Dst_DECL, int secmatch)
+ int i;
+ for (i = 1; i <= 9; i++) {
+ if (D->lglabels[i] > 0) {
+- D->status = DASM_S_UNDEF_LG | i;
+- break;
++ D->status = DASM_S_UNDEF_LG | i;
++ break;
+ }
+ D->lglabels[i] = 0;
+ }
+
+From 17d91e2f0c6b6244729ef6d2bd957e82dcc762d7 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Mon, 5 Dec 2016 13:46:44 +0530
+Subject: [PATCH 064/260] Update test_z_inst.c
+
+Changed the code into the form of a test table.
+Currently handles RR-based addition, subtraction and multiply tests.
+---
+ dynasm/Examples/test_z_inst.c | 84 ++++++++++++++++-------------------
+ 1 file changed, 38 insertions(+), 46 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 65ca39ac2..9c1ae26f4 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -9,15 +9,26 @@
+ |.arch s390x
+ |.actionlist actions
+
+-/* Instructio modes
+- mode 0 : RR Mode
+- mode 1 : I Mode
+-*/
++typedef struct
++{
++ int arg1;
++ int arg2;
++ void (*fn)(dasm_State *);
++ int want;
++ char *testname;
++}test_table;
++
++test_table test[] = {
++ {1,2,add,3,"add"},
++ {10,5 ,sub ,5,"subract"} ,
++ {2,3,mul,6,"Multiply"}
++ };
++
+
+ void *jitcode(dasm_State **state);
+-void add(dasm_State * , int);
+-void sub(dasm_State * , int);
+-void mul(dasm_State * , int);
++void add(dasm_State *);
++void sub(dasm_State *);
++void mul(dasm_State *);
+
+ void *jitcode(dasm_State **state)
+ {
+@@ -32,35 +43,15 @@ void *jitcode(dasm_State **state)
+ return (int *)ret;
+ }
+
+-void add(dasm_State *state , int mode)
++void add(dasm_State *state)
+ {
+ dasm_State ** Dst = &state;
+
+- switch(mode)
+- {
+- /* Case RR instruction mode */
+- case 0:
+- {
+- | ar r2,r3
+- | br r14
+- break;
+- }
+- /* Case RIL instruction mode */
+- case 1:
+- {
+- | ar r2,0x16
+- | br r14
+- break;
+- }
+- default:
+- {
+- printf( " Mode not recognised \n ");
+- break;
+- }
+- }
++ | ar r2,r3
++ | br r14
+ }
+
+-void sub(dasm_State *state , int mode)
++void sub(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
+
+@@ -68,7 +59,7 @@ void sub(dasm_State *state , int mode)
+ | br r14
+ }
+
+-void mul(dasm_State *state, int mode)
++void mul(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
+
+@@ -80,21 +71,22 @@ void main(int argc, char *argv[])
+ {
+ dasm_State *state;
+ dasm_State **Dst = &state;
+- int num1 , num2;
+- int *ret;
++ int i;
+ size_t size;
+
+- int* (*fptr)(int , int) = jitcode(&state);
+-
+- num1 = atoi(argv[1]);
+- num2 = atoi(argv[2]);
+-
+- dasm_init(&state, 1);
+- dasm_setup(&state, actions);
+-
+- /* Call respective test function */
+- add(state , 0);
++ for(i=0;i<sizeof(test)/sizeof(test[0]);i++)
++ {
++ dasm_init(&state, 1);
++ dasm_setup(&state, actions);
++ test[i].fn(state);
++ int (*fptr)(int, int) = jitcode(&state);
++ int got = fptr(test[i].arg1, test[i].arg2);
+
+- ret = fptr(num1 , num2);
+- printf("Result is %d\n" ,ret);
++ if (got != test[i].want) {
++ fprintf(stderr, "test %s failed: want %d, got %d\n", test[i].testname, test[i].want, got);
++ exit(1);
++ }
++ free(fptr);
++ }
++ printf("All test passed\n");
+ }
+
+From 9745e9df262928b9b078f84b05e54f7eb677688d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 2 Dec 2016 15:30:09 -0500
+Subject: [PATCH 065/260] Get DASM_SECTION argument from the correct place.
+
+---
+ dynasm/dasm_s390x.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 8cd2fc25e..e2cd51973 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -204,7 +204,7 @@ void dasm_put(Dst_DECL, int start, ...)
+ case DASM_STOP:
+ goto stop;
+ case DASM_SECTION:
+- n = (ins & 255);
++ n = *p++ & 255;
+ CK(n < D->maxsection, RANGE_SEC);
+ D->section = &D->sections[n];
+ goto stop;
+
+From d7f7509894222d6756da05c7007afb6b292c515c Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 2 Dec 2016 15:36:19 -0500
+Subject: [PATCH 066/260] Minor indentation fixes.
+
+---
+ dynasm/dasm_s390x.lua | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 76d770e79..8cc37a9fa 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -290,7 +290,7 @@ local function is_int20(num)
+ end
+
+ local function is_int32(num)
+- return -2147483648 <= num and num < 2147483648
++ return -2147483648 <= num and num < 2147483648
+ end
+
+ -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
+@@ -371,11 +371,11 @@ local function parse_mem_by(arg)
+ end
+
+ local function parse_imm(arg)
+- local imm_val = tonumber(arg,16)
+- if not is_int32(imm_val) then
+- werror("Immediate value out of range: ", imm_val)
+- end
+- return imm_val
++ local imm_val = tonumber(arg,16)
++ if not is_int32(imm_val) then
++ werror("Immediate value out of range: ", imm_val)
++ end
++ return imm_val
+ end
+
+ local function parse_label(label, def)
+
+From 54199bd9bcd9cc547ccd6ad082added55e5a8096 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 5 Dec 2016 13:59:44 -0500
+Subject: [PATCH 067/260] Clean up test file and add script to run tests.
+
+./run.sh will now execute the tests. It is a very simple setup
+currently and is limited to Linux on s390x, but it is enough to get
+started with.
+---
+ dynasm/Examples/run.sh | 13 ++++
+ dynasm/Examples/test_z_inst.c | 117 ++++++++++++++++------------------
+ 2 files changed, 67 insertions(+), 63 deletions(-)
+ create mode 100755 dynasm/Examples/run.sh
+
+diff --git a/dynasm/Examples/run.sh b/dynasm/Examples/run.sh
+new file mode 100755
+index 000000000..dbe93b008
+--- /dev/null
++++ b/dynasm/Examples/run.sh
+@@ -0,0 +1,13 @@
++#!/bin/bash
++# set -x
++
++# run test
++lua ../dynasm.lua test_z_inst.c | gcc -std=gnu99 -Wall -Werror -g -x c -o test_z_inst -
++./test_z_inst
++ec=$?
++
++# cleanup
++rm -f ./test_z_inst
++
++# exit
++exit $ec
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 9c1ae26f4..ed20ea657 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -2,91 +2,82 @@
+ #include <stdio.h>
+ #include <sys/mman.h>
+
+-#include "../dynasm/dasm_proto.h"
+-#include "../dynasm/dasm_s390x.h"
++#include "../dasm_proto.h"
++#include "../dasm_s390x.h"
+
+-//DynASM directives.
+- |.arch s390x
+- |.actionlist actions
++// DynASM directives.
++|.arch s390x
++|.actionlist actions
+
+-typedef struct
++static void add(dasm_State *state)
+ {
++ dasm_State ** Dst = &state;
++
++ | ar r2,r3
++ | br r14
++}
++
++static void sub(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ | sr r2,r3
++ | br r14
++}
++
++static void mul(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ | msr r2 , r3
++ | br r14
++}
++
++typedef struct {
+ int arg1;
+ int arg2;
+ void (*fn)(dasm_State *);
+ int want;
+- char *testname;
+-}test_table;
++ const char *testname;
++} test_table;
+
+ test_table test[] = {
+- {1,2,add,3,"add"},
+- {10,5 ,sub ,5,"subract"} ,
+- {2,3,mul,6,"Multiply"}
+- };
+-
+-
+-void *jitcode(dasm_State **state);
+-void add(dasm_State *);
+-void sub(dasm_State *);
+-void mul(dasm_State *);
+-
+-void *jitcode(dasm_State **state)
++ { 1, 2, add, 3, "add"},
++ {10, 5, sub, 5, "sub"},
++ { 2, 3, mul, 6, "mul"}
++};
++
++static void *jitcode(dasm_State **state, size_t *size)
+ {
+- size_t size;
+- int dasm_status = dasm_link(state, &size);
++ int dasm_status = dasm_link(state, size);
+ assert(dasm_status == DASM_S_OK);
+
+- void *ret = (int *)calloc(10,sizeof(int));
++ void *ret = mmap(0, *size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ dasm_encode(state, ret);
+ dasm_free(state);
+
++ mprotect(ret, *size, PROT_READ | PROT_EXEC);
+ return (int *)ret;
+ }
+
+-void add(dasm_State *state)
++int main(int argc, char *argv[])
+ {
+- dasm_State ** Dst = &state;
+-
+- | ar r2,r3
+- | br r14
+-}
++ dasm_State *state;
+
+-void sub(dasm_State *state)
+-{
+- dasm_State **Dst = &state;
+-
+- | sr r2,r3
+- | br r14
+-}
++ for(int i=0; i < sizeof(test)/sizeof(test[0]); i++) {
++ dasm_init(&state, 1);
++ dasm_setup(&state, actions);
++ test[i].fn(state);
++ size_t size;
++ int (*fptr)(int, int) = jitcode(&state, &size);
++ int got = fptr(test[i].arg1, test[i].arg2);
+
+-void mul(dasm_State *state)
+-{
+- dasm_State **Dst = &state;
+-
+- | msr r2 , r3
+- | br r14
+-}
+-
+-void main(int argc, char *argv[])
+-{
+- dasm_State *state;
+- dasm_State **Dst = &state;
+- int i;
+- size_t size;
+-
+- for(i=0;i<sizeof(test)/sizeof(test[0]);i++)
+- {
+- dasm_init(&state, 1);
+- dasm_setup(&state, actions);
+- test[i].fn(state);
+- int (*fptr)(int, int) = jitcode(&state);
+- int got = fptr(test[i].arg1, test[i].arg2);
+-
+- if (got != test[i].want) {
+- fprintf(stderr, "test %s failed: want %d, got %d\n", test[i].testname, test[i].want, got);
++ if (got != test[i].want) {
++ fprintf(stderr, "FAIL: test %s: want %d, got %d\n", test[i].testname, test[i].want, got);
+ exit(1);
+ }
+- free(fptr);
++ munmap(fptr, size);
+ }
+- printf("All test passed\n");
++ printf("all tests passed\n");
++ return 0;
+ }
+
+From d9e61fe7a8df5930c2dac481acd14b16eef93c06 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 5 Dec 2016 14:23:35 -0500
+Subject: [PATCH 068/260] Shorten templates by four characters.
+
+Instructions are at most 6 bytes long, so the templates don't need the
+ability to encode 8 bytes.
+---
+ dynasm/dasm_s390x.lua | 1134 ++++++++++++++++++++---------------------
+ 1 file changed, 567 insertions(+), 567 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 8cc37a9fa..6416438e6 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -435,586 +435,586 @@ end
+
+ -- Template strings for s390x instructions.
+ map_op = {
+-a_2 = "000000005a000000j",
+-ar_2 = "0000000000001a00g",
+-ay_2 = "0000e3000000005al",
+-ag_2 = "0000e30000000008l",
+-agr_2 = "00000000b9080000h",
+-agf_2 = "0000e30000000018l",
+-agfr_2 = "00000000b9180000h",
+-axbr_2 = "00000000b34a0000h",
+-adbr_2 = "00000000b31a0000h",
+-aebr_2 = "00000000b30a0000h",
+-ah_2 = "000000004a000000j",
+-ahy_2 = "0000e3000000007al",
+-afi_2 = "0000c20900000000n",
+-agfi_2 = "0000c20800000000n",
+-aih_2 = "0000cc0800000000n",
+-al_2 = "000000005e000000j",
+-alr_2 = "0000000000001e00g",
+-aly_2 = "0000e3000000005el",
+-alg_2 = "0000e3000000000al",
+-algr_2 = "00000000b90a0000h",
+-algf_2 = "0000e3000000001al",
+-algfr_2 = "00000000b91a0000h",
+-alfi_2 = "0000c20b00000000n",
+-algfi_2 = "0000c20a00000000n",
+-alc_2 = "0000e30000000098l",
+-alcr_2 = "00000000b9980000h",
+-alcg_2 = "0000e30000000088l",
+-alcgr_2 = "00000000b9880000h",
+-alsih_2 = "0000cc0a00000000n",
+-alsihn_2 = "0000cc0b00000000n",
+-axr_2 = "0000000000003600g",
+-ad_2 = "000000006a000000j",
+-adr_2 = "0000000000002a00g",
+-ae_2 = "000000007a000000j",
+-aer_2 = "0000000000003a00g",
+-aw_2 = "000000006e000000j",
+-awr_2 = "0000000000002e00g",
+-au_2 = "000000007e000000j",
+-aur_2 = "0000000000003e00g",
+-n_2 = "0000000054000000j",
+-nr_2 = "0000000000001400g",
+-ny_2 = "0000e30000000054l",
+-ng_2 = "0000e30000000080l",
+-ngr_2 = "00000000b9800000h",
+-nihf_2 = "0000c00a00000000n",
+-nilf_2 = "0000c00b00000000n",
+-bal_2 = "0000000045000000j",
+-balr_2 = "000000000000500g",
+-bas_2 = "000000004d000000j",
+-basr_2 = "0000000000000d00g",
+-bassm_2 = "0000000000000c00g",
+-bsa_2 = "00000000b25a0000h",
+-bsm_2 = "0000000000000b00g",
+-bakr_2 = "00000000b2400000h",
+-bsg_2 = "00000000b2580000h",
+-bc_2 = "0000000047000000k",
+-bcr_2 = "000000000000700g",
+-bct_2 = "0000000046000000j",
+-bctr_2 = "000000000000600g",
+-bctg_2 = "0000e30000000046l",
+-bctgr_2 = "00000000b9460000h",
+-bxh_3 = "0000000086000000q",
+-bxhg_3 = "0000eb0000000044s",
+-bxle_3 = "0000000087000000q",
+-bxleg_3 = "0000eb0000000045s",
+-brasl_2 = "0000c00500000000o",
+-brcl_2 = "0000c00400000000p",
+-brcth_2 = "0000cc0600000000o",
+-cksm_2 = "00000000b2410000h",
+-km_2 = "00000000b92e0000h",
+-kmf_2 = "00000000b92a0000h",
+-kmc_2 = "00000000b92f0000h",
+-kmo_2 = "00000000b92b0000h",
+-c_2 = "0000000059000000j",
+-cr_2 = "0000000000001900g",
+-cy_2 = "0000e30000000059l",
+-cg_2 = "0000e30000000020l",
+-cgr_2 = "00000000b9200000h",
+-cgf_2 = "0000e30000000030l",
+-cgfr_2 = "00000000b9300000h",
+-cxbr_2 = "00000000b3490000h",
+-cxtr_2 = "00000000b3ec0000h",
+-cxr_2 = "00000000b3690000h",
+-cdbr_2 = "00000000b3190000h",
+-cdtr_2 = "00000000b3e40000h",
+-cd_2 = "0000000069000000j",
+-cdr_2 = "0000000000002900g",
+-cebr_2 = "00000000b3090000h",
+-ce_2 = "0000000079000000j",
+-cer_2 = "0000000000003900g",
+-kxbr_2 = "00000000b3480000h",
+-kxtr_2 = "00000000b3e80000h",
+-kdbr_2 = "00000000b3180000h",
+-kdtr_2 = "00000000b3e00000h",
+-kebr_2 = "00000000b3080000h",
+-cs_3 = "00000000ba000000q",
+-csy_3 = "0000eb0000000014s",
+-csg_3 = "0000eb0000000030s",
+-csp_2 = "00000000b2500000h",
+-cspg_2 = "00000000b98a0000h",
+-cextr_2 = "00000000b3fc0000h",
+-cedtr_2 = "00000000b3f40000h",
+-cds_3 = "00000000bb000000q",
+-cdsy_3 = "0000eb0000000031s",
+-cdsg_3 = "0000eb000000003es",
+-ch_2 = "0000000049000000j",
+-chy_2 = "0000e30000000079l",
+-cgh_2 = "0000e30000000034l",
+-chrl_2 = "0000c60500000000o",
+-cghrl_2 = "0000c60400000000o",
+-chf_2 = "0000e300000000cdl",
+-chhr_2 = "00000000b9cd0000h",
+-chlr_2 = "00000000b9dd0000h",
+-cfi_2 = "0000c20d00000000n",
+-cgfi_2 = "0000c20c00000000n",
+-cih_2 = "0000cc0d00000000n",
+-cl_2 = "0000000055000000j",
+-clr_2 = "0000000000001500g",
+-cly_2 = "0000e30000000055l",
+-clg_2 = "0000e30000000021l",
+-clgr_2 = "00000000b9210000h",
+-clgf_2 = "0000e30000000031l",
+-clgfr_2 = "00000000b9310000h",
+-clmh_3 = "0000eb0000000020t",
+-clm_3 = "00000000bd000000r",
+-clmy_3 = "0000eb0000000021t",
+-clhf_2 = "0000e300000000cfl",
+-clhhr_2 = "00000000b9cf0000h",
+-clhlr_2 = "00000000b9df0000h",
+-clfi_2 = "0000c20f00000000n",
+-clgfi_2 = "0000c20e00000000n",
+-clih_2 = "0000cc0f00000000n",
+-clcl_2 = "0000000000000f00g",
+-clcle_3 = "00000000a9000000q",
+-clclu_3 = "0000eb000000008fs",
+-clrl_2 = "0000c60f00000000o",
+-clhrl_2 = "0000c60700000000o",
+-clgrl_2 = "0000c60a00000000o",
+-clghrl_2 = "0000c60600000000o",
+-clgfrl_2 = "0000c60e00000000o",
+-clst_2 = "00000000b25d0000h",
+-crl_2 = "0000c60d00000000o",
+-cgrl_2 = "0000c60800000000o",
+-cgfrl_2 = "0000c60c00000000o",
+-cuse_2 = "00000000b2570000h",
+-cmpsc_2 = "00000000b2630000h",
+-kimd_2 = "00000000b93e0000h",
+-klmd_2 = "00000000b93f0000h",
+-kmac_2 = "00000000b91e0000h",
+-thdr_2 = "00000000b3590000h",
+-thder_2 = "00000000b3580000h",
+-cxfbr_2 = "00000000b3960000h",
+-cxftr_2 = "00000000b9590000h",
+-cxfr_2 = "00000000b3b60000h",
+-cdfbr_2 = "00000000b3950000h",
+-cdftr_2 = "00000000b9510000h",
+-cdfr_2 = "00000000b3b50000h",
+-cefbr_2 = "00000000b3940000h",
+-cefr_2 = "00000000b3b40000h",
+-cxgbr_2 = "00000000b3a60000h",
+-cxgtr_2 = "00000000b3f90000h",
+-cxgr_2 = "00000000b3c60000h",
+-cdgbr_2 = "00000000b3a50000h",
+-cdgtr_2 = "00000000b3f10000h",
+-cdgr_2 = "00000000b3c50000h",
+-cegbr_2 = "00000000b3a40000h",
+-cegr_2 = "00000000b3c40000h",
+-cxstr_2 = "00000000b3fb0000h",
+-cdstr_2 = "00000000b3f30000h",
+-cxutr_2 = "00000000b3fa0000h",
+-cdutr_2 = "00000000b3f20000h",
+-cvb_2 = "000000004f000000j",
+-cvby_2 = "0000e30000000006l",
+-cvbg_2 = "0000e3000000000el",
+-cvd_2 = "000000004e000000j",
+-cvdy_2 = "0000e30000000026l",
+-cvdg_2 = "0000e3000000002el",
+-cuxtr_2 = "00000000b3ea0000h",
+-cudtr_2 = "00000000b3e20000h",
+-cu42_2 = "00000000b9b30000h",
+-cu41_2 = "00000000b9b20000h",
+-cpya_2 = "00000000b24d0000h",
+-d_2 = "000000005d000000j",
+-dr_2 = "0000000000001d00g",
+-dxbr_2 = "00000000b34d0000h",
+-dxr_2 = "00000000b22d0000h",
+-ddbr_2 = "00000000b31d0000h",
+-dd_2 = "000000006d000000j",
+-ddr_2 = "0000000000002d00g",
+-debr_2 = "00000000b30d0000h",
+-de_2 = "000000007d000000j",
+-der_2 = "0000000000003d00g",
+-dl_2 = "0000e30000000097l",
+-dlr_2 = "00000000b9970000h",
+-dlg_2 = "0000e30000000087l",
+-dlgr_2 = "00000000b9870000h",
+-dsg_2 = "0000e3000000000dl",
+-dsgr_2 = "00000000b90d0000h",
+-dsgf_2 = "0000e3000000001dl",
+-dsgfr_2 = "00000000b91d0000h",
+-x_2 = "0000000057000000j",
+-xr_2 = "0000000000001700g",
+-xy_2 = "0000e30000000057l",
+-xg_2 = "0000e30000000082l",
+-xgr_2 = "00000000b9820000h",
+-xihf_2 = "0000c00600000000n",
+-xilf_2 = "0000c00700000000n",
+-ex_2 = "0000000044000000j",
+-exrl_2 = "0000c60000000000o",
+-ear_2 = "00000000b24f0000h",
+-esea_2 = "00000000b99d0000h",
+-eextr_2 = "00000000b3ed0000h",
+-eedtr_2 = "00000000b3e50000h",
+-ecag_3 = "0000eb000000004cs",
+-efpc_2 = "00000000b38c0000h",
+-epar_2 = "00000000b2260000h",
+-epair_2 = "00000000b99a0000h",
+-epsw_2 = "00000000b98d0000h",
+-esar_2 = "00000000b2270000h",
+-esair_2 = "00000000b99b0000h",
+-esxtr_2 = "00000000b3ef0000h",
+-esdtr_2 = "00000000b3e70000h",
+-ereg_2 = "00000000b2490000h",
+-eregg_2 = "00000000b90e0000h",
+-esta_2 = "00000000b24a0000h",
+-flogr_2 = "00000000b9830000h",
+-hdr_2 = "0000000000002400g",
+-her_2 = "0000000000003400g",
+-iac_2 = "00000000b2240000h",
+-ic_2 = "0000000043000000j",
+-icy_2 = "0000e30000000073l",
+-icmh_3 = "0000eb0000000080t",
+-icm_3 = "00000000bf000000r",
+-icmy_3 = "0000eb0000000081t",
+-iihf_2 = "0000c00800000000n",
+-iilf_2 = "0000c00900000000n",
+-ipm_2 = "00000000b2220000h",
+-iske_2 = "00000000b2290000h",
+-ivsk_2 = "00000000b2230000h",
+-l_2 = "0000000058000000j",
+-lr_2 = "0000000000001800g",
+-ly_2 = "0000e30000000058l",
+-lg_2 = "0000e30000000004l",
+-lgr_2 = "00000000b9040000h",
+-lgf_2 = "0000e30000000014l",
+-lgfr_2 = "00000000b9140000h",
+-lxr_2 = "00000000b3650000h",
+-ld_2 = "0000000068000000j",
+-ldr_2 = "0000000000002800g",
+-ldy_2 = "0000ed0000000065l",
+-le_2 = "0000000078000000j",
+-ler_2 = "0000000000003800g",
+-ley_2 = "0000ed0000000064l",
+-lam_3 = "000000009a000000q",
+-lamy_3 = "0000eb000000009as",
+-la_2 = "0000000041000000j",
+-lay_2 = "0000e30000000071l",
+-lae_2 = "0000000051000000j",
+-laey_2 = "0000e30000000075l",
+-larl_2 = "0000c00000000000o",
+-laa_3 = "0000eb00000000f8s",
+-laag_3 = "0000eb00000000e8s",
+-laal_3 = "0000eb00000000fas",
+-laalg_3 = "0000eb00000000eas",
+-lan_3 = "0000eb00000000f4s",
+-lang_3 = "0000eb00000000e4s",
+-lax_3 = "0000eb00000000f7s",
+-laxg_3 = "0000eb00000000e7s",
+-lao_3 = "0000eb00000000f6s",
+-laog_3 = "0000eb00000000e6s",
+-lt_2 = "0000e30000000012l",
+-ltr_2 = "0000000000001200g",
+-ltg_2 = "0000e30000000002l",
+-ltgr_2 = "00000000b9020000h",
+-ltgf_2 = "0000e30000000032l",
+-ltgfr_2 = "00000000b9120000h",
+-ltxbr_2 = "00000000b3420000h",
+-ltxtr_2 = "00000000b3de0000h",
+-ltxr_2 = "00000000b3620000h",
+-ltdbr_2 = "00000000b3120000h",
+-ltdtr_2 = "00000000b3d60000h",
+-ltdr_2 = "0000000000002200g",
+-ltebr_2 = "00000000b3020000h",
+-lter_2 = "0000000000003200g",
+-lb_2 = "0000e30000000076l",
+-lbr_2 = "00000000b9260000h",
+-lgb_2 = "0000e30000000077l",
+-lgbr_2 = "00000000b9060000h",
+-lbh_2 = "0000e300000000c0l",
+-lcr_2 = "0000000000001300g",
+-lcgr_2 = "00000000b9030000h",
+-lcgfr_2 = "00000000b9130000h",
+-lcxbr_2 = "00000000b3430000h",
+-lcxr_2 = "00000000b3630000h",
+-lcdbr_2 = "00000000b3130000h",
+-lcdr_2 = "0000000000002300g",
+-lcdfr_2 = "00000000b3730000h",
+-lcebr_2 = "00000000b3030000h",
+-lcer_2 = "0000000000003300g",
+-lctl_3 = "00000000b7000000q",
+-lctlg_3 = "0000eb000000002fs",
+-fixr_2 = "00000000b3670000h",
+-fidr_2 = "00000000b37f0000h",
+-fier_2 = "00000000b3770000h",
+-ldgr_2 = "00000000b3c10000h",
+-lgdr_2 = "00000000b3cd0000h",
+-lh_2 = "0000000048000000j",
+-lhr_2 = "00000000b9270000h",
+-lhy_2 = "0000e30000000078l",
+-lgh_2 = "0000e30000000015l",
+-lghr_2 = "00000000b9070000h",
+-lhh_2 = "0000e300000000c4l",
+-lhrl_2 = "0000c40500000000o",
+-lghrl_2 = "0000c40400000000o",
+-lfh_2 = "0000e300000000cal",
+-lgfi_2 = "0000c00100000000n",
+-lxdbr_2 = "00000000b3050000h",
+-lxdr_2 = "00000000b3250000h",
+-lxebr_2 = "00000000b3060000h",
+-lxer_2 = "00000000b3260000h",
+-ldebr_2 = "00000000b3040000h",
+-lder_2 = "00000000b3240000h",
+-llgf_2 = "0000e30000000016l",
+-llgfr_2 = "00000000b9160000h",
+-llc_2 = "0000e30000000094l",
+-llcr_2 = "00000000b9940000h",
+-llgc_2 = "0000e30000000090l",
+-llgcr_2 = "00000000b9840000h",
+-llch_2 = "0000e300000000c2l",
+-llh_2 = "0000e30000000095l",
+-llhr_2 = "00000000b9950000h",
+-llgh_2 = "0000e30000000091l",
+-llghr_2 = "00000000b9850000h",
+-llhh_2 = "0000e300000000c6l",
+-llhrl_2 = "0000c40200000000o",
+-llghrl_2 = "0000c40600000000o",
+-llihf_2 = "0000c00e00000000n",
+-llilf_2 = "0000c00f00000000n",
+-llgfrl_2 = "0000c40e00000000o",
+-llgt_2 = "0000e30000000017l",
+-llgtr_2 = "00000000b9170000h",
+-lm_3 = "0000000098000000q",
+-lmy_3 = "0000eb0000000098s",
+-lmg_3 = "0000eb0000000004s",
+-lmh_3 = "0000eb0000000096s",
+-lnr_2 = "0000000000001100g",
+-lngr_2 = "00000000b9010000h",
+-lngfr_2 = "00000000b9110000h",
+-lnxbr_2 = "00000000b3410000h",
+-lnxr_2 = "00000000b3610000h",
+-lndbr_2 = "00000000b3110000h",
+-lndr_2 = "0000000000002100g",
+-lndfr_2 = "00000000b3710000h",
+-lnebr_2 = "00000000b3010000h",
+-lner_2 = "0000000000003100g",
+-loc_3 = "0000eb00000000f2t",
+-locg_3 = "0000eb00000000e2t",
+-lpq_2 = "0000e3000000008fl",
+-lpr_2 = "0000000000001000g",
+-lpgr_2 = "00000000b9000000h",
+-lpgfr_2 = "00000000b9100000h",
+-lpxbr_2 = "00000000b3400000h",
+-lpxr_2 = "00000000b3600000h",
+-lpdbr_2 = "00000000b3100000h",
+-lpdr_2 = "0000000000002000g",
+-lpdfr_2 = "00000000b3700000h",
+-lpebr_2 = "00000000b3000000h",
+-lper_2 = "0000000000003000g",
+-lra_2 = "00000000b1000000j",
+-lray_2 = "0000e30000000013l",
+-lrag_2 = "0000e30000000003l",
+-lrl_2 = "0000c40d00000000o",
+-lgrl_2 = "0000c40800000000o",
+-lgfrl_2 = "0000c40c00000000o",
+-lrvh_2 = "0000e3000000001fl",
+-lrv_2 = "0000e3000000001el",
+-lrvr_2 = "00000000b91f0000h",
+-lrvg_2 = "0000e3000000000fl",
+-lrvgr_2 = "00000000b90f0000h",
+-ldxbr_2 = "00000000b3450000h",
+-ldxr_2 = "0000000000002500g",
+-lrdr_2 = "0000000000002500g",
+-lexbr_2 = "00000000b3460000h",
+-lexr_2 = "00000000b3660000h",
+-ledbr_2 = "00000000b3440000h",
+-ledr_2 = "0000000000003500g",
+-lrer_2 = "0000000000003500g",
+-lura_2 = "00000000b24b0000h",
+-lurag_2 = "00000000b9050000h",
+-lzxr_2 = "00000000b3760000h",
+-lzdr_2 = "00000000b3750000h",
+-lzer_2 = "00000000b3740000h",
+-msta_2 = "00000000b2470000h",
+-mvcl_2 = "0000000000000e00g",
+-mvcle_3 = "00000000a8000000q",
+-mvclu_3 = "0000eb000000008es",
+-mvpg_2 = "00000000b2540000h",
+-mvst_2 = "00000000b2550000h",
+-m_2 = "000000005c000000j",
+-mfy_2 = "0000e3000000005cl",
+-mr_2 = "0000000000001c00g",
+-mxbr_2 = "00000000b34c0000h",
+-mxr_2 = "0000000000002600g",
+-mdbr_2 = "00000000b31c0000h",
+-md_2 = "000000006c000000j",
+-mdr_2 = "0000000000002c00g",
+-mxdbr_2 = "00000000b3070000h",
+-mxd_2 = "0000000067000000j",
+-mxdr_2 = "0000000000002700g",
+-meebr_2 = "00000000b3170000h",
+-meer_2 = "00000000b3370000h",
+-mdebr_2 = "00000000b30c0000h",
+-mde_2 = "000000007c000000j",
+-mder_2 = "0000000000003c00g",
+-me_2 = "000000007c000000j",
+-mer_2 = "0000000000003c00g",
+-mh_2 = "000000004c000000j",
+-mhy_2 = "0000e3000000007cl",
+-mlg_2 = "0000e30000000086l",
+-mlgr_2 = "00000000b9860000h",
+-ml_2 = "0000e30000000096l",
+-mlr_2 = "00000000b9960000h",
+-ms_2 = "0000000071000000j",
+-msr_2 = "00000000b2520000h",
+-msy_2 = "0000e30000000051l",
+-msg_2 = "0000e3000000000cl",
+-msgr_2 = "00000000b90c0000h",
+-msgf_2 = "0000e3000000001cl",
+-msgfr_2 = "00000000b91c0000h",
+-msfi_2 = "0000c20100000000n",
+-msgfi_2 = "0000c20000000000n",
+-o_2 = "0000000056000000j",
+-or_2 = "0000000000001600g",
+-oy_2 = "0000e30000000056l",
+-og_2 = "0000e30000000081l",
+-ogr_2 = "00000000b9810000h",
+-oihf_2 = "0000c00c00000000n",
+-oilf_2 = "0000c00d00000000n",
+-pgin_2 = "00000000b22e0000h",
+-pgout_2 = "00000000b22f0000h",
+-pcc_2 = "00000000b92c0000h",
+-pckmo_2 = "00000000b9280000h",
+-pfmf_2 = "00000000b9af0000h",
+-ptf_2 = "00000000b9a20000h",
+-popcnt_2 = "00000000b9e10000h",
+-pfd_2 = "0000e30000000036m",
+-pfdrl_2 = "0000c60200000000p",
+-pt_2 = "00000000b2280000h",
+-pti_2 = "00000000b99e0000h",
+-palb_2 = "00000000b2480000h",
+-rrbe_2 = "00000000b22a0000h",
+-rrbm_2 = "00000000b9ae0000h",
+-rll_3 = "0000eb000000001ds",
+-rllg_3 = "0000eb000000001cs",
+-srst_2 = "00000000b25e0000h",
+-srstu_2 = "00000000b9be0000h",
+-sar_2 = "00000000b24e0000h",
+-sfpc_2 = "00000000b3840000h",
+-sfasr_2 = "00000000b3850000h",
+-spm_2 = "000000000000400g",
+-ssar_2 = "00000000b2250000h",
+-ssair_2 = "00000000b99f0000h",
+-slda_3 = "000000008f000000q",
+-sldl_3 = "000000008d000000q",
+-sla_3 = "000000008b000000q",
+-slak_3 = "0000eb00000000dds",
+-slag_3 = "0000eb000000000bs",
+-sll_3 = "0000000089000000q",
+-sllk_3 = "0000eb00000000dfs",
+-sllg_3 = "0000eb000000000ds",
+-srda_3 = "000000008e000000q",
+-srdl_3 = "000000008c000000q",
+-sra_3 = "000000008a000000q",
+-srak_3 = "0000eb00000000dcs",
+-srag_3 = "0000eb000000000as",
+-srl_3 = "0000000088000000q",
+-srlk_3 = "0000eb00000000des",
+-srlg_3 = "0000eb000000000cs",
+-sqxbr_2 = "00000000b3160000h",
+-sqxr_2 = "00000000b3360000h",
+-sqdbr_2 = "00000000b3150000h",
+-sqdr_2 = "00000000b2440000h",
+-sqebr_2 = "00000000b3140000h",
+-sqer_2 = "00000000b2450000h",
+-st_2 = "0000000050000000j",
+-sty_2 = "0000e30000000050l",
+-stg_2 = "0000e30000000024l",
+-std_2 = "0000000060000000j",
+-stdy_2 = "0000ed0000000067l",
+-ste_2 = "0000000070000000j",
+-stey_2 = "0000ed0000000066l",
+-stam_3 = "000000009b000000q",
+-stamy_3 = "0000eb000000009bs",
+-stc_2 = "0000000042000000j",
+-stcy_2 = "0000e30000000072l",
+-stch_2 = "0000e300000000c3l",
+-stcmh_3 = "0000eb000000002ct",
+-stcm_3 = "00000000be000000r",
+-stcmy_3 = "0000eb000000002dt",
+-stctl_3 = "00000000b6000000q",
+-stctg_3 = "0000eb0000000025s",
+-sth_2 = "0000000040000000j",
+-sthy_2 = "0000e30000000070l",
+-sthh_2 = "0000e300000000c7l",
+-sthrl_2 = "0000c40700000000o",
+-stfh_2 = "0000e300000000cbl",
+-stm_3 = "0000000090000000q",
+-stmy_3 = "0000eb0000000090s",
+-stmg_3 = "0000eb0000000024s",
+-stmh_3 = "0000eb0000000026s",
+-stoc_3 = "0000eb00000000f3t",
+-stocg_3 = "0000eb00000000e3t",
+-stpq_2 = "0000e3000000008el",
+-strl_2 = "0000c40f00000000o",
+-stgrl_2 = "0000c40b00000000o",
+-strvh_2 = "0000e3000000003fl",
+-strv_2 = "0000e3000000003el",
+-strvg_2 = "0000e3000000002fl",
+-stura_2 = "00000000b2460000h",
+-sturg_2 = "00000000b9250000h",
+-s_2 = "000000005b000000j",
+-sr_2 = "0000000000001b00g",
+-sy_2 = "0000e3000000005bl",
+-sg_2 = "0000e30000000009l",
+-sgr_2 = "00000000b9090000h",
+-sgf_2 = "0000e30000000019l",
+-sgfr_2 = "00000000b9190000h",
+-sxbr_2 = "00000000b34b0000h",
+-sdbr_2 = "00000000b31b0000h",
+-sebr_2 = "00000000b30b0000h",
+-sh_2 = "000000004b000000j",
+-shy_2 = "0000e3000000007bl",
+-sl_2 = "000000005f000000j",
+-slr_2 = "0000000000001f00g",
+-sly_2 = "0000e3000000005fl",
+-slg_2 = "0000e3000000000bl",
+-slgr_2 = "00000000b90b0000h",
+-slgf_2 = "0000e3000000001bl",
+-slgfr_2 = "00000000b91b0000h",
+-slfi_2 = "0000c20500000000n",
+-slgfi_2 = "0000c20400000000n",
+-slb_2 = "0000e30000000099l",
+-slbr_2 = "00000000b9990000h",
+-slbg_2 = "0000e30000000089l",
+-slbgr_2 = "00000000b9890000h",
+-sxr_2 = "0000000000003700g",
+-sd_2 = "000000006b000000j",
+-sdr_2 = "0000000000002b00g",
+-se_2 = "000000007b000000j",
+-ser_2 = "0000000000003b00g",
+-su_2 = "000000007f000000j",
+-sur_2 = "0000000000003f00g",
+-sw_2 = "000000006f000000j",
+-swr_2 = "0000000000002f00g",
+-tar_2 = "00000000b24c0000h",
+-tb_2 = "00000000b22c0000h",
+-trace_3 = "0000000099000000q",
+-tracg_3 = "0000eb000000000fs",
+-tre_2 = "00000000b2a50000h",
++ a_2 = "00005a000000j",
++ ar_2 = "000000001a00g",
++ ay_2 = "e3000000005al",
++ ag_2 = "e30000000008l",
++ agr_2 = "0000b9080000h",
++ agf_2 = "e30000000018l",
++ agfr_2 = "0000b9180000h",
++ axbr_2 = "0000b34a0000h",
++ adbr_2 = "0000b31a0000h",
++ aebr_2 = "0000b30a0000h",
++ ah_2 = "00004a000000j",
++ ahy_2 = "e3000000007al",
++ afi_2 = "c20900000000n",
++ agfi_2 = "c20800000000n",
++ aih_2 = "cc0800000000n",
++ al_2 = "00005e000000j",
++ alr_2 = "000000001e00g",
++ aly_2 = "e3000000005el",
++ alg_2 = "e3000000000al",
++ algr_2 = "0000b90a0000h",
++ algf_2 = "e3000000001al",
++ algfr_2 = "0000b91a0000h",
++ alfi_2 = "c20b00000000n",
++ algfi_2 = "c20a00000000n",
++ alc_2 = "e30000000098l",
++ alcr_2 = "0000b9980000h",
++ alcg_2 = "e30000000088l",
++ alcgr_2 = "0000b9880000h",
++ alsih_2 = "cc0a00000000n",
++ alsihn_2 = "cc0b00000000n",
++ axr_2 = "000000003600g",
++ ad_2 = "00006a000000j",
++ adr_2 = "000000002a00g",
++ ae_2 = "00007a000000j",
++ aer_2 = "000000003a00g",
++ aw_2 = "00006e000000j",
++ awr_2 = "000000002e00g",
++ au_2 = "00007e000000j",
++ aur_2 = "000000003e00g",
++ n_2 = "000054000000j",
++ nr_2 = "000000001400g",
++ ny_2 = "e30000000054l",
++ ng_2 = "e30000000080l",
++ ngr_2 = "0000b9800000h",
++ nihf_2 = "c00a00000000n",
++ nilf_2 = "c00b00000000n",
++ bal_2 = "000045000000j",
++ balr_2 = "00000000500g",
++ bas_2 = "00004d000000j",
++ basr_2 = "000000000d00g",
++ bassm_2 = "000000000c00g",
++ bsa_2 = "0000b25a0000h",
++ bsm_2 = "000000000b00g",
++ bakr_2 = "0000b2400000h",
++ bsg_2 = "0000b2580000h",
++ bc_2 = "000047000000k",
++ bcr_2 = "00000000700g",
++ bct_2 = "000046000000j",
++ bctr_2 = "00000000600g",
++ bctg_2 = "e30000000046l",
++ bctgr_2 = "0000b9460000h",
++ bxh_3 = "000086000000q",
++ bxhg_3 = "eb0000000044s",
++ bxle_3 = "000087000000q",
++ bxleg_3 = "eb0000000045s",
++ brasl_2 = "c00500000000o",
++ brcl_2 = "c00400000000p",
++ brcth_2 = "cc0600000000o",
++ cksm_2 = "0000b2410000h",
++ km_2 = "0000b92e0000h",
++ kmf_2 = "0000b92a0000h",
++ kmc_2 = "0000b92f0000h",
++ kmo_2 = "0000b92b0000h",
++ c_2 = "000059000000j",
++ cr_2 = "000000001900g",
++ cy_2 = "e30000000059l",
++ cg_2 = "e30000000020l",
++ cgr_2 = "0000b9200000h",
++ cgf_2 = "e30000000030l",
++ cgfr_2 = "0000b9300000h",
++ cxbr_2 = "0000b3490000h",
++ cxtr_2 = "0000b3ec0000h",
++ cxr_2 = "0000b3690000h",
++ cdbr_2 = "0000b3190000h",
++ cdtr_2 = "0000b3e40000h",
++ cd_2 = "000069000000j",
++ cdr_2 = "000000002900g",
++ cebr_2 = "0000b3090000h",
++ ce_2 = "000079000000j",
++ cer_2 = "000000003900g",
++ kxbr_2 = "0000b3480000h",
++ kxtr_2 = "0000b3e80000h",
++ kdbr_2 = "0000b3180000h",
++ kdtr_2 = "0000b3e00000h",
++ kebr_2 = "0000b3080000h",
++ cs_3 = "0000ba000000q",
++ csy_3 = "eb0000000014s",
++ csg_3 = "eb0000000030s",
++ csp_2 = "0000b2500000h",
++ cspg_2 = "0000b98a0000h",
++ cextr_2 = "0000b3fc0000h",
++ cedtr_2 = "0000b3f40000h",
++ cds_3 = "0000bb000000q",
++ cdsy_3 = "eb0000000031s",
++ cdsg_3 = "eb000000003es",
++ ch_2 = "000049000000j",
++ chy_2 = "e30000000079l",
++ cgh_2 = "e30000000034l",
++ chrl_2 = "c60500000000o",
++ cghrl_2 = "c60400000000o",
++ chf_2 = "e300000000cdl",
++ chhr_2 = "0000b9cd0000h",
++ chlr_2 = "0000b9dd0000h",
++ cfi_2 = "c20d00000000n",
++ cgfi_2 = "c20c00000000n",
++ cih_2 = "cc0d00000000n",
++ cl_2 = "000055000000j",
++ clr_2 = "000000001500g",
++ cly_2 = "e30000000055l",
++ clg_2 = "e30000000021l",
++ clgr_2 = "0000b9210000h",
++ clgf_2 = "e30000000031l",
++ clgfr_2 = "0000b9310000h",
++ clmh_3 = "eb0000000020t",
++ clm_3 = "0000bd000000r",
++ clmy_3 = "eb0000000021t",
++ clhf_2 = "e300000000cfl",
++ clhhr_2 = "0000b9cf0000h",
++ clhlr_2 = "0000b9df0000h",
++ clfi_2 = "c20f00000000n",
++ clgfi_2 = "c20e00000000n",
++ clih_2 = "cc0f00000000n",
++ clcl_2 = "000000000f00g",
++ clcle_3 = "0000a9000000q",
++ clclu_3 = "eb000000008fs",
++ clrl_2 = "c60f00000000o",
++ clhrl_2 = "c60700000000o",
++ clgrl_2 = "c60a00000000o",
++ clghrl_2 = "c60600000000o",
++ clgfrl_2 = "c60e00000000o",
++ clst_2 = "0000b25d0000h",
++ crl_2 = "c60d00000000o",
++ cgrl_2 = "c60800000000o",
++ cgfrl_2 = "c60c00000000o",
++ cuse_2 = "0000b2570000h",
++ cmpsc_2 = "0000b2630000h",
++ kimd_2 = "0000b93e0000h",
++ klmd_2 = "0000b93f0000h",
++ kmac_2 = "0000b91e0000h",
++ thdr_2 = "0000b3590000h",
++ thder_2 = "0000b3580000h",
++ cxfbr_2 = "0000b3960000h",
++ cxftr_2 = "0000b9590000h",
++ cxfr_2 = "0000b3b60000h",
++ cdfbr_2 = "0000b3950000h",
++ cdftr_2 = "0000b9510000h",
++ cdfr_2 = "0000b3b50000h",
++ cefbr_2 = "0000b3940000h",
++ cefr_2 = "0000b3b40000h",
++ cxgbr_2 = "0000b3a60000h",
++ cxgtr_2 = "0000b3f90000h",
++ cxgr_2 = "0000b3c60000h",
++ cdgbr_2 = "0000b3a50000h",
++ cdgtr_2 = "0000b3f10000h",
++ cdgr_2 = "0000b3c50000h",
++ cegbr_2 = "0000b3a40000h",
++ cegr_2 = "0000b3c40000h",
++ cxstr_2 = "0000b3fb0000h",
++ cdstr_2 = "0000b3f30000h",
++ cxutr_2 = "0000b3fa0000h",
++ cdutr_2 = "0000b3f20000h",
++ cvb_2 = "00004f000000j",
++ cvby_2 = "e30000000006l",
++ cvbg_2 = "e3000000000el",
++ cvd_2 = "00004e000000j",
++ cvdy_2 = "e30000000026l",
++ cvdg_2 = "e3000000002el",
++ cuxtr_2 = "0000b3ea0000h",
++ cudtr_2 = "0000b3e20000h",
++ cu42_2 = "0000b9b30000h",
++ cu41_2 = "0000b9b20000h",
++ cpya_2 = "0000b24d0000h",
++ d_2 = "00005d000000j",
++ dr_2 = "000000001d00g",
++ dxbr_2 = "0000b34d0000h",
++ dxr_2 = "0000b22d0000h",
++ ddbr_2 = "0000b31d0000h",
++ dd_2 = "00006d000000j",
++ ddr_2 = "000000002d00g",
++ debr_2 = "0000b30d0000h",
++ de_2 = "00007d000000j",
++ der_2 = "000000003d00g",
++ dl_2 = "e30000000097l",
++ dlr_2 = "0000b9970000h",
++ dlg_2 = "e30000000087l",
++ dlgr_2 = "0000b9870000h",
++ dsg_2 = "e3000000000dl",
++ dsgr_2 = "0000b90d0000h",
++ dsgf_2 = "e3000000001dl",
++ dsgfr_2 = "0000b91d0000h",
++ x_2 = "000057000000j",
++ xr_2 = "000000001700g",
++ xy_2 = "e30000000057l",
++ xg_2 = "e30000000082l",
++ xgr_2 = "0000b9820000h",
++ xihf_2 = "c00600000000n",
++ xilf_2 = "c00700000000n",
++ ex_2 = "000044000000j",
++ exrl_2 = "c60000000000o",
++ ear_2 = "0000b24f0000h",
++ esea_2 = "0000b99d0000h",
++ eextr_2 = "0000b3ed0000h",
++ eedtr_2 = "0000b3e50000h",
++ ecag_3 = "eb000000004cs",
++ efpc_2 = "0000b38c0000h",
++ epar_2 = "0000b2260000h",
++ epair_2 = "0000b99a0000h",
++ epsw_2 = "0000b98d0000h",
++ esar_2 = "0000b2270000h",
++ esair_2 = "0000b99b0000h",
++ esxtr_2 = "0000b3ef0000h",
++ esdtr_2 = "0000b3e70000h",
++ ereg_2 = "0000b2490000h",
++ eregg_2 = "0000b90e0000h",
++ esta_2 = "0000b24a0000h",
++ flogr_2 = "0000b9830000h",
++ hdr_2 = "000000002400g",
++ her_2 = "000000003400g",
++ iac_2 = "0000b2240000h",
++ ic_2 = "000043000000j",
++ icy_2 = "e30000000073l",
++ icmh_3 = "eb0000000080t",
++ icm_3 = "0000bf000000r",
++ icmy_3 = "eb0000000081t",
++ iihf_2 = "c00800000000n",
++ iilf_2 = "c00900000000n",
++ ipm_2 = "0000b2220000h",
++ iske_2 = "0000b2290000h",
++ ivsk_2 = "0000b2230000h",
++ l_2 = "000058000000j",
++ lr_2 = "000000001800g",
++ ly_2 = "e30000000058l",
++ lg_2 = "e30000000004l",
++ lgr_2 = "0000b9040000h",
++ lgf_2 = "e30000000014l",
++ lgfr_2 = "0000b9140000h",
++ lxr_2 = "0000b3650000h",
++ ld_2 = "000068000000j",
++ ldr_2 = "000000002800g",
++ ldy_2 = "ed0000000065l",
++ le_2 = "000078000000j",
++ ler_2 = "000000003800g",
++ ley_2 = "ed0000000064l",
++ lam_3 = "00009a000000q",
++ lamy_3 = "eb000000009as",
++ la_2 = "000041000000j",
++ lay_2 = "e30000000071l",
++ lae_2 = "000051000000j",
++ laey_2 = "e30000000075l",
++ larl_2 = "c00000000000o",
++ laa_3 = "eb00000000f8s",
++ laag_3 = "eb00000000e8s",
++ laal_3 = "eb00000000fas",
++ laalg_3 = "eb00000000eas",
++ lan_3 = "eb00000000f4s",
++ lang_3 = "eb00000000e4s",
++ lax_3 = "eb00000000f7s",
++ laxg_3 = "eb00000000e7s",
++ lao_3 = "eb00000000f6s",
++ laog_3 = "eb00000000e6s",
++ lt_2 = "e30000000012l",
++ ltr_2 = "000000001200g",
++ ltg_2 = "e30000000002l",
++ ltgr_2 = "0000b9020000h",
++ ltgf_2 = "e30000000032l",
++ ltgfr_2 = "0000b9120000h",
++ ltxbr_2 = "0000b3420000h",
++ ltxtr_2 = "0000b3de0000h",
++ ltxr_2 = "0000b3620000h",
++ ltdbr_2 = "0000b3120000h",
++ ltdtr_2 = "0000b3d60000h",
++ ltdr_2 = "000000002200g",
++ ltebr_2 = "0000b3020000h",
++ lter_2 = "000000003200g",
++ lb_2 = "e30000000076l",
++ lbr_2 = "0000b9260000h",
++ lgb_2 = "e30000000077l",
++ lgbr_2 = "0000b9060000h",
++ lbh_2 = "e300000000c0l",
++ lcr_2 = "000000001300g",
++ lcgr_2 = "0000b9030000h",
++ lcgfr_2 = "0000b9130000h",
++ lcxbr_2 = "0000b3430000h",
++ lcxr_2 = "0000b3630000h",
++ lcdbr_2 = "0000b3130000h",
++ lcdr_2 = "000000002300g",
++ lcdfr_2 = "0000b3730000h",
++ lcebr_2 = "0000b3030000h",
++ lcer_2 = "000000003300g",
++ lctl_3 = "0000b7000000q",
++ lctlg_3 = "eb000000002fs",
++ fixr_2 = "0000b3670000h",
++ fidr_2 = "0000b37f0000h",
++ fier_2 = "0000b3770000h",
++ ldgr_2 = "0000b3c10000h",
++ lgdr_2 = "0000b3cd0000h",
++ lh_2 = "000048000000j",
++ lhr_2 = "0000b9270000h",
++ lhy_2 = "e30000000078l",
++ lgh_2 = "e30000000015l",
++ lghr_2 = "0000b9070000h",
++ lhh_2 = "e300000000c4l",
++ lhrl_2 = "c40500000000o",
++ lghrl_2 = "c40400000000o",
++ lfh_2 = "e300000000cal",
++ lgfi_2 = "c00100000000n",
++ lxdbr_2 = "0000b3050000h",
++ lxdr_2 = "0000b3250000h",
++ lxebr_2 = "0000b3060000h",
++ lxer_2 = "0000b3260000h",
++ ldebr_2 = "0000b3040000h",
++ lder_2 = "0000b3240000h",
++ llgf_2 = "e30000000016l",
++ llgfr_2 = "0000b9160000h",
++ llc_2 = "e30000000094l",
++ llcr_2 = "0000b9940000h",
++ llgc_2 = "e30000000090l",
++ llgcr_2 = "0000b9840000h",
++ llch_2 = "e300000000c2l",
++ llh_2 = "e30000000095l",
++ llhr_2 = "0000b9950000h",
++ llgh_2 = "e30000000091l",
++ llghr_2 = "0000b9850000h",
++ llhh_2 = "e300000000c6l",
++ llhrl_2 = "c40200000000o",
++ llghrl_2 = "c40600000000o",
++ llihf_2 = "c00e00000000n",
++ llilf_2 = "c00f00000000n",
++ llgfrl_2 = "c40e00000000o",
++ llgt_2 = "e30000000017l",
++ llgtr_2 = "0000b9170000h",
++ lm_3 = "000098000000q",
++ lmy_3 = "eb0000000098s",
++ lmg_3 = "eb0000000004s",
++ lmh_3 = "eb0000000096s",
++ lnr_2 = "000000001100g",
++ lngr_2 = "0000b9010000h",
++ lngfr_2 = "0000b9110000h",
++ lnxbr_2 = "0000b3410000h",
++ lnxr_2 = "0000b3610000h",
++ lndbr_2 = "0000b3110000h",
++ lndr_2 = "000000002100g",
++ lndfr_2 = "0000b3710000h",
++ lnebr_2 = "0000b3010000h",
++ lner_2 = "000000003100g",
++ loc_3 = "eb00000000f2t",
++ locg_3 = "eb00000000e2t",
++ lpq_2 = "e3000000008fl",
++ lpr_2 = "000000001000g",
++ lpgr_2 = "0000b9000000h",
++ lpgfr_2 = "0000b9100000h",
++ lpxbr_2 = "0000b3400000h",
++ lpxr_2 = "0000b3600000h",
++ lpdbr_2 = "0000b3100000h",
++ lpdr_2 = "000000002000g",
++ lpdfr_2 = "0000b3700000h",
++ lpebr_2 = "0000b3000000h",
++ lper_2 = "000000003000g",
++ lra_2 = "0000b1000000j",
++ lray_2 = "e30000000013l",
++ lrag_2 = "e30000000003l",
++ lrl_2 = "c40d00000000o",
++ lgrl_2 = "c40800000000o",
++ lgfrl_2 = "c40c00000000o",
++ lrvh_2 = "e3000000001fl",
++ lrv_2 = "e3000000001el",
++ lrvr_2 = "0000b91f0000h",
++ lrvg_2 = "e3000000000fl",
++ lrvgr_2 = "0000b90f0000h",
++ ldxbr_2 = "0000b3450000h",
++ ldxr_2 = "000000002500g",
++ lrdr_2 = "000000002500g",
++ lexbr_2 = "0000b3460000h",
++ lexr_2 = "0000b3660000h",
++ ledbr_2 = "0000b3440000h",
++ ledr_2 = "000000003500g",
++ lrer_2 = "000000003500g",
++ lura_2 = "0000b24b0000h",
++ lurag_2 = "0000b9050000h",
++ lzxr_2 = "0000b3760000h",
++ lzdr_2 = "0000b3750000h",
++ lzer_2 = "0000b3740000h",
++ msta_2 = "0000b2470000h",
++ mvcl_2 = "000000000e00g",
++ mvcle_3 = "0000a8000000q",
++ mvclu_3 = "eb000000008es",
++ mvpg_2 = "0000b2540000h",
++ mvst_2 = "0000b2550000h",
++ m_2 = "00005c000000j",
++ mfy_2 = "e3000000005cl",
++ mr_2 = "000000001c00g",
++ mxbr_2 = "0000b34c0000h",
++ mxr_2 = "000000002600g",
++ mdbr_2 = "0000b31c0000h",
++ md_2 = "00006c000000j",
++ mdr_2 = "000000002c00g",
++ mxdbr_2 = "0000b3070000h",
++ mxd_2 = "000067000000j",
++ mxdr_2 = "000000002700g",
++ meebr_2 = "0000b3170000h",
++ meer_2 = "0000b3370000h",
++ mdebr_2 = "0000b30c0000h",
++ mde_2 = "00007c000000j",
++ mder_2 = "000000003c00g",
++ me_2 = "00007c000000j",
++ mer_2 = "000000003c00g",
++ mh_2 = "00004c000000j",
++ mhy_2 = "e3000000007cl",
++ mlg_2 = "e30000000086l",
++ mlgr_2 = "0000b9860000h",
++ ml_2 = "e30000000096l",
++ mlr_2 = "0000b9960000h",
++ ms_2 = "000071000000j",
++ msr_2 = "0000b2520000h",
++ msy_2 = "e30000000051l",
++ msg_2 = "e3000000000cl",
++ msgr_2 = "0000b90c0000h",
++ msgf_2 = "e3000000001cl",
++ msgfr_2 = "0000b91c0000h",
++ msfi_2 = "c20100000000n",
++ msgfi_2 = "c20000000000n",
++ o_2 = "000056000000j",
++ or_2 = "000000001600g",
++ oy_2 = "e30000000056l",
++ og_2 = "e30000000081l",
++ ogr_2 = "0000b9810000h",
++ oihf_2 = "c00c00000000n",
++ oilf_2 = "c00d00000000n",
++ pgin_2 = "0000b22e0000h",
++ pgout_2 = "0000b22f0000h",
++ pcc_2 = "0000b92c0000h",
++ pckmo_2 = "0000b9280000h",
++ pfmf_2 = "0000b9af0000h",
++ ptf_2 = "0000b9a20000h",
++ popcnt_2 = "0000b9e10000h",
++ pfd_2 = "e30000000036m",
++ pfdrl_2 = "c60200000000p",
++ pt_2 = "0000b2280000h",
++ pti_2 = "0000b99e0000h",
++ palb_2 = "0000b2480000h",
++ rrbe_2 = "0000b22a0000h",
++ rrbm_2 = "0000b9ae0000h",
++ rll_3 = "eb000000001ds",
++ rllg_3 = "eb000000001cs",
++ srst_2 = "0000b25e0000h",
++ srstu_2 = "0000b9be0000h",
++ sar_2 = "0000b24e0000h",
++ sfpc_2 = "0000b3840000h",
++ sfasr_2 = "0000b3850000h",
++ spm_2 = "00000000400g",
++ ssar_2 = "0000b2250000h",
++ ssair_2 = "0000b99f0000h",
++ slda_3 = "00008f000000q",
++ sldl_3 = "00008d000000q",
++ sla_3 = "00008b000000q",
++ slak_3 = "eb00000000dds",
++ slag_3 = "eb000000000bs",
++ sll_3 = "000089000000q",
++ sllk_3 = "eb00000000dfs",
++ sllg_3 = "eb000000000ds",
++ srda_3 = "00008e000000q",
++ srdl_3 = "00008c000000q",
++ sra_3 = "00008a000000q",
++ srak_3 = "eb00000000dcs",
++ srag_3 = "eb000000000as",
++ srl_3 = "000088000000q",
++ srlk_3 = "eb00000000des",
++ srlg_3 = "eb000000000cs",
++ sqxbr_2 = "0000b3160000h",
++ sqxr_2 = "0000b3360000h",
++ sqdbr_2 = "0000b3150000h",
++ sqdr_2 = "0000b2440000h",
++ sqebr_2 = "0000b3140000h",
++ sqer_2 = "0000b2450000h",
++ st_2 = "000050000000j",
++ sty_2 = "e30000000050l",
++ stg_2 = "e30000000024l",
++ std_2 = "000060000000j",
++ stdy_2 = "ed0000000067l",
++ ste_2 = "000070000000j",
++ stey_2 = "ed0000000066l",
++ stam_3 = "00009b000000q",
++ stamy_3 = "eb000000009bs",
++ stc_2 = "000042000000j",
++ stcy_2 = "e30000000072l",
++ stch_2 = "e300000000c3l",
++ stcmh_3 = "eb000000002ct",
++ stcm_3 = "0000be000000r",
++ stcmy_3 = "eb000000002dt",
++ stctl_3 = "0000b6000000q",
++ stctg_3 = "eb0000000025s",
++ sth_2 = "000040000000j",
++ sthy_2 = "e30000000070l",
++ sthh_2 = "e300000000c7l",
++ sthrl_2 = "c40700000000o",
++ stfh_2 = "e300000000cbl",
++ stm_3 = "000090000000q",
++ stmy_3 = "eb0000000090s",
++ stmg_3 = "eb0000000024s",
++ stmh_3 = "eb0000000026s",
++ stoc_3 = "eb00000000f3t",
++ stocg_3 = "eb00000000e3t",
++ stpq_2 = "e3000000008el",
++ strl_2 = "c40f00000000o",
++ stgrl_2 = "c40b00000000o",
++ strvh_2 = "e3000000003fl",
++ strv_2 = "e3000000003el",
++ strvg_2 = "e3000000002fl",
++ stura_2 = "0000b2460000h",
++ sturg_2 = "0000b9250000h",
++ s_2 = "00005b000000j",
++ sr_2 = "000000001b00g",
++ sy_2 = "e3000000005bl",
++ sg_2 = "e30000000009l",
++ sgr_2 = "0000b9090000h",
++ sgf_2 = "e30000000019l",
++ sgfr_2 = "0000b9190000h",
++ sxbr_2 = "0000b34b0000h",
++ sdbr_2 = "0000b31b0000h",
++ sebr_2 = "0000b30b0000h",
++ sh_2 = "00004b000000j",
++ shy_2 = "e3000000007bl",
++ sl_2 = "00005f000000j",
++ slr_2 = "000000001f00g",
++ sly_2 = "e3000000005fl",
++ slg_2 = "e3000000000bl",
++ slgr_2 = "0000b90b0000h",
++ slgf_2 = "e3000000001bl",
++ slgfr_2 = "0000b91b0000h",
++ slfi_2 = "c20500000000n",
++ slgfi_2 = "c20400000000n",
++ slb_2 = "e30000000099l",
++ slbr_2 = "0000b9990000h",
++ slbg_2 = "e30000000089l",
++ slbgr_2 = "0000b9890000h",
++ sxr_2 = "000000003700g",
++ sd_2 = "00006b000000j",
++ sdr_2 = "000000002b00g",
++ se_2 = "00007b000000j",
++ ser_2 = "000000003b00g",
++ su_2 = "00007f000000j",
++ sur_2 = "000000003f00g",
++ sw_2 = "00006f000000j",
++ swr_2 = "000000002f00g",
++ tar_2 = "0000b24c0000h",
++ tb_2 = "0000b22c0000h",
++ trace_3 = "000099000000q",
++ tracg_3 = "eb000000000fs",
++ tre_2 = "0000b2a50000h",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+ -- TODO: replace 'B' with correct encoding.
+ -- brc
+- map_op["j"..cond.."_1"] = "00000000"..tohex(0xa7040000+shl(c, 20)).."w"
++ map_op["j"..cond.."_1"] = "0000"..tohex(0xa7040000+shl(c, 20)).."w"
+ -- brcl
+- map_op["jg"..cond.."_1"] = tohex(0xc004+shl(c, 4)).."00000000".."x"
++ map_op["jg"..cond.."_1"] = tohex(0xc0040000+shl(c, 20)).."0000".."x"
+ -- bc
+- map_op["b"..cond.."_1"] = "00000000"..tohex(0x47000000+shl(c, 20)).."y"
++ map_op["b"..cond.."_1"] = "0000"..tohex(0x47000000+shl(c, 20)).."y"
+ -- bcr
+- map_op["b"..cond.."r_1"] = "00000000"..tohex(0x0700+shl(c, 4)).."z"
++ map_op["b"..cond.."r_1"] = "0000"..tohex(0x0700+shl(c, 4)).."z"
+ end
+ ------------------------------------------------------------------------------
+ -- Handle opcodes defined with template strings.
+ local function parse_template(params, template, nparams, pos)
+ -- Read the template in 16-bit chunks.
+ -- Leading halfword zeroes should not be written out.
+- local op0 = tonumber(sub(template, 5, 8), 16)
+- local op1 = tonumber(sub(template, 9, 12), 16)
+- local op2 = tonumber(sub(template, 13, 16), 16)
++ local op0 = tonumber(sub(template, 1, 4), 16)
++ local op1 = tonumber(sub(template, 5, 8), 16)
++ local op2 = tonumber(sub(template, 9, 12), 16)
+
+ local n,rs = 1,26
+
+@@ -1031,7 +1031,7 @@ local function parse_template(params, template, nparams, pos)
+ -- oorr iiii 00oo
+ -- This should be emitted as oorr, followed by the immediate action, followed by
+ -- 00oo.
+- for p in gmatch(sub(template, 17), ".") do
++ for p in gmatch(sub(template, 13), ".") do
+ local pr1,pr2,pr3
+ if p == "g" then
+ pr1,pr2=params[n],params[n+1]
+
+From be3efbc65a077e35a9ea7096af49cf4c161a46a8 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 5 Dec 2016 14:51:48 -0500
+Subject: [PATCH 069/260] Add simple tests for RX and RXY style instructions.
+
+Also changed the test function signature to int64_t fn(int64_t, int64_t)
+to make it easier to test 64-bit operations.
+---
+ dynasm/Examples/test_z_inst.c | 48 ++++++++++++++++++++++++++++-------
+ 1 file changed, 39 insertions(+), 9 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index ed20ea657..7259638bd 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -33,18 +33,48 @@ static void mul(dasm_State *state)
+ | br r14
+ }
+
++static void rx(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ int x = 1;
++ int y = 4095;
++
++ | la r4, 4095(r2, r3)
++ | la r5, 4095(r4)
++ | la r1, x(r5)
++ | la r2, y(r1, r0)
++ | br r14
++}
++
++static void rxy(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ int x = -524287;
++ int y = 524286;
++
++ | lay r4, -524288(r2, r3)
++ | lay r5, 524287(r4)
++ | lay r1, x(r5)
++ | lay r2, y(r1, r0)
++ | br r14
++}
++
+ typedef struct {
+- int arg1;
+- int arg2;
++ int64_t arg1;
++ int64_t arg2;
+ void (*fn)(dasm_State *);
+- int want;
++ int64_t want;
+ const char *testname;
+ } test_table;
+
+ test_table test[] = {
+- { 1, 2, add, 3, "add"},
+- {10, 5, sub, 5, "sub"},
+- { 2, 3, mul, 6, "mul"}
++ { 1, 2, add, 3, "add"},
++ {10, 5, sub, 5, "sub"},
++ { 2, 3, mul, 6, "mul"},
++ { 5, 7, rx, 12298, "rx"},
++ { 5, 7, rxy, 10, "rxy"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+@@ -69,11 +99,11 @@ int main(int argc, char *argv[])
+ dasm_setup(&state, actions);
+ test[i].fn(state);
+ size_t size;
+- int (*fptr)(int, int) = jitcode(&state, &size);
+- int got = fptr(test[i].arg1, test[i].arg2);
++ int64_t (*fptr)(int64_t, int64_t) = jitcode(&state, &size);
++ int64_t got = fptr(test[i].arg1, test[i].arg2);
+
+ if (got != test[i].want) {
+- fprintf(stderr, "FAIL: test %s: want %d, got %d\n", test[i].testname, test[i].want, got);
++ fprintf(stderr, "FAIL: test %s: want %ld, got %ld\n", test[i].testname, test[i].want, got);
+ exit(1);
+ }
+ munmap(fptr, size);
+
+From 410bdb81823ff0fddff39bd1f43e22ec4c6801cc Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 5 Dec 2016 14:57:53 -0500
+Subject: [PATCH 070/260] Minor cleanup of dasm_s390x.lua.
+
+---
+ dynasm/dasm_s390x.lua | 30 ++++++------------------------
+ 1 file changed, 6 insertions(+), 24 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 6416438e6..5abfe3bff 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -239,8 +239,6 @@ local map_cond = {
+
+ ------------------------------------------------------------------------------
+
+-local parse_reg_type
+-
+ local function parse_gpr(expr)
+ local r = match(expr, "^r(1?[0-9])$")
+ if r then
+@@ -1016,30 +1014,14 @@ local function parse_template(params, template, nparams, pos)
+ local op1 = tonumber(sub(template, 5, 8), 16)
+ local op2 = tonumber(sub(template, 9, 12), 16)
+
+- local n,rs = 1,26
+-
+- parse_reg_type = false
+ -- Process each character.
+- -- TODO
+- -- 12-bit displacements (DISP12) and 16-bit immediates (IMM16) can be put at
+- -- one of two locations relative to the end of the instruction.
+- -- To make decoding easier we should insert the actions for these immediately
+- -- after the halfword they modify.
+- -- For example, take the instruction ahik, which is laid out as follows (each
+- -- char is 4 bits):
+- -- o = op code, r = register, i = immediate
+- -- oorr iiii 00oo
+- -- This should be emitted as oorr, followed by the immediate action, followed by
+- -- 00oo.
+ for p in gmatch(sub(template, 13), ".") do
+ local pr1,pr2,pr3
+ if p == "g" then
+- pr1,pr2=params[n],params[n+1]
+- op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
++ op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
+ wputhw(op2)
+ elseif p == "h" then
+- pr1,pr2=params[n],params[n+1]
+- op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
++ op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
+ wputhw(op1); wputhw(op2)
+ elseif p == "j" then
+ local d, x, b, a = parse_mem_bx(params[2])
+@@ -1067,20 +1049,20 @@ local function parse_template(params, template, nparams, pos)
+ op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2)
+- if a then a() end
++ if a then a() end -- a() emits action.
+ elseif p == "s" then
+ local d, b, a = parse_mem_by(params[3])
+ op0 = op0 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
+ op2 = op2 + band(shr(d, 4), 0xff00)
+ wputhw(op0); wputhw(op1); wputhw(op2)
+- if a then a() end
++ if a then a() end -- a() emits action.
+ elseif p == "y" then
+ local d, x, b, a = parse_mem_bx(params[1])
+ op1 = op1 + x
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2);
+- if a then a() end
++ if a then a() end -- a() emits action.
+ elseif p == "z" then
+ op2 = op2 + parse_gpr(params[1])
+ wputhw(op2)
+@@ -1166,7 +1148,7 @@ map_op[".align_1"] = function(params)
+ for i=1,8 do
+ x = x / 2
+ if x == 1 then
+- waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
++ waction("ALIGN", align-1, nil, 1) -- Action halfword is 2**n-1.
+ return
+ end
+ end
+
+From 73ad6dc77dd2d6a5dff43cf5c5e0e58c8eba6eed Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 5 Dec 2016 17:21:49 -0500
+Subject: [PATCH 071/260] Add support for jumps to local labels.
+
+Currently limited to 16-bit relative branch offsets ONLY.
+
+Allows code like:
+
+|1:
+| ...
+| j <1
+---
+ dynasm/Examples/test_z_inst.c | 23 ++++++++++++++++++++---
+ dynasm/dasm_s390x.h | 15 ++++++++-------
+ dynasm/dasm_s390x.lua | 10 ++++++++++
+ 3 files changed, 38 insertions(+), 10 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 7259638bd..ad8e6a002 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -8,6 +8,7 @@
+ // DynASM directives.
+ |.arch s390x
+ |.actionlist actions
++|.globals lab_
+
+ static void add(dasm_State *state)
+ {
+@@ -61,6 +62,20 @@ static void rxy(dasm_State *state)
+ | br r14
+ }
+
++static void lab(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ // r1 = 0; do { r2 += r2; r1 += 1; } while(r1 < r3);
++ | la r1, 0(r0)
++ |1:
++ | agr r2, r2
++ | la r1, 1(r1)
++ | cgr r1, r3
++ | jl <1
++ | br r14
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+@@ -74,7 +89,8 @@ test_table test[] = {
+ {10, 5, sub, 5, "sub"},
+ { 2, 3, mul, 6, "mul"},
+ { 5, 7, rx, 12298, "rx"},
+- { 5, 7, rxy, 10, "rxy"}
++ { 5, 7, rxy, 10, "rxy"},
++ { 2, 4, lab, 32, "lab"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+@@ -93,9 +109,10 @@ static void *jitcode(dasm_State **state, size_t *size)
+ int main(int argc, char *argv[])
+ {
+ dasm_State *state;
+-
+- for(int i=0; i < sizeof(test)/sizeof(test[0]); i++) {
++ for(int i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
+ dasm_init(&state, 1);
++ void* labels[lab__MAX];
++ dasm_setupglobal(&state, labels, lab__MAX);
+ dasm_setup(&state, actions);
+ test[i].fn(state);
+ size_t size;
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index e2cd51973..8fc4cd633 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -219,7 +219,7 @@ void dasm_put(Dst_DECL, int start, ...)
+ b[pos++] = ofs;
+ break;
+ case DASM_REL_LG:
+- n = (ins & 2047) - 10;
++ n = *p++ - 10;
+ pl = D->lglabels + n;
+ /* Bkwd rel or global. */
+ if (n >= 0) {
+@@ -247,7 +247,7 @@ void dasm_put(Dst_DECL, int start, ...)
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+- pl = D->lglabels + (ins & 2047) - 10;
++ pl = D->lglabels + *p++ - 10;
+ CKPL(lg, LG);
+ goto putlabel;
+ case DASM_LABEL_PC:
+@@ -262,6 +262,7 @@ void dasm_put(Dst_DECL, int start, ...)
+ }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
++ ofs += 2;
+ break;
+ case DASM_IMM16:
+ CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
+@@ -348,10 +349,12 @@ int dasm_link(Dst_DECL, size_t * szp)
+ break;
+ case DASM_REL_LG:
+ case DASM_REL_PC:
++ p++;
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+ case DASM_LABEL_PC:
++ p++;
+ b[pos++] += ofs;
+ break;
+ case DASM_IMM16:
+@@ -421,13 +424,11 @@ int dasm_encode(Dst_DECL, void *buffer)
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+ patchrel:
+- CK((n & 3) == 0 &&
+- (((n + 4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
+- ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
+- cp[-1] |= ((n + 4) & ((ins & 2048) ? 0x0000fffc : 0x03fffffc));
++ *cp++ = n/2; /* TODO: only 16-bit relative jump currently works. */
++ p++; /* skip argument */
+ break;
+ case DASM_LABEL_LG:
+- ins &= 2047;
++ ins = *p++;
+ if (ins >= 20)
+ D->globals[ins - 10] = (void *)(base + n);
+ break;
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 5abfe3bff..0625d5fc1 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1057,6 +1057,14 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + band(shr(d, 4), 0xff00)
+ wputhw(op0); wputhw(op1); wputhw(op2)
+ if a then a() end -- a() emits action.
++ elseif p == "w" then
++ local mode, n, s = parse_label(params[1])
++ wputhw(op1)
++ waction("REL_"..mode, n, s)
++ elseif p == "x" then
++ local mode, n, s = parse_label(params[1])
++ wputhw(op0)
++ waction("REL_"..mode, n, s)
+ elseif p == "y" then
+ local d, x, b, a = parse_mem_bx(params[1])
+ op1 = op1 + x
+@@ -1066,6 +1074,8 @@ local function parse_template(params, template, nparams, pos)
+ elseif p == "z" then
+ op2 = op2 + parse_gpr(params[1])
+ wputhw(op2)
++ else
++ werror("unrecognized encoding")
+ end
+ end
+
+
+From 2156278508c72714fd1eb229e7409ba660fbcc44 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Tue, 6 Dec 2016 12:23:51 +0530
+Subject: [PATCH 072/260] Updated the addressing mode working
+
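+The case where the immediate operand is passed as a label (i.e. it does not parse as a number) was not handled. parse_imm now writes numeric immediates out directly and emits an IMM32 action for anything else.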
+---
+ dynasm/dasm_s390x.lua | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 0625d5fc1..7ed35f3b7 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -370,10 +370,15 @@ end
+
+ local function parse_imm(arg)
+ local imm_val = tonumber(arg,16)
+- if not is_int32(imm_val) then
+- werror("Immediate value out of range: ", imm_val)
++ if imm_val then
++ if not is_int32(imm_val) then
++ werror("Immediate value out of range: ", imm_val)
++ end
++ wputhw(band(shr(imm_val, 16), 0xffff));
++ wputhw(band(imm_val, 0xffff));
++ else
++ waction("IMM32", nil, arg) -- if we get label
+ end
+- return imm_val
+ end
+
+ local function parse_label(label, def)
+@@ -1042,8 +1047,8 @@ local function parse_template(params, template, nparams, pos)
+
+ elseif p == "n" then
+ op0 = op0 + shl(parse_gpr(params[1]), 4)
+- local imm = parse_imm(params[2])
+- wputhw(op0); waction("IMM32", nil, imm)
++ wputhw(op0);
++ parse_imm(params[2])
+ elseif p == "q" then
+ local d, b, a = parse_mem_b(params[3])
+ op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
+
+From 7c79bbc76839e9ffe4e1a5439d7e273ebf03d5c1 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Tue, 6 Dec 2016 14:11:30 +0530
+Subject: [PATCH 073/260] Update test_z_inst.c
+
+Added a test case for the 16-bit add immediate instruction (ahi, RI-a format).
+Added a test case for the 32-bit add immediate instruction (afi, RIL-a format).
+---
+ dynasm/Examples/test_z_inst.c | 31 +++++++++++++++++++++++++------
+ 1 file changed, 25 insertions(+), 6 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index ad8e6a002..dbb50eb51 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -76,6 +76,23 @@ static void lab(dasm_State *state)
+ | br r14
+ }
+
++static void add_imm16(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ | ahi r2 , 0xf
++ | br r14
++}
++
++
++static void add_imm32(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ | afi r2 , 0xe
++ | br r14
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+@@ -85,12 +102,14 @@ typedef struct {
+ } test_table;
+
+ test_table test[] = {
+- { 1, 2, add, 3, "add"},
+- {10, 5, sub, 5, "sub"},
+- { 2, 3, mul, 6, "mul"},
+- { 5, 7, rx, 12298, "rx"},
+- { 5, 7, rxy, 10, "rxy"},
+- { 2, 4, lab, 32, "lab"}
++ { 1, 2, add, 3, "add"},
++ {10, 5, sub, 5, "sub"},
++ { 2, 3, mul, 6, "mul"},
++ { 5, 7, rx, 12298, "rx"},
++ { 5, 7, rxy, 10, "rxy"},
++ { 2, 4, lab, 32, "lab"},
++ { 2, 0, add_imm16,17, "imm16"}
++ { 2, 0, add_imm32,16, "imm32"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+
+From ba4343d9d12f4aa5b64d75e053965c160c7a2948 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Tue, 6 Dec 2016 19:17:54 +0530
+Subject: [PATCH 074/260] Added support for Immediate mode 16 bit
+
+Currently only the ahi instruction is encoded; other instructions will be added later. We are running out of single characters for the encoding codes, so I was planning to append the complete mode name instead (RXa or rxa) - which one do you think is better? I am also thinking of adding the remaining instruction modes that we don't support as of now. Let me know if you want me to add those now, or whether we should wait for some time before adding them.
+---
+ dynasm/dasm_s390x.lua | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 7ed35f3b7..9853aaca3 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -291,6 +291,9 @@ local function is_int32(num)
+ return -2147483648 <= num and num < 2147483648
+ end
+
++local function_is_int16(num)
++ return -32768 <= num and num < 32768
++
+ -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
+ -- If x is not specified then it is 0.
+ local function split_memop(arg)
+@@ -381,6 +384,18 @@ local function parse_imm(arg)
+ end
+ end
+
++local function parse_imm16(arg)
++ local imm_val = tonumber(arg,16)
++ if imm_val then
++ if not is_int16(imm_val) then
++ werror("Immediate value out of range: ", imm_val)
++ end
++ wputhw(imm_val)
++ else
++ waction("IMM16", nil, arg)
++ end
++end
++
+ local function parse_label(label, def)
+ local prefix = sub(label, 1, 2)
+ -- =>label (pc label reference)
+@@ -449,6 +464,7 @@ map_op = {
+ adbr_2 = "0000b31a0000h",
+ aebr_2 = "0000b30a0000h",
+ ah_2 = "00004a000000j",
++ ahi_2 = "0000a70a0000i",
+ ahy_2 = "e3000000007al",
+ afi_2 = "c20900000000n",
+ agfi_2 = "c20800000000n",
+@@ -1028,6 +1044,10 @@ local function parse_template(params, template, nparams, pos)
+ elseif p == "h" then
+ op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
+ wputhw(op1); wputhw(op2)
++ else if p == "i" then
++ op1 = op1 + shl(parse_gpr(params[1]),4)
++ wputhw(op1);
++ parse_imm16(params[2])
+ elseif p == "j" then
+ local d, x, b, a = parse_mem_bx(params[2])
+ op1 = op1 + shl(parse_gpr(params[1]), 4) + x
+
+From 1362e9aee2213437b1b6b0636634f2245b6a3ef4 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Tue, 6 Dec 2016 19:34:47 +0530
+Subject: [PATCH 075/260] Minor Cleanup
+
+---
+ dynasm/dasm_s390x.lua | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 9853aaca3..9a77d4e6a 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -291,8 +291,9 @@ local function is_int32(num)
+ return -2147483648 <= num and num < 2147483648
+ end
+
+-local function_is_int16(num)
++local function is_int16(num)
+ return -32768 <= num and num < 32768
++end
+
+ -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
+ -- If x is not specified then it is 0.
+@@ -1044,7 +1045,7 @@ local function parse_template(params, template, nparams, pos)
+ elseif p == "h" then
+ op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
+ wputhw(op1); wputhw(op2)
+- else if p == "i" then
++ elseif p == "i" then
+ op1 = op1 + shl(parse_gpr(params[1]),4)
+ wputhw(op1);
+ parse_imm16(params[2])
+
+From b24d490a42390d9d83b4b523bb92462098aac1f2 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 6 Dec 2016 09:16:30 -0500
+Subject: [PATCH 076/260] Fix test table (needed comma at end of line to
+ compile).
+
+Also, re-align table columns.
+---
+ dynasm/Examples/test_z_inst.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index dbb50eb51..547a1c469 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -102,14 +102,14 @@ typedef struct {
+ } test_table;
+
+ test_table test[] = {
+- { 1, 2, add, 3, "add"},
+- {10, 5, sub, 5, "sub"},
+- { 2, 3, mul, 6, "mul"},
+- { 5, 7, rx, 12298, "rx"},
+- { 5, 7, rxy, 10, "rxy"},
+- { 2, 4, lab, 32, "lab"},
+- { 2, 0, add_imm16,17, "imm16"}
+- { 2, 0, add_imm32,16, "imm32"}
++ { 1, 2, add, 3, "add"},
++ {10, 5, sub, 5, "sub"},
++ { 2, 3, mul, 6, "mul"},
++ { 5, 7, rx, 12298, "rx"},
++ { 5, 7, rxy, 10, "rxy"},
++ { 2, 4, lab, 32, "lab"},
++ { 2, 0, add_imm16, 17, "imm16"},
++ { 2, 0, add_imm32, 16, "imm32"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+
+From e3ab67aed48b5c05ae0ac3f107df70262a7c9552 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 6 Dec 2016 11:56:37 -0500
+Subject: [PATCH 077/260] Support forward local branches.
+
+---
+ dynasm/Examples/test_z_inst.c | 18 ++++++++++++++++++
+ dynasm/dasm_s390x.h | 18 ++++++++++++++----
+ dynasm/dasm_s390x.lua | 9 ++++++++-
+ 3 files changed, 40 insertions(+), 5 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 547a1c469..3938a741a 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -76,6 +76,23 @@ static void lab(dasm_State *state)
+ | br r14
+ }
+
++static void labg(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ // r1 = 0; do { r2 += r2; r1 += 1; } while(r1 < r3);
++ | la r1, 0(r0)
++ |1:
++ | agr r2, r2
++ | la r1, 1(r1)
++ | cgr r1, r3
++ | jgl <1
++ | jgnl >1
++ | stg r0, 0(r0)
++ |1:
++ | br r14
++}
++
+ static void add_imm16(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
+@@ -108,6 +125,7 @@ test_table test[] = {
+ { 5, 7, rx, 12298, "rx"},
+ { 5, 7, rxy, 10, "rxy"},
+ { 2, 4, lab, 32, "lab"},
++ { 2, 4, labg, 32, "labg"},
+ { 2, 0, add_imm16, 17, "imm16"},
+ { 2, 0, add_imm32, 16, "imm32"}
+ };
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 8fc4cd633..18a7338e2 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -210,7 +210,7 @@ void dasm_put(Dst_DECL, int start, ...)
+ goto stop;
+ case DASM_ESC:
+ p++;
+- ofs += 4;
++ ofs += 2;
+ break;
+ case DASM_REL_EXT:
+ break;
+@@ -244,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...)
+ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
++ ofs += 2;
++ if (p[-3] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
++ ofs += 2;
++ }
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+@@ -262,7 +266,6 @@ void dasm_put(Dst_DECL, int start, ...)
+ }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+- ofs += 2;
+ break;
+ case DASM_IMM16:
+ CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
+@@ -424,8 +427,15 @@ int dasm_encode(Dst_DECL, void *buffer)
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+ patchrel:
+- *cp++ = n/2; /* TODO: only 16-bit relative jump currently works. */
+- p++; /* skip argument */
++ /* Offsets are halfword aligned (so need to be halved). */
++ n += 2; /* Offset is relative to start of instruction. */
++ if (cp[-1] >> 12 == 0xc) {
++ *cp++ = n >> 17;
++ } else {
++ CK(-(1 << 16) <= n && n < (1 << 16) && n & 1 == 0, RANGE_LG);
++ }
++ *cp++ = n >> 1;
++ p++; /* skip argument */
+ break;
+ case DASM_LABEL_LG:
+ ins = *p++;
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 9a77d4e6a..847a02e75 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -75,6 +75,13 @@ local function dumpactions(out)
+ out:write("\n")
+ end
+
++local function havearg(a)
++ return a == "ESC" or
++ a == "SECTION" or
++ a == "REL_LG" or
++ a == "LABEL_LG"
++end
++
+ -- Write action list buffer as a huge static C array.
+ local function writeactions(out, name)
+ local nn = #actlist
+@@ -87,7 +94,7 @@ local function writeactions(out, name)
+ local name = action_names[actlist[i]+1]
+ if not esc and name then
+ assert(out:write(" /* ", name, " */"))
+- esc = name == "ESC" or name == "SECTION"
++ esc = havearg(name)
+ else
+ esc = false
+ end
+
+From 53b627b21cc02fa0687ac6d1eeeb4c972ca593b0 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 6 Dec 2016 12:47:23 -0500
+Subject: [PATCH 078/260] s/SP/sp/ in vm_s390x.dasc.
+
+We support the pseudo-register sp now.
+---
+ src/vm_s390x.dasc | 41 ++++++++++++++++++++---------------------
+ 1 file changed, 20 insertions(+), 21 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 72fe5d26f..e639159c8 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -55,41 +55,40 @@
+ |
+ |.define CRET1, r2
+ |
+-|.define SP, r15
+ |.define OP, r2
+ |.define TMP1, r3
+ |
+ |// Stack layout while in interpreter. Must match with lj_frame.h.
+-|.define CFRAME_SPACE, 240 // Delta for SP, 8 byte aligned.
++|.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
+ |
+ |// Register save area.
+-|.define SAVE_GPRS, 288(SP) // Save area for r6-r15 (10*8 bytes).
++|.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes).
+ |
+ |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
+-|.define SAVE_ERRF, 280(SP) // Argument 4, in r5.
+-|.define SAVE_NRES, 272(SP) // Argument 3, in r4.
+-|.define SAVE_CFRAME, 264(SP) // Argument 2, in r3.
+-|.define SAVE_L, 256(SP) // Argument 1, in r2.
+-|.define RESERVED, 248(SP) // Reserved for compiler use.
+-|.define BACKCHAIN, 240(SP) // <- SP entering interpreter.
++|.define SAVE_ERRF, 280(sp) // Argument 4, in r5.
++|.define SAVE_NRES, 272(sp) // Argument 3, in r4.
++|.define SAVE_CFRAME, 264(sp) // Argument 2, in r3.
++|.define SAVE_L, 256(sp) // Argument 1, in r2.
++|.define RESERVED, 248(sp) // Reserved for compiler use.
++|.define BACKCHAIN, 240(sp) // <- sp entering interpreter.
+ |
+ |// Interpreter stack frame.
+-|.define SAVE_FPR15, 232(SP)
+-|.define SAVE_FPR14, 224(SP)
+-|.define SAVE_FPR13, 216(SP)
+-|.define SAVE_FPR12, 208(SP)
+-|.define SAVE_FPR11, 200(SP)
+-|.define SAVE_FPR10, 192(SP)
+-|.define SAVE_FPR9, 184(SP)
+-|.define SAVE_FPR8, 176(SP)
+-|.define SAVE_PC, 168(SP)
+-|.define SAVE_MULTRES, 160(SP)
++|.define SAVE_FPR15, 232(sp)
++|.define SAVE_FPR14, 224(sp)
++|.define SAVE_FPR13, 216(sp)
++|.define SAVE_FPR12, 208(sp)
++|.define SAVE_FPR11, 200(sp)
++|.define SAVE_FPR10, 192(sp)
++|.define SAVE_FPR9, 184(sp)
++|.define SAVE_FPR8, 176(sp)
++|.define SAVE_PC, 168(sp)
++|.define SAVE_MULTRES, 160(sp)
+ |
+ |// Callee save area (allocated by interpreter).
+-|.define CALLEESAVE 000(SP) // <- SP in interpreter.
++|.define CALLEESAVE, 000(sp) // <- sp in interpreter.
+ |
+ |.macro saveregs
+-| lay SP, -CFRAME_SPACE(SP) // Allocate stack frame.
++| lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
+ | stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
+ | std f8, SAVE_FPR8 // f8-f15 are callee-saved.
+ | std f9, SAVE_FPR9
+
+From 3ae1c4fd6b43ea89f9cac2b8634756bfb3864251 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 6 Dec 2016 13:15:29 -0500
+Subject: [PATCH 079/260] Support floating point register arguments.
+
+It would be nice to verify that floating-point/general-purpose
+registers are indeed expected by the instruction, but for now treat
+them both the same so we can use floating-point instructions.
+---
+ dynasm/Examples/test_z_inst.c | 65 +++++++++++++++++++++++++++++++++--
+ dynasm/dasm_s390x.lua | 35 +++++++------------
+ 2 files changed, 76 insertions(+), 24 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 3938a741a..b54c383f4 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -101,7 +101,6 @@ static void add_imm16(dasm_State *state)
+ | br r14
+ }
+
+-
+ static void add_imm32(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
+@@ -110,6 +109,67 @@ static void add_imm32(dasm_State *state)
+ | br r14
+ }
+
++static void save(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ |.define CFRAME_SPACE, 224 // Delta for sp, 8 byte aligned.
++ |
++ |// Register save area.
++ |.define SAVE_GPRS, 264(sp) // Save area for r6-r15 (10*8 bytes).
++ |
++ |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
++ |.define RESERVED, 232(sp) // Reserved for compiler use.
++ |.define BACKCHAIN, 224(sp)
++ |
++ |// Current stack frame.
++ |.define SAVE_FPR15, 216(sp)
++ |.define SAVE_FPR14, 208(sp)
++ |.define SAVE_FPR13, 200(sp)
++ |.define SAVE_FPR12, 192(sp)
++ |.define SAVE_FPR11, 184(sp)
++ |.define SAVE_FPR10, 176(sp)
++ |.define SAVE_FPR9, 168(sp)
++ |.define SAVE_FPR8, 160(sp)
++ |
++ |// Callee save area.
++ |.define CALLEESAVE, 000(sp)
++ |
++ |.macro saveregs
++ | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
++ | stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
++ | std f8, SAVE_FPR8 // f8-f15 are callee-saved.
++ | std f9, SAVE_FPR9
++ | std f10, SAVE_FPR10
++ | std f11, SAVE_FPR11
++ | std f12, SAVE_FPR12
++ | std f13, SAVE_FPR13
++ | std f14, SAVE_FPR14
++ | std f15, SAVE_FPR15
++ |.endmacro
++ |
++ |.macro restoreregs
++ | ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
++ | ld f9, SAVE_FPR9
++ | ld f10, SAVE_FPR10
++ | ld f11, SAVE_FPR11
++ | ld f12, SAVE_FPR12
++ | ld f13, SAVE_FPR13
++ | ld f14, SAVE_FPR14
++ | ld f15, SAVE_FPR15
++ | lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
++ |.endmacro
++ |
++ | saveregs
++ | lgfi r7, 10 // 16
++ | lgfi r8, 20 // 32
++ | agr r2, r3
++ | agr r7, r8
++ | msgr r2, r7
++ | restoreregs
++ | br r14
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+@@ -127,7 +187,8 @@ test_table test[] = {
+ { 2, 4, lab, 32, "lab"},
+ { 2, 4, labg, 32, "labg"},
+ { 2, 0, add_imm16, 17, "imm16"},
+- { 2, 0, add_imm32, 16, "imm32"}
++ { 2, 0, add_imm32, 16, "imm32"},
++ { 7, 3, save, 480, "save"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 847a02e75..c799bb6c9 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -246,8 +246,8 @@ local map_cond = {
+
+ ------------------------------------------------------------------------------
+
+-local function parse_gpr(expr)
+- local r = match(expr, "^r(1?[0-9])$")
++local function parse_reg(expr)
++ local r = match(expr, "^[r|f](1?[0-9])$")
+ if r then
+ r = tonumber(r)
+ if r <= 15 then return r, tp end
+@@ -255,15 +255,6 @@ local function parse_gpr(expr)
+ werror("bad register name `"..expr.."'")
+ end
+
+-local function parse_fpr(expr)
+- local r = match(expr, "^f(1?[0-9])$")
+- if r then
+- r = tonumber(r)
+- if r <= 15 then return r end
+- end
+- werror("bad register name `"..expr.."'")
+-end
+-
+ local parse_ctx = {}
+
+ local loadenv = setfenv and function(s)
+@@ -308,11 +299,11 @@ local function split_memop(arg)
+ local reg = "r1?[0-9]"
+ local d, x, b = match(arg, "^(.*)%(("..reg.."), ("..reg..")%)$")
+ if d then
+- return d, parse_gpr(x), parse_gpr(b)
++ return d, parse_reg(x), parse_reg(b)
+ end
+ local d, b = match(arg, "^(.*)%(("..reg..")%)$")
+ if d then
+- return d, 0, parse_gpr(b)
++ return d, 0, parse_reg(b)
+ end
+ -- TODO: handle values without registers?
+ -- TODO: handle registers without a displacement?
+@@ -1047,18 +1038,18 @@ local function parse_template(params, template, nparams, pos)
+ for p in gmatch(sub(template, 13), ".") do
+ local pr1,pr2,pr3
+ if p == "g" then
+- op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
++ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
+ wputhw(op2)
+ elseif p == "h" then
+- op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
++ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
+ wputhw(op1); wputhw(op2)
+ elseif p == "i" then
+- op1 = op1 + shl(parse_gpr(params[1]),4)
++ op1 = op1 + shl(parse_reg(params[1]),4)
+ wputhw(op1);
+ parse_imm16(params[2])
+ elseif p == "j" then
+ local d, x, b, a = parse_mem_bx(params[2])
+- op1 = op1 + shl(parse_gpr(params[1]), 4) + x
++ op1 = op1 + shl(parse_reg(params[1]), 4) + x
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2);
+ if a then a() end
+@@ -1066,7 +1057,7 @@ local function parse_template(params, template, nparams, pos)
+
+ elseif p == "l" then
+ local d, x, b, a = parse_mem_bxy(params[2])
+- op0 = op0 + shl(parse_gpr(params[1]), 4) + x
++ op0 = op0 + shl(parse_reg(params[1]), 4) + x
+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
+ op2 = op2 + band(shr(d, 4), 0xff00)
+ wputhw(op0); wputhw(op1); wputhw(op2)
+@@ -1074,18 +1065,18 @@ local function parse_template(params, template, nparams, pos)
+ elseif p == "m" then
+
+ elseif p == "n" then
+- op0 = op0 + shl(parse_gpr(params[1]), 4)
++ op0 = op0 + shl(parse_reg(params[1]), 4)
+ wputhw(op0);
+ parse_imm(params[2])
+ elseif p == "q" then
+ local d, b, a = parse_mem_b(params[3])
+- op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
++ op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2)
+ if a then a() end -- a() emits action.
+ elseif p == "s" then
+ local d, b, a = parse_mem_by(params[3])
+- op0 = op0 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
++ op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
+ op2 = op2 + band(shr(d, 4), 0xff00)
+ wputhw(op0); wputhw(op1); wputhw(op2)
+@@ -1105,7 +1096,7 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op1); wputhw(op2);
+ if a then a() end -- a() emits action.
+ elseif p == "z" then
+- op2 = op2 + parse_gpr(params[1])
++ op2 = op2 + parse_reg(params[1])
+ wputhw(op2)
+ else
+ werror("unrecognized encoding")
+
+From c688a0a3ab07bf2c13bbceaba4423656f7bd9f3e Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 6 Dec 2016 14:38:11 -0500
+Subject: [PATCH 080/260] Add support for brasl instruction.
+
+Important for calling subroutines.
+---
+ dynasm/Examples/test_z_inst.c | 54 ++++++++++++++++++++++++++++-------
+ dynasm/dasm_s390x.lua | 5 ++++
+ 2 files changed, 49 insertions(+), 10 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index b54c383f4..ee005b5cd 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -93,6 +93,38 @@ static void labg(dasm_State *state)
+ | br r14
+ }
+
++static void labmul(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ // Multiply using an add function.
++ // Only correct if input is positive.
++ |->mul_func:
++ | stmg r6, r14, 48(sp)
++ | lgr r6, r2
++ | lgr r7, r3
++ | cgfi r7, 0
++ | je >3
++ | cgfi r7, 1
++ | je >2
++ |1:
++ | lgr r3, r6
++ | brasl r14, ->add_func
++ | lay r7, -1(r7)
++ | cgfi r7, 1
++ | jh <1
++ |2:
++ | lmg r6, r14, 48(sp)
++ | br r14
++ |3:
++ | la r2, 0(r0)
++ | j <2
++
++ |->add_func:
++ | agr r2, r3
++ | br r14
++}
++
+ static void add_imm16(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
+@@ -179,16 +211,18 @@ typedef struct {
+ } test_table;
+
+ test_table test[] = {
+- { 1, 2, add, 3, "add"},
+- {10, 5, sub, 5, "sub"},
+- { 2, 3, mul, 6, "mul"},
+- { 5, 7, rx, 12298, "rx"},
+- { 5, 7, rxy, 10, "rxy"},
+- { 2, 4, lab, 32, "lab"},
+- { 2, 4, labg, 32, "labg"},
+- { 2, 0, add_imm16, 17, "imm16"},
+- { 2, 0, add_imm32, 16, "imm32"},
+- { 7, 3, save, 480, "save"}
++ { 1, 2, add, 3, "add"},
++ {10, 5, sub, 5, "sub"},
++ { 2, 3, mul, 6, "mul"},
++ { 5, 7, rx, 12298, "rx"},
++ { 5, 7, rxy, 10, "rxy"},
++ { 2, 4, lab, 32, "lab"},
++ { 2, 4, labg, 32, "labg"},
++ { 2, 0, add_imm16, 17, "imm16"},
++ { 2, 0, add_imm32, 16, "imm32"},
++ { 7, 3, save, 480, "save"},
++ { 7, 3, labmul, 21, "labmul0"},
++ { 7, 0, labmul, 0, "labmul1"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index c799bb6c9..602428b83 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1068,6 +1068,11 @@ local function parse_template(params, template, nparams, pos)
+ op0 = op0 + shl(parse_reg(params[1]), 4)
+ wputhw(op0);
+ parse_imm(params[2])
++ elseif p == "o" then
++ op0 = op0 + shl(parse_reg(params[1]), 4)
++ wputhw(op0);
++ local mode, n, s = parse_label(params[2])
++ waction("REL_"..mode, n, s)
+ elseif p == "q" then
+ local d, b, a = parse_mem_b(params[3])
+ op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+
+From cdfb632a4f72d9d9f15516b23b393d31e74d1ddb Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 6 Dec 2016 15:37:45 -0500
+Subject: [PATCH 081/260] Add ADD HALFWORD IMMEDIATE (64-bit) instruction
+ (aghi).
+
+---
+ dynasm/dasm_s390x.lua | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 602428b83..cfe861cbf 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -462,6 +462,7 @@ map_op = {
+ axbr_2 = "0000b34a0000h",
+ adbr_2 = "0000b31a0000h",
+ aebr_2 = "0000b30a0000h",
++ aghi_2 = "0000a70b0000i",
+ ah_2 = "00004a000000j",
+ ahi_2 = "0000a70a0000i",
+ ahy_2 = "e3000000007al",
+
+From 155577093a35012b93cae9b7be2c85970f5aee0e Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 6 Dec 2016 15:31:07 -0500
+Subject: [PATCH 082/260] Add test for PC-style labels.
+
+These are labels which are given a numeric value. For example, the
+following code would generate PC labels 0 to 4:
+
+for (int i = 0; i < 5; i++) {
+ |=>i:
+}
+---
+ dynasm/Examples/test_z_inst.c | 20 +++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index ee005b5cd..4633b6d89 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -202,6 +202,23 @@ static void save(dasm_State *state)
+ | br r14
+ }
+
++static void pc(dasm_State *state) {
++ dasm_State **Dst = &state;
++ int MAX = 10;
++ dasm_growpc(Dst, MAX+1);
++
++ | j =>MAX
++ for (int i = 0; i <= MAX; i++) {
++ |=>i:
++ if (i == 0) {
++ | br r14
++ } else {
++ | aghi r2, i
++ | j =>i-1
++ }
++ }
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+@@ -222,7 +239,8 @@ test_table test[] = {
+ { 2, 0, add_imm32, 16, "imm32"},
+ { 7, 3, save, 480, "save"},
+ { 7, 3, labmul, 21, "labmul0"},
+- { 7, 0, labmul, 0, "labmul1"}
++ { 7, 0, labmul, 0, "labmul1"},
++ { 0, 0, pc, 55, "pc"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+
+From a03638880c961e4546d6e4bb522d5ff2e29682fb Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 6 Dec 2016 15:45:06 -0500
+Subject: [PATCH 083/260] Re-order test function.
+
+---
+ dynasm/Examples/test_z_inst.c | 64 +++++++++++++++++------------------
+ 1 file changed, 32 insertions(+), 32 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 4633b6d89..5208d4b1b 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -93,38 +93,6 @@ static void labg(dasm_State *state)
+ | br r14
+ }
+
+-static void labmul(dasm_State *state)
+-{
+- dasm_State **Dst = &state;
+-
+- // Multiply using an add function.
+- // Only correct if input is positive.
+- |->mul_func:
+- | stmg r6, r14, 48(sp)
+- | lgr r6, r2
+- | lgr r7, r3
+- | cgfi r7, 0
+- | je >3
+- | cgfi r7, 1
+- | je >2
+- |1:
+- | lgr r3, r6
+- | brasl r14, ->add_func
+- | lay r7, -1(r7)
+- | cgfi r7, 1
+- | jh <1
+- |2:
+- | lmg r6, r14, 48(sp)
+- | br r14
+- |3:
+- | la r2, 0(r0)
+- | j <2
+-
+- |->add_func:
+- | agr r2, r3
+- | br r14
+-}
+-
+ static void add_imm16(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
+@@ -202,6 +170,38 @@ static void save(dasm_State *state)
+ | br r14
+ }
+
++static void labmul(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ // Multiply using an add function.
++ // Only correct if input is positive.
++ |->mul_func:
++ | stmg r6, r14, 48(sp)
++ | lgr r6, r2
++ | lgr r7, r3
++ | cgfi r7, 0
++ | je >3
++ | cgfi r7, 1
++ | je >2
++ |1:
++ | lgr r3, r6
++ | brasl r14, ->add_func
++ | lay r7, -1(r7)
++ | cgfi r7, 1
++ | jh <1
++ |2:
++ | lmg r6, r14, 48(sp)
++ | br r14
++ |3:
++ | la r2, 0(r0)
++ | j <2
++
++ |->add_func:
++ | agr r2, r3
++ | br r14
++}
++
+ static void pc(dasm_State *state) {
+ dasm_State **Dst = &state;
+ int MAX = 10;
+
+From 22f4e6a1703b47676628b9ebcf534e35a3f18d06 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 7 Dec 2016 13:13:55 +0530
+Subject: [PATCH 084/260] Update test_z_inst.c
+
+Added another test for forward jump
+---
+ dynasm/Examples/test_z_inst.c | 19 ++++++++++++++++++-
+ 1 file changed, 18 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 5208d4b1b..0458ce1c4 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -93,6 +93,22 @@ static void labg(dasm_State *state)
+ | br r14
+ }
+
++static void jmp_fwd(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++ | j >1
++ |1:
++ | cgr r2 , r3
++ | jne >2
++ | je >3
++ |2:
++ | afi r2, 0x2
++ | j <1
++ |3:
++ | br r14
++
++}
++
+ static void add_imm16(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
+@@ -240,7 +256,8 @@ test_table test[] = {
+ { 7, 3, save, 480, "save"},
+ { 7, 3, labmul, 21, "labmul0"},
+ { 7, 0, labmul, 0, "labmul1"},
+- { 0, 0, pc, 55, "pc"}
++ { 0, 0, pc, 55, "pc"},
++ { 2,12, jmp_fwd, 12, "jmp_fwd"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+
+From 79cc499172b069ca37a2ff0c9f59b03f876f0949 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 7 Dec 2016 14:11:20 +0530
+Subject: [PATCH 085/260] Update test_z_inst.c
+
+Added comments to jmp_fwd().
+Added an RRD-format test case, add_rrd(); the functionality is still to be verified.
+Added an RR-format test case, load_test(); this test currently fails.
+---
+ dynasm/Examples/test_z_inst.c | 23 ++++++++++++++++++++++-
+ 1 file changed, 22 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 0458ce1c4..dd36c1a54 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -18,6 +18,15 @@ static void add(dasm_State *state)
+ | br r14
+ }
+
++static void add_rrd(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ | lgfi r4 , 0x02
++ | maer r2 , r3 , r4
++ | br r14
++}
++
+ static void sub(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
+@@ -96,6 +105,8 @@ static void labg(dasm_State *state)
+ static void jmp_fwd(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
++
++ // compare r2 == r3; do { r2 += r2; } while(r2 != r3);
+ | j >1
+ |1:
+ | cgr r2 , r3
+@@ -235,6 +246,14 @@ static void pc(dasm_State *state) {
+ }
+ }
+
++static void load_test(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ | ltdr r2 , r3
++ | br r14
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+@@ -257,7 +276,9 @@ test_table test[] = {
+ { 7, 3, labmul, 21, "labmul0"},
+ { 7, 0, labmul, 0, "labmul1"},
+ { 0, 0, pc, 55, "pc"},
+- { 2,12, jmp_fwd, 12, "jmp_fwd"}
++ { 2,12, jmp_fwd, 12, "jmp_fwd"},
++ { 9,8, add_rrd, 25, "add_rrd"},
++ { 2,4, load_test, 4,"load_test"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+
+From 127f0fd7451f2f2ffa366748655951fd101e8adc Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 7 Dec 2016 20:21:06 +0530
+Subject: [PATCH 086/260] Adding RRD support
+
+Currently only "maer" is implemented. I have not been able to get it working yet; either I am missing something, or more functionality needs to be added for RRD.
+---
+ dynasm/dasm_s390x.lua | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index cfe861cbf..ff6984ca4 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -885,6 +885,7 @@ map_op = {
+ msgfr_2 = "0000b91c0000h",
+ msfi_2 = "c20100000000n",
+ msgfi_2 = "c20000000000n",
++ maer_3 = "0000b32e0000r",
+ o_2 = "000056000000j",
+ or_2 = "000000001600g",
+ oy_2 = "e30000000056l",
+@@ -1080,6 +1081,9 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2)
+ if a then a() end -- a() emits action.
++ elseif p == "r" then
++ op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
++ wputhw(op1); wputhw(op2)
+ elseif p == "s" then
+ local d, b, a = parse_mem_by(params[3])
+ op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+
+From 8281c33747aad8c76400029aa50f10bc1cfbbf1a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 7 Dec 2016 14:06:37 -0500
+Subject: [PATCH 087/260] Comment out failing tests for now.
+
+---
+ dynasm/Examples/test_z_inst.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index dd36c1a54..078428b98 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -18,6 +18,7 @@ static void add(dasm_State *state)
+ | br r14
+ }
+
++/*
+ static void add_rrd(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
+@@ -26,6 +27,7 @@ static void add_rrd(dasm_State *state)
+ | maer r2 , r3 , r4
+ | br r14
+ }
++*/
+
+ static void sub(dasm_State *state)
+ {
+@@ -246,6 +248,7 @@ static void pc(dasm_State *state) {
+ }
+ }
+
++/*
+ static void load_test(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
+@@ -253,6 +256,7 @@ static void load_test(dasm_State *state)
+ | ltdr r2 , r3
+ | br r14
+ }
++*/
+
+ typedef struct {
+ int64_t arg1;
+@@ -276,9 +280,9 @@ test_table test[] = {
+ { 7, 3, labmul, 21, "labmul0"},
+ { 7, 0, labmul, 0, "labmul1"},
+ { 0, 0, pc, 55, "pc"},
+- { 2,12, jmp_fwd, 12, "jmp_fwd"},
+- { 9,8, add_rrd, 25, "add_rrd"},
+- { 2,4, load_test, 4,"load_test"}
++ { 2,12, jmp_fwd, 12, "jmp_fwd"}
++// { 9,8, add_rrd, 25, "add_rrd"},
++// { 2,4, load_test, 4,"load_test"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+
+From 106718249e485cd9f450c018cf7c0b0dad936fdd Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 7 Dec 2016 16:56:00 -0500
+Subject: [PATCH 088/260] Add support for SS-a instructions.
+
+I've also changed the template parser so that it can handle suffixes
+which are longer than 1 character. The suffix for SS-a instructions
+is "SS-a". We could change this again later.
+---
+ dynasm/Examples/test_z_inst.c | 67 ++++++++---
+ dynasm/dasm_s390x.h | 9 ++
+ dynasm/dasm_s390x.lua | 213 ++++++++++++++++++++++------------
+ 3 files changed, 197 insertions(+), 92 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 078428b98..7b3c0f273 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -258,6 +258,39 @@ static void load_test(dasm_State *state)
+ }
+ */
+
++static void ssa(dasm_State *state) {
++ dasm_State **Dst = &state;
++
++ | lay sp, -16(sp)
++ | lay r0, -1(r0)
++ | stg r0, 8(sp)
++ | xc 8(8, sp), 8(sp)
++ | stg r2, 0(sp)
++ | mvc 13(2, sp), 6(sp)
++ | lg r2, 8(sp)
++ | la sp, 16(sp)
++ | br r14
++}
++
++static void ssa_act(dasm_State *state) {
++ dasm_State **Dst = &state;
++
++ int xl = 8;
++ int d1 = 13;
++ int l1 = 2;
++ int d2 = 6;
++
++ | lay sp, -16(sp)
++ | lay r0, -1(r0)
++ | stg r0, 8(sp)
++ | xc 8(xl, sp), 8(sp)
++ | stg r2, 0(sp)
++ | mvc d1(l1, sp), d2(sp)
++ | lg r2, 8(sp)
++ | la sp, 16(sp)
++ | br r14
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+@@ -267,22 +300,24 @@ typedef struct {
+ } test_table;
+
+ test_table test[] = {
+- { 1, 2, add, 3, "add"},
+- {10, 5, sub, 5, "sub"},
+- { 2, 3, mul, 6, "mul"},
+- { 5, 7, rx, 12298, "rx"},
+- { 5, 7, rxy, 10, "rxy"},
+- { 2, 4, lab, 32, "lab"},
+- { 2, 4, labg, 32, "labg"},
+- { 2, 0, add_imm16, 17, "imm16"},
+- { 2, 0, add_imm32, 16, "imm32"},
+- { 7, 3, save, 480, "save"},
+- { 7, 3, labmul, 21, "labmul0"},
+- { 7, 0, labmul, 0, "labmul1"},
+- { 0, 0, pc, 55, "pc"},
+- { 2,12, jmp_fwd, 12, "jmp_fwd"}
+-// { 9,8, add_rrd, 25, "add_rrd"},
+-// { 2,4, load_test, 4,"load_test"}
++ { 1, 2, add, 3, "add"},
++ {10, 5, sub, 5, "sub"},
++ { 2, 3, mul, 6, "mul"},
++ { 5, 7, rx, 12298, "rx"},
++ { 5, 7, rxy, 10, "rxy"},
++ { 2, 4, lab, 32, "lab"},
++ { 2, 4, labg, 32, "labg"},
++ { 2, 0, add_imm16, 17, "imm16"},
++ { 2, 0, add_imm32, 16, "imm32"},
++ { 7, 3, save, 480, "save"},
++ { 7, 3, labmul, 21, "labmul0"},
++ { 7, 0, labmul, 0, "labmul1"},
++ { 0, 0, pc, 55, "pc"},
++ { 2,12, jmp_fwd, 12, "jmp_fwd"},
++// { 9,8, add_rrd, 25, "add_rrd"},
++// { 2,4, load_test, 4,"load_test"},
++ {-1, 0, ssa, 65535<<8, "ssa"},
++ {-1, 0, ssa_act, 65535<<8, "ssa_act"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 18a7338e2..69e4fc146 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -24,6 +24,7 @@ enum {
+ DASM_REL_PC, DASM_LABEL_PC,
+ DASM_DISP12, DASM_DISP20,
+ DASM_IMM16, DASM_IMM32,
++ DASM_LEN8R,
+ DASM__MAX
+ };
+
+@@ -284,6 +285,10 @@ void dasm_put(Dst_DECL, int start, ...)
+ CK((n >> 12) == 0, RANGE_I);
+ b[pos++] = n;
+ break;
++ case DASM_LEN8R:
++ CK(n >= 1 && n <= 256, RANGE_I);
++ b[pos++] = n;
++ break;
+ }
+ }
+ stop:
+@@ -364,6 +369,7 @@ int dasm_link(Dst_DECL, size_t * szp)
+ case DASM_IMM32:
+ case DASM_DISP20:
+ case DASM_DISP12:
++ case DASM_LEN8R:
+ pos++;
+ break;
+ }
+@@ -458,6 +464,9 @@ int dasm_encode(Dst_DECL, void *buffer)
+ case DASM_DISP12:
+ cp[-1] |= n & 0xfff;
+ break;
++ case DASM_LEN8R:
++ cp[-1] |= (n - 1) & 0xff;
++ break;
+ default:
+ *cp++ = ins;
+ break;
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index ff6984ca4..6c2a904ba 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -39,7 +39,7 @@ local wline, werror, wfatal, wwarn
+ local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+- "REL_PC", "LABEL_PC", "DISP12", "DISP20", "IMM16", "IMM32",
++ "REL_PC", "LABEL_PC", "DISP12", "DISP20", "IMM16", "IMM32", "LEN8R",
+ }
+
+ -- Maximum number of section buffer positions for dasm_put().
+@@ -370,6 +370,41 @@ local function parse_mem_by(arg)
+ return d, b, a
+ end
+
++-- Parse memory operand of the form d(l, b) where 0 <= d < 4096, 1 <= l <= 256,
++-- and b is a GPR.
++local function parse_mem_lb(arg)
++ local reg = "r1?[0-9]"
++ local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$")
++ if not d then
++ -- TODO: handle values without registers?
++ -- TODO: handle registers without a displacement?
++ werror("bad memory operand: "..arg)
++ return nil
++ end
++ local dval = tonumber(d)
++ local dact = nil
++ if dval then
++ if not is_uint12(dval) then
++ werror("displacement out of range: ", dval)
++ end
++ else
++ dval = 0
++ dact = function() waction("DISP12", nil, d) end
++ end
++ local lval = tonumber(l)
++ local lact = nil
++ if lval then
++ if lval < 1 or lval > 256 then
++ werror("length out of range: ", dval)
++ end
++ lval = lval - 1
++ else
++ lval = 0
++ lact = function() waction("LEN8R", nil, l) end
++ end
++ return dval, lval, parse_reg(b), dact, lact
++end
++
+ local function parse_imm(arg)
+ local imm_val = tonumber(arg,16)
+ if imm_val then
+@@ -1014,6 +1049,23 @@ map_op = {
+ trace_3 = "000099000000q",
+ tracg_3 = "eb000000000fs",
+ tre_2 = "0000b2a50000h",
++
++ -- SS-a instructions
++ clc_2 = "d50000000000SS-a",
++ ed_2 = "de0000000000SS-a",
++ edmk_2 = "df0000000000SS-a",
++ mvc_2 = "d20000000000SS-a",
++ mvcin_2 = "e80000000000SS-a",
++ mvn_2 = "d10000000000SS-a",
++ mvz_2 = "d30000000000SS-a",
++ nc_2 = "d40000000000SS-a",
++ oc_2 = "d60000000000SS-a",
++ tr_2 = "dc0000000000SS-a",
++ trt_2 = "dd0000000000SS-a",
++ trtr_2 = "d00000000000SS-a",
++ unpka_2 = "ea0000000000SS-a",
++ unpku_2 = "e20000000000SS-a",
++ xc_2 = "d70000000000SS-a",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1037,85 +1089,94 @@ local function parse_template(params, template, nparams, pos)
+ local op2 = tonumber(sub(template, 9, 12), 16)
+
+ -- Process each character.
+- for p in gmatch(sub(template, 13), ".") do
+- local pr1,pr2,pr3
+- if p == "g" then
+- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
+- wputhw(op2)
+- elseif p == "h" then
+- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
+- wputhw(op1); wputhw(op2)
+- elseif p == "i" then
+- op1 = op1 + shl(parse_reg(params[1]),4)
+- wputhw(op1);
+- parse_imm16(params[2])
+- elseif p == "j" then
+- local d, x, b, a = parse_mem_bx(params[2])
+- op1 = op1 + shl(parse_reg(params[1]), 4) + x
+- op2 = op2 + shl(b, 12) + d
+- wputhw(op1); wputhw(op2);
+- if a then a() end
+- elseif p == "k" then
++ local p = sub(template, 13)
++ if p == "g" then
++ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
++ wputhw(op2)
++ elseif p == "h" then
++ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
++ wputhw(op1); wputhw(op2)
++ elseif p == "i" then
++ op1 = op1 + shl(parse_reg(params[1]),4)
++ wputhw(op1);
++ parse_imm16(params[2])
++ elseif p == "j" then
++ local d, x, b, a = parse_mem_bx(params[2])
++ op1 = op1 + shl(parse_reg(params[1]), 4) + x
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1); wputhw(op2);
++ if a then a() end
++ elseif p == "k" then
++ elseif p == "l" then
++ local d, x, b, a = parse_mem_bxy(params[2])
++ op0 = op0 + shl(parse_reg(params[1]), 4) + x
++ op1 = op1 + shl(b, 12) + band(d, 0xfff)
++ op2 = op2 + band(shr(d, 4), 0xff00)
++ wputhw(op0); wputhw(op1); wputhw(op2)
++ if a then a() end
++ elseif p == "m" then
+
+- elseif p == "l" then
+- local d, x, b, a = parse_mem_bxy(params[2])
+- op0 = op0 + shl(parse_reg(params[1]), 4) + x
+- op1 = op1 + shl(b, 12) + band(d, 0xfff)
+- op2 = op2 + band(shr(d, 4), 0xff00)
+- wputhw(op0); wputhw(op1); wputhw(op2)
+- if a then a() end
+- elseif p == "m" then
+-
+- elseif p == "n" then
+- op0 = op0 + shl(parse_reg(params[1]), 4)
+- wputhw(op0);
+- parse_imm(params[2])
+- elseif p == "o" then
+- op0 = op0 + shl(parse_reg(params[1]), 4)
+- wputhw(op0);
+- local mode, n, s = parse_label(params[2])
+- waction("REL_"..mode, n, s)
+- elseif p == "q" then
+- local d, b, a = parse_mem_b(params[3])
+- op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+- op2 = op2 + shl(b, 12) + d
+- wputhw(op1); wputhw(op2)
+- if a then a() end -- a() emits action.
+- elseif p == "r" then
+- op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
+- wputhw(op1); wputhw(op2)
+- elseif p == "s" then
+- local d, b, a = parse_mem_by(params[3])
+- op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+- op1 = op1 + shl(b, 12) + band(d, 0xfff)
+- op2 = op2 + band(shr(d, 4), 0xff00)
+- wputhw(op0); wputhw(op1); wputhw(op2)
+- if a then a() end -- a() emits action.
+- elseif p == "w" then
+- local mode, n, s = parse_label(params[1])
+- wputhw(op1)
+- waction("REL_"..mode, n, s)
+- elseif p == "x" then
+- local mode, n, s = parse_label(params[1])
+- wputhw(op0)
+- waction("REL_"..mode, n, s)
+- elseif p == "y" then
+- local d, x, b, a = parse_mem_bx(params[1])
+- op1 = op1 + x
+- op2 = op2 + shl(b, 12) + d
+- wputhw(op1); wputhw(op2);
+- if a then a() end -- a() emits action.
+- elseif p == "z" then
+- op2 = op2 + parse_reg(params[1])
+- wputhw(op2)
+- else
+- werror("unrecognized encoding")
+- end
++ elseif p == "n" then
++ op0 = op0 + shl(parse_reg(params[1]), 4)
++ wputhw(op0);
++ parse_imm(params[2])
++ elseif p == "o" then
++ op0 = op0 + shl(parse_reg(params[1]), 4)
++ wputhw(op0);
++ local mode, n, s = parse_label(params[2])
++ waction("REL_"..mode, n, s)
++ elseif p == "q" then
++ local d, b, a = parse_mem_b(params[3])
++ op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1); wputhw(op2)
++ if a then a() end -- a() emits action.
++ elseif p == "r" then
++ op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
++ wputhw(op1); wputhw(op2)
++ elseif p == "s" then
++ local d, b, a = parse_mem_by(params[3])
++ op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
++ op1 = op1 + shl(b, 12) + band(d, 0xfff)
++ op2 = op2 + band(shr(d, 4), 0xff00)
++ wputhw(op0); wputhw(op1); wputhw(op2)
++ if a then a() end -- a() emits action.
++ elseif p == "SS-a" then
++ local d1, l1, b1, d1a, l1a = parse_mem_lb(params[1])
++ local d2, b2, d2a = parse_mem_b(params[2])
++ op0 = op0 + l1
++ op1 = op1 + shl(b1, 12) + d1
++ op2 = op2 + shl(b2, 12) + d2
++ wputhw(op0)
++ if l1a then l1a() end
++ wputhw(op1)
++ if d1a then d1a() end
++ wputhw(op2)
++ if d2a then d2a() end
++ elseif p == "w" then
++ local mode, n, s = parse_label(params[1])
++ wputhw(op1)
++ waction("REL_"..mode, n, s)
++ elseif p == "x" then
++ local mode, n, s = parse_label(params[1])
++ wputhw(op0)
++ waction("REL_"..mode, n, s)
++ elseif p == "y" then
++ local d, x, b, a = parse_mem_bx(params[1])
++ op1 = op1 + x
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1); wputhw(op2);
++ if a then a() end -- a() emits action.
++ elseif p == "z" then
++ op2 = op2 + parse_reg(params[1])
++ wputhw(op2)
++ else
++ werror("unrecognized encoding")
+ end
+-
+ end
++
+ function op_template(params, template, nparams)
+- if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
++ if not params then return template:gsub("%x%x%x%x%x%x%x%x%x%x%x%x", "") end
+ -- Limit number of section buffer positions used by a single dasm_put().
+ -- A single opcode needs a maximum of 5 positions.
+ if secpos+5 > maxsecpos then wflush() end
+
+From 1f034811a3e8477da4a62623092287efae4d7bf7 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 8 Dec 2016 10:22:28 +0530
+Subject: [PATCH 089/260] Updated comment for test jmp_fwd()
+
+---
+ dynasm/Examples/test_z_inst.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 7b3c0f273..c17aebcd8 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -108,7 +108,7 @@ static void jmp_fwd(dasm_State *state)
+ {
+ dasm_State **Dst = &state;
+
+- // compare r2 == r3; do { r2 += r2; } while(r2 != r3);
++ // while(r2!=r3){r2 += 2};
+ | j >1
+ |1:
+ | cgr r2 , r3
+
+From b98c92e142918700d8c8413aed6d640b4e47c677 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 8 Dec 2016 13:59:05 -0500
+Subject: [PATCH 090/260] Delete unused branch_type function.
+
+---
+ dynasm/dasm_s390x.lua | 13 -------------
+ 1 file changed, 13 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 6c2a904ba..3fa4c13a0 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -460,19 +460,6 @@ local function parse_label(label, def)
+ werror("bad label `"..label.."'")
+ end
+
+-local function branch_type(op)
+- if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL
+- elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
+- band(op, 0x3b000000) == 0x18000000 then
+- return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
+- elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ
+- elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR
+- elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP
+- else
+- assert(false, "unknown branch type")
+- end
+-end
+-
+ ------------------------------------------------------------------------------
+
+ local map_op, op_template
+
+From d472a3cc1cbdd8a820f2638291d0968bb6312f47 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 8 Dec 2016 15:29:26 -0500
+Subject: [PATCH 091/260] Add support for .type directives.
+
+---
+ dynasm/Examples/test_z_inst.c | 20 +++++++++++++++++++-
+ dynasm/dasm_s390x.lua | 25 +++++++++++++++++++++----
+ 2 files changed, 40 insertions(+), 5 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index c17aebcd8..a8895c052 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -291,6 +291,23 @@ static void ssa_act(dasm_State *state) {
+ | br r14
+ }
+
++typedef struct {
++ int a;
++ int b;
++} SimpleStruct;
++
++static void type(dasm_State *state) {
++ dasm_State **Dst = &state;
++
++ | .type SIMPLE, SimpleStruct
++ | lay sp, -8(sp)
++ | stg r2, 0(sp)
++ | xgr r2, r2
++ | l r2, SIMPLE:sp->b
++ | la sp, 8(sp)
++ | br r14
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+@@ -317,7 +334,8 @@ test_table test[] = {
+ // { 9,8, add_rrd, 25, "add_rrd"},
+ // { 2,4, load_test, 4,"load_test"},
+ {-1, 0, ssa, 65535<<8, "ssa"},
+- {-1, 0, ssa_act, 65535<<8, "ssa_act"}
++ {-1, 0, ssa_act, 65535<<8, "ssa_act"},
++ {27, 0, type, 27, "type"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 3fa4c13a0..3a5c50003 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -247,7 +247,17 @@ local map_cond = {
+ ------------------------------------------------------------------------------
+
+ local function parse_reg(expr)
+- local r = match(expr, "^[r|f](1?[0-9])$")
++ if not expr then werror("expected register name") end
++ local tname, ovreg = match(expr, "^([%w_]+):(r1?%d)$")
++ local tp = map_type[tname or expr]
++ if tp then
++ local reg = ovreg or tp.reg
++ if not reg then
++ werror("type `"..(tname or expr).."' needs a register override")
++ end
++ expr = reg
++ end
++ local r = match(expr, "^[rf](1?%d)$")
+ if r then
+ r = tonumber(r)
+ if r <= 15 then return r, tp end
+@@ -296,15 +306,22 @@ end
+ -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
+ -- If x is not specified then it is 0.
+ local function split_memop(arg)
+- local reg = "r1?[0-9]"
+- local d, x, b = match(arg, "^(.*)%(("..reg.."), ("..reg..")%)$")
++ local reg = "[%w_:]+"
++ local d, x, b = match(arg, "^(.*)%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
+ if d then
+ return d, parse_reg(x), parse_reg(b)
+ end
+- local d, b = match(arg, "^(.*)%(("..reg..")%)$")
++ local d, b = match(arg, "^(.*)%(%s*("..reg..")%s*%)$")
+ if d then
+ return d, 0, parse_reg(b)
+ end
++ local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
++ if reg then
++ local r, tp = parse_reg(reg)
++ if tp then
++ return format(tp.ctypefmt, tailr), 0, r
++ end
++ end
+ -- TODO: handle values without registers?
+ -- TODO: handle registers without a displacement?
+ werror("bad memory operand: "..arg)
+
+From 1a06b651e23ee9f20939e171267bfbb46e35fa73 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 8 Dec 2016 16:00:59 -0500
+Subject: [PATCH 092/260] Fix a couple of templates that were too short.
+
+---
+ dynasm/dasm_s390x.lua | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 3a5c50003..4c716eec1 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -540,7 +540,7 @@ map_op = {
+ nihf_2 = "c00a00000000n",
+ nilf_2 = "c00b00000000n",
+ bal_2 = "000045000000j",
+- balr_2 = "00000000500g",
++ balr_2 = "000000000500g",
+ bas_2 = "00004d000000j",
+ basr_2 = "000000000d00g",
+ bassm_2 = "000000000c00g",
+@@ -549,9 +549,9 @@ map_op = {
+ bakr_2 = "0000b2400000h",
+ bsg_2 = "0000b2580000h",
+ bc_2 = "000047000000k",
+- bcr_2 = "00000000700g",
++ bcr_2 = "000000000700g",
+ bct_2 = "000046000000j",
+- bctr_2 = "00000000600g",
++ bctr_2 = "000000000600g",
+ bctg_2 = "e30000000046l",
+ bctgr_2 = "0000b9460000h",
+ bxh_3 = "000086000000q",
+@@ -953,7 +953,7 @@ map_op = {
+ sar_2 = "0000b24e0000h",
+ sfpc_2 = "0000b3840000h",
+ sfasr_2 = "0000b3850000h",
+- spm_2 = "00000000400g",
++ spm_2 = "000000000400g",
+ ssar_2 = "0000b2250000h",
+ ssair_2 = "0000b99f0000h",
+ slda_3 = "00008f000000q",
+
+From 9b9e1ea66762e933607c667be63cfcd8ca6f29a1 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Fri, 9 Dec 2016 19:18:02 +0530
+Subject: [PATCH 093/260] Added SS-b mode
+
+I am currently unable to test this mode and would appreciate help with that.
+For the time being I have added a separate parsing function (parse_mem_l2b) instead of merging it into the existing SS-a parsing, so that SS-a doesn't break; the two can be merged later once SS-b has been tested.
+Let me know your comments on this.
+---
+ dynasm/dasm_s390x.lua | 55 ++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 54 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 4c716eec1..08d44a3ee 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -39,7 +39,7 @@ local wline, werror, wfatal, wwarn
+ local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+- "REL_PC", "LABEL_PC", "DISP12", "DISP20", "IMM16", "IMM32", "LEN8R",
++ "REL_PC", "LABEL_PC", "DISP12", "DISP20", "IMM16", "IMM32", "LEN8R","LEN4HR","LEN4LR",
+ }
+
+ -- Maximum number of section buffer positions for dasm_put().
+@@ -422,6 +422,43 @@ local function parse_mem_lb(arg)
+ return dval, lval, parse_reg(b), dact, lact
+ end
+
++local function parse_mem_l2b(arg,high_l)
++ local reg = "r1?[0-9]"
++ local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$")
++ if not d then
++ -- TODO: handle values without registers?
++ -- TODO: handle registers without a displacement?
++ werror("bad memory operand: "..arg)
++ return nil
++ end
++ local dval = tonumber(d)
++ local dact = nil
++ if dval then
++ if not is_uint12(dval) then
++ werror("displacement out of range: ", dval)
++ end
++ else
++ dval = 0
++ dact = function() waction("DISP12", nil, d) end
++ end
++ local lval = tonumber(l)
++ local lact = nil
++ if lval then
++ if lval < 1 or lval > 128 then
++ werror("length out of range: ", dval)
++ end
++ lval = lval - 1
++ else
++ lval = 0
++ if high_l then
++ lact = function() waction("LEN4HR", nil, l) end
++ else
++ lact = function() waction("LEN4LR",nil,l) end
++ end
++ end
++ return dval, lval, parse_reg(b), dact, lact
++end
++
+ local function parse_imm(arg)
+ local imm_val = tonumber(arg,16)
+ if imm_val then
+@@ -1070,6 +1107,7 @@ map_op = {
+ unpka_2 = "ea0000000000SS-a",
+ unpku_2 = "e20000000000SS-a",
+ xc_2 = "d70000000000SS-a",
++ ap_2 = "fa0000000000SS-b",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1157,6 +1195,21 @@ local function parse_template(params, template, nparams, pos)
+ if d1a then d1a() end
+ wputhw(op2)
+ if d2a then d2a() end
++ elseif p == "SS-b" then
++ local high_l=true;
++ local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1],high_l)
++ high_l=false;
++ local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2],high_l)
++ op0 = op0 + shl(l1,4) + l2
++ op1 = op1 + shl(b1, 12) + d1
++ op2 = op2 + shl(b2, 12) + d2
++ wputhw(op0)
++ if l1a then l1a() end
++ if l2a then l2a() end
++ wputhw(op1)
++ if d1a then d1a() end
++ wputhw(op2)
++ if d2a then d2a() end
+ elseif p == "w" then
+ local mode, n, s = parse_label(params[1])
+ wputhw(op1)
+
+From cf4813f4ab37dfbc16c7e87bf27592d63d5a6f34 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Fri, 9 Dec 2016 19:24:55 +0530
+Subject: [PATCH 094/260] Added SS-b support in C
+
+---
+ dynasm/dasm_s390x.h | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 69e4fc146..e99dc39ed 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -24,7 +24,7 @@ enum {
+ DASM_REL_PC, DASM_LABEL_PC,
+ DASM_DISP12, DASM_DISP20,
+ DASM_IMM16, DASM_IMM32,
+- DASM_LEN8R,
++ DASM_LEN8R,DASM_LEN4HR,DASM_LEN4LR,
+ DASM__MAX
+ };
+
+@@ -289,6 +289,14 @@ void dasm_put(Dst_DECL, int start, ...)
+ CK(n >= 1 && n <= 256, RANGE_I);
+ b[pos++] = n;
+ break;
++ case DASM_LEN4HR:
++ CK(n >= 1 && n <= 128, RANGE_I);
++ b[pos++] = n;
++ break;
++ case DASM_LEN4LR:
++ CK(n >= 1 && n <= 128, RANGE_I);
++ b[pos++] = n;
++ break;
+ }
+ }
+ stop:
+@@ -370,6 +378,8 @@ int dasm_link(Dst_DECL, size_t * szp)
+ case DASM_DISP20:
+ case DASM_DISP12:
+ case DASM_LEN8R:
++ case DASM_LEN4HR:
++ case DASM_LEN4LR:
+ pos++;
+ break;
+ }
+@@ -467,6 +477,12 @@ int dasm_encode(Dst_DECL, void *buffer)
+ case DASM_LEN8R:
+ cp[-1] |= (n - 1) & 0xff;
+ break;
++ case DASM_LEN4HR:
++ cp[-1] |= (n - 1) & 0xf0;
++ break;
++ case DASM_LEN4LR:
++ cp[-1] |= (n - 1) & 0xof;
++ break;
+ default:
+ *cp++ = ins;
+ break;
+
+From aa096eda0e2a0f67d16cc660cec99c16e9b57601 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Fri, 9 Dec 2016 19:46:12 +0530
+Subject: [PATCH 095/260] Minor fix
+
+---
+ dynasm/dasm_s390x.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index e99dc39ed..b5a22ce18 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -481,7 +481,7 @@ int dasm_encode(Dst_DECL, void *buffer)
+ cp[-1] |= (n - 1) & 0xf0;
+ break;
+ case DASM_LEN4LR:
+- cp[-1] |= (n - 1) & 0xof;
++ cp[-1] |= (n - 1) & 0x0f;
+ break;
+ default:
+ *cp++ = ins;
+
+From 052eb697506b5a1a7834dc64d641aae153db0e94 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 9 Dec 2016 14:32:02 -0500
+Subject: [PATCH 096/260] Make host/buildvm_asm.c compile.
+
+I've added a rough implementation of this code. It is untested but
+does compile.
+---
+ src/host/buildvm_asm.c | 53 ++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 53 insertions(+)
+
+diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
+index 28419c077..3fd034c08 100644
+--- a/src/host/buildvm_asm.c
++++ b/src/host/buildvm_asm.c
+@@ -87,6 +87,54 @@ static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
+ }
+ fprintf(ctx->fp, "\t%s %s\n", opname, sym);
+ }
++#elif LJ_TARGET_S390X
++/* Emit halfwords piecewise as assembler text. */
++static void emit_asm_halfwords(BuildCtx *ctx, uint8_t *p, int n)
++{
++ uint16_t *cp = (uint16_t*)p;
++ n /= 2;
++ int i;
++ for (i = 0; i < n; i++) {
++ if ((i & 7) == 0)
++ fprintf(ctx->fp, "\t.hword 0x%hx", cp[i]);
++ else
++ fprintf(ctx->fp, ",0x%hx", cp[i]);
++ if ((i & 7) == 7) putc('\n', ctx->fp);
++ }
++ if ((n & 7) != 0) putc('\n', ctx->fp);
++}
++
++/* Emit s390x text relocations. */
++static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
++ const char *sym)
++{
++ if (n & 1 || n < 2) {
++ fprintf(stderr, "Error: instruction stream length invalid: %d.\n", n);
++ exit(1);
++ }
++ n -= 2;
++ const char *opname = NULL;
++ const char *argt = ""; /* Inserted before argument. */
++ int opcode = *(uint16_t*)(&cp[n]);
++ int arg = (opcode>>4) & 0xf;
++ switch (opcode & 0xff0f) {
++ case 0xa705: opname = "bras"; argt = "r"; break;
++ case 0xc005: opname = "brasl"; argt = "r"; break;
++ case 0xa704: opname = "brc"; break;
++ case 0xc004: opname = "brcl"; break;
++ default:
++ fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n",
++ sym);
++ exit(1);
++ }
++ emit_asm_halfwords(ctx, cp, n);
++ if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
++ /* Various fixups for external symbols outside of our binary. */
++ fprintf(ctx->fp, "\t%s %s%d, %s@PLT\n", opname, argt, arg, sym);
++ return;
++ }
++ fprintf(ctx->fp, "\t%s %s%d, %s\n", opname, argt, arg, sym);
++}
+ #else
+ /* Emit words piecewise as assembler text. */
+ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
+@@ -305,6 +353,9 @@ void emit_asm(BuildCtx *ctx)
+ emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
+ }
+ ofs += n+4;
++#elif LJ_TARGET_S390X
++ emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
++ ofs += n;
+ #else
+ emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+ ofs += n;
+@@ -313,6 +364,8 @@ void emit_asm(BuildCtx *ctx)
+ }
+ #if LJ_TARGET_X86ORX64
+ emit_asm_bytes(ctx, ctx->code+ofs, next-ofs);
++#elif LJ_TARGET_S390X
++ emit_asm_halfwords(ctx, ctx->code+ofs, next-ofs);
+ #else
+ emit_asm_words(ctx, ctx->code+ofs, next-ofs);
+ #endif
+
+From 308fa09b6f5e35a84a04d6419a053b520ad2ea57 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 9 Dec 2016 14:35:21 -0500
+Subject: [PATCH 097/260] Fix warning in dasm_s390x.h.
+
+---
+ dynasm/dasm_s390x.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index b5a22ce18..5be8e8a71 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -448,7 +448,7 @@ int dasm_encode(Dst_DECL, void *buffer)
+ if (cp[-1] >> 12 == 0xc) {
+ *cp++ = n >> 17;
+ } else {
+- CK(-(1 << 16) <= n && n < (1 << 16) && n & 1 == 0, RANGE_LG);
++ CK(-(1 << 16) <= n && n < (1 << 16) && (n & 1) == 0, RANGE_LG);
+ }
+ *cp++ = n >> 1;
+ p++; /* skip argument */
+
+From 111b62229a287f255c79ad25a8de97bafd51ce33 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 9 Dec 2016 14:38:37 -0500
+Subject: [PATCH 098/260] Define DASM_CHECKS when running tests.
+
+---
+ dynasm/Examples/run.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/dynasm/Examples/run.sh b/dynasm/Examples/run.sh
+index dbe93b008..a4542e86b 100755
+--- a/dynasm/Examples/run.sh
++++ b/dynasm/Examples/run.sh
+@@ -2,7 +2,7 @@
+ # set -x
+
+ # run test
+-lua ../dynasm.lua test_z_inst.c | gcc -std=gnu99 -Wall -Werror -g -x c -o test_z_inst -
++lua ../dynasm.lua test_z_inst.c | gcc -DDASM_CHECKS -std=gnu99 -Wall -Werror -g -x c -o test_z_inst -
+ ./test_z_inst
+ ec=$?
+
+
+From 625aad5da01fd05c38d9c34762417c61279ab6b5 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 9 Dec 2016 16:28:39 -0500
+Subject: [PATCH 099/260] Make LuaJIT compile on s390x.
+
+I've disabled both the JIT and FFI for now. I've also stripped almost
+all of the assembly out of vm_s390x.dasc, leaving only labels for the
+most part. This is enough to get LuaJIT to compile but of course if
+you try and run it it will explode.
+
+The idea now is to re-add enough functionality to get a very basic
+Lua program to run.
+---
+ src/lib_jit.c | 2 +
+ src/lj_arch.h | 1 +
+ src/vm_s390x.dasc | 4895 +--------------------------------------------
+ 3 files changed, 26 insertions(+), 4872 deletions(-)
+
+diff --git a/src/lib_jit.c b/src/lib_jit.c
+index 592538bd8..1e410610d 100644
+--- a/src/lib_jit.c
++++ b/src/lib_jit.c
+@@ -731,6 +731,8 @@ static uint32_t jit_cpudetect(lua_State *L)
+ }
+ #endif
+ #endif
++#elif LJ_TARGET_S390X
++ /* No optional CPU features to detect (for now). */
+ #else
+ #error "Missing CPU detection for this architecture"
+ #endif
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index 31503e83e..3839027b2 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -370,6 +370,7 @@
+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
+ #define LJ_TARGET_GC64 1
+ #define LJ_ARCH_NOJIT 1 /* NYI */
++#define LJ_ARCH_NOFFI 1 /* Disable FFI for now. */
+
+ #else
+ #error "No target architecture defined"
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index e639159c8..bdd063d8f 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -135,10 +135,10 @@
+ |.macro ins_A; .endmacro
+ |.macro ins_AD; .endmacro
+ |.macro ins_AJ; .endmacro
+-|.macro ins_ABC; mvcl RB, RCH; mvcl RC, RCL; .endmacro
+-|.macro ins_AB_; mvcl RB, RCH; .endmacro
+-|.macro ins_A_C; mvcl RC, RCL; .endmacro
+-|.macro ins_AND; ??? RD; .endmacro
++|.macro ins_ABC; .endmacro
++|.macro ins_AB_; .endmacro
++|.macro ins_A_C; .endmacro
++|.macro ins_AND; .endmacro
+ |
+ |// Instruction decode+dispatch.
+ | // TODO: tune this, right now we always decode RA-D even if they aren't used.
+@@ -175,89 +175,6 @@
+ | ins_NEXT
+ | .endmacro
+ |.endif
+-|
+-|// Call decode and dispatch.
+-|.macro ins_callt
+-| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
+-| mvcle PC, LFUNC:RB->pc
+-| mvcle RA, [PC]
+-| movcl OP, RAL
+-| movcl RA, RAH
+-| add PC, 4
+-|.endmacro
+-|
+-|.macro ins_call
+-| // BASE = new base, RB = LFUNC, RD = nargs+1
+-| mvcle [BASE-4], PC
+-| ins_callt
+-|.endmacro
+-|
+-|//-----------------------------------------------------------------------
+-|
+-|// Macros to test operand types.
+-|.macro checktp, reg, tp; CG dword [BASE+reg*8+4], tp; .endmacro
+-|.macro checknum, reg, target; checktp reg, LJ_TISNUM; brc target; .endmacro // condition to chk is result is above or equal
+-|.macro checkint, reg, target; checktp reg, LJ_TISNUM; brc target; .endmacro // condition to chk is result is not equal
+-|.macro checkstr, reg, target; checktp reg, LJ_TSTR; brc target; .endmacro // condition to chk is result is nto equal
+-|.macro checktab, reg, target; checktp reg, LJ_TTAB; brc target; .endmacro // condition to chk is result is nto equal
+-|
+-|// These operands must be used with movzx.
+-|.define PC_OP, byte [PC-4]
+-|.define PC_RA, byte [PC-3]
+-|.define PC_RB, byte [PC-1]
+-|.define PC_RC, byte [PC-2]
+-|.define PC_RD, word [PC-2]
+-|
+-|.macro branchPC, reg
+-
+-|.endmacro
+-|
+-|// Assumes DISPATCH is relative to GL.
+-#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
+-#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
+-|
+-#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+-|
+-|// Decrement hashed hotcount and trigger trace recorder if zero.
+-|.macro hotloop, reg
+-
+-|.endmacro
+-|
+-|.macro hotcall, reg
+-
+-|.endmacro
+-|
+-|// Set current VM state.
+-|.macro set_vmstate, st
+-
+-|.endmacro
+-|
+-|
+-|.macro fpop1; fstp st1; .endmacro
+-|
+-|
+-|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
+-|
+-|.endmacro
+-|.macro sseconst_1, reg, tmp // Synthesize 1.0.
+-|
+-|.endmacro
+-|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
+-|
+-|.endmacro
+-|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
+-|
+-|.endmacro
+-|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
+-|
+-|.endmacro
+-|
+-|// Move table write barrier back. Overwrites reg.
+-|.macro barrierback, tab, reg
+-
+-|.endmacro
+-|
+-|//-----------------------------------------------------------------------
+
+ /* Generate subroutines used by opcodes and other parts of the VM. */
+ /* The .code_sub section should be last to help static branch prediction. */
+@@ -270,359 +187,49 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
+- | test PC, FRAME_P
+- | jz ->cont_dispatch
+- |
+- | // Return from pcall or xpcall fast func.
+- | and PC, -8
+- | sub BASE, PC // Restore caller base.
+- | lea RAa, [RA+PC-8] // Rebase RA and prepend one result.
+- | mov PC, [BASE-4] // Fetch PC of previous frame.
+- | // Prepending may overwrite the pcall frame, so do it at the end.
+- | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
+ |
+ |->vm_returnc:
+- | add RD, 1 // RD = nresults+1
+- | jz ->vm_unwind_yield
+- | mov MULTRES, RD
+- | test PC, FRAME_TYPE
+- | jz ->BC_RET_Z // Handle regular return to Lua.
+ |
+ |->vm_return:
+- | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
+- | xor PC, FRAME_C
+- | test PC, FRAME_TYPE
+- | jnz ->vm_returnp
+- |
+- | // Return to C.
+- | set_vmstate C
+- | and PC, -8
+- | sub PC, BASE
+- | neg PC // Previous base = BASE - delta.
+- |
+- | sub RD, 1
+- | jz >2
+- |1: // Move results down.
+- |.if X64
+- | mov RBa, [BASE+RA]
+- | mov [BASE-8], RBa
+- |.else
+- | mov RB, [BASE+RA]
+- | mov [BASE-8], RB
+- | mov RB, [BASE+RA+4]
+- | mov [BASE-4], RB
+- |.endif
+- | add BASE, 8
+- | sub RD, 1
+- | jnz <1
+- |2:
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, PC
+- |3:
+- | mov RD, MULTRES
+- | mov RA, SAVE_NRES // RA = wanted nresults+1
+- |4:
+- | cmp RA, RD
+- | jne >6 // More/less results wanted?
+- |5:
+- | sub BASE, 8
+- | mov L:RB->top, BASE
+ |
+ |->vm_leave_cp:
+- | mov RAa, SAVE_CFRAME // Restore previous C frame.
+- | mov L:RB->cframe, RAa
+- | xor eax, eax // Ok return status for vm_pcall.
+ |
+ |->vm_leave_unw:
+- | restoreregs
+- | ret
+- |
+- |6:
+- | jb >7 // Less results wanted?
+- | // More results wanted. Check stack size and fill up results with nil.
+- | cmp BASE, L:RB->maxstack
+- | ja >8
+- | mov dword [BASE-4], LJ_TNIL
+- | add BASE, 8
+- | add RD, 1
+- | jmp <4
+- |
+- |7: // Less results wanted.
+- | test RA, RA
+- | jz <5 // But check for LUA_MULTRET+1.
+- | sub RA, RD // Negative result!
+- | lea BASE, [BASE+RA*8] // Correct top.
+- | jmp <5
+- |
+- |8: // Corner case: need to grow stack for filling up results.
+- | // This can happen if:
+- | // - A C function grows the stack (a lot).
+- | // - The GC shrinks the stack in between.
+- | // - A return back from a lua_call() with (high) nresults adjustment.
+- | mov L:RB->top, BASE // Save current top held in BASE (yes).
+- | mov MULTRES, RD // Need to fill only remainder with nil.
+- | mov FCARG2, RA
+- | mov FCARG1, L:RB
+- | call extern lj_state_growstack@8 // (lua_State *L, int n)
+- | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
+- | jmp <3
+ |
+ |->vm_unwind_yield:
+- | mov al, LUA_YIELD
+- | jmp ->vm_unwind_c_eh
+- |
+- |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall.
+- | // (void *cframe, int errcode)
+- |.if X64
+- | mov eax, CARG2d // Error return status for vm_pcall.
+- | mov rsp, CARG1
+- |.else
+- | mov eax, FCARG2 // Error return status for vm_pcall.
+- | mov esp, FCARG1
+- |.if WIN
+- | lea FCARG1, SEH_NEXT
+- | fs; mov [0], FCARG1
+- |.endif
+- |.endif
+- |->vm_unwind_c_eh: // Landing pad for external unwinder.
+- | mov L:RB, SAVE_L
+- | mov GL:RB, L:RB->glref
+- | mov dword GL:RB->vmstate, ~LJ_VMST_C
+- | jmp ->vm_leave_unw
+ |
++ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
++ |->vm_unwind_c_eh: // Landing pad for external unwinder.
+ |->vm_unwind_rethrow:
+- |.if X64 and not X64WIN
+- | mov FCARG1, SAVE_L
+- | mov FCARG2, eax
+- | restoreregs
+- | jmp extern lj_err_throw@8 // (lua_State *L, int errcode)
+- |.endif
+- |
+- |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall.
+- | // (void *cframe)
+- |.if X64
+- | and CARG1, CFRAME_RAWMASK
+- | mov rsp, CARG1
+- |.else
+- | and FCARG1, CFRAME_RAWMASK
+- | mov esp, FCARG1
+- |.if WIN
+- | lea FCARG1, SEH_NEXT
+- | fs; mov [0], FCARG1
+- |.endif
+- |.endif
++ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
+- | mov L:RB, SAVE_L
+- | mov RAa, -8 // Results start at BASE+RA = BASE-8.
+- | mov RD, 1+1 // Really 1+2 results, incr. later.
+- | mov BASE, L:RB->base
+- | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+- | add DISPATCH, GG_G2DISP
+- | mov PC, [BASE-4] // Fetch PC of previous frame.
+- | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
+- | set_vmstate INTERP
+- | jmp ->vm_returnc // Increments RD/MULTRES and returns.
+- |
+- |.if WIN and not X64
+- |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
+- | // (void *cframe, void *excptrec, void *unwinder, int errcode)
+- | mov [esp], FCARG1 // Return value for RtlUnwind.
+- | push FCARG2 // Exception record for RtlUnwind.
+- | push 0 // Ignored by RtlUnwind.
+- | push dword [FCARG1+CFRAME_OFS_SEH]
+- | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
+- | mov FCARG1, eax
+- | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
+- | ret // Jump to unwinder.
+- |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_growstack_c: // Grow stack for C function.
+- | mov FCARG2, LUA_MINSTACK
+- | jmp >2
+ |
+ |->vm_growstack_v: // Grow stack for vararg Lua function.
+- | sub RD, 8
+- | jmp >1
+ |
+ |->vm_growstack_f: // Grow stack for fixarg Lua function.
+ | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
+- | lea RD, [BASE+NARGS:RD*8-8]
+- |1:
+- | movzx RA, byte [PC-4+PC2PROTO(framesize)]
+- | add PC, 4 // Must point after first instruction.
+- | mov L:RB->base, BASE
+- | mov L:RB->top, RD
+- | mov SAVE_PC, PC
+- | mov FCARG2, RA
+- |2:
+- | // RB = L, L->base = new base, L->top = top
+- | mov FCARG1, L:RB
+- | call extern lj_state_growstack@8 // (lua_State *L, int n)
+- | mov BASE, L:RB->base
+- | mov RD, L:RB->top
+- | mov LFUNC:RB, [BASE-8]
+- | sub RD, BASE
+- | shr RD, 3
+- | add NARGS:RD, 1
+- | // BASE = new base, RB = LFUNC, RD = nargs+1
+- | ins_callt // Just retry the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Entry points into the assembler VM ---------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_resume: // Setup C frame and resume thread.
+- | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+- | saveregs
+- |.if X64
+- | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
+- | mov SAVE_L, CARG1d
+- | mov RA, CARG2d
+- |.else
+- | mov L:RB, SAVE_L
+- | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
+- |.endif
+- | mov PC, FRAME_CP
+- | xor RD, RD
+- | lea KBASEa, [esp+CFRAME_RESUME]
+- | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+- | add DISPATCH, GG_G2DISP
+- | mov SAVE_PC, RD // Any value outside of bytecode is ok.
+- | mov SAVE_CFRAME, RDa
+- |.if X64
+- | mov SAVE_NRES, RD
+- | mov SAVE_ERRF, RD
+- |.endif
+- | mov L:RB->cframe, KBASEa
+- | cmp byte L:RB->status, RDL
+- | je >2 // Initial resume (like a call).
+- |
+- | // Resume after yield (like a return).
+- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+- | set_vmstate INTERP
+- | mov byte L:RB->status, RDL
+- | mov BASE, L:RB->base
+- | mov RD, L:RB->top
+- | sub RD, RA
+- | shr RD, 3
+- | add RD, 1 // RD = nresults+1
+- | sub RA, BASE // RA = resultofs
+- | mov PC, [BASE-4]
+- | mov MULTRES, RD
+- | test PC, FRAME_TYPE
+- | jz ->BC_RET_Z
+- | jmp ->vm_return
+ |
+ |->vm_pcall: // Setup protected C frame and enter VM.
+- | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+- | saveregs
+- | mov PC, FRAME_CP
+- |.if X64
+- | mov SAVE_ERRF, CARG4d
+- |.endif
+- | jmp >1
+ |
+ |->vm_call: // Setup C frame and enter VM.
+- | // (lua_State *L, TValue *base, int nres1)
+- | saveregs
+- | mov PC, FRAME_C
+- |
+- |1: // Entry point for vm_pcall above (PC = ftype).
+- |.if X64
+- | mov SAVE_NRES, CARG3d
+- | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
+- | mov SAVE_L, CARG1d
+- | mov RA, CARG2d
+- |.else
+- | mov L:RB, SAVE_L
+- | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
+- |.endif
+- |
+- | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+- | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
+- | mov SAVE_CFRAME, KBASEa
+- | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
+- | add DISPATCH, GG_G2DISP
+- |.if X64
+- | mov L:RB->cframe, rsp
+- |.else
+- | mov L:RB->cframe, esp
+- |.endif
+- |
+- |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
+- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+- | set_vmstate INTERP
+- | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
+- | add PC, RA
+- | sub PC, BASE // PC = frame delta + frame type
+- |
+- | mov RD, L:RB->top
+- | sub RD, RA
+- | shr NARGS:RD, 3
+- | add NARGS:RD, 1 // RD = nargs+1
+ |
+ |->vm_call_dispatch:
+- | mov LFUNC:RB, [RA-8]
+- | cmp dword [RA-4], LJ_TFUNC
+- | jne ->vmeta_call // Ensure KBASE defined and != BASE.
+ |
+ |->vm_call_dispatch_f:
+- | mov BASE, RA
+- | ins_call
+- | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
+- | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+- | saveregs
+- |.if X64
+- | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
+- | mov SAVE_L, CARG1d
+- |.else
+- | mov L:RB, SAVE_L
+- | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
+- | mov RC, INARG_CP_UD // Get args before they are overwritten.
+- | mov RA, INARG_CP_FUNC
+- | mov BASE, INARG_CP_CALL
+- |.endif
+- | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
+- |
+- | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
+- | sub KBASE, L:RB->top
+- | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+- | mov SAVE_ERRF, 0 // No error function.
+- | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
+- | add DISPATCH, GG_G2DISP
+- | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
+- |
+- |.if X64
+- | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
+- | mov SAVE_CFRAME, KBASEa
+- | mov L:RB->cframe, rsp
+- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+- |
+- | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
+- |.else
+- | mov ARG3, RC // Have to copy args downwards.
+- | mov ARG2, RA
+- | mov ARG1, L:RB
+- |
+- | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
+- | mov SAVE_CFRAME, KBASE
+- | mov L:RB->cframe, esp
+- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+- |
+- | call BASE // (lua_State *L, lua_CFunction func, void *ud)
+- |.endif
+- | // TValue * (new base) or NULL returned in eax (RC).
+- | test RC, RC
+- | jz ->vm_leave_cp // No base? Just remove C frame.
+- | mov RA, RC
+- | mov PC, FRAME_CP
+- | jmp <2 // Else continue with the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+@@ -631,546 +238,69 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
+- | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
+- | add RA, BASE
+- | and PC, -8
+- | mov RB, BASE
+- | sub BASE, PC // Restore caller BASE.
+- | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
+- | mov RC, RA // ... in [RC]
+- | mov PC, [RB-12] // Restore PC from [cont|PC].
+- |.if X64
+- | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug.
+- |.if FFI
+- | cmp RA, 1
+- | jbe >1
+- |.endif
+- | lea KBASEa, qword [=>0]
+- | add RAa, KBASEa
+- |.else
+- | mov RA, dword [RB-16]
+- |.if FFI
+- | cmp RA, 1
+- | jbe >1
+- |.endif
+- |.endif
+- | mov LFUNC:KBASE, [BASE-8]
+- | mov KBASE, LFUNC:KBASE->pc
+- | mov KBASE, [KBASE+PC2PROTO(k)]
+- | // BASE = base, RC = result, RB = meta base
+- | jmp RAa // Jump to continuation.
+- |
+- |.if FFI
+- |1:
+- | je ->cont_ffi_callback // cont = 1: return from FFI callback.
+- | // cont = 0: Tail call from C function.
+- | sub RB, BASE
+- | shr RB, 3
+- | lea RD, [RB-1]
+- | jmp ->vm_call_tail
+- |.endif
+ |
+ |->cont_cat: // BASE = base, RC = result, RB = mbase
+- | movzx RA, PC_RB
+- | sub RB, 16
+- | lea RA, [BASE+RA*8]
+- | sub RA, RB
+- | je ->cont_ra
+- | neg RA
+- | shr RA, 3
+- |.if X64WIN
+- | mov CARG3d, RA
+- | mov L:CARG1d, SAVE_L
+- | mov L:CARG1d->base, BASE
+- | mov RCa, [RC]
+- | mov [RB], RCa
+- | mov CARG2d, RB
+- |.elif X64
+- | mov L:CARG1d, SAVE_L
+- | mov L:CARG1d->base, BASE
+- | mov CARG3d, RA
+- | mov RAa, [RC]
+- | mov [RB], RAa
+- | mov CARG2d, RB
+- |.else
+- | mov ARG3, RA
+- | mov RA, [RC+4]
+- | mov RC, [RC]
+- | mov [RB+4], RA
+- | mov [RB], RC
+- | mov ARG2, RB
+- |.endif
+- | jmp ->BC_CAT_Z
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |->vmeta_tgets:
+- | mov TMP1, RC // RC = GCstr *
+- | mov TMP2, LJ_TSTR
+- | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
+- | cmp PC_OP, BC_GGET
+- | jne >1
+- | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
+- | mov [RA], TAB:RB // RB = GCtab *
+- | mov dword [RA+4], LJ_TTAB
+- | mov RB, RA
+- | jmp >2
+ |
+ |->vmeta_tgetb:
+- | movzx RC, PC_RC
+- |.if DUALNUM
+- | mov TMP2, LJ_TISNUM
+- | mov TMP1, RC
+- |.else
+- | cvtsi2sd xmm0, RC
+- | movsd TMPQ, xmm0
+- |.endif
+- | lea RCa, TMPQ // Store temp. TValue in TMPQ.
+- | jmp >1
+ |
+ |->vmeta_tgetv:
+- | movzx RC, PC_RC // Reload TValue *k from RC.
+- | lea RC, [BASE+RC*8]
+- |1:
+- | movzx RB, PC_RB // Reload TValue *t from RB.
+- | lea RB, [BASE+RB*8]
+- |2:
+- |.if X64
+- | mov L:CARG1d, SAVE_L
+- | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
+- | mov CARG2d, RB
+- | mov CARG3, RCa // May be 64 bit ptr to stack.
+- | mov L:RB, L:CARG1d
+- |.else
+- | mov ARG2, RB
+- | mov L:RB, SAVE_L
+- | mov ARG3, RC
+- | mov ARG1, L:RB
+- | mov L:RB->base, BASE
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
+- | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
+- | mov BASE, L:RB->base
+- | test RC, RC
+- | jz >3
+ |->cont_ra: // BASE = base, RC = result
+- | movzx RA, PC_RA
+- |.if X64
+- | mov RBa, [RC]
+- | mov [BASE+RA*8], RBa
+- |.else
+- | mov RB, [RC+4]
+- | mov RC, [RC]
+- | mov [BASE+RA*8+4], RB
+- | mov [BASE+RA*8], RC
+- |.endif
+- | ins_next
+- |
+- |3: // Call __index metamethod.
+- | // BASE = base, L->top = new base, stack = cont/func/t/k
+- | mov RA, L:RB->top
+- | mov [RA-12], PC // [cont|PC]
+- | lea PC, [RA+FRAME_CONT]
+- | sub PC, BASE
+- | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
+- | mov NARGS:RD, 2+1 // 2 args for func(t, k).
+- | jmp ->vm_call_dispatch_f
+ |
+ |->vmeta_tgetr:
+- | mov FCARG1, TAB:RB
+- | mov RB, BASE // Save BASE.
+- | mov FCARG2, RC // Caveat: FCARG2 == BASE
+- | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
+- | // cTValue * or NULL returned in eax (RC).
+- | movzx RA, PC_RA
+- | mov BASE, RB // Restore BASE.
+- | test RC, RC
+- | jnz ->BC_TGETR_Z
+- | mov dword [BASE+RA*8+4], LJ_TNIL
+- | jmp ->BC_TGETR2_Z
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->vmeta_tsets:
+- | mov TMP1, RC // RC = GCstr *
+- | mov TMP2, LJ_TSTR
+- | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
+- | cmp PC_OP, BC_GSET
+- | jne >1
+- | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
+- | mov [RA], TAB:RB // RB = GCtab *
+- | mov dword [RA+4], LJ_TTAB
+- | mov RB, RA
+- | jmp >2
+ |
+ |->vmeta_tsetb:
+- | movzx RC, PC_RC
+- |.if DUALNUM
+- | mov TMP2, LJ_TISNUM
+- | mov TMP1, RC
+- |.else
+- | cvtsi2sd xmm0, RC
+- | movsd TMPQ, xmm0
+- |.endif
+- | lea RCa, TMPQ // Store temp. TValue in TMPQ.
+- | jmp >1
+ |
+ |->vmeta_tsetv:
+- | movzx RC, PC_RC // Reload TValue *k from RC.
+- | lea RC, [BASE+RC*8]
+- |1:
+- | movzx RB, PC_RB // Reload TValue *t from RB.
+- | lea RB, [BASE+RB*8]
+- |2:
+- |.if X64
+- | mov L:CARG1d, SAVE_L
+- | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
+- | mov CARG2d, RB
+- | mov CARG3, RCa // May be 64 bit ptr to stack.
+- | mov L:RB, L:CARG1d
+- |.else
+- | mov ARG2, RB
+- | mov L:RB, SAVE_L
+- | mov ARG3, RC
+- | mov ARG1, L:RB
+- | mov L:RB->base, BASE
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
+- | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
+- | mov BASE, L:RB->base
+- | test RC, RC
+- | jz >3
+- | // NOBARRIER: lj_meta_tset ensures the table is not black.
+- | movzx RA, PC_RA
+- |.if X64
+- | mov RBa, [BASE+RA*8]
+- | mov [RC], RBa
+- |.else
+- | mov RB, [BASE+RA*8+4]
+- | mov RA, [BASE+RA*8]
+- | mov [RC+4], RB
+- | mov [RC], RA
+- |.endif
+ |->cont_nop: // BASE = base, (RC = result)
+- | ins_next
+- |
+- |3: // Call __newindex metamethod.
+- | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+- | mov RA, L:RB->top
+- | mov [RA-12], PC // [cont|PC]
+- | movzx RC, PC_RA
+- | // Copy value to third argument.
+- |.if X64
+- | mov RBa, [BASE+RC*8]
+- | mov [RA+16], RBa
+- |.else
+- | mov RB, [BASE+RC*8+4]
+- | mov RC, [BASE+RC*8]
+- | mov [RA+20], RB
+- | mov [RA+16], RC
+- |.endif
+- | lea PC, [RA+FRAME_CONT]
+- | sub PC, BASE
+- | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
+- | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
+- | jmp ->vm_call_dispatch_f
+ |
+ |->vmeta_tsetr:
+- |.if X64WIN
+- | mov L:CARG1d, SAVE_L
+- | mov CARG3d, RC
+- | mov L:CARG1d->base, BASE
+- | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
+- |.elif X64
+- | mov L:CARG1d, SAVE_L
+- | mov CARG2d, TAB:RB
+- | mov L:CARG1d->base, BASE
+- | mov RB, BASE // Save BASE.
+- | mov CARG3d, RC // Caveat: CARG3d == BASE.
+- |.else
+- | mov L:RA, SAVE_L
+- | mov ARG2, TAB:RB
+- | mov RB, BASE // Save BASE.
+- | mov ARG3, RC
+- | mov ARG1, L:RA
+- | mov L:RA->base, BASE
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+- | // TValue * returned in eax (RC).
+- | movzx RA, PC_RA
+- | mov BASE, RB // Restore BASE.
+- | jmp ->BC_TSETR_Z
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+- |->vmeta_comp:
+- |.if X64
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE.
+- |.if X64WIN
+- | lea CARG3d, [BASE+RD*8]
+- | lea CARG2d, [BASE+RA*8]
+- |.else
+- | lea CARG2d, [BASE+RA*8]
+- | lea CARG3d, [BASE+RD*8]
+- |.endif
+- | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA.
+- | movzx CARG4d, PC_OP
+- |.else
+- | movzx RB, PC_OP
+- | lea RD, [BASE+RD*8]
+- | lea RA, [BASE+RA*8]
+- | mov ARG4, RB
+- | mov L:RB, SAVE_L
+- | mov ARG3, RD
+- | mov ARG2, RA
+- | mov ARG1, L:RB
+- | mov L:RB->base, BASE
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
+- | // 0/1 or TValue * (metamethod) returned in eax (RC).
+- |3:
+- | mov BASE, L:RB->base
+- | cmp RC, 1
+- | ja ->vmeta_binop
+- |4:
+- | lea PC, [PC+4]
+- | jb >6
+- |5:
+- | movzx RD, PC_RD
+- | branchPC RD
+- |6:
+- | ins_next
+- |
+ |->cont_condt: // BASE = base, RC = result
+- | add PC, 4
+- | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true.
+- | jb <5
+- | jmp <6
+ |
+ |->cont_condf: // BASE = base, RC = result
+- | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false.
+- | jmp <4
+ |
+ |->vmeta_equal:
+- | sub PC, 4
+- |.if X64WIN
+- | mov CARG3d, RD
+- | mov CARG4d, RB
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
+- | mov CARG2d, RA
+- | mov CARG1d, L:RB // Caveat: CARG1d == RA.
+- |.elif X64
+- | mov CARG2d, RA
+- | mov CARG4d, RB // Caveat: CARG4d == RA.
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE // Caveat: CARG3d == BASE.
+- | mov CARG3d, RD
+- | mov CARG1d, L:RB
+- |.else
+- | mov ARG4, RB
+- | mov L:RB, SAVE_L
+- | mov ARG3, RD
+- | mov ARG2, RA
+- | mov ARG1, L:RB
+- | mov L:RB->base, BASE
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
+- | // 0/1 or TValue * (metamethod) returned in eax (RC).
+- | jmp <3
+ |
+ |->vmeta_equal_cd:
+- |.if FFI
+- | sub PC, 4
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- | mov FCARG1, L:RB
+- | mov FCARG2, dword [PC-4]
+- | mov SAVE_PC, PC
+- | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins)
+- | // 0/1 or TValue * (metamethod) returned in eax (RC).
+- | jmp <3
+- |.endif
+ |
+ |->vmeta_istype:
+- |.if X64
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
+- | mov CARG2d, RA
+- | movzx CARG3d, PC_RD
+- | mov L:CARG1d, L:RB
+- |.else
+- | movzx RD, PC_RD
+- | mov ARG2, RA
+- | mov L:RB, SAVE_L
+- | mov ARG3, RD
+- | mov ARG1, L:RB
+- | mov L:RB->base, BASE
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+- | mov BASE, L:RB->base
+- | jmp <6
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |->vmeta_arith_vno:
+- |.if DUALNUM
+- | movzx RB, PC_RB
+- |.endif
+ |->vmeta_arith_vn:
+- | lea RC, [KBASE+RC*8]
+- | jmp >1
+ |
+ |->vmeta_arith_nvo:
+- |.if DUALNUM
+- | movzx RC, PC_RC
+- |.endif
+ |->vmeta_arith_nv:
+- | lea RC, [KBASE+RC*8]
+- | lea RB, [BASE+RB*8]
+- | xchg RB, RC
+- | jmp >2
+ |
+ |->vmeta_unm:
+- | lea RC, [BASE+RD*8]
+- | mov RB, RC
+- | jmp >2
+ |
+ |->vmeta_arith_vvo:
+- |.if DUALNUM
+- | movzx RB, PC_RB
+- |.endif
+ |->vmeta_arith_vv:
+- | lea RC, [BASE+RC*8]
+- |1:
+- | lea RB, [BASE+RB*8]
+- |2:
+- | lea RA, [BASE+RA*8]
+- |.if X64WIN
+- | mov CARG3d, RB
+- | mov CARG4d, RC
+- | movzx RC, PC_OP
+- | mov ARG5d, RC
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
+- | mov CARG2d, RA
+- | mov CARG1d, L:RB // Caveat: CARG1d == RA.
+- |.elif X64
+- | movzx CARG5d, PC_OP
+- | mov CARG2d, RA
+- | mov CARG4d, RC // Caveat: CARG4d == RA.
+- | mov L:CARG1d, SAVE_L
+- | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE.
+- | mov CARG3d, RB
+- | mov L:RB, L:CARG1d
+- |.else
+- | mov ARG3, RB
+- | mov L:RB, SAVE_L
+- | mov ARG4, RC
+- | movzx RC, PC_OP
+- | mov ARG2, RA
+- | mov ARG5, RC
+- | mov ARG1, L:RB
+- | mov L:RB->base, BASE
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+- | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
+- | mov BASE, L:RB->base
+- | test RC, RC
+- | jz ->cont_nop
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
+- | // BASE = base, RC = new base, stack = cont/func/o1/o2
+- | mov RA, RC
+- | sub RC, BASE
+- | mov [RA-12], PC // [cont|PC]
+- | lea PC, [RC+FRAME_CONT]
+- | mov NARGS:RD, 2+1 // 2 args for func(o1, o2).
+- | jmp ->vm_call_dispatch
+ |
+ |->vmeta_len:
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE
+- | mov L:FCARG1, L:RB
+- | mov SAVE_PC, PC
+- | call extern lj_meta_len@8 // (lua_State *L, TValue *o)
+- | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
+- | mov BASE, L:RB->base
+-#if LJ_52
+- | test RC, RC
+- | jne ->vmeta_binop // Binop call for compatibility.
+- | movzx RD, PC_RD
+- | mov TAB:FCARG1, [BASE+RD*8]
+- | jmp ->BC_LEN_Z
+-#else
+- | jmp ->vmeta_binop // Binop call for compatibility.
+-#endif
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call_ra:
+- | lea RA, [BASE+RA*8+8]
+ |->vmeta_call: // Resolve and call __call metamethod.
+- | // BASE = old base, RA = new base, RC = nargs+1, PC = return
+- | mov TMP2, RA // Save RA, RC for us.
+- | mov TMP1, NARGS:RD
+- | sub RA, 8
+- |.if X64
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
+- | mov CARG2d, RA
+- | lea CARG3d, [RA+NARGS:RD*8]
+- | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
+- |.else
+- | lea RC, [RA+NARGS:RD*8]
+- | mov L:RB, SAVE_L
+- | mov ARG2, RA
+- | mov ARG3, RC
+- | mov ARG1, L:RB
+- | mov L:RB->base, BASE // This is the callers base!
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+- | mov BASE, L:RB->base
+- | mov RA, TMP2
+- | mov NARGS:RD, TMP1
+- | mov LFUNC:RB, [RA-8]
+- | add NARGS:RD, 1
+- | // This is fragile. L->base must not move, KBASE must always be defined.
+- | cmp KBASE, BASE // Continue with CALLT if flag set.
+- | je ->BC_CALLT_Z
+- | mov BASE, RA
+- | ins_call // Otherwise call resolved metamethod.
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- | mov FCARG2, RA // Caveat: FCARG2 == BASE
+- | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
+- | mov SAVE_PC, PC
+- | call extern lj_meta_for@8 // (lua_State *L, TValue *base)
+- | mov BASE, L:RB->base
+- | mov RC, [PC-4]
+- | movzx RA, RCH
+- | movzx OP, RCL
+- | shr RC, 16
+- |.if X64
+- | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
+- |.else
+- | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI.
+- |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+@@ -1182,761 +312,109 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.macro .ffunc_1, name
+ |->ff_ .. name:
+- | cmp NARGS:RD, 1+1; jb ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_2, name
+ |->ff_ .. name:
+- | cmp NARGS:RD, 2+1; jb ->fff_fallback
+ |.endmacro
+ |
+- |.macro .ffunc_nsse, name, op
++ |.macro .ffunc_n, name, op
+ | .ffunc_1 name
+- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
+- | op xmm0, qword [BASE]
+ |.endmacro
+ |
+- |.macro .ffunc_nsse, name
+- | .ffunc_nsse name, movsd
++ |.macro .ffunc_n, name
++ | .ffunc_n name, mvc
+ |.endmacro
+ |
+- |.macro .ffunc_nnsse, name
++ |.macro .ffunc_nn, name
+ | .ffunc_2 name
+- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
+- | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
+- | movsd xmm0, qword [BASE]
+- | movsd xmm1, qword [BASE+8]
+- |.endmacro
+- |
+- |.macro .ffunc_nnr, name
+- | .ffunc_2 name
+- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
+- | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
+- | fld qword [BASE+8]
+- | fld qword [BASE]
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses label 1.
+ |.macro ffgccheck
+- | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
+- | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
+- | jb >1
+- | call ->fff_gcstep
+- |1:
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+ |
+ |.ffunc_1 assert
+- | mov RB, [BASE+4]
+- | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback
+- | mov PC, [BASE-4]
+- | mov MULTRES, RD
+- | mov [BASE-4], RB
+- | mov RB, [BASE]
+- | mov [BASE-8], RB
+- | sub RD, 2
+- | jz >2
+- | mov RA, BASE
+- |1:
+- | add RA, 8
+- |.if X64
+- | mov RBa, [RA]
+- | mov [RA-8], RBa
+- |.else
+- | mov RB, [RA+4]
+- | mov [RA-4], RB
+- | mov RB, [RA]
+- | mov [RA-8], RB
+- |.endif
+- | sub RD, 1
+- | jnz <1
+- |2:
+- | mov RD, MULTRES
+- | jmp ->fff_res_
+ |
+ |.ffunc_1 type
+- | mov RB, [BASE+4]
+- |.if X64
+- | mov RA, RB
+- | sar RA, 15
+- | cmp RA, -2
+- | je >3
+- |.endif
+- | mov RC, ~LJ_TNUMX
+- | not RB
+- | cmp RC, RB
+- | cmova RC, RB
+- |2:
+- | mov CFUNC:RB, [BASE-8]
+- | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
+- | mov PC, [BASE-4]
+- | mov dword [BASE-4], LJ_TSTR
+- | mov [BASE-8], STR:RC
+- | jmp ->fff_res1
+- |.if X64
+- |3:
+- | mov RC, ~LJ_TLIGHTUD
+- | jmp <2
+- |.endif
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+- | mov RB, [BASE+4]
+- | mov PC, [BASE-4]
+- | cmp RB, LJ_TTAB; jne >6
+- |1: // Field metatable must be at same offset for GCtab and GCudata!
+- | mov TAB:RB, [BASE]
+- | mov TAB:RB, TAB:RB->metatable
+- |2:
+- | test TAB:RB, TAB:RB
+- | mov dword [BASE-4], LJ_TNIL
+- | jz ->fff_res1
+- | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)]
+- | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
+- | mov [BASE-8], TAB:RB
+- | mov RA, TAB:RB->hmask
+- | and RA, STR:RC->hash
+- | imul RA, #NODE
+- | add NODE:RA, TAB:RB->node
+- |3: // Rearranged logic, because we expect _not_ to find the key.
+- | cmp dword NODE:RA->key.it, LJ_TSTR
+- | jne >4
+- | cmp dword NODE:RA->key.gcr, STR:RC
+- | je >5
+- |4:
+- | mov NODE:RA, NODE:RA->next
+- | test NODE:RA, NODE:RA
+- | jnz <3
+- | jmp ->fff_res1 // Not found, keep default result.
+- |5:
+- | mov RB, [RA+4]
+- | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
+- | mov RC, [RA]
+- | mov [BASE-4], RB // Return value of mt.__metatable.
+- | mov [BASE-8], RC
+- | jmp ->fff_res1
+- |
+- |6:
+- | cmp RB, LJ_TUDATA; je <1
+- |.if X64
+- | cmp RB, LJ_TNUMX; ja >8
+- | cmp RB, LJ_TISNUM; jbe >7
+- | mov RB, LJ_TLIGHTUD
+- | jmp >8
+- |7:
+- |.else
+- | cmp RB, LJ_TISNUM; ja >8
+- |.endif
+- | mov RB, LJ_TNUMX
+- |8:
+- | not RB
+- | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
+- | jmp <2
+ |
+ |.ffunc_2 setmetatable
+- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
+- | // Fast path: no mt for table yet and not clearing the mt.
+- | mov TAB:RB, [BASE]
+- | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
+- | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback
+- | mov TAB:RC, [BASE+8]
+- | mov TAB:RB->metatable, TAB:RC
+- | mov PC, [BASE-4]
+- | mov dword [BASE-4], LJ_TTAB // Return original table.
+- | mov [BASE-8], TAB:RB
+- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+- | jz >1
+- | // Possible write barrier. Table is black, but skip iswhite(mt) check.
+- | barrierback TAB:RB, RC
+- |1:
+- | jmp ->fff_res1
+ |
+ |.ffunc_2 rawget
+- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
+- |.if X64WIN
+- | mov RB, BASE // Save BASE.
+- | lea CARG3d, [BASE+8]
+- | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
+- | mov CARG1d, SAVE_L
+- |.elif X64
+- | mov RB, BASE // Save BASE.
+- | mov CARG2d, [BASE]
+- | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
+- | mov CARG1d, SAVE_L
+- |.else
+- | mov TAB:RD, [BASE]
+- | mov L:RB, SAVE_L
+- | mov ARG2, TAB:RD
+- | mov ARG1, L:RB
+- | mov RB, BASE // Save BASE.
+- | add BASE, 8
+- | mov ARG3, BASE
+- |.endif
+- | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+- | // cTValue * returned in eax (RD).
+- | mov BASE, RB // Restore BASE.
+- | // Copy table slot.
+- |.if X64
+- | mov RBa, [RD]
+- | mov PC, [BASE-4]
+- | mov [BASE-8], RBa
+- |.else
+- | mov RB, [RD]
+- | mov RD, [RD+4]
+- | mov PC, [BASE-4]
+- | mov [BASE-8], RB
+- | mov [BASE-4], RD
+- |.endif
+- | jmp ->fff_res1
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+- | // Only handles the number case inline (without a base argument).
+- | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
+- | cmp dword [BASE+4], LJ_TISNUM
+- |.if DUALNUM
+- | jne >1
+- | mov RB, dword [BASE]; jmp ->fff_resi
+- |1:
+- | ja ->fff_fallback
+- |.else
+- | jae ->fff_fallback
+- |.endif
+- | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
+ |
+ |.ffunc_1 tostring
+- | // Only handles the string or number case inline.
+- | mov PC, [BASE-4]
+- | cmp dword [BASE+4], LJ_TSTR; jne >3
+- | // A __tostring method in the string base metatable is ignored.
+- | mov STR:RD, [BASE]
+- |2:
+- | mov dword [BASE-4], LJ_TSTR
+- | mov [BASE-8], STR:RD
+- | jmp ->fff_res1
+- |3: // Handle numbers inline, unless a number base metatable is present.
+- | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
+- | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
+- | jne ->fff_fallback
+- | ffgccheck // Caveat: uses label 1.
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE // Add frame since C call can throw.
+- | mov SAVE_PC, PC // Redundant (but a defined value).
+- |.if X64 and not X64WIN
+- | mov FCARG2, BASE // Otherwise: FCARG2 == BASE
+- |.endif
+- | mov L:FCARG1, L:RB
+- |.if DUALNUM
+- | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
+- |.else
+- | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
+- |.endif
+- | // GCstr returned in eax (RD).
+- | mov BASE, L:RB->base
+- | jmp <2
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+- | je >2 // Missing 2nd arg?
+- |1:
+- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE // Add frame since C call can throw.
+- | mov L:RB->top, BASE // Dummy frame length is ok.
+- | mov PC, [BASE-4]
+- |.if X64WIN
+- | lea CARG3d, [BASE+8]
+- | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
+- | mov CARG1d, L:RB
+- |.elif X64
+- | mov CARG2d, [BASE]
+- | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
+- | mov CARG1d, L:RB
+- |.else
+- | mov TAB:RD, [BASE]
+- | mov ARG2, TAB:RD
+- | mov ARG1, L:RB
+- | add BASE, 8
+- | mov ARG3, BASE
+- |.endif
+- | mov SAVE_PC, PC // Needed for ITERN fallback.
+- | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+- | // Flag returned in eax (RD).
+- | mov BASE, L:RB->base
+- | test RD, RD; jz >3 // End of traversal?
+- | // Copy key and value to results.
+- |.if X64
+- | mov RBa, [BASE+8]
+- | mov RDa, [BASE+16]
+- | mov [BASE-8], RBa
+- | mov [BASE], RDa
+- |.else
+- | mov RB, [BASE+8]
+- | mov RD, [BASE+12]
+- | mov [BASE-8], RB
+- | mov [BASE-4], RD
+- | mov RB, [BASE+16]
+- | mov RD, [BASE+20]
+- | mov [BASE], RB
+- | mov [BASE+4], RD
+- |.endif
+- |->fff_res2:
+- | mov RD, 1+2
+- | jmp ->fff_res
+- |2: // Set missing 2nd arg to nil.
+- | mov dword [BASE+12], LJ_TNIL
+- | jmp <1
+- |3: // End of traversal: return nil.
+- | mov dword [BASE-4], LJ_TNIL
+- | jmp ->fff_res1
+ |
+ |.ffunc_1 pairs
+- | mov TAB:RB, [BASE]
+- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
+-#if LJ_52
+- | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
+-#endif
+- | mov CFUNC:RB, [BASE-8]
+- | mov CFUNC:RD, CFUNC:RB->upvalue[0]
+- | mov PC, [BASE-4]
+- | mov dword [BASE-4], LJ_TFUNC
+- | mov [BASE-8], CFUNC:RD
+- | mov dword [BASE+12], LJ_TNIL
+- | mov RD, 1+3
+- | jmp ->fff_res
+ |
+ |.ffunc_2 ipairs_aux
+- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
+- | cmp dword [BASE+12], LJ_TISNUM
+- |.if DUALNUM
+- | jne ->fff_fallback
+- |.else
+- | jae ->fff_fallback
+- |.endif
+- | mov PC, [BASE-4]
+- |.if DUALNUM
+- | mov RD, dword [BASE+8]
+- | add RD, 1
+- | mov dword [BASE-4], LJ_TISNUM
+- | mov dword [BASE-8], RD
+- |.else
+- | movsd xmm0, qword [BASE+8]
+- | sseconst_1 xmm1, RBa
+- | addsd xmm0, xmm1
+- | cvttsd2si RD, xmm0
+- | movsd qword [BASE-8], xmm0
+- |.endif
+- | mov TAB:RB, [BASE]
+- | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
+- | shl RD, 3
+- | add RD, TAB:RB->array
+- |1:
+- | cmp dword [RD+4], LJ_TNIL; je ->fff_res0
+- | // Copy array slot.
+- |.if X64
+- | mov RBa, [RD]
+- | mov [BASE], RBa
+- |.else
+- | mov RB, [RD]
+- | mov RD, [RD+4]
+- | mov [BASE], RB
+- | mov [BASE+4], RD
+- |.endif
+- | jmp ->fff_res2
+- |2: // Check for empty hash part first. Otherwise call C function.
+- | cmp dword TAB:RB->hmask, 0; je ->fff_res0
+- | mov FCARG1, TAB:RB
+- | mov RB, BASE // Save BASE.
+- | mov FCARG2, RD // Caveat: FCARG2 == BASE
+- | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
+- | // cTValue * or NULL returned in eax (RD).
+- | mov BASE, RB
+- | test RD, RD
+- | jnz <1
+ |->fff_res0:
+- | mov RD, 1+0
+- | jmp ->fff_res
+ |
+ |.ffunc_1 ipairs
+- | mov TAB:RB, [BASE]
+- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
+-#if LJ_52
+- | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
+-#endif
+- | mov CFUNC:RB, [BASE-8]
+- | mov CFUNC:RD, CFUNC:RB->upvalue[0]
+- | mov PC, [BASE-4]
+- | mov dword [BASE-4], LJ_TFUNC
+- | mov [BASE-8], CFUNC:RD
+- |.if DUALNUM
+- | mov dword [BASE+12], LJ_TISNUM
+- | mov dword [BASE+8], 0
+- |.else
+- | xorps xmm0, xmm0
+- | movsd qword [BASE+8], xmm0
+- |.endif
+- | mov RD, 1+3
+- | jmp ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc_1 pcall
+- | lea RA, [BASE+8]
+- | sub NARGS:RD, 1
+- | mov PC, 8+FRAME_PCALL
+- |1:
+- | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)]
+- | shr RB, HOOK_ACTIVE_SHIFT
+- | and RB, 1
+- | add PC, RB // Remember active hook before pcall.
+- | jmp ->vm_call_dispatch
+ |
+ |.ffunc_2 xpcall
+- | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback
+- | mov RB, [BASE+4] // Swap function and traceback.
+- | mov [BASE+12], RB
+- | mov dword [BASE+4], LJ_TFUNC
+- | mov LFUNC:RB, [BASE]
+- | mov PC, [BASE+8]
+- | mov [BASE+8], LFUNC:RB
+- | mov [BASE], PC
+- | lea RA, [BASE+16]
+- | sub NARGS:RD, 2
+- | mov PC, 16+FRAME_PCALL
+- | jmp <1
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+ |.macro coroutine_resume_wrap, resume
+ |.if resume
+ |.ffunc_1 coroutine_resume
+- | mov L:RB, [BASE]
+ |.else
+ |.ffunc coroutine_wrap_aux
+- | mov CFUNC:RB, [BASE-8]
+- | mov L:RB, CFUNC:RB->upvalue[0].gcr
+- |.endif
+- | mov PC, [BASE-4]
+- | mov SAVE_PC, PC
+- |.if X64
+- | mov TMP1, L:RB
+- |.else
+- | mov ARG1, L:RB
+- |.endif
+- |.if resume
+- | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback
+- |.endif
+- | cmp aword L:RB->cframe, 0; jne ->fff_fallback
+- | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
+- | mov RA, L:RB->top
+- | je >1 // Status != LUA_YIELD (i.e. 0)?
+- | cmp RA, L:RB->base // Check for presence of initial func.
+- | je ->fff_fallback
+- |1:
+- |.if resume
+- | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
+- |.else
+- | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
+- |.endif
+- | cmp PC, L:RB->maxstack; ja ->fff_fallback
+- | mov L:RB->top, PC
+- |
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- |.if resume
+- | add BASE, 8 // Keep resumed thread in stack for GC.
+- |.endif
+- | mov L:RB->top, BASE
+- |.if resume
+- | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
+- |.else
+- | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
+- |.endif
+- | sub RBa, PCa // Relative to PC.
+- |
+- | cmp PC, RA
+- | je >3
+- |2: // Move args to coroutine.
+- |.if X64
+- | mov RCa, [PC+RB]
+- | mov [PC-8], RCa
+- |.else
+- | mov RC, [PC+RB+4]
+- | mov [PC-4], RC
+- | mov RC, [PC+RB]
+- | mov [PC-8], RC
+- |.endif
+- | sub PC, 8
+- | cmp PC, RA
+- | jne <2
+- |3:
+- |.if X64
+- | mov CARG2d, RA
+- | mov CARG1d, TMP1
+- |.else
+- | mov ARG2, RA
+- | xor RA, RA
+- | mov ARG4, RA
+- | mov ARG3, RA
+- |.endif
+- | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
+- |
+- | mov L:RB, SAVE_L
+- |.if X64
+- | mov L:PC, TMP1
+- |.else
+- | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
+- |.endif
+- | mov BASE, L:RB->base
+- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+- | set_vmstate INTERP
+- |
+- | cmp eax, LUA_YIELD
+- | ja >8
+- |4:
+- | mov RA, L:PC->base
+- | mov KBASE, L:PC->top
+- | mov L:PC->top, RA // Clear coroutine stack.
+- | mov PC, KBASE
+- | sub PC, RA
+- | je >6 // No results?
+- | lea RD, [BASE+PC]
+- | shr PC, 3
+- | cmp RD, L:RB->maxstack
+- | ja >9 // Need to grow stack?
+- |
+- | mov RB, BASE
+- | sub RBa, RAa
+- |5: // Move results from coroutine.
+- |.if X64
+- | mov RDa, [RA]
+- | mov [RA+RB], RDa
+- |.else
+- | mov RD, [RA]
+- | mov [RA+RB], RD
+- | mov RD, [RA+4]
+- | mov [RA+RB+4], RD
+- |.endif
+- | add RA, 8
+- | cmp RA, KBASE
+- | jne <5
+- |6:
+- |.if resume
+- | lea RD, [PC+2] // nresults+1 = 1 + true + results.
+- | mov dword [BASE-4], LJ_TTRUE // Prepend true to results.
+- |.else
+- | lea RD, [PC+1] // nresults+1 = 1 + results.
+- |.endif
+- |7:
+- | mov PC, SAVE_PC
+- | mov MULTRES, RD
+- |.if resume
+- | mov RAa, -8
+- |.else
+- | xor RA, RA
+- |.endif
+- | test PC, FRAME_TYPE
+- | jz ->BC_RET_Z
+- | jmp ->vm_return
+- |
+- |8: // Coroutine returned with error (at co->top-1).
+- |.if resume
+- | mov dword [BASE-4], LJ_TFALSE // Prepend false to results.
+- | mov RA, L:PC->top
+- | sub RA, 8
+- | mov L:PC->top, RA // Clear error from coroutine stack.
+- | // Copy error message.
+- |.if X64
+- | mov RDa, [RA]
+- | mov [BASE], RDa
+- |.else
+- | mov RD, [RA]
+- | mov [BASE], RD
+- | mov RD, [RA+4]
+- | mov [BASE+4], RD
+- |.endif
+- | mov RD, 1+2 // nresults+1 = 1 + false + error.
+- | jmp <7
+- |.else
+- | mov FCARG2, L:PC
+- | mov FCARG1, L:RB
+- | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co)
+- | // Error function does not return.
+- |.endif
+- |
+- |9: // Handle stack expansion on return from yield.
+- |.if X64
+- | mov L:RA, TMP1
+- |.else
+- | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
+ |.endif
+- | mov L:RA->top, KBASE // Undo coroutine stack clearing.
+- | mov FCARG2, PC
+- | mov FCARG1, L:RB
+- | call extern lj_state_growstack@8 // (lua_State *L, int n)
+- |.if X64
+- | mov L:PC, TMP1
+- |.else
+- | mov L:PC, ARG1
+- |.endif
+- | mov BASE, L:RB->base
+- | jmp <4 // Retry the stack move.
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
+- | mov L:RB, SAVE_L
+- | test aword L:RB->cframe, CFRAME_RESUME
+- | jz ->fff_fallback
+- | mov L:RB->base, BASE
+- | lea RD, [BASE+NARGS:RD*8-8]
+- | mov L:RB->top, RD
+- | xor RD, RD
+- | mov aword L:RB->cframe, RDa
+- | mov al, LUA_YIELD
+- | mov byte L:RB->status, al
+- | jmp ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+- |.if not DUALNUM
+- |->fff_resi: // Dummy.
+- |.endif
+- |
+- |->fff_resn:
+- | mov PC, [BASE-4]
+- | fstp qword [BASE-8]
+- | jmp ->fff_res1
+- |
+ | .ffunc_1 math_abs
+- |.if DUALNUM
+- | cmp dword [BASE+4], LJ_TISNUM; jne >2
+- | mov RB, dword [BASE]
+- | cmp RB, 0; jns ->fff_resi
+- | neg RB; js >1
+ |->fff_resbit:
+ |->fff_resi:
+- | mov PC, [BASE-4]
+- | mov dword [BASE-4], LJ_TISNUM
+- | mov dword [BASE-8], RB
+- | jmp ->fff_res1
+- |1:
+- | mov PC, [BASE-4]
+- | mov dword [BASE-4], 0x41e00000 // 2^31.
+- | mov dword [BASE-8], 0
+- | jmp ->fff_res1
+- |2:
+- | ja ->fff_fallback
+- |.else
+- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
+- |.endif
+- | movsd xmm0, qword [BASE]
+- | sseconst_abs xmm1, RDa
+- | andps xmm0, xmm1
++ |->fff_resRB:
++ |
++ |.ffunc_n math_sqrt, sqrtsd
+ |->fff_resxmm0:
+- | mov PC, [BASE-4]
+- | movsd qword [BASE-8], xmm0
+- | // fallthrough
+ |
+ |->fff_res1:
+- | mov RD, 1+1
+ |->fff_res:
+- | mov MULTRES, RD
+ |->fff_res_:
+- | test PC, FRAME_TYPE
+- | jnz >7
+- |5:
+- | cmp PC_RB, RDL // More results expected?
+- | ja >6
+- | // Adjust BASE. KBASE is assumed to be set for the calling frame.
+- | movzx RA, PC_RA
+- | not RAa // Note: ~RA = -(RA+1)
+- | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
+- | ins_next
+- |
+- |6: // Fill up results with nil.
+- | mov dword [BASE+RD*8-12], LJ_TNIL
+- | add RD, 1
+- | jmp <5
+- |
+- |7: // Non-standard return case.
+- | mov RAa, -8 // Results start at BASE+RA = BASE-8.
+- | jmp ->vm_return
+- |
+- |.if X64
+- |.define fff_resfp, fff_resxmm0
+- |.else
+- |.define fff_resfp, fff_resn
+- |.endif
+ |
+ |.macro math_round, func
+ | .ffunc math_ .. func
+- |.if DUALNUM
+- | cmp dword [BASE+4], LJ_TISNUM; jne >1
+- | mov RB, dword [BASE]; jmp ->fff_resi
+- |1:
+- | ja ->fff_fallback
+- |.else
+- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
+- |.endif
+- | movsd xmm0, qword [BASE]
+- | call ->vm_ .. func .. _sse
+- |.if DUALNUM
+- | cvttsd2si RB, xmm0
+- | cmp RB, 0x80000000
+- | jne ->fff_resi
+- | cvtsi2sd xmm1, RB
+- | ucomisd xmm0, xmm1
+- | jp ->fff_resxmm0
+- | je ->fff_resi
+- |.endif
+- | jmp ->fff_resxmm0
+ |.endmacro
+ |
+ | math_round floor
+ | math_round ceil
+ |
+- |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
+- |
+ |.ffunc math_log
+- | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
+- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
+- | movsd xmm0, qword [BASE]
+- |.if not X64
+- | movsd FPARG1, xmm0
+- |.endif
+- | mov RB, BASE
+- | call extern log
+- | mov BASE, RB
+- | jmp ->fff_resfp
+ |
+ |.macro math_extern, func
+- | .ffunc_nsse math_ .. func
+- |.if not X64
+- | movsd FPARG1, xmm0
+- |.endif
+- | mov RB, BASE
+- | call extern func
+- | mov BASE, RB
+- | jmp ->fff_resfp
++ | .ffunc_n math_ .. func
+ |.endmacro
+ |
+ |.macro math_extern2, func
+- | .ffunc_nnsse math_ .. func
+- |.if not X64
+- | movsd FPARG1, xmm0
+- | movsd FPARG3, xmm1
+- |.endif
+- | mov RB, BASE
+- | call extern func
+- | mov BASE, RB
+- | jmp ->fff_resfp
++ | .ffunc_nn math_ .. func
+ |.endmacro
+ |
+ | math_extern log10
+@@ -1954,102 +432,13 @@ static void build_subroutines(BuildCtx *ctx)
+ | math_extern2 atan2
+ | math_extern2 fmod
+ |
+- |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
+- |
+- |.ffunc_1 math_frexp
+- | mov RB, [BASE+4]
+- | cmp RB, LJ_TISNUM; jae ->fff_fallback
+- | mov PC, [BASE-4]
+- | mov RC, [BASE]
+- | mov [BASE-4], RB; mov [BASE-8], RC
+- | shl RB, 1; cmp RB, 0xffe00000; jae >3
+- | or RC, RB; jz >3
+- | mov RC, 1022
+- | cmp RB, 0x00200000; jb >4
+- |1:
+- | shr RB, 21; sub RB, RC // Extract and unbias exponent.
+- | cvtsi2sd xmm0, RB
+- | mov RB, [BASE-4]
+- | and RB, 0x800fffff // Mask off exponent.
+- | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
+- | mov [BASE-4], RB
+- |2:
+- | movsd qword [BASE], xmm0
+- | mov RD, 1+2
+- | jmp ->fff_res
+- |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
+- | xorps xmm0, xmm0; jmp <2
+- |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
+- | movsd xmm0, qword [BASE]
+- | sseconst_hi xmm1, RBa, 43500000 // 2^54.
+- | mulsd xmm0, xmm1
+- | movsd qword [BASE-8], xmm0
+- | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
+- |
+- |.ffunc_nsse math_modf
+- | mov RB, [BASE+4]
+- | mov PC, [BASE-4]
+- | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
+- | movaps xmm4, xmm0
+- | call ->vm_trunc_sse
+- | subsd xmm4, xmm0
+- |1:
+- | movsd qword [BASE-8], xmm0
+- | movsd qword [BASE], xmm4
+- | mov RC, [BASE-4]; mov RB, [BASE+4]
+- | xor RC, RB; js >3 // Need to adjust sign?
+- |2:
+- | mov RD, 1+2
+- | jmp ->fff_res
+- |3:
+- | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
+- | jmp <2
+- |4:
+- | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
++ |.ffunc_2 math_ldexp
++ |
++ |.ffunc_n math_frexp
+ |
++ |.ffunc_n math_modf
+ |.macro math_minmax, name, cmovop, sseop
+ | .ffunc name
+- | mov RA, 2
+- | cmp dword [BASE+4], LJ_TISNUM
+- |.if DUALNUM
+- | jne >4
+- | mov RB, dword [BASE]
+- |1: // Handle integers.
+- | cmp RA, RD; jae ->fff_resi
+- | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3
+- | cmp RB, dword [BASE+RA*8-8]
+- | cmovop RB, dword [BASE+RA*8-8]
+- | add RA, 1
+- | jmp <1
+- |3:
+- | ja ->fff_fallback
+- | // Convert intermediate result to number and continue below.
+- | cvtsi2sd xmm0, RB
+- | jmp >6
+- |4:
+- | ja ->fff_fallback
+- |.else
+- | jae ->fff_fallback
+- |.endif
+- |
+- | movsd xmm0, qword [BASE]
+- |5: // Handle numbers or integers.
+- | cmp RA, RD; jae ->fff_resxmm0
+- | cmp dword [BASE+RA*8-4], LJ_TISNUM
+- |.if DUALNUM
+- | jb >6
+- | ja ->fff_fallback
+- | cvtsi2sd xmm1, dword [BASE+RA*8-8]
+- | jmp >7
+- |.else
+- | jae ->fff_fallback
+- |.endif
+- |6:
+- | movsd xmm1, qword [BASE+RA*8-8]
+- |7:
+- | sseop xmm0, xmm1
+- | add RA, 1
+- | jmp <5
+ |.endmacro
+ |
+ | math_minmax math_min, cmovg, minsd
+@@ -2058,150 +447,17 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+- | cmp NARGS:RD, 1+1; jne ->fff_fallback
+- | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
+- | mov STR:RB, [BASE]
+- | mov PC, [BASE-4]
+- | cmp dword STR:RB->len, 1
+- | jb ->fff_res0 // Return no results for empty string.
+- | movzx RB, byte STR:RB[1]
+- |.if DUALNUM
+- | jmp ->fff_resi
+- |.else
+- | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
+- |.endif
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+- | ffgccheck
+- | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
+- | cmp dword [BASE+4], LJ_TISNUM
+- |.if DUALNUM
+- | jne ->fff_fallback
+- | mov RB, dword [BASE]
+- | cmp RB, 255; ja ->fff_fallback
+- | mov TMP2, RB
+- |.else
+- | jae ->fff_fallback
+- | cvttsd2si RB, qword [BASE]
+- | cmp RB, 255; ja ->fff_fallback
+- | mov TMP2, RB
+- |.endif
+- |.if X64
+- | mov TMP3, 1
+- |.else
+- | mov ARG3, 1
+- |.endif
+- | lea RDa, TMP2 // Points to stack. Little-endian.
+ |->fff_newstr:
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- |.if X64
+- | mov CARG3d, TMP3 // Zero-extended to size_t.
+- | mov CARG2, RDa // May be 64 bit ptr to stack.
+- | mov CARG1d, L:RB
+- |.else
+- | mov ARG2, RD
+- | mov ARG1, L:RB
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr:
+- | // GCstr * returned in eax (RD).
+- | mov BASE, L:RB->base
+- | mov PC, [BASE-4]
+- | mov dword [BASE-4], LJ_TSTR
+- | mov [BASE-8], STR:RD
+- | jmp ->fff_res1
+ |
+ |.ffunc string_sub
+- | ffgccheck
+- | mov TMP2, -1
+- | cmp NARGS:RD, 1+2; jb ->fff_fallback
+- | jna >1
+- | cmp dword [BASE+20], LJ_TISNUM
+- |.if DUALNUM
+- | jne ->fff_fallback
+- | mov RB, dword [BASE+16]
+- | mov TMP2, RB
+- |.else
+- | jae ->fff_fallback
+- | cvttsd2si RB, qword [BASE+16]
+- | mov TMP2, RB
+- |.endif
+- |1:
+- | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
+- | cmp dword [BASE+12], LJ_TISNUM
+- |.if DUALNUM
+- | jne ->fff_fallback
+- |.else
+- | jae ->fff_fallback
+- |.endif
+- | mov STR:RB, [BASE]
+- | mov TMP3, STR:RB
+- | mov RB, STR:RB->len
+- |.if DUALNUM
+- | mov RA, dword [BASE+8]
+- |.else
+- | cvttsd2si RA, qword [BASE+8]
+- |.endif
+- | mov RC, TMP2
+- | cmp RB, RC // len < end? (unsigned compare)
+- | jb >5
+- |2:
+- | test RA, RA // start <= 0?
+- | jle >7
+- |3:
+- | mov STR:RB, TMP3
+- | sub RC, RA // start > end?
+- | jl ->fff_emptystr
+- | lea RB, [STR:RB+RA+#STR-1]
+- | add RC, 1
+- |4:
+- |.if X64
+- | mov TMP3, RC
+- |.else
+- | mov ARG3, RC
+- |.endif
+- | mov RD, RB
+- | jmp ->fff_newstr
+- |
+- |5: // Negative end or overflow.
+- | jl >6
+- | lea RC, [RC+RB+1] // end = end+(len+1)
+- | jmp <2
+- |6: // Overflow.
+- | mov RC, RB // end = len
+- | jmp <2
+- |
+- |7: // Negative start or underflow.
+- | je >8
+- | add RA, RB // start = start+(len+1)
+- | add RA, 1
+- | jg <3 // start > 0?
+- |8: // Underflow.
+- | mov RA, 1 // start = 1
+- | jmp <3
+ |
+ |->fff_emptystr: // Range underflow.
+- | xor RC, RC // Zero length. Any ptr in RB is ok.
+- | jmp <4
+ |
+ |.macro ffstring_op, name
+ | .ffunc_1 string_ .. name
+- | ffgccheck
+- | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
+- | mov L:RB, SAVE_L
+- | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
+- | mov L:RB->base, BASE
+- | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
+- | mov RC, SBUF:FCARG1->b
+- | mov SBUF:FCARG1->L, L:RB
+- | mov SBUF:FCARG1->p, RC
+- | mov SAVE_PC, PC
+- | call extern lj_buf_putstr_ .. name .. @8
+- | mov FCARG1, eax
+- | call extern lj_buf_tostr@4
+- | jmp ->fff_resstr
+ |.endmacro
+ |
+ |ffstring_op reverse
+@@ -2212,30 +468,6 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.macro .ffunc_bit, name, kind, fdef
+ | fdef name
+- |.if kind == 2
+- | sseconst_tobit xmm1, RBa
+- |.endif
+- | cmp dword [BASE+4], LJ_TISNUM
+- |.if DUALNUM
+- | jne >1
+- | mov RB, dword [BASE]
+- |.if kind > 0
+- | jmp >2
+- |.else
+- | jmp ->fff_resbit
+- |.endif
+- |1:
+- | ja ->fff_fallback
+- |.else
+- | jae ->fff_fallback
+- |.endif
+- | movsd xmm0, qword [BASE]
+- |.if kind < 2
+- | sseconst_tobit xmm1, RBa
+- |.endif
+- | addsd xmm0, xmm1
+- | movd RB, xmm0
+- |2:
+ |.endmacro
+ |
+ |.macro .ffunc_bit, name, kind
+@@ -2243,32 +475,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endmacro
+ |
+ |.ffunc_bit bit_tobit, 0
+- | jmp ->fff_resbit
+ |
+ |.macro .ffunc_bit_op, name, ins
+ | .ffunc_bit name, 2
+- | mov TMP2, NARGS:RD // Save for fallback.
+- | lea RD, [BASE+NARGS:RD*8-16]
+- |1:
+- | cmp RD, BASE
+- | jbe ->fff_resbit
+- | cmp dword [RD+4], LJ_TISNUM
+- |.if DUALNUM
+- | jne >2
+- | ins RB, dword [RD]
+- | sub RD, 8
+- | jmp <1
+- |2:
+- | ja ->fff_fallback_bit_op
+- |.else
+- | jae ->fff_fallback_bit_op
+- |.endif
+- | movsd xmm0, qword [RD]
+- | addsd xmm0, xmm1
+- | movd RA, xmm0
+- | ins RB, RA
+- | sub RD, 8
+- | jmp <1
+ |.endmacro
+ |
+ |.ffunc_bit_op bit_band, and
+@@ -2276,39 +485,14 @@ static void build_subroutines(BuildCtx *ctx)
+ |.ffunc_bit_op bit_bxor, xor
+ |
+ |.ffunc_bit bit_bswap, 1
+- | bswap RB
+- | jmp ->fff_resbit
+ |
+ |.ffunc_bit bit_bnot, 1
+- | not RB
+- |.if DUALNUM
+- | jmp ->fff_resbit
+- |.else
+ |->fff_resbit:
+- | cvtsi2sd xmm0, RB
+- | jmp ->fff_resxmm0
+- |.endif
+ |
+ |->fff_fallback_bit_op:
+- | mov NARGS:RD, TMP2 // Restore for fallback
+- | jmp ->fff_fallback
+ |
+ |.macro .ffunc_bit_sh, name, ins
+- |.if DUALNUM
+ | .ffunc_bit name, 1, .ffunc_2
+- | // Note: no inline conversion from number for 2nd argument!
+- | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
+- | mov RA, dword [BASE+8]
+- |.else
+- | .ffunc_nnsse name
+- | sseconst_tobit xmm2, RBa
+- | addsd xmm0, xmm2
+- | addsd xmm1, xmm2
+- | movd RB, xmm0
+- | movd RA, xmm1
+- |.endif
+- | ins RB, cl // Assumes RA is ecx.
+- | jmp ->fff_resbit
+ |.endmacro
+ |
+ |.ffunc_bit_sh bit_lshift, shl
+@@ -2320,268 +504,36 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback_2:
+- | mov NARGS:RD, 1+2 // Other args are ignored, anyway.
+- | jmp ->fff_fallback
+ |->fff_fallback_1:
+- | mov NARGS:RD, 1+1 // Other args are ignored, anyway.
+ |->fff_fallback: // Call fast function fallback handler.
+- | // BASE = new base, RD = nargs+1
+- | mov L:RB, SAVE_L
+- | mov PC, [BASE-4] // Fallback may overwrite PC.
+- | mov SAVE_PC, PC // Redundant (but a defined value).
+- | mov L:RB->base, BASE
+- | lea RD, [BASE+NARGS:RD*8-8]
+- | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
+- | mov L:RB->top, RD
+- | mov CFUNC:RD, [BASE-8]
+- | cmp RA, L:RB->maxstack
+- | ja >5 // Need to grow stack.
+- |.if X64
+- | mov CARG1d, L:RB
+- |.else
+- | mov ARG1, L:RB
+- |.endif
+- | call aword CFUNC:RD->f // (lua_State *L)
+- | mov BASE, L:RB->base
+- | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+- | test RD, RD; jg ->fff_res // Returned nresults+1?
+- |1:
+- | mov RA, L:RB->top
+- | sub RA, BASE
+- | shr RA, 3
+- | test RD, RD
+- | lea NARGS:RD, [RA+1]
+- | mov LFUNC:RB, [BASE-8]
+- | jne ->vm_call_tail // Returned -1?
+- | ins_callt // Returned 0: retry fast path.
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+- | mov RA, BASE
+- | test PC, FRAME_TYPE
+- | jnz >3
+- | movzx RB, PC_RA
+- | not RBa // Note: ~RB = -(RB+1)
+- | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8
+- | jmp ->vm_call_dispatch // Resolve again for tailcall.
+- |3:
+- | mov RB, PC
+- | and RB, -8
+- | sub BASE, RB
+- | jmp ->vm_call_dispatch // Resolve again for tailcall.
+- |
+- |5: // Grow stack for fallback handler.
+- | mov FCARG2, LUA_MINSTACK
+- | mov FCARG1, L:RB
+- | call extern lj_state_growstack@8 // (lua_State *L, int n)
+- | mov BASE, L:RB->base
+- | xor RD, RD // Simulate a return 0.
+- | jmp <1 // Dumb retry (goes through ff first).
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RD = nargs+1
+- | pop RBa // Must keep stack at same level.
+- | mov TMPa, RBa // Save return address
+- | mov L:RB, SAVE_L
+- | mov SAVE_PC, PC // Redundant (but a defined value).
+- | mov L:RB->base, BASE
+- | lea RD, [BASE+NARGS:RD*8-8]
+- | mov FCARG1, L:RB
+- | mov L:RB->top, RD
+- | call extern lj_gc_step@4 // (lua_State *L)
+- | mov BASE, L:RB->base
+- | mov RD, L:RB->top
+- | sub RD, BASE
+- | shr RD, 3
+- | add NARGS:RD, 1
+- | mov RBa, TMPa
+- | push RBa // Restore return address.
+- | ret
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+- |.if JIT
+- | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
+- | test RDL, HOOK_VMEVENT // No recording while in vmevent.
+- | jnz >5
+- | // Decrement the hookcount for consistency, but always do the call.
+- | test RDL, HOOK_ACTIVE
+- | jnz >1
+- | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
+- | jz >1
+- | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
+- | jmp >1
+- |.endif
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+- | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
+- | test RDL, HOOK_ACTIVE // Hook already active?
+- | jnz >5
+- | jmp >1
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+- | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
+- | test RDL, HOOK_ACTIVE // Hook already active?
+- | jnz >5
+- |
+- | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
+- | jz >5
+- | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
+- | jz >1
+- | test RDL, LUA_MASKLINE
+- | jz >5
+- |1:
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- | mov FCARG2, PC // Caveat: FCARG2 == BASE
+- | mov FCARG1, L:RB
+- | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+- | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
+- |3:
+- | mov BASE, L:RB->base
+- |4:
+- | movzx RA, PC_RA
+- |5:
+- | movzx OP, PC_OP
+- | movzx RD, PC_RD
+- |.if X64
+- | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
+- |.else
+- | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins.
+- |.endif
+ |
+ |->cont_hook: // Continue from hook yield.
+- | add PC, 4
+- | mov RA, [RB-24]
+- | mov MULTRES, RA // Restore MULTRES for *M ins.
+- | jmp <4
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+- |.if JIT
+- | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L).
+- | mov RB, LFUNC:RB->pc
+- | movzx RD, byte [RB+PC2PROTO(framesize)]
+- | lea RD, [BASE+RD*8]
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- | mov L:RB->top, RD
+- | mov FCARG2, PC
+- | lea FCARG1, [DISPATCH+GG_DISP2J]
+- | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
+- | mov SAVE_PC, PC
+- | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
+- | jmp <3
+- |.endif
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+- | mov SAVE_PC, PC
+- |.if JIT
+- | jmp >1
+- |.endif
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+- |.if JIT
+- | mov SAVE_PC, PC
+- | or PC, 1 // Marker for hot call.
+- |1:
+- |.endif
+- | lea RD, [BASE+NARGS:RD*8-8]
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- | mov L:RB->top, RD
+- | mov FCARG2, PC
+- | mov FCARG1, L:RB
+- | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc)
+- | // ASMFunction returned in eax/rax (RDa).
+- | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
+- |.if JIT
+- | and PC, -2
+- |.endif
+- | mov BASE, L:RB->base
+- | mov RAa, RDa
+- | mov RD, L:RB->top
+- | sub RD, BASE
+- | mov RBa, RAa
+- | movzx RA, PC_RA
+- | shr RD, 3
+- | add NARGS:RD, 1
+- | jmp RBa
+ |
+ |->cont_stitch: // Trace stitching.
+- |.if JIT
+- | // BASE = base, RC = result, RB = mbase
+- | mov TRACE:RA, [RB-24] // Save previous trace.
+- | mov TMP1, TRACE:RA
+- | mov TMP3, DISPATCH // Need one more register.
+- | mov DISPATCH, MULTRES
+- | movzx RA, PC_RA
+- | lea RA, [BASE+RA*8] // Call base.
+- | sub DISPATCH, 1
+- | jz >2
+- |1: // Move results down.
+- |.if X64
+- | mov RBa, [RC]
+- | mov [RA], RBa
+- |.else
+- | mov RB, [RC]
+- | mov [RA], RB
+- | mov RB, [RC+4]
+- | mov [RA+4], RB
+- |.endif
+- | add RC, 8
+- | add RA, 8
+- | sub DISPATCH, 1
+- | jnz <1
+- |2:
+- | movzx RC, PC_RA
+- | movzx RB, PC_RB
+- | add RC, RB
+- | lea RC, [BASE+RC*8-8]
+- |3:
+- | cmp RC, RA
+- | ja >9 // More results wanted?
+- |
+- | mov DISPATCH, TMP3
+- | mov TRACE:RD, TMP1 // Get previous trace.
+- | movzx RB, word TRACE:RD->traceno
+- | movzx RD, word TRACE:RD->link
+- | cmp RD, RB
+- | je ->cont_nop // Blacklisted.
+- | test RD, RD
+- | jne =>BC_JLOOP // Jump to stitched trace.
+- |
+- | // Stitch a new trace to the previous trace.
+- | mov [DISPATCH+DISPATCH_J(exitno)], RB
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- | mov FCARG2, PC
+- | lea FCARG1, [DISPATCH+GG_DISP2J]
+- | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
+- | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
+- | mov BASE, L:RB->base
+- | jmp ->cont_nop
+- |
+- |9: // Fill up results with nil.
+- | mov dword [RA+4], LJ_TNIL
+- | add RA, 8
+- | jmp <3
+- |.endif
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+-#if LJ_HASPROFILE
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- | mov FCARG2, PC // Caveat: FCARG2 == BASE
+- | mov FCARG1, L:RB
+- | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
+- | mov BASE, L:RB->base
+- | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+- | sub PC, 4
+- | jmp ->cont_nop
+-#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+@@ -2590,207 +542,14 @@ static void build_subroutines(BuildCtx *ctx)
+ |// Called from an exit stub with the exit number on the stack.
+ |// The 16 bit exit number is stored with two (sign-extended) push imm8.
+ |->vm_exit_handler:
+- |.if JIT
+- |.if X64
+- | push r13; push r12
+- | push r11; push r10; push r9; push r8
+- | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
+- | push rbx; push rdx; push rcx; push rax
+- | movzx RC, byte [rbp-8] // Reconstruct exit number.
+- | mov RCH, byte [rbp-16]
+- | mov [rbp-8], r15; mov [rbp-16], r14
+- |.else
+- | push ebp; lea ebp, [esp+12]; push ebp
+- | push ebx; push edx; push ecx; push eax
+- | movzx RC, byte [ebp-4] // Reconstruct exit number.
+- | mov RCH, byte [ebp-8]
+- | mov [ebp-4], edi; mov [ebp-8], esi
+- |.endif
+- | // Caveat: DISPATCH is ebx.
+- | mov DISPATCH, [ebp]
+- | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
+- | set_vmstate EXIT
+- | mov [DISPATCH+DISPATCH_J(exitno)], RC
+- | mov [DISPATCH+DISPATCH_J(parent)], RA
+- |.if X64
+- |.if X64WIN
+- | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
+- |.else
+- | sub rsp, 16*8 // Room for SSE regs.
+- |.endif
+- | add rbp, -128
+- | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
+- | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
+- | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
+- | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
+- | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
+- | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
+- | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
+- | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
+- |.else
+- | sub esp, 8*8+16 // Room for SSE regs + args.
+- | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
+- | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
+- | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
+- | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
+- |.endif
+- | // Caveat: RB is ebp.
+- | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
+- | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
+- | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
+- | mov L:RB->base, BASE
+- |.if X64WIN
+- | lea CARG2, [rsp+4*8]
+- |.elif X64
+- | mov CARG2, rsp
+- |.else
+- | lea FCARG2, [esp+16]
+- |.endif
+- | lea FCARG1, [DISPATCH+GG_DISP2J]
+- | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
+- | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
+- | // MULTRES or negated error code returned in eax (RD).
+- | mov RAa, L:RB->cframe
+- | and RAa, CFRAME_RAWMASK
+- |.if X64WIN
+- | // Reposition stack later.
+- |.elif X64
+- | mov rsp, RAa // Reposition stack to C frame.
+- |.else
+- | mov esp, RAa // Reposition stack to C frame.
+- |.endif
+- | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
+- | mov BASE, L:RB->base
+- | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC.
+- |.if X64
+- | jmp >1
+- |.endif
+- |.endif
+ |->vm_exit_interp:
+- | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
+- |.if JIT
+- |.if X64
+- | // Restore additional callee-save registers only used in compiled code.
+- |.if X64WIN
+- | lea RAa, [rsp+9*16+4*8]
+- |1:
+- | movdqa xmm15, [RAa-9*16]
+- | movdqa xmm14, [RAa-8*16]
+- | movdqa xmm13, [RAa-7*16]
+- | movdqa xmm12, [RAa-6*16]
+- | movdqa xmm11, [RAa-5*16]
+- | movdqa xmm10, [RAa-4*16]
+- | movdqa xmm9, [RAa-3*16]
+- | movdqa xmm8, [RAa-2*16]
+- | movdqa xmm7, [RAa-1*16]
+- | mov rsp, RAa // Reposition stack to C frame.
+- | movdqa xmm6, [RAa]
+- | mov r15, CSAVE_3
+- | mov r14, CSAVE_4
+- |.else
+- | add rsp, 16 // Reposition stack to C frame.
+- |1:
+- |.endif
+- | mov r13, TMPa
+- | mov r12, TMPQ
+- |.endif
+- | test RD, RD; js >9 // Check for error from exit.
+- | mov L:RB, SAVE_L
+- | mov MULTRES, RD
+- | mov LFUNC:KBASE, [BASE-8]
+- | mov KBASE, LFUNC:KBASE->pc
+- | mov KBASE, [KBASE+PC2PROTO(k)]
+- | mov L:RB->base, BASE
+- | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
+- | set_vmstate INTERP
+- | // Modified copy of ins_next which handles function header dispatch, too.
+- | mov RC, [PC]
+- | movzx RA, RCH
+- | movzx OP, RCL
+- | add PC, 4
+- | shr RC, 16
+- | cmp OP, BC_FUNCF // Function header?
+- | jb >3
+- | cmp OP, BC_FUNCC+2 // Fast function?
+- | jae >4
+- |2:
+- | mov RC, MULTRES // RC/RD holds nres+1.
+- |3:
+- |.if X64
+- | jmp aword [DISPATCH+OP*8]
+- |.else
+- | jmp aword [DISPATCH+OP*4]
+- |.endif
+- |
+- |4: // Check frame below fast function.
+- | mov RC, [BASE-4]
+- | test RC, FRAME_TYPE
+- | jnz <2 // Trace stitching continuation?
+- | // Otherwise set KBASE for Lua function below fast function.
+- | movzx RC, byte [RC-3]
+- | not RCa
+- | mov LFUNC:KBASE, [BASE+RC*8-8]
+- | mov KBASE, LFUNC:KBASE->pc
+- | mov KBASE, [KBASE+PC2PROTO(k)]
+- | jmp <2
+- |
+- |9: // Rethrow error from the right C frame.
+- | neg RD
+- | mov FCARG1, L:RB
+- | mov FCARG2, RD
+- | call extern lj_err_throw@8 // (lua_State *L, int errcode)
+- |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+- |// FP value rounding. Called by math.floor/math.ceil fast functions
+- |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
+ |.macro vm_round, name, mode, cond
+ |->name:
+- |.if not X64 and cond
+- | movsd xmm0, qword [esp+4]
+- | call ->name .. _sse
+- | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
+- | fld qword [esp+4]
+- | ret
+- |.endif
+- |
+- |->name .. _sse:
+- | sseconst_abs xmm2, RDa
+- | sseconst_2p52 xmm3, RDa
+- | movaps xmm1, xmm0
+- | andpd xmm1, xmm2 // |x|
+- | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
+- | jbe >1
+- | andnpd xmm2, xmm0 // Isolate sign bit.
+- |.if mode == 2 // trunc(x)?
+- | movaps xmm0, xmm1
+- | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
+- | subsd xmm1, xmm3
+- | sseconst_1 xmm3, RDa
+- | cmpsd xmm0, xmm1, 1 // |x| < result?
+- | andpd xmm0, xmm3
+- | subsd xmm1, xmm0 // If yes, subtract -1.
+- | orpd xmm1, xmm2 // Merge sign bit back in.
+- |.else
+- | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
+- | subsd xmm1, xmm3
+- | orpd xmm1, xmm2 // Merge sign bit back in.
+- | .if mode == 1 // ceil(x)?
+- | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0.
+- | cmpsd xmm0, xmm1, 6 // x > result?
+- | .else // floor(x)?
+- | sseconst_1 xmm2, RDa
+- | cmpsd xmm0, xmm1, 1 // x < result?
+- | .endif
+- | andpd xmm0, xmm2
+- | subsd xmm1, xmm0 // If yes, subtract +-1.
+- |.endif
+- | movaps xmm0, xmm1
+- |1:
+- | ret
+ |.endmacro
+ |
+ | vm_round vm_floor, 0, 1
+@@ -2799,68 +558,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+ |->vm_mod:
+- |// Args in xmm0/xmm1, return value in xmm0.
+- |// Caveat: xmm0-xmm5 and RC (eax) modified!
+- | movaps xmm5, xmm0
+- | divsd xmm0, xmm1
+- | sseconst_abs xmm2, RDa
+- | sseconst_2p52 xmm3, RDa
+- | movaps xmm4, xmm0
+- | andpd xmm4, xmm2 // |x/y|
+- | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
+- | jbe >1
+- | andnpd xmm2, xmm0 // Isolate sign bit.
+- | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
+- | subsd xmm4, xmm3
+- | orpd xmm4, xmm2 // Merge sign bit back in.
+- | sseconst_1 xmm2, RDa
+- | cmpsd xmm0, xmm4, 1 // x/y < result?
+- | andpd xmm0, xmm2
+- | subsd xmm4, xmm0 // If yes, subtract 1.0.
+- | movaps xmm0, xmm5
+- | mulsd xmm1, xmm4
+- | subsd xmm0, xmm1
+- | ret
+- |1:
+- | mulsd xmm1, xmm0
+- | movaps xmm0, xmm5
+- | subsd xmm0, xmm1
+- | ret
+ |
+ |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
+ |->vm_powi_sse:
+- | cmp eax, 1; jle >6 // i<=1?
+- | // Now 1 < (unsigned)i <= 0x80000000.
+- |1: // Handle leading zeros.
+- | test eax, 1; jnz >2
+- | mulsd xmm0, xmm0
+- | shr eax, 1
+- | jmp <1
+- |2:
+- | shr eax, 1; jz >5
+- | movaps xmm1, xmm0
+- |3: // Handle trailing bits.
+- | mulsd xmm0, xmm0
+- | shr eax, 1; jz >4
+- | jnc <3
+- | mulsd xmm1, xmm0
+- | jmp <3
+- |4:
+- | mulsd xmm0, xmm1
+- |5:
+- | ret
+- |6:
+- | je <5 // x^1 ==> x
+- | jb >7 // x^0 ==> 1
+- | neg eax
+- | call <1
+- | sseconst_1 xmm1, RDa
+- | divsd xmm1, xmm0
+- | movaps xmm0, xmm1
+- | ret
+- |7:
+- | sseconst_1 xmm0, RDa
+- | ret
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+@@ -2868,46 +568,6 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
+ |->vm_cpuid:
+- |.if X64
+- | mov eax, CARG1d
+- | .if X64WIN; push rsi; mov rsi, CARG2; .endif
+- | push rbx
+- | xor ecx, ecx
+- | cpuid
+- | mov [rsi], eax
+- | mov [rsi+4], ebx
+- | mov [rsi+8], ecx
+- | mov [rsi+12], edx
+- | pop rbx
+- | .if X64WIN; pop rsi; .endif
+- | ret
+- |.else
+- | pushfd
+- | pop edx
+- | mov ecx, edx
+- | xor edx, 0x00200000 // Toggle ID bit in flags.
+- | push edx
+- | popfd
+- | pushfd
+- | pop edx
+- | xor eax, eax // Zero means no features supported.
+- | cmp ecx, edx
+- | jz >1 // No ID toggle means no CPUID support.
+- | mov eax, [esp+4] // Argument 1 is function number.
+- | push edi
+- | push ebx
+- | xor ecx, ecx
+- | cpuid
+- | mov edi, [esp+16] // Argument 2 is result area.
+- | mov [edi], eax
+- | mov [edi+4], ebx
+- | mov [edi+8], ecx
+- | mov [edi+12], edx
+- | pop ebx
+- | pop edi
+- |1:
+- | ret
+- |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Assertions ---------------------------------------------------------
+@@ -2915,9 +575,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->assert_bad_for_arg_type:
+ #ifdef LUA_USE_ASSERT
+- | int3
+ #endif
+- | int3
+ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+@@ -2925,198 +583,10 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |// Handler for callback functions. Callback slot number in ah/al.
+ |->vm_ffi_callback:
+- |.if FFI
+- |.type CTSTATE, CTState, PC
+- |.if not X64
+- | sub esp, 16 // Leave room for SAVE_ERRF etc.
+- |.endif
+- | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
+- | lea DISPATCH, [ebp+GG_G2DISP]
+- | mov CTSTATE, GL:ebp->ctype_state
+- | movzx eax, ax
+- | mov CTSTATE->cb.slot, eax
+- |.if X64
+- | mov CTSTATE->cb.gpr[0], CARG1
+- | mov CTSTATE->cb.gpr[1], CARG2
+- | mov CTSTATE->cb.gpr[2], CARG3
+- | mov CTSTATE->cb.gpr[3], CARG4
+- | movsd qword CTSTATE->cb.fpr[0], xmm0
+- | movsd qword CTSTATE->cb.fpr[1], xmm1
+- | movsd qword CTSTATE->cb.fpr[2], xmm2
+- | movsd qword CTSTATE->cb.fpr[3], xmm3
+- |.if X64WIN
+- | lea rax, [rsp+CFRAME_SIZE+4*8]
+- |.else
+- | lea rax, [rsp+CFRAME_SIZE]
+- | mov CTSTATE->cb.gpr[4], CARG5
+- | mov CTSTATE->cb.gpr[5], CARG6
+- | movsd qword CTSTATE->cb.fpr[4], xmm4
+- | movsd qword CTSTATE->cb.fpr[5], xmm5
+- | movsd qword CTSTATE->cb.fpr[6], xmm6
+- | movsd qword CTSTATE->cb.fpr[7], xmm7
+- |.endif
+- | mov CTSTATE->cb.stack, rax
+- | mov CARG2, rsp
+- |.else
+- | lea eax, [esp+CFRAME_SIZE+16]
+- | mov CTSTATE->cb.gpr[0], FCARG1
+- | mov CTSTATE->cb.gpr[1], FCARG2
+- | mov CTSTATE->cb.stack, eax
+- | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp.
+- | mov FCARG2, [esp+CFRAME_SIZE+8]
+- | mov SAVE_RET, FCARG1
+- | mov SAVE_R4, FCARG2
+- | mov FCARG2, esp
+- |.endif
+- | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
+- | mov FCARG1, CTSTATE
+- | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf)
+- | // lua_State * returned in eax (RD).
+- | set_vmstate INTERP
+- | mov BASE, L:RD->base
+- | mov RD, L:RD->top
+- | sub RD, BASE
+- | mov LFUNC:RB, [BASE-8]
+- | shr RD, 3
+- | add RD, 1
+- | ins_callt
+- |.endif
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
+- |.if FFI
+- | mov L:RA, SAVE_L
+- | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
+- | mov aword CTSTATE->L, L:RAa
+- | mov L:RA->base, BASE
+- | mov L:RA->top, RB
+- | mov FCARG1, CTSTATE
+- | mov FCARG2, RC
+- | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o)
+- |.if X64
+- | mov rax, CTSTATE->cb.gpr[0]
+- | movsd xmm0, qword CTSTATE->cb.fpr[0]
+- | jmp ->vm_leave_unw
+- |.else
+- | mov L:RB, SAVE_L
+- | mov eax, CTSTATE->cb.gpr[0]
+- | mov edx, CTSTATE->cb.gpr[1]
+- | cmp dword CTSTATE->cb.gpr[2], 1
+- | jb >7
+- | je >6
+- | fld qword CTSTATE->cb.fpr[0].d
+- | jmp >7
+- |6:
+- | fld dword CTSTATE->cb.fpr[0].f
+- |7:
+- | mov ecx, L:RB->top
+- | movzx ecx, word [ecx+6] // Get stack adjustment and copy up.
+- | mov SAVE_L, ecx // Must be one slot above SAVE_RET
+- | restoreregs
+- | pop ecx // Move return addr from SAVE_RET.
+- | add esp, [esp] // Adjust stack.
+- | add esp, 16
+- | push ecx
+- | ret
+- |.endif
+- |.endif
+- |
+- |->vm_ffi_call@4: // Call C function via FFI.
+- | // Caveat: needs special frame unwinding, see below.
+- |.if FFI
+- |.if X64
+- | .type CCSTATE, CCallState, rbx
+- | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1
+- |.else
+- | .type CCSTATE, CCallState, ebx
+- | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1
+- |.endif
+- |
+- | // Readjust stack.
+- |.if X64
+- | mov eax, CCSTATE->spadj
+- | sub rsp, rax
+- |.else
+- | sub esp, CCSTATE->spadj
+- |.if WIN
+- | mov CCSTATE->spadj, esp
+- |.endif
+- |.endif
+ |
+- | // Copy stack slots.
+- | movzx ecx, byte CCSTATE->nsp
+- | sub ecx, 1
+- | js >2
+- |1:
+- |.if X64
+- | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
+- | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
+- |.else
+- | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
+- | mov [esp+ecx*4], eax
+- |.endif
+- | sub ecx, 1
+- | jns <1
+- |2:
+- |
+- |.if X64
+- | movzx eax, byte CCSTATE->nfpr
+- | mov CARG1, CCSTATE->gpr[0]
+- | mov CARG2, CCSTATE->gpr[1]
+- | mov CARG3, CCSTATE->gpr[2]
+- | mov CARG4, CCSTATE->gpr[3]
+- |.if not X64WIN
+- | mov CARG5, CCSTATE->gpr[4]
+- | mov CARG6, CCSTATE->gpr[5]
+- |.endif
+- | test eax, eax; jz >5
+- | movaps xmm0, CCSTATE->fpr[0]
+- | movaps xmm1, CCSTATE->fpr[1]
+- | movaps xmm2, CCSTATE->fpr[2]
+- | movaps xmm3, CCSTATE->fpr[3]
+- |.if not X64WIN
+- | cmp eax, 4; jbe >5
+- | movaps xmm4, CCSTATE->fpr[4]
+- | movaps xmm5, CCSTATE->fpr[5]
+- | movaps xmm6, CCSTATE->fpr[6]
+- | movaps xmm7, CCSTATE->fpr[7]
+- |.endif
+- |5:
+- |.else
+- | mov FCARG1, CCSTATE->gpr[0]
+- | mov FCARG2, CCSTATE->gpr[1]
+- |.endif
+- |
+- | call aword CCSTATE->func
+- |
+- |.if X64
+- | mov CCSTATE->gpr[0], rax
+- | movaps CCSTATE->fpr[0], xmm0
+- |.if not X64WIN
+- | mov CCSTATE->gpr[1], rdx
+- | movaps CCSTATE->fpr[1], xmm1
+- |.endif
+- |.else
+- | mov CCSTATE->gpr[0], eax
+- | mov CCSTATE->gpr[1], edx
+- | cmp byte CCSTATE->resx87, 1
+- | jb >7
+- | je >6
+- | fstp qword CCSTATE->fpr[0].d[0]
+- | jmp >7
+- |6:
+- | fstp dword CCSTATE->fpr[0].f[0]
+- |7:
+- |.if WIN
+- | sub CCSTATE->spadj, esp
+- |.endif
+- |.endif
+- |
+- |.if X64
+- | mov rbx, [rbp-8]; leave; ret
+- |.else
+- | mov ebx, [ebp-4]; leave; ret
+- |.endif
+- |.endif
++ |->vm_ffi_call: // Call C function via FFI.
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+ |//-----------------------------------------------------------------------
+@@ -3126,2096 +596,87 @@ static void build_subroutines(BuildCtx *ctx)
+ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ {
+ int vk = 0;
++ (void)vk;
+ |// Note: aligning all instructions does not pay off.
+ |=>defop:
+
+ switch (op) {
+-
+- /* -- Comparison ops ---------------------------------------------------- */
+-
+- /* Remember: all ops branch for a true comparison, fall through otherwise. */
+-
+- |.macro jmp_comp, lt, ge, le, gt, target
+- ||switch (op) {
+- ||case BC_ISLT:
+- | lt target
+- ||break;
+- ||case BC_ISGE:
+- | ge target
+- ||break;
+- ||case BC_ISLE:
+- | le target
+- ||break;
+- ||case BC_ISGT:
+- | gt target
+- ||break;
+- ||default: break; /* Shut up GCC. */
+- ||}
+- |.endmacro
+-
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+- | // RA = src1, RD = src2, JMP with RD = target
+- | ins_AD
+- |.if DUALNUM
+- | checkint RA, >7
+- | checkint RD, >8
+- | mov RB, dword [BASE+RA*8]
+- | add PC, 4
+- | cmp RB, dword [BASE+RD*8]
+- | jmp_comp jge, jl, jg, jle, >9
+- |6:
+- | movzx RD, PC_RD
+- | branchPC RD
+- |9:
+- | ins_next
+- |
+- |7: // RA is not an integer.
+- | ja ->vmeta_comp
+- | // RA is a number.
+- | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
+- | // RA is a number, RD is an integer.
+- | cvtsi2sd xmm0, dword [BASE+RD*8]
+- | jmp >2
+- |
+- |8: // RA is an integer, RD is not an integer.
+- | ja ->vmeta_comp
+- | // RA is an integer, RD is a number.
+- | cvtsi2sd xmm1, dword [BASE+RA*8]
+- | movsd xmm0, qword [BASE+RD*8]
+- | add PC, 4
+- | ucomisd xmm0, xmm1
+- | jmp_comp jbe, ja, jb, jae, <9
+- | jmp <6
+- |.else
+- | checknum RA, ->vmeta_comp
+- | checknum RD, ->vmeta_comp
+- |.endif
+- |1:
+- | movsd xmm0, qword [BASE+RD*8]
+- |2:
+- | add PC, 4
+- | ucomisd xmm0, qword [BASE+RA*8]
+- |3:
+- | // Unordered: all of ZF CF PF set, ordered: PF clear.
+- | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+- |.if DUALNUM
+- | jmp_comp jbe, ja, jb, jae, <9
+- | jmp <6
+- |.else
+- | jmp_comp jbe, ja, jb, jae, >1
+- | movzx RD, PC_RD
+- | branchPC RD
+- |1:
+- | ins_next
+- |.endif
+- break;
+-
+ case BC_ISEQV: case BC_ISNEV:
+- vk = op == BC_ISEQV;
+- | ins_AD // RA = src1, RD = src2, JMP with RD = target
+- | mov RB, [BASE+RD*8+4]
+- | add PC, 4
+- |.if DUALNUM
+- | cmp RB, LJ_TISNUM; jne >7
+- | checkint RA, >8
+- | mov RB, dword [BASE+RD*8]
+- | cmp RB, dword [BASE+RA*8]
+- if (vk) {
+- | jne >9
+- } else {
+- | je >9
+- }
+- | movzx RD, PC_RD
+- | branchPC RD
+- |9:
+- | ins_next
+- |
+- |7: // RD is not an integer.
+- | ja >5
+- | // RD is a number.
+- | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
+- | // RD is a number, RA is an integer.
+- | cvtsi2sd xmm0, dword [BASE+RA*8]
+- | jmp >2
+- |
+- |8: // RD is an integer, RA is not an integer.
+- | ja >5
+- | // RD is an integer, RA is a number.
+- | cvtsi2sd xmm0, dword [BASE+RD*8]
+- | ucomisd xmm0, qword [BASE+RA*8]
+- | jmp >4
+- |
+- |.else
+- | cmp RB, LJ_TISNUM; jae >5
+- | checknum RA, >5
+- |.endif
+- |1:
+- | movsd xmm0, qword [BASE+RA*8]
+- |2:
+- | ucomisd xmm0, qword [BASE+RD*8]
+- |4:
+- iseqne_fp:
+- if (vk) {
+- | jp >2 // Unordered means not equal.
+- | jne >2
+- } else {
+- | jp >2 // Unordered means not equal.
+- | je >1
+- }
+- iseqne_end:
+- if (vk) {
+- |1: // EQ: Branch to the target.
+- | movzx RD, PC_RD
+- | branchPC RD
+- |2: // NE: Fallthrough to next instruction.
+- |.if not FFI
+- |3:
+- |.endif
+- } else {
+- |.if not FFI
+- |3:
+- |.endif
+- |2: // NE: Branch to the target.
+- | movzx RD, PC_RD
+- | branchPC RD
+- |1: // EQ: Fallthrough to next instruction.
+- }
+- if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
+- op == BC_ISEQN || op == BC_ISNEN)) {
+- | jmp <9
+- } else {
+- | ins_next
+- }
+- |
+- if (op == BC_ISEQV || op == BC_ISNEV) {
+- |5: // Either or both types are not numbers.
+- |.if FFI
+- | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
+- | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd
+- |.endif
+- | checktp RA, RB // Compare types.
+- | jne <2 // Not the same type?
+- | cmp RB, LJ_TISPRI
+- | jae <1 // Same type and primitive type?
+- |
+- | // Same types and not a primitive type. Compare GCobj or pvalue.
+- | mov RA, [BASE+RA*8]
+- | mov RD, [BASE+RD*8]
+- | cmp RA, RD
+- | je <1 // Same GCobjs or pvalues?
+- | cmp RB, LJ_TISTABUD
+- | ja <2 // Different objects and not table/ud?
+- |.if X64
+- | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata.
+- | jb <2
+- |.endif
+- |
+- | // Different tables or userdatas. Need to check __eq metamethod.
+- | // Field metatable must be at same offset for GCtab and GCudata!
+- | mov TAB:RB, TAB:RA->metatable
+- | test TAB:RB, TAB:RB
+- | jz <2 // No metatable?
+- | test byte TAB:RB->nomm, 1<<MM_eq
+- | jnz <2 // Or 'no __eq' flag set?
+- if (vk) {
+- | xor RB, RB // ne = 0
+- } else {
+- | mov RB, 1 // ne = 1
+- }
+- | jmp ->vmeta_equal // Handle __eq metamethod.
+- } else {
+- |.if FFI
+- |3:
+- | cmp RB, LJ_TCDATA
+- if (LJ_DUALNUM && vk) {
+- | jne <9
+- } else {
+- | jne <2
+- }
+- | jmp ->vmeta_equal_cd
+- |.endif
+- }
+- break;
+ case BC_ISEQS: case BC_ISNES:
+- vk = op == BC_ISEQS;
+- | ins_AND // RA = src, RD = str const, JMP with RD = target
+- | mov RB, [BASE+RA*8+4]
+- | add PC, 4
+- | cmp RB, LJ_TSTR; jne >3
+- | mov RA, [BASE+RA*8]
+- | cmp RA, [KBASE+RD*4]
+- iseqne_test:
+- if (vk) {
+- | jne >2
+- } else {
+- | je >1
+- }
+- goto iseqne_end;
+ case BC_ISEQN: case BC_ISNEN:
+- vk = op == BC_ISEQN;
+- | ins_AD // RA = src, RD = num const, JMP with RD = target
+- | mov RB, [BASE+RA*8+4]
+- | add PC, 4
+- |.if DUALNUM
+- | cmp RB, LJ_TISNUM; jne >7
+- | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8
+- | mov RB, dword [KBASE+RD*8]
+- | cmp RB, dword [BASE+RA*8]
+- if (vk) {
+- | jne >9
+- } else {
+- | je >9
+- }
+- | movzx RD, PC_RD
+- | branchPC RD
+- |9:
+- | ins_next
+- |
+- |7: // RA is not an integer.
+- | ja >3
+- | // RA is a number.
+- | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
+- | // RA is a number, RD is an integer.
+- | cvtsi2sd xmm0, dword [KBASE+RD*8]
+- | jmp >2
+- |
+- |8: // RA is an integer, RD is a number.
+- | cvtsi2sd xmm0, dword [BASE+RA*8]
+- | ucomisd xmm0, qword [KBASE+RD*8]
+- | jmp >4
+- |.else
+- | cmp RB, LJ_TISNUM; jae >3
+- |.endif
+- |1:
+- | movsd xmm0, qword [KBASE+RD*8]
+- |2:
+- | ucomisd xmm0, qword [BASE+RA*8]
+- |4:
+- goto iseqne_fp;
+ case BC_ISEQP: case BC_ISNEP:
+- vk = op == BC_ISEQP;
+- | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
+- | mov RB, [BASE+RA*8+4]
+- | add PC, 4
+- | cmp RB, RD
+- if (!LJ_HASFFI) goto iseqne_test;
+- if (vk) {
+- | jne >3
+- | movzx RD, PC_RD
+- | branchPC RD
+- |2:
+- | ins_next
+- |3:
+- | cmp RB, LJ_TCDATA; jne <2
+- | jmp ->vmeta_equal_cd
+- } else {
+- | je >2
+- | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
+- | movzx RD, PC_RD
+- | branchPC RD
+- |2:
+- | ins_next
+- }
+- break;
+-
+- /* -- Unary test and copy ops ------------------------------------------- */
+-
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+- | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
+- | mov RB, [BASE+RD*8+4]
+- | add PC, 4
+- | cmp RB, LJ_TISTRUECOND
+- if (op == BC_IST || op == BC_ISTC) {
+- | jae >1
+- } else {
+- | jb >1
+- }
+- if (op == BC_ISTC || op == BC_ISFC) {
+- | mov [BASE+RA*8+4], RB
+- | mov RB, [BASE+RD*8]
+- | mov [BASE+RA*8], RB
+- }
+- | movzx RD, PC_RD
+- | branchPC RD
+- |1: // Fallthrough to the next instruction.
+- | ins_next
+- break;
+-
+ case BC_ISTYPE:
+- | ins_AD // RA = src, RD = -type
+- | add RD, [BASE+RA*8+4]
+- | jne ->vmeta_istype
+- | ins_next
+- break;
+ case BC_ISNUM:
+- | ins_AD // RA = src, RD = -(TISNUM-1)
+- | checknum RA, ->vmeta_istype
+- | ins_next
+- break;
+-
+- /* -- Unary ops --------------------------------------------------------- */
+-
+ case BC_MOV:
+- | ins_AD // RA = dst, RD = src
+- |.if X64
+- | mov RBa, [BASE+RD*8]
+- | mov [BASE+RA*8], RBa
+- |.else
+- | mov RB, [BASE+RD*8+4]
+- | mov RD, [BASE+RD*8]
+- | mov [BASE+RA*8+4], RB
+- | mov [BASE+RA*8], RD
+- |.endif
+- | ins_next_
+- break;
+ case BC_NOT:
+- | ins_AD // RA = dst, RD = src
+- | xor RB, RB
+- | checktp RD, LJ_TISTRUECOND
+- | adc RB, LJ_TTRUE
+- | mov [BASE+RA*8+4], RB
+- | ins_next
+- break;
+ case BC_UNM:
+- | ins_AD // RA = dst, RD = src
+- |.if DUALNUM
+- | checkint RD, >5
+- | mov RB, [BASE+RD*8]
+- | neg RB
+- | jo >4
+- | mov dword [BASE+RA*8+4], LJ_TISNUM
+- | mov dword [BASE+RA*8], RB
+- |9:
+- | ins_next
+- |4:
+- | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31.
+- | mov dword [BASE+RA*8], 0
+- | jmp <9
+- |5:
+- | ja ->vmeta_unm
+- |.else
+- | checknum RD, ->vmeta_unm
+- |.endif
+- | movsd xmm0, qword [BASE+RD*8]
+- | sseconst_sign xmm1, RDa
+- | xorps xmm0, xmm1
+- | movsd qword [BASE+RA*8], xmm0
+- |.if DUALNUM
+- | jmp <9
+- |.else
+- | ins_next
+- |.endif
+- break;
+ case BC_LEN:
+- | ins_AD // RA = dst, RD = src
+- | checkstr RD, >2
+- | mov STR:RD, [BASE+RD*8]
+- |.if DUALNUM
+- | mov RD, dword STR:RD->len
+- |1:
+- | mov dword [BASE+RA*8+4], LJ_TISNUM
+- | mov dword [BASE+RA*8], RD
+- |.else
+- | xorps xmm0, xmm0
+- | cvtsi2sd xmm0, dword STR:RD->len
+- |1:
+- | movsd qword [BASE+RA*8], xmm0
+- |.endif
+- | ins_next
+- |2:
+- | checktab RD, ->vmeta_len
+- | mov TAB:FCARG1, [BASE+RD*8]
+-#if LJ_52
+- | mov TAB:RB, TAB:FCARG1->metatable
+- | cmp TAB:RB, 0
+- | jnz >9
+- |3:
+-#endif
+- |->BC_LEN_Z:
+- | mov RB, BASE // Save BASE.
+- | call extern lj_tab_len@4 // (GCtab *t)
+- | // Length of table returned in eax (RD).
+- |.if DUALNUM
+- | // Nothing to do.
+- |.else
+- | cvtsi2sd xmm0, RD
+- |.endif
+- | mov BASE, RB // Restore BASE.
+- | movzx RA, PC_RA
+- | jmp <1
+-#if LJ_52
+- |9: // Check for __len.
+- | test byte TAB:RB->nomm, 1<<MM_len
+- | jnz <3
+- | jmp ->vmeta_len // 'no __len' flag NOT set: check.
+-#endif
+- break;
+-
+- /* -- Binary ops -------------------------------------------------------- */
+-
+- |.macro ins_arithpre, sseins, ssereg
+- | ins_ABC
+- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+- ||switch (vk) {
+- ||case 0:
+- | checknum RB, ->vmeta_arith_vn
+- | .if DUALNUM
+- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
+- | .endif
+- | movsd xmm0, qword [BASE+RB*8]
+- | sseins ssereg, qword [KBASE+RC*8]
+- || break;
+- ||case 1:
+- | checknum RB, ->vmeta_arith_nv
+- | .if DUALNUM
+- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
+- | .endif
+- | movsd xmm0, qword [KBASE+RC*8]
+- | sseins ssereg, qword [BASE+RB*8]
+- || break;
+- ||default:
+- | checknum RB, ->vmeta_arith_vv
+- | checknum RC, ->vmeta_arith_vv
+- | movsd xmm0, qword [BASE+RB*8]
+- | sseins ssereg, qword [BASE+RC*8]
+- || break;
+- ||}
+- |.endmacro
+- |
+- |.macro ins_arithdn, intins
+- | ins_ABC
+- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+- ||switch (vk) {
+- ||case 0:
+- | checkint RB, ->vmeta_arith_vn
+- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn
+- | mov RB, [BASE+RB*8]
+- | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno
+- || break;
+- ||case 1:
+- | checkint RB, ->vmeta_arith_nv
+- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv
+- | mov RC, [KBASE+RC*8]
+- | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo
+- || break;
+- ||default:
+- | checkint RB, ->vmeta_arith_vv
+- | checkint RC, ->vmeta_arith_vv
+- | mov RB, [BASE+RB*8]
+- | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo
+- || break;
+- ||}
+- | mov dword [BASE+RA*8+4], LJ_TISNUM
+- ||if (vk == 1) {
+- | mov dword [BASE+RA*8], RC
+- ||} else {
+- | mov dword [BASE+RA*8], RB
+- ||}
+- | ins_next
+- |.endmacro
+- |
+- |.macro ins_arithpost
+- | movsd qword [BASE+RA*8], xmm0
+- |.endmacro
+- |
+- |.macro ins_arith, sseins
+- | ins_arithpre sseins, xmm0
+- | ins_arithpost
+- | ins_next
+- |.endmacro
+- |
+- |.macro ins_arith, intins, sseins
+- |.if DUALNUM
+- | ins_arithdn intins
+- |.else
+- | ins_arith, sseins
+- |.endif
+- |.endmacro
+-
+- | // RA = dst, RB = src1 or num const, RC = src2 or num const
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+- | ins_arith add, addsd
+- break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+- | ins_arith sub, subsd
+- break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+- | ins_arith imul, mulsd
+- break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+- | ins_arith divsd
+- break;
+ case BC_MODVN:
+- | ins_arithpre movsd, xmm1
+- |->BC_MODVN_Z:
+- | call ->vm_mod
+- | ins_arithpost
+- | ins_next
+- break;
+ case BC_MODNV: case BC_MODVV:
+- | ins_arithpre movsd, xmm1
+- | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
+- break;
+ case BC_POW:
+- | ins_arithpre movsd, xmm1
+- | mov RB, BASE
+- |.if not X64
+- | movsd FPARG1, xmm0
+- | movsd FPARG3, xmm1
+- |.endif
+- | call extern pow
+- | movzx RA, PC_RA
+- | mov BASE, RB
+- |.if X64
+- | ins_arithpost
+- |.else
+- | fstp qword [BASE+RA*8]
+- |.endif
+- | ins_next
+- break;
+-
+ case BC_CAT:
+- | ins_ABC // RA = dst, RB = src_start, RC = src_end
+- |.if X64
+- | mov L:CARG1d, SAVE_L
+- | mov L:CARG1d->base, BASE
+- | lea CARG2d, [BASE+RC*8]
+- | mov CARG3d, RC
+- | sub CARG3d, RB
+- |->BC_CAT_Z:
+- | mov L:RB, L:CARG1d
+- |.else
+- | lea RA, [BASE+RC*8]
+- | sub RC, RB
+- | mov ARG2, RA
+- | mov ARG3, RC
+- |->BC_CAT_Z:
+- | mov L:RB, SAVE_L
+- | mov ARG1, L:RB
+- | mov L:RB->base, BASE
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
+- | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
+- | mov BASE, L:RB->base
+- | test RC, RC
+- | jnz ->vmeta_binop
+- | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
+- | movzx RA, PC_RA
+- |.if X64
+- | mov RCa, [BASE+RB*8]
+- | mov [BASE+RA*8], RCa
+- |.else
+- | mov RC, [BASE+RB*8+4]
+- | mov RB, [BASE+RB*8]
+- | mov [BASE+RA*8+4], RC
+- | mov [BASE+RA*8], RB
+- |.endif
+- | ins_next
+- break;
+-
+- /* -- Constant ops ------------------------------------------------------ */
+-
+ case BC_KSTR:
+- | ins_AND // RA = dst, RD = str const (~)
+- | mov RD, [KBASE+RD*4]
+- | mov dword [BASE+RA*8+4], LJ_TSTR
+- | mov [BASE+RA*8], RD
+- | ins_next
+- break;
+ case BC_KCDATA:
+- |.if FFI
+- | ins_AND // RA = dst, RD = cdata const (~)
+- | mov RD, [KBASE+RD*4]
+- | mov dword [BASE+RA*8+4], LJ_TCDATA
+- | mov [BASE+RA*8], RD
+- | ins_next
+- |.endif
+- break;
+ case BC_KSHORT:
+- | ins_AD // RA = dst, RD = signed int16 literal
+- |.if DUALNUM
+- | movsx RD, RDW
+- | mov dword [BASE+RA*8+4], LJ_TISNUM
+- | mov dword [BASE+RA*8], RD
+- |.else
+- | movsx RD, RDW // Sign-extend literal.
+- | cvtsi2sd xmm0, RD
+- | movsd qword [BASE+RA*8], xmm0
+- |.endif
+- | ins_next
+- break;
+ case BC_KNUM:
+- | ins_AD // RA = dst, RD = num const
+- | movsd xmm0, qword [KBASE+RD*8]
+- | movsd qword [BASE+RA*8], xmm0
+- | ins_next
+- break;
+ case BC_KPRI:
+- | ins_AND // RA = dst, RD = primitive type (~)
+- | mov [BASE+RA*8+4], RD
+- | ins_next
+- break;
+ case BC_KNIL:
+- | ins_AD // RA = dst_start, RD = dst_end
+- | lea RA, [BASE+RA*8+12]
+- | lea RD, [BASE+RD*8+4]
+- | mov RB, LJ_TNIL
+- | mov [RA-8], RB // Sets minimum 2 slots.
+- |1:
+- | mov [RA], RB
+- | add RA, 8
+- | cmp RA, RD
+- | jbe <1
+- | ins_next
+- break;
+-
+- /* -- Upvalue and function ops ------------------------------------------ */
+-
+ case BC_UGET:
+- | ins_AD // RA = dst, RD = upvalue #
+- | mov LFUNC:RB, [BASE-8]
+- | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
+- | mov RB, UPVAL:RB->v
+- |.if X64
+- | mov RDa, [RB]
+- | mov [BASE+RA*8], RDa
+- |.else
+- | mov RD, [RB+4]
+- | mov RB, [RB]
+- | mov [BASE+RA*8+4], RD
+- | mov [BASE+RA*8], RB
+- |.endif
+- | ins_next
+- break;
+ case BC_USETV:
+-#define TV2MARKOFS \
+- ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
+- | ins_AD // RA = upvalue #, RD = src
+- | mov LFUNC:RB, [BASE-8]
+- | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
+- | cmp byte UPVAL:RB->closed, 0
+- | mov RB, UPVAL:RB->v
+- | mov RA, [BASE+RD*8]
+- | mov RD, [BASE+RD*8+4]
+- | mov [RB], RA
+- | mov [RB+4], RD
+- | jz >1
+- | // Check barrier for closed upvalue.
+- | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
+- | jnz >2
+- |1:
+- | ins_next
+- |
+- |2: // Upvalue is black. Check if new value is collectable and white.
+- | sub RD, LJ_TISGCV
+- | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
+- | jbe <1
+- | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
+- | jz <1
+- | // Crossed a write barrier. Move the barrier forward.
+- |.if X64 and not X64WIN
+- | mov FCARG2, RB
+- | mov RB, BASE // Save BASE.
+- |.else
+- | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
+- |.endif
+- | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
+- | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
+- | mov BASE, RB // Restore BASE.
+- | jmp <1
+- break;
+-#undef TV2MARKOFS
+ case BC_USETS:
+- | ins_AND // RA = upvalue #, RD = str const (~)
+- | mov LFUNC:RB, [BASE-8]
+- | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
+- | mov GCOBJ:RA, [KBASE+RD*4]
+- | mov RD, UPVAL:RB->v
+- | mov [RD], GCOBJ:RA
+- | mov dword [RD+4], LJ_TSTR
+- | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
+- | jnz >2
+- |1:
+- | ins_next
+- |
+- |2: // Check if string is white and ensure upvalue is closed.
+- | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
+- | jz <1
+- | cmp byte UPVAL:RB->closed, 0
+- | jz <1
+- | // Crossed a write barrier. Move the barrier forward.
+- | mov RB, BASE // Save BASE (FCARG2 == BASE).
+- | mov FCARG2, RD
+- | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
+- | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
+- | mov BASE, RB // Restore BASE.
+- | jmp <1
+- break;
+ case BC_USETN:
+- | ins_AD // RA = upvalue #, RD = num const
+- | mov LFUNC:RB, [BASE-8]
+- | movsd xmm0, qword [KBASE+RD*8]
+- | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
+- | mov RA, UPVAL:RB->v
+- | movsd qword [RA], xmm0
+- | ins_next
+- break;
+ case BC_USETP:
+- | ins_AND // RA = upvalue #, RD = primitive type (~)
+- | mov LFUNC:RB, [BASE-8]
+- | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
+- | mov RA, UPVAL:RB->v
+- | mov [RA+4], RD
+- | ins_next
+- break;
+ case BC_UCLO:
+- | ins_AD // RA = level, RD = target
+- | branchPC RD // Do this first to free RD.
+- | mov L:RB, SAVE_L
+- | cmp dword L:RB->openupval, 0
+- | je >1
+- | mov L:RB->base, BASE
+- | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
+- | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
+- | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
+- | mov BASE, L:RB->base
+- |1:
+- | ins_next
+- break;
+-
+ case BC_FNEW:
+- | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
+- |.if X64
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
+- | mov CARG3d, [BASE-8]
+- | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *.
+- | mov CARG1d, L:RB
+- |.else
+- | mov LFUNC:RA, [BASE-8]
+- | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
+- | mov L:RB, SAVE_L
+- | mov ARG3, LFUNC:RA
+- | mov ARG2, PROTO:RD
+- | mov ARG1, L:RB
+- | mov L:RB->base, BASE
+- |.endif
+- | mov SAVE_PC, PC
+- | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+- | call extern lj_func_newL_gc
+- | // GCfuncL * returned in eax (RC).
+- | mov BASE, L:RB->base
+- | movzx RA, PC_RA
+- | mov [BASE+RA*8], LFUNC:RC
+- | mov dword [BASE+RA*8+4], LJ_TFUNC
+- | ins_next
+- break;
+-
+- /* -- Table ops --------------------------------------------------------- */
+-
+ case BC_TNEW:
+- | ins_AD // RA = dst, RD = hbits|asize
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
+- | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
+- | mov SAVE_PC, PC
+- | jae >5
+- |1:
+- |.if X64
+- | mov CARG3d, RD
+- | and RD, 0x7ff
+- | shr CARG3d, 11
+- |.else
+- | mov RA, RD
+- | and RD, 0x7ff
+- | shr RA, 11
+- | mov ARG3, RA
+- |.endif
+- | cmp RD, 0x7ff
+- | je >3
+- |2:
+- |.if X64
+- | mov L:CARG1d, L:RB
+- | mov CARG2d, RD
+- |.else
+- | mov ARG1, L:RB
+- | mov ARG2, RD
+- |.endif
+- | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
+- | // Table * returned in eax (RC).
+- | mov BASE, L:RB->base
+- | movzx RA, PC_RA
+- | mov [BASE+RA*8], TAB:RC
+- | mov dword [BASE+RA*8+4], LJ_TTAB
+- | ins_next
+- |3: // Turn 0x7ff into 0x801.
+- | mov RD, 0x801
+- | jmp <2
+- |5:
+- | mov L:FCARG1, L:RB
+- | call extern lj_gc_step_fixtop@4 // (lua_State *L)
+- | movzx RD, PC_RD
+- | jmp <1
+- break;
+ case BC_TDUP:
+- | ins_AND // RA = dst, RD = table const (~) (holding template table)
+- | mov L:RB, SAVE_L
+- | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
+- | mov SAVE_PC, PC
+- | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
+- | mov L:RB->base, BASE
+- | jae >3
+- |2:
+- | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
+- | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
+- | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
+- | // Table * returned in eax (RC).
+- | mov BASE, L:RB->base
+- | movzx RA, PC_RA
+- | mov [BASE+RA*8], TAB:RC
+- | mov dword [BASE+RA*8+4], LJ_TTAB
+- | ins_next
+- |3:
+- | mov L:FCARG1, L:RB
+- | call extern lj_gc_step_fixtop@4 // (lua_State *L)
+- | movzx RD, PC_RD // Need to reload RD.
+- | not RDa
+- | jmp <2
+- break;
+-
+ case BC_GGET:
+- | ins_AND // RA = dst, RD = str const (~)
+- | mov LFUNC:RB, [BASE-8]
+- | mov TAB:RB, LFUNC:RB->env
+- | mov STR:RC, [KBASE+RD*4]
+- | jmp ->BC_TGETS_Z
+- break;
+ case BC_GSET:
+- | ins_AND // RA = src, RD = str const (~)
+- | mov LFUNC:RB, [BASE-8]
+- | mov TAB:RB, LFUNC:RB->env
+- | mov STR:RC, [KBASE+RD*4]
+- | jmp ->BC_TSETS_Z
+- break;
+-
+ case BC_TGETV:
+- | ins_ABC // RA = dst, RB = table, RC = key
+- | checktab RB, ->vmeta_tgetv
+- | mov TAB:RB, [BASE+RB*8]
+- |
+- | // Integer key?
+- |.if DUALNUM
+- | checkint RC, >5
+- | mov RC, dword [BASE+RC*8]
+- |.else
+- | // Convert number to int and back and compare.
+- | checknum RC, >5
+- | movsd xmm0, qword [BASE+RC*8]
+- | cvttsd2si RC, xmm0
+- | cvtsi2sd xmm1, RC
+- | ucomisd xmm0, xmm1
+- | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
+- |.endif
+- | cmp RC, TAB:RB->asize // Takes care of unordered, too.
+- | jae ->vmeta_tgetv // Not in array part? Use fallback.
+- | shl RC, 3
+- | add RC, TAB:RB->array
+- | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
+- | je >2
+- | // Get array slot.
+- |.if X64
+- | mov RBa, [RC]
+- | mov [BASE+RA*8], RBa
+- |.else
+- | mov RB, [RC]
+- | mov RC, [RC+4]
+- | mov [BASE+RA*8], RB
+- | mov [BASE+RA*8+4], RC
+- |.endif
+- |1:
+- | ins_next
+- |
+- |2: // Check for __index if table value is nil.
+- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+- | jz >3
+- | mov TAB:RA, TAB:RB->metatable
+- | test byte TAB:RA->nomm, 1<<MM_index
+- | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
+- | movzx RA, PC_RA // Restore RA.
+- |3:
+- | mov dword [BASE+RA*8+4], LJ_TNIL
+- | jmp <1
+- |
+- |5: // String key?
+- | checkstr RC, ->vmeta_tgetv
+- | mov STR:RC, [BASE+RC*8]
+- | jmp ->BC_TGETS_Z
+- break;
+ case BC_TGETS:
+- | ins_ABC // RA = dst, RB = table, RC = str const (~)
+- | not RCa
+- | mov STR:RC, [KBASE+RC*4]
+- | checktab RB, ->vmeta_tgets
+- | mov TAB:RB, [BASE+RB*8]
+- |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
+- | mov RA, TAB:RB->hmask
+- | and RA, STR:RC->hash
+- | imul RA, #NODE
+- | add NODE:RA, TAB:RB->node
+- |1:
+- | cmp dword NODE:RA->key.it, LJ_TSTR
+- | jne >4
+- | cmp dword NODE:RA->key.gcr, STR:RC
+- | jne >4
+- | // Ok, key found. Assumes: offsetof(Node, val) == 0
+- | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
+- | je >5 // Key found, but nil value?
+- | movzx RC, PC_RA
+- | // Get node value.
+- |.if X64
+- | mov RBa, [RA]
+- | mov [BASE+RC*8], RBa
+- |.else
+- | mov RB, [RA]
+- | mov RA, [RA+4]
+- | mov [BASE+RC*8], RB
+- | mov [BASE+RC*8+4], RA
+- |.endif
+- |2:
+- | ins_next
+- |
+- |3:
+- | movzx RC, PC_RA
+- | mov dword [BASE+RC*8+4], LJ_TNIL
+- | jmp <2
+- |
+- |4: // Follow hash chain.
+- | mov NODE:RA, NODE:RA->next
+- | test NODE:RA, NODE:RA
+- | jnz <1
+- | // End of hash chain: key not found, nil result.
+- |
+- |5: // Check for __index if table value is nil.
+- | mov TAB:RA, TAB:RB->metatable
+- | test TAB:RA, TAB:RA
+- | jz <3 // No metatable: done.
+- | test byte TAB:RA->nomm, 1<<MM_index
+- | jnz <3 // 'no __index' flag set: done.
+- | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
+- break;
+ case BC_TGETB:
+- | ins_ABC // RA = dst, RB = table, RC = byte literal
+- | checktab RB, ->vmeta_tgetb
+- | mov TAB:RB, [BASE+RB*8]
+- | cmp RC, TAB:RB->asize
+- | jae ->vmeta_tgetb
+- | shl RC, 3
+- | add RC, TAB:RB->array
+- | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
+- | je >2
+- | // Get array slot.
+- |.if X64
+- | mov RBa, [RC]
+- | mov [BASE+RA*8], RBa
+- |.else
+- | mov RB, [RC]
+- | mov RC, [RC+4]
+- | mov [BASE+RA*8], RB
+- | mov [BASE+RA*8+4], RC
+- |.endif
+- |1:
+- | ins_next
+- |
+- |2: // Check for __index if table value is nil.
+- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+- | jz >3
+- | mov TAB:RA, TAB:RB->metatable
+- | test byte TAB:RA->nomm, 1<<MM_index
+- | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
+- | movzx RA, PC_RA // Restore RA.
+- |3:
+- | mov dword [BASE+RA*8+4], LJ_TNIL
+- | jmp <1
+- break;
+ case BC_TGETR:
+- | ins_ABC // RA = dst, RB = table, RC = key
+- | mov TAB:RB, [BASE+RB*8]
+- |.if DUALNUM
+- | mov RC, dword [BASE+RC*8]
+- |.else
+- | cvttsd2si RC, qword [BASE+RC*8]
+- |.endif
+- | cmp RC, TAB:RB->asize
+- | jae ->vmeta_tgetr // Not in array part? Use fallback.
+- | shl RC, 3
+- | add RC, TAB:RB->array
+- | // Get array slot.
+- |->BC_TGETR_Z:
+- |.if X64
+- | mov RBa, [RC]
+- | mov [BASE+RA*8], RBa
+- |.else
+- | mov RB, [RC]
+- | mov RC, [RC+4]
+- | mov [BASE+RA*8], RB
+- | mov [BASE+RA*8+4], RC
+- |.endif
+- |->BC_TGETR2_Z:
+- | ins_next
+- break;
+-
+ case BC_TSETV:
+- | ins_ABC // RA = src, RB = table, RC = key
+- | checktab RB, ->vmeta_tsetv
+- | mov TAB:RB, [BASE+RB*8]
+- |
+- | // Integer key?
+- |.if DUALNUM
+- | checkint RC, >5
+- | mov RC, dword [BASE+RC*8]
+- |.else
+- | // Convert number to int and back and compare.
+- | checknum RC, >5
+- | movsd xmm0, qword [BASE+RC*8]
+- | cvttsd2si RC, xmm0
+- | cvtsi2sd xmm1, RC
+- | ucomisd xmm0, xmm1
+- | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
+- |.endif
+- | cmp RC, TAB:RB->asize // Takes care of unordered, too.
+- | jae ->vmeta_tsetv
+- | shl RC, 3
+- | add RC, TAB:RB->array
+- | cmp dword [RC+4], LJ_TNIL
+- | je >3 // Previous value is nil?
+- |1:
+- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+- | jnz >7
+- |2: // Set array slot.
+- |.if X64
+- | mov RBa, [BASE+RA*8]
+- | mov [RC], RBa
+- |.else
+- | mov RB, [BASE+RA*8+4]
+- | mov RA, [BASE+RA*8]
+- | mov [RC+4], RB
+- | mov [RC], RA
+- |.endif
+- | ins_next
+- |
+- |3: // Check for __newindex if previous value is nil.
+- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+- | jz <1
+- | mov TAB:RA, TAB:RB->metatable
+- | test byte TAB:RA->nomm, 1<<MM_newindex
+- | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
+- | movzx RA, PC_RA // Restore RA.
+- | jmp <1
+- |
+- |5: // String key?
+- | checkstr RC, ->vmeta_tsetv
+- | mov STR:RC, [BASE+RC*8]
+- | jmp ->BC_TSETS_Z
+- |
+- |7: // Possible table write barrier for the value. Skip valiswhite check.
+- | barrierback TAB:RB, RA
+- | movzx RA, PC_RA // Restore RA.
+- | jmp <2
+- break;
+ case BC_TSETS:
+- | ins_ABC // RA = src, RB = table, RC = str const (~)
+- | not RCa
+- | mov STR:RC, [KBASE+RC*4]
+- | checktab RB, ->vmeta_tsets
+- | mov TAB:RB, [BASE+RB*8]
+- |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
+- | mov RA, TAB:RB->hmask
+- | and RA, STR:RC->hash
+- | imul RA, #NODE
+- | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
+- | add NODE:RA, TAB:RB->node
+- |1:
+- | cmp dword NODE:RA->key.it, LJ_TSTR
+- | jne >5
+- | cmp dword NODE:RA->key.gcr, STR:RC
+- | jne >5
+- | // Ok, key found. Assumes: offsetof(Node, val) == 0
+- | cmp dword [RA+4], LJ_TNIL
+- | je >4 // Previous value is nil?
+- |2:
+- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+- | jnz >7
+- |3: // Set node value.
+- | movzx RC, PC_RA
+- |.if X64
+- | mov RBa, [BASE+RC*8]
+- | mov [RA], RBa
+- |.else
+- | mov RB, [BASE+RC*8+4]
+- | mov RC, [BASE+RC*8]
+- | mov [RA+4], RB
+- | mov [RA], RC
+- |.endif
+- | ins_next
+- |
+- |4: // Check for __newindex if previous value is nil.
+- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+- | jz <2
+- | mov TMP1, RA // Save RA.
+- | mov TAB:RA, TAB:RB->metatable
+- | test byte TAB:RA->nomm, 1<<MM_newindex
+- | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+- | mov RA, TMP1 // Restore RA.
+- | jmp <2
+- |
+- |5: // Follow hash chain.
+- | mov NODE:RA, NODE:RA->next
+- | test NODE:RA, NODE:RA
+- | jnz <1
+- | // End of hash chain: key not found, add a new one.
+- |
+- | // But check for __newindex first.
+- | mov TAB:RA, TAB:RB->metatable
+- | test TAB:RA, TAB:RA
+- | jz >6 // No metatable: continue.
+- | test byte TAB:RA->nomm, 1<<MM_newindex
+- | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+- |6:
+- | mov TMP1, STR:RC
+- | mov TMP2, LJ_TSTR
+- | mov TMP3, TAB:RB // Save TAB:RB for us.
+- |.if X64
+- | mov L:CARG1d, SAVE_L
+- | mov L:CARG1d->base, BASE
+- | lea CARG3, TMP1
+- | mov CARG2d, TAB:RB
+- | mov L:RB, L:CARG1d
+- |.else
+- | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
+- | mov ARG2, TAB:RB
+- | mov L:RB, SAVE_L
+- | mov ARG3, RC
+- | mov ARG1, L:RB
+- | mov L:RB->base, BASE
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+- | // Handles write barrier for the new key. TValue * returned in eax (RC).
+- | mov BASE, L:RB->base
+- | mov TAB:RB, TMP3 // Need TAB:RB for barrier.
+- | mov RA, eax
+- | jmp <2 // Must check write barrier for value.
+- |
+- |7: // Possible table write barrier for the value. Skip valiswhite check.
+- | barrierback TAB:RB, RC // Destroys STR:RC.
+- | jmp <3
+- break;
+ case BC_TSETB:
+- | ins_ABC // RA = src, RB = table, RC = byte literal
+- | checktab RB, ->vmeta_tsetb
+- | mov TAB:RB, [BASE+RB*8]
+- | cmp RC, TAB:RB->asize
+- | jae ->vmeta_tsetb
+- | shl RC, 3
+- | add RC, TAB:RB->array
+- | cmp dword [RC+4], LJ_TNIL
+- | je >3 // Previous value is nil?
+- |1:
+- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+- | jnz >7
+- |2: // Set array slot.
+- |.if X64
+- | mov RAa, [BASE+RA*8]
+- | mov [RC], RAa
+- |.else
+- | mov RB, [BASE+RA*8+4]
+- | mov RA, [BASE+RA*8]
+- | mov [RC+4], RB
+- | mov [RC], RA
+- |.endif
+- | ins_next
+- |
+- |3: // Check for __newindex if previous value is nil.
+- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+- | jz <1
+- | mov TAB:RA, TAB:RB->metatable
+- | test byte TAB:RA->nomm, 1<<MM_newindex
+- | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
+- | movzx RA, PC_RA // Restore RA.
+- | jmp <1
+- |
+- |7: // Possible table write barrier for the value. Skip valiswhite check.
+- | barrierback TAB:RB, RA
+- | movzx RA, PC_RA // Restore RA.
+- | jmp <2
+- break;
+ case BC_TSETR:
+- | ins_ABC // RA = src, RB = table, RC = key
+- | mov TAB:RB, [BASE+RB*8]
+- |.if DUALNUM
+- | mov RC, dword [BASE+RC*8]
+- |.else
+- | cvttsd2si RC, qword [BASE+RC*8]
+- |.endif
+- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+- | jnz >7
+- |2:
+- | cmp RC, TAB:RB->asize
+- | jae ->vmeta_tsetr
+- | shl RC, 3
+- | add RC, TAB:RB->array
+- | // Set array slot.
+- |->BC_TSETR_Z:
+- |.if X64
+- | mov RBa, [BASE+RA*8]
+- | mov [RC], RBa
+- |.else
+- | mov RB, [BASE+RA*8+4]
+- | mov RA, [BASE+RA*8]
+- | mov [RC+4], RB
+- | mov [RC], RA
+- |.endif
+- | ins_next
+- |
+- |7: // Possible table write barrier for the value. Skip valiswhite check.
+- | barrierback TAB:RB, RA
+- | movzx RA, PC_RA // Restore RA.
+- | jmp <2
+- break;
+-
+ case BC_TSETM:
+- | ins_AD // RA = base (table at base-1), RD = num const (start index)
+- | mov TMP1, KBASE // Need one more free register.
+- | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word.
+- |1:
+- | lea RA, [BASE+RA*8]
+- | mov TAB:RB, [RA-8] // Guaranteed to be a table.
+- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+- | jnz >7
+- |2:
+- | mov RD, MULTRES
+- | sub RD, 1
+- | jz >4 // Nothing to copy?
+- | add RD, KBASE // Compute needed size.
+- | cmp RD, TAB:RB->asize
+- | ja >5 // Doesn't fit into array part?
+- | sub RD, KBASE
+- | shl KBASE, 3
+- | add KBASE, TAB:RB->array
+- |3: // Copy result slots to table.
+- |.if X64
+- | mov RBa, [RA]
+- | add RA, 8
+- | mov [KBASE], RBa
+- |.else
+- | mov RB, [RA]
+- | mov [KBASE], RB
+- | mov RB, [RA+4]
+- | add RA, 8
+- | mov [KBASE+4], RB
+- |.endif
+- | add KBASE, 8
+- | sub RD, 1
+- | jnz <3
+- |4:
+- | mov KBASE, TMP1
+- | ins_next
+- |
+- |5: // Need to resize array part.
+- |.if X64
+- | mov L:CARG1d, SAVE_L
+- | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
+- | mov CARG2d, TAB:RB
+- | mov CARG3d, RD
+- | mov L:RB, L:CARG1d
+- |.else
+- | mov ARG2, TAB:RB
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE
+- | mov ARG3, RD
+- | mov ARG1, L:RB
+- |.endif
+- | mov SAVE_PC, PC
+- | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+- | mov BASE, L:RB->base
+- | movzx RA, PC_RA // Restore RA.
+- | jmp <1 // Retry.
+- |
+- |7: // Possible table write barrier for any value. Skip valiswhite check.
+- | barrierback TAB:RB, RD
+- | jmp <2
+- break;
+-
+- /* -- Calls and vararg handling ----------------------------------------- */
+-
+ case BC_CALL: case BC_CALLM:
+- | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+- if (op == BC_CALLM) {
+- | add NARGS:RD, MULTRES
+- }
+- | cmp dword [BASE+RA*8+4], LJ_TFUNC
+- | mov LFUNC:RB, [BASE+RA*8]
+- | jne ->vmeta_call_ra
+- | lea BASE, [BASE+RA*8+8]
+- | ins_call
+- break;
+-
+ case BC_CALLMT:
+- | ins_AD // RA = base, RD = extra_nargs
+- | add NARGS:RD, MULTRES
+- | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
+- break;
+ case BC_CALLT:
+- | ins_AD // RA = base, RD = nargs+1
+- | lea RA, [BASE+RA*8+8]
+- | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
+- | mov LFUNC:RB, [RA-8]
+- | cmp dword [RA-4], LJ_TFUNC
+- | jne ->vmeta_call
+- |->BC_CALLT_Z:
+- | mov PC, [BASE-4]
+- | test PC, FRAME_TYPE
+- | jnz >7
+- |1:
+- | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
+- | mov MULTRES, NARGS:RD
+- | sub NARGS:RD, 1
+- | jz >3
+- |2: // Move args down.
+- |.if X64
+- | mov RBa, [RA]
+- | add RA, 8
+- | mov [KBASE], RBa
+- |.else
+- | mov RB, [RA]
+- | mov [KBASE], RB
+- | mov RB, [RA+4]
+- | add RA, 8
+- | mov [KBASE+4], RB
+- |.endif
+- | add KBASE, 8
+- | sub NARGS:RD, 1
+- | jnz <2
+- |
+- | mov LFUNC:RB, [BASE-8]
+- |3:
+- | mov NARGS:RD, MULTRES
+- | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
+- | ja >5
+- |4:
+- | ins_callt
+- |
+- |5: // Tailcall to a fast function.
+- | test PC, FRAME_TYPE // Lua frame below?
+- | jnz <4
+- | movzx RA, PC_RA
+- | not RAa
+- | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE.
+- | mov KBASE, LFUNC:KBASE->pc
+- | mov KBASE, [KBASE+PC2PROTO(k)]
+- | jmp <4
+- |
+- |7: // Tailcall from a vararg function.
+- | sub PC, FRAME_VARG
+- | test PC, FRAME_TYPEP
+- | jnz >8 // Vararg frame below?
+- | sub BASE, PC // Need to relocate BASE/KBASE down.
+- | mov KBASE, BASE
+- | mov PC, [BASE-4]
+- | jmp <1
+- |8:
+- | add PC, FRAME_VARG
+- | jmp <1
+- break;
+-
+ case BC_ITERC:
+- | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
+- | lea RA, [BASE+RA*8+8] // fb = base+1
+- |.if X64
+- | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3].
+- | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2].
+- | mov [RA], RBa
+- | mov [RA+8], RCa
+- |.else
+- | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
+- | mov RC, [RA-20]
+- | mov [RA], RB
+- | mov [RA+4], RC
+- | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
+- | mov RC, [RA-12]
+- | mov [RA+8], RB
+- | mov [RA+12], RC
+- |.endif
+- | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
+- | mov RC, [RA-28]
+- | mov [RA-8], LFUNC:RB
+- | mov [RA-4], RC
+- | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
+- | mov NARGS:RD, 2+1
+- | jne ->vmeta_call
+- | mov BASE, RA
+- | ins_call
+- break;
+-
+ case BC_ITERN:
+- | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+- |.if JIT
+- | // NYI: add hotloop, record BC_ITERN.
+- |.endif
+- | mov TMP1, KBASE // Need two more free registers.
+- | mov TMP2, DISPATCH
+- | mov TAB:RB, [BASE+RA*8-16]
+- | mov RC, [BASE+RA*8-8] // Get index from control var.
+- | mov DISPATCH, TAB:RB->asize
+- | add PC, 4
+- | mov KBASE, TAB:RB->array
+- |1: // Traverse array part.
+- | cmp RC, DISPATCH; jae >5 // Index points after array part?
+- | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4
+- |.if DUALNUM
+- | mov dword [BASE+RA*8+4], LJ_TISNUM
+- | mov dword [BASE+RA*8], RC
+- |.else
+- | cvtsi2sd xmm0, RC
+- |.endif
+- | // Copy array slot to returned value.
+- |.if X64
+- | mov RBa, [KBASE+RC*8]
+- | mov [BASE+RA*8+8], RBa
+- |.else
+- | mov RB, [KBASE+RC*8+4]
+- | mov [BASE+RA*8+12], RB
+- | mov RB, [KBASE+RC*8]
+- | mov [BASE+RA*8+8], RB
+- |.endif
+- | add RC, 1
+- | // Return array index as a numeric key.
+- |.if DUALNUM
+- | // See above.
+- |.else
+- | movsd qword [BASE+RA*8], xmm0
+- |.endif
+- | mov [BASE+RA*8-8], RC // Update control var.
+- |2:
+- | movzx RD, PC_RD // Get target from ITERL.
+- | branchPC RD
+- |3:
+- | mov DISPATCH, TMP2
+- | mov KBASE, TMP1
+- | ins_next
+- |
+- |4: // Skip holes in array part.
+- | add RC, 1
+- | jmp <1
+- |
+- |5: // Traverse hash part.
+- | sub RC, DISPATCH
+- |6:
+- | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
+- | imul KBASE, RC, #NODE
+- | add NODE:KBASE, TAB:RB->node
+- | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7
+- | lea DISPATCH, [RC+DISPATCH+1]
+- | // Copy key and value from hash slot.
+- |.if X64
+- | mov RBa, NODE:KBASE->key
+- | mov RCa, NODE:KBASE->val
+- | mov [BASE+RA*8], RBa
+- | mov [BASE+RA*8+8], RCa
+- |.else
+- | mov RB, NODE:KBASE->key.gcr
+- | mov RC, NODE:KBASE->key.it
+- | mov [BASE+RA*8], RB
+- | mov [BASE+RA*8+4], RC
+- | mov RB, NODE:KBASE->val.gcr
+- | mov RC, NODE:KBASE->val.it
+- | mov [BASE+RA*8+8], RB
+- | mov [BASE+RA*8+12], RC
+- |.endif
+- | mov [BASE+RA*8-8], DISPATCH
+- | jmp <2
+- |
+- |7: // Skip holes in hash part.
+- | add RC, 1
+- | jmp <6
+- break;
+-
+ case BC_ISNEXT:
+- | ins_AD // RA = base, RD = target (points to ITERN)
+- | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5
+- | mov CFUNC:RB, [BASE+RA*8-24]
+- | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5
+- | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5
+- | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
+- | branchPC RD
+- | mov dword [BASE+RA*8-8], 0 // Initialize control var.
+- | mov dword [BASE+RA*8-4], 0xfffe7fff
+- |1:
+- | ins_next
+- |5: // Despecialize bytecode if any of the checks fail.
+- | mov PC_OP, BC_JMP
+- | branchPC RD
+- | mov byte [PC], BC_ITERC
+- | jmp <1
+- break;
+-
+ case BC_VARG:
+- | ins_ABC // RA = base, RB = nresults+1, RC = numparams
+- | mov TMP1, KBASE // Need one more free register.
+- | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
+- | lea RA, [BASE+RA*8]
+- | sub KBASE, [BASE-4]
+- | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
+- | test RB, RB
+- | jz >5 // Copy all varargs?
+- | lea RB, [RA+RB*8-8]
+- | cmp KBASE, BASE // No vararg slots?
+- | jnb >2
+- |1: // Copy vararg slots to destination slots.
+- |.if X64
+- | mov RCa, [KBASE-8]
+- | add KBASE, 8
+- | mov [RA], RCa
+- |.else
+- | mov RC, [KBASE-8]
+- | mov [RA], RC
+- | mov RC, [KBASE-4]
+- | add KBASE, 8
+- | mov [RA+4], RC
+- |.endif
+- | add RA, 8
+- | cmp RA, RB // All destination slots filled?
+- | jnb >3
+- | cmp KBASE, BASE // No more vararg slots?
+- | jb <1
+- |2: // Fill up remainder with nil.
+- | mov dword [RA+4], LJ_TNIL
+- | add RA, 8
+- | cmp RA, RB
+- | jb <2
+- |3:
+- | mov KBASE, TMP1
+- | ins_next
+- |
+- |5: // Copy all varargs.
+- | mov MULTRES, 1 // MULTRES = 0+1
+- | mov RC, BASE
+- | sub RC, KBASE
+- | jbe <3 // No vararg slots?
+- | mov RB, RC
+- | shr RB, 3
+- | add RB, 1
+- | mov MULTRES, RB // MULTRES = #varargs+1
+- | mov L:RB, SAVE_L
+- | add RC, RA
+- | cmp RC, L:RB->maxstack
+- | ja >7 // Need to grow stack?
+- |6: // Copy all vararg slots.
+- |.if X64
+- | mov RCa, [KBASE-8]
+- | add KBASE, 8
+- | mov [RA], RCa
+- |.else
+- | mov RC, [KBASE-8]
+- | mov [RA], RC
+- | mov RC, [KBASE-4]
+- | add KBASE, 8
+- | mov [RA+4], RC
+- |.endif
+- | add RA, 8
+- | cmp KBASE, BASE // No more vararg slots?
+- | jb <6
+- | jmp <3
+- |
+- |7: // Grow stack for varargs.
+- | mov L:RB->base, BASE
+- | mov L:RB->top, RA
+- | mov SAVE_PC, PC
+- | sub KBASE, BASE // Need delta, because BASE may change.
+- | mov FCARG2, MULTRES
+- | sub FCARG2, 1
+- | mov FCARG1, L:RB
+- | call extern lj_state_growstack@8 // (lua_State *L, int n)
+- | mov BASE, L:RB->base
+- | mov RA, L:RB->top
+- | add KBASE, BASE
+- | jmp <6
+- break;
+-
+- /* -- Returns ----------------------------------------------------------- */
+-
+ case BC_RETM:
+- | ins_AD // RA = results, RD = extra_nresults
+- | add RD, MULTRES // MULTRES >=1, so RD >=1.
+- | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
+- break;
+-
+ case BC_RET: case BC_RET0: case BC_RET1:
+- | ins_AD // RA = results, RD = nresults+1
+- if (op != BC_RET0) {
+- | shl RA, 3
+- }
+- |1:
+- | mov PC, [BASE-4]
+- | mov MULTRES, RD // Save nresults+1.
+- | test PC, FRAME_TYPE // Check frame type marker.
+- | jnz >7 // Not returning to a fixarg Lua func?
+- switch (op) {
+- case BC_RET:
+- |->BC_RET_Z:
+- | mov KBASE, BASE // Use KBASE for result move.
+- | sub RD, 1
+- | jz >3
+- |2: // Move results down.
+- |.if X64
+- | mov RBa, [KBASE+RA]
+- | mov [KBASE-8], RBa
+- |.else
+- | mov RB, [KBASE+RA]
+- | mov [KBASE-8], RB
+- | mov RB, [KBASE+RA+4]
+- | mov [KBASE-4], RB
+- |.endif
+- | add KBASE, 8
+- | sub RD, 1
+- | jnz <2
+- |3:
+- | mov RD, MULTRES // Note: MULTRES may be >255.
+- | movzx RB, PC_RB // So cannot compare with RDL!
+- |5:
+- | cmp RB, RD // More results expected?
+- | ja >6
+- break;
+- case BC_RET1:
+- |.if X64
+- | mov RBa, [BASE+RA]
+- | mov [BASE-8], RBa
+- |.else
+- | mov RB, [BASE+RA+4]
+- | mov [BASE-4], RB
+- | mov RB, [BASE+RA]
+- | mov [BASE-8], RB
+- |.endif
+- /* fallthrough */
+- case BC_RET0:
+- |5:
+- | cmp PC_RB, RDL // More results expected?
+- | ja >6
+- default:
+- break;
+- }
+- | movzx RA, PC_RA
+- | not RAa // Note: ~RA = -(RA+1)
+- | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
+- | mov LFUNC:KBASE, [BASE-8]
+- | mov KBASE, LFUNC:KBASE->pc
+- | mov KBASE, [KBASE+PC2PROTO(k)]
+- | ins_next
+- |
+- |6: // Fill up results with nil.
+- if (op == BC_RET) {
+- | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
+- | add KBASE, 8
+- } else {
+- | mov dword [BASE+RD*8-12], LJ_TNIL
+- }
+- | add RD, 1
+- | jmp <5
+- |
+- |7: // Non-standard return case.
+- | lea RB, [PC-FRAME_VARG]
+- | test RB, FRAME_TYPEP
+- | jnz ->vm_return
+- | // Return from vararg function: relocate BASE down and RA up.
+- | sub BASE, RB
+- if (op != BC_RET0) {
+- | add RA, RB
+- }
+- | jmp <1
+- break;
+-
+- /* -- Loops and branches ------------------------------------------------ */
+-
+- |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4]
+- |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12]
+- |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20]
+- |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28]
+-
+ case BC_FORL:
+- |.if JIT
+- | hotloop RB
+- |.endif
+- | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
+- break;
+-
+ case BC_JFORI:
+ case BC_JFORL:
+-#if !LJ_HASJIT
+- break;
+-#endif
+ case BC_FORI:
+ case BC_IFORL:
+- vk = (op == BC_IFORL || op == BC_JFORL);
+- | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
+- | lea RA, [BASE+RA*8]
+- if (LJ_DUALNUM) {
+- | cmp FOR_TIDX, LJ_TISNUM; jne >9
+- if (!vk) {
+- | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for
+- | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for
+- | mov RB, dword FOR_IDX
+- | cmp dword FOR_STEP, 0; jl >5
+- } else {
+-#ifdef LUA_USE_ASSERT
+- | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type
+- | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type
+-#endif
+- | mov RB, dword FOR_STEP
+- | test RB, RB; js >5
+- | add RB, dword FOR_IDX; jo >1
+- | mov dword FOR_IDX, RB
+- }
+- | cmp RB, dword FOR_STOP
+- | mov FOR_TEXT, LJ_TISNUM
+- | mov dword FOR_EXT, RB
+- if (op == BC_FORI) {
+- | jle >7
+- |1:
+- |6:
+- | branchPC RD
+- } else if (op == BC_JFORI) {
+- | branchPC RD
+- | movzx RD, PC_RD
+- | jle =>BC_JLOOP
+- |1:
+- |6:
+- } else if (op == BC_IFORL) {
+- | jg >7
+- |6:
+- | branchPC RD
+- |1:
+- } else {
+- | jle =>BC_JLOOP
+- |1:
+- |6:
+- }
+- |7:
+- | ins_next
+- |
+- |5: // Invert check for negative step.
+- if (vk) {
+- | add RB, dword FOR_IDX; jo <1
+- | mov dword FOR_IDX, RB
+- }
+- | cmp RB, dword FOR_STOP
+- | mov FOR_TEXT, LJ_TISNUM
+- | mov dword FOR_EXT, RB
+- if (op == BC_FORI) {
+- | jge <7
+- } else if (op == BC_JFORI) {
+- | branchPC RD
+- | movzx RD, PC_RD
+- | jge =>BC_JLOOP
+- } else if (op == BC_IFORL) {
+- | jl <7
+- } else {
+- | jge =>BC_JLOOP
+- }
+- | jmp <6
+- |9: // Fallback to FP variant.
+- } else if (!vk) {
+- | cmp FOR_TIDX, LJ_TISNUM
+- }
+- if (!vk) {
+- | jae ->vmeta_for
+- | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for
+- } else {
+-#ifdef LUA_USE_ASSERT
+- | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type
+- | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type
+-#endif
+- }
+- | mov RB, FOR_TSTEP // Load type/hiword of for step.
+- if (!vk) {
+- | cmp RB, LJ_TISNUM; jae ->vmeta_for
+- }
+- | movsd xmm0, qword FOR_IDX
+- | movsd xmm1, qword FOR_STOP
+- if (vk) {
+- | addsd xmm0, qword FOR_STEP
+- | movsd qword FOR_IDX, xmm0
+- | test RB, RB; js >3
+- } else {
+- | jl >3
+- }
+- | ucomisd xmm1, xmm0
+- |1:
+- | movsd qword FOR_EXT, xmm0
+- if (op == BC_FORI) {
+- |.if DUALNUM
+- | jnb <7
+- |.else
+- | jnb >2
+- | branchPC RD
+- |.endif
+- } else if (op == BC_JFORI) {
+- | branchPC RD
+- | movzx RD, PC_RD
+- | jnb =>BC_JLOOP
+- } else if (op == BC_IFORL) {
+- |.if DUALNUM
+- | jb <7
+- |.else
+- | jb >2
+- | branchPC RD
+- |.endif
+- } else {
+- | jnb =>BC_JLOOP
+- }
+- |.if DUALNUM
+- | jmp <6
+- |.else
+- |2:
+- | ins_next
+- |.endif
+- |
+- |3: // Invert comparison if step is negative.
+- | ucomisd xmm0, xmm1
+- | jmp <1
+- break;
+-
+ case BC_ITERL:
+- |.if JIT
+- | hotloop RB
+- |.endif
+- | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
+- break;
+-
+ case BC_JITERL:
+-#if !LJ_HASJIT
+- break;
+-#endif
+ case BC_IITERL:
+- | ins_AJ // RA = base, RD = target
+- | lea RA, [BASE+RA*8]
+- | mov RB, [RA+4]
+- | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
+- if (op == BC_JITERL) {
+- | mov [RA-4], RB
+- | mov RB, [RA]
+- | mov [RA-8], RB
+- | jmp =>BC_JLOOP
+- } else {
+- | branchPC RD // Otherwise save control var + branch.
+- | mov RD, [RA]
+- | mov [RA-4], RB
+- | mov [RA-8], RD
+- }
+- |1:
+- | ins_next
+- break;
+-
+ case BC_LOOP:
+- | ins_A // RA = base, RD = target (loop extent)
+- | // Note: RA/RD is only used by trace recorder to determine scope/extent
+- | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
+- |.if JIT
+- | hotloop RB
+- |.endif
+- | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
+- break;
+-
+ case BC_ILOOP:
+- | ins_A // RA = base, RD = target (loop extent)
+- | ins_next
+- break;
+-
+ case BC_JLOOP:
+- |.if JIT
+- | ins_AD // RA = base (ignored), RD = traceno
+- | mov RA, [DISPATCH+DISPATCH_J(trace)]
+- | mov TRACE:RD, [RA+RD*4]
+- | mov RDa, TRACE:RD->mcode
+- | mov L:RB, SAVE_L
+- | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
+- | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
+- | // Save additional callee-save registers only used in compiled code.
+- |.if X64WIN
+- | mov TMPQ, r12
+- | mov TMPa, r13
+- | mov CSAVE_4, r14
+- | mov CSAVE_3, r15
+- | mov RAa, rsp
+- | sub rsp, 9*16+4*8
+- | movdqa [RAa], xmm6
+- | movdqa [RAa-1*16], xmm7
+- | movdqa [RAa-2*16], xmm8
+- | movdqa [RAa-3*16], xmm9
+- | movdqa [RAa-4*16], xmm10
+- | movdqa [RAa-5*16], xmm11
+- | movdqa [RAa-6*16], xmm12
+- | movdqa [RAa-7*16], xmm13
+- | movdqa [RAa-8*16], xmm14
+- | movdqa [RAa-9*16], xmm15
+- |.elif X64
+- | mov TMPQ, r12
+- | mov TMPa, r13
+- | sub rsp, 16
+- |.endif
+- | jmp RDa
+- |.endif
+- break;
+-
+ case BC_JMP:
+- | ins_AJ // RA = unused, RD = target
+- | branchPC RD
+- | ins_next
+- break;
+-
+- /* -- Function headers -------------------------------------------------- */
+-
+- /*
+- ** Reminder: A function may be called with func/args above L->maxstack,
+- ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
+- ** too. This means all FUNC* ops (including fast functions) must check
+- ** for stack overflow _before_ adding more slots!
+- */
+-
+ case BC_FUNCF:
+- |.if JIT
+- | hotcall RB
+- |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+- | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
+- break;
+-
+ case BC_JFUNCF:
+-#if !LJ_HASJIT
+- break;
+-#endif
+ case BC_IFUNCF:
+- | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
+- | mov KBASE, [PC-4+PC2PROTO(k)]
+- | mov L:RB, SAVE_L
+- | lea RA, [BASE+RA*8] // Top of frame.
+- | cmp RA, L:RB->maxstack
+- | ja ->vm_growstack_f
+- | movzx RA, byte [PC-4+PC2PROTO(numparams)]
+- | cmp NARGS:RD, RA // Check for missing parameters.
+- | jbe >3
+- |2:
+- if (op == BC_JFUNCF) {
+- | movzx RD, PC_RD
+- | jmp =>BC_JLOOP
+- } else {
+- | ins_next
+- }
+- |
+- |3: // Clear missing parameters.
+- | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL
+- | add NARGS:RD, 1
+- | cmp NARGS:RD, RA
+- | jbe <3
+- | jmp <2
+- break;
+-
+ case BC_JFUNCV:
+-#if !LJ_HASJIT
+- break;
+-#endif
+- | int3 // NYI: compiled vararg functions
+- break; /* NYI: compiled vararg functions. */
+-
+ case BC_IFUNCV:
+- | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
+- | lea RB, [NARGS:RD*8+FRAME_VARG]
+- | lea RD, [BASE+NARGS:RD*8]
+- | mov LFUNC:KBASE, [BASE-8]
+- | mov [RD-4], RB // Store delta + FRAME_VARG.
+- | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC.
+- | mov L:RB, SAVE_L
+- | lea RA, [RD+RA*8]
+- | cmp RA, L:RB->maxstack
+- | ja ->vm_growstack_v // Need to grow stack.
+- | mov RA, BASE
+- | mov BASE, RD
+- | movzx RB, byte [PC-4+PC2PROTO(numparams)]
+- | test RB, RB
+- | jz >2
+- |1: // Copy fixarg slots up to new frame.
+- | add RA, 8
+- | cmp RA, BASE
+- | jnb >3 // Less args than parameters?
+- | mov KBASE, [RA-8]
+- | mov [RD], KBASE
+- | mov KBASE, [RA-4]
+- | mov [RD+4], KBASE
+- | add RD, 8
+- | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
+- | sub RB, 1
+- | jnz <1
+- |2:
+- if (op == BC_JFUNCV) {
+- | movzx RD, PC_RD
+- | jmp =>BC_JLOOP
+- } else {
+- | mov KBASE, [PC-4+PC2PROTO(k)]
+- | ins_next
+- }
+- |
+- |3: // Clear missing parameters.
+- | mov dword [RD+4], LJ_TNIL
+- | add RD, 8
+- | sub RB, 1
+- | jnz <3
+- | jmp <2
+- break;
+-
+ case BC_FUNCC:
+ case BC_FUNCCW:
+- | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
+- | mov CFUNC:RB, [BASE-8]
+- | mov KBASEa, CFUNC:RB->f
+- | mov L:RB, SAVE_L
+- | lea RD, [BASE+NARGS:RD*8-8]
+- | mov L:RB->base, BASE
+- | lea RA, [RD+8*LUA_MINSTACK]
+- | cmp RA, L:RB->maxstack
+- | mov L:RB->top, RD
+- if (op == BC_FUNCC) {
+- |.if X64
+- | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
+- |.else
+- | mov ARG1, L:RB
+- |.endif
+- } else {
+- |.if X64
+- | mov CARG2, KBASEa
+- | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
+- |.else
+- | mov ARG2, KBASEa
+- | mov ARG1, L:RB
+- |.endif
+- }
+- | ja ->vm_growstack_c // Need to grow stack.
+- | set_vmstate C
+- if (op == BC_FUNCC) {
+- | call KBASEa // (lua_State *L)
+- } else {
+- | // (lua_State *L, lua_CFunction f)
+- | call aword [DISPATCH+DISPATCH_GL(wrapf)]
+- }
+- | // nresults returned in eax (RD).
+- | mov BASE, L:RB->base
+- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+- | set_vmstate INTERP
+- | lea RA, [BASE+RD*8]
+- | neg RA
+- | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
+- | mov PC, [BASE-4] // Fetch PC of caller.
+- | jmp ->vm_returnc
++ | lg r0, 0(r0) // Not implemented, seg fault.
+ break;
+
+ /* ---------------------------------------------------------------------- */
+@@ -5241,314 +702,4 @@ static int build_backend(BuildCtx *ctx)
+ /* Emit pseudo frame-info for all assembler functions. */
+ static void emit_asm_debug(BuildCtx *ctx)
+ {
+- int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
+-#if LJ_64
+-#define SZPTR "8"
+-#define BSZPTR "3"
+-#define REG_SP "0x7"
+-#define REG_RA "0x10"
+-#else
+-#define SZPTR "4"
+-#define BSZPTR "2"
+-#define REG_SP "0x4"
+-#define REG_RA "0x8"
+-#endif
+- switch (ctx->mode) {
+- case BUILD_elfasm:
+- fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+- fprintf(ctx->fp,
+- ".Lframe0:\n"
+- "\t.long .LECIE0-.LSCIE0\n"
+- ".LSCIE0:\n"
+- "\t.long 0xffffffff\n"
+- "\t.byte 0x1\n"
+- "\t.string \"\"\n"
+- "\t.uleb128 0x1\n"
+- "\t.sleb128 -" SZPTR "\n"
+- "\t.byte " REG_RA "\n"
+- "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
+- "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
+- "\t.align " SZPTR "\n"
+- ".LECIE0:\n\n");
+- fprintf(ctx->fp,
+- ".LSFDE0:\n"
+- "\t.long .LEFDE0-.LASFDE0\n"
+- ".LASFDE0:\n"
+- "\t.long .Lframe0\n"
+-#if LJ_64
+- "\t.quad .Lbegin\n"
+- "\t.quad %d\n"
+- "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+- "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
+- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
+- "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
+- "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
+-#if LJ_NO_UNWIND
+- "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
+- "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
+-#endif
+-#else
+- "\t.long .Lbegin\n"
+- "\t.long %d\n"
+- "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+- "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
+- "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
+- "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
+- "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
+-#endif
+- "\t.align " SZPTR "\n"
+- ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
+-#if LJ_HASFFI
+- fprintf(ctx->fp,
+- ".LSFDE1:\n"
+- "\t.long .LEFDE1-.LASFDE1\n"
+- ".LASFDE1:\n"
+- "\t.long .Lframe0\n"
+-#if LJ_64
+- "\t.quad lj_vm_ffi_call\n"
+- "\t.quad %d\n"
+- "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
+- "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
+- "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
+- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
+-#else
+- "\t.long lj_vm_ffi_call\n"
+- "\t.long %d\n"
+- "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
+- "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
+- "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
+- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
+-#endif
+- "\t.align " SZPTR "\n"
+- ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+-#endif
+-#if !LJ_NO_UNWIND
+-#if (defined(__sun__) && defined(__svr4__))
+-#if LJ_64
+- fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
+-#else
+- fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
+-#endif
+-#else
+- fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+-#endif
+- fprintf(ctx->fp,
+- ".Lframe1:\n"
+- "\t.long .LECIE1-.LSCIE1\n"
+- ".LSCIE1:\n"
+- "\t.long 0\n"
+- "\t.byte 0x1\n"
+- "\t.string \"zPR\"\n"
+- "\t.uleb128 0x1\n"
+- "\t.sleb128 -" SZPTR "\n"
+- "\t.byte " REG_RA "\n"
+- "\t.uleb128 6\n" /* augmentation length */
+- "\t.byte 0x1b\n" /* pcrel|sdata4 */
+- "\t.long lj_err_unwind_dwarf-.\n"
+- "\t.byte 0x1b\n" /* pcrel|sdata4 */
+- "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
+- "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
+- "\t.align " SZPTR "\n"
+- ".LECIE1:\n\n");
+- fprintf(ctx->fp,
+- ".LSFDE2:\n"
+- "\t.long .LEFDE2-.LASFDE2\n"
+- ".LASFDE2:\n"
+- "\t.long .LASFDE2-.Lframe1\n"
+- "\t.long .Lbegin-.\n"
+- "\t.long %d\n"
+- "\t.uleb128 0\n" /* augmentation length */
+- "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+-#if LJ_64
+- "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
+- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
+- "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
+- "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
+-#else
+- "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
+- "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
+- "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
+- "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
+-#endif
+- "\t.align " SZPTR "\n"
+- ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
+-#if LJ_HASFFI
+- fprintf(ctx->fp,
+- ".Lframe2:\n"
+- "\t.long .LECIE2-.LSCIE2\n"
+- ".LSCIE2:\n"
+- "\t.long 0\n"
+- "\t.byte 0x1\n"
+- "\t.string \"zR\"\n"
+- "\t.uleb128 0x1\n"
+- "\t.sleb128 -" SZPTR "\n"
+- "\t.byte " REG_RA "\n"
+- "\t.uleb128 1\n" /* augmentation length */
+- "\t.byte 0x1b\n" /* pcrel|sdata4 */
+- "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
+- "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
+- "\t.align " SZPTR "\n"
+- ".LECIE2:\n\n");
+- fprintf(ctx->fp,
+- ".LSFDE3:\n"
+- "\t.long .LEFDE3-.LASFDE3\n"
+- ".LASFDE3:\n"
+- "\t.long .LASFDE3-.Lframe2\n"
+- "\t.long lj_vm_ffi_call-.\n"
+- "\t.long %d\n"
+- "\t.uleb128 0\n" /* augmentation length */
+-#if LJ_64
+- "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
+- "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
+- "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
+- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
+-#else
+- "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
+- "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
+- "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
+- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
+-#endif
+- "\t.align " SZPTR "\n"
+- ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+-#endif
+-#endif
+- break;
+-#if !LJ_NO_UNWIND
+- /* Mental note: never let Apple design an assembler.
+- ** Or a linker. Or a plastic case. But I digress.
+- */
+- case BUILD_machasm: {
+-#if LJ_HASFFI
+- int fcsize = 0;
+-#endif
+- int i;
+- fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
+- fprintf(ctx->fp,
+- "EH_frame1:\n"
+- "\t.set L$set$x,LECIEX-LSCIEX\n"
+- "\t.long L$set$x\n"
+- "LSCIEX:\n"
+- "\t.long 0\n"
+- "\t.byte 0x1\n"
+- "\t.ascii \"zPR\\0\"\n"
+- "\t.byte 0x1\n"
+- "\t.byte 128-" SZPTR "\n"
+- "\t.byte " REG_RA "\n"
+- "\t.byte 6\n" /* augmentation length */
+- "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
+-#if LJ_64
+- "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
+- "\t.byte 0x1b\n" /* pcrel|sdata4 */
+- "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
+-#else
+- "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"
+- "\t.byte 0x1b\n" /* pcrel|sdata4 */
+- "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */
+-#endif
+- "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
+- "\t.align " BSZPTR "\n"
+- "LECIEX:\n\n");
+- for (i = 0; i < ctx->nsym; i++) {
+- const char *name = ctx->sym[i].name;
+- int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
+- if (size == 0) continue;
+-#if LJ_HASFFI
+- if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
+-#endif
+- fprintf(ctx->fp,
+- "%s.eh:\n"
+- "LSFDE%d:\n"
+- "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
+- "\t.long L$set$%d\n"
+- "LASFDE%d:\n"
+- "\t.long LASFDE%d-EH_frame1\n"
+- "\t.long %s-.\n"
+- "\t.long %d\n"
+- "\t.byte 0\n" /* augmentation length */
+- "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
+-#if LJ_64
+- "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
+- "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
+- "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
+- "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
+-#else
+- "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
+- "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */
+- "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */
+- "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */
+-#endif
+- "\t.align " BSZPTR "\n"
+- "LEFDE%d:\n\n",
+- name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
+- }
+-#if LJ_HASFFI
+- if (fcsize) {
+- fprintf(ctx->fp,
+- "EH_frame2:\n"
+- "\t.set L$set$y,LECIEY-LSCIEY\n"
+- "\t.long L$set$y\n"
+- "LSCIEY:\n"
+- "\t.long 0\n"
+- "\t.byte 0x1\n"
+- "\t.ascii \"zR\\0\"\n"
+- "\t.byte 0x1\n"
+- "\t.byte 128-" SZPTR "\n"
+- "\t.byte " REG_RA "\n"
+- "\t.byte 1\n" /* augmentation length */
+-#if LJ_64
+- "\t.byte 0x1b\n" /* pcrel|sdata4 */
+- "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
+-#else
+- "\t.byte 0x1b\n" /* pcrel|sdata4 */
+- "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */
+-#endif
+- "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
+- "\t.align " BSZPTR "\n"
+- "LECIEY:\n\n");
+- fprintf(ctx->fp,
+- "_lj_vm_ffi_call.eh:\n"
+- "LSFDEY:\n"
+- "\t.set L$set$yy,LEFDEY-LASFDEY\n"
+- "\t.long L$set$yy\n"
+- "LASFDEY:\n"
+- "\t.long LASFDEY-EH_frame2\n"
+- "\t.long _lj_vm_ffi_call-.\n"
+- "\t.long %d\n"
+- "\t.byte 0\n" /* augmentation length */
+-#if LJ_64
+- "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
+- "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
+- "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
+- "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
+-#else
+- "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */
+- "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
+- "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */
+- "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */
+-#endif
+- "\t.align " BSZPTR "\n"
+- "LEFDEY:\n\n", fcsize);
+- }
+-#endif
+-#if !LJ_64
+- fprintf(ctx->fp,
+- "\t.non_lazy_symbol_pointer\n"
+- "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
+- ".indirect_symbol _lj_err_unwind_dwarf\n"
+- ".long 0\n\n");
+- fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
+- {
+- const char *const *xn;
+- for (xn = ctx->extnames; *xn; xn++)
+- if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
+- fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
+- }
+-#endif
+- fprintf(ctx->fp, ".subsections_via_symbols\n");
+- }
+- break;
+-#endif
+- default: /* Difficult for other modes. */
+- break;
+- }
+ }
+
+From 1c7a727f4aeda1f9e586648ce7508e808a951806 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Mon, 12 Dec 2016 11:21:42 +0530
+Subject: [PATCH 100/260] Correct the range of parameter, and merge the two
+ case
+
+---
+ dynasm/dasm_s390x.h | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index 5be8e8a71..c1de35711 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -290,9 +290,6 @@ void dasm_put(Dst_DECL, int start, ...)
+ b[pos++] = n;
+ break;
+ case DASM_LEN4HR:
+- CK(n >= 1 && n <= 128, RANGE_I);
+- b[pos++] = n;
+- break;
+ case DASM_LEN4LR:
+ CK(n >= 1 && n <= 128, RANGE_I);
+ b[pos++] = n;
+@@ -478,7 +475,7 @@ int dasm_encode(Dst_DECL, void *buffer)
+ cp[-1] |= (n - 1) & 0xff;
+ break;
+ case DASM_LEN4HR:
+- cp[-1] |= (n - 1) & 0xf0;
++ cp[-1] |= ((n - 1) << 4) & 0xf0;
+ break;
+ case DASM_LEN4LR:
+ cp[-1] |= (n - 1) & 0x0f;
+
+From 361a298371cbdf2fd4993156c6f2ac9211bd484c Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 12 Dec 2016 14:38:55 -0500
+Subject: [PATCH 101/260] Add lhi instruction and fix immediate parsing.
+
+We were reading immediate values as hexadecimal values, really we
+want the default to be decimal unless the immediate has a '0x' prefix.
+---
+ dynasm/Examples/test_z_inst.c | 4 ++--
+ dynasm/dasm_s390x.lua | 31 +++++++++++++++++++------------
+ 2 files changed, 21 insertions(+), 14 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index a8895c052..d0939064e 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -190,8 +190,8 @@ static void save(dasm_State *state)
+ |.endmacro
+ |
+ | saveregs
+- | lgfi r7, 10 // 16
+- | lgfi r8, 20 // 32
++ | lgfi r7, 0x10 // 16
++ | lgfi r8, 0x20 // 32
+ | agr r2, r3
+ | agr r7, r8
+ | msgr r2, r7
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 08d44a3ee..d3ed723f1 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -459,28 +459,34 @@ local function parse_mem_l2b(arg,high_l)
+ return dval, lval, parse_reg(b), dact, lact
+ end
+
+-local function parse_imm(arg)
+- local imm_val = tonumber(arg,16)
++local function parse_imm32(imm)
++ local imm_val = tonumber(imm)
+ if imm_val then
+ if not is_int32(imm_val) then
+- werror("Immediate value out of range: ", imm_val)
++ werror("immediate value out of range: ", imm_val)
+ end
+- wputhw(band(shr(imm_val, 16), 0xffff));
+- wputhw(band(imm_val, 0xffff));
++ wputhw(band(shr(imm_val, 16), 0xffff))
++ wputhw(band(imm_val, 0xffff))
++ elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
++ match(imm, "^([%w_]+):(r1?[0-9])$") then
++ werror("expected immediate operand, got register")
+ else
+- waction("IMM32", nil, arg) -- if we get label
++ waction("IMM32", nil, imm) -- if we get label
+ end
+ end
+
+-local function parse_imm16(arg)
+- local imm_val = tonumber(arg,16)
++local function parse_imm16(imm)
++ local imm_val = tonumber(imm)
+ if imm_val then
+ if not is_int16(imm_val) then
+- werror("Immediate value out of range: ", imm_val)
++ werror("immediate value out of range: ", imm_val)
+ end
+- wputhw(imm_val)
++ wputhw(band(imm_val, 0xffff))
++ elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
++ match(imm, "^([%w_]+):(r1?[0-9])$") then
++ werror("expected immediate operand, got register")
+ else
+- waction("IMM16", nil, arg)
++ waction("IMM16", nil, imm)
+ end
+ end
+
+@@ -842,6 +848,7 @@ map_op = {
+ lgh_2 = "e30000000015l",
+ lghr_2 = "0000b9070000h",
+ lhh_2 = "e300000000c4l",
++ lhi_2 = "0000a7080000i",
+ lhrl_2 = "c40500000000o",
+ lghrl_2 = "c40400000000o",
+ lfh_2 = "e300000000cal",
+@@ -1161,7 +1168,7 @@ local function parse_template(params, template, nparams, pos)
+ elseif p == "n" then
+ op0 = op0 + shl(parse_reg(params[1]), 4)
+ wputhw(op0);
+- parse_imm(params[2])
++ parse_imm32(params[2])
+ elseif p == "o" then
+ op0 = op0 + shl(parse_reg(params[1]), 4)
+ wputhw(op0);
+
+From 0b120ac64bcf35914b8b1d17eaa9b7f2ab0f012a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 12 Dec 2016 17:17:34 -0500
+Subject: [PATCH 102/260] Add partial implementation of vm_cpcall.
+
+Currently works if the call returns 0. Haven't yet written the code
+needed to handle the non-zero case.
+---
+ dynasm/dasm_s390x.lua | 2 ++
+ src/vm_s390x.dasc | 39 +++++++++++++++++++++++++++++++++++++--
+ 2 files changed, 39 insertions(+), 2 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index d3ed723f1..60d61bd78 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -651,6 +651,7 @@ map_op = {
+ chlr_2 = "0000b9dd0000h",
+ cfi_2 = "c20d00000000n",
+ cgfi_2 = "c20c00000000n",
++ cghi_2 = "0000a70f0000i",
+ cih_2 = "cc0d00000000n",
+ cl_2 = "000055000000j",
+ clr_2 = "000000001500g",
+@@ -782,6 +783,7 @@ map_op = {
+ lgr_2 = "0000b9040000h",
+ lgf_2 = "e30000000014l",
+ lgfr_2 = "0000b9140000h",
++ lghi_2 = "0000a7090000i",
+ lxr_2 = "0000b3650000h",
+ ld_2 = "000068000000j",
+ ldr_2 = "000000002800g",
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index bdd063d8f..88fef7da9 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -63,6 +63,7 @@
+ |
+ |// Register save area.
+ |.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes).
++|.define SAVE_GPRS_P, 48(sp) // Save area for r6-r15 (10*8 bytes) in prologue (before stack frame is allocated).
+ |
+ |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
+ |.define SAVE_ERRF, 280(sp) // Argument 4, in r5.
+@@ -88,8 +89,9 @@
+ |.define CALLEESAVE, 000(sp) // <- sp in interpreter.
+ |
+ |.macro saveregs
++| stmg r6, r15, SAVE_GPRS_P
+ | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
+-| stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
++| // TODO: save backchain?
+ | std f8, SAVE_FPR8 // f8-f15 are callee-saved.
+ | std f9, SAVE_FPR9
+ | std f10, SAVE_FPR10
+@@ -110,7 +112,6 @@
+ | ld f14, SAVE_FPR14
+ | ld f15, SAVE_FPR15
+ | lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
+-|// br r14 to return?
+ |.endmacro
+ |
+ |// Type definitions. Some of these are only used for documentation.
+@@ -175,6 +176,10 @@
+ | ins_NEXT
+ | .endmacro
+ |.endif
++|
++|// Assumes DISPATCH is relative to GL.
++#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
++#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
+
+ /* Generate subroutines used by opcodes and other parts of the VM. */
+ /* The .code_sub section should be last to help static branch prediction. */
+@@ -193,8 +198,13 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vm_return:
+ |
+ |->vm_leave_cp:
++ | lg RA, SAVE_CFRAME // Restore previous C frame.
++ | stg RA, L:LREG->cframe
++ | lghi CRET1, 0 // Ok return status for vm_pcall.
+ |
+ |->vm_leave_unw:
++ | restoreregs
++ | br r14
+ |
+ |->vm_unwind_yield:
+ |
+@@ -230,6 +240,31 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vm_call_dispatch_f:
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
++ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
++ | saveregs
++ | lgr LREG, CARG1
++ | stg LREG, SAVE_L
++ | stg LREG, SAVE_PC // Any value outside of bytecode is ok.
++ |
++ | lg KBASE, L:LREG->stack // Compute -savestack(L, L->top).
++ | sg KBASE, L:LREG->top
++ | lg DISPATCH, L:LREG->glref // Setup pointer to dispatch table.
++ | lghi RA, 0
++ | stg RA, SAVE_ERRF // No error function.
++ | stg KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
++ | aghi DISPATCH, GG_G2DISP
++ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
++ |
++ | lg KBASE, L:LREG->cframe // Add our C frame to cframe chain.
++ | stg KBASE, SAVE_CFRAME
++ | stg sp, L:LREG->cframe
++ | stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
++ |
++ | basr r14, CARG4 // (lua_State *L, lua_CFunction func, void *ud)
++ | // TValue * (new base) or NULL returned in r2 (CRET1/).
++ | cghi CRET1, 0
++ | je ->vm_leave_cp // No base? Just remove C frame.
++ | stg r0, 0(r0)
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+
+From 3ef1f2153162402fc6abb3bf147a1abdcbcf9730 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 13 Dec 2016 11:26:53 -0500
+Subject: [PATCH 103/260] Add support for SIL instructions in DynASM.
+
+---
+ dynasm/Examples/test_z_inst.c | 19 ++++++++++++++++++-
+ dynasm/dasm_s390x.lua | 10 ++++++++++
+ 2 files changed, 28 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index d0939064e..c09ae0831 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -308,6 +308,22 @@ static void type(dasm_State *state) {
+ | br r14
+ }
+
++static void sil(dasm_State *state) {
++ dasm_State **Dst = &state;
++
++ | lay sp, -16(sp)
++ | xc 0(16, sp), 0(sp)
++ | mvghi 0(sp), 5
++ | mvhi 8(sp), 7
++ | mvhhi 12(sp), 11
++ | lghi r2, 0
++ | ag r2, 0(sp) // r2 += 5
++ | a r2, 8(sp) // r2 += 7
++ | ah r2, 12(sp) // r2 += 11
++ | la sp, 16(sp)
++ | br r14
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+@@ -335,7 +351,8 @@ test_table test[] = {
+ // { 2,4, load_test, 4,"load_test"},
+ {-1, 0, ssa, 65535<<8, "ssa"},
+ {-1, 0, ssa_act, 65535<<8, "ssa_act"},
+- {27, 0, type, 27, "type"}
++ {27, 0, type, 27, "type"},
++ { 0, 0, sil, 23, "sil"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 60d61bd78..e3c8f2633 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -971,6 +971,9 @@ map_op = {
+ msfi_2 = "c20100000000n",
+ msgfi_2 = "c20000000000n",
+ maer_3 = "0000b32e0000r",
++ mvhhi_2 = "e54400000000SIL",
++ mvhi_2 = "e54c00000000SIL",
++ mvghi_2 = "e54800000000SIL",
+ o_2 = "000056000000j",
+ or_2 = "000000001600g",
+ oy_2 = "e30000000056l",
+@@ -1219,6 +1222,13 @@ local function parse_template(params, template, nparams, pos)
+ if d1a then d1a() end
+ wputhw(op2)
+ if d2a then d2a() end
++ elseif p == "SIL" then
++ wputhw(op0)
++ local d, b, a = parse_mem_b(params[1])
++ op1 = op1 + shl(b, 12) + d
++ wputhw(op1)
++ if a then a() end
++ parse_imm16(params[2])
+ elseif p == "w" then
+ local mode, n, s = parse_label(params[1])
+ wputhw(op1)
+
+From c83f4af9cc6bb205ac6971308c39130b531bd874 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 13 Dec 2016 17:01:20 -0500
+Subject: [PATCH 104/260] Add more RI-a (register-immediate) instructions.
+
+---
+ dynasm/dasm_s390x.h | 2 +-
+ dynasm/dasm_s390x.lua | 26 +++++++++++++++++++++++++-
+ 2 files changed, 26 insertions(+), 2 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index c1de35711..cebce222f 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -269,7 +269,7 @@ void dasm_put(Dst_DECL, int start, ...)
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM16:
+- CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
++ CK(((short)n) == n || ((unsigned short)n) == n, RANGE_I); /* TODO: is this the right way to handle unsigned immediates? */
+ ofs += 2;
+ b[pos++] = n;
+ break;
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index e3c8f2633..e0fb916ac 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -299,6 +299,10 @@ local function is_int32(num)
+ return -2147483648 <= num and num < 2147483648
+ end
+
++local function is_uint16(num) -- True if num fits an unsigned 16-bit immediate field.
++  return 0 <= num and num <= 0xffff -- Inclusive bound: 0xffff itself is encodable.
++end
++
+ local function is_int16(num)
+ return -32768 <= num and num < 32768
+ end
+@@ -478,7 +482,7 @@ end
+ local function parse_imm16(imm)
+ local imm_val = tonumber(imm)
+ if imm_val then
+- if not is_int16(imm_val) then
++ if not is_int16(imm_val) and not is_uint16(imm_val) then
+ werror("immediate value out of range: ", imm_val)
+ end
+ wputhw(band(imm_val, 0xffff))
+@@ -581,7 +585,11 @@ map_op = {
+ ng_2 = "e30000000080l",
+ ngr_2 = "0000b9800000h",
+ nihf_2 = "c00a00000000n",
++ nihh_2 = "0000a5040000i",
++ nihl_2 = "0000a5050000i",
+ nilf_2 = "c00b00000000n",
++ nilh_2 = "0000a5060000i",
++ nill_2 = "0000a5070000i",
+ bal_2 = "000045000000j",
+ balr_2 = "000000000500g",
+ bas_2 = "00004d000000j",
+@@ -772,7 +780,11 @@ map_op = {
+ icm_3 = "0000bf000000r",
+ icmy_3 = "eb0000000081t",
+ iihf_2 = "c00800000000n",
++ iihh_2 = "0000a5000000i",
++ iihl_2 = "0000a5010000i",
+ iilf_2 = "c00900000000n",
++ iilh_2 = "0000a5020000i",
++ iill_2 = "0000a5030000i",
+ ipm_2 = "0000b2220000h",
+ iske_2 = "0000b2290000h",
+ ivsk_2 = "0000b2230000h",
+@@ -876,7 +888,11 @@ map_op = {
+ llhrl_2 = "c40200000000o",
+ llghrl_2 = "c40600000000o",
+ llihf_2 = "c00e00000000n",
++ llihh_2 = "0000a50c0000i",
++ llihl_2 = "0000a50d0000i",
+ llilf_2 = "c00f00000000n",
++ llilh_2 = "0000a50e0000i",
++ llill_2 = "0000a50f0000i",
+ llgfrl_2 = "c40e00000000o",
+ llgt_2 = "e30000000017l",
+ llgtr_2 = "0000b9170000h",
+@@ -980,7 +996,11 @@ map_op = {
+ og_2 = "e30000000081l",
+ ogr_2 = "0000b9810000h",
+ oihf_2 = "c00c00000000n",
++ oihh_2 = "0000a5080000i",
++ oihl_2 = "0000a5090000i",
+ oilf_2 = "c00d00000000n",
++ oilh_2 = "0000a50a0000i",
++ oill_2 = "0000a50b0000i",
+ pgin_2 = "0000b22e0000h",
+ pgout_2 = "0000b22f0000h",
+ pcc_2 = "0000b92c0000h",
+@@ -1099,6 +1119,10 @@ map_op = {
+ swr_2 = "000000002f00g",
+ tar_2 = "0000b24c0000h",
+ tb_2 = "0000b22c0000h",
++ tmhh_2 = "0000a7020000i",
++ tmhl_2 = "0000a7030000i",
++ tmlh_2 = "0000a7000000i",
++ tmll_2 = "0000a7010000i",
+ trace_3 = "000099000000q",
+ tracg_3 = "eb000000000fs",
+ tre_2 = "0000b2a50000h",
+
+From 7644f40b1a3cda946d733370a506e8a4b2cadd6b Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 13 Dec 2016 18:31:43 -0500
+Subject: [PATCH 105/260] Add more interpreter code.
+
+Compilation is currently broken: a label is missing.
+---
+ src/vm_s390x.dasc | 323 ++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 309 insertions(+), 14 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 88fef7da9..a1a4d7695 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -34,10 +34,11 @@
+ |.define PC, r9 // Next PC.
+ |.define DISPATCH, r10 // Opcode dispatch table.
+ |.define LREG, r11 // Register holding lua_State (also in SAVE_L).
++|.define ITYPE, r13 //
+ |
+ |// The following temporaries are not saved across C calls, except for RD.
+-|.define RA, r0 // Cannot be dereferenced.
+-|.define RB, r1
++|.define RA, r1 // Cannot be dereferenced.
++|.define RB, r12
+ |.define RC, r5 // Overlaps CARG4.
+ |.define RD, r6 // Overlaps CARG5. Callee-saved.
+ |
+@@ -56,7 +57,7 @@
+ |.define CRET1, r2
+ |
+ |.define OP, r2
+-|.define TMP1, r3
++|.define TMP1, r14
+ |
+ |// Stack layout while in interpreter. Must match with lj_frame.h.
+ |.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
+@@ -144,20 +145,20 @@
+ |// Instruction decode+dispatch.
+ | // TODO: tune this, right now we always decode RA-D even if they aren't used.
+ |.macro ins_NEXT
+-| l RD, (PC)
++| llgf RD, 0(PC)
+ | // 32 63
+ | // [ B | C | A | OP ]
+ | // [ D | A | OP ]
+-| llhr RA, RD
+-| srl RA, #8
+-| llcr OP, RD
+-| srl RD, #16
+-| lr RB, RD
+-| srl RB, #8
+-| llcr RC, RD
++| llghr RA, RD
++| srlg RA, RA, 8(r0)
++| llgcr OP, RD
++| srlg RD, RD, 16(r0)
++| lgr RB, RD
++| srlg RB, RB, 8(r0)
++| llgcr RC, RD
+ | la PC, 4(PC)
+ | llgfr TMP1, OP
+-| sll TMP1, #3 // TMP1=OP*8
++| sllg TMP1, TMP1, 3(r0) // TMP1=OP*8
+ | b 0(TMP1, DISPATCH)
+ |.endmacro
+ |
+@@ -177,9 +178,89 @@
+ | .endmacro
+ |.endif
+ |
++|// Call decode and dispatch.
++|.macro ins_callt
++| // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC
++| lg PC, LFUNC:RB->pc
++| llgf RA, 0(PC) // TODO: combine loads?
++| llgcr OP, RA
++| sllg TMP1, OP, 3(r0)
++| la PC, 4(PC)
++| lg TMP1, 0(TMP1, DISPATCH)
++| br TMP1
++|.endmacro
++|
++|.macro ins_call
++| // BASE = new base, RB = LFUNC, RD = nargs+1
++| stg PC, -8(BASE)
++| ins_callt
++|.endmacro
++|
+ |// Assumes DISPATCH is relative to GL.
+ #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
+ #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
++|
++#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
++|
++|//-----------------------------------------------------------------------
++|
++|// Macros to clear or set tags.
++|.macro cleartp, reg; sllg reg, reg, 17(r0); srlg reg, reg, 17(r0); .endmacro // TODO: use nihf instead? would introduce dependence on z9-109.
++|.macro settp, reg, tp
++| oihh reg, ((tp>>1) &0xffff)
++| oihl reg, ((tp<<15)&0x8000)
++|.endmacro
++|.macro setint, reg
++| settp reg, LJ_TISNUM
++|.endmacro
++|
++|// Macros to test operand types.
++|.macro checktp_nc, reg, tp, target
++| srag ITYPE, reg, 47(r0)
++| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
++| jne target
++|.endmacro
++|.macro checktp, reg, tp, target
++| srag ITYPE, reg, 47(r0)
++| cleartp reg
++| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
++| jne target
++|.endmacro
++|.macro checktptp, src, tp, target
++| srag ITYPE, src, 47(r0)
++| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
++| jne target
++|.endmacro
++|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
++|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
++|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
++|
++|.macro checknumx, reg, target, jump
++| srag ITYPE, reg, 47(r0)
++| cghi ITYPE, LJ_TISNUM // Sign extend LJ_TISNUM tp from 16- to 64-bits.
++| jump target
++|.endmacro
++|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
++|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
++|.macro checknum, reg, target; checknumx reg, target, jhe; .endmacro
++|.macro checknumtp, src, target; checknumx src, target, jhe; .endmacro
++|.macro checknumber, src, target; checknumx src, target, jh; .endmacro
++|
++|.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro // assumes LJ_TFALSE == ~(1<<47)
++|.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro // assumes LJ_TTRUE == ~(2<<47)
++|
++|.define PC_OP, -4(PC)
++|.define PC_RA, -3(PC)
++|.define PC_RB, -1(PC)
++|.define PC_RC, -2(PC)
++|.define PC_RD, -2(PC)
++|
++|// Set current VM state.
++|.macro set_vmstate, st
++| lghi TMP1, ~LJ_VMST_..st
++| stg TMP1, DISPATCH_GL(vmstate)(DISPATCH)
++|.endmacro
++|
+
+ /* Generate subroutines used by opcodes and other parts of the VM. */
+ /* The .code_sub section should be last to help static branch prediction. */
+@@ -192,10 +273,58 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
++ | cghi PC, 0
++ | je ->cont_dispatch
++ |
++ | // Return from pcall or xpcall fast func.
++ | nill PC, -7
++ | sgr BASE, PC // Restore caller base.
++ | lay RA, -8(RA, PC) // Rebase RA and prepend one result.
++ | lg PC, -8(BASE) // Fetch PC of previous frame.
++ | // Prepending may overwrite the pcall frame, so do it at the end.
++ | load_true ITYPE
++ | stg ITYPE, 0(RA, BASE) // Prepend true to results.
+ |
+ |->vm_returnc:
++ | ahi RD, 1 // RD = nresults+1
++ | jo ->vm_unwind_yield // TODO: !!! NOT SURE, jz on x64, overflow? !!!
++ | stg RD, SAVE_MULTRES
++ | tmll PC, FRAME_TYPE
++ | je ->BC_RET_Z // Handle regular return to Lua.
+ |
+ |->vm_return:
++ | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
++ | lghi TMP1, FRAME_C
++ | xgr PC, TMP1
++ | tmll PC, FRAME_TYPE
++ | jne ->vm_returnp
++ |
++ | // Return to C.
++ | set_vmstate C
++ | nill PC, -8
++ | sgr PC, BASE
++ | lcgr PC, PC // Previous base = BASE - delta.
++ |
++ | ahi RD, -1
++ | je >2
++ |1: // Move results down.
++ | lg RB, 0(BASE, RA)
++ | stg RB, -16(BASE)
++ | la BASE, 8(BASE)
++ | ahi RD, -1
++ | jne <1
++ |2:
++ | lg L:RB, SAVE_L
++ | stg PC, L:RB->base
++ |3:
++ | lg RD, SAVE_MULTRES
++ | lg RA, SAVE_NRES // RA = wanted nresults+1
++ |4:
++ | cgr RA, RD
++ | jne >6 // More/less results wanted?
++ |5:
++ | lay BASE, -16(BASE)
++ | stg BASE, L:RB->top
+ |
+ |->vm_leave_cp:
+ | lg RA, SAVE_CFRAME // Restore previous C frame.
+@@ -206,7 +335,40 @@ static void build_subroutines(BuildCtx *ctx)
+ | restoreregs
+ | br r14
+ |
++ |6:
++ | jl >7 // Less results wanted?
++ | // More results wanted. Check stack size and fill up results with nil.
++ | cg BASE, L:RB->maxstack
++ | jh >8
++ | lghi TMP1, LJ_TNIL
++ | stg TMP1, -16(BASE)
++ | la BASE, 8(BASE)
++ | aghi RD, 1
++ | j <4
++ |
++ |7: // Fewer results wanted.
++ | cghi RA, 0
++ | je <5 // But check for LUA_MULTRET+1.
++ | sgr RA, RD // Negative result!
++ | sllg TMP1, RA, 3(r0)
++ | lay BASE, 0(TMP1, BASE) // Correct top.
++ | j <5
++ |
++ |8: // Corner case: need to grow stack for filling up results.
++ | // This can happen if:
++ | // - A C function grows the stack (a lot).
++ | // - The GC shrinks the stack in between.
++ | // - A return back from a lua_call() with (high) nresults adjustment.
++ | stg BASE, L:RB->top // Save current top held in BASE (yes).
++ | stg RD, SAVE_MULTRES // Need to fill only remainder with nil.
++ | lgr CARG2, RA
++ | lgr CARG1, L:RB
++ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
++ | lg BASE, L:RB->top // Need the (realloced) L->top in BASE.
++ | j <3
++ |
+ |->vm_unwind_yield:
++ | stg r0, 0(r0)
+ |
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
+@@ -219,6 +381,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_growstack_c: // Grow stack for C function.
++ | stg r0, 0(r0)
+ |
+ |->vm_growstack_v: // Grow stack for vararg Lua function.
+ |
+@@ -235,9 +398,26 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vm_call: // Setup C frame and enter VM.
+ |
++ |2: // Entry point for vm_resume/vm_cpcall (RA = base, LREG = L, PC = ftype).
++ | stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
++ | set_vmstate INTERP
++ | lg BASE, L:LREG->base // BASE = old base (used in vmeta_call).
++ | agr PC, RA
++ | sgr PC, BASE // PC = frame delta + frame type
++ |
++ | lg RD, L:LREG->top
++ | sgr RD, RA
++ | srlg NARGS:RD, NARGS:RD, 3(r0) // TODO: support '3' on its own in dynasm.
++ | aghi NARGS:RD, 1 // RD = nargs+1
++ |
+ |->vm_call_dispatch:
++ | lg LFUNC:RB, -16(RA)
++ | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
+ |
+ |->vm_call_dispatch_f:
++ | lgr BASE, RA
++ | ins_call
++ | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+@@ -264,7 +444,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | // TValue * (new base) or NULL returned in r2 (CRET1/).
+ | cghi CRET1, 0
+ | je ->vm_leave_cp // No base? Just remove C frame.
+- | stg r0, 0(r0)
++ | lgr RA, CRET1
++ | lghi PC, FRAME_CP
++ | j <2 // Else continue with the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+@@ -690,7 +872,84 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_ISNEXT:
+ case BC_VARG:
+ case BC_RETM:
++ | stg r0, 0(r0) // not implemented
++ break;
++
+ case BC_RET: case BC_RET0: case BC_RET1:
++ | ins_AD // RA = results, RD = nresults+1
++ if (op != BC_RET0) {
++ | sllg RA, RA, 3(r0)
++ }
++ |1:
++ | lg PC, -8(BASE)
++ | stg RD, SAVE_MULTRES // Save nresults+1.
++ | tmll PC, FRAME_TYPE // Check frame type marker.
++ | jne >7 // Not returning to a fixarg Lua func?
++ switch (op) {
++ case BC_RET:
++ |->BC_RET_Z:
++ | lgr KBASE, BASE // Use KBASE for result move.
++ | aghi RD, -1
++ | je >3
++ |2: // Move results down.
++ | lg RB, 0(KBASE, RA)
++ | stg RB, -16(KBASE)
++ | la KBASE, 8(KBASE)
++ | // TODO: replace with brctg RD, <2 once supported.
++ | aghi RD, -1
++ | jne <2
++ |3:
++ | lg RD, SAVE_MULTRES // Note: MULTRES may be >255.
++ | llgc RB, PC_RB
++ |5:
++ | cgr RB, RD // More results expected?
++ | jh >6
++ break;
++ case BC_RET1:
++ | lg RB, 0(BASE, RA)
++ | stg RB, -16(BASE)
++ /* fallthrough */
++ case BC_RET0:
++ |5:
++ | llgc TMP1, PC_RB
++ | cgr TMP1, RD
++ | jh >6
++ default:
++ break;
++ }
++ | llgc RA, PC_RA
++ | lcgr RA, RA
++ | sllg RA, RA, 3(r0)
++ | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
++ | lg LFUNC:KBASE, -16(BASE)
++ | cleartp LFUNC:KBASE
++ | lg KBASE, LFUNC:KBASE->pc
++ | lg KBASE, PC2PROTO(k)(KBASE)
++ | ins_next
++ |
++ |6: // Fill up results with nil.
++ | lghi TMP1, LJ_TNIL
++ if (op == BC_RET) {
++ | stg TMP1, -16(KBASE) // Note: relies on shifted base.
++ | la KBASE, 8(KBASE)
++ } else {
++ | sllg RC, RD, 3(r0) // RC used as temp.
++ | stg TMP1, -24(RC, BASE)
++ }
++ | la RD, 1(RD)
++ | j <5
++ |
++ |7: // Non-standard return case.
++ | lay RB, -FRAME_VARG(PC)
++ | tmll RB, FRAME_TYPEP
++ | jne ->vm_return
++ | // Return from vararg function: relocate BASE down and RA up.
++ | sgr BASE, RB
++ if (op != BC_RET0) {
++ | agr RA, RB
++ }
++ | j <1
++ break;
+ case BC_FORL:
+ case BC_JFORI:
+ case BC_JFORL:
+@@ -709,9 +968,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_IFUNCF:
+ case BC_JFUNCV:
+ case BC_IFUNCV:
++ | lg r0, 0(r0) // Not implemented, seg fault.
++ break;
++
+ case BC_FUNCC:
+ case BC_FUNCCW:
+- | lg r0, 0(r0) // Not implemented, seg fault.
++ | ins_AD // BASE = new base, RD = nargs+1
++ | lg CFUNC:RB, -16(BASE)
++ | cleartp CFUNC:RB
++ | lg KBASE, CFUNC:RB->f
++ | lg L:RB, SAVE_L
++ | sllg RD, NARGS:RD, 3(r0)
++ | lay RD, -8(RD,BASE)
++ | stg BASE, L:RB->base
++ | lay RA, (8*LUA_MINSTACK)(RD)
++ | cg RA, L:RB->maxstack
++ | stg RD, L:RB->top
++ | lgr CARG1, L:RB // Caveat: CARG1 may be RA.
++ if (op != BC_FUNCC) {
++ | lgr CARG2, KBASE
++ }
++ | jh ->vm_growstack_c // Need to grow stack.
++ | set_vmstate C
++ if (op == BC_FUNCC) {
++ | basr r14, KBASE // (lua_State *L)
++ } else {
++ | // (lua_State *L, lua_CFunction f)
++ | lg TMP1, (DISPATCH_GL(wrapf))(DISPATCH)
++ | basr r14, TMP1 // TODO: TMP1==r14, is this ok?
++ }
++ | // nresults returned in r2 (CRET1).
++ | lg BASE, L:RB->base
++ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
++ | set_vmstate INTERP
++ | sllg TMP1, RD, 3(r0)
++ | la RA, 0(TMP1, BASE)
++ | lcgr RA, RA
++ | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
++ | lg PC, -8(BASE) // Fetch PC of caller.
++ | j ->vm_returnc
+ break;
+
+ /* ---------------------------------------------------------------------- */
+
+From fc5874c951db1c89152d50df7350e6c83569d38d Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 12:22:08 +0530
+Subject: [PATCH 106/260] Added RRF-e support
+
+---
+ dynasm/dasm_s390x.lua | 27 +++++++++++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index e0fb916ac..da59ff9b9 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -494,6 +494,24 @@ local function parse_imm16(imm)
+ end
+ end
+
++local function parse_mask(arg)
++ local m3 = parse_number(arg)
++ if ((m3 == 1) or (m3 == 0) or ( m3 >=3 and m3 <=7)) then
++ return m3
++ else
++ werror("Mask value should be 0,1 or 3-7: ", m3)
++ end
++end
++
++local function parse_mask2(arg)
++ local m4 = parse_number(arg)
++ if ( m4 >=0 and m4 <=1) then
++ return m4
++ else
++ werror("Mask value should be 0 or 1: ", m4)
++ end
++end
++
+ local function parse_label(label, def)
+ local prefix = sub(label, 1, 2)
+ -- =>label (pc label reference)
+@@ -1144,6 +1162,8 @@ map_op = {
+ unpku_2 = "e20000000000SS-a",
+ xc_2 = "d70000000000SS-a",
+ ap_2 = "fa0000000000SS-b",
++ cfebr_3 = "0000b3980000RRF-e",
++ cfebra_4 = "0000b3980000RRF-e",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1253,6 +1273,13 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op1)
+ if a then a() end
+ parse_imm16(params[2])
++ elseif p == "RRF-e" then
++ wputhw(op1)
++ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[1]),12) + parse_reg(params[3])
++ if params[4] then
++ op2 = op2 + shl(parse_mask2(params[4]),8)
++ end
++ wputhw(op2)
+ elseif p == "w" then
+ local mode, n, s = parse_label(params[1])
+ wputhw(op1)
+
+From 2f96ca3d9105b29a6477838c08c854a18da4ba16 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 12:29:46 +0530
+Subject: [PATCH 107/260] Adding support for RXE mode instructions
+
+---
+ dynasm/dasm_s390x.lua | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index da59ff9b9..8d30c93b4 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1162,8 +1162,11 @@ map_op = {
+ unpku_2 = "e20000000000SS-a",
+ xc_2 = "d70000000000SS-a",
+ ap_2 = "fa0000000000SS-b",
++ -- RRF-e instructions
+ cfebr_3 = "0000b3980000RRF-e",
+ cfebra_4 = "0000b3980000RRF-e",
++ -- RXE instructions
++ sqdb_2 = "ed0000000015RXE",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1280,6 +1283,15 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + shl(parse_mask2(params[4]),8)
+ end
+ wputhw(op2)
++ elseif p == "RXE" then
++ local d, x, b, a = parse_mem_bx(params[2])
++ op0 = op0 + shl(parse_reg(params[1]), 4) + x
++ op1 = op1 + shl(b, 12) + d
++ -- m3 is not present, so assumed its not part of the instruction since its not passed as a prameter
++ wputhw(op0);
++ wputhw(op1);
++ if a then a() end
++ wputhw(op2);
+ elseif p == "w" then
+ local mode, n, s = parse_label(params[1])
+ wputhw(op1)
+
+From 45669fecef45695a68481d3298b4c2d81e2afb40 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 12:34:11 +0530
+Subject: [PATCH 108/260] Added RRF-b mode support
+
+---
+ dynasm/dasm_s390x.lua | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 8d30c93b4..e0c47331e 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1167,6 +1167,8 @@ map_op = {
+ cfebra_4 = "0000b3980000RRF-e",
+ -- RXE instructions
+ sqdb_2 = "ed0000000015RXE",
++ -- RRF-b instructions
++ didbr_4 = "0000b3580000RRF-b",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1292,6 +1294,10 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op1);
+ if a then a() end
+ wputhw(op2);
++ elseif p == "RRF-b" then
++ wputhw(op1);
++ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + parse_mask(params[4])
++ wputhw(op2)
+ elseif p == "w" then
+ local mode, n, s = parse_label(params[1])
+ wputhw(op1)
+
+From bc490013a332a2f5701563c8e5252b84f5fcae7d Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 13:08:32 +0530
+Subject: [PATCH 109/260] Adding S mode instructions support
+
+---
+ dynasm/dasm_s390x.lua | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index e0c47331e..9ef15ad9b 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1166,9 +1166,11 @@ map_op = {
+ cfebr_3 = "0000b3980000RRF-e",
+ cfebra_4 = "0000b3980000RRF-e",
+ -- RXE instructions
+- sqdb_2 = "ed0000000015RXE",
++ sqdb_2 = "ed0000000015RXE",
+ -- RRF-b instructions
+- didbr_4 = "0000b3580000RRF-b",
++ didbr_4 = "0000b3580000RRF-b",
++ -- S mode instructions
++ stfl_1 = "0000b2b10000sS",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1298,6 +1300,12 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op1);
+ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + parse_mask(params[4])
+ wputhw(op2)
++ elseif p =="sS" then
++ wputhw(op1);
++ local d, b, a = parse_mem_b(params[1])
++ op2 = op2 + shl(b,12) + d;
++ wputhw(op2)
++ if a then a() end
+ elseif p == "w" then
+ local mode, n, s = parse_label(params[1])
+ wputhw(op1)
+
+From 52ab0596dafeca6c680e3688a6303b2804ced6f2 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 13:16:44 +0530
+Subject: [PATCH 110/260] Added support for I mode instructions
+
+---
+ dynasm/dasm_s390x.lua | 27 +++++++++++++++++++++++----
+ 1 file changed, 23 insertions(+), 4 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 9ef15ad9b..c2deaaa99 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -494,8 +494,20 @@ local function parse_imm16(imm)
+ end
+ end
+
+-local function parse_mask(arg)
+- local m3 = parse_number(arg)
++local function parse_imm8(imm)
++ local imm_val = tonumber(imm)
++ if imm_val then
++ if not is_int8(imm_val) then
++ werror("Immediate value out of range: ", imm_val)
++ end
++ else
++ iact = function() waction("IMM8",nil,imm) end
++ end
++ return imm_val, iact
++end
++
++local function parse_mask(mask)
++ local m3 = parse_number(mask)
+ if ((m3 == 1) or (m3 == 0) or ( m3 >=3 and m3 <=7)) then
+ return m3
+ else
+@@ -503,8 +515,8 @@ local function parse_mask(arg)
+ end
+ end
+
+-local function parse_mask2(arg)
+- local m4 = parse_number(arg)
++local function parse_mask2(mask)
++ local m4 = parse_number(mask)
+ if ( m4 >=0 and m4 <=1) then
+ return m4
+ else
+@@ -1171,6 +1183,8 @@ map_op = {
+ didbr_4 = "0000b3580000RRF-b",
+ -- S mode instructions
+ stfl_1 = "0000b2b10000sS",
++ -- I- mdoe instructions
++ svc_1 = "000000000a00iI",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1306,6 +1320,11 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + shl(b,12) + d;
+ wputhw(op2)
+ if a then a() end
++ elseif p =="iI" then
++ local imm_val, a = parse_imm8(params[1])
++ op2 = op2 + imm_val;
++ wputhw(op2);
++ if a then a() end
+ elseif p == "w" then
+ local mode, n, s = parse_label(params[1])
+ wputhw(op1)
+
+From cdb31062b775b56da0115993cd9f9a374f1dec73 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 13:24:44 +0530
+Subject: [PATCH 111/260] Added the action part for I mode
+
+---
+ dynasm/dasm_s390x.lua | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index c2deaaa99..a25cc9681 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -39,7 +39,7 @@ local wline, werror, wfatal, wwarn
+ local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+- "REL_PC", "LABEL_PC", "DISP12", "DISP20", "IMM16", "IMM32", "LEN8R","LEN4HR","LEN4LR",
++ "REL_PC", "LABEL_PC", "DISP12", "DISP20", "IMM8", "IMM16", "IMM32", "LEN8R","LEN4HR","LEN4LR",
+ }
+
+ -- Maximum number of section buffer positions for dasm_put().
+@@ -307,6 +307,10 @@ local function is_int16(num)
+ return -32768 <= num and num < 32768
+ end
+
++local function is_int8(num)
++ return -128 <= num and num < 128
++end
++
+ -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
+ -- If x is not specified then it is 0.
+ local function split_memop(arg)
+
+From 4641b9a42dea1912952a2ba5115134f7c9d2639d Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 13:30:49 +0530
+Subject: [PATCH 112/260] Added C support for I mode instructions
+
+---
+ dynasm/dasm_s390x.h | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index cebce222f..b98df8fd3 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -23,7 +23,7 @@ enum {
+ /* The following actions also have an argument. */
+ DASM_REL_PC, DASM_LABEL_PC,
+ DASM_DISP12, DASM_DISP20,
+- DASM_IMM16, DASM_IMM32,
++ DASM_IMM8, DASM_IMM16, DASM_IMM32,
+ DASM_LEN8R,DASM_LEN4HR,DASM_LEN4LR,
+ DASM__MAX
+ };
+@@ -268,6 +268,9 @@ void dasm_put(Dst_DECL, int start, ...)
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
++ case DASM_IMM8:
++ b[pos++] = n;
++ break;
+ case DASM_IMM16:
+ CK(((short)n) == n || ((unsigned short)n) == n, RANGE_I); /* TODO: is this the right way to handle unsigned immediates? */
+ ofs += 2;
+@@ -370,6 +373,7 @@ int dasm_link(Dst_DECL, size_t * szp)
+ p++;
+ b[pos++] += ofs;
+ break;
++ case DASM_IMM8:
+ case DASM_IMM16:
+ case DASM_IMM32:
+ case DASM_DISP20:
+@@ -457,6 +461,9 @@ int dasm_encode(Dst_DECL, void *buffer)
+ break;
+ case DASM_LABEL_PC:
+ break;
++ case DASM_IMM8:
++ cp[-1] |= n & 0xff;
++ break;
+ case DASM_IMM16:
+ *cp++ = n;
+ break;
+
+From a8244c02ecb37eb67fe288680a0ad8f1ee3fe278 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 13:41:55 +0530
+Subject: [PATCH 113/260] Added support for RI-b and RI-c mode instructions
+
+---
+ dynasm/dasm_s390x.lua | 16 +++++++++++++++-
+ 1 file changed, 15 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index a25cc9681..fe6d6a30d 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1187,8 +1187,12 @@ map_op = {
+ didbr_4 = "0000b3580000RRF-b",
+ -- S mode instructions
+ stfl_1 = "0000b2b10000sS",
+- -- I- mdoe instructions
++ -- I- mode instructions
+ svc_1 = "000000000a00iI",
++ -- RI-b mode instructions
++ bras_2 = "0000a7050000RI-b",
++ -- RI-c mode instructions
++ brc_2 = "0000a7040000RI-c",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1329,6 +1333,16 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + imm_val;
+ wputhw(op2);
+ if a then a() end
++ elseif p == "RI-b" then
++ op1 = op1 + shl(parse_reg(params[1]),4)
++ wputhw(op1)
++ local mode, n, s = parse_label(params[2])
++ waction("REL_"..mode, n, s)
++ elseif p == "RI-c" then
++ op1 = op1 + shl(parse_num(params[1]),4)
++ wputhw(op1)
++ local mode, n, s = parse_label(params[2])
++ waction("REL_"..mode, n, s)
+ elseif p == "w" then
+ local mode, n, s = parse_label(params[1])
+ wputhw(op1)
+
+From f01f45957395ef7969ef0095f22067196c042b20 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 13:51:58 +0530
+Subject: [PATCH 114/260] Added support for RIL-c and RX-b instructions
+
+---
+ dynasm/dasm_s390x.lua | 17 ++++++++++++++++-
+ 1 file changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index fe6d6a30d..e0deef169 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1192,7 +1192,11 @@ map_op = {
+ -- RI-b mode instructions
+ bras_2 = "0000a7050000RI-b",
+ -- RI-c mode instructions
+- brc_2 = "0000a7040000RI-c",
++ brc_2 = "0000a7040000RI-c",
++ -- RIL-c
++ brcl_2 = "c00400000000RIL-c"
++ -- RX-b mode instructions
++ bc_2 = "000047000000RX-b",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1343,6 +1347,17 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op1)
+ local mode, n, s = parse_label(params[2])
+ waction("REL_"..mode, n, s)
++ elseif p == "RIL-c" then
++ op0 = op0 + shl(parse_num(params[1]),4)
++ wputhhw(op0)
++ local mode, n, s = parse_label(params[2])
++ waction("REL_"..mode, n, s)
++ elseif p == "RX-b" then
++ local d, x, b, a = parse_mem_bx(params[2])
++ op1 = op1 + shl(parse_num(params[1]), 4) + x
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1);wputhw(op2);
++ if a then a() end
+ elseif p == "w" then
+ local mode, n, s = parse_label(params[1])
+ wputhw(op1)
+
+From ccd26149026a2ce1d236fd79a072e373d46cd691 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 14:18:07 +0530
+Subject: [PATCH 115/260] Added support for RIE-e, RSI, RXF, SI instructions
+
+---
+ dynasm/dasm_s390x.lua | 36 ++++++++++++++++++++++++++++++++++++
+ 1 file changed, 36 insertions(+)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index e0deef169..9f37bf4e9 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1197,6 +1197,14 @@ map_op = {
+ brcl_2 = "c00400000000RIL-c"
+ -- RX-b mode instructions
+ bc_2 = "000047000000RX-b",
++ -- RSI
++ brxh_3 = "000084000000RSI",
++ -- RIE-e
++ brxhg_3 = "ec0000000044RIE-e",
++ -- SI
++ ni_2 = "000094000000SI",
++ -- RXF
++ madb_3 = "ed000000001eRXF",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1358,6 +1366,34 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1);wputhw(op2);
+ if a then a() end
++ elseif p == "RSI" then
++ op1 = op1 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
++ wputhw(op1)
++ local mode, n, s = parse_label(params[3])
++ waction("REL_"..mode, n, s)
++ elseif p == "RIE-e" then
++ op0 = op0 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
++ wputhw1(op0)
++ local mode, n, s = parse_label(params[3])
++ waction("REL_"..mode, n, s)
++ wputhw(op2)
++ elseif p == "SI" then
++ local imm_val, a = parse_imm8(params[2])
++ op1 = op1 + imm_val
++ wputhw(op1)
++ if a then a() end
++ local d, b, a = parse_mem_b(params[1])
++ op2 = op2 + shl(b,12) + d
++ wputhw(op2)
++ if a then a() end
++ elseif p == "RXF" then
++ local d, x, b, a = parse_mem_bx(params[3])
++ op0 = op0 + shl(parse_reg(params[2]),4) + x
++ op1 = op1 + shl(b, 12) + d
++ wputhw(op0); wputhw(op1);
++ if a then a() end
++ op2 = op2 + shl(parse_reg(params[1]),12)
++ wputhw(op2)
+ elseif p == "w" then
+ local mode, n, s = parse_label(params[1])
+ wputhw(op1)
+
+From 52368ac00539bada4b2c21cca68734e3c8c346f5 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 14:47:15 +0530
+Subject: [PATCH 116/260] Minor cleanup
+
+---
+ dynasm/dasm_s390x.lua | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 9f37bf4e9..5a79a9667 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1194,7 +1194,7 @@ map_op = {
+ -- RI-c mode instructions
+ brc_2 = "0000a7040000RI-c",
+ -- RIL-c
+- brcl_2 = "c00400000000RIL-c"
++ brcl_2 = "c00400000000RIL-c",
+ -- RX-b mode instructions
+ bc_2 = "000047000000RX-b",
+ -- RSI
+
+From df7c3245e055bc25fc14a1771e82e8de04580f66 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 17:45:32 +0530
+Subject: [PATCH 117/260] Minor Fix, correct the parameter used
+
+params[1] was used where params[2] was intended; corrected it.
+---
+ dynasm/dasm_s390x.lua | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 5a79a9667..db5aa892f 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1316,7 +1316,7 @@ local function parse_template(params, template, nparams, pos)
+ parse_imm16(params[2])
+ elseif p == "RRF-e" then
+ wputhw(op1)
+- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[1]),12) + parse_reg(params[3])
++ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[2]),12) + parse_reg(params[3])
+ if params[4] then
+ op2 = op2 + shl(parse_mask2(params[4]),8)
+ end
+
+From d63ff89c548660f0bf42fcebb8282a292e175a50 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 19:07:05 +0530
+Subject: [PATCH 118/260] Added support for RRD addressing mode
+
+We may not need RRD mode, but it was added to check that RRF-e works.
+---
+ dynasm/dasm_s390x.lua | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index db5aa892f..fc65b810f 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1205,6 +1205,8 @@ map_op = {
+ ni_2 = "000094000000SI",
+ -- RXF
+ madb_3 = "ed000000001eRXF",
++ --RRD
++ maebr_3 = "0000b30e0000RRD",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1394,6 +1396,10 @@ local function parse_template(params, template, nparams, pos)
+ if a then a() end
+ op2 = op2 + shl(parse_reg(params[1]),12)
+ wputhw(op2)
++ elseif p == "RRD" then
++ wputhw(op1)
++ op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
++ wputhw(op2)
+ elseif p == "w" then
+ local mode, n, s = parse_label(params[1])
+ wputhw(op1)
+
+From 49182c4d2edfca5a154c7d03682dbeab8ef2c96d Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 14 Dec 2016 19:14:10 +0530
+Subject: [PATCH 119/260] Added test for RRD and RRF-e
+
+Also modified the test driver so that it can now handle 3 arguments.
+---
+ dynasm/Examples/test_z_inst.c | 57 +++++++++++++++++++++--------------
+ 1 file changed, 35 insertions(+), 22 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index c09ae0831..42a4674f7 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -324,35 +324,48 @@ static void sil(dasm_State *state) {
+ | br r14
+ }
+
++static void rrfe_rrd(dasm_State *state) {
++ dasm_State ** Dst = &state;
++
++ | cefbr f0,r2
++ | cefbr f2,r3
++ | cefbr f4,r4
++ | maebr f0 ,f2 ,f4
++ | cfebr r2, 0, f0
++ | br r14
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
++ int64_t arg3;
+ void (*fn)(dasm_State *);
+ int64_t want;
+ const char *testname;
+ } test_table;
+
+ test_table test[] = {
+- { 1, 2, add, 3, "add"},
+- {10, 5, sub, 5, "sub"},
+- { 2, 3, mul, 6, "mul"},
+- { 5, 7, rx, 12298, "rx"},
+- { 5, 7, rxy, 10, "rxy"},
+- { 2, 4, lab, 32, "lab"},
+- { 2, 4, labg, 32, "labg"},
+- { 2, 0, add_imm16, 17, "imm16"},
+- { 2, 0, add_imm32, 16, "imm32"},
+- { 7, 3, save, 480, "save"},
+- { 7, 3, labmul, 21, "labmul0"},
+- { 7, 0, labmul, 0, "labmul1"},
+- { 0, 0, pc, 55, "pc"},
+- { 2,12, jmp_fwd, 12, "jmp_fwd"},
+-// { 9,8, add_rrd, 25, "add_rrd"},
+-// { 2,4, load_test, 4,"load_test"},
+- {-1, 0, ssa, 65535<<8, "ssa"},
+- {-1, 0, ssa_act, 65535<<8, "ssa_act"},
+- {27, 0, type, 27, "type"},
+- { 0, 0, sil, 23, "sil"}
++ { 1, 2, 0, add, 3, "add"},
++ {10, 5, 0, sub, 5, "sub"},
++ { 2, 3, 0, mul, 6, "mul"},
++ { 5, 7, 0, rx, 12298, "rx"},
++ { 5, 7, 0, rxy, 10, "rxy"},
++ { 2, 4, 0, lab, 32, "lab"},
++ { 2, 4, 0, labg, 32, "labg"},
++ { 2, 0, 0, add_imm16, 17, "imm16"},
++ { 2, 0, 0, add_imm32, 16, "imm32"},
++ { 7, 3, 0, save, 480, "save"},
++ { 7, 3, 0, labmul, 21, "labmul0"},
++ { 7, 0, 0, labmul, 0, "labmul1"},
++ { 0, 0, 0, pc, 55, "pc"},
++ { 2,12, 0, jmp_fwd, 12, "jmp_fwd"},
++// { 9,8, 0, add_rrd, 25, "add_rrd"},
++// { 2,4, 0, load_test, 4,"load_test"},
++ {-1, 0, 0, ssa, 65535<<8, "ssa"},
++ {-1, 0, 0, ssa_act, 65535<<8, "ssa_act"},
++ {27, 0, 0, type, 27, "type"},
++ { 0, 0, 0, sil, 23, "sil"},
++ {15,3,10, rrfe_rrd, 45, "rrfe_rrd"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+@@ -378,8 +391,8 @@ int main(int argc, char *argv[])
+ dasm_setup(&state, actions);
+ test[i].fn(state);
+ size_t size;
+- int64_t (*fptr)(int64_t, int64_t) = jitcode(&state, &size);
+- int64_t got = fptr(test[i].arg1, test[i].arg2);
++ int64_t (*fptr)(int64_t, int64_t, int64_t) = jitcode(&state, &size);
++ int64_t got = fptr(test[i].arg1, test[i].arg2, test[i].arg3);
+
+ if (got != test[i].want) {
+ fprintf(stderr, "FAIL: test %s: want %ld, got %ld\n", test[i].testname, test[i].want, got);
+
+From ef3ff100f28f3526c89da01cef0a9c90396ebe4e Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 14 Dec 2016 16:27:38 -0500
+Subject: [PATCH 120/260] Fix decoding of REL_EXT.
+
+REL_EXT has an argument which wasn't being skipped properly,
+resulting in an early STOP (because the argument is 0).
+---
+ dynasm/dasm_s390x.h | 16 +++++++++++-----
+ dynasm/dasm_s390x.lua | 3 ++-
+ 2 files changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
+index b98df8fd3..ea3aa8053 100644
+--- a/dynasm/dasm_s390x.h
++++ b/dynasm/dasm_s390x.h
+@@ -214,12 +214,17 @@ void dasm_put(Dst_DECL, int start, ...)
+ ofs += 2;
+ break;
+ case DASM_REL_EXT:
++ p++;
++ ofs += 4;
+ break;
+ case DASM_ALIGN:
+ ofs += *p++;
+ b[pos++] = ofs;
+ break;
+ case DASM_REL_LG:
++ if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
++ ofs += 2;
++ }
+ n = *p++ - 10;
+ pl = D->lglabels + n;
+ /* Bkwd rel or global. */
+@@ -234,6 +239,9 @@ void dasm_put(Dst_DECL, int start, ...)
+ n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
++ if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
++ ofs += 2;
++ }
+ pl = D->pclabels + n;
+ CKPL(pc, PC);
+ putrel:
+@@ -246,9 +254,6 @@ void dasm_put(Dst_DECL, int start, ...)
+ *pl = pos;
+ }
+ ofs += 2;
+- if (p[-3] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
+- ofs += 2;
+- }
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+@@ -359,6 +364,7 @@ int dasm_link(Dst_DECL, size_t * szp)
+ p++;
+ break;
+ case DASM_REL_EXT:
++ p++;
+ break;
+ case DASM_ALIGN:
+ ofs -= (b[pos++] + ofs) & *p++;
+@@ -430,7 +436,7 @@ int dasm_encode(Dst_DECL, void *buffer)
+ *cp++ = *p++;
+ break;
+ case DASM_REL_EXT:
+- n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
++ n = DASM_EXTERN(Dst, (unsigned char *)cp, *p++, 1) - 4;
+ goto patchrel;
+ case DASM_ALIGN:
+ ins = *p++;
+@@ -443,6 +449,7 @@ int dasm_encode(Dst_DECL, void *buffer)
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
++ p++; /* skip argument */
+ patchrel:
+ /* Offsets are halfword aligned (so need to be halved). */
+ n += 2; /* Offset is relative to start of instruction. */
+@@ -452,7 +459,6 @@ int dasm_encode(Dst_DECL, void *buffer)
+ CK(-(1 << 16) <= n && n < (1 << 16) && (n & 1) == 0, RANGE_LG);
+ }
+ *cp++ = n >> 1;
+- p++; /* skip argument */
+ break;
+ case DASM_LABEL_LG:
+ ins = *p++;
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index fc65b810f..556cfec11 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -79,7 +79,8 @@ local function havearg(a)
+ return a == "ESC" or
+ a == "SECTION" or
+ a == "REL_LG" or
+- a == "LABEL_LG"
++ a == "LABEL_LG" or
++ a == "REL_EXT"
+ end
+
+ -- Write action list buffer as a huge static C array.
+
+From 00d00e995fdfe4a51c0551e5792d3e1545084665 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 14 Dec 2016 16:31:52 -0500
+Subject: [PATCH 121/260] Fix extern handling in host vm builder.
+
+---
+ src/host/buildvm_asm.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
+index 3fd034c08..074436875 100644
+--- a/src/host/buildvm_asm.c
++++ b/src/host/buildvm_asm.c
+@@ -118,8 +118,8 @@ static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
+ int opcode = *(uint16_t*)(&cp[n]);
+ int arg = (opcode>>4) & 0xf;
+ switch (opcode & 0xff0f) {
+- case 0xa705: opname = "bras"; argt = "r"; break;
+- case 0xc005: opname = "brasl"; argt = "r"; break;
++ case 0xa705: opname = "bras"; argt = "%r"; break;
++ case 0xc005: opname = "brasl"; argt = "%r"; break;
+ case 0xa704: opname = "brc"; break;
+ case 0xc004: opname = "brcl"; break;
+ default:
+@@ -355,7 +355,7 @@ void emit_asm(BuildCtx *ctx)
+ ofs += n+4;
+ #elif LJ_TARGET_S390X
+ emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+- ofs += n;
++ ofs += n+4;
+ #else
+ emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+ ofs += n;
+
+From 24f2ab48f557f2bc07b527ac3c1b4aa612808fb6 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 14 Dec 2016 18:43:21 -0500
+Subject: [PATCH 122/260] Various fixes for vm_s390x.dasc.
+
+---
+ src/vm_s390x.dasc | 38 ++++++++++++++++++++++++++++++++------
+ 1 file changed, 32 insertions(+), 6 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index a1a4d7695..803184bfd 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -249,11 +249,11 @@
+ |.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro // assumes LJ_TFALSE == ~(1<<47)
+ |.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro // assumes LJ_TTRUE == ~(2<<47)
+ |
+-|.define PC_OP, -4(PC)
+-|.define PC_RA, -3(PC)
+-|.define PC_RB, -1(PC)
+-|.define PC_RC, -2(PC)
+-|.define PC_RD, -2(PC)
++|.define PC_OP, -1(PC)
++|.define PC_RA, -2(PC)
++|.define PC_RB, -4(PC)
++|.define PC_RC, -3(PC)
++|.define PC_RD, -4(PC)
+ |
+ |// Set current VM state.
+ |.macro set_vmstate, st
+@@ -381,12 +381,37 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_growstack_c: // Grow stack for C function.
+- | stg r0, 0(r0)
++ | lghi CARG2, LUA_MINSTACK
++ | j >2
+ |
+ |->vm_growstack_v: // Grow stack for vararg Lua function.
++ | aghi RD, -16 // LJ_FR2
++ | j >1
+ |
+ |->vm_growstack_f: // Grow stack for fixarg Lua function.
+ | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
++ | sllg RD, NARGS:RD, 3(r0)
++ | lay RD, -8(RD, BASE)
++ |1:
++ | llgc RA, (PC2PROTO(framesize)-4)(PC)
++ | la PC, 4(PC) // Must point after first instruction.
++ | stg BASE, L:RB->base
++ | stg RD, L:RB->top
++ | stg PC, SAVE_PC
++ | lgr CARG2, RA
++ |2:
++ | // RB = L, L->base = new base, L->top = top
++ | lgr CARG1, L:RB
++ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
++ | lg BASE, L:RB->base
++ | lg RD, L:RB->top
++ | lg LFUNC:RB, -16(BASE)
++ | cleartp LFUNC:RB
++ | sgr RD, BASE
++ | srlg RD, RD, 3(r0)
++ | aghi NARGS:RD, 1
++ | // BASE = new base, RB = LFUNC, RD = nargs+1
++ | ins_callt // Just retry the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Entry points into the assembler VM ---------------------------------
+@@ -998,6 +1023,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | basr r14, TMP1 // TODO: TMP1==r14, is this ok?
+ }
+ | // nresults returned in r2 (CRET1).
++ | lgr RD, CRET1
+ | lg BASE, L:RB->base
+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+ | set_vmstate INTERP
+
+From f79a6f3f0b2eb6cd8576c0a1c2d6faf7d172e1f1 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 14 Dec 2016 21:16:30 -0500
+Subject: [PATCH 123/260] Add support for clm instruction.
+
+---
+ dynasm/Examples/test_z_inst.c | 21 ++++++++++++++++++++-
+ dynasm/dasm_s390x.lua | 11 ++++++++++-
+ 2 files changed, 30 insertions(+), 2 deletions(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 42a4674f7..4820c5704 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -335,6 +335,24 @@ static void rrfe_rrd(dasm_State *state) {
+ | br r14
+ }
+
++static void rsb(dasm_State *state) {
++ dasm_State **Dst = &state;
++
++ | lay sp, -4(sp)
++ | lghi r3, 0x0706
++ | lghi r4, 0
++ | iill r4, 6
++ | iilh r4, 7
++ | st r4, 0(sp)
++ | lghi r2, 0
++ | clm r3, 5, 0(sp)
++ | jne >1
++ | lghi r2, 1
++ |1:
++ | la sp, 4(sp)
++ | br r14
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+@@ -365,7 +383,8 @@ test_table test[] = {
+ {-1, 0, 0, ssa_act, 65535<<8, "ssa_act"},
+ {27, 0, 0, type, 27, "type"},
+ { 0, 0, 0, sil, 23, "sil"},
+- {15,3,10, rrfe_rrd, 45, "rrfe_rrd"}
++ {15, 3,10, rrfe_rrd, 45, "rrfe_rrd"},
++ { 0, 0, 0, rsb, 0, "rsb"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 556cfec11..6bb008e8a 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1206,8 +1206,10 @@ map_op = {
+ ni_2 = "000094000000SI",
+ -- RXF
+ madb_3 = "ed000000001eRXF",
+- --RRD
++ -- RRD
+ maebr_3 = "0000b30e0000RRD",
++ -- RS-b
++ clm_3 = "0000bd000000RS-b"
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+@@ -1418,6 +1420,13 @@ local function parse_template(params, template, nparams, pos)
+ elseif p == "z" then
+ op2 = op2 + parse_reg(params[1])
+ wputhw(op2)
++ elseif p == "RS-b" then
++ local m = parse_mask(params[2])
++ local d, b, a = parse_mem_b(params[3])
++ op1 = op1 + shl(parse_reg(params[1]), 4) + m
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1); wputhw(op2)
++ if a then a() end
+ else
+ werror("unrecognized encoding")
+ end
+
+From ee4b942c94098c4e65eadae494cec90a063096b0 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 14 Dec 2016 22:22:49 -0500
+Subject: [PATCH 124/260] Add vm_call handling code.
+
+Now prints the prompt (!).
+---
+ src/vm_s390x.dasc | 57 +++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 57 insertions(+)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 803184bfd..da087ea61 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -418,10 +418,67 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_resume: // Setup C frame and resume thread.
++ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
++ | saveregs
++ | lgr L:RB, CARG1 // Caveat: CARG1 may be RA.
++ | stg CARG1, SAVE_L
++ | lgr RA, CARG2
++ | lghi PC, FRAME_CP
++ | lghi RD, 0
++ | lay KBASE, CFRAME_RESUME(sp)
++ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
++ | aghi DISPATCH, GG_G2DISP
++ | stg RD, SAVE_PC // Any value outside of bytecode is ok.
++ | stg RD, SAVE_CFRAME
++ | stg RD, SAVE_NRES
++ | stg RD, SAVE_ERRF
++ | stg KBASE, L:RB->cframe
++ | clm RD, 1, L:RB->status
++ | je >2 // Initial resume (like a call).
++ |
++ | // Resume after yield (like a return).
++ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
++ | set_vmstate INTERP
++ | llgc RD, L:RB->status
++ | lg BASE, L:RB->base
++ | lg RD, L:RB->top
++ | sgr RD, RA
++ | srlg RD, RD, 3(r0)
++ | aghi RD, 1 // RD = nresults+1
++ | sgr RA, BASE // RA = resultofs
++ | lg PC, -8(BASE)
++ | stg RD, SAVE_MULTRES
++ | tmll PC, FRAME_TYPE
++ | je ->BC_RET_Z
++ | j ->vm_return
+ |
+ |->vm_pcall: // Setup protected C frame and enter VM.
++ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
++ | saveregs
++ | lghi PC, FRAME_CP
++ | llgfr CARG4, CARG4
++ | stg CARG4, SAVE_ERRF
++ | j >1
+ |
+ |->vm_call: // Setup C frame and enter VM.
++ | // (lua_State *L, TValue *base, int nres1)
++ | saveregs
++ | lghi PC, FRAME_C
++ |
++ |1: // Entry point for vm_pcall above (PC = ftype).
++ | lgfr CARG3, CARG3
++ | stg CARG3, SAVE_NRES
++ | lgr L:RB, CARG1
++ | stg CARG1, SAVE_L
++ | lgr RA, CARG2
++ |
++ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
++ | lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
++ | stg KBASE, SAVE_CFRAME
++ | stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
++ | aghi DISPATCH, GG_G2DISP
++ | stg sp, L:RB->cframe
++ | lgr L:LREG, L:RB // TODO: use RB instead of LREG here?
+ |
+ |2: // Entry point for vm_resume/vm_cpcall (RA = base, LREG = L, PC = ftype).
+ | stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
+
+From b7c4e4b3d62cd361e6ab71b9690b6f785b892c77 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 15 Dec 2016 13:51:46 +0530
+Subject: [PATCH 125/260] Added rre instruction format example
+
+The example includes the fidr instruction.
+---
+ dynasm/Examples/test_z_inst.c | 16 +++++++++++++++-
+ 1 file changed, 15 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 4820c5704..2314606c2 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -335,6 +335,19 @@ static void rrfe_rrd(dasm_State *state) {
+ | br r14
+ }
+
++static void rre(dasm_State *state) {
++
++ dasm_State **Dst = &state;
++
++ | lay sp , -8(sp)
++ | cefbr f0 , r2
++ | cefbr f1 , r3
++ | fidr f0 , f1
++ | cfebr r2 ,0,f0
++ | la sp, 8(sp)
++ | br r14
++}
++
+ static void rsb(dasm_State *state) {
+ dasm_State **Dst = &state;
+
+@@ -384,7 +397,8 @@ test_table test[] = {
+ {27, 0, 0, type, 27, "type"},
+ { 0, 0, 0, sil, 23, "sil"},
+ {15, 3,10, rrfe_rrd, 45, "rrfe_rrd"},
+- { 0, 0, 0, rsb, 0, "rsb"}
++ { 0, 0, 0, rsb, 0, "rsb"},
++ {12,10, 0, rre, 10, "rre"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+
+From 2dcbf5be3ed4c726de58bfef39e2fd587b604bf2 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 15 Dec 2016 11:01:59 -0500
+Subject: [PATCH 126/260] Implement more functions in the VM.
+
+Also adds segmentation faults to stubbed out functions to make it
+easier to work out what the control flow is.
+---
+ src/vm_s390x.dasc | 317 +++++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 311 insertions(+), 6 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index da087ea61..01a3b874b 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -286,7 +286,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg ITYPE, 0(RA, BASE) // Prepend true to results.
+ |
+ |->vm_returnc:
+- | ahi RD, 1 // RD = nresults+1
++ | aghi RD, 1 // RD = nresults+1
+ | jo ->vm_unwind_yield // TODO: !!! NOT SURE, jz on x64, overflow? !!!
+ | stg RD, SAVE_MULTRES
+ | tmll PC, FRAME_TYPE
+@@ -305,13 +305,13 @@ static void build_subroutines(BuildCtx *ctx)
+ | sgr PC, BASE
+ | lcgr PC, PC // Previous base = BASE - delta.
+ |
+- | ahi RD, -1
++ | aghi RD, -1
+ | je >2
+ |1: // Move results down.
+ | lg RB, 0(BASE, RA)
+ | stg RB, -16(BASE)
+ | la BASE, 8(BASE)
+- | ahi RD, -1
++ | aghi RD, -1
+ | jne <1
+ |2:
+ | lg L:RB, SAVE_L
+@@ -368,13 +368,31 @@ static void build_subroutines(BuildCtx *ctx)
+ | j <3
+ |
+ |->vm_unwind_yield:
+- | stg r0, 0(r0)
++ | lghi CRET1, LUA_YIELD
++ | j ->vm_unwind_c_eh
+ |
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
++ | // (void *cframe, int errcode)
++ | lgr sp, CARG1
++ | lgfr CARG2, CRET1 // Error return status for vm_pcall.
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
++ | lg L:RB, SAVE_L
++ | lg GL:RB, L:RB->glref
++ | lghi TMP1, ~LJ_VMST_C
++ | stg TMP1, GL:RB->vmstate
++ | j ->vm_leave_unw
++ |
+ |->vm_unwind_rethrow:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ |
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+@@ -537,69 +555,127 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->cont_cat: // BASE = base, RC = result, RB = mbase
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |->vmeta_tgets:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_tgetb:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_tgetv:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |->cont_ra: // BASE = base, RC = result
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_tgetr:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->vmeta_tsets:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_tsetb:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_tsetv:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |->cont_nop: // BASE = base, (RC = result)
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_tsetr:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+ |->cont_condt: // BASE = base, RC = result
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->cont_condf: // BASE = base, RC = result
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_equal:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_equal_cd:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_istype:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |->vmeta_arith_vno:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |->vmeta_arith_vn:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_arith_nvo:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |->vmeta_arith_nv:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_unm:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_arith_vvo:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |->vmeta_arith_vv:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vmeta_len:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call_ra:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |->vmeta_call: // Resolve and call __call metamethod.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+@@ -849,6 +925,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.macro vm_round, name, mode, cond
+ |->name:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |.endmacro
+ |
+ | vm_round vm_floor, 0, 1
+@@ -857,9 +935,13 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+ |->vm_mod:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
+ |->vm_powi_sse:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+@@ -867,12 +949,16 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
+ |->vm_cpuid:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Assertions ---------------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->assert_bad_for_arg_type:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ #ifdef LUA_USE_ASSERT
+ #endif
+ |
+@@ -882,11 +968,17 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |// Handler for callback functions. Callback slot number in ah/al.
+ |->vm_ffi_callback:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vm_ffi_call: // Call C function via FFI.
+ |// Note: vm_ffi_call must be the last function in this object file!
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-----------------------------------------------------------------------
+ }
+@@ -901,60 +993,220 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+
+ switch (op) {
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ISEQV: case BC_ISNEV:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ISEQS: case BC_ISNES:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ISEQN: case BC_ISNEN:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ISEQP: case BC_ISNEP:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ISTYPE:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ISNUM:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_MOV:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_NOT:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_UNM:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_LEN:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_MODVN:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_MODNV: case BC_MODVV:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_POW:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_CAT:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_KSTR:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_KCDATA:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_KSHORT:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_KNUM:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_KPRI:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_KNIL:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_UGET:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_USETV:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_USETS:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_USETN:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_USETP:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_UCLO:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_FNEW:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_TNEW:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_TDUP:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_GGET:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_GSET:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_TGETV:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_TGETS:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_TGETB:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_TGETR:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_TSETV:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_TSETS:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_TSETB:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_TSETR:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_TSETM:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_CALL: case BC_CALLM:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_CALLMT:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_CALLT:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ITERC:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ITERN:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ISNEXT:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_VARG:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_RETM:
+- | stg r0, 0(r0) // not implemented
++ | stg r0, 0(r0) // not implemented
++ | stg r0, 0(r0)
+ break;
+
+ case BC_RET: case BC_RET0: case BC_RET1:
+@@ -1033,24 +1285,76 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | j <1
+ break;
+ case BC_FORL:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_JFORI:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_JFORL:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_FORI:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_IFORL:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ITERL:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_JITERL:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_IITERL:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_LOOP:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_ILOOP:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_JLOOP:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_JMP:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_FUNCF:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_JFUNCF:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_IFUNCF:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_JFUNCV:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
++ break;
+ case BC_IFUNCV:
+- | lg r0, 0(r0) // Not implemented, seg fault.
++ | stg r0, 0(r0) // Not implemented, seg fault.
++ | stg r0, 0(r0)
+ break;
+
+ case BC_FUNCC:
+@@ -1089,6 +1393,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lcgr RA, RA
+ | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
+ | lg PC, -8(BASE) // Fetch PC of caller.
++ | // BUG: PC seems to be -1 here sometimes. Not yet sure why.
+ | j ->vm_returnc
+ break;
+
+
+From 24bdb7576d91f1f10a6a4316d228715b05b7e102 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 15 Dec 2016 11:03:13 -0500
+Subject: [PATCH 127/260] Add debug options to Makefile.
+
+We're going to need these for a while, so it is better to keep them
+in the repository. Once we're happy that things are working, we can
+disable the debug info and enable optimizations again.
+---
+ src/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/Makefile b/src/Makefile
+index 1450adc03..8ecd6183d 100644
+--- a/src/Makefile
++++ b/src/Makefile
+@@ -56,7 +56,7 @@ CCOPT_mips=
+ #
+ CCDEBUG=
+ # Uncomment the next line to generate debug information:
+-#CCDEBUG= -g
++CCDEBUG= -g -O0
+ #
+ CCWARN= -Wall
+ # Uncomment the next line to enable more warnings:
+
+From bee112d43123d1267680e1b410b63541c44a4b63 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 16 Dec 2016 17:23:46 -0500
+Subject: [PATCH 128/260] Add support for global short assignments.
+
+In other words 'a = 1' now works.
+---
+ dynasm/dasm_s390x.lua | 4 +
+ src/lj_arch.h | 2 +-
+ src/vm_s390x.dasc | 280 +++++++++++++++++++++++++++++++++++-------
+ 3 files changed, 238 insertions(+), 48 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 6bb008e8a..a4b01ccbe 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1190,6 +1190,10 @@ map_op = {
+ stfl_1 = "0000b2b10000sS",
+ -- I- mode instructions
+ svc_1 = "000000000a00iI",
++ -- RI-a mode instructions
++ -- TODO: change "i" to "RI-a"
++ mhi_2 = "0000a70c0000i",
++ mghi_2 = "0000a70d0000i",
+ -- RI-b mode instructions
+ bras_2 = "0000a7050000RI-b",
+ -- RI-c mode instructions
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index 3839027b2..81f4873ed 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -367,7 +367,7 @@
+ #define LJ_TARGET_MASKSHIFT 1
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNALIGNED 1
+-#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+ #define LJ_TARGET_GC64 1
+ #define LJ_ARCH_NOJIT 1 /* NYI */
+ #define LJ_ARCH_NOFFI 1 /* Disable FFI for now. */
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 01a3b874b..3f4cea640 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -57,7 +57,8 @@
+ |.define CRET1, r2
+ |
+ |.define OP, r2
+-|.define TMP1, r14
++|.define TMPR1, r14
++|.define TMPR2, r0
+ |
+ |// Stack layout while in interpreter. Must match with lj_frame.h.
+ |.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
+@@ -66,9 +67,9 @@
+ |.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes).
+ |.define SAVE_GPRS_P, 48(sp) // Save area for r6-r15 (10*8 bytes) in prologue (before stack frame is allocated).
+ |
+-|// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
++|// Argument save area.
+ |.define SAVE_ERRF, 280(sp) // Argument 4, in r5.
+-|.define SAVE_NRES, 272(sp) // Argument 3, in r4.
++|.define SAVE_NRES, 272(sp) // Argument 3, in r4. Size is 4-bytes.
+ |.define SAVE_CFRAME, 264(sp) // Argument 2, in r3.
+ |.define SAVE_L, 256(sp) // Argument 1, in r2.
+ |.define RESERVED, 248(sp) // Reserved for compiler use.
+@@ -85,6 +86,7 @@
+ |.define SAVE_FPR8, 176(sp)
+ |.define SAVE_PC, 168(sp)
+ |.define SAVE_MULTRES, 160(sp)
++|.define TMP_STACK, 160(sp) // Overlaps SAVE_MULTRES
+ |
+ |// Callee save area (allocated by interpreter).
+ |.define CALLEESAVE, 000(sp) // <- sp in interpreter.
+@@ -140,7 +142,7 @@
+ |.macro ins_ABC; .endmacro
+ |.macro ins_AB_; .endmacro
+ |.macro ins_A_C; .endmacro
+-|.macro ins_AND; .endmacro
++|.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD
+ |
+ |// Instruction decode+dispatch.
+ | // TODO: tune this, right now we always decode RA-D even if they aren't used.
+@@ -157,9 +159,10 @@
+ | srlg RB, RB, 8(r0)
+ | llgcr RC, RD
+ | la PC, 4(PC)
+-| llgfr TMP1, OP
+-| sllg TMP1, TMP1, 3(r0) // TMP1=OP*8
+-| b 0(TMP1, DISPATCH)
++| llgfr TMPR1, OP
++| sllg TMPR1, TMPR1, 3(r0) // TMPR1=OP*8
++| lg TMPR1, 0(TMPR1, DISPATCH)
++| br TMPR1
+ |.endmacro
+ |
+ |// Instruction footer.
+@@ -184,10 +187,10 @@
+ | lg PC, LFUNC:RB->pc
+ | llgf RA, 0(PC) // TODO: combine loads?
+ | llgcr OP, RA
+-| sllg TMP1, OP, 3(r0)
++| sllg TMPR1, OP, 3(r0)
+ | la PC, 4(PC)
+-| lg TMP1, 0(TMP1, DISPATCH)
+-| br TMP1
++| lg TMPR1, 0(TMPR1, DISPATCH)
++| br TMPR1
+ |.endmacro
+ |
+ |.macro ins_call
+@@ -210,6 +213,11 @@
+ | oihh reg, ((tp>>1) &0xffff)
+ | oihl reg, ((tp<<15)&0x8000)
+ |.endmacro
++|.macro settp, dst, reg, tp
++| llihh dst, ((tp>>1) &0xffff)
++| iihl dst, ((tp<<15)&0x8000)
++| ogr dst, reg
++|.endmacro
+ |.macro setint, reg
+ | settp reg, LJ_TISNUM
+ |.endmacro
+@@ -257,10 +265,24 @@
+ |
+ |// Set current VM state.
+ |.macro set_vmstate, st
+-| lghi TMP1, ~LJ_VMST_..st
+-| stg TMP1, DISPATCH_GL(vmstate)(DISPATCH)
++| lghi TMPR1, ~LJ_VMST_..st
++| stg TMPR1, DISPATCH_GL(vmstate)(DISPATCH)
+ |.endmacro
+ |
++|// Move table write barrier back. Overwrites reg.
++|.macro barrierback, tab, reg
++| // TODO: more efficient way?
++| llgc reg, tab->marked
++| nill reg, (uint16_t)~LJ_GC_BLACK // black2gray(tab)
++| stc reg, tab->marked
++| lg reg, (DISPATCH_GL(gc.grayagain))(DISPATCH)
++| stg tab, (DISPATCH_GL(gc.grayagain))(DISPATCH)
++| stg reg, tab->gclist
++|.endmacro
++
++#if !LJ_DUALNUM
++#error "Only dual-number mode supported for s390x target"
++#endif
+
+ /* Generate subroutines used by opcodes and other parts of the VM. */
+ /* The .code_sub section should be last to help static branch prediction. */
+@@ -294,8 +316,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vm_return:
+ | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
+- | lghi TMP1, FRAME_C
+- | xgr PC, TMP1
++ | lghi TMPR1, FRAME_C
++ | xgr PC, TMPR1
+ | tmll PC, FRAME_TYPE
+ | jne ->vm_returnp
+ |
+@@ -318,7 +340,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg PC, L:RB->base
+ |3:
+ | lg RD, SAVE_MULTRES
+- | lg RA, SAVE_NRES // RA = wanted nresults+1
++ | lgf RA, SAVE_NRES // RA = wanted nresults+1
+ |4:
+ | cgr RA, RD
+ | jne >6 // More/less results wanted?
+@@ -340,8 +362,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | // More results wanted. Check stack size and fill up results with nil.
+ | cg BASE, L:RB->maxstack
+ | jh >8
+- | lghi TMP1, LJ_TNIL
+- | stg TMP1, -16(BASE)
++ | lghi TMPR1, LJ_TNIL
++ | stg TMPR1, -16(BASE)
+ | la BASE, 8(BASE)
+ | aghi RD, 1
+ | j <4
+@@ -350,8 +372,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | cghi RA, 0
+ | je <5 // But check for LUA_MULTRET+1.
+ | sgr RA, RD // Negative result!
+- | sllg TMP1, RA, 3(r0)
+- | lay BASE, 0(TMP1, BASE) // Correct top.
++ | sllg TMPR1, RA, 3(r0)
++ | lay BASE, 0(TMPR1, BASE) // Correct top.
+ | j <5
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+@@ -378,8 +400,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | lg L:RB, SAVE_L
+ | lg GL:RB, L:RB->glref
+- | lghi TMP1, ~LJ_VMST_C
+- | stg TMP1, GL:RB->vmstate
++ | lghi TMPR1, ~LJ_VMST_C
++ | stg TMPR1, GL:RB->vmstate
+ | j ->vm_leave_unw
+ |
+ |->vm_unwind_rethrow:
+@@ -448,7 +470,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | aghi DISPATCH, GG_G2DISP
+ | stg RD, SAVE_PC // Any value outside of bytecode is ok.
+ | stg RD, SAVE_CFRAME
+- | stg RD, SAVE_NRES
++ | st RD, SAVE_NRES
+ | stg RD, SAVE_ERRF
+ | stg KBASE, L:RB->cframe
+ | clm RD, 1, L:RB->status
+@@ -484,8 +506,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lghi PC, FRAME_C
+ |
+ |1: // Entry point for vm_pcall above (PC = ftype).
+- | lgfr CARG3, CARG3
+- | stg CARG3, SAVE_NRES
++ | st CARG3, SAVE_NRES
+ | lgr L:RB, CARG1
+ | stg CARG1, SAVE_L
+ | lgr RA, CARG2
+@@ -531,7 +552,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg DISPATCH, L:LREG->glref // Setup pointer to dispatch table.
+ | lghi RA, 0
+ | stg RA, SAVE_ERRF // No error function.
+- | stg KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
++ | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
+ | aghi DISPATCH, GG_G2DISP
+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
+ |
+@@ -1081,8 +1102,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ break;
+ case BC_KSHORT:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = dst, RD = signed int16 literal
++ | // Assumes DUALNUM.
++ | lhr RD, RD // Sign-extend literal to 32-bits.
++ | setint RD
++ | sllg TMPR1, RA, 3(r0)
++ | stg RD, 0(RA, BASE)
++ | ins_next
+ break;
+ case BC_KNUM:
+ | stg r0, 0(r0)
+@@ -1132,21 +1158,67 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
++
+ case BC_GGET:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AND // RA = dst, RD = str const (~)
++ | lg LFUNC:RB, -16(BASE)
++ | cleartp LFUNC:RB
++ | lg TAB:RB, LFUNC:RB->env
++ | sllg TMPR1, RD, 3(r0)
++ | lg STR:RC, 0(TMPR1, KBASE)
++ | j ->BC_TGETS_Z
+ break;
+ case BC_GSET:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AND // RA = src, RD = str const (~)
++ | lg LFUNC:RB, -16(BASE)
++ | cleartp LFUNC:RB
++ | lg TAB:RB, LFUNC:RB->env
++ | sllg TMPR1, RD, 3(r0)
++ | lg STR:RC, 0(TMPR1, KBASE)
++ | j ->BC_TSETS_Z
+ break;
++
+ case BC_TGETV:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
+ case BC_TGETS:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0(r0) // Not yet implemented.
++ |
++ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
++ | l TMPR1, TAB:RB->hmask
++ | n TMPR1, STR:RC->hash
++ | lgfr TMPR1, TMPR1
++ | mghi TMPR1, #NODE // TODO: not sure about this one, original: imul TMPRd, #NODE
++ | ag NODE:TMPR1, TAB:RB->node
++ | settp ITYPE, STR:RC, LJ_TSTR
++ |1:
++ | cg ITYPE, NODE:TMPR1->key
++ | jne >4
++ | // Get node value.
++ | lg ITYPE, NODE:TMPR1->val
++ | cghi ITYPE, LJ_TNIL
++ | je >5 // Key found, but nil value?
++ |2:
++ | sllg RA, RA, 3(r0)
++ | stg ITYPE, 0(TMPR1, RA)
++ | ins_next
++ |
++ |4: // Follow hash chain.
++ | lg NODE:TMPR1, NODE:TMPR1->next
++ | cghi NODE:TMPR1, 0
++ | jne <1
++ | // End of hash chain: key not found, nil result.
++ | lghi ITYPE, LJ_TNIL
++ |
++ |5: // Check for __index if table value is nil.
++ | lg TAB:TMPR1, TAB:RB->metatable
++ | cghi TAB:TMPR1, 0
++ | je <2 // No metatable: done.
++ | llgc TMPR2, TAB:TMPR1->nomm
++ | tmll TMPR2, 1<<MM_index
++ | jne <2 // 'no __index' flag set: done.
++ | j ->vmeta_tgets // Caveat: preserve STR:RC.
+ break;
+ case BC_TGETB:
+ | stg r0, 0(r0)
+@@ -1162,7 +1234,73 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_TSETS:
+ | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ |
++ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
++ | l TMPR1, TAB:RB->hmask
++ | n TMPR1, STR:RC->hash
++ | lgfr TMPR1, TMPR1
++ | mghi TMPR1, #NODE
++ | xr TMPR2, TMPR2
++ | stc TMPR2, TAB:RB->nomm // Clear metamethod cache.
++ | ag NODE:TMPR1, TAB:RB->node
++ | settp ITYPE, STR:RC, LJ_TSTR
++ |1:
++ | cg ITYPE, NODE:TMPR1->key
++ | jne >5
++ | // Ok, key found. Assumes: offsetof(Node, val) == 0
++ | lghi TMPR2, LJ_TNIL
++ | cg TMPR2, 0(TMPR1)
++ | je >4 // Previous value is nil?
++ |2:
++ | llgc TMPR2, TAB:RB->marked
++ | tmll TMPR2, LJ_GC_BLACK // isblack(table)
++ | jne >7
++ |3: // Set node value.
++ | sllg RA, RA, 3(r0)
++ | lg ITYPE, 0(RA, BASE)
++ | stg ITYPE, 0(TMPR1)
++ | ins_next
++ |
++ |4: // Check for __newindex if previous value is nil.
++ | lg TAB:ITYPE, TAB:RB->metatable
++ | cghi TAB:ITYPE, 0
++ | je <2
++ | llgc TMPR2, TAB:ITYPE->nomm
++ | tmll TMPR2, 1<<MM_newindex
++ | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
++ | j <2
++ |
++ |5: // Follow hash chain.
++ | lg NODE:TMPR1, NODE:TMPR1->next
++ | cghi NODE:TMPR1, 0
++ | jne <1
++ | // End of hash chain: key not found, add a new one.
++ |
++ | // But check for __newindex first.
++ | lg TAB:TMPR1, TAB:RB->metatable
++ | cghi TAB:TMPR1, 0
++ | je >6 // No metatable: continue.
++ | llgc TMPR2, TAB:TMPR1->nomm
++ | tmll TMPR2, 1<<MM_newindex
++ | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
++ |6:
++ | stg ITYPE, TMP_STACK
++ | lg L:CARG1, SAVE_L
++ | stg BASE, L:CARG1->base
++ | la CARG3, TMP_STACK // TODO: lea CARG3, ITYPE... not sure.
++ | lgr CARG2, TAB:RB
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
++ | // Handles write barrier for the new key. TValue * returned in r2 (CRET1).
++ | lgr TMPR1, CRET1
++ | lg L:CRET1, SAVE_L
++ | lg BASE, L:CRET1->base
++ | llgc RA, PC_RA
++ | j <2 // Must check write barrier for value.
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, ITYPE
++ | j <3
+ break;
+ case BC_TSETB:
+ | stg r0, 0(r0)
+@@ -1245,8 +1383,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ /* fallthrough */
+ case BC_RET0:
+ |5:
+- | llgc TMP1, PC_RB
+- | cgr TMP1, RD
++ | llgc TMPR1, PC_RB
++ | cgr TMPR1, RD
+ | jh >6
+ default:
+ break;
+@@ -1262,13 +1400,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+- | lghi TMP1, LJ_TNIL
++ | lghi TMPR1, LJ_TNIL
+ if (op == BC_RET) {
+- | stg TMP1, -16(KBASE) // Note: relies on shifted base.
++ | stg TMPR1, -16(KBASE) // Note: relies on shifted base.
+ | la KBASE, 8(KBASE)
+ } else {
+ | sllg RC, RD, 3(r0) // RC used as temp.
+- | stg TMP1, -24(RC, BASE)
++ | stg TMPR1, -24(RC, BASE)
+ }
+ | la RD, 1(RD)
+ | j <5
+@@ -1348,13 +1486,61 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
++
+ case BC_JFUNCV:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++#if !LJ_HASJIT
+ break;
++#endif
++ | stg r0, 0(r0) // NYI: compiled vararg functions
++ break; /* NYI: compiled vararg functions. */
++
+ case BC_IFUNCV:
+- | stg r0, 0(r0) // Not implemented, seg fault.
+- | stg r0, 0(r0)
++ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
++ | sllg TMPR1, NARGS:RD, 3(r0)
++ | la RB, (FRAME_VARG+8)(TMPR1)
++ | la RD, 8(TMPR1, BASE)
++ | lg LFUNC:KBASE, -16(BASE)
++ | stg RB, -8(RD) // Store delta + FRAME_VARG.
++ | stg LFUNC:KBASE, -16(RD) // Store copy of LFUNC.
++ | lg L:RB, SAVE_L
++ | sllg RA, RA, 3(r0)
++ | la RA, 0(RA, RD)
++ | cg RA, L:RB->maxstack
++ | jh ->vm_growstack_v // Need to grow stack.
++ | lgr RA, BASE
++ | lgr BASE, RD
++ | llgc RB, (PC2PROTO(numparams)-4)(PC)
++ | cghi RB, 0
++ | je >2
++ | aghi RA, 8
++ | lghi TMPR1, LJ_TNIL
++ |1: // Copy fixarg slots up to new frame.
++ | la RA, 8(RA)
++ | cgr RA, BASE
++ | jnl >3 // Less args than parameters?
++ | lg KBASE, -16(RA)
++ | stg KBASE, 0(RD)
++ | la RD, 8(RD)
++ | stg TMPR1, -16(RA) // Clear old fixarg slot (help the GC).
++ | aghi RB, -1
++ | jne <1
++ | // TODO: brctg instead of decrement/branch
++ |2:
++ if (op == BC_JFUNCV) {
++ | llgh RD, PC_RD
++ | j =>BC_JLOOP
++ } else {
++ | lg KBASE, (PC2PROTO(k)-4)(PC)
++ | ins_next
++ }
++ |
++ |3: // Clear missing parameters.
++ | stg TMPR1, 0(RD) // TMPR1=LJ_TNIL (-1) here.
++ | la RD, 8(RD)
++ | aghi RB, -1
++ | jne <3
++ | // TODO: brctg instead of decrement/branch
++ | j <2
+ break;
+
+ case BC_FUNCC:
+@@ -1380,16 +1566,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | basr r14, KBASE // (lua_State *L)
+ } else {
+ | // (lua_State *L, lua_CFunction f)
+- | lg TMP1, (DISPATCH_GL(wrapf))(DISPATCH)
+- | basr r14, TMP1 // TODO: TMP1==r14, is this ok?
++ | lg TMPR1, (DISPATCH_GL(wrapf))(DISPATCH)
++ | basr r14, TMPR1 // TODO: TMPR1==r14, is this ok?
+ }
+ | // nresults returned in r2 (CRET1).
+ | lgr RD, CRET1
+ | lg BASE, L:RB->base
+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+ | set_vmstate INTERP
+- | sllg TMP1, RD, 3(r0)
+- | la RA, 0(TMP1, BASE)
++ | sllg TMPR1, RD, 3(r0)
++ | la RA, 0(TMPR1, BASE)
+ | lcgr RA, RA
+ | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
+ | lg PC, -8(BASE) // Fetch PC of caller.
+
+From 6a9855d9881bb6762ae4741c03e2d82fa7bb48b8 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Sat, 17 Dec 2016 19:56:56 -0500
+Subject: [PATCH 129/260] Add support for print function call.
+
+Hello world now works.
+
+> print("hello world!")
+hello world!
+---
+ src/vm_s390x.dasc | 184 +++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 172 insertions(+), 12 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 3f4cea640..0454e9c68 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -37,7 +37,7 @@
+ |.define ITYPE, r13 //
+ |
+ |// The following temporaries are not saved across C calls, except for RD.
+-|.define RA, r1 // Cannot be dereferenced.
++|.define RA, r1
+ |.define RB, r12
+ |.define RC, r5 // Overlaps CARG4.
+ |.define RD, r6 // Overlaps CARG5. Callee-saved.
+@@ -686,11 +686,30 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call_ra:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | la RA, 16(RA, BASE) // RA previously set to RA*8.
+ |->vmeta_call: // Resolve and call __call metamethod.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | // BASE = old base, RA = new base, RC = nargs+1, PC = return
++ | stg NARGS:RD, TMP_STACK // Save RA, RC for us (not sure about this).
++ | lgr RB, RA
++ | lg L:CARG1, SAVE_L
++ | stg BASE, L:CARG1->base
++ | lay CARG2, -16(RA)
++ | sllg RD, RD, 3(r0)
++ | lay CARG3, -8(RA, RD)
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
++ | lgr RA, RB
++ | lg L:RB, SAVE_L
++ | lg BASE, L:RB->base
++ | lg NARGS:RD, TMP_STACK
++ | lg LFUNC:RB, -16(RA)
++ | aghi NARGS:RD, 1 // 32-bit on x64.
++ | // This is fragile. L->base must not move, KBASE must always be defined.
++ | cgr KBASE, BASE // Continue with CALLT if flag set.
++ | je ->BC_CALLT_Z
++ | cleartp LFUNC:RB
++ | lgr BASE, RA
++ | ins_call // Otherwise call resolved metamethod.
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+@@ -704,14 +723,20 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.macro .ffunc, name
+ |->ff_ .. name:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |.endmacro
+ |
+ |.macro .ffunc_1, name
+ |->ff_ .. name:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |.endmacro
+ |
+ |.macro .ffunc_2, name
+ |->ff_ .. name:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |.endmacro
+ |
+ |.macro .ffunc_n, name, op
+@@ -733,22 +758,36 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Base library: checks -----------------------------------------------
+ |
+ |.ffunc_1 assert
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |.ffunc_1 type
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |.ffunc_2 setmetatable
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |.ffunc_2 rawget
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |.ffunc_1 tostring
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+@@ -764,8 +803,12 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc_1 pcall
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |.ffunc_2 xpcall
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+@@ -843,14 +886,20 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
++ | stg r0, 0(r0)
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
++ | stg r0, 0(r0)
+ |->fff_newstr:
++ | stg r0, 0(r0)
+ |->fff_resstr:
++ | stg r0, 0(r0)
+ |
+ |.ffunc string_sub
++ | stg r0, 0(r0)
+ |
+ |->fff_emptystr: // Range underflow.
++ | stg r0, 0(r0)
+ |
+ |.macro ffstring_op, name
+ | .ffunc_1 string_ .. name
+@@ -889,6 +938,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.macro .ffunc_bit_sh, name, ins
+ | .ffunc_bit name, 1, .ffunc_2
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |.endmacro
+ |
+ |.ffunc_bit_sh bit_lshift, shl
+@@ -900,36 +951,64 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback_2:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |->fff_fallback_1:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |->fff_fallback: // Call fast function fallback handler.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RD = nargs+1
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->cont_hook: // Continue from hook yield.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vm_hotcall: // Hot call counter underflow.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->cont_stitch: // Trace stitching.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+@@ -938,7 +1017,11 @@ static void build_subroutines(BuildCtx *ctx)
+ |// Called from an exit stub with the exit number on the stack.
+ |// The 16 bit exit number is stored with two (sign-extended) push imm8.
+ |->vm_exit_handler:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |->vm_exit_interp:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+@@ -1093,9 +1176,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
++
++ /* -- Constant ops ------------------------------------------------------ */
++
+ case BC_KSTR:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AND // RA = dst, RD = str const (~)
++ | sllg RD, RD, 3(r0)
++ | lg RD, 0(RD, KBASE)
++ | settp RD, LJ_TSTR
++ | sllg RA, RA, 3(r0)
++ | stg RD, 0(RA, BASE)
++ | ins_next
+ break;
+ case BC_KCDATA:
+ | stg r0, 0(r0)
+@@ -1201,7 +1292,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | je >5 // Key found, but nil value?
+ |2:
+ | sllg RA, RA, 3(r0)
+- | stg ITYPE, 0(TMPR1, RA)
++ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ |
+ |4: // Follow hash chain.
+@@ -1314,18 +1405,87 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
++
++ /* -- Calls and vararg handling ----------------------------------------- */
++
+ case BC_CALL: case BC_CALLM:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
++ if (op == BC_CALLM) {
++ | ag NARGS:RD, SAVE_MULTRES // TODO: MULTRES is 32-bit on x64
++ }
++ | sllg RA, RA, 3(r0)
++ | lg LFUNC:RB, 0(BASE, RA)
++ | checkfunc LFUNC:RB, ->vmeta_call_ra
++ | la BASE, 16(RA, BASE)
++ | lgr RD, RC
++ | ins_call
+ break;
++
+ case BC_CALLMT:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
+ case BC_CALLT:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = base, RD = nargs+1
++ | sllg RA, RA, 3(r0)
++ | la RA, 16(RA, BASE)
++ | lgr KBASE, BASE // Use KBASE for move + vmeta_call hint.
++ | lg LFUNC:RB, -16(RA)
++ | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
++ |->BC_CALLT_Z:
++ | lg PC, -8(BASE)
++ | tmll PC, FRAME_TYPE
++ | jne >7
++ |1:
++ | stg LFUNC:RB, -16(BASE) // Copy func+tag down, reloaded below.
++ | stg NARGS:RD, SAVE_MULTRES // 32-bit on x64.
++ | aghi NARGS:RD, -1
++ | je >3
++ |2: // Move args down.
++ | // TODO: mvc or something here?
++ | lg RB, 0(RA)
++ | la RA, 8(RA)
++ | stg RB, 0(KBASE)
++ | la KBASE, 8(KBASE)
++ | // TODO: replace decrement/branch with brctg
++ | aghi NARGS:RD, -1
++ | jne <2
++ |
++ | lg LFUNC:RB, -16(BASE)
++ |3:
++ | cleartp LFUNC:RB
++ | lg NARGS:RD, SAVE_MULTRES
++ | llgc TMPR1, LFUNC:RB->ffid
++ | cghi TMPR1, 1 // (> FF_C) Calling a fast function?
++ | jh >5
++ |4:
++ | ins_callt
++ |
++ |5: // Tailcall to a fast function.
++ | tmll PC, FRAME_TYPE // Lua frame below?
++ | jne <4
++ | llgc RA, PC_RA
++ | lcgr RA, RA
++ | sllg RA, RA, 3(r0)
++ | lg LFUNC:KBASE, -32(RA, BASE) // Need to prepare KBASE.
++ | cleartp LFUNC:KBASE
++ | lg KBASE, LFUNC:KBASE->pc
++ | lg KBASE, (PC2PROTO(k))(KBASE)
++ | j <4
++ |
++ |7: // Tailcall from a vararg function.
++ | aghi PC, -FRAME_VARG
++ | tmll PC, FRAME_TYPEP
++ | jne >8 // Vararg frame below?
++ | sgr BASE, PC // Need to relocate BASE/KBASE down.
++ | lgr KBASE, BASE
++ | lg PC, -8(BASE)
++ | j <1
++ |8:
++ | aghi PC, FRAME_VARG
++ | j <1
+ break;
++
+ case BC_ITERC:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+
+From 3330f6adc27e489c18618ab2cab467679b3c1f19 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 19 Dec 2016 10:49:21 -0500
+Subject: [PATCH 130/260] Fix KSHORT destination slot address.
+
+---
+ src/vm_s390x.dasc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 0454e9c68..34e6843f1 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1197,7 +1197,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | // Assumes DUALNUM.
+ | lhr RD, RD // Sign-extend literal to 32-bits.
+ | setint RD
+- | sllg TMPR1, RA, 3(r0)
++ | sllg RA, RA, 3(r0)
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+
+From 0d442ec6882bf79b551538ae13874ba949577113 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 19 Dec 2016 14:21:24 -0500
+Subject: [PATCH 131/260] Add some more instructions to DynASM.
+
+---
+ dynasm/dasm_s390x.lua | 16 +++++++++++++++-
+ 1 file changed, 15 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index a4b01ccbe..dc1f5e5dc 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -694,7 +694,6 @@ map_op = {
+ chlr_2 = "0000b9dd0000h",
+ cfi_2 = "c20d00000000n",
+ cgfi_2 = "c20c00000000n",
+- cghi_2 = "0000a70f0000i",
+ cih_2 = "cc0d00000000n",
+ cl_2 = "000055000000j",
+ clr_2 = "000000001500g",
+@@ -1183,7 +1182,20 @@ map_op = {
+ cfebr_3 = "0000b3980000RRF-e",
+ cfebra_4 = "0000b3980000RRF-e",
+ -- RXE instructions
++ adb_2 = "ed000000001aRXE",
++ aeb_2 = "ed000000000aRXE",
++ cdb_2 = "ed0000000019RXE",
++ ceb_2 = "ed0000000009RXE",
++ ddb_2 = "ed000000001dRXE",
++ deb_2 = "ed000000000dRXE",
++ mdb_2 = "ed000000001cRXE",
++ mdeb_2 = "ed000000000cRXE",
++ meeb_2 = "ed0000000017RXE",
++ mxdb_2 = "ed0000000007RXE",
+ sqdb_2 = "ed0000000015RXE",
++ sqeb_2 = "ed0000000014RXE",
++ sdb_2 = "ed000000001bRXE",
++ seb_2 = "ed000000000bRXE",
+ -- RRF-b instructions
+ didbr_4 = "0000b3580000RRF-b",
+ -- S mode instructions
+@@ -1192,6 +1204,8 @@ map_op = {
+ svc_1 = "000000000a00iI",
+ -- RI-a mode instructions
+ -- TODO: change "i" to "RI-a"
++ chi_2 = "0000a70e0000i",
++ cghi_2 = "0000a70f0000i",
+ mhi_2 = "0000a70c0000i",
+ mghi_2 = "0000a70d0000i",
+ -- RI-b mode instructions
+
+From 299dc34db2d29b9e475e860b4bb800ea40120173 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 19 Dec 2016 14:21:48 -0500
+Subject: [PATCH 132/260] Add basic integer for loop support.
+
+> for i=1,3 do print(i) end
+1
+2
+3
+---
+ src/vm_s390x.dasc | 172 ++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 158 insertions(+), 14 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 34e6843f1..c1d6f5fa7 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -263,6 +263,15 @@
+ |.define PC_RC, -3(PC)
+ |.define PC_RD, -4(PC)
+ |
++|.macro branchPC, reg
++| // TODO: optimize this, was just lea PC, [PC+reg*4-BCBIAS_J*4].
++| // Can't clobber TMPR1 or condition code.
++| lgr TMPR2, TMPR1 // Workaround because TMPR2 == r0 and can't be used in lay.
++| sllg TMPR1, reg, 2(r0)
++| lay PC, (-BCBIAS_J*4)(TMPR1, PC)
++| lgr TMPR1, TMPR2
++|.endmacro
++|
+ |// Set current VM state.
+ |.macro set_vmstate, st
+ | lghi TMPR1, ~LJ_VMST_..st
+@@ -1129,8 +1138,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ break;
+ case BC_MOV:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = dst, RD = src
++ | sllg RD, RD, 3(r0)
++ | lg RB, 0(RD, BASE)
++ | sllg RA, RA, 3(r0)
++ | stg RB, 0(RA, BASE)
++ | ins_next_
+ break;
+ case BC_NOT:
+ | stg r0, 0(r0)
+@@ -1274,8 +1287,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ break;
+ case BC_TGETS:
+- | stg r0, 0(r0) // Not yet implemented.
+- |
++ | ins_ABC
++ | sllg RB, RB, 3(r0)
++ | lg TAB:RB, 0(RB, BASE)
++ | lghi TMPR1, -1
++ | xgr RC, TMPR1
++ | sllg RC, RC, 3(r0)
++ | lg STR:RC, 0(RC, BASE)
++ | checktab TAB:RB, ->vmeta_tgets
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
+ | l TMPR1, TAB:RB->hmask
+ | n TMPR1, STR:RC->hash
+@@ -1582,26 +1601,151 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ }
+ | j <1
+ break;
++
++ /* -- Loops and branches ------------------------------------------------ */
++
++ |.define FOR_IDX, 0(RA)
++ |.define FOR_STOP, 8(RA)
++ |.define FOR_STEP, 16(RA)
++ |.define FOR_EXT, 24(RA)
++
+ case BC_FORL:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
++
+ case BC_JFORI:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
+- break;
+ case BC_JFORL:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++#if !LJ_HASJIT
+ break;
++#endif
+ case BC_FORI:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
+- break;
+ case BC_IFORL:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ vk = (op == BC_IFORL || op == BC_JFORL);
++ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
++ | sllg RA, RA, 3(r0)
++ | la RA, 0(RA, BASE)
++ | lg RB, FOR_IDX
++ | checkint RB, >9
++ | lg TMPR1, FOR_STOP
++ if (!vk) {
++ | checkint TMPR1, ->vmeta_for
++ | lg ITYPE, FOR_STEP
++ | chi ITYPE, 0; jl >5
++ | srag ITYPE, ITYPE, 47(r0)
++ | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
++ } else {
++#ifdef LUA_USE_ASSERT
++ | // lg TMPR1, FOR_STOP
++ | checkinttp TMPR1, ->assert_bad_for_arg_type
++ | lg TMPR2, FOR_STEP
++ | checkinttp TMPR2, ->assert_bad_for_arg_type
++#endif
++ | lg ITYPE, FOR_STEP
++ | chi ITYPE, 0; jl >5
++ | ar RB, ITYPE; jo >1
++ | setint RB
++ | stg RB, FOR_IDX
++ }
++ | cr RB, TMPR1
++ | stg RB, FOR_EXT
++ if (op == BC_FORI) {
++ | jle >7
++ |1:
++ |6:
++ | branchPC RD
++ } else if (op == BC_JFORI) {
++ | branchPC RD
++ | llgh RD, PC_RD
++ | jle =>BC_JLOOP
++ |1:
++ |6:
++ } else if (op == BC_IFORL) {
++ | jh >7
++ |6:
++ | branchPC RD
++ |1:
++ } else {
++ | jle =>BC_JLOOP
++ |1:
++ |6:
++ }
++ |7:
++ | ins_next
++ |
++ |5: // Invert check for negative step.
++ if (!vk) {
++ | srag ITYPE, ITYPE, 47(r0)
++ | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
++ } else {
++ | ar RB, ITYPE; jo <1
++ | setint RB
++ | stg RB, FOR_IDX
++ }
++ | cr RB, TMPR1
++ | stg RB, FOR_EXT
++ if (op == BC_FORI) {
++ | jhe <7
++ } else if (op == BC_JFORI) {
++ | branchPC RD
++ | llgh RD, PC_RD
++ | jhe =>BC_JLOOP
++ } else if (op == BC_IFORL) {
++ | jl <7
++ } else {
++ | jhe =>BC_JLOOP
++ }
++ | j <6
++ |9: // Fallback to FP variant.
++ if (!vk) {
++ | jhe ->vmeta_for
++ }
++ if (!vk) {
++ | lg TMPR2, FOR_STOP
++ | checknumtp TMPR2, ->vmeta_for
++ } else {
++#ifdef LUA_USE_ASSERT
++ | lg TMPR2, FOR_STOP
++ | checknumtp TMPR2, ->assert_bad_for_arg_type
++ | lg TMPR2, FOR_STEP
++ | checknumtp TMPR2, ->assert_bad_for_arg_type
++#endif
++ }
++ | lg RB, FOR_STEP
++ if (!vk) {
++ | checknum RB, ->vmeta_for
++ }
++ | ld f0, FOR_IDX
++ | ld f1, FOR_STOP
++ if (vk) {
++ | adb f0, FOR_STEP
++ | std f0, FOR_IDX
++ | cghi RB, 0; jl >3
++ } else {
++ | // TODO: need cmp here?
++ | jl >3
++ }
++ | cdbr f1, f0
++ |1:
++ | std f0, FOR_EXT
++ if (op == BC_FORI) {
++ | jnl <7
++ } else if (op == BC_JFORI) {
++ | branchPC RD
++ | llgh RD, PC_RD
++ | jnl =>BC_JLOOP
++ } else if (op == BC_IFORL) {
++ | jl <7
++ } else {
++ | jnl =>BC_JLOOP
++ }
++ | j <6
++ |
++ |3: // Invert comparison if step is negative.
++ | cdbr f0, f1
++ | j <1
+ break;
++
+ case BC_ITERL:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+
+From e467d784a927eae395e37910f92e970623af90c3 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 19 Dec 2016 16:03:21 -0500
+Subject: [PATCH 133/260] Fix floating point fallback code for 'for' loops.
+
+Adds a dependency on clfi. Not sure how to work around (don't
+really want to always introduce a temporary).
+---
+ src/vm_s390x.dasc | 40 ++++++++++++++++++++++++++++------------
+ 1 file changed, 28 insertions(+), 12 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index c1d6f5fa7..c387975e6 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -18,6 +18,10 @@
+ |// ar0,ar1 | TLS | volatile |
+ |// ar2-ar15 | | volatile |
+ |
++|// Instructions used that are not in base z/Architecture:
++|// clfi (compare logical immediate) [requires z9-109]
++|// TODO: alternative instructions?
++|
+ |.arch s390x
+ |.section code_op, code_sub
+ |
+@@ -225,18 +229,18 @@
+ |// Macros to test operand types.
+ |.macro checktp_nc, reg, tp, target
+ | srag ITYPE, reg, 47(r0)
+-| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
++| clfi ITYPE, tp
+ | jne target
+ |.endmacro
+ |.macro checktp, reg, tp, target
+ | srag ITYPE, reg, 47(r0)
+ | cleartp reg
+-| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
++| clfi ITYPE, tp
+ | jne target
+ |.endmacro
+ |.macro checktptp, src, tp, target
+ | srag ITYPE, src, 47(r0)
+-| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
++| clfi ITYPE, tp
+ | jne target
+ |.endmacro
+ |.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
+@@ -245,7 +249,7 @@
+ |
+ |.macro checknumx, reg, target, jump
+ | srag ITYPE, reg, 47(r0)
+-| cghi ITYPE, LJ_TISNUM // Sign extend LJ_TISNUM tp from 16- to 64-bits.
++| clfi ITYPE, LJ_TISNUM
+ | jump target
+ |.endmacro
+ |.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
+@@ -723,8 +727,19 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ | lgr CARG2, RA
++ | lgr CARG1, RB
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_meta_for // (lua_State *L, TValue *base)
++ | lg BASE, L:RB->base
++ | llgc OP, PC_OP
++ | llgc RA, PC_RA
++ | llgh RD, PC_RD
++ | sllg TMPR1, OP, 3(r0)
++ | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) // Retry FORI or JFORI.
++ | br TMPR1
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+@@ -1215,8 +1230,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ break;
+ case BC_KNUM:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = dst, RD = num const
++ | sllg RD, RD, 3(r0)
++ | ld f0, 0(RD, KBASE)
++ | sllg RA, RA, 3(r0)
++ | std f0, 0(RA, BASE)
++ | ins_next
+ break;
+ case BC_KPRI:
+ | stg r0, 0(r0)
+@@ -1720,11 +1739,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ if (vk) {
+ | adb f0, FOR_STEP
+ | std f0, FOR_IDX
+- | cghi RB, 0; jl >3
+- } else {
+- | // TODO: need cmp here?
+- | jl >3
+ }
++ | cghi RB, 0; jl >3
+ | cdbr f1, f0
+ |1:
+ | std f0, FOR_EXT
+
+From 28b6fe875982a28a61d5754a2dbd837baf9312e3 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Tue, 20 Dec 2016 17:50:29 +0530
+Subject: [PATCH 134/260] Updated encoding for instructions
+
+Replaced the single-letter operand-encoding codes at the end of the map_op templates with the names of their instruction formats (e.g. "g" -> "RR", "h" -> "RRE", "j" -> "RX-a").
+---
+ dynasm/dasm_s390x.lua | 1204 ++++++++++++++++++++---------------------
+ 1 file changed, 602 insertions(+), 602 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index dc1f5e5dc..a5f280d73 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -513,10 +513,12 @@ end
+
+ local function parse_mask(mask)
+ local m3 = parse_number(mask)
+- if ((m3 == 1) or (m3 == 0) or ( m3 >=3 and m3 <=7)) then
+- return m3
+- else
+- werror("Mask value should be 0,1 or 3-7: ", m3)
++ if m3 then
++ if ((m3 == 1) or (m3 == 0) or ( m3 >=3 and m3 <=7)) then
++ return m3
++ else
++ werror("Mask value should be 0,1 or 3-7: ", m3)
++ end
+ end
+ end
+
+@@ -573,593 +575,594 @@ end
+
+ -- Template strings for s390x instructions.
+ map_op = {
+- a_2 = "00005a000000j",
+- ar_2 = "000000001a00g",
+- ay_2 = "e3000000005al",
+- ag_2 = "e30000000008l",
+- agr_2 = "0000b9080000h",
+- agf_2 = "e30000000018l",
+- agfr_2 = "0000b9180000h",
+- axbr_2 = "0000b34a0000h",
+- adbr_2 = "0000b31a0000h",
+- aebr_2 = "0000b30a0000h",
+- aghi_2 = "0000a70b0000i",
+- ah_2 = "00004a000000j",
+- ahi_2 = "0000a70a0000i",
+- ahy_2 = "e3000000007al",
+- afi_2 = "c20900000000n",
+- agfi_2 = "c20800000000n",
+- aih_2 = "cc0800000000n",
+- al_2 = "00005e000000j",
+- alr_2 = "000000001e00g",
+- aly_2 = "e3000000005el",
+- alg_2 = "e3000000000al",
+- algr_2 = "0000b90a0000h",
+- algf_2 = "e3000000001al",
+- algfr_2 = "0000b91a0000h",
+- alfi_2 = "c20b00000000n",
+- algfi_2 = "c20a00000000n",
+- alc_2 = "e30000000098l",
+- alcr_2 = "0000b9980000h",
+- alcg_2 = "e30000000088l",
+- alcgr_2 = "0000b9880000h",
+- alsih_2 = "cc0a00000000n",
+- alsihn_2 = "cc0b00000000n",
+- axr_2 = "000000003600g",
+- ad_2 = "00006a000000j",
+- adr_2 = "000000002a00g",
+- ae_2 = "00007a000000j",
+- aer_2 = "000000003a00g",
+- aw_2 = "00006e000000j",
+- awr_2 = "000000002e00g",
+- au_2 = "00007e000000j",
+- aur_2 = "000000003e00g",
+- n_2 = "000054000000j",
+- nr_2 = "000000001400g",
+- ny_2 = "e30000000054l",
+- ng_2 = "e30000000080l",
+- ngr_2 = "0000b9800000h",
+- nihf_2 = "c00a00000000n",
+- nihh_2 = "0000a5040000i",
+- nihl_2 = "0000a5050000i",
+- nilf_2 = "c00b00000000n",
+- nilh_2 = "0000a5060000i",
+- nill_2 = "0000a5070000i",
+- bal_2 = "000045000000j",
+- balr_2 = "000000000500g",
+- bas_2 = "00004d000000j",
+- basr_2 = "000000000d00g",
+- bassm_2 = "000000000c00g",
+- bsa_2 = "0000b25a0000h",
+- bsm_2 = "000000000b00g",
+- bakr_2 = "0000b2400000h",
+- bsg_2 = "0000b2580000h",
+- bc_2 = "000047000000k",
+- bcr_2 = "000000000700g",
+- bct_2 = "000046000000j",
+- bctr_2 = "000000000600g",
+- bctg_2 = "e30000000046l",
+- bctgr_2 = "0000b9460000h",
+- bxh_3 = "000086000000q",
+- bxhg_3 = "eb0000000044s",
+- bxle_3 = "000087000000q",
+- bxleg_3 = "eb0000000045s",
+- brasl_2 = "c00500000000o",
+- brcl_2 = "c00400000000p",
+- brcth_2 = "cc0600000000o",
+- cksm_2 = "0000b2410000h",
+- km_2 = "0000b92e0000h",
+- kmf_2 = "0000b92a0000h",
+- kmc_2 = "0000b92f0000h",
+- kmo_2 = "0000b92b0000h",
+- c_2 = "000059000000j",
+- cr_2 = "000000001900g",
+- cy_2 = "e30000000059l",
+- cg_2 = "e30000000020l",
+- cgr_2 = "0000b9200000h",
+- cgf_2 = "e30000000030l",
+- cgfr_2 = "0000b9300000h",
+- cxbr_2 = "0000b3490000h",
+- cxtr_2 = "0000b3ec0000h",
+- cxr_2 = "0000b3690000h",
+- cdbr_2 = "0000b3190000h",
+- cdtr_2 = "0000b3e40000h",
+- cd_2 = "000069000000j",
+- cdr_2 = "000000002900g",
+- cebr_2 = "0000b3090000h",
+- ce_2 = "000079000000j",
+- cer_2 = "000000003900g",
+- kxbr_2 = "0000b3480000h",
+- kxtr_2 = "0000b3e80000h",
+- kdbr_2 = "0000b3180000h",
+- kdtr_2 = "0000b3e00000h",
+- kebr_2 = "0000b3080000h",
+- cs_3 = "0000ba000000q",
+- csy_3 = "eb0000000014s",
+- csg_3 = "eb0000000030s",
+- csp_2 = "0000b2500000h",
+- cspg_2 = "0000b98a0000h",
+- cextr_2 = "0000b3fc0000h",
+- cedtr_2 = "0000b3f40000h",
+- cds_3 = "0000bb000000q",
+- cdsy_3 = "eb0000000031s",
+- cdsg_3 = "eb000000003es",
+- ch_2 = "000049000000j",
+- chy_2 = "e30000000079l",
+- cgh_2 = "e30000000034l",
+- chrl_2 = "c60500000000o",
+- cghrl_2 = "c60400000000o",
+- chf_2 = "e300000000cdl",
+- chhr_2 = "0000b9cd0000h",
+- chlr_2 = "0000b9dd0000h",
+- cfi_2 = "c20d00000000n",
+- cgfi_2 = "c20c00000000n",
+- cih_2 = "cc0d00000000n",
+- cl_2 = "000055000000j",
+- clr_2 = "000000001500g",
+- cly_2 = "e30000000055l",
+- clg_2 = "e30000000021l",
+- clgr_2 = "0000b9210000h",
+- clgf_2 = "e30000000031l",
+- clgfr_2 = "0000b9310000h",
+- clmh_3 = "eb0000000020t",
+- clm_3 = "0000bd000000r",
+- clmy_3 = "eb0000000021t",
+- clhf_2 = "e300000000cfl",
+- clhhr_2 = "0000b9cf0000h",
+- clhlr_2 = "0000b9df0000h",
+- clfi_2 = "c20f00000000n",
+- clgfi_2 = "c20e00000000n",
+- clih_2 = "cc0f00000000n",
+- clcl_2 = "000000000f00g",
+- clcle_3 = "0000a9000000q",
+- clclu_3 = "eb000000008fs",
+- clrl_2 = "c60f00000000o",
+- clhrl_2 = "c60700000000o",
+- clgrl_2 = "c60a00000000o",
+- clghrl_2 = "c60600000000o",
+- clgfrl_2 = "c60e00000000o",
+- clst_2 = "0000b25d0000h",
+- crl_2 = "c60d00000000o",
+- cgrl_2 = "c60800000000o",
+- cgfrl_2 = "c60c00000000o",
+- cuse_2 = "0000b2570000h",
+- cmpsc_2 = "0000b2630000h",
+- kimd_2 = "0000b93e0000h",
+- klmd_2 = "0000b93f0000h",
+- kmac_2 = "0000b91e0000h",
+- thdr_2 = "0000b3590000h",
+- thder_2 = "0000b3580000h",
+- cxfbr_2 = "0000b3960000h",
+- cxftr_2 = "0000b9590000h",
+- cxfr_2 = "0000b3b60000h",
+- cdfbr_2 = "0000b3950000h",
+- cdftr_2 = "0000b9510000h",
+- cdfr_2 = "0000b3b50000h",
+- cefbr_2 = "0000b3940000h",
+- cefr_2 = "0000b3b40000h",
+- cxgbr_2 = "0000b3a60000h",
+- cxgtr_2 = "0000b3f90000h",
+- cxgr_2 = "0000b3c60000h",
+- cdgbr_2 = "0000b3a50000h",
+- cdgtr_2 = "0000b3f10000h",
+- cdgr_2 = "0000b3c50000h",
+- cegbr_2 = "0000b3a40000h",
+- cegr_2 = "0000b3c40000h",
+- cxstr_2 = "0000b3fb0000h",
+- cdstr_2 = "0000b3f30000h",
+- cxutr_2 = "0000b3fa0000h",
+- cdutr_2 = "0000b3f20000h",
+- cvb_2 = "00004f000000j",
+- cvby_2 = "e30000000006l",
+- cvbg_2 = "e3000000000el",
+- cvd_2 = "00004e000000j",
+- cvdy_2 = "e30000000026l",
+- cvdg_2 = "e3000000002el",
+- cuxtr_2 = "0000b3ea0000h",
+- cudtr_2 = "0000b3e20000h",
+- cu42_2 = "0000b9b30000h",
+- cu41_2 = "0000b9b20000h",
+- cpya_2 = "0000b24d0000h",
+- d_2 = "00005d000000j",
+- dr_2 = "000000001d00g",
+- dxbr_2 = "0000b34d0000h",
+- dxr_2 = "0000b22d0000h",
+- ddbr_2 = "0000b31d0000h",
+- dd_2 = "00006d000000j",
+- ddr_2 = "000000002d00g",
+- debr_2 = "0000b30d0000h",
+- de_2 = "00007d000000j",
+- der_2 = "000000003d00g",
+- dl_2 = "e30000000097l",
+- dlr_2 = "0000b9970000h",
+- dlg_2 = "e30000000087l",
+- dlgr_2 = "0000b9870000h",
+- dsg_2 = "e3000000000dl",
+- dsgr_2 = "0000b90d0000h",
+- dsgf_2 = "e3000000001dl",
+- dsgfr_2 = "0000b91d0000h",
+- x_2 = "000057000000j",
+- xr_2 = "000000001700g",
+- xy_2 = "e30000000057l",
+- xg_2 = "e30000000082l",
+- xgr_2 = "0000b9820000h",
+- xihf_2 = "c00600000000n",
+- xilf_2 = "c00700000000n",
+- ex_2 = "000044000000j",
+- exrl_2 = "c60000000000o",
+- ear_2 = "0000b24f0000h",
+- esea_2 = "0000b99d0000h",
+- eextr_2 = "0000b3ed0000h",
+- eedtr_2 = "0000b3e50000h",
+- ecag_3 = "eb000000004cs",
+- efpc_2 = "0000b38c0000h",
+- epar_2 = "0000b2260000h",
+- epair_2 = "0000b99a0000h",
+- epsw_2 = "0000b98d0000h",
+- esar_2 = "0000b2270000h",
+- esair_2 = "0000b99b0000h",
+- esxtr_2 = "0000b3ef0000h",
+- esdtr_2 = "0000b3e70000h",
+- ereg_2 = "0000b2490000h",
+- eregg_2 = "0000b90e0000h",
+- esta_2 = "0000b24a0000h",
+- flogr_2 = "0000b9830000h",
+- hdr_2 = "000000002400g",
+- her_2 = "000000003400g",
+- iac_2 = "0000b2240000h",
+- ic_2 = "000043000000j",
+- icy_2 = "e30000000073l",
+- icmh_3 = "eb0000000080t",
+- icm_3 = "0000bf000000r",
+- icmy_3 = "eb0000000081t",
+- iihf_2 = "c00800000000n",
+- iihh_2 = "0000a5000000i",
+- iihl_2 = "0000a5010000i",
+- iilf_2 = "c00900000000n",
+- iilh_2 = "0000a5020000i",
+- iill_2 = "0000a5030000i",
+- ipm_2 = "0000b2220000h",
+- iske_2 = "0000b2290000h",
+- ivsk_2 = "0000b2230000h",
+- l_2 = "000058000000j",
+- lr_2 = "000000001800g",
+- ly_2 = "e30000000058l",
+- lg_2 = "e30000000004l",
+- lgr_2 = "0000b9040000h",
+- lgf_2 = "e30000000014l",
+- lgfr_2 = "0000b9140000h",
+- lghi_2 = "0000a7090000i",
+- lxr_2 = "0000b3650000h",
+- ld_2 = "000068000000j",
+- ldr_2 = "000000002800g",
+- ldy_2 = "ed0000000065l",
+- le_2 = "000078000000j",
+- ler_2 = "000000003800g",
+- ley_2 = "ed0000000064l",
+- lam_3 = "00009a000000q",
+- lamy_3 = "eb000000009as",
+- la_2 = "000041000000j",
+- lay_2 = "e30000000071l",
+- lae_2 = "000051000000j",
+- laey_2 = "e30000000075l",
+- larl_2 = "c00000000000o",
+- laa_3 = "eb00000000f8s",
+- laag_3 = "eb00000000e8s",
+- laal_3 = "eb00000000fas",
+- laalg_3 = "eb00000000eas",
+- lan_3 = "eb00000000f4s",
+- lang_3 = "eb00000000e4s",
+- lax_3 = "eb00000000f7s",
+- laxg_3 = "eb00000000e7s",
+- lao_3 = "eb00000000f6s",
+- laog_3 = "eb00000000e6s",
+- lt_2 = "e30000000012l",
+- ltr_2 = "000000001200g",
+- ltg_2 = "e30000000002l",
+- ltgr_2 = "0000b9020000h",
+- ltgf_2 = "e30000000032l",
+- ltgfr_2 = "0000b9120000h",
+- ltxbr_2 = "0000b3420000h",
+- ltxtr_2 = "0000b3de0000h",
+- ltxr_2 = "0000b3620000h",
+- ltdbr_2 = "0000b3120000h",
+- ltdtr_2 = "0000b3d60000h",
+- ltdr_2 = "000000002200g",
+- ltebr_2 = "0000b3020000h",
+- lter_2 = "000000003200g",
+- lb_2 = "e30000000076l",
+- lbr_2 = "0000b9260000h",
+- lgb_2 = "e30000000077l",
+- lgbr_2 = "0000b9060000h",
+- lbh_2 = "e300000000c0l",
+- lcr_2 = "000000001300g",
+- lcgr_2 = "0000b9030000h",
+- lcgfr_2 = "0000b9130000h",
+- lcxbr_2 = "0000b3430000h",
+- lcxr_2 = "0000b3630000h",
+- lcdbr_2 = "0000b3130000h",
+- lcdr_2 = "000000002300g",
+- lcdfr_2 = "0000b3730000h",
+- lcebr_2 = "0000b3030000h",
+- lcer_2 = "000000003300g",
+- lctl_3 = "0000b7000000q",
+- lctlg_3 = "eb000000002fs",
+- fixr_2 = "0000b3670000h",
+- fidr_2 = "0000b37f0000h",
+- fier_2 = "0000b3770000h",
+- ldgr_2 = "0000b3c10000h",
+- lgdr_2 = "0000b3cd0000h",
+- lh_2 = "000048000000j",
+- lhr_2 = "0000b9270000h",
+- lhy_2 = "e30000000078l",
+- lgh_2 = "e30000000015l",
+- lghr_2 = "0000b9070000h",
+- lhh_2 = "e300000000c4l",
+- lhi_2 = "0000a7080000i",
+- lhrl_2 = "c40500000000o",
+- lghrl_2 = "c40400000000o",
+- lfh_2 = "e300000000cal",
+- lgfi_2 = "c00100000000n",
+- lxdbr_2 = "0000b3050000h",
+- lxdr_2 = "0000b3250000h",
+- lxebr_2 = "0000b3060000h",
+- lxer_2 = "0000b3260000h",
+- ldebr_2 = "0000b3040000h",
+- lder_2 = "0000b3240000h",
+- llgf_2 = "e30000000016l",
+- llgfr_2 = "0000b9160000h",
+- llc_2 = "e30000000094l",
+- llcr_2 = "0000b9940000h",
+- llgc_2 = "e30000000090l",
+- llgcr_2 = "0000b9840000h",
+- llch_2 = "e300000000c2l",
+- llh_2 = "e30000000095l",
+- llhr_2 = "0000b9950000h",
+- llgh_2 = "e30000000091l",
+- llghr_2 = "0000b9850000h",
+- llhh_2 = "e300000000c6l",
+- llhrl_2 = "c40200000000o",
+- llghrl_2 = "c40600000000o",
+- llihf_2 = "c00e00000000n",
+- llihh_2 = "0000a50c0000i",
+- llihl_2 = "0000a50d0000i",
+- llilf_2 = "c00f00000000n",
+- llilh_2 = "0000a50e0000i",
+- llill_2 = "0000a50f0000i",
+- llgfrl_2 = "c40e00000000o",
+- llgt_2 = "e30000000017l",
+- llgtr_2 = "0000b9170000h",
+- lm_3 = "000098000000q",
+- lmy_3 = "eb0000000098s",
+- lmg_3 = "eb0000000004s",
+- lmh_3 = "eb0000000096s",
+- lnr_2 = "000000001100g",
+- lngr_2 = "0000b9010000h",
+- lngfr_2 = "0000b9110000h",
+- lnxbr_2 = "0000b3410000h",
+- lnxr_2 = "0000b3610000h",
+- lndbr_2 = "0000b3110000h",
+- lndr_2 = "000000002100g",
+- lndfr_2 = "0000b3710000h",
+- lnebr_2 = "0000b3010000h",
+- lner_2 = "000000003100g",
+- loc_3 = "eb00000000f2t",
+- locg_3 = "eb00000000e2t",
+- lpq_2 = "e3000000008fl",
+- lpr_2 = "000000001000g",
+- lpgr_2 = "0000b9000000h",
+- lpgfr_2 = "0000b9100000h",
+- lpxbr_2 = "0000b3400000h",
+- lpxr_2 = "0000b3600000h",
+- lpdbr_2 = "0000b3100000h",
+- lpdr_2 = "000000002000g",
+- lpdfr_2 = "0000b3700000h",
+- lpebr_2 = "0000b3000000h",
+- lper_2 = "000000003000g",
+- lra_2 = "0000b1000000j",
+- lray_2 = "e30000000013l",
+- lrag_2 = "e30000000003l",
+- lrl_2 = "c40d00000000o",
+- lgrl_2 = "c40800000000o",
+- lgfrl_2 = "c40c00000000o",
+- lrvh_2 = "e3000000001fl",
+- lrv_2 = "e3000000001el",
+- lrvr_2 = "0000b91f0000h",
+- lrvg_2 = "e3000000000fl",
+- lrvgr_2 = "0000b90f0000h",
+- ldxbr_2 = "0000b3450000h",
+- ldxr_2 = "000000002500g",
+- lrdr_2 = "000000002500g",
+- lexbr_2 = "0000b3460000h",
+- lexr_2 = "0000b3660000h",
+- ledbr_2 = "0000b3440000h",
+- ledr_2 = "000000003500g",
+- lrer_2 = "000000003500g",
+- lura_2 = "0000b24b0000h",
+- lurag_2 = "0000b9050000h",
+- lzxr_2 = "0000b3760000h",
+- lzdr_2 = "0000b3750000h",
+- lzer_2 = "0000b3740000h",
+- msta_2 = "0000b2470000h",
+- mvcl_2 = "000000000e00g",
+- mvcle_3 = "0000a8000000q",
+- mvclu_3 = "eb000000008es",
+- mvpg_2 = "0000b2540000h",
+- mvst_2 = "0000b2550000h",
+- m_2 = "00005c000000j",
+- mfy_2 = "e3000000005cl",
+- mr_2 = "000000001c00g",
+- mxbr_2 = "0000b34c0000h",
+- mxr_2 = "000000002600g",
+- mdbr_2 = "0000b31c0000h",
+- md_2 = "00006c000000j",
+- mdr_2 = "000000002c00g",
+- mxdbr_2 = "0000b3070000h",
+- mxd_2 = "000067000000j",
+- mxdr_2 = "000000002700g",
+- meebr_2 = "0000b3170000h",
+- meer_2 = "0000b3370000h",
+- mdebr_2 = "0000b30c0000h",
+- mde_2 = "00007c000000j",
+- mder_2 = "000000003c00g",
+- me_2 = "00007c000000j",
+- mer_2 = "000000003c00g",
+- mh_2 = "00004c000000j",
+- mhy_2 = "e3000000007cl",
+- mlg_2 = "e30000000086l",
+- mlgr_2 = "0000b9860000h",
+- ml_2 = "e30000000096l",
+- mlr_2 = "0000b9960000h",
+- ms_2 = "000071000000j",
+- msr_2 = "0000b2520000h",
+- msy_2 = "e30000000051l",
+- msg_2 = "e3000000000cl",
+- msgr_2 = "0000b90c0000h",
+- msgf_2 = "e3000000001cl",
+- msgfr_2 = "0000b91c0000h",
+- msfi_2 = "c20100000000n",
+- msgfi_2 = "c20000000000n",
+- maer_3 = "0000b32e0000r",
++ a_2 = "00005a000000RX-a",
++ ar_2 = "000000001a00RR",
++ ay_2 = "e3000000005aRXY-a",
++ ag_2 = "e30000000008RXY-a",
++ agr_2 = "0000b9080000RRE",
++ agf_2 = "e30000000018RXY-a",
++ agfr_2 = "0000b9180000RRE",
++ axbr_2 = "0000b34a0000RRE",
++ adbr_2 = "0000b31a0000RRE",
++ aebr_2 = "0000b30a0000RRE",
++ aghi_2 = "0000a70b0000RI-a",
++ ah_2 = "00004a000000RX-a",
++ ahi_2 = "0000a70a0000RI-a",
++ ahy_2 = "e3000000007aRXY-a",
++ afi_2 = "c20900000000RIL-a",
++ agfi_2 = "c20800000000RIL-a",
++ aih_2 = "cc0800000000RIL-a",
++ al_2 = "00005e000000RX-a",
++ alr_2 = "000000001e00RR",
++ aly_2 = "e3000000005eRXY-a",
++ alg_2 = "e3000000000aRXY-a",
++ algr_2 = "0000b90a0000RRE",
++ algf_2 = "e3000000001aRXY-a",
++ algfr_2 = "0000b91a0000RRE",
++ alfi_2 = "c20b00000000RIL-a",
++ algfi_2 = "c20a00000000RIL-a",
++ alc_2 = "e30000000098RXY-a",
++ alcr_2 = "0000b9980000RRE",
++ alcg_2 = "e30000000088RXY-a",
++ alcgr_2 = "0000b9880000RRE",
++ alsih_2 = "cc0a00000000RIL-a",
++ alsihn_2 = "cc0b00000000RIL-a",
++ axr_2 = "000000003600RR",
++ ad_2 = "00006a000000RX-a",
++ adr_2 = "000000002a00RR",
++ ae_2 = "00007a000000RX-a",
++ aer_2 = "000000003a00RR",
++ aw_2 = "00006e000000RX-a",
++ awr_2 = "000000002e00RR",
++ au_2 = "00007e000000RX-a",
++ aur_2 = "000000003e00RR",
++ n_2 = "000054000000RX-a",
++ nr_2 = "000000001400RR",
++ ny_2 = "e30000000054RXY-a",
++ ng_2 = "e30000000080RXY-a",
++ ngr_2 = "0000b9800000RRE",
++ nihf_2 = "c00a00000000RIL-a",
++ nihh_2 = "0000a5040000RI-a",
++ nihl_2 = "0000a5050000RI-a",
++ nilf_2 = "c00b00000000RIL-a",
++ nilh_2 = "0000a5060000RI-a",
++ nill_2 = "0000a5070000RI-a",
++ bal_2 = "000045000000RX-a",
++ balr_2 = "000000000500RR",
++ bas_2 = "00004d000000RX-a",
++ basr_2 = "000000000d00RR",
++ bassm_2 = "000000000c00RR",
++ bsa_2 = "0000b25a0000RRE",
++ bsm_2 = "000000000b00RR",
++ bakr_2 = "0000b2400000RRE",
++ bsg_2 = "0000b2580000RRE",
++ bc_2 = "000047000000RX-b",
++ bcr_2 = "000000000700RR",
++ bct_2 = "000046000000RX-a",
++ bctr_2 = "000000000600RR",
++ bctg_2 = "e30000000046RXY-a",
++ bctgr_2 = "0000b9460000RRE",
++ bxh_3 = "000086000000RS-a",
++ bxhg_3 = "eb0000000044RSY-a",
++ bxle_3 = "000087000000RS-a",
++ bxleg_3 = "eb0000000045RSY-a",
++ brasl_2 = "c00500000000RIL-b",
++ brcl_2 = "c00400000000RIL-c",
++ brcth_2 = "cc0600000000RIL-b",
++ cksm_2 = "0000b2410000RRE",
++ km_2 = "0000b92e0000RRE",
++ kmf_2 = "0000b92a0000RRE",
++ kmc_2 = "0000b92f0000RRE",
++ kmo_2 = "0000b92b0000RRE",
++ c_2 = "000059000000RX-a",
++ cr_2 = "000000001900RR",
++ cy_2 = "e30000000059RXY-a",
++ cg_2 = "e30000000020RXY-a",
++ cgr_2 = "0000b9200000RRE",
++ cgf_2 = "e30000000030RXY-a",
++ cgfr_2 = "0000b9300000RRE",
++ cxbr_2 = "0000b3490000RRE",
++ cxtr_2 = "0000b3ec0000RRE",
++ cxr_2 = "0000b3690000RRE",
++ cdbr_2 = "0000b3190000RRE",
++ cdtr_2 = "0000b3e40000RRE",
++ cd_2 = "000069000000RX-a",
++ cdr_2 = "000000002900RR",
++ cebr_2 = "0000b3090000RRE",
++ ce_2 = "000079000000RX-a",
++ cer_2 = "000000003900RR",
++ kxbr_2 = "0000b3480000RRE",
++ kxtr_2 = "0000b3e80000RRE",
++ kdbr_2 = "0000b3180000RRE",
++ kdtr_2 = "0000b3e00000RRE",
++ kebr_2 = "0000b3080000RRE",
++ cs_3 = "0000ba000000RS-a",
++ csy_3 = "eb0000000014RSY-a",
++ csg_3 = "eb0000000030RSY-a",
++ csp_2 = "0000b2500000RRE",
++ cspg_2 = "0000b98a0000RRE",
++ cextr_2 = "0000b3fc0000RRE",
++ cedtr_2 = "0000b3f40000RRE",
++ cds_3 = "0000bb000000RS-a",
++ cdsy_3 = "eb0000000031RSY-a",
++ cdsg_3 = "eb000000003eRSY-a",
++ ch_2 = "000049000000RX-a",
++ chy_2 = "e30000000079RXY-a",
++ cgh_2 = "e30000000034RXY-a",
++ chrl_2 = "c60500000000RIL-b",
++ cghrl_2 = "c60400000000RIL-b",
++ chf_2 = "e300000000cdRXY-a",
++ chhr_2 = "0000b9cd0000RRE",
++ chlr_2 = "0000b9dd0000RRE",
++ cfi_2 = "c20d00000000RIL-a",
++ cgfi_2 = "c20c00000000RIL-a",
++ cih_2 = "cc0d00000000RIL-a",
++ cl_2 = "000055000000RX-a",
++ clr_2 = "000000001500RR",
++ cly_2 = "e30000000055RXY-a",
++ clg_2 = "e30000000021RXY-a",
++ clgr_2 = "0000b9210000RRE",
++ clgf_2 = "e30000000031RXY-a",
++ clgfr_2 = "0000b9310000RRE",
++ clmh_3 = "eb0000000020RSY-b",
++ clm_3 = "0000bd000000RS-b",
++ clmy_3 = "eb0000000021RSY-b",
++ clhf_2 = "e300000000cfRXY-a",
++ clhhr_2 = "0000b9cf0000RRE",
++ clhlr_2 = "0000b9df0000RRE",
++ clfi_2 = "c20f00000000RIL-a",
++ clgfi_2 = "c20e00000000RIL-a",
++ clih_2 = "cc0f00000000RIL-a",
++ clcl_2 = "000000000f00RR",
++ clcle_3 = "0000a9000000RS-a",
++ clclu_3 = "eb000000008fRSY-a",
++ clrl_2 = "c60f00000000RIL-b",
++ clhrl_2 = "c60700000000RIL-b",
++ clgrl_2 = "c60a00000000RIL-b",
++ clghrl_2 = "c60600000000RIL-b",
++ clgfrl_2 = "c60e00000000RIL-b",
++ clst_2 = "0000b25d0000RRE",
++ crl_2 = "c60d00000000RIL-b",
++ cgrl_2 = "c60800000000RIL-b",
++ cgfrl_2 = "c60c00000000RIL-b",
++ cuse_2 = "0000b2570000RRE",
++ cmpsc_2 = "0000b2630000RRE",
++ kimd_2 = "0000b93e0000RRE",
++ klmd_2 = "0000b93f0000RRE",
++ kmac_2 = "0000b91e0000RRE",
++ thdr_2 = "0000b3590000RRE",
++ thder_2 = "0000b3580000RRE",
++ cxfbr_2 = "0000b3960000RRE",
++ cxftr_2 = "0000b9590000RRE",
++ cxfr_2 = "0000b3b60000RRE",
++ cdfbr_2 = "0000b3950000RRE",
++ cdftr_2 = "0000b9510000RRE",
++ cdfr_2 = "0000b3b50000RRE",
++ cefbr_2 = "0000b3940000RRE",
++ cefr_2 = "0000b3b40000RRE",
++ cxgbr_2 = "0000b3a60000RRE",
++ cxgtr_2 = "0000b3f90000RRE",
++ cxgr_2 = "0000b3c60000RRE",
++ cdgbr_2 = "0000b3a50000RRE",
++ cdgtr_2 = "0000b3f10000RRE",
++ cdgr_2 = "0000b3c50000RRE",
++ cegbr_2 = "0000b3a40000RRE",
++ cegr_2 = "0000b3c40000RRE",
++ cxstr_2 = "0000b3fb0000RRE",
++ cdstr_2 = "0000b3f30000RRE",
++ cxutr_2 = "0000b3fa0000RRE",
++ cdutr_2 = "0000b3f20000RRE",
++ cvb_2 = "00004f000000RX-a",
++ cvby_2 = "e30000000006RXY-a",
++ cvbg_2 = "e3000000000eRXY-a",
++ cvd_2 = "00004e000000RX-a",
++ cvdy_2 = "e30000000026RXY-a",
++ cvdg_2 = "e3000000002eRXY-a",
++ cuxtr_2 = "0000b3ea0000RRE",
++ cudtr_2 = "0000b3e20000RRE",
++ cu42_2 = "0000b9b30000RRE",
++ cu41_2 = "0000b9b20000RRE",
++ cpya_2 = "0000b24d0000RRE",
++ d_2 = "00005d000000RX-a",
++ dr_2 = "000000001d00RR",
++ dxbr_2 = "0000b34d0000RRE",
++ dxr_2 = "0000b22d0000RRE",
++ ddbr_2 = "0000b31d0000RRE",
++ dd_2 = "00006d000000RX-a",
++ ddr_2 = "000000002d00RR",
++ debr_2 = "0000b30d0000RRE",
++ de_2 = "00007d000000RX-a",
++ der_2 = "000000003d00RR",
++ dl_2 = "e30000000097RXY-a",
++ dlr_2 = "0000b9970000RRE",
++ dlg_2 = "e30000000087RXY-a",
++ dlgr_2 = "0000b9870000RRE",
++ dsg_2 = "e3000000000dRXY-a",
++ dsgr_2 = "0000b90d0000RRE",
++ dsgf_2 = "e3000000001dRXY-a",
++ dsgfr_2 = "0000b91d0000RRE",
++ x_2 = "000057000000RX-a",
++ xr_2 = "000000001700RR",
++ xy_2 = "e30000000057RXY-a",
++ xg_2 = "e30000000082RXY-a",
++ xgr_2 = "0000b9820000RRE",
++ xihf_2 = "c00600000000RIL-a",
++ xilf_2 = "c00700000000RIL-a",
++ ex_2 = "000044000000RX-a",
++ exrl_2 = "c60000000000RIL-b",
++ ear_2 = "0000b24f0000RRE",
++ esea_2 = "0000b99d0000RRE",
++ eextr_2 = "0000b3ed0000RRE",
++ eedtr_2 = "0000b3e50000RRE",
++ ecag_3 = "eb000000004cRSY-a",
++ efpc_2 = "0000b38c0000RRE",
++ epar_2 = "0000b2260000RRE",
++ epair_2 = "0000b99a0000RRE",
++ epsw_2 = "0000b98d0000RRE",
++ esar_2 = "0000b2270000RRE",
++ esair_2 = "0000b99b0000RRE",
++ esxtr_2 = "0000b3ef0000RRE",
++ esdtr_2 = "0000b3e70000RRE",
++ ereg_2 = "0000b2490000RRE",
++ eregg_2 = "0000b90e0000RRE",
++ esta_2 = "0000b24a0000RRE",
++ flogr_2 = "0000b9830000RRE",
++ hdr_2 = "000000002400RR",
++ her_2 = "000000003400RR",
++ iac_2 = "0000b2240000RRE",
++ ic_2 = "000043000000RX-a",
++ icy_2 = "e30000000073RXY-a",
++ icmh_3 = "eb0000000080RSY-b",
++ icm_3 = "0000bf000000RS-b",
++ icmy_3 = "eb0000000081RSY-b",
++ iihf_2 = "c00800000000RIL-a",
++ iihh_2 = "0000a5000000RI-a",
++ iihl_2 = "0000a5010000RI-a",
++ iilf_2 = "c00900000000RIL-a",
++ iilh_2 = "0000a5020000RI-a",
++ iill_2 = "0000a5030000RI-a",
++ ipm_2 = "0000b2220000RRE",
++ iske_2 = "0000b2290000RRE",
++ ivsk_2 = "0000b2230000RRE",
++ l_2 = "000058000000RX-a",
++ lr_2 = "000000001800RR",
++ ly_2 = "e30000000058RXY-a",
++ lg_2 = "e30000000004RXY-a",
++ lgr_2 = "0000b9040000RRE",
++ lgf_2 = "e30000000014RXY-a",
++ lgfr_2 = "0000b9140000RRE",
++ lghi_2 = "0000a7090000RI-a",
++ lxr_2 = "0000b3650000RRE",
++ ld_2 = "000068000000RX-a",
++ ldr_2 = "000000002800RR",
++ ldy_2 = "ed0000000065RXY-a",
++ le_2 = "000078000000RX-a",
++ ler_2 = "000000003800RR",
++ ley_2 = "ed0000000064RXY-a",
++ lam_3 = "00009a000000RS-a",
++ lamy_3 = "eb000000009aRSY-a",
++ la_2 = "000041000000RX-a",
++ lay_2 = "e30000000071RXY-a",
++ lae_2 = "000051000000RX-a",
++ laey_2 = "e30000000075RXY-a",
++ larl_2 = "c00000000000RIL-b",
++ laa_3 = "eb00000000f8RSY-a",
++ laag_3 = "eb00000000e8RSY-a",
++ laal_3 = "eb00000000faRSY-a",
++ laalg_3 = "eb00000000eaRSY-a",
++ lan_3 = "eb00000000f4RSY-a",
++ lang_3 = "eb00000000e4RSY-a",
++ lax_3 = "eb00000000f7RSY-a",
++ laxg_3 = "eb00000000e7RSY-a",
++ lao_3 = "eb00000000f6RSY-a",
++ laog_3 = "eb00000000e6RSY-a",
++ lt_2 = "e30000000012RXY-a",
++ ltr_2 = "000000001200RR",
++ ltg_2 = "e30000000002RXY-a",
++ ltgr_2 = "0000b9020000RRE",
++ ltgf_2 = "e30000000032RXY-a",
++ ltgfr_2 = "0000b9120000RRE",
++ ltxbr_2 = "0000b3420000RRE",
++ ltxtr_2 = "0000b3de0000RRE",
++ ltxr_2 = "0000b3620000RRE",
++ ltdbr_2 = "0000b3120000RRE",
++ ltdtr_2 = "0000b3d60000RRE",
++ ltdr_2 = "000000002200RR",
++ ltebr_2 = "0000b3020000RRE",
++ lter_2 = "000000003200RR",
++ lb_2 = "e30000000076RXY-a",
++ lbr_2 = "0000b9260000RRE",
++ lgb_2 = "e30000000077RXY-a",
++ lgbr_2 = "0000b9060000RRE",
++ lbh_2 = "e300000000c0RXY-a",
++ lcr_2 = "000000001300RR",
++ lcgr_2 = "0000b9030000RRE",
++ lcgfr_2 = "0000b9130000RRE",
++ lcxbr_2 = "0000b3430000RRE",
++ lcxr_2 = "0000b3630000RRE",
++ lcdbr_2 = "0000b3130000RRE",
++ lcdr_2 = "000000002300RR",
++ lcdfr_2 = "0000b3730000RRE",
++ lcebr_2 = "0000b3030000RRE",
++ lcer_2 = "000000003300RR",
++ lctl_3 = "0000b7000000RS-a",
++ lctlg_3 = "eb000000002fRSY-a",
++ fixr_2 = "0000b3670000RRE",
++ fidr_2 = "0000b37f0000RRE",
++ fier_2 = "0000b3770000RRE",
++ ldgr_2 = "0000b3c10000RRE",
++ lgdr_2 = "0000b3cd0000RRE",
++ lh_2 = "000048000000RX-a",
++ lhr_2 = "0000b9270000RRE",
++ lhy_2 = "e30000000078RXY-a",
++ lgh_2 = "e30000000015RXY-a",
++ lghr_2 = "0000b9070000RRE",
++ lhh_2 = "e300000000c4RXY-a",
++ lhi_2 = "0000a7080000RI-a",
++ lhrl_2 = "c40500000000RIL-b",
++ lghrl_2 = "c40400000000RIL-b",
++ lfh_2 = "e300000000caRXY-a",
++ lgfi_2 = "c00100000000RIL-a",
++ lxdbr_2 = "0000b3050000RRE",
++ lxdr_2 = "0000b3250000RRE",
++ lxebr_2 = "0000b3060000RRE",
++ lxer_2 = "0000b3260000RRE",
++ ldebr_2 = "0000b3040000RRE",
++ lder_2 = "0000b3240000RRE",
++ llgf_2 = "e30000000016RXY-a",
++ llgfr_2 = "0000b9160000RRE",
++ llc_2 = "e30000000094RXY-a",
++ llcr_2 = "0000b9940000RRE",
++ llgc_2 = "e30000000090RXY-a",
++ llgcr_2 = "0000b9840000RRE",
++ llch_2 = "e300000000c2RXY-a",
++ llh_2 = "e30000000095RXY-a",
++ llhr_2 = "0000b9950000RRE",
++ llgh_2 = "e30000000091RXY-a",
++ llghr_2 = "0000b9850000RRE",
++ llhh_2 = "e300000000c6RXY-a",
++ llhrl_2 = "c40200000000RIL-b",
++ llghrl_2 = "c40600000000RIL-b",
++ llihf_2 = "c00e00000000RIL-a",
++ llihh_2 = "0000a50c0000RI-a",
++ llihl_2 = "0000a50d0000RI-a",
++ llilf_2 = "c00f00000000RIL-a",
++ llilh_2 = "0000a50e0000RI-a",
++ llill_2 = "0000a50f0000RI-a",
++ llgfrl_2 = "c40e00000000RIL-b",
++ llgt_2 = "e30000000017RXY-a",
++ llgtr_2 = "0000b9170000RRE",
++ lm_3 = "000098000000RS-a",
++ lmy_3 = "eb0000000098RSY-a",
++ lmg_3 = "eb0000000004RSY-a",
++ lmh_3 = "eb0000000096RSY-a",
++ lnr_2 = "000000001100RR",
++ lngr_2 = "0000b9010000RRE",
++ lngfr_2 = "0000b9110000RRE",
++ lnxbr_2 = "0000b3410000RRE",
++ lnxr_2 = "0000b3610000RRE",
++ lndbr_2 = "0000b3110000RRE",
++ lndr_2 = "000000002100RR",
++ lndfr_2 = "0000b3710000RRE",
++ lnebr_2 = "0000b3010000RRE",
++ lner_2 = "000000003100RR",
++ loc_3 = "eb00000000f2RSY-b",
++ locg_3 = "eb00000000e2RSY-b",
++ lpq_2 = "e3000000008fRXY-a",
++ lpr_2 = "000000001000RR",
++ lpgr_2 = "0000b9000000RRE",
++ lpgfr_2 = "0000b9100000RRE",
++ lpxbr_2 = "0000b3400000RRE",
++ lpxr_2 = "0000b3600000RRE",
++ lpdbr_2 = "0000b3100000RRE",
++ lpdr_2 = "000000002000RR",
++ lpdfr_2 = "0000b3700000RRE",
++ lpebr_2 = "0000b3000000RRE",
++ lper_2 = "000000003000RR",
++ lra_2 = "0000b1000000RX-a",
++ lray_2 = "e30000000013RXY-a",
++ lrag_2 = "e30000000003RXY-a",
++ lrl_2 = "c40d00000000RIL-b",
++ lgrl_2 = "c40800000000RIL-b",
++ lgfrl_2 = "c40c00000000RIL-b",
++ lrvh_2 = "e3000000001fRXY-a",
++ lrv_2 = "e3000000001eRXY-a",
++ lrvr_2 = "0000b91f0000RRE",
++ lrvg_2 = "e3000000000fRXY-a",
++ lrvgr_2 = "0000b90f0000RRE",
++ ldxbr_2 = "0000b3450000RRE",
++ ldxr_2 = "000000002500RR",
++ lrdr_2 = "000000002500RR",
++ lexbr_2 = "0000b3460000RRE",
++ lexr_2 = "0000b3660000RRE",
++ ledbr_2 = "0000b3440000RRE",
++ ledr_2 = "000000003500RR",
++ lrer_2 = "000000003500RR",
++ lura_2 = "0000b24b0000RRE",
++ lurag_2 = "0000b9050000RRE",
++ lzxr_2 = "0000b3760000RRE",
++ lzdr_2 = "0000b3750000RRE",
++ lzer_2 = "0000b3740000RRE",
++ msta_2 = "0000b2470000RRE",
++ mvcl_2 = "000000000e00RR",
++ mvcle_3 = "0000a8000000RS-a",
++ mvclu_3 = "eb000000008eRSY-a",
++ mvpg_2 = "0000b2540000RRE",
++ mvst_2 = "0000b2550000RRE",
++ m_2 = "00005c000000RX-a",
++ mfy_2 = "e3000000005cRXY-a",
++ mr_2 = "000000001c00RR",
++ mxbr_2 = "0000b34c0000RRE",
++ mxr_2 = "000000002600RR",
++ mdbr_2 = "0000b31c0000RRE",
++ md_2 = "00006c000000RX-a",
++ mdr_2 = "000000002c00RR",
++ mxdbr_2 = "0000b3070000RRE",
++ mxd_2 = "000067000000RX-a",
++ mxdr_2 = "000000002700RR",
++ meebr_2 = "0000b3170000RRE",
++ meer_2 = "0000b3370000RRE",
++ mdebr_2 = "0000b30c0000RRE",
++ mde_2 = "00007c000000RX-a",
++ mder_2 = "000000003c00RR",
++ me_2 = "00007c000000RX-a",
++ mer_2 = "000000003c00RR",
++ mh_2 = "00004c000000RX-a",
++ mhy_2 = "e3000000007cRXY-a",
++ mlg_2 = "e30000000086RXY-a",
++ mlgr_2 = "0000b9860000RRE",
++ ml_2 = "e30000000096RXY-a",
++ mlr_2 = "0000b9960000RRE",
++ ms_2 = "000071000000RX-a",
++ msr_2 = "0000b2520000RRE",
++ msy_2 = "e30000000051RXY-a",
++ msg_2 = "e3000000000cRXY-a",
++ msgr_2 = "0000b90c0000RRE",
++ msgf_2 = "e3000000001cRXY-a",
++ msgfr_2 = "0000b91c0000RRE",
++ msfi_2 = "c20100000000RIL-a",
++ msgfi_2 = "c20000000000RIL-a",
++ maer_3 = "0000b32e0000RRD",
+ mvhhi_2 = "e54400000000SIL",
+ mvhi_2 = "e54c00000000SIL",
+ mvghi_2 = "e54800000000SIL",
+- o_2 = "000056000000j",
+- or_2 = "000000001600g",
+- oy_2 = "e30000000056l",
+- og_2 = "e30000000081l",
+- ogr_2 = "0000b9810000h",
+- oihf_2 = "c00c00000000n",
+- oihh_2 = "0000a5080000i",
+- oihl_2 = "0000a5090000i",
+- oilf_2 = "c00d00000000n",
+- oilh_2 = "0000a50a0000i",
+- oill_2 = "0000a50b0000i",
+- pgin_2 = "0000b22e0000h",
+- pgout_2 = "0000b22f0000h",
+- pcc_2 = "0000b92c0000h",
+- pckmo_2 = "0000b9280000h",
+- pfmf_2 = "0000b9af0000h",
+- ptf_2 = "0000b9a20000h",
+- popcnt_2 = "0000b9e10000h",
++ o_2 = "000056000000RX-a",
++ or_2 = "000000001600RR",
++ oy_2 = "e30000000056RXY-a",
++ og_2 = "e30000000081RXY-a",
++ ogr_2 = "0000b9810000RRE",
++ oihf_2 = "c00c00000000RIL-a",
++ oihh_2 = "0000a5080000RI-a",
++ oihl_2 = "0000a5090000RI-a",
++ oilf_2 = "c00d00000000RIL-a",
++ oilh_2 = "0000a50a0000RI-a",
++ oill_2 = "0000a50b0000RI-a",
++ pgin_2 = "0000b22e0000RRE",
++ pgout_2 = "0000b22f0000RRE",
++ pcc_2 = "0000b92c0000RRE",
++ pckmo_2 = "0000b9280000RRE",
++ pfmf_2 = "0000b9af0000RRE",
++ ptf_2 = "0000b9a20000RRE",
++ popcnt_2 = "0000b9e10000RRE",
+ pfd_2 = "e30000000036m",
+- pfdrl_2 = "c60200000000p",
+- pt_2 = "0000b2280000h",
+- pti_2 = "0000b99e0000h",
+- palb_2 = "0000b2480000h",
+- rrbe_2 = "0000b22a0000h",
+- rrbm_2 = "0000b9ae0000h",
+- rll_3 = "eb000000001ds",
+- rllg_3 = "eb000000001cs",
+- srst_2 = "0000b25e0000h",
+- srstu_2 = "0000b9be0000h",
+- sar_2 = "0000b24e0000h",
+- sfpc_2 = "0000b3840000h",
+- sfasr_2 = "0000b3850000h",
+- spm_2 = "000000000400g",
+- ssar_2 = "0000b2250000h",
+- ssair_2 = "0000b99f0000h",
+- slda_3 = "00008f000000q",
+- sldl_3 = "00008d000000q",
+- sla_3 = "00008b000000q",
+- slak_3 = "eb00000000dds",
+- slag_3 = "eb000000000bs",
+- sll_3 = "000089000000q",
+- sllk_3 = "eb00000000dfs",
+- sllg_3 = "eb000000000ds",
+- srda_3 = "00008e000000q",
+- srdl_3 = "00008c000000q",
+- sra_3 = "00008a000000q",
+- srak_3 = "eb00000000dcs",
+- srag_3 = "eb000000000as",
+- srl_3 = "000088000000q",
+- srlk_3 = "eb00000000des",
+- srlg_3 = "eb000000000cs",
+- sqxbr_2 = "0000b3160000h",
+- sqxr_2 = "0000b3360000h",
+- sqdbr_2 = "0000b3150000h",
+- sqdr_2 = "0000b2440000h",
+- sqebr_2 = "0000b3140000h",
+- sqer_2 = "0000b2450000h",
+- st_2 = "000050000000j",
+- sty_2 = "e30000000050l",
+- stg_2 = "e30000000024l",
+- std_2 = "000060000000j",
+- stdy_2 = "ed0000000067l",
+- ste_2 = "000070000000j",
+- stey_2 = "ed0000000066l",
+- stam_3 = "00009b000000q",
+- stamy_3 = "eb000000009bs",
+- stc_2 = "000042000000j",
+- stcy_2 = "e30000000072l",
+- stch_2 = "e300000000c3l",
+- stcmh_3 = "eb000000002ct",
+- stcm_3 = "0000be000000r",
+- stcmy_3 = "eb000000002dt",
+- stctl_3 = "0000b6000000q",
+- stctg_3 = "eb0000000025s",
+- sth_2 = "000040000000j",
+- sthy_2 = "e30000000070l",
+- sthh_2 = "e300000000c7l",
+- sthrl_2 = "c40700000000o",
+- stfh_2 = "e300000000cbl",
+- stm_3 = "000090000000q",
+- stmy_3 = "eb0000000090s",
+- stmg_3 = "eb0000000024s",
+- stmh_3 = "eb0000000026s",
+- stoc_3 = "eb00000000f3t",
+- stocg_3 = "eb00000000e3t",
+- stpq_2 = "e3000000008el",
+- strl_2 = "c40f00000000o",
+- stgrl_2 = "c40b00000000o",
+- strvh_2 = "e3000000003fl",
+- strv_2 = "e3000000003el",
+- strvg_2 = "e3000000002fl",
+- stura_2 = "0000b2460000h",
+- sturg_2 = "0000b9250000h",
+- s_2 = "00005b000000j",
+- sr_2 = "000000001b00g",
+- sy_2 = "e3000000005bl",
+- sg_2 = "e30000000009l",
+- sgr_2 = "0000b9090000h",
+- sgf_2 = "e30000000019l",
+- sgfr_2 = "0000b9190000h",
+- sxbr_2 = "0000b34b0000h",
+- sdbr_2 = "0000b31b0000h",
+- sebr_2 = "0000b30b0000h",
+- sh_2 = "00004b000000j",
+- shy_2 = "e3000000007bl",
+- sl_2 = "00005f000000j",
+- slr_2 = "000000001f00g",
+- sly_2 = "e3000000005fl",
+- slg_2 = "e3000000000bl",
+- slgr_2 = "0000b90b0000h",
+- slgf_2 = "e3000000001bl",
+- slgfr_2 = "0000b91b0000h",
+- slfi_2 = "c20500000000n",
+- slgfi_2 = "c20400000000n",
+- slb_2 = "e30000000099l",
+- slbr_2 = "0000b9990000h",
+- slbg_2 = "e30000000089l",
+- slbgr_2 = "0000b9890000h",
+- sxr_2 = "000000003700g",
+- sd_2 = "00006b000000j",
+- sdr_2 = "000000002b00g",
+- se_2 = "00007b000000j",
+- ser_2 = "000000003b00g",
+- su_2 = "00007f000000j",
+- sur_2 = "000000003f00g",
+- sw_2 = "00006f000000j",
+- swr_2 = "000000002f00g",
+- tar_2 = "0000b24c0000h",
+- tb_2 = "0000b22c0000h",
+- tmhh_2 = "0000a7020000i",
+- tmhl_2 = "0000a7030000i",
+- tmlh_2 = "0000a7000000i",
+- tmll_2 = "0000a7010000i",
+- trace_3 = "000099000000q",
+- tracg_3 = "eb000000000fs",
+- tre_2 = "0000b2a50000h",
++ pfdrl_2 = "c60200000000RIL-c",
++ pt_2 = "0000b2280000RRE",
++ pti_2 = "0000b99e0000RRE",
++ palb_2 = "0000b2480000RRE",
++ rrbe_2 = "0000b22a0000RRE",
++ rrbm_2 = "0000b9ae0000RRE",
++ rll_3 = "eb000000001dRSY-a",
++ rllg_3 = "eb000000001cRSY-a",
++ srst_2 = "0000b25e0000RRE",
++ srstu_2 = "0000b9be0000RRE",
++ sar_2 = "0000b24e0000RRE",
++ sfpc_2 = "0000b3840000RRE",
++ sfasr_2 = "0000b3850000RRE",
++ spm_2 = "000000000400RR",
++ ssar_2 = "0000b2250000RRE",
++ ssair_2 = "0000b99f0000RRE",
++ slda_3 = "00008f000000RS-a",
++ sldl_3 = "00008d000000RS-a",
++ sla_3 = "00008b000000RS-a",
++ slak_3 = "eb00000000ddRSY-a",
++ slag_3 = "eb000000000bRSY-a",
++ sll_3 = "000089000000RS-a",
++ sllk_3 = "eb00000000dfRSY-a",
++ sllg_3 = "eb000000000dRSY-a",
++ srda_3 = "00008e000000RS-a",
++ srdl_3 = "00008c000000RS-a",
++ sra_3 = "00008a000000RS-a",
++ srak_3 = "eb00000000dcRSY-a",
++ srag_3 = "eb000000000aRSY-a",
++ srl_3 = "000088000000RS-a",
++ srlk_3 = "eb00000000deRSY-a",
++ srlg_3 = "eb000000000cRSY-a",
++ sqxbr_2 = "0000b3160000RRE",
++ sqxr_2 = "0000b3360000RRE",
++ sqdbr_2 = "0000b3150000RRE",
++ sqdr_2 = "0000b2440000RRE",
++ sqebr_2 = "0000b3140000RRE",
++ sqer_2 = "0000b2450000RRE",
++ st_2 = "000050000000RX-a",
++ sty_2 = "e30000000050RXY-a",
++ stg_2 = "e30000000024RXY-a",
++ std_2 = "000060000000RX-a",
++ stdy_2 = "ed0000000067RXY-a",
++ ste_2 = "000070000000RX-a",
++ stey_2 = "ed0000000066RXY-a",
++ stam_3 = "00009b000000RS-a",
++ stamy_3 = "eb000000009bRSY-a",
++ stc_2 = "000042000000RX-a",
++ stcy_2 = "e30000000072RXY-a",
++ stch_2 = "e300000000c3RXY-a",
++ stcmh_3 = "eb000000002cRSY-b",
++ stcm_3 = "0000be000000RS-b",
++ stcmy_3 = "eb000000002dRSY-b",
++ stctl_3 = "0000b6000000RS-a",
++ stctg_3 = "eb0000000025RSY-a",
++ sth_2 = "000040000000RX-a",
++ sthy_2 = "e30000000070RXY-a",
++ sthh_2 = "e300000000c7RXY-a",
++ sthrl_2 = "c40700000000RIL-b",
++ stfh_2 = "e300000000cbRXY-a",
++ stm_3 = "000090000000RS-a",
++ stmy_3 = "eb0000000090RSY-a",
++ stmg_3 = "eb0000000024RSY-a",
++ stmh_3 = "eb0000000026RSY-a",
++ stoc_3 = "eb00000000f3RSY-b",
++ stocg_3 = "eb00000000e3RSY-b",
++ stpq_2 = "e3000000008eRXY-a",
++ strl_2 = "c40f00000000RIL-b",
++ stgrl_2 = "c40b00000000RIL-b",
++ strvh_2 = "e3000000003fRXY-a",
++ strv_2 = "e3000000003eRXY-a",
++ strvg_2 = "e3000000002fRXY-a",
++ stura_2 = "0000b2460000RRE",
++ sturg_2 = "0000b9250000RRE",
++ s_2 = "00005b000000RX-a",
++ sr_2 = "000000001b00RR",
++ sy_2 = "e3000000005bRXY-a",
++ sg_2 = "e30000000009RXY-a",
++ sgr_2 = "0000b9090000RRE",
++ sgf_2 = "e30000000019RXY-a",
++ sgfr_2 = "0000b9190000RRE",
++ sxbr_2 = "0000b34b0000RRE",
++ sdbr_2 = "0000b31b0000RRE",
++ sebr_2 = "0000b30b0000RRE",
++ sh_2 = "00004b000000RX-a",
++ shy_2 = "e3000000007bRXY-a",
++ sl_2 = "00005f000000RX-a",
++ slr_2 = "000000001f00RR",
++ sly_2 = "e3000000005fRXY-a",
++ slg_2 = "e3000000000bRXY-a",
++ slgr_2 = "0000b90b0000RRE",
++ slgf_2 = "e3000000001bRXY-a",
++ slgfr_2 = "0000b91b0000RRE",
++ slfi_2 = "c20500000000RIL-a",
++ slgfi_2 = "c20400000000RIL-a",
++ slb_2 = "e30000000099RXY-a",
++ slbr_2 = "0000b9990000RRE",
++ slbg_2 = "e30000000089RXY-a",
++ slbgr_2 = "0000b9890000RRE",
++ sxr_2 = "000000003700RR",
++ sd_2 = "00006b000000RX-a",
++ sdr_2 = "000000002b00RR",
++ se_2 = "00007b000000RX-a",
++ ser_2 = "000000003b00RR",
++ su_2 = "00007f000000RX-a",
++ sur_2 = "000000003f00RR",
++ sw_2 = "00006f000000RX-a",
++ swr_2 = "000000002f00RR",
++ tar_2 = "0000b24c0000RRE",
++ tb_2 = "0000b22c0000RRE",
++ tmhh_2 = "0000a7020000RI-a",
++ tmhl_2 = "0000a7030000RI-a",
++ tmlh_2 = "0000a7000000RI-a",
++ tmll_2 = "0000a7010000RI-a",
++ trace_3 = "000099000000RS-a",
++ tracg_3 = "eb000000000fRSY-a",
++ tre_2 = "0000b2a50000RRE",
++
+
+ -- SS-a instructions
+ clc_2 = "d50000000000SS-a",
+@@ -1252,24 +1255,24 @@ local function parse_template(params, template, nparams, pos)
+
+ -- Process each character.
+ local p = sub(template, 13)
+- if p == "g" then
++ if p == "RR" then
+ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
+ wputhw(op2)
+- elseif p == "h" then
++ elseif p == "RRE" then
+ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
+ wputhw(op1); wputhw(op2)
+- elseif p == "i" then
++ elseif p == "RI-a" then
+ op1 = op1 + shl(parse_reg(params[1]),4)
+ wputhw(op1);
+ parse_imm16(params[2])
+- elseif p == "j" then
++ elseif p == "RX-a" then
+ local d, x, b, a = parse_mem_bx(params[2])
+ op1 = op1 + shl(parse_reg(params[1]), 4) + x
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2);
+ if a then a() end
+- elseif p == "k" then
+- elseif p == "l" then
++ elseif p == "RX-b" then
++ elseif p == "RXY-a" then
+ local d, x, b, a = parse_mem_bxy(params[2])
+ op0 = op0 + shl(parse_reg(params[1]), 4) + x
+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
+@@ -1278,25 +1281,22 @@ local function parse_template(params, template, nparams, pos)
+ if a then a() end
+ elseif p == "m" then
+
+- elseif p == "n" then
++ elseif p == "RIL-a" then
+ op0 = op0 + shl(parse_reg(params[1]), 4)
+ wputhw(op0);
+ parse_imm32(params[2])
+- elseif p == "o" then
++ elseif p == "RIL-b" then
+ op0 = op0 + shl(parse_reg(params[1]), 4)
+ wputhw(op0);
+ local mode, n, s = parse_label(params[2])
+ waction("REL_"..mode, n, s)
+- elseif p == "q" then
++ elseif p == "RS-a" then
+ local d, b, a = parse_mem_b(params[3])
+ op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2)
+ if a then a() end -- a() emits action.
+- elseif p == "r" then
+- op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
+- wputhw(op1); wputhw(op2)
+- elseif p == "s" then
++ elseif p == "RSY-a" then
+ local d, b, a = parse_mem_by(params[3])
+ op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
+@@ -1357,13 +1357,13 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op1);
+ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + parse_mask(params[4])
+ wputhw(op2)
+- elseif p =="sS" then
++ elseif p =="S" then
+ wputhw(op1);
+ local d, b, a = parse_mem_b(params[1])
+ op2 = op2 + shl(b,12) + d;
+ wputhw(op2)
+ if a then a() end
+- elseif p =="iI" then
++ elseif p =="I" then
+ local imm_val, a = parse_imm8(params[1])
+ op2 = op2 + imm_val;
+ wputhw(op2);
+
+From 5076a3ee2e1eaa4a053c843037d0d87b1577bc02 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 20 Dec 2016 09:27:40 -0500
+Subject: [PATCH 135/260] Fix some DynASM instructions.
+
+---
+ dynasm/dasm_s390x.lua | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index a5f280d73..9012c3a65 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1202,15 +1202,15 @@ map_op = {
+ -- RRF-b instructions
+ didbr_4 = "0000b3580000RRF-b",
+ -- S mode instructions
+- stfl_1 = "0000b2b10000sS",
++ stfl_1 = "0000b2b10000S",
+ -- I- mode instructions
+- svc_1 = "000000000a00iI",
++ svc_1 = "000000000a00I",
+ -- RI-a mode instructions
+ -- TODO: change "i" to "RI-a"
+- chi_2 = "0000a70e0000i",
+- cghi_2 = "0000a70f0000i",
+- mhi_2 = "0000a70c0000i",
+- mghi_2 = "0000a70d0000i",
++ chi_2 = "0000a70e0000RI-a",
++ cghi_2 = "0000a70f0000RI-a",
++ mhi_2 = "0000a70c0000RI-a",
++ mghi_2 = "0000a70d0000RI-a",
+ -- RI-b mode instructions
+ bras_2 = "0000a7050000RI-b",
+ -- RI-c mode instructions
+
+From 5df5e1f144b785f97e678b52411f8e91e7dbc16f Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 20 Dec 2016 09:41:19 -0500
+Subject: [PATCH 136/260] Add support for integer add/subtract.
+
+Still need to support floating point operations. Multiplication is a
+little more complicated because it doesn't set the overflow flag.
+---
+ src/vm_s390x.dasc | 53 +++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 49 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index c387975e6..ea554c7f5 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1172,13 +1172,58 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
++
++ /* -- Binary ops -------------------------------------------------------- */
++
++ |.macro ins_arithpre
++ | ins_ABC
++ | sllg RB, RB, 3(r0)
++ | sllg RC, RC, 3(r0)
++ | sllg RA, RA, 3(r0)
++ |.endmacro
++ |
++ |.macro ins_arithdn, intins
++ | ins_arithpre
++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
++ ||switch (vk) {
++ ||case 0:
++ | lg RB, 0(RB, BASE)
++ | lg RC, 0(RC, KBASE)
++ | checkint RB, ->vmeta_arith_vno
++ | checkint RC, ->vmeta_arith_vno
++ | intins RB, RC; jo ->vmeta_arith_vno
++ || break;
++ ||case 1:
++ | lg RB, 0(RB, BASE)
++ | lg RC, 0(RC, KBASE)
++ | checkint RB, ->vmeta_arith_nvo
++ | checkint RC, ->vmeta_arith_nvo
++ | intins RC, RB; jo ->vmeta_arith_nvo
++ || break;
++ ||default:
++ | lg RB, 0(RB, BASE)
++ | lg RC, 0(RC, BASE)
++ | checkint RB, ->vmeta_arith_vvo
++ | checkint RC, ->vmeta_arith_vvo
++ | intins RB, RC; jo ->vmeta_arith_vvo
++ || break;
++ ||}
++ ||if (vk == 1) {
++ | // setint RC
++ | stg RC, 0(RA, BASE)
++ ||} else {
++ | // setint RB
++ | stg RB, 0(RA, BASE)
++ ||}
++ | ins_next
++ |.endmacro
++
++ | // RA = dst, RB = src1 or num const, RC = src2 or num const
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_arithdn ar
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_arithdn sr
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+ | stg r0, 0(r0)
+
+From d256d996593cba84a772a3f91ee5554a7f8f31f8 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 20 Dec 2016 12:09:32 -0500
+Subject: [PATCH 137/260] Add support for floating point add/sub.
+
+---
+ src/vm_s390x.dasc | 62 +++++++++++++++++++++++++++++++++++------------
+ 1 file changed, 46 insertions(+), 16 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index ea554c7f5..3750de0ff 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -631,8 +631,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ |->cont_nop: // BASE = base, (RC = result)
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_next
+ |
+ |->vmeta_tsetr:
+ | stg r0, 0(r0)
+@@ -663,34 +662,65 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |->vmeta_arith_vno:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | llgc RB, PC_RB
++ | llgc RC, PC_RC
+ |->vmeta_arith_vn:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | sllg RB, RB, 3(r0)
++ | sllg RC, RC, 3(r0)
++ | lay RB, 0(RB, BASE)
++ | lay RC, 0(RC, KBASE)
++ | j >1
+ |
+ |->vmeta_arith_nvo:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | llgc RC, PC_RC
++ | llgc RB, PC_RB
+ |->vmeta_arith_nv:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | sllg RC, RC, 3(r0)
++ | sllg RB, RB, 3(r0)
++ | lay TMPR1, 0(RC, KBASE)
++ | lay RC, 0(RB, BASE)
++ | lgr RB, TMPR1
++ | j >1
+ |
+ |->vmeta_unm:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ |
+ |->vmeta_arith_vvo:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | llgc RB, PC_RB
++ | llgc RC, PC_RC
+ |->vmeta_arith_vv:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | sllg RC, RC, 3(r0)
++ | sllg RB, RB, 3(r0)
++ | lay RB, 0(RB, BASE)
++ | lay RC, 0(RC, BASE)
++ |1:
++ | sllg RA, RA, 3(r0)
++ | lay RA, 0(RA, BASE)
++ | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
++ | lgr CARG2, RA
++ | lgr CARG3, RB
++ | // lgr CARG4, RC // Caveat: CARG4 == RC (nop, so commented out).
++ | lg L:CARG1, SAVE_L
++ | stg BASE, L:CARG1->base
++ | lgr L:RB, L:CARG1
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
++ | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
++ | lg BASE, L:RB->base
++ | cghi CRET1, 0
++ | lgr RC, CRET1
++ | je ->cont_nop
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | // BASE = base, RC = new base, stack = cont/func/o1/o2
++ | lgr RA, RC
++ | sgr RC, BASE
++ | stg PC, -24(RA) // [cont|PC]
++ | la PC, FRAME_CONT(RC)
++ | lghi NARGS:RD, 2+1 // 2 args for func(o1, o2).
++ | j ->vm_call_dispatch
+ |
+ |->vmeta_len:
+ | stg r0, 0(r0)
+
+From d94f4ac079ffc4a996862e7e0acffb7ed557696a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 20 Dec 2016 13:14:53 -0500
+Subject: [PATCH 138/260] Add support for multiplication.
+
+Multiplication instructions don't set the overflow flag so we need
+to manually check, which is why this is more complicated than
+addition.
+---
+ src/vm_s390x.dasc | 41 +++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 39 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 3750de0ff..37aa29bf7 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1256,8 +1256,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_arithdn sr
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_arithpre
++ | // For multiplication we use msgfr and check if the result
++ | // fits in an int32_t.
++ switch(op) {
++ case BC_MULVN:
++ | lg RB, 0(RB, BASE)
++ | lg RC, 0(RC, KBASE)
++ | checkint RB, ->vmeta_arith_vno
++ | checkint RC, ->vmeta_arith_vno
++ | lgfr RB, RB
++ | msgfr RB, RC
++ | lgfr RC, RB
++ | cgr RB, RC; jne ->vmeta_arith_vno
++ break;
++ case BC_MULNV:
++ | lg RB, 0(RB, BASE)
++ | lg RC, 0(RC, KBASE)
++ | checkint RB, ->vmeta_arith_nvo
++ | checkint RC, ->vmeta_arith_nvo
++ | lgfr RB, RB
++ | msgfr RB, RC
++ | lgfr RC, RB
++ | cgr RB, RC; jne ->vmeta_arith_nvo
++ break;
++ default:
++ | lg RB, 0(RB, BASE)
++ | lg RC, 0(RC, BASE)
++ | checkint RB, ->vmeta_arith_vvo
++ | checkint RC, ->vmeta_arith_vvo
++ | lgfr RB, RB
++ | msgfr RB, RC
++ | lgfr RC, RB
++ | cgr RB, RC; jne ->vmeta_arith_vvo
++ break;
++ }
++ | llgfr RB, RB
++ | setint RB
++ | stg RB, 0(RA, BASE)
++ | ins_next
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | stg r0, 0(r0)
+
+From d006b07127467d3ecae570c1c357df37bb29e1f4 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 20 Dec 2016 13:37:42 -0500
+Subject: [PATCH 139/260] Add support for division.
+
+---
+ src/vm_s390x.dasc | 39 +++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 37 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 37aa29bf7..955fe9f06 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1212,6 +1212,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | sllg RA, RA, 3(r0)
+ |.endmacro
+ |
++ |.macro ins_arithfp, ins
++ | ins_arithpre
++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
++ ||switch (vk) {
++ ||case 0:
++ | ld f0, 0(RB, BASE)
++ | ld f1, 0(RC, KBASE)
++ | lg RB, 0(RB, BASE)
++ | lg RC, 0(RC, KBASE)
++ | checknumtp RB, ->vmeta_arith_vno
++ | checknumtp RC, ->vmeta_arith_vno
++ | ins f0, f1
++ || break;
++ ||case 1:
++ | ld f1, 0(RB, BASE)
++ | ld f0, 0(RC, KBASE)
++ | lg RB, 0(RB, BASE)
++ | lg RC, 0(RC, KBASE)
++ | checknumtp RB, ->vmeta_arith_nvo
++ | checknumtp RC, ->vmeta_arith_nvo
++ | ins f0, f1
++ || break;
++ ||default:
++ | ld f0, 0(RB, BASE)
++ | ld f1, 0(RC, BASE)
++ | lg RB, 0(RB, BASE)
++ | lg RC, 0(RC, BASE)
++ | checknumtp RB, ->vmeta_arith_vvo
++ | checknumtp RC, ->vmeta_arith_vvo
++ | ins f0, f1
++ || break;
++ ||}
++ | std f0, 0(RA, BASE)
++ | ins_next
++ |.endmacro
++ |
+ |.macro ins_arithdn, intins
+ | ins_arithpre
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+@@ -1297,8 +1333,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_arithfp ddbr
+ break;
+ case BC_MODVN:
+ | stg r0, 0(r0)
+
+From 419869b20ee380219bafadbd3cbe2ea378c2a28d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 20 Dec 2016 14:53:31 -0500
+Subject: [PATCH 140/260] Fixes for DynASM.
+
+ * s/hle/nle/
+ * Fix RRF-b encoding (didbr instruction)
+---
+ dynasm/dasm_s390x.lua | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 9012c3a65..f8c45fa89 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -239,7 +239,7 @@ function _M.revdef(s)
+ end
+
+ local map_cond = {
+- o = 1, h = 2, hle = 3, l = 4,
++ o = 1, h = 2, nle = 3, l = 4,
+ nhe = 5, lh = 6, ne = 7, e = 8,
+ nlh = 9, he = 10, nl = 11, le = 12,
+ nh = 13, no = 14, [""] = 15,
+@@ -1200,7 +1200,7 @@ map_op = {
+ sdb_2 = "ed000000001bRXE",
+ seb_2 = "ed000000000bRXE",
+ -- RRF-b instructions
+- didbr_4 = "0000b3580000RRF-b",
++ didbr_4 = "0000b35b0000RRF-b",
+ -- S mode instructions
+ stfl_1 = "0000b2b10000S",
+ -- I- mode instructions
+@@ -1355,7 +1355,7 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op2);
+ elseif p == "RRF-b" then
+ wputhw(op1);
+- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + parse_mask(params[4])
++ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + shl(parse_mask(params[4]),8)
+ wputhw(op2)
+ elseif p =="S" then
+ wputhw(op1);
+
+From e0e98f94d33a06b1a352068c318fe1d85412c40d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 20 Dec 2016 14:54:26 -0500
+Subject: [PATCH 141/260] Add support for modulo (%) operations.
+
+Only the slow path for now.
+---
+ src/vm_s390x.dasc | 32 ++++++++++++++++++++++----------
+ 1 file changed, 22 insertions(+), 10 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 955fe9f06..2e404aac1 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1081,15 +1081,24 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+- |.macro vm_round, name, mode, cond
++ |// FP value rounding. Called by math.floor/math.ceil fast functions.
++ |// Value to round is in f0. May clobber f0-f7 and r0. Return address is r14.
++ |.macro vm_round, name, mask
+ |->name:
+- | stg r0, 0(r0)
++ | // TODO: handle edge cases?
++ | lghi r0, 1
++ | cdfbr f1, r0
++ | didbr f0, f2, f1, mask // f0=remainder, f2=quotient.
++ | jnle >1
++ | ldr f0, f2
++ | br r14
++ |1: // partial remainder (sanity check)
+ | stg r0, 0(r0)
+ |.endmacro
+ |
+- | vm_round vm_floor, 0, 1
+- | vm_round vm_ceil, 1, JIT
+- | vm_round vm_trunc, 2, JIT
++ | vm_round vm_floor, 7 // Round towards -inf.
++ | vm_round vm_ceil, 6 // Round towards +inf.
++ | vm_round vm_trunc, 5 // Round towards 0.
+ |
+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+ |->vm_mod:
+@@ -1335,13 +1344,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | ins_arithfp ddbr
+ break;
++ // TODO: implement fast mod operation.
++ // x86_64 does floating point mod, however it might be better to use integer mod.
+ case BC_MODVN:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | j ->vmeta_arith_vno
+ break;
+- case BC_MODNV: case BC_MODVV:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ case BC_MODNV:
++ | j ->vmeta_arith_nvo
++ break;
++ case BC_MODVV:
++ | j ->vmeta_arith_vvo
+ break;
+ case BC_POW:
+ | stg r0, 0(r0)
+
+From 90334d3be90eed3e351e4ea5362a9ae5660337da Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 20 Dec 2016 15:49:21 -0500
+Subject: [PATCH 142/260] Add support for function definitions.
+
+---
+ src/vm_s390x.dasc | 71 ++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 61 insertions(+), 10 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 2e404aac1..018ec2702 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1429,8 +1429,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ break;
+ case BC_FNEW:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ | lg CARG3, -16(BASE)
++ | cleartp CARG3
++ | sllg RD, RD, 3(r0)
++ | lg CARG2, 0(RD, KBASE) // Fetch GCproto *.
++ | lgr CARG1, L:RB
++ | stg PC, SAVE_PC
++ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
++ | brasl r14, extern lj_func_newL_gc
++ | // GCfuncL * returned in r2 (CRET1).
++ | lg BASE, L:RB->base
++ | llgc RA, PC_RA
++ | sllg RA, RA, 3(r0)
++ | settp LFUNC:CRET1, LJ_TFUNC
++ | stg LFUNC:CRET1, 0(RA, BASE)
++ | ins_next
+ break;
+ case BC_TNEW:
+ | stg r0, 0(r0)
+@@ -1949,21 +1965,56 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
++
++ /* -- Function headers -------------------------------------------------- */
++
++ /*
++ ** Reminder: A function may be called with func/args above L->maxstack,
++ ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
++ ** too. This means all FUNC* ops (including fast functions) must check
++ ** for stack overflow _before_ adding more slots!
++ */
++
+ case BC_FUNCF:
++ |.if JIT
+ | stg r0, 0(r0)
+- | stg r0, 0(r0)
+- break;
++ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
+ break;
++
+ case BC_JFUNCF:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++#if !LJ_HASJIT
+ break;
++#endif
+ case BC_IFUNCF:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
++ | lg KBASE, (PC2PROTO(k)-4)(PC)
++ | lg L:RB, SAVE_L
++ | sllg RA, RA, 3(r0)
++ | la RA, 0(RA, BASE) // Top of frame.
++ | clg RA, L:RB->maxstack
++ | jh ->vm_growstack_f
++ | llgc RA, (PC2PROTO(numparams)-4)(PC)
++ | clgr NARGS:RD, RA // Check for missing parameters.
++ | jle >3
++ |2:
++ if (op == BC_JFUNCF) {
++ | llgh RD, PC_RD
++ | j =>BC_JLOOP
++ } else {
++ | ins_next
++ }
++ |
++ |3: // Clear missing parameters.
++ | // TODO: optimize this. Some of this can be hoisted.
++ | sllg TMPR1, NARGS:RD, 3(r0)
++ | lghi TMPR2, LJ_TNIL
++ | stg TMPR2, -8(TMPR1, BASE)
++ | la RD, 1(RD)
++ | clgr RD, RA
++ | jle <3
++ | j <2
+ break;
+
+ case BC_JFUNCV:
+
+From eda56c9a44961dfdc8c15621773001f93dd3af71 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 20 Dec 2016 17:10:38 -0500
+Subject: [PATCH 143/260] Add support for if statements.
+
+---
+ src/vm_s390x.dasc | 100 ++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 96 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 018ec2702..83c26dba6 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -639,6 +639,33 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
++ |->vmeta_comp:
++ | llgh RD, PC_RD
++ | sllg RD, RD, 3(r0)
++ | llgc RA, PC_RA
++ | sllg RA, RA, 3(r0)
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ | la CARG2, 0(RA, BASE)
++ | la CARG3, 0(RD, BASE)
++ | lgr CARG1, L:RB
++ | llgc CARG4, PC_OP
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
++ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
++ |3:
++ | lg BASE, L:RB->base
++ | clgfi CRET1, 1
++ | jh ->vmeta_binop
++ |4:
++ | la PC, 4(PC)
++ | jl >6
++ |5:
++ | llgh RD, PC_RD
++ | branchPC RD
++ |6:
++ | ins_next
++ |
+ |->cont_condt: // BASE = base, RC = result
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+@@ -1159,10 +1186,73 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |=>defop:
+
+ switch (op) {
++
++ /* -- Comparison ops ---------------------------------------------------- */
++
++ /* Remember: all ops branch for a true comparison, fall through otherwise. */
++
++ |.macro jmp_comp, lt, ge, le, gt, target
++ ||switch (op) {
++ ||case BC_ISLT:
++ | lt target
++ ||break;
++ ||case BC_ISGE:
++ | ge target
++ ||break;
++ ||case BC_ISLE:
++ | le target
++ ||break;
++ ||case BC_ISGT:
++ | gt target
++ ||break;
++ ||default: break; /* Shut up GCC. */
++ ||}
++ |.endmacro
++
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | // RA = src1, RD = src2, JMP with RD = target
++ | ins_AD
++ | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 3(r0)
++ | ld f0, 0(RA, BASE)
++ | ld f1, 0(RD, BASE)
++ | lg RA, 0(RA, BASE)
++ | lg RD, 0(RD, BASE)
++ | srag ITYPE, RA, 47(r0)
++ | srag RB, RD, 47(r0)
++ |
++ | clfi ITYPE, LJ_TISNUM; jne >7
++ | clfi RB, LJ_TISNUM; jne >8
++ | // Both are integers.
++ | la PC, 4(PC)
++ | cr RA, RD
++ | jmp_comp jhe, jl, jh, jle, >9
++ |6:
++ | llgh RD, PC_RD
++ | branchPC RD
++ |9:
++ | ins_next
++ |
++ |7: // RA is not an integer.
++ | jh ->vmeta_comp
++ | // RA is a number.
++ | clfi RB, LJ_TISNUM; jl >1; jne ->vmeta_comp
++ | // RA is a number, RD is an integer.
++ | cdfbr f1, RD
++ | j >1
++ |
++ |8: // RA is an integer, RD is not an integer.
++ | jh ->vmeta_comp
++ | // RA is an integer, RD is a number.
++ | cdfbr f0, RA
++ |1:
++ | la PC, 4(PC)
++ | cdbr f0, f1
++ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
++ | jmp_comp jnl, jl, jnle, jle, <9
++ | j <6
+ break;
++
+ case BC_ISEQV: case BC_ISNEV:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+@@ -1961,9 +2051,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
++
+ case BC_JMP:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AJ // RA = unused, RD = target
++ | branchPC RD
++ | ins_next
+ break;
+
+ /* -- Function headers -------------------------------------------------- */
+
+From b5aa0d077c62d9cf8b7db2c31aa8d9f116e87ab8 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 20 Dec 2016 17:26:33 -0500
+Subject: [PATCH 144/260] Fix bug in division.
+
+---
+ src/vm_s390x.dasc | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 83c26dba6..0f538d08c 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -722,6 +722,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lay RB, 0(RB, BASE)
+ | lay RC, 0(RC, BASE)
+ |1:
++ | llgc RA, PC_RA
+ | sllg RA, RA, 3(r0)
+ | lay RA, 0(RA, BASE)
+ | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
+
+From 4853da820a1f484ab992267df5b079aeed7c0d60 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 21 Dec 2016 18:43:25 +0530
+Subject: [PATCH 145/260] Added test example for RXE mode
+
+---
+ dynasm/Examples/test_z_inst.c | 17 ++++++++++++++++-
+ 1 file changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 2314606c2..20b20456f 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -366,6 +366,20 @@ static void rsb(dasm_State *state) {
+ | br r14
+ }
+
++static void sqrt_rxe(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ | lay sp , -8(sp)
++ | cefbr f0 , r2
++ | stdy f0 , 0(sp)
++ | sqeb f0 ,0(r4,sp)
++ | cfebr r2 ,0, f0
++ | la sp, 8(sp)
++ | br r14
++
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+@@ -398,7 +412,8 @@ test_table test[] = {
+ { 0, 0, 0, sil, 23, "sil"},
+ {15, 3,10, rrfe_rrd, 45, "rrfe_rrd"},
+ { 0, 0, 0, rsb, 0, "rsb"},
+- {12,10, 0, rre, 10, "rre"}
++ {12,10, 0, rre, 10, "rre"},
++ {16,10, 0, sqrt_rxe, 4,"sqrt_rxe"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+
+From ae38a6913ea0d2e43bf04e94423d6f03ad995f8b Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 21 Dec 2016 11:02:53 -0500
+Subject: [PATCH 146/260] Add support for numeric equality checks.
+
+---
+ src/vm_s390x.dasc | 114 +++++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 112 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 0f538d08c..d1db7b97e 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1255,8 +1255,118 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ vk = op == BC_ISEQV;
++ | ins_AD // RA = src1, RD = src2, JMP with RD = target
++ | sllg RD, RD, 3(r0)
++ | ld f1, 0(RD, BASE)
++ | lg RD, 0(RD, BASE)
++ | sllg RA, RA, 3(r0)
++ | ld f0, 0(RA, BASE)
++ | lg RA, 0(RA, BASE)
++ | la PC, 4(PC)
++ | srag RB, RD, 47(r0)
++ | srag ITYPE, RA, 47(r0)
++ | clfi RB, LJ_TISNUM; jne >7
++ | clfi ITYPE, LJ_TISNUM; jne >8
++ | cr RD, RA
++ if (vk) {
++ | jne >9
++ } else {
++ | je >9
++ }
++ | llgh RD, PC_RD
++ | branchPC RD
++ |9:
++ | ins_next
++ |
++ |7: // RD is not an integer.
++ | jh >5
++ | // RD is a number.
++ | clfi ITYPE, LJ_TISNUM; jl >1; jne >5
++ | // RD is a number, RA is an integer.
++ | cdfbr f0, RA
++ | j >1
++ |
++ |8: // RD is an integer, RA is not an integer.
++ | jh >5
++ | // RD is an integer, RA is a number.
++ | cdfbr f1, RD
++ | j >1
++ |
++ |1:
++ | cdbr f0, f1
++ |4:
++ iseqne_fp:
++ if (vk) {
++ | jne >2 // Unordered means not equal.
++ } else {
++ | je >1 // Unordered means not equal.
++ }
++ iseqne_end:
++ if (vk) {
++ |1: // EQ: Branch to the target.
++ | llgh RD, PC_RD
++ | branchPC RD
++ |2: // NE: Fallthrough to next instruction.
++ |.if not FFI
++ |3:
++ |.endif
++ } else {
++ |.if not FFI
++ |3:
++ |.endif
++ |2: // NE: Branch to the target.
++ | llgh RD, PC_RD
++ | branchPC RD
++ |1: // EQ: Fallthrough to next instruction.
++ }
++ if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
++ op == BC_ISEQN || op == BC_ISNEN)) {
++ | j <9
++ } else {
++ | ins_next
++ }
++ |
++ if (op == BC_ISEQV || op == BC_ISNEV) {
++ |5: // Either or both types are not numbers.
++ |.if FFI
++ | clfi RB, LJ_TCDATA; je ->vmeta_equal_cd
++ | clfi ITYPE, LJ_TCDATA; je ->vmeta_equal_cd
++ |.endif
++ | cgr RA, RD
++ | je <1 // Same GCobjs or pvalues?
++ | cr RB, ITYPE
++ | jne <2 // Not the same type?
++ | clfi RB, LJ_TISTABUD
++ | jh <2 // Different objects and not table/ud?
++ |
++ | // Different tables or userdatas. Need to check __eq metamethod.
++ | // Field metatable must be at same offset for GCtab and GCudata!
++ | cleartp TAB:RA
++ | lg TAB:RB, TAB:RA->metatable
++ | cghi TAB:RB, 0
++ | je <2 // No metatable?
++ | llgc TMPR2, TAB:RB->nomm
++ | tmll TMPR2, 1<<MM_eq
++ | jne <2 // Or 'no __eq' flag set?
++ if (vk) {
++ | lghi RB, 0 // ne = 0 // TODO: should be 32-bit?
++ } else {
++ | lghi RB, 1 // ne = 1 // TODO: should be 32-bit?
++ }
++ | j ->vmeta_equal // Handle __eq metamethod.
++ } else {
++ |.if FFI
++ |3:
++ | clfi ITYPE, LJ_TCDATA
++ if (LJ_DUALNUM && vk) {
++ | jne <9
++ } else {
++ | jne <2
++ }
++ | j ->vmeta_equal_cd
++ |.endif
++ }
+ break;
+ case BC_ISEQS: case BC_ISNES:
+ | stg r0, 0(r0)
+
+From 8518df8e5696292519f2686d7d4a38c71f66f5f5 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 21 Dec 2016 12:49:53 -0500
+Subject: [PATCH 147/260] Implement some boolean operations.
+
+---
+ src/vm_s390x.dasc | 47 +++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 41 insertions(+), 6 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index d1db7b97e..e24307422 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1381,8 +1381,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ break;
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
++ | sllg RD, RD, 3(r0)
++ | sllg RA, RA, 3(r0)
++ | lg ITYPE, 0(RD, BASE)
++ | la PC, 4(PC)
++ if (op == BC_ISTC || op == BC_ISFC) {
++ | lgr RB, ITYPE
++ }
++ | srag ITYPE, ITYPE, 47(r0)
++ | clfi ITYPE, LJ_TISTRUECOND
++ if (op == BC_IST || op == BC_ISTC) {
++ | jhe >1
++ } else {
++ | jl >1
++ }
++ if (op == BC_ISTC || op == BC_ISFC) {
++ | stg RB, 0(RA, BASE)
++ }
++ | llgh RD, PC_RD
++ | branchPC RD
++ |1: // Fallthrough to the next instruction.
++ | ins_next
+ break;
+ case BC_ISTYPE:
+ | stg r0, 0(r0)
+@@ -1401,8 +1421,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next_
+ break;
+ case BC_NOT:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = dst, RD = src
++ | sllg RD, RD, 3(r0)
++ | sllg RA, RA, 3(r0)
++ | lg RB, 0(RD, BASE)
++ | srag RB, RB, 47(r0)
++ | load_false RC
++ | cghi RB, LJ_TTRUE
++ | je >1 // TODO: Maybe do something fancy to avoid the jump?
++ | load_true RC
++ |1:
++ | stg RC, 0(RA, BASE)
++ | ins_next
+ break;
+ case BC_UNM:
+ | stg r0, 0(r0)
+@@ -1598,8 +1628,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ break;
+ case BC_KPRI:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = dst, RD = primitive type (~)
++ | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 47(r0)
++ | lghi TMPR2, -1
++ | xgr RD, TMPR2 // not
++ | stg RD, 0(RA, BASE)
++ | ins_next
+ break;
+ case BC_KNIL:
+ | stg r0, 0(r0)
+
+From 9da6ff7ea3f83b65ad17adcb0abe05cc03eac46d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 21 Dec 2016 13:21:12 -0500
+Subject: [PATCH 148/260] Implement more equality checks.
+
+---
+ src/vm_s390x.dasc | 89 ++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 81 insertions(+), 8 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index e24307422..59ce1843a 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1369,17 +1369,90 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ }
+ break;
+ case BC_ISEQS: case BC_ISNES:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
+- break;
++ vk = op == BC_ISEQS;
++ | ins_AND // RA = src, RD = str const, JMP with RD = target
++ | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 3(r0)
++ | lg RB, 0(RA, BASE)
++ | la PC, 4(PC)
++ | checkstr RB, >3
++ | cg RB, 0(RD, KBASE)
++ iseqne_test:
++ if (vk) {
++ | jne >2
++ } else {
++ | je >1
++ }
++ goto iseqne_end;
+ case BC_ISEQN: case BC_ISNEN:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
+- break;
++ vk = op == BC_ISEQN;
++ | ins_AD // RA = src, RD = num const, JMP with RD = target
++ | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 3(r0)
++ | ld f0, 0(RA, BASE)
++ | lg RB, 0(RA, BASE)
++ | ld f1, 0(RD, KBASE)
++ | lg RD, 0(RD, KBASE)
++ | la PC, 4(PC)
++ | checkint RB, >7
++ | checkint RD, >8
++ | cr RB, RD
++ if (vk) {
++ | jne >9
++ } else {
++ | je >9
++ }
++ | llgh RD, PC_RD
++ | branchPC RD
++ |9:
++ | ins_next
++ |
++ |7: // RA is not an integer.
++ | jh >3
++ | // RA is a number.
++ | checkint RD, >1
++ | // RA is a number, RD is an integer.
++ | cdfbr f1, RD
++ | j >1
++ |
++ |8: // RA is an integer, RD is a number.
++ | cdfbr f0, RB
++ | cdbr f0, f1
++ | j >4
++ |1:
++ | cdbr f0, f1
++ |4:
++ goto iseqne_fp;
+ case BC_ISEQP: case BC_ISNEP:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ vk = op == BC_ISEQP;
++ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
++ | sllg RA, RA, 3(r0)
++ | lg RB, 0(RA, BASE)
++ | srag RB, RB, 47(r0)
++ | la PC, 4(PC)
++ | cr RB, RD
++ if (!LJ_HASFFI) goto iseqne_test;
++ if (vk) {
++ | jne >3
++ | llgh RD, PC_RD
++ | branchPC RD
++ |2:
++ | ins_next
++ |3:
++ | cghi RB, LJ_TCDATA; jne <2
++ | j ->vmeta_equal_cd
++ } else {
++ | je >2
++ | cghi RB, LJ_TCDATA; je ->vmeta_equal_cd
++ | llgh RD, PC_RD
++ | branchPC RD
++ |2:
++ | ins_next
++ }
+ break;
++
++ /* -- Unary test and copy ops ------------------------------------------- */
++
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
+ | sllg RD, RD, 3(r0)
+
+From e19544ae9a4cd24abbf40e8209bd5c937141db55 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 21 Dec 2016 14:03:58 -0500
+Subject: [PATCH 149/260] Implement unary minus.
+
+---
+ src/vm_s390x.dasc | 27 +++++++++++++++++++++++----
+ 1 file changed, 23 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 59ce1843a..9a45446ce 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -710,8 +710,11 @@ static void build_subroutines(BuildCtx *ctx)
+ | j >1
+ |
+ |->vmeta_unm:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | llgh RD, PC_RD
++ | sllg RD, RD, 3(r0)
++ | la RC, 0(RD, BASE)
++ | lgr RB, RC
++ | j >1
+ |
+ |->vmeta_arith_vvo:
+ | llgc RB, PC_RB
+@@ -1508,8 +1511,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ break;
+ case BC_UNM:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = dst, RD = src
++ | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 3(r0)
++ | lg RB, 0(RD, BASE)
++ | checkint RB, >3
++ | lcr RB, RB; jo >2
++ |1:
++ | stg RB, 0(RA, BASE)
++ | ins_next
++ |2:
++ | llihh RB, 0x41e0 // (double)2^31
++ | j <1
++ |3:
++ | jh ->vmeta_unm
++ | // Toggle sign bit.
++ | llihh TMPR2, 0x8000
++ | xgr RB, TMPR2
++ | j <1
+ break;
+ case BC_LEN:
+ | stg r0, 0(r0)
+
+From 18250375387077f865b9bb135afb874626853a06 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 21 Dec 2016 18:02:43 -0500
+Subject: [PATCH 150/260] Implement table creation and printing.
+
+---
+ src/vm_s390x.dasc | 140 ++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 123 insertions(+), 17 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 9a45446ce..8cf4b3f70 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -808,20 +808,16 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.macro .ffunc, name
+ |->ff_ .. name:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
+ |.endmacro
+ |
+ |.macro .ffunc_1, name
+ |->ff_ .. name:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | clfi NARGS:RD, 1+1; jl ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_2, name
+ |->ff_ .. name:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | clfi NARGS:RD, 2+1; jl ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_n, name, op
+@@ -867,12 +863,40 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | // Only handles the number case inline (without a base argument).
++ | clfi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
++ | lg RB, 0(BASE)
++ | checknumber RB, ->fff_fallback
++ | lg PC, -8(BASE)
++ | stg RB, -16(BASE)
++ | j ->fff_res1
+ |
+ |.ffunc_1 tostring
++ | // Only handles the string or number case inline.
++ | lg PC, -8(BASE)
++ | lg STR:RB, 0(BASE)
++ | checktp_nc STR:RB, LJ_TSTR, >3
++ | // A __tostring method in the string base metatable is ignored.
++ |2:
++ | stg STR:RB, -16(BASE)
++ | j ->fff_res1
++ |3: // Handle numbers inline, unless a number base metatable is present.
++ | clfi ITYPE, LJ_TISNUM; jh ->fff_fallback_1
++ | lghi TMPR2, 0
++ | cg TMPR2, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH)
++ | jne ->fff_fallback
++ | ffgccheck // Caveat: uses label 1.
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base // Add frame since C call can throw.
++ | stg PC, SAVE_PC // Redundant (but a defined value).
++ | lgr CARG2, BASE // Otherwise: CARG2 == BASE
++ | lgr L:CARG1, L:RB
++ | brasl r14, extern lj_strfmt_number // (lua_State *L, cTValue *o)
++ | // GCstr returned in r2 (CRET1).
+ | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg BASE, L:RB->base
++ | settp STR:RB, CRET1, LJ_TSTR
++ | j <2
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+@@ -921,8 +945,33 @@ static void build_subroutines(BuildCtx *ctx)
+ |->fff_resxmm0:
+ |
+ |->fff_res1:
++ | lghi RD, 1+1
+ |->fff_res:
++ | stg RD, SAVE_MULTRES
+ |->fff_res_:
++ | tmll PC, FRAME_TYPE
++ | jne >7
++ |5:
++ | llgc TMPR1, PC_RB
++ | clgr TMPR1, RD // More results expected?
++ | jh >6
++ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
++ | llgc RA, PC_RA
++ | lcgr RA, RA
++ | sllg RA, RA, 3(r0)
++ | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
++ | ins_next
++ |
++ |6: // Fill up results with nil.
++ | sllg TMPR1, RD, 3(r0)
++ | lghi TMPR2, LJ_TNIL
++ | stg TMPR2, -24(TMPR1, BASE)
++ | la RD, 1(RD)
++ | j <5
++ |
++ |7: // Non-standard return case.
++ | lghi RA, -16 // Results start at BASE+RA = BASE-16.
++ | j ->vm_return
+ |
+ |.macro math_round, func
+ | .ffunc math_ .. func
+@@ -1036,14 +1085,41 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback_2:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lghi NARGS:RD, 1+2 // Other args are ignored, anyway.
++ | j ->fff_fallback
+ |->fff_fallback_1:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lghi NARGS:RD, 1+1 // Other args are ignored, anyway.
+ |->fff_fallback: // Call fast function fallback handler.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | // BASE = new base, RD = nargs+1
++ | lg L:RB, SAVE_L
++ | lg PC, -8(BASE) // Fallback may overwrite PC.
++ | stg PC, SAVE_PC // Redundant (but a defined value).
++ | stg BASE, L:RB->base
++ | sllg RD, NARGS:RD, 3(r0)
++ | lay RD, -8(RD, BASE)
++ | la RA, (8*LUA_MINSTACK)(RD) // Ensure enough space for handler.
++ | stg RD, L:RB->top
++ | lg CFUNC:RD, -16(BASE)
++ | cleartp CFUNC:RD
++ | clg RA, L:RB->maxstack
++ | jh >5 // Need to grow stack.
++ | lgr CARG1, L:RB
++ | lg TMPR1, CFUNC:RD->f
++ | basr r14, TMPR1 // (lua_State *L)
++ | lg BASE, L:RB->base
++ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
++ | lgr RD, CRET1
++ | cghi RD, 0; jh ->fff_res // Returned nresults+1?
++ |1:
++ | lg RA, L:RB->top
++ | sgr RA, BASE
++ | srlg RA, RA, 3(r0)
++ | cghi RD, 0
++ | la NARGS:RD, 1(RA)
++ | lg LFUNC:RB, -16(BASE)
++ | jne ->vm_call_tail // Returned -1?
++ | cleartp LFUNC:RB
++ | ins_callt // Returned 0: retry fast path.
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+@@ -1777,8 +1853,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ break;
+ case BC_TNEW:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = dst, RD = hbits|asize
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ | lg RA, (DISPATCH_GL(gc.total))(DISPATCH)
++ | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH)
++ | stg PC, SAVE_PC
++ | jhe >5
++ |1:
++ | srlg CARG3, RD, 11(r0)
++ | llill TMPR2, 0x7ff
++ | nr RD, TMPR2
++ | cr RD, TMPR2
++ | je >3
++ |2:
++ | lgr L:CARG1, L:RB
++ | llgfr CARG2, RD
++ | brasl r14, extern lj_tab_new // (lua_State *L, uint32_t asize, uint32_t hbits)
++ | // Table * returned in r2 (CRET1).
++ | lg BASE, L:RB->base
++ | llgc RA, PC_RA
++ | sllg RA, RA, 3(r0)
++ | settp TAB:CRET1, LJ_TTAB
++ | stg TAB:CRET1, 0(RA, BASE)
++ | ins_next
++ |3: // Turn 0x7ff into 0x801.
++ | llill RD, 0x801
++ | j <2
++ |5:
++ | lgr L:CARG1, L:RB
++ | brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
++ | llgh RD, PC_RD
++ | j <1
+ break;
+ case BC_TDUP:
+ | stg r0, 0(r0)
+
+From 6fc4c0c1a8d2687a1cfc2316c9117c4eb8c6aa8e Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 22 Dec 2016 12:41:00 -0500
+Subject: [PATCH 151/260] Fix BC_MCALL
+
+RC and RD are the same register on x64, so the x64 VM sometimes uses them
+interchangeably. Probably we should make them the same register
+on s390x, but that would involve changing the instruction decode
+code which I would rather leave until we have a test suite passing.
+---
+ src/vm_s390x.dasc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 8cf4b3f70..c7145681c 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2057,6 +2057,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+
+ case BC_CALL: case BC_CALLM:
+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
++ | lgr RD, RC
+ if (op == BC_CALLM) {
+ | ag NARGS:RD, SAVE_MULTRES // TODO: MULTRES is 32-bit on x64
+ }
+@@ -2064,7 +2065,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg LFUNC:RB, 0(BASE, RA)
+ | checkfunc LFUNC:RB, ->vmeta_call_ra
+ | la BASE, 16(RA, BASE)
+- | lgr RD, RC
+ | ins_call
+ break;
+
+
+From 01dbd6dfa29218602dec6c2aff7c5221ebcb8e89 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 22 Dec 2016 13:16:02 -0500
+Subject: [PATCH 152/260] Implement TDUP, TGETB and TSETB.
+
+Allows some simple table operations, for example:
+
+t = {1, 2}
+print(t[1]) -- prints 1
+t[1] = 3
+print(t[1]) -- prints 3
+---
+ src/vm_s390x.dasc | 90 +++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 84 insertions(+), 6 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index c7145681c..260c576cf 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1887,8 +1887,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | j <1
+ break;
+ case BC_TDUP:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AND // RA = dst, RD = table const (~) (holding template table)
++ | lg L:RB, SAVE_L
++ | lg RA, (DISPATCH_GL(gc.total))(DISPATCH)
++ | stg PC, SAVE_PC
++ | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH)
++ | stg BASE, L:RB->base
++ | jhe >3
++ |2:
++ | sllg RD, RD, 3(r0)
++ | lg TAB:CARG2, 0(RD, KBASE)
++ | lgr L:CARG1, L:RB
++ | brasl r14, extern lj_tab_dup // (lua_State *L, Table *kt)
++ | // Table * returned in r2 (CRET1).
++ | lg BASE, L:RB->base
++ | llgc RA, PC_RA
++ | settp TAB:CRET1, LJ_TTAB
++ | sllg RA, RA, 3(r0)
++ | stg TAB:CRET1, 0(RA, BASE)
++ | ins_next
++ |3:
++ | lgr L:CARG1, L:RB
++ | brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
++ | llgh RD, PC_RD // Need to reload RD.
++ | lghi TMPR2, -1
++ | xgr RD, TMPR2 // not RD
++ | j <2
+ break;
+
+ case BC_GGET:
+@@ -1959,8 +1983,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | j ->vmeta_tgets // Caveat: preserve STR:RC.
+ break;
+ case BC_TGETB:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_ABC // RA = dst, RB = table, RC = byte literal
++ | sllg RB, RB, 3(r0)
++ | lg TAB:RB, 0(RB, BASE)
++ | checktab TAB:RB, ->vmeta_tgetb
++ | cl RC, TAB:RB->asize
++ | jhe ->vmeta_tgetb
++ | sllg RC, RC, 3(r0)
++ | ag RC, TAB:RB->array
++ | // Get array slot.
++ | lg ITYPE, 0(RC)
++ | cghi ITYPE, LJ_TNIL
++ | je >2
++ |1:
++ | sllg RA, RA, 3(r0)
++ | stg ITYPE, 0(RA, BASE)
++ | ins_next
++ |
++ |2: // Check for __index if table value is nil.
++ | lg TAB:TMPR1, TAB:RB->metatable
++ | cghi TAB:TMPR1, 0
++ | je <1
++ | llgc TMPR2, TAB:TMPR1->nomm
++ | tmll TMPR2, 1<<MM_index
++ | je ->vmeta_tgetb // 'no __index' flag NOT set: check.
++ | j <1
+ break;
+ case BC_TGETR:
+ | stg r0, 0(r0)
+@@ -2041,8 +2088,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | j <3
+ break;
+ case BC_TSETB:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_ABC // RA = src, RB = table, RC = byte literal
++ | sllg RB, RB, 3(r0)
++ | lg TAB:RB, 0(RB, BASE)
++ | checktab TAB:RB, ->vmeta_tsetb
++ | cl RC, TAB:RB->asize
++ | jhe ->vmeta_tsetb
++ | sllg RC, RC, 3(r0)
++ | ag RC, TAB:RB->array
++ | lghi TMPR2, LJ_TNIL
++ | cg TMPR2, 0(RC)
++ | je >3 // Previous value is nil?
++ |1:
++ | llgc TMPR1, TAB:RB->marked
++ | tmll TMPR1, LJ_GC_BLACK // isblack(table)
++ | jne >7
++ |2: // Set array slot.
++ | sllg RA, RA, 3(r0)
++ | lg ITYPE, 0(RA, BASE)
++ | stg ITYPE, 0(RC)
++ | ins_next
++ |
++ |3: // Check for __newindex if previous value is nil.
++ | lg TAB:TMPR1, TAB:RB->metatable
++ | cghi TAB:TMPR1, 0
++ | je <1
++ | llgc TMPR2, TAB:TMPR1->nomm
++ | tmll TMPR2, 1<<MM_newindex
++ | je ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
++ | j <1
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, TMPR1
++ | j <2
+ break;
+ case BC_TSETR:
+ | stg r0, 0(r0)
+
+From 20f05a4e20eabdc858a83c7582869ac256ee127a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 22 Dec 2016 13:50:59 -0500
+Subject: [PATCH 153/260] Implement more tset and tget metamethods.
+
+This allows table entries to be read and written even if they don't
+already exist, for example:
+
+t = {}
+print(t[1]) -- prints nil
+t[1] = 3
+print(t[1]) -- prints 3
+---
+ src/vm_s390x.dasc | 119 ++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 105 insertions(+), 14 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 260c576cf..7051370ed 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -599,19 +599,61 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |->vmeta_tgets:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
++ | stg STR:RC, TMP_STACK
++ | la RC, TMP_STACK
++ | llgc TMPR1, PC_OP
++ | cghi TMPR1, BC_GGET
++ | jne >1
++ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
++ | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
++ | stg TAB:RA, 0(RB)
++ | j >2
+ |
+ |->vmeta_tgetb:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | llgc RC, PC_RC
++ | setint RC
++ | stg RC, TMP_STACK
++ | la RC, TMP_STACK
++ | j >1
+ |
+ |->vmeta_tgetv:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | llgc RC, PC_RC // Reload TValue *k from RC.
++ | sllg RC, RC, 3(r0)
++ | la RC, 0(RC, BASE)
++ |1:
++ | llgc RB, PC_RB // Reload TValue *t from RB.
++ | sllg RB, RB, 3(r0)
++ | la RB, 0(RB, BASE)
++ |2:
++ | lg L:CARG1, SAVE_L
++ | stg BASE, L:CARG1->base
++ | lgr CARG2, RB
++ | lgr CARG3, RC
++ | lgr L:RB, L:CARG1
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
++ | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
++ | lg BASE, L:RB->base
++ | ltgr RC, CRET1
++ | je >3
+ |->cont_ra: // BASE = base, RC = result
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | llgc RA, PC_RA
++ | sllg RA, RA, 3(r0)
++ | lg RB, 0(RC)
++ | stg RB, 0(RA, BASE)
++ | ins_next
++ |
++ |3: // Call __index metamethod.
++ | // BASE = base, L->top = new base, stack = cont/func/t/k
++ | lg RA, L:RB->top
++ | stg PC, -24(PC) // [cont|PC]
++ | lay PC, FRAME_CONT(RA)
++ | sgr PC, BASE
++ | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
++ | lghi NARGS:RD, 2+1 // 2 args for func(t, k).
++ | cleartp LFUNC:RB
++ | j ->vm_call_dispatch_f
+ |
+ |->vmeta_tgetr:
+ | stg r0, 0(r0)
+@@ -620,19 +662,68 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->vmeta_tsets:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
++ | stg STR:RC, TMP_STACK
++ | la RC, TMP_STACK
++ | llgc TMPR2, PC_OP
++ | cghi TMPR2, BC_GSET
++ | jne >1
++ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
++ | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
++ | stg TAB:RA, 0(RB)
++ | j >2
+ |
+ |->vmeta_tsetb:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | llgc RC, PC_RC
++ | setint RC
++ | stg RC, TMP_STACK
++ | la RC, TMP_STACK
++ | j >1
+ |
+ |->vmeta_tsetv:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | llgc RC, PC_RC // Reload TValue *k from RC.
++ | sllg RC, RC, 3(r0)
++ | la RC, 0(RC, BASE)
++ |1:
++ | llgc RB, PC_RB // Reload TValue *t from RB.
++ | sllg RB, RB, 3(r0)
++ | la RB, 0(RB, BASE)
++ |2:
++ | lg L:CARG1, SAVE_L
++ | stg BASE, L:CARG1->base // Caveat: CARG2/CARG3 may be BASE.
++ | lgr CARG2, RB
++ | lgr CARG3, RC
++ | lgr L:RB, L:CARG1
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
++ | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
++ | lg BASE, L:RB->base
++ | ltgr RC, CRET1
++ | je >3
++ | // NOBARRIER: lj_meta_tset ensures the table is not black.
++ | llgc RA, PC_RA
++ | sllg RA, RA, 3(r0)
++ | lg RB, 0(RA, BASE)
++ | stg RB, 0(RC)
+ |->cont_nop: // BASE = base, (RC = result)
+ | ins_next
+ |
++ |3: // Call __newindex metamethod.
++ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
++ | lg RA, L:RB->top
++ | stg PC, -24(PC) // [cont|PC]
++ | llgc RC, PC_RA
++ | // Copy value to third argument.
++ | sllg RB, RC, 3(r0)
++ | lg RB, 0(RB, BASE)
++ | stg RB, 16(RA)
++ | la PC, FRAME_CONT(RA)
++ | sgr PC, BASE
++ | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
++ | lghi NARGS:RD, 3+1 // 3 args for func(t, k, v).
++ | cleartp LFUNC:RB
++ | j ->vm_call_dispatch_f
++ |
+ |->vmeta_tsetr:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+
+From cab03375f1f6def39658304aca09d773275bc4eb Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 22 Dec 2016 14:20:47 -0500
+Subject: [PATCH 154/260] Implement TGETV and TSETV.
+
+Allows table entries to be read and written using variables, for example:
+
+t = {4,5}
+i = 1
+print(t[i]) -- prints 4
+t[i] = 3
+print(t[i]) -- prints 3
+---
+ src/vm_s390x.dasc | 85 ++++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 81 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 7051370ed..3b0b3ac1f 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2026,8 +2026,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_TGETV:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_ABC // RA = dst, RB = table, RC = key
++ | sllg RB, RB, 3(r0)
++ | lg TAB:RB, 0(RB, BASE)
++ | sllg RC, RC, 3(r0)
++ | lg RC, 0(RC, BASE)
++ | checktab TAB:RB, ->vmeta_tgetv
++ |
++ | // Integer key?
++ | checkint RC, >5
++ | cl RC, TAB:RB->asize // Takes care of unordered, too.
++ | jhe ->vmeta_tgetv // Not in array part? Use fallback.
++ | llgfr RC, RC
++ | sllg RC, RC, 3(r0)
++ | ag RC, TAB:RB->array
++ | // Get array slot.
++ | lg ITYPE, 0(RC)
++ | cghi ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
++ | je >2
++ |1:
++ | sllg RA, RA, 3(r0)
++ | stg ITYPE, 0(RA, BASE)
++ | ins_next
++ |
++ |2: // Check for __index if table value is nil.
++ | lg TAB:TMPR1, TAB:RB->metatable
++ | cghi TAB:TMPR1, 0
++ | je <1
++ | llgc TMPR2, TAB:TMPR1->nomm
++ | tmll TMPR2, 1<<MM_index
++ | je ->vmeta_tgetv // 'no __index' flag NOT set: check.
++ | j <1
++ |
++ |5: // String key?
++ | cghi ITYPE, LJ_TSTR; jne ->vmeta_tgetv
++ | cleartp STR:RC
++ | j ->BC_TGETS_Z
+ break;
+ case BC_TGETS:
+ | ins_ABC
+@@ -2104,9 +2138,52 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
++
+ case BC_TSETV:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_ABC // RA = src, RB = table, RC = key
++ | sllg RB, RB, 3(r0)
++ | lg TAB:RB, 0(RB, BASE)
++ | sllg RC, RC, 3(r0)
++ | lg RC, 0(RC, BASE)
++ | checktab TAB:RB, ->vmeta_tsetv
++ |
++ | // Integer key?
++ | checkint RC, >5
++ | cl RC, TAB:RB->asize // Takes care of unordered, too.
++ | jhe ->vmeta_tsetv
++ | llgfr RC, RC
++ | sllg RC, RC, 3(r0)
++ | ag RC, TAB:RB->array
++ | lghi TMPR2, LJ_TNIL
++ | cg TMPR2, 0(RC)
++ | je >3 // Previous value is nil?
++ |1:
++ | llgc TMPR1, TAB:RB->marked
++ | tmll TMPR1, LJ_GC_BLACK // isblack(table)
++ | jne >7
++ |2: // Set array slot.
++ | sllg RA, RA, 3(r0)
++ | lg RB, 0(RA, BASE)
++ | stg RB, 0(RC)
++ | ins_next
++ |
++ |3: // Check for __newindex if previous value is nil.
++ | lg TAB:TMPR1, TAB:RB->metatable
++ | cghi TAB:TMPR1, 0
++ | je <1
++ | llgc TMPR2, TAB:TMPR1->nomm
++ | tmll TMPR2, 1<<MM_newindex
++ | je ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
++ | j <1
++ |
++ |5: // String key?
++ | cghi ITYPE, LJ_TSTR; jne ->vmeta_tsetv
++ | cleartp STR:RC
++ | j ->BC_TSETS_Z
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, TMPR1
++ | j <2
+ break;
+ case BC_TSETS:
+ | stg r0, 0(r0)
+
+From c0c155e45e6606f7b8d3ec96601000f34e16c335 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 22 Dec 2016 14:40:31 -0500
+Subject: [PATCH 155/260] Implement/fix TGETS and TSETS.
+
+Allows string keys in tables, for example:
+
+t = {}
+t["hello"] = 1
+print(t["hello"]) -- prints 1
+---
+ src/vm_s390x.dasc | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 3b0b3ac1f..2bba69f37 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2070,7 +2070,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lghi TMPR1, -1
+ | xgr RC, TMPR1
+ | sllg RC, RC, 3(r0)
+- | lg STR:RC, 0(RC, BASE)
++ | lg STR:RC, 0(RC, KBASE)
+ | checktab TAB:RB, ->vmeta_tgets
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
+ | l TMPR1, TAB:RB->hmask
+@@ -2186,8 +2186,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | j <2
+ break;
+ case BC_TSETS:
+- | stg r0, 0(r0)
+- |
++ | ins_ABC // RA = src, RB = table, RC = str const (~)
++ | sllg RB, RB, 3(r0)
++ | lg TAB:RB, 0(RB, BASE)
++ | lghi TMPR2, -1
++ | xgr RC, TMPR2 // ~RC
++ | sllg RC, RC, 3(r0)
++ | lg STR:RC, 0(RC, KBASE)
++ | checktab TAB:RB, ->vmeta_tsets
+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
+ | l TMPR1, TAB:RB->hmask
+ | n TMPR1, STR:RC->hash
+
+From 077ccc86580b5d075297f513ea44c130bfdaa31f Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 22 Dec 2016 14:59:37 -0500
+Subject: [PATCH 156/260] Implement LEN.
+
+Enables length of tables and strings to be taken, for example:
+
+t = "hello"
+print(#t) -- prints 5
+t = {1,2}
+print(#t) -- prints 2
+---
+ src/vm_s390x.dasc | 60 +++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 56 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 2bba69f37..dedccf20d 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -845,8 +845,28 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->vm_call_dispatch
+ |
+ |->vmeta_len:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | llgh RD, PC_RD
++ | sllg RD, RD, 3(r0)
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ | la CARG2, 0(RD, BASE)
++ | lgr L:CARG1, L:RB
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_meta_len // (lua_State *L, TValue *o)
++ | // NULL (retry) or TValue * (metamethod) returned in r2 (CRET1).
++ | lgr RC, CRET1
++ | lg BASE, L:RB->base
++#if LJ_52
++ | cghi RC, 0
++ | jne ->vmeta_binop // Binop call for compatibility.
++ | llgh RD, PC_RD
++ | sllg RD, RD, 3(r0)
++ | lg TAB:CARG1, 0(RD, BASE)
++ | cleartp TAB:CARG1
++ | j ->BC_LEN_Z
++#else
++ | j ->vmeta_binop // Binop call for compatibility.
++#endif
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+@@ -1698,8 +1718,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | j <1
+ break;
+ case BC_LEN:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = dst, RD = src
++ | sllg RD, RD, 3(r0)
++ | lg RD, 0(RD, BASE)
++ | checkstr RD, >2
++ | llgf RD, STR:RD->len
++ |1:
++ | sllg RA, RA, 3(r0)
++ | setint RD
++ | stg RD, 0(RA, BASE)
++ | ins_next
++ |2:
++ | cghi ITYPE, LJ_TTAB; jne ->vmeta_len
++ | lgr TAB:CARG1, TAB:RD
++#if LJ_52
++ | lg TAB:RB, TAB:RD->metatable
++ | cghi TAB:RB, 0
++ | jne >9
++ |3:
++#endif
++ |->BC_LEN_Z:
++ | lgr RB, BASE // Save BASE.
++ | brasl r14, extern lj_tab_len // (GCtab *t)
++ | // Length of table returned in r2 (CRET1).
++ | lgr RD, CRET1
++ | lgr BASE, RB // Restore BASE.
++ | llgc RA, PC_RA
++ | j <1
++#if LJ_52
++ |9: // Check for __len.
++ | llgc TMPR2, TAB:RB->nomm
++ | tmll TMPR2, 1<<MM_len
++ | jne <3
++ | j ->vmeta_len // 'no __len' flag NOT set: check.
++#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+From 354b5c748bec963150bea4ee878d05b43ac15238 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 22 Dec 2016 15:40:25 -0500
+Subject: [PATCH 157/260] Implement UCLO, ff_assert and a couple of other
+ bits.
+
+Needed to get -bl working, still more to do though.
+---
+ src/vm_s390x.dasc | 46 ++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 40 insertions(+), 6 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index dedccf20d..c862e4275 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -950,8 +950,26 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Base library: checks -----------------------------------------------
+ |
+ |.ffunc_1 assert
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg RB, 0(BASE)
++ | srag ITYPE, RB, 47(r0)
++ | clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback
++ | lg PC, -8(BASE)
++ | stg RD, SAVE_MULTRES // TODO: needs to be 32-bit.
++ | lg RB, 0(BASE)
++ | stg RB, -16(BASE)
++ | ahi RD, -2
++ | je >2
++ | lgr RA, BASE
++ |1:
++ | la RA, 8(RA)
++ | lg RB, 0(RA)
++ | stg RB, -16(RA)
++ | ahi RD, -1
++ | jne <1
++ | // TODO: replace with branch on count (brctg).
++ |2:
++ | lg RD, SAVE_MULTRES // TODO: needs to be 32-bit.
++ | j ->fff_res_
+ |
+ |.ffunc_1 type
+ | stg r0, 0(r0)
+@@ -1972,9 +1990,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ break;
+ case BC_UCLO:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = level, RD = target
++ | branchPC RD // Do this first to free RD.
++ | lg L:RB, SAVE_L
++ | ltg TMPR2, L:RB->openupval
++ | je >1
++ | stg BASE, L:RB->base
++ | sllg RA, RA, 3(r0)
++ | la CARG2, 0(RA, BASE)
++ | lgr L:CARG1, L:RB
++ | brasl r14, extern lj_func_closeuv // (lua_State *L, TValue *level)
++ | lg BASE, L:RB->base
++ |1:
++ | ins_next
+ break;
++
+ case BC_FNEW:
+ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
+ | lg L:RB, SAVE_L
+@@ -2453,9 +2483,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
++
++ /* -- Returns ----------------------------------------------------------- */
++
+ case BC_RETM:
+- | stg r0, 0(r0) // not implemented
+- | stg r0, 0(r0)
++ | ins_AD // RA = results, RD = extra_nresults
++ | ag RD, SAVE_MULTRES // MULTRES >=1, so RD >=1. // TODO: needs to be 32-bit.
++ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
+ break;
+
+ case BC_RET: case BC_RET0: case BC_RET1:
+
+From aba9cfb2a85dd42f22f6784a418f9b7c82b82d69 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 28 Dec 2016 10:56:09 -0500
+Subject: [PATCH 158/260] Implement UGET.
+
+Allows simple closures, for example:
+
+function f(x)
+ return function() return x end
+end
+
+y = f(1)
+print(y()) -- prints 1
+---
+ src/vm_s390x.dasc | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index c862e4275..7754c45fd 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1970,8 +1970,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ break;
+ case BC_UGET:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = dst, RD = upvalue #
++ | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 3(r0)
++ | lg LFUNC:RB, -16(BASE)
++ | cleartp LFUNC:RB
++ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RD, LFUNC:RB)
++ | lg RB, UPVAL:RB->v
++ | lg RD, 0(RB)
++ | stg RD, 0(RA, BASE)
++ | ins_next
+ break;
+ case BC_USETV:
+ | stg r0, 0(r0)
+
+From 6673652fd959a6e8df667cecaef78f3fb53d678a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 28 Dec 2016 13:21:06 -0500
+Subject: [PATCH 159/260] Implement TSETM and VARG.
+
+Allows varargs to be used, for example:
+
+function sel(n, ...)
+ local arg = {...}
+ return arg[n]
+end
+
+print(sel(2, 3, 4, 5)) -- prints 4
+---
+ src/vm_s390x.dasc | 149 ++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 132 insertions(+), 17 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 7754c45fd..450fce81a 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -91,6 +91,7 @@
+ |.define SAVE_PC, 168(sp)
+ |.define SAVE_MULTRES, 160(sp)
+ |.define TMP_STACK, 160(sp) // Overlaps SAVE_MULTRES
++|.define TMP_STACK_HI, 164(sp) // High 32-bits (to avoid SAVE_MULTRES).
+ |
+ |// Callee save area (allocated by interpreter).
+ |.define CALLEESAVE, 000(sp) // <- sp in interpreter.
+@@ -323,7 +324,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vm_returnc:
+ | aghi RD, 1 // RD = nresults+1
+ | jo ->vm_unwind_yield // TODO: !!! NOT SURE, jz on x64, overflow? !!!
+- | stg RD, SAVE_MULTRES
++ | st RD, SAVE_MULTRES
+ | tmll PC, FRAME_TYPE
+ | je ->BC_RET_Z // Handle regular return to Lua.
+ |
+@@ -352,7 +353,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg L:RB, SAVE_L
+ | stg PC, L:RB->base
+ |3:
+- | lg RD, SAVE_MULTRES
++ | llgf RD, SAVE_MULTRES
+ | lgf RA, SAVE_NRES // RA = wanted nresults+1
+ |4:
+ | cgr RA, RD
+@@ -395,7 +396,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | // - The GC shrinks the stack in between.
+ | // - A return back from a lua_call() with (high) nresults adjustment.
+ | stg BASE, L:RB->top // Save current top held in BASE (yes).
+- | stg RD, SAVE_MULTRES // Need to fill only remainder with nil.
++ | st RD, SAVE_MULTRES // Need to fill only remainder with nil.
+ | lgr CARG2, RA
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+@@ -500,7 +501,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | aghi RD, 1 // RD = nresults+1
+ | sgr RA, BASE // RA = resultofs
+ | lg PC, -8(BASE)
+- | stg RD, SAVE_MULTRES
++ | st RD, SAVE_MULTRES
+ | tmll PC, FRAME_TYPE
+ | je ->BC_RET_Z
+ | j ->vm_return
+@@ -954,7 +955,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | srag ITYPE, RB, 47(r0)
+ | clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback
+ | lg PC, -8(BASE)
+- | stg RD, SAVE_MULTRES // TODO: needs to be 32-bit.
++ | st RD, SAVE_MULTRES
+ | lg RB, 0(BASE)
+ | stg RB, -16(BASE)
+ | ahi RD, -2
+@@ -968,7 +969,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | jne <1
+ | // TODO: replace with branch on count (brctg).
+ |2:
+- | lg RD, SAVE_MULTRES // TODO: needs to be 32-bit.
++ | llgf RD, SAVE_MULTRES
+ | j ->fff_res_
+ |
+ |.ffunc_1 type
+@@ -1076,7 +1077,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |->fff_res1:
+ | lghi RD, 1+1
+ |->fff_res:
+- | stg RD, SAVE_MULTRES
++ | st RD, SAVE_MULTRES
+ |->fff_res_:
+ | tmll PC, FRAME_TYPE
+ | jne >7
+@@ -2391,8 +2392,54 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ break;
+ case BC_TSETM:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = base (table at base-1), RD = num const (start index)
++ |1:
++ | sllg RA, RA, 3(r0)
++ | sllg TMPR1, RD, 3(r0)
++ | llgf TMPR1, 4(TMPR1, KBASE) // Integer constant is in lo-word.
++ | la RA, 0(RA, BASE)
++ | lg TAB:RB, -8(RA) // Guaranteed to be a table.
++ | cleartp TAB:RB
++ | llgc TMPR2, TAB:RB->marked
++ | tmll TMPR2, LJ_GC_BLACK // isblack(table)
++ | jne >7
++ |2:
++ | llgf RD, SAVE_MULTRES
++ | aghi RD, -1
++ | je >4 // Nothing to copy?
++ | agr RD, TMPR1 // Compute needed size.
++ | clgf RD, TAB:RB->asize
++ | jh >5 // Doesn't fit into array part?
++ | sgr RD, TMPR1
++ | sllg TMPR1, TMPR1, 3(r0)
++ | ag TMPR1, TAB:RB->array
++ |3: // Copy result slots to table.
++ | lg RB, 0(RA)
++ | la RA, 8(RA)
++ | stg RB, 0(TMPR1)
++ | la TMPR1, 8(TMPR1)
++ | aghi RD, -1
++ | jne <3
++ | // TODO: replace decrement/branch with branch on count.
++ |4:
++ | ins_next
++ |
++ |5: // Need to resize array part.
++ | lg L:CARG1, SAVE_L
++ | stg BASE, L:CARG1->base
++ | lgr CARG2, TAB:RB
++ | lgfr CARG3, RD
++ | lgr L:RB, L:CARG1
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
++ | lg BASE, L:RB->base
++ | llgc RA, PC_RA // Restore RA.
++ | llgh RD, PC_RD // Restore RD.
++ | j <1 // Retry.
++ |
++ |7: // Possible table write barrier for any value. Skip valiswhite check.
++ | barrierback TAB:RB, RD
++ | j <2
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+@@ -2401,7 +2448,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+ | lgr RD, RC
+ if (op == BC_CALLM) {
+- | ag NARGS:RD, SAVE_MULTRES // TODO: MULTRES is 32-bit on x64
++ | agf NARGS:RD, SAVE_MULTRES
+ }
+ | sllg RA, RA, 3(r0)
+ | lg LFUNC:RB, 0(BASE, RA)
+@@ -2427,7 +2474,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | jne >7
+ |1:
+ | stg LFUNC:RB, -16(BASE) // Copy func+tag down, reloaded below.
+- | stg NARGS:RD, SAVE_MULTRES // 32-bit on x64.
++ | st NARGS:RD, SAVE_MULTRES
+ | aghi NARGS:RD, -1
+ | je >3
+ |2: // Move args down.
+@@ -2443,7 +2490,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg LFUNC:RB, -16(BASE)
+ |3:
+ | cleartp LFUNC:RB
+- | lg NARGS:RD, SAVE_MULTRES
++ | llgf NARGS:RD, SAVE_MULTRES
+ | llgc TMPR1, LFUNC:RB->ffid
+ | cghi TMPR1, 1 // (> FF_C) Calling a fast function?
+ | jh >5
+@@ -2488,15 +2535,83 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ break;
+ case BC_VARG:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | // TODO: some opportunities for branch on index in here.
++ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
++ | sllg RA, RA, 3(r0)
++ | sllg RB, RB, 3(r0)
++ | sllg RC, RC, 3(r0)
++ | la TMPR1, (16+FRAME_VARG)(RC, BASE)
++ | la RA, 0(RA, BASE)
++ | sg TMPR1, -8(BASE)
++ | // Note: TMPR1 may now be even _above_ BASE if nargs was < numparams.
++ | cghi RB, 0
++ | je >5 // Copy all varargs?
++ | lay RB, -8(RA, RB)
++ | clgr TMPR1, BASE // No vararg slots?
++ | jnl >2
++ |1: // Copy vararg slots to destination slots.
++ | lg RC, -16(TMPR1)
++ | la TMPR1, 8(TMPR1)
++ | stg RC, 0(RA)
++ | la RA, 8(RA)
++ | clgr RA, RB // All destination slots filled?
++ | jnl >3
++ | clgr TMPR1, BASE // No more vararg slots?
++ | jl <1
++ | lghi TMPR2, LJ_TNIL
++ |2: // Fill up remainder with nil.
++ | stg TMPR2, 0(RA)
++ | la RA, 8(RA)
++ | clgr RA, RB
++ | jl <2
++ |3:
++ | ins_next
++ |
++ |5: // Copy all varargs.
++ | lghi TMPR2, 1
++ | st TMPR2, SAVE_MULTRES // MULTRES = 0+1
++ | lgr RC, BASE
++ | slgr RC, TMPR1
++ | jno <3 // No vararg slots? (borrow or zero)
++ | llgfr RB, RC
++ | srlg RB, RB, 3(r0)
++ | ahi RB, 1
++ | st RB, SAVE_MULTRES // MULTRES = #varargs+1
++ | lg L:RB, SAVE_L
++ | agr RC, RA
++ | clg RC, L:RB->maxstack
++ | jh >7 // Need to grow stack?
++ |6: // Copy all vararg slots.
++ | lg RC, -16(TMPR1)
++ | la TMPR1, 8(TMPR1)
++ | stg RC, 0(RA)
++ | la RA, 8(RA)
++ | clgr TMPR1, BASE // No more vararg slots?
++ | jl <6
++ | j <3
++ |
++ |7: // Grow stack for varargs.
++ | stg BASE, L:RB->base
++ | stg RA, L:RB->top
++ | stg PC, SAVE_PC
++ | sgr TMPR1, BASE // Need delta, because BASE may change.
++ | st TMPR1, TMP_STACK_HI
++ | llgf CARG2, SAVE_MULTRES
++ | aghi CARG2, -1
++ | lgr CARG1, L:RB
++ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
++ | lg BASE, L:RB->base
++ | llgf TMPR1, TMP_STACK_HI
++ | lg RA, L:RB->top
++ | agr TMPR1, BASE
++ | j <6
+ break;
+
+ /* -- Returns ----------------------------------------------------------- */
+
+ case BC_RETM:
+ | ins_AD // RA = results, RD = extra_nresults
+- | ag RD, SAVE_MULTRES // MULTRES >=1, so RD >=1. // TODO: needs to be 32-bit.
++ | agf RD, SAVE_MULTRES // MULTRES >=1, so RD >=1.
+ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
+ break;
+
+@@ -2507,7 +2622,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ }
+ |1:
+ | lg PC, -8(BASE)
+- | stg RD, SAVE_MULTRES // Save nresults+1.
++ | st RD, SAVE_MULTRES // Save nresults+1.
+ | tmll PC, FRAME_TYPE // Check frame type marker.
+ | jne >7 // Not returning to a fixarg Lua func?
+ switch (op) {
+@@ -2524,7 +2639,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | aghi RD, -1
+ | jne <2
+ |3:
+- | lg RD, SAVE_MULTRES // Note: MULTRES may be >255.
++ | llgf RD, SAVE_MULTRES // Note: MULTRES may be >256.
+ | llgc RB, PC_RB
+ |5:
+ | cgr RB, RD // More results expected?
+
+From 5dc644ad893080e2bf3ca3a1a7ecefeea6a7170d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 28 Dec 2016 14:13:08 -0500
+Subject: [PATCH 160/260] Implement LOOP.
+
+Allows for while and repeat loops, for example:
+
+x = 0
+while x < 5 do
+ print(x)
+ x = x + 1
+end
+
+-- prints:
+-- 0
+-- 1
+-- 2
+-- 3
+-- 4
+---
+ src/vm_s390x.dasc | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 450fce81a..990ae9218 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2844,14 +2844,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ break;
++
+ case BC_LOOP:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_A // RA = base, RD = target (loop extent)
++ | // Note: RA/RD is only used by trace recorder to determine scope/extent
++ | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
++ |.if JIT
++ | hotloop RBd
++ |.endif
++ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
+ break;
++
+ case BC_ILOOP:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_A // RA = base, RD = target (loop extent)
++ | ins_next
+ break;
++
+ case BC_JLOOP:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+
+From 2584c6d5a84ae0ef245f50c31591dd2a32d321bf Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 28 Dec 2016 15:42:30 -0500
+Subject: [PATCH 161/260] Implement ISNUM, ISTYPE, TGETR and TSETR.
+
+---
+ src/vm_s390x.dasc | 223 +++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 202 insertions(+), 21 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 990ae9218..bcb8e3fbb 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -657,8 +657,17 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->vm_call_dispatch_f
+ |
+ |->vmeta_tgetr:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lgr CARG1, TAB:RB
++ | lgr RB, BASE // Save BASE.
++ | lgfr CARG2, RC
++ | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
++ | // cTValue * or NULL returned in r2 (CRET1).
++ | llgc RA, PC_RA
++ | lgr BASE, RB // Restore BASE.
++ | ltgr RC, CRET1
++ | jne ->BC_TGETR_Z
++ | lghi ITYPE, LJ_TNIL
++ | j ->BC_TGETR2_Z
+ |
+ |//-----------------------------------------------------------------------
+ |
+@@ -726,8 +735,18 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->vm_call_dispatch_f
+ |
+ |->vmeta_tsetr:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg L:CARG1, SAVE_L
++ | lgr CARG2, TAB:RB
++ | stg BASE, L:CARG1->base
++ | lgr RB, BASE // Save BASE (TODO: BASE is callee-saved anyway on s390x).
++ | lgfr CARG3, RC
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
++ | // TValue * returned in r2 (CRET1).
++ | lgr RC, CRET1
++ | llgh RA, PC_RA
++ | lgr BASE, RB // Restore BASE.
++ | j ->BC_TSETR_Z
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+@@ -775,8 +794,15 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg r0, 0(r0)
+ |
+ |->vmeta_istype:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ | llgfr CARG2, RA
++ | llgfr CARG3, RD
++ | lgr L:CARG1, L:RB
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
++ | lg BASE, L:RB->base
++ | j <6
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+@@ -946,6 +972,11 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |// Inlined GC threshold check. Caveat: uses label 1.
+ |.macro ffgccheck
++ | lg RB, (DISPATCH_GL(gc.total))(DISPATCH)
++ | clg RB, (DISPATCH_GL(gc.threshold))(DISPATCH)
++ | jl >1
++ | brasl r14, ->fff_gcstep
++ |1:
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+@@ -973,8 +1004,24 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->fff_res_
+ |
+ |.ffunc_1 type
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg RC, 0(BASE)
++ | srag RC, RC, 47(r0)
++ | lghi RB, LJ_TISNUM
++ | clgr RC, RB
++ | jnl >1
++ | lgr RC, RB
++ |1:
++ | lghi TMPR2, -1
++ | xgr RC, TMPR2
++ |2:
++ | lg CFUNC:RB, -16(BASE)
++ | cleartp CFUNC:RB
++ | sllg RC, RC, 3(r0)
++ | lg STR:RC, ((char *)(&((GCfuncC *)0)->upvalue))(RC, CFUNC:RB)
++ | lg PC, -8(BASE)
++ | settp STR:RC, LJ_TSTR
++ | stg STR:RC, -16(BASE)
++ | j ->fff_res1
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+@@ -1155,18 +1202,89 @@ static void build_subroutines(BuildCtx *ctx)
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | stg r0, 0(r0)
+ |->fff_newstr:
+- | stg r0, 0(r0)
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ | llgfr CARG3, TMPR1 // Zero-extended to size_t.
++ | lgr CARG2, RD
++ | lgr CARG1, L:RB
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr:
+- | stg r0, 0(r0)
++ | // GCstr * returned in r2 (CRET1).
++ | lgr STR:RD, CRET1
++ | lg BASE, L:RB->base
++ | lg PC, -8(BASE)
++ | settp STR:RD, LJ_TSTR
++ | stg STR:RD, -16(BASE)
++ | j ->fff_res1
+ |
+ |.ffunc string_sub
+- | stg r0, 0(r0)
++ | ffgccheck
++ | lghi TMPR1, -1
++ | clfi NARGS:RD, 1+2; jl ->fff_fallback
++ | jnh >1
++ | lg TMPR1, 16(BASE)
++ | checkint TMPR1, ->fff_fallback
++ |1:
++ | lg STR:RB, 0(BASE)
++ | checkstr STR:RB, ->fff_fallback
++ | lg ITYPE, 8(BASE)
++ | llgfr RA, ITYPE // Must clear hiword for lea below.
++ | srag ITYPE, ITYPE, 47(r0)
++ | cghi ITYPE, LJ_TISNUM
++ | jne ->fff_fallback
++ | llgf RC, STR:RB->len
++ | clr RC, TMPR1 // len < end? (unsigned compare)
++ | jl >5
++ |2:
++ | cghi RA, 0 // start <= 0?
++ | jle >7
++ |3:
++ | sr TMPR1, RA // start > end?
++ | jnhe ->fff_emptystr // TODO: not sure about this, was jl in x64.
++ | la RD, (#STR-1)(RA, STR:RB)
++ | ahi TMPR1, 1
++ |4:
++ | j ->fff_newstr
++ |
++ |5: // Negative end or overflow.
++ | chi TMPR1, 0
++ | jnl >6
++ | ahi TMPR1, 1
++ | ar TMPR1, RC // end = end+(len+1)
++ | j <2
++ |6: // Overflow.
++ | lr TMPR1, RC // end = len
++ | j <2
++ |
++ |7: // Negative start or underflow.
++ | je >8
++ | agr RA, RC // start = start+(len+1)
++ | aghi RA, 1
++ | jh <3 // start > 0?
++ |8: // Underflow.
++ | lghi RA, 1 // start = 1
++ | j <3
+ |
+ |->fff_emptystr: // Range underflow.
+ | stg r0, 0(r0)
+ |
+ |.macro ffstring_op, name
+ | .ffunc_1 string_ .. name
++ | ffgccheck
++ | lg STR:CARG2, 0(BASE)
++ | checkstr STR:CARG2, ->fff_fallback
++ | lg L:RB, SAVE_L
++ | lay SBUF:CARG1, (DISPATCH_GL(tmpbuf))(DISPATCH)
++ | stg BASE, L:RB->base
++ | lg RC, SBUF:CARG1->b
++ | stg L:RB, SBUF:CARG1->L
++ | stg RC, SBUF:CARG1->p
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_buf_putstr_ .. name
++ | // lgr CARG1, CRET1 (nop, CARG1==CRET1)
++ | brasl r14, extern lj_buf_tostr
++ | j ->fff_resstr
+ |.endmacro
+ |
+ |ffstring_op reverse
+@@ -1258,8 +1376,22 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RD = nargs+1
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r14, TMP_STACK // Save return address
++ | lg L:RB, SAVE_L
++ | stg PC, SAVE_PC // Redundant (but a defined value).
++ | stg BASE, L:RB->base
++ | sllg RD, NARGS:RD, 3(r0)
++ | lay RD, -8(RD, BASE)
++ | lgr CARG1, L:RB
++ | stg RD, L:RB->top
++ | brasl r14, extern lj_gc_step // (lua_State *L)
++ | lg BASE, L:RB->base
++ | lg RD, L:RB->top
++ | sgr RD, BASE
++ | srlg RD, RD, 3(r0)
++ | aghi NARGS:RD, 1
++ | lg r14, TMP_STACK // Restore return address.
++ | br r14
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+@@ -1686,13 +1818,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |1: // Fallthrough to the next instruction.
+ | ins_next
+ break;
++
+ case BC_ISTYPE:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = src, RD = -type
++ | lghr RD, RD // TODO: always sign extend RD?
++ | sllg RA, RA, 3(r0)
++ | lg RB, 0(RA, BASE)
++ | srag RB, RB, 47(r0)
++ | agr RB, RD
++ | jne ->vmeta_istype
++ | ins_next
+ break;
+ case BC_ISNUM:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = src, RD = -(TISNUM-1)
++ | sllg TMPR1, RA, 3(r0)
++ | lg TMPR1, 0(TMPR1, BASE)
++ | checknumtp TMPR1, ->vmeta_istype
++ | ins_next
+ break;
+ case BC_MOV:
+ | ins_AD // RA = dst, RD = src
+@@ -2226,8 +2368,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | j <1
+ break;
+ case BC_TGETR:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_ABC // RA = dst, RB = table, RC = key
++ | sllg RB, RB, 3(r0)
++ | lg TAB:RB, 0(RB, BASE)
++ | cleartp TAB:RB
++ | sllg RC, RC, 3(r0)
++ | llgf RC, 4(RC, BASE) // Load low word (big endian).
++ | cl RC, TAB:RB->asize
++ | jhe ->vmeta_tgetr // Not in array part? Use fallback.
++ | sllg RC, RC, 3(r0)
++ | ag RC, TAB:RB->array
++ | // Get array slot.
++ |->BC_TGETR_Z:
++ | lg ITYPE, 0(RC)
++ |->BC_TGETR2_Z:
++ | sllg RA, RA, 3(r0)
++ | stg ITYPE, 0(RA, BASE)
++ | ins_next
+ break;
+
+ case BC_TSETV:
+@@ -2388,9 +2545,33 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | j <2
+ break;
+ case BC_TSETR:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_ABC // RA = src, RB = table, RC = key
++ | sllg RB, RB, 3(r0)
++ | lg TAB:RB, 0(RB, BASE)
++ | cleartp TAB:RB
++ | sllg RC, RC, 3(r0)
++ | lg RC, 0(RC, BASE)
++ | llgc TMPR2, TAB:RB->marked
++ | tmll TMPR2, LJ_GC_BLACK // isblack(table)
++ | jne >7
++ |2:
++ | cl RC, TAB:RB->asize
++ | jhe ->vmeta_tsetr
++ | llgfr RC, RC
++ | sllg RC, RC, 3(r0)
++ | ag RC, TAB:RB->array
++ | // Set array slot.
++ |->BC_TSETR_Z:
++ | sllg RA, RA, 3(r0)
++ | lg ITYPE, 0(RA, BASE)
++ | stg ITYPE, 0(RC)
++ | ins_next
++ |
++ |7: // Possible table write barrier for the value. Skip valiswhite check.
++ | barrierback TAB:RB, TMPR1
++ | j <2
+ break;
++
+ case BC_TSETM:
+ | ins_AD // RA = base (table at base-1), RD = num const (start index)
+ |1:
+
+From 230a4aa42465d4a75a6a3b3d9066e50b1b6771e9 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 28 Dec 2016 17:53:38 -0500
+Subject: [PATCH 162/260] Implement KNIL and CALLMT.
+
+---
+ src/vm_s390x.dasc | 23 +++++++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index bcb8e3fbb..f5055a40c 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2109,9 +2109,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ break;
+ case BC_KNIL:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = dst_start, RD = dst_end
++ | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 3(r0)
++ | la RA, 8(RA, BASE)
++ | la RD, 0(RD, BASE)
++ | lghi RB, LJ_TNIL
++ | stg RB, -8(RA) // Sets minimum 2 slots.
++ |1:
++ | stg RB, 0(RA)
++ | la RA, 8(RA)
++ | clgr RA, RD
++ | jle <1
++ | ins_next
+ break;
++
++/* -- Upvalue and function ops ------------------------------------------ */
++
+ case BC_UGET:
+ | ins_AD // RA = dst, RD = upvalue #
+ | sllg RA, RA, 3(r0)
+@@ -2639,8 +2653,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_CALLMT:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = base, RD = extra_nargs
++ | a NARGS:RD, SAVE_MULTRES
++ | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
+ break;
+ case BC_CALLT:
+ | ins_AD // RA = base, RD = nargs+1
+
+From 29223bb979d87be2623cf34f3500a011e183fc93 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 29 Dec 2016 11:10:18 -0500
+Subject: [PATCH 163/260] Implement POW.
+
+Allows use of the '^' operator, for example:
+
+x = 2
+y = 3
+print(x ^ y) -- prints 8
+---
+ src/vm_s390x.dasc | 19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index f5055a40c..99200bc68 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2059,9 +2059,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | j ->vmeta_arith_vvo
+ break;
+ case BC_POW:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_ABC
++ | sllg RB, RB, 3(r0)
++ | sllg RC, RC, 3(r0)
++ | ld FARG1, 0(RB, BASE)
++ | ld FARG2, 0(RC, BASE)
++ | lg TMPR2, 0(RB, BASE)
++ | checknumtp TMPR2, ->vmeta_arith_vvo
++ | lg TMPR2, 0(RC, BASE)
++ | checknumtp TMPR2, ->vmeta_arith_vvo
++ | lgr RB, BASE // TODO: redundant, BASE is currently callee-saved.
++ | brasl r14, extern pow // double pow(double x, double y), result in f0.
++ | llgc RA, PC_RA
++ | lgr BASE, RB
++ | sllg RA, RA, 3(r0)
++ | std f0, 0(RA, BASE)
++ | ins_next
+ break;
++
+ case BC_CAT:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+
+From e8ca7b87996c12b17344925a8374e2211df0050a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 29 Dec 2016 11:23:45 -0500
+Subject: [PATCH 164/260] Implement CAT.
+
+Allows the use of the '..' operator, for example:
+
+x = "hello"
+y = " "
+z = "world!"
+print(x..y..z) -- prints 'hello world!'
+---
+ src/vm_s390x.dasc | 24 ++++++++++++++++++++++--
+ 1 file changed, 22 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 99200bc68..cf9a8ccae 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2078,8 +2078,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_CAT:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_ABC // RA = dst, RB = src_start, RC = src_end
++ | lg L:CARG1, SAVE_L
++ | stg BASE, L:CARG1->base
++ | lgr CARG3, RC
++ | sgr CARG3, RB
++ | sllg RC, RC, 3(r0)
++ | la CARG2, 0(RC, BASE)
++ |->BC_CAT_Z:
++ | lgr L:RB, L:CARG1
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_meta_cat // (lua_State *L, TValue *top, int left)
++ | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
++ | lg BASE, L:RB->base
++ | ltgr RC, CRET1
++ | jne ->vmeta_binop
++ | llgc RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
++ | sllg RB, RB, 3(r0)
++ | llgc RA, PC_RA
++ | sllg RA, RA, 3(r0)
++ | lg RC, 0(RB, BASE)
++ | stg RC, 0(RA, BASE)
++ | ins_next
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+From e6eb12b26856eefeb059f4bb4f3a08f5f24b337f Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 29 Dec 2016 14:50:45 -0500
+Subject: [PATCH 165/260] Implement bit operations.
+
+See http://bitop.luajit.org/api.html for more information.
+
+Bytecode listing is now supported, for example:
+
+$ ./luajit -bl -e 'a=1'
+-- BYTECODE -- "a=1":0-1
+0001 KSHORT 0 1
+0002 GSET 0 0 ; "a"
+0003 RET0 0 1
+---
+ src/vm_s390x.dasc | 114 +++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 103 insertions(+), 11 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index cf9a8ccae..352613913 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -20,6 +20,8 @@
+ |
+ |// Instructions used that are not in base z/Architecture:
+ |// clfi (compare logical immediate) [requires z9-109]
++|// ldgr (load FPR from GPR) [requires z9-109 GA3]
++|// lgdr (load GPR from FPR) [requires z9-109 GA3]
+ |// TODO: alternative instructions?
+ |
+ |.arch s390x
+@@ -283,6 +285,12 @@
+ | stg TMPR1, DISPATCH_GL(vmstate)(DISPATCH)
+ |.endmacro
+ |
++|// Synthesize binary floating-point constants.
++|.macro bfpconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
++| llihh tmp, 0x4338
++| ldgr reg, tmp
++|.endmacro
++|
+ |// Move table write barrier back. Overwrites reg.
+ |.macro barrierback, tab, reg
+ | // TODO: more efficient way?
+@@ -1113,10 +1121,26 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+- | .ffunc_1 math_abs
++ |.ffunc_1 math_abs
++ | lg RB, 0(BASE)
++ | checkint RB, >3
++ | lpr RB, RB; jo >2
+ |->fff_resbit:
+ |->fff_resi:
++ | setint RB
+ |->fff_resRB:
++ | lg PC, -8(BASE)
++ | stg RB, -16(BASE)
++ | j ->fff_res1
++ |2:
++ | llihh RB, 0x41e0 // 2^31
++ | j ->fff_resRB
++ |3:
++ | jh ->fff_fallback
++ | nihh RB, 0x7fff // Clear sign bit.
++ | lg PC, -8(BASE)
++ | stg RB, -16(BASE)
++ | j ->fff_res1
+ |
+ |.ffunc_n math_sqrt, sqrtsd
+ |->fff_resxmm0:
+@@ -1295,6 +1319,26 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.macro .ffunc_bit, name, kind, fdef
+ | fdef name
++ |.if kind == 2
++ | bfpconst_tobit f1, RB
++ |.endif
++ | lg RB, 0(BASE)
++ | ld f0, 0(BASE)
++ | checkint RB, >1
++ |.if kind > 0
++ | j >2
++ |.else
++ | j ->fff_resbit
++ |.endif
++ |1:
++ | jh ->fff_fallback
++ |.if kind < 2
++ | bfpconst_tobit f1, RB
++ |.endif
++ | adbr f0, f1
++ | lgdr RB, f0
++ | llgfr RB, RB
++ |2:
+ |.endmacro
+ |
+ |.macro .ffunc_bit, name, kind
+@@ -1302,33 +1346,81 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endmacro
+ |
+ |.ffunc_bit bit_tobit, 0
++ | j ->fff_resbit
+ |
+ |.macro .ffunc_bit_op, name, ins
+ | .ffunc_bit name, 2
++ | lgr TMPR1, NARGS:RD // Save for fallback.
++ | sllg RD, NARGS:RD, 3(r0)
++ | lay RD, -16(RD, BASE)
++ |1:
++ | clgr RD, BASE
++ | jle ->fff_resbit
++ | lg RA, 0(RD)
++ | checkint RA, >2
++ | ins RB, RA
++ | aghi RD, -8
++ | j <1
++ |2:
++ | jh ->fff_fallback_bit_op
++ | ldgr f0, RA
++ | adbr f0, f1
++ | lgdr RA, f0
++ | ins RB, RA
++ | aghi RD, -8
++ | j <1
+ |.endmacro
+ |
+- |.ffunc_bit_op bit_band, and
++ |.ffunc_bit_op bit_band, nr
+ |.ffunc_bit_op bit_bor, or
+- |.ffunc_bit_op bit_bxor, xor
++ |.ffunc_bit_op bit_bxor, xr
+ |
+ |.ffunc_bit bit_bswap, 1
++ | lrvr RB, RB
++ | j ->fff_resbit
+ |
+ |.ffunc_bit bit_bnot, 1
+- |->fff_resbit:
++ | lhi TMPR2, -1
++ | xr RB, TMPR2 // TODO: use xilf on newer models?
++ | j ->fff_resbit
+ |
+ |->fff_fallback_bit_op:
++ | lgr NARGS:RD, TMPR1 // Restore for fallback
++ | j ->fff_fallback
+ |
+ |.macro .ffunc_bit_sh, name, ins
+ | .ffunc_bit name, 1, .ffunc_2
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | // Note: no inline conversion from number for 2nd argument!
++ | lg RA, 8(BASE)
++ | checkint RA, ->fff_fallback
++ | nill RA, 0x1f // Limit shift to 5-bits.
++ | ins RB, r0, 0(RA) // TODO: fix shift args in DynASM.
++ | j ->fff_resbit
+ |.endmacro
+ |
+- |.ffunc_bit_sh bit_lshift, shl
+- |.ffunc_bit_sh bit_rshift, shr
+- |.ffunc_bit_sh bit_arshift, sar
+- |.ffunc_bit_sh bit_rol, rol
+- |.ffunc_bit_sh bit_ror, ror
++ |.ffunc_bit_sh bit_lshift, sll
++ |.ffunc_bit_sh bit_rshift, srl
++ |.ffunc_bit_sh bit_arshift, sra
++ |
++ |.ffunc_bit bit_rol, 1, .ffunc_2
++ | // Note: no inline conversion from number for 2nd argument!
++ | lg RA, 8(BASE)
++ | checkint RA, ->fff_fallback
++ | // Note: no need to limit rotate to 5-bits (wraps).
++ | rll RB, RB, 0(RA)
++ | j ->fff_resbit
++ |
++ |.ffunc_bit bit_ror, 1, .ffunc_2
++ | // Note: no inline conversion from number for 2nd argument!
++ | lg RA, 8(BASE)
++ | checkint RA, ->fff_fallback
++ | // TODO: shorter sequence of instructions to convert right rotate into left rotate.
++ | nill RA, 0x1f
++ | lghi TMPR2, 32
++ | sr TMPR2, RA
++ | lr RA, TMPR2
++ | rll RB, RB, 0(RA)
++ | j ->fff_resbit
+ |
+ |//-----------------------------------------------------------------------
+ |
+
+From 12602d2a1f687a3e1e545489323ea6ddf778699b Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 29 Dec 2016 16:50:58 -0500
+Subject: [PATCH 166/260] Fix for DynASM buffer overflow.
+
+Need to count all actions that take arguments against MAXSECPOS.
+---
+ dynasm/dasm_s390x.lua | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index f8c45fa89..9b9d3f4c0 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -117,7 +117,7 @@ local function waction(action, val, a, num)
+ wputxhw(w)
+ if val then wputxhw(val) end -- Not sure about this, do we always have one arg?
+ if a then actargs[#actargs+1] = a end
+- if a or num then secpos = secpos + (num or 1) end
++ if val or a or num then secpos = secpos + (num or 1) end
+ end
+
+ -- Flush action list (intervening C code or buffer pos overflow).
+
+From 21f2fdfab241d046af9e7a9070be2c0fdb321b38 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 29 Dec 2016 17:37:11 -0500
+Subject: [PATCH 167/260] Partially implement ipairs.
+
+Still need to handle ipairs_aux.
+---
+ src/vm_s390x.dasc | 109 +++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 94 insertions(+), 15 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 352613913..9985f1a74 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -431,12 +431,23 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg r0, 0(r0)
+ |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
+- |
++ | // (void *cframe)
++ | nill CARG1, CFRAME_RAWMASK // Assumes high 48-bits set in CFRAME_RAWMASK.
++ | lgr sp, CARG1
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg L:RB, SAVE_L
++ | lghi RD, 1+1 // Really 1+2 results, incr. later.
++ | lg BASE, L:RB->base
++ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
++ | lay DISPATCH, GG_G2DISP(DISPATCH)
++ | lg PC, -8(BASE) // Fetch PC of previous frame.
++ | load_false RA
++ | lg RB, 0(BASE)
++ | stg RA, -16(BASE) // Prepend false to error message.
++ | stg RB, -8(BASE)
++ | lghi RA, -16 // Results start at BASE+RA = BASE-16.
++ | set_vmstate INTERP
++ | j ->vm_returnc // Increments RD/MULTRES and returns.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+@@ -1086,19 +1097,60 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |.ffunc_1 pairs
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |.ffunc_2 ipairs_aux
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |->fff_res0:
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |.ffunc_1 ipairs
++ | lg TAB:RB, 0(BASE)
++ | lgr TMPR1, TAB:RB
++ | checktab TAB:RB, ->fff_fallback
++#if LJ_52
++ | lghi TMPR2, 0
++ | cg TMPR2, TAB:RB->metatable; jne ->fff_fallback
++#endif
++ | lg CFUNC:RD, -16(BASE)
++ | cleartp CFUNC:RD
++ | lg CFUNC:RD, CFUNC:RD->upvalue[0]
++ | settp CFUNC:RD, LJ_TFUNC
++ | lg PC, -8(BASE)
++ | stg CFUNC:RD, -16(BASE)
++ | stg TMPR1, -8(BASE)
++ | llihh RD, ((int)LJ_TISNUM)>>1 // mov64 RD, ((int64_t)LJ_TISNUM<<47) // TODO: write mov64-macro, use all of TISNUM (currently this is very fragile).
++ | stg RD, 0(BASE)
++ | lghi RD, 1+3
++ | j ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc_1 pcall
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | la RA, 16(BASE)
++ | aghi NARGS:RD, -1
++ | lghi PC, 16+FRAME_PCALL
++ |1:
++ | llgc RB, (DISPATCH_GL(hookmask))(DISPATCH)
++ | srlg RB, RB, HOOK_ACTIVE_SHIFT(r0)
++ | nill RB, 1 // High bits already zero (from load).
++ | agr PC, RB // Remember active hook before pcall.
++ | // Note: this does a (harmless) copy of the function to the PC slot, too.
++ | lgr KBASE, RD
++ |2:
++ | sllg TMPR1, KBASE, 3(r0)
++ | lg RB, -24(TMPR1, RA)
++ | stg RB, -16(TMPR1, RA)
++ | aghi KBASE, -1
++ | jh <2
++ | j ->vm_call_dispatch
+ |
+ |.ffunc_2 xpcall
+ | stg r0, 0(r0)
+@@ -2846,9 +2898,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_ITERC:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
++ | sllg RA, RA, 3(r0)
++ | la RA, 16(RA, BASE) // fb = base+2
++ | lg RB, -32(RA) // Copy state. fb[0] = fb[-4].
++ | lg RC, -24(RA) // Copy control var. fb[1] = fb[-3].
++ | stg RB, 0(RA)
++ | stg RC, 8(RA)
++ | lg LFUNC:RB, -40(RA) // Copy callable. fb[-2] = fb[-5]
++ | stg LFUNC:RB, -16(RA)
++ | lghi NARGS:RD, 2+1 // Handle like a regular 2-arg call.
++ | checkfunc LFUNC:RB, ->vmeta_call
++ | lgr BASE, RA
++ | ins_call
+ break;
++
+ case BC_ITERN:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+@@ -3156,16 +3220,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_ITERL:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ |.if JIT
++ | hotloop RB
++ |.endif
++ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
+ break;
++
+ case BC_JITERL:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++#if !LJ_HASJIT
+ break;
++#endif
+ case BC_IITERL:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AJ // RA = base, RD = target
++ | sllg RA, RA, 3(r0)
++ | la RA, 0(RA, BASE)
++ | lg RB, 0(RA)
++ | cghi RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
++ if (op == BC_JITERL) {
++ | stg RB, -8(RA)
++ | j =>BC_JLOOP
++ } else {
++ | branchPC RD // Otherwise save control var + branch.
++ | stg RB, -8(RA)
++ }
++ |1:
++ | ins_next
+ break;
+
+ case BC_LOOP:
+
+From 5e7121c62508ac668fad9e1d591ba7995d2f5c0d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 30 Dec 2016 11:40:39 -0500
+Subject: [PATCH 168/260] Implement ipairs.
+
+Allows the use of the ipairs iterator, for example:
+
+t = { "i", "robot" }
+for i,v in ipairs(t) do
+ print(i, v)
+end
+-- prints:
+-- 1 i
+-- 2 robot
+---
+ src/vm_s390x.dasc | 72 +++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 66 insertions(+), 6 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 9985f1a74..2db412397 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -228,6 +228,9 @@
+ |.macro setint, reg
+ | settp reg, LJ_TISNUM
+ |.endmacro
++|.macro setint, dst, reg
++| settp dst, reg, LJ_TISNUM
++|.endmacro
+ |
+ |// Macros to test operand types.
+ |.macro checktp_nc, reg, tp, target
+@@ -1097,19 +1100,76 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | je >2 // Missing 2nd arg?
++ |1:
++ | lg CARG2, 0(BASE)
++ | checktab CARG2, ->fff_fallback
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base // Add frame since C call can throw.
++ | stg BASE, L:RB->top // Dummy frame length is ok.
++ | lg PC, -8(BASE)
++ | la CARG3, 8(BASE)
++ | lgr CARG1, L:RB
++ | stg PC, SAVE_PC // Needed for ITERN fallback.
++ | brasl r14, extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
++ | // Flag returned in r2 (CRET1).
++ | lg BASE, L:RB->base
++ | lgr RD, CRET1 // TODO: high bits needed? low bits load/test (ltr) enough?
++ | ltr RD, CRET1; je >3 // End of traversal?
++ | // Copy key and value to results.
++ | lg RB, 8(BASE)
++ | lg RD, 16(BASE)
++ | stg RB, -16(BASE)
++ | stg RD, -8(BASE)
++ |->fff_res2:
++ | lghi RD, 1+2
++ | j ->fff_res
++ |2: // Set missing 2nd arg to nil.
++ | lghi TMPR2, LJ_TNIL
++ | stg TMPR2, 8(BASE)
++ | j <1
++ |3: // End of traversal: return nil.
++ | lghi TMPR2, LJ_TNIL
++ | stg TMPR2, -16(BASE)
++ | j ->fff_res1
+ |
+ |.ffunc_1 pairs
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ |
+ |.ffunc_2 ipairs_aux
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg TAB:RB, 0(BASE)
++ | checktab TAB:RB, ->fff_fallback
++ | lg RA, 8(BASE)
++ | checkint RA, ->fff_fallback
++ | lg PC, -8(BASE)
++ | aghi RA, 1
++ | setint ITYPE, RA
++ | stg ITYPE, -16(BASE)
++ | cl RA, TAB:RB->asize; jhe >2 // Not in array part?
++ | lg RD, TAB:RB->array
++ | lgfr TMPR1, RA
++ | sllg TMPR1, TMPR1, 3(r0)
++ | la RD, 0(TMPR1, RD)
++ |1:
++ | lg TMPR2, 0(RD)
++ | cghi TMPR2, LJ_TNIL; je ->fff_res0
++ | // Copy array slot.
++ | stg TMPR2, -8(BASE)
++ | j ->fff_res2
++ |2: // Check for empty hash part first. Otherwise call C function.
++ | lt TMPR2, TAB:RB->hmask; je ->fff_res0
++ | lgr CARG1, TAB:RB
++ | lgr RB, BASE // Save BASE. // TODO: needed?
++ | lgfr CARG2, RA
++ | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
++ | // cTValue * or NULL returned in r2 (CRET1).
++ | lgr BASE, RB
++ | ltgr RD, CRET1
++ | jne <1
+ |->fff_res0:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lghi RD, 1+0
++ | j ->fff_res
+ |
+ |.ffunc_1 ipairs
+ | lg TAB:RB, 0(BASE)
+
+From 21073df0dcf364810197b45843c23b4fc02e8ebd Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 30 Dec 2016 12:27:08 -0500
+Subject: [PATCH 169/260] Add FORL implementation (just fallthrough).
+
+---
+ src/vm_s390x.dasc | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 2db412397..8e02c79d2 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -3146,8 +3146,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.define FOR_EXT, 24(RA)
+
+ case BC_FORL:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ |.if JIT
++ | hotloop RB
++ |.endif
++ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
+ break;
+
+ case BC_JFORI:
+
+From a5d9604419df45c4eb829b3a4778fc523bb16bb3 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 30 Dec 2016 13:00:38 -0500
+Subject: [PATCH 170/260] Implement USETV.
+
+Allows upvalues to be set in closures, for example:
+
+function f(x)
+ local y = x
+ local j = function(z)
+ y = y + z
+ end
+ for i=1,3 do
+ j(i)
+ print(y)
+ end
+end
+
+f(2) -- prints: 3 5 8
+---
+ src/vm_s390x.dasc | 43 +++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 41 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 8e02c79d2..182cfef13 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2378,9 +2378,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ break;
+ case BC_USETV:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++#define TV2MARKOFS \
++ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
++ | ins_AD // RA = upvalue #, RD = src
++ | lg LFUNC:RB, -16(BASE)
++ | cleartp LFUNC:RB
++ | sllg RA, RA, 3(r0)
++ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
++ | // TODO: (instead of next 2 instructions) tm UPVAL:RB->closed, 0xff
++ | llgc TMPR2, UPVAL:RB->closed
++ | tmll TMPR2, 0xff
++ | lg RB, UPVAL:RB->v
++ | sllg TMPR1, RD, 3(r0)
++ | lg RA, 0(TMPR1, BASE)
++ | stg RA, 0(RB)
++ | je >1
++ | // Check barrier for closed upvalue.
++ | // TODO: tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv)
++ | llgc TMPR2, TV2MARKOFS(RB)
++ | tmll TMPR2, LJ_GC_BLACK
++ | jne >2
++ |1:
++ | ins_next
++ |
++ |2: // Upvalue is black. Check if new value is collectable and white.
++ | srag RD, RA, 47(r0)
++ | ahi RD, -LJ_TISGCV
++ | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
++ | jle <1
++ | cleartp GCOBJ:RA
++ | // TODO: tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
++ | llgc TMPR2, GCOBJ:RA->gch.marked
++ | tmll TMPR2, LJ_GC_WHITES
++ | je <1
++ | // Crossed a write barrier. Move the barrier forward.
++ | lgr CARG2, RB
++ | lgr RB, BASE // Save BASE.
++ | lay GL:CARG1, GG_DISP2G(DISPATCH)
++ | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
++ | lgr BASE, RB // Restore BASE.
++ | j <1
+ break;
++#undef TV2MARKOFS
+ case BC_USETS:
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+
+From 6fbe3565072b86ab85a9d1c465cbd15a25ae6f98 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 30 Dec 2016 13:33:31 -0500
+Subject: [PATCH 171/260] Implement USETN, USETP and USETS.
+
+Allows constant numbers, primitives (nil, true, false) and strings
+to be assigned to upvalues in closures.
+---
+ src/vm_s390x.dasc | 60 ++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 54 insertions(+), 6 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 182cfef13..7b35afb79 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2421,16 +2421,64 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ #undef TV2MARKOFS
+ case BC_USETS:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AND // RA = upvalue #, RD = str const (~)
++ | lg LFUNC:RB, -16(BASE)
++ | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 3(r0)
++ | cleartp LFUNC:RB
++ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
++ | lg STR:RA, 0(RD, KBASE)
++ | lg RD, UPVAL:RB->v
++ | settp STR:ITYPE, STR:RA, LJ_TSTR
++ | stg STR:ITYPE, 0(RD)
++ | // TODO: tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
++ | llgc TMPR2, UPVAL:RB->marked
++ | tmll TMPR2, LJ_GC_BLACK
++ | jne >2
++ |1:
++ | ins_next
++ |
++ |2: // Check if string is white and ensure upvalue is closed.
++ | // TODO: tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
++ | llgc TMPR2, GCOBJ:RA->gch.marked
++ | tmll TMPR2, LJ_GC_WHITES
++ | je <1
++ | // TODO: tm UPVAL:RB->closed, 0xff
++ | llgc TMPR2, UPVAL:RB->closed
++ | tmll TMPR2, 0xff
++ | je <1
++ | // Crossed a write barrier. Move the barrier forward.
++ | lgr RB, BASE
++ | lgr CARG2, RD
++ | lay GL:CARG1, GG_DISP2G(DISPATCH)
++ | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
++ | lgr BASE, RB // Restore BASE.
++ | j <1
+ break;
+ case BC_USETN:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = upvalue #, RD = num const
++ | lg LFUNC:RB, -16(BASE)
++ | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 3(r0)
++ | cleartp LFUNC:RB
++ | ld f0, 0(RD, KBASE)
++ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
++ | lg RA, UPVAL:RB->v
++ | std f0, 0(RA)
++ | ins_next
+ break;
+ case BC_USETP:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = upvalue #, RD = primitive type (~)
++ | lg LFUNC:RB, -16(BASE)
++ | sllg RA, RA, 3(r0)
++ | cleartp LFUNC:RB
++ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
++ | sllg RD, RD, 47(r0)
++ | lghi TMPR2, -1
++ | xgr RD, TMPR2
++ | lg RA, UPVAL:RB->v
++ | stg RD, 0(RA)
++ | ins_next
+ break;
+ case BC_UCLO:
+ | ins_AD // RA = level, RD = target
+
+From 4f1c4dc514d900486b07b4b715ae8192e5c50949 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 30 Dec 2016 14:48:48 -0500
+Subject: [PATCH 172/260] Implement pairs (including ISNEXT and ITERN).
+
+Allows use of the pairs iterator, for example:
+
+t = { alpha = 1, beta = 2 }
+for k,v in pairs(t) do
+ print(k, v)
+end
+
+-- prints:
+-- alpha 1
+-- beta 2
+---
+ src/vm_s390x.dasc | 104 +++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 98 insertions(+), 6 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 7b35afb79..4d5729db9 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1134,8 +1134,23 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->fff_res1
+ |
+ |.ffunc_1 pairs
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg TAB:RB, 0(BASE)
++ | lgr TMPR1, TAB:RB
++ | checktab TAB:RB, ->fff_fallback
++#if LJ_52
++ | ltg TMPR2, TAB:RB->metatable; jne ->fff_fallback
++#endif
++ | lg CFUNC:RD, -16(BASE)
++ | cleartp CFUNC:RD
++ | lg CFUNC:RD, CFUNC:RD->upvalue[0]
++ | settp CFUNC:RD, LJ_TFUNC
++ | lg PC, -8(BASE)
++ | stg CFUNC:RD, -16(BASE)
++ | stg TMPR1, -8(BASE)
++ | lghi TMPR2, LJ_TNIL
++ | stg TMPR2, 0(BASE)
++ | lghi RD, 1+3
++ | j ->fff_res
+ |
+ |.ffunc_2 ipairs_aux
+ | lg TAB:RB, 0(BASE)
+@@ -3061,13 +3076,90 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_ITERN:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
++ |.if JIT
++ | // NYI: add hotloop, record BC_ITERN.
++ |.endif
++ | sllg RA, RA, 3(r0)
++ | lg TAB:RB, -16(RA, BASE)
++ | cleartp TAB:RB
++ | llgf RC, -4(RA, BASE) // Get index from control var. // TODO: ENDIANNESS DRAGONS.
++ | llgf TMPR1, TAB:RB->asize
++ | la PC, 4(PC)
++ | lg ITYPE, TAB:RB->array
++ |1: // Traverse array part.
++ | clr RC, TMPR1; jhe >5 // Index points after array part?
++ | sllg RD, RC, 3(r0) // Warning: won't work if RD==RC!
++ | lg TMPR2, 0(RD, ITYPE)
++ | cghi TMPR2, LJ_TNIL; je >4
++ | // Copy array slot to returned value.
++ | lgr RB, TMPR2
++ | stg RB, 8(RA, BASE)
++ | // Return array index as a numeric key.
++ | setint ITYPE, RC
++ | stg ITYPE, 0(RA, BASE)
++ | ahi RC, 1
++ | sty RC, -4(RA, BASE) // Update control var. // TODO: ENDIANNESS DRAGONS
++ |2:
++ | llgh RD, PC_RD // Get target from ITERL.
++ | branchPC RD
++ |3:
++ | ins_next
++ |
++ |4: // Skip holes in array part.
++ | ahi RC, 1
++ | j <1
++ |
++ |5: // Traverse hash part.
++ | sr RC, TMPR1
++ |6:
++ | cl RC, TAB:RB->hmask; jh <3 // End of iteration? Branch to ITERL+1.
++ | llgfr ITYPE, RC
++ | mghi ITYPE, #NODE
++ | ag NODE:ITYPE, TAB:RB->node
++ | lghi TMPR2, LJ_TNIL
++ | cg TMPR2, NODE:ITYPE->val; je >7
++ | ar TMPR1, RC
++ | ahi TMPR1, 1
++ | // Copy key and value from hash slot.
++ | lg RB, NODE:ITYPE->key
++ | lg RC, NODE:ITYPE->val
++ | stg RB, 0(RA, BASE)
++ | stg RC, 8(RA, BASE)
++ | sty TMPR1, -4(RA, BASE) // TODO: ENDIANNESS DRAGONS
++ | j <2
++ |
++ |7: // Skip holes in hash part.
++ | ahi RC, 1
++ | j <6
+ break;
++
+ case BC_ISNEXT:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | ins_AD // RA = base, RD = target (points to ITERN)
++ | sllg RA, RA, 3(r0)
++ | lg CFUNC:RB, -24(RA, BASE)
++ | checkfunc CFUNC:RB, >5
++ | lg TMPR1, -16(RA, BASE)
++ | checktptp TMPR1, LJ_TTAB, >5
++ | lghi TMPR2, LJ_TNIL
++ | cg TMPR2, -8(RA, BASE); jne >5
++ | llgc TMPR1, CFUNC:RB->ffid
++ | clfi TMPR1, (uint8_t)FF_next_N; jne >5
++ | branchPC RD
++ | llihl TMPR1, 0x7fff
++ | iihh TMPR1, 0xfffe
++ | stg TMPR1, -8(RA, BASE) // Initialize control var.
++ |1:
++ | ins_next
++ |5: // Despecialize bytecode if any of the checks fail.
++ | lghi TMPR2, BC_JMP
++ | stcy TMPR2, PC_OP
++ | branchPC RD
++ | lghi TMPR2, BC_ITERC
++ | stc TMPR2, 3(PC)
++ | j <1
+ break;
++
+ case BC_VARG:
+ | // TODO: some opportunities for branch on index in here.
+ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
+
+From 39c37a1a1adbb707826cc9e0381fff0995370570 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 30 Dec 2016 16:33:35 -0500
+Subject: [PATCH 173/260] Remove debug code from tostring.
+
+---
+ src/vm_s390x.dasc | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 4d5729db9..61203413d 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1092,7 +1092,6 @@ static void build_subroutines(BuildCtx *ctx)
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_strfmt_number // (lua_State *L, cTValue *o)
+ | // GCstr returned in r2 (CRET1).
+- | stg r0, 0(r0)
+ | lg BASE, L:RB->base
+ | settp STR:RB, CRET1, LJ_TSTR
+ | j <2
+
+From 703398877b3f25912a543ff16e6294a62d0253a9 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Mon, 2 Jan 2017 11:19:52 +0530
+Subject: [PATCH 174/260] Added test.lua file
+
+The file consists of code snippets that execute successfully on LuaJIT v2.1.
+---
+ test/test.lua | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 84 insertions(+)
+ create mode 100644 test/test.lua
+
+diff --git a/test/test.lua b/test/test.lua
+new file mode 100644
+index 000000000..fcb3d9791
+--- /dev/null
++++ b/test/test.lua
+@@ -0,0 +1,84 @@
++print("*********Addition*******")
++print("a=20;b=10;c=a+b;print(c)")
++a=20
++b=10
++c=a+b
++print(c)
++print("***************")
++
++print("************* Loops ***************")
++print("for i=1,10 do print(i) end")
++for i=1,10 do print(i) end
++print("for i=10,1,-1 do print(i) end")
++for i=10,1,-1 do print(i) end
++print("***************")
++
++print("************ Tables ***********")
++print("days = {'sun','mon'};print(days[1]);print(days[2])")
++days = { "sun" , "Mon"};
++print(days[1])
++print(days[2])
++
++print("revDays = {['Sunday']=1,['Monday']=2};x='Monday'")
++revDays = {["Sunday"] = 1, ["Monday"] = 2,}
++x="Monday"
++print(x)
++print(revDays[x])
++
++print("Equating 2 tables")
++print("a={};a.x=1;a.y=0;b={};b.x=1;b.y=0;c=a")
++a = {}; a.x = 1; a.y = 0
++b = {}; b.x = 1; b.y = 0
++c = a
++print("***************")
++
++print("****************Numbers *******************")
++print("a=1.000000;a=1.01;a=4.57e-3")
++a= 1.00000000
++print(a)
++
++a=1.01
++print(a)
++
++a=4.57e-3
++print(a)
++print("***************")
++
++print("********************* Strings *******************")
++print("a='one string';b=string.gsub(a,'one','another');print(b);print(a)")
++a = "one string"
++b = string.gsub(a, "one", "another")
++print(b)
++print(a)
++print("***************")
++
++
++
++
++print("*******Escape characters********")
++print("one line\nnext line\n\"in quotes\", 'in quotes'")
++print('a backslash inside quotes: \'\\\'')
++print("a simpler way: '\\'")
++print("***************")
++
++print("************** Input - ouput ***************")
++print("Please enter a valid integer")
++line = io.read()
++n = tonumber(line)
++if n == nil then error("line .. is not a valid number") else print(n*2) end
++print("************************************************")
++
++print("********************* Logical Operators********************")
++print("4 and 5 ; nil and 13 ; false and 13 ; 4 or 5 ; false or 5")
++print(4 and 5)
++print(nil and 13)
++print(false and 13)
++print(4 or 5)
++print(false or 5)
++
++print("not nil ; not false ; not 0 ; not not nil")
++print(not nil)
++print(not false)
++print(not 0)
++print(not not nil)
++print("********************************************")
+
+From 700f0e75c18c77f099b9b46b1fa05e07c4a066f3 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Mon, 2 Jan 2017 13:56:40 +0530
+Subject: [PATCH 175/260] Updated test.lua with more tests
+
+---
+ test/test.lua | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/test/test.lua b/test/test.lua
+index fcb3d9791..2d636a7a3 100644
+--- a/test/test.lua
++++ b/test/test.lua
+@@ -50,6 +50,11 @@ a = "one string"
+ b = string.gsub(a, "one", "another")
+ print(b)
+ print(a)
++
++print("a=10;b=tostring(a);print(b)")
++a=10
++b=tostring(a)
++print(b)
+ print("***************")
+
+
+@@ -82,3 +87,12 @@ print(not false)
+ print(not 0)
+ print(not not nil)
+ print("********************************************")
++
++
++print("**********Functions*****************")
++print("function twice(x) return 2*x end")
++b=twice(3)
++print(b)
++print("********************************************")
++
++
+
+From 5dd0c245b71ce98c47a48b651b48c6e3ddd2d541 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Mon, 2 Jan 2017 14:29:43 +0530
+Subject: [PATCH 176/260] Updated with ipairs & table based example
+
+---
+ test/test.lua | 21 ++++++++++++++++++---
+ 1 file changed, 18 insertions(+), 3 deletions(-)
+
+diff --git a/test/test.lua b/test/test.lua
+index 2d636a7a3..50076bdec 100644
+--- a/test/test.lua
++++ b/test/test.lua
+@@ -30,6 +30,24 @@ print("a={};a.x=1;a.y=0;b={};b.x=1;b.y=0;c=a")
+ a = {}; a.x = 1; a.y = 0
+ b = {}; b.x = 1; b.y = 0
+ c = a
++
++print(" i=10 ; j='10';k='+10' ; a ={} ; a[i] = 'one value' ; a[j] = 'another value' ; a[k]='yet another value'")
++print("print(a[j];print(a[k];print(a[tonumber(j)];print(a[tonumber(k)]")
++i = 10; j = "10"; k = "+10"
++a = {}
++a[i] = "one value"
++a[j] = "another value"
++a[k] = "yet another value"
++print(a[j]) --> another value
++print(a[k]) --> yet another value
++print(a[tonumber(j)]) --> one value
++print(a[tonumber(k)]) --> one value
++print("***************")
++
++print("**********Ipairs****")
++print("a = {1,2,3,4,5,6} for i , line in ipairs(a) do print(line) end")
++a = {1,2,3,4,5,6}
++for i , line in ipairs(a) do print(line) end
+ print("***************")
+
+ print("****************Numbers *******************")
+@@ -57,9 +75,6 @@ b=tostring(a)
+ print(b)
+ print("***************")
+
+-
+-
+-
+ print("*******Escape characters********")
+ print("one line\nnext line\n\"in quotes\", 'in quotes'")
+ print('a backslash inside quotes: \'\\\'')
+
+From bc065ce2bf68c33113a5f6c7bfdce6b1dee7d837 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Mon, 2 Jan 2017 17:05:47 +0530
+Subject: [PATCH 177/260] Added switch case and while loop test
+
+---
+ test/test.lua | 40 ++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 40 insertions(+)
+
+diff --git a/test/test.lua b/test/test.lua
+index 50076bdec..3dab71528 100644
+--- a/test/test.lua
++++ b/test/test.lua
+@@ -1,3 +1,6 @@
++-- To run this file use luajit binary as below
++-- ./luajit test.lua
++
+ print("*********Addition*******")
+ print("a=20;b=10;c=a+b;print(c)")
+ a=20
+@@ -11,6 +14,16 @@ print("for i=1,10 do print(i) end")
+ for i=1,10 do print(i) end
+ print("for i=10,1,-1 do print(i) end")
+ for i=10,1,-1 do print(i) end
++
++print("************* While Loop ********")
++print("x=10;i=1;while i<x do ; x=i*2 ; print(x); i=i+1;end")
++x=10
++i=1
++while i<x do
++x=i*2
++print(x)
++i=i+1
++end
+ print("***************")
+
+ print("************ Tables ***********")
+@@ -108,6 +121,33 @@ print("**********Functions*****************")
+ print("function twice(x) return 2*x end")
+ b=twice(3)
+ print(b)
++
++print("************Switch case *************")
++function switch(operator)
++local op = operator;
++a = 20 ; b= 10
++
++if op == "+"
++then
++c=a+b
++print("Add Result",c)
++elseif op == "-"
++then
++c=a-b
++print("Sub Result",c)
++elseif op == "*"
++then
++c=a*b
++print("Mul Result",c)
++elseif op == "/"
++then
++c=a/b
++print("Div Result",c)
++else
++error(" Invalid operator")
++end
++end
++switch("+")
+ print("********************************************")
+
+
+
+From a13e120fd5dfe8104dcb8ab65b99baebbcb65790 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Mon, 2 Jan 2017 17:13:05 +0530
+Subject: [PATCH 178/260] Added definition to function 'twice()'
+
+---
+ test/test.lua | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/test/test.lua b/test/test.lua
+index 3dab71528..05334c9ff 100644
+--- a/test/test.lua
++++ b/test/test.lua
+@@ -119,6 +119,9 @@ print("********************************************")
+
+ print("**********Functions*****************")
+ print("function twice(x) return 2*x end")
++function twice(x)
++return 2*x
++end
+ b=twice(3)
+ print(b)
+
+
+From dd6ecfa73d274edd2eaefd03fdcb0092655d53bb Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Tue, 3 Jan 2017 15:51:23 +0530
+Subject: [PATCH 179/260] Added SIY addressing mode support
+
+Added the SIY addressing mode (tmy) and the SI instruction tm, and changed the RS-a shift instructions to take two operands instead of three.
+---
+ dynasm/dasm_s390x.lua | 43 +++++++++++++++++++++++++++++++------------
+ 1 file changed, 31 insertions(+), 12 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 9b9d3f4c0..138366965 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1061,20 +1061,20 @@ map_op = {
+ spm_2 = "000000000400RR",
+ ssar_2 = "0000b2250000RRE",
+ ssair_2 = "0000b99f0000RRE",
+- slda_3 = "00008f000000RS-a",
+- sldl_3 = "00008d000000RS-a",
+- sla_3 = "00008b000000RS-a",
++ slda_2 = "00008f000000RS-a",
++ sldl_2 = "00008d000000RS-a",
++ sla_2 = "00008b000000RS-a",
+ slak_3 = "eb00000000ddRSY-a",
+ slag_3 = "eb000000000bRSY-a",
+- sll_3 = "000089000000RS-a",
++ sll_2 = "000089000000RS-a",
+ sllk_3 = "eb00000000dfRSY-a",
+ sllg_3 = "eb000000000dRSY-a",
+- srda_3 = "00008e000000RS-a",
+- srdl_3 = "00008c000000RS-a",
+- sra_3 = "00008a000000RS-a",
++ srda_2 = "00008e000000RS-a",
++ srdl_2 = "00008c000000RS-a",
++ sra_2 = "00008a000000RS-a",
+ srak_3 = "eb00000000dcRSY-a",
+ srag_3 = "eb000000000aRSY-a",
+- srl_3 = "000088000000RS-a",
++ srl_2 = "000088000000RS-a",
+ srlk_3 = "eb00000000deRSY-a",
+ srlg_3 = "eb000000000cRSY-a",
+ sqxbr_2 = "0000b3160000RRE",
+@@ -1225,6 +1225,9 @@ map_op = {
+ brxhg_3 = "ec0000000044RIE-e",
+ -- SI
+ ni_2 = "000094000000SI",
++ tm_2 = "000091000000SI",
++ -- SIY
++ tmy_2 = "eb0000000051SIY",
+ -- RXF
+ madb_3 = "ed000000001eRXF",
+ -- RRD
+@@ -1291,11 +1294,17 @@ local function parse_template(params, template, nparams, pos)
+ local mode, n, s = parse_label(params[2])
+ waction("REL_"..mode, n, s)
+ elseif p == "RS-a" then
+- local d, b, a = parse_mem_b(params[3])
+- op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+- op2 = op2 + shl(b, 12) + d
++ if (params[3]) then
++ local d, b, a = parse_mem_b(params[3])
++ op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
++ op2 = op2 + shl(b, 12) + d
++ else
++ local d, b, a = parse_mem_b(params[2])
++ op1 = op1 + shl(parse_reg(params[1]), 4)
++ op2 = op2 + shl(b, 12) + d
++ end
+ wputhw(op1); wputhw(op2)
+- if a then a() end -- a() emits action.
++ if a then a() end
+ elseif p == "RSY-a" then
+ local d, b, a = parse_mem_by(params[3])
+ op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+@@ -1445,6 +1454,16 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2)
+ if a then a() end
++ elseif p == "SIY" then
++ local imm8,iact = parse_imm8(params[2])
++ op0 = op0 + shl(imm8, 8)
++ wputhw(op0);
++ if iact then iact() end
++ local d, b, a = parse_mem_by(params[1])
++ op1 = op1 + shl(b, 12) + band(d, 0xfff)
++ op2 = op2 + band(shr(d, 4), 0xff00)
++ wputhw(op1); wputhw(op2)
++ if a then a() end
+ else
+ werror("unrecognized encoding")
+ end
+
+From d02e076507e9ba57b69a0b068b5e1865d120f066 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Tue, 3 Jan 2017 15:55:25 +0530
+Subject: [PATCH 180/260] Minor fix, for arguments in shift operations
+
+---
+ src/vm_s390x.dasc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 61203413d..12cb0e5ef 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1520,7 +1520,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+ | nill RA, 0x1f // Limit shift to 5-bits.
+- | ins RB, r0, 0(RA) // TODO: fix shift args in DynASM.
++ | ins RB, 0(RA) // TODO: fix shift args in DynASM.
+ | j ->fff_resbit
+ |.endmacro
+ |
+
+From b84dd8e65d0dc637828ab1210f209e7f92485c10 Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Tue, 3 Jan 2017 16:16:29 +0530
+Subject: [PATCH 181/260] Added couple of instructions required by test-case
+
+maeb(RXF) and cegbra(RRF-e) have been added
+---
+ dynasm/dasm_s390x.lua | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 138366965..7d260fe96 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1184,6 +1184,7 @@ map_op = {
+ -- RRF-e instructions
+ cfebr_3 = "0000b3980000RRF-e",
+ cfebra_4 = "0000b3980000RRF-e",
++ cegbra_4 = "0000b3a40000RRF-e",
+ -- RXE instructions
+ adb_2 = "ed000000001aRXE",
+ aeb_2 = "ed000000000aRXE",
+@@ -1230,6 +1231,7 @@ map_op = {
+ tmy_2 = "eb0000000051SIY",
+ -- RXF
+ madb_3 = "ed000000001eRXF",
++ maeb_3 = "ed000000000eRXF",
+ -- RRD
+ maebr_3 = "0000b30e0000RRD",
+ -- RS-b
+
+From 47012cea2fb9aadeb187f5a302a477048b6919f4 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Tue, 3 Jan 2017 17:08:30 +0530
+Subject: [PATCH 182/260] Added example for RX-f based instruction mode
+
+---
+ dynasm/Examples/test_z_inst.c | 17 ++++++++++++++++-
+ 1 file changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 20b20456f..8558aae42 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -380,6 +380,20 @@ static void sqrt_rxe(dasm_State *state)
+
+ }
+
++static void rxf(dasm_State *state) {
++ dasm_State **Dst = &state;
++
++ | lay sp , -8(sp)
++ | cegbra f1 ,0, r2,0
++ | cegbra f2 ,0,r3,0
++ | ste f2 ,0(sp)
++ | maeb f1, f2, 0(sp)
++ | cfebr r2 ,0, f1
++ | la sp, 8(sp)
++ | br r14
++
++}
++
+ typedef struct {
+ int64_t arg1;
+ int64_t arg2;
+@@ -413,7 +427,8 @@ test_table test[] = {
+ {15, 3,10, rrfe_rrd, 45, "rrfe_rrd"},
+ { 0, 0, 0, rsb, 0, "rsb"},
+ {12,10, 0, rre, 10, "rre"},
+- {16,10, 0, sqrt_rxe, 4,"sqrt_rxe"}
++ {16,10, 0, sqrt_rxe, 4,"sqrt_rxe"},
++ {16,10, 0, rxf, 116, "rxf"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+
+From 8e747c540609cdbd8b345766c4b7408ffe77131d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 3 Jan 2017 12:17:34 -0500
+Subject: [PATCH 183/260] Implement metamethod support.
+
+Allows metatables to be read and set via getmetatable and setmetatable.
+---
+ src/vm_s390x.dasc | 139 +++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 124 insertions(+), 15 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 12cb0e5ef..e68c0952f 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -320,11 +320,12 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
+- | cghi PC, 0
++ | lghi TMPR2, FRAME_P
++ | nr TMPR2, PC
+ | je ->cont_dispatch
+ |
+ | // Return from pcall or xpcall fast func.
+- | nill PC, -7
++ | nill PC, -8
+ | sgr BASE, PC // Restore caller base.
+ | lay RA, -8(RA, PC) // Rebase RA and prepend one result.
+ | lg PC, -8(BASE) // Fetch PC of previous frame.
+@@ -612,8 +613,40 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
++ | agr RA, BASE
++ | nill PC, -8
++ | lgr RB, BASE
++ | sgr BASE, PC // Restore caller BASE.
++ | sllg TMPR1, RD, 3(r0)
++ | lghi TMPR2, LJ_TNIL
++ | stg TMPR2, -8(RA, TMPR1) // Ensure one valid arg.
++ | lgr RC, RA // ... in [RC]
++ | lg PC, -24(RB) // Restore PC from [cont|PC].
++ | lg RA, -32(RB)
++ |.if FFI
++ | stg r0, 0(r0) // TODO: remove once tested.
++ | clfi RA, 1
++ | jle >1
++ |.endif
++ | lg LFUNC:KBASE, -16(BASE)
++ | cleartp LFUNC:KBASE
++ | lg KBASE, LFUNC:KBASE->pc
++ | lg KBASE, (PC2PROTO(k))(KBASE)
++ | // BASE = base, RC = result, RB = meta base
++ | br RA // Jump to continuation.
++ |
++ |.if FFI
++ |1:
++ | stg r0, 0(r0) // TODO: remove once tested.
++ | je ->cont_ffi_callback // cont = 1: return from FFI callback.
++ | // cont = 0: Tail call from C function.
++ | sgr RB, BASE
++ | srl RB, 3(r0)
++ | ahi RB, -3
++ | llgf RD, RB
++ | j ->vm_call_tail
++ |.endif
+ |
+ |->cont_cat: // BASE = base, RC = result, RB = mbase
+ | stg r0, 0(r0)
+@@ -787,8 +820,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | brasl r14, extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
+ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
+ |3:
++ | lgr RC, CRET1
+ | lg BASE, L:RB->base
+- | clgfi CRET1, 1
++ | clgfi RC, 1
+ | jh ->vmeta_binop
+ |4:
+ | la PC, 4(PC)
+@@ -800,16 +834,34 @@ static void build_subroutines(BuildCtx *ctx)
+ | ins_next
+ |
+ |->cont_condt: // BASE = base, RC = result
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | la PC, 4(PC)
++ | lg ITYPE, 0(RC)
++ | srag ITYPE, ITYPE, 47(r0)
++ | lghi TMPR2, LJ_TISTRUECOND
++ | clr ITYPE, TMPR2 // Branch if result is true.
++ | jl <5
++ | j <6
+ |
+ |->cont_condf: // BASE = base, RC = result
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg ITYPE, 0(RC)
++ | srag ITYPE, ITYPE, 47(r0)
++ | lghi TMPR2, LJ_TISTRUECOND
++ | clr ITYPE, TMPR2 // Branch if result is false.
++ | j <4
+ |
+ |->vmeta_equal:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | cleartp TAB:RD
++ | lay PC, -4(PC)
++ | lgr CARG2, RA
++ | lgfr CARG4, RB
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ | lgr CARG3, RD
++ | lgr CARG1, L:RB
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
++ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
++ | j <3
+ |
+ |->vmeta_equal_cd:
+ | stg r0, 0(r0)
+@@ -1048,12 +1100,69 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg TAB:RB, 0(BASE)
++ | lg PC, -8(BASE)
++ | checktab TAB:RB, >6
++ |1: // Field metatable must be at same offset for GCtab and GCudata!
++ | lg TAB:RB, TAB:RB->metatable
++ |2:
++ | lghi TMPR2, LJ_TNIL
++ | stg TMPR2, -16(BASE)
++ | cghi TAB:RB, 0
++ | je ->fff_res1
++ | settp TAB:RC, TAB:RB, LJ_TTAB
++ | stg TAB:RC, -16(BASE) // Store metatable as default result.
++ | lg STR:RC, (DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable))(DISPATCH)
++ | llgf RA, TAB:RB->hmask
++ | n RA, STR:RC->hash
++ | settp STR:RC, LJ_TSTR
++ | mghi RA, #NODE
++ | ag NODE:RA, TAB:RB->node
++ |3: // Rearranged logic, because we expect _not_ to find the key.
++ | cg STR:RC, NODE:RA->key
++ | je >5
++ |4:
++ | ltg NODE:RA, NODE:RA->next
++ | jne <3
++ | j ->fff_res1 // Not found, keep default result.
++ |5:
++ | lg RB, NODE:RA->val
++ | cghi RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
++ | stg RB, -16(BASE) // Return value of mt.__metatable.
++ | j ->fff_res1
++ |
++ |6:
++ | clfi ITYPE, LJ_TUDATA; je <1
++ | clfi ITYPE, LJ_TISNUM; jh >7
++ | lhi ITYPE, LJ_TISNUM
++ |7:
++ | lhi TMPR2, -1
++ | xr ITYPE, TMPR2 // not ITYPE
++ | llgfr ITYPE, ITYPE
++ | sllg ITYPE, ITYPE, 3(r0)
++ | lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH)
++ | j <2
+ |
+ |.ffunc_2 setmetatable
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg TAB:RB, 0(BASE)
++ | lgr TAB:TMPR1, TAB:RB
++ | checktab TAB:RB, ->fff_fallback
++ | // Fast path: no mt for table yet and not clearing the mt.
++ | lghi TMPR2, 0
++ | cg TMPR2, TAB:RB->metatable; jne ->fff_fallback
++ | lg TAB:RA, 8(BASE)
++ | checktab TAB:RA, ->fff_fallback
++ | stg TAB:RA, TAB:RB->metatable
++ | lg PC, -8(BASE)
++ | stg TAB:TMPR1, -16(BASE) // Return original table.
++ | // TODO: change to tm
++ | llgc TMPR2, TAB:RB->marked
++ | tmll TMPR2, LJ_GC_BLACK // isblack(table)
++ | je >1
++ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
++ | barrierback TAB:RB, RC
++ |1:
++ | j ->fff_res1
+ |
+ |.ffunc_2 rawget
+ | stg r0, 0(r0)
+
+From 65af21e2ed6d56fe6e0c0a6779ea3a53083343ef Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 3 Jan 2017 16:12:22 -0500
+Subject: [PATCH 184/260] Implement more math functions.
+
+Everything apart from min/max should now be working.
+---
+ src/vm_s390x.dasc | 68 ++++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 64 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index e68c0952f..4ecc82490 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -22,6 +22,8 @@
+ |// clfi (compare logical immediate) [requires z9-109]
+ |// ldgr (load FPR from GPR) [requires z9-109 GA3]
+ |// lgdr (load GPR from FPR) [requires z9-109 GA3]
++|// ldy (load (long bfp)) [requires z900 GA2]
++|// stdy (store (long bfp)) [requires z900 GA2]
+ |// TODO: alternative instructions?
+ |
+ |.arch s390x
+@@ -180,7 +182,7 @@
+ |.else
+ | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+ | .macro ins_next
+-| jmp ->ins_next
++| j ->ins_next
+ | .endmacro
+ | .macro ins_next_
+ | ->ins_next:
+@@ -1034,14 +1036,23 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.macro .ffunc_n, name, op
+ | .ffunc_1 name
++ | lg TMPR2, 0(BASE)
++ | checknumtp TMPR2, ->fff_fallback
++ | op f0, 0(BASE) // TODO: might be better to unconditionally load into f1.
+ |.endmacro
+ |
+ |.macro .ffunc_n, name
+- | .ffunc_n name, mvc
++ | .ffunc_n name, ld
+ |.endmacro
+ |
+ |.macro .ffunc_nn, name
+ | .ffunc_2 name
++ | lg TMPR1, 0(BASE)
++ | lg TMPR2, 8(BASE)
++ | ld f0, 0(BASE)
++ | ld f1, 8(BASE)
++ | checknumtp TMPR1, ->fff_fallback
++ | checknumtp TMPR2, ->fff_fallback
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses label 1.
+@@ -1377,8 +1388,11 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg RB, -16(BASE)
+ | j ->fff_res1
+ |
+- |.ffunc_n math_sqrt, sqrtsd
+- |->fff_resxmm0:
++ |.ffunc_n math_sqrt, sqdb
++ |->fff_resf0:
++ | lg PC, -8(BASE)
++ | stdy f0, -16(BASE)
++ | // fallthrough
+ |
+ |->fff_res1:
+ | lghi RD, 1+1
+@@ -1417,13 +1431,29 @@ static void build_subroutines(BuildCtx *ctx)
+ | math_round ceil
+ |
+ |.ffunc math_log
++ | chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
++ | lg TMPR2, 0(BASE)
++ | ld f0, 0(BASE)
++ | checknumtp TMPR2, ->fff_fallback
++ | lgr RB, BASE
++ | brasl r14, extern log
++ | lgr BASE, RB
++ | j ->fff_resf0
+ |
+ |.macro math_extern, func
+ | .ffunc_n math_ .. func
++ | lgr RB, BASE
++ | brasl r14, extern func
++ | lgr BASE, RB
++ | j ->fff_resf0
+ |.endmacro
+ |
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
++ | lgr RB, BASE
++ | brasl r14, extern func
++ | lgr BASE, RB
++ | j ->fff_resf0
+ |.endmacro
+ |
+ | math_extern log10
+@@ -1442,10 +1472,40 @@ static void build_subroutines(BuildCtx *ctx)
+ | math_extern2 fmod
+ |
+ |.ffunc_2 math_ldexp
++ | lg TMPR2, 0(BASE)
++ | ld f0, 0(BASE)
++ | lg CARG1, 8(BASE)
++ | checknumtp TMPR2, ->fff_fallback
++ | checkinttp CARG1, ->fff_fallback
++ | lgfr CARG1, CARG1
++ | lgr RB, BASE
++ | brasl r14, extern ldexp // (double, int)
++ | lgr BASE, RB
++ | j ->fff_resf0
+ |
+ |.ffunc_n math_frexp
++ | lgr RB, BASE
++ | la CARG1, TMP_STACK
++ | brasl r14, extern frexp
++ | lgr BASE, RB
++ | llgf RB, TMP_STACK
++ | lg PC, -8(BASE)
++ | stdy f0, -16(BASE)
++ | setint RB
++ | stg RB, -8(BASE)
++ | lghi RD, 1+2
++ | j ->fff_res
+ |
+ |.ffunc_n math_modf
++ | lgr RB, BASE
++ | lay CARG1, -16(BASE)
++ | brasl r14, extern modf // (double, double*)
++ | lgr BASE, RB
++ | lg PC, -8(BASE)
++ | stdy f0, -8(BASE)
++ | lghi RD, 1+2
++ | j ->fff_res
++ |
+ |.macro math_minmax, name, cmovop, sseop
+ | .ffunc name
+ |.endmacro
+
+From e739ffedce75119c72f6e10c44c27976f16678c9 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 3 Jan 2017 16:36:34 -0500
+Subject: [PATCH 185/260] Implement string.byte and string.char.
+
+---
+ src/vm_s390x.dasc | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 4ecc82490..b987766bf 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1516,10 +1516,24 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+- | stg r0, 0(r0)
++ | chi NARGS:RD, 1+1; jne ->fff_fallback
++ | lg STR:RB, 0(BASE)
++ | checkstr STR:RB, ->fff_fallback
++ | lg PC, -8(BASE)
++ | ltg TMPR2, STR:RB->len
++ | je ->fff_res0 // Return no results for empty string.
++ | llgc RB, STR:RB[1]
++ | j ->fff_resi
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+- | stg r0, 0(r0)
++ | ffgccheck
++ | chi NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
++ | lg RB, 0(BASE)
++ | checkint RB, ->fff_fallback
++ | clfi RB, 255; jh ->fff_fallback
++ | strvh RB, TMP_STACK // Store [c,0].
++ | lghi TMPR1, 1
++ | la RD, TMP_STACK // Points to stack. Little-endian.
+ |->fff_newstr:
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+
+From 86e5e57f4c5410be60b4969b4868be7ff059c474 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 3 Jan 2017 16:44:36 -0500
+Subject: [PATCH 186/260] Add emptystr implementation and stub out co-routine
+ functions.
+
+---
+ src/vm_s390x.dasc | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index b987766bf..ef5420f03 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1358,12 +1358,16 @@ static void build_subroutines(BuildCtx *ctx)
+ |.else
+ |.ffunc coroutine_wrap_aux
+ |.endif
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
++ | stg r0, 0(r0)
++ | stg r0, 0(r0)
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+@@ -1600,7 +1604,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | j <3
+ |
+ |->fff_emptystr: // Range underflow.
+- | stg r0, 0(r0)
++ | lghi RD, 0
++ | j <3
+ |
+ |.macro ffstring_op, name
+ | .ffunc_1 string_ .. name
+
+From 6bbfa48b9372c6bb90efc91beddd6f60ab1550ca Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 4 Jan 2017 16:20:56 +0530
+Subject: [PATCH 187/260] Updated the memory parsing
+
+split_memop now accepts two registers without a displacement, e.g. "(r1,r2)".
+---
+ dynasm/dasm_s390x.lua | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 7d260fe96..95c6927d4 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -324,6 +324,11 @@ local function split_memop(arg)
+ if d then
+ return d, 0, parse_reg(b)
+ end
++ -- Assuming the two registers are passed as "(r1,r2)", and displacement(d) is not specified
++ local x, b = string.match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
++ if b then
++ return 0, parse_reg(x), parse_reg(b)
++ end
+ local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
+ if reg then
+ local r, tp = parse_reg(reg)
+@@ -332,7 +337,7 @@ local function split_memop(arg)
+ end
+ end
+ -- TODO: handle values without registers?
+- -- TODO: handle registers without a displacement?
++ -- TODO: handle registers without a displacement? -- done, above ,needs to be tested
+ werror("bad memory operand: "..arg)
+ return nil
+ end
+
+From 8b20a0eae2f0f2a7e6bc49762a50a95c025bda43 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 4 Jan 2017 17:25:13 +0530
+Subject: [PATCH 188/260] Added some math function tests
+
+---
+ test/test.lua | 22 +++++++++++++++++++++-
+ 1 file changed, 21 insertions(+), 1 deletion(-)
+
+diff --git a/test/test.lua b/test/test.lua
+index 05334c9ff..bc9110193 100644
+--- a/test/test.lua
++++ b/test/test.lua
+@@ -153,4 +153,24 @@ end
+ switch("+")
+ print("********************************************")
+
+-
++print("****************Math Functions *******")
++radianVal = math.rad(math.pi / 2)
++io.write("RadianVal=" , radianVal,"\n")
++io.write("Sin Value=",string.format("%.1f ", math.sin(radianVal)),"\n")
++io.write("Cosine Value=",string.format("%.1f ", math.cos(radianVal)),"\n")
++io.write("Tan Value=",string.format("%.1f ", math.tan(radianVal)),"\n")
++io.write("Cosh Value=",string.format("%.1f ", math.cosh(radianVal)),"\n")
++io.write("Math.deg",math.deg(math.pi),"\n")
++io.write("Floor of 10.5055 is ", math.floor(10.5055),"\n")
++io.write("Ceil of 10.5055 is ", math.ceil(10.5055),"\n")
++io.write("Square root of 16 is ",math.sqrt(16),"\n")
++--io.write("10 power 2 is ",math.pow(10,2),"\n")
++--io.write("100 power 0.5 is ",math.pow(100,0.5),"\n")
++io.write("Absolute value of -10 is ",math.abs(-10),"\n")
++
++math.randomseed(os.time())
++io.write("Random number between 1 and 100 is ",math.random(),"\n")
++io.write("Random number between 1 and 100 is ",math.random(1,100),"\n")
++--io.write("Maximum in the input array is ",math.max(1,100,101,99,999),"\n")
++--io.write("Minimum in the input array is ",math.min(1,100,101,99,999),"\n")
++print("********************************************")
+
+From 1fe2176241ec24f355ac7a7670bba9c0e9e536ab Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 4 Jan 2017 17:43:53 +0530
+Subject: [PATCH 189/260] Updated memory parsing
+
+The base and index registers are set to 0 when only a displacement is passed.
+The displacement is assumed to be alphanumeric, since a label might be used.
+---
+ dynasm/dasm_s390x.lua | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 95c6927d4..a62fe21a6 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -325,10 +325,15 @@ local function split_memop(arg)
+ return d, 0, parse_reg(b)
+ end
+ -- Assuming the two registers are passed as "(r1,r2)", and displacement(d) is not specified
+- local x, b = string.match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
++ local x, b = match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
+ if b then
+ return 0, parse_reg(x), parse_reg(b)
+ end
++ -- Assuming that only displacement is passed, as either digit or label "45 or label1"
++ local d = match(arg,"[%w_]+")
++ if d then
++ return d, 0, 0
++ end
+ local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
+ if reg then
+ local r, tp = parse_reg(reg)
+
+From b92584b497aeab23b1ed6a3556997fefef715a0c Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 4 Jan 2017 17:45:19 +0530
+Subject: [PATCH 190/260] Added test for OS based functions
+
+---
+ test/test.lua | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/test/test.lua b/test/test.lua
+index bc9110193..cd6c67f0c 100644
+--- a/test/test.lua
++++ b/test/test.lua
+@@ -174,3 +174,10 @@ io.write("Random number between 1 and 100 is ",math.random(1,100),"\n")
+ --io.write("Maximum in the input array is ",math.max(1,100,101,99,999),"\n")
+ --io.write("Minimum in the input array is ",math.min(1,100,101,99,999),"\n")
+ print("********************************************")
++
++print("****************OS Functions *******")
++io.write("The date is ", os.date("%m/%d/%Y"),"\n")
++io.write("The date and time is ", os.date(),"\n")
++io.write("The OS time is ", os.time(),"\n")
++io.write("Lua started before ", os.clock(),"\n")
++print("********************************************")
+
+From 7aae451d931b1e03443c2bc97a5ea7258d1d7adb Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Wed, 4 Jan 2017 18:15:57 +0530
+Subject: [PATCH 191/260] Reverting the changes, as it's breaking the build
+
+The above expression works on the CLI, but it's failing here; not sure what's going wrong. Please let me know your comments on it.
+---
+ dynasm/dasm_s390x.lua | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index a62fe21a6..b3cda6fff 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -330,10 +330,10 @@ local function split_memop(arg)
+ return 0, parse_reg(x), parse_reg(b)
+ end
+ -- Assuming that only displacement is passed, as either digit or label "45 or label1"
+- local d = match(arg,"[%w_]+")
+- if d then
+- return d, 0, 0
+- end
++ -- local d = match(arg,"[%w_]+")
++ -- if d then
++ -- return d, 0, 0
++ -- end
+ local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
+ if reg then
+ local r, tp = parse_reg(reg)
+
+From 705784a4f9791ef4e80f9030e56c8f418a8feb1d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 4 Jan 2017 11:42:22 -0500
+Subject: [PATCH 192/260] Implement math.min and math.max.
+
+Replicates the standard Lua behaviour in the presence of NaNs.
+---
+ src/vm_s390x.dasc | 43 ++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 40 insertions(+), 3 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index ef5420f03..56d5c026b 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1510,12 +1510,49 @@ static void build_subroutines(BuildCtx *ctx)
+ | lghi RD, 1+2
+ | j ->fff_res
+ |
+- |.macro math_minmax, name, cmovop, sseop
++ |.macro math_minmax, name, cjmp
+ | .ffunc name
++ | lghi RA, 2*8
++ | sllg TMPR1, RD, 3(r0)
++ | lg RB, 0(BASE)
++ | ld f0, 0(BASE)
++ | checkint RB, >4
++ |1: // Handle integers.
++ | clgr RA, TMPR1; jhe ->fff_resRB
++ | lg TMPR2, -8(RA, BASE)
++ | checkint TMPR2, >3
++ | cr RB, TMPR2
++ | cjmp >2
++ | lgr RB, TMPR2
++ |2:
++ | aghi RA, 8
++ | j <1
++ |3:
++ | jh ->fff_fallback
++ | // Convert intermediate result to number and continue below.
++ | cdfbr f0, RB
++ | ldgr f1, TMPR2
++ | j >6
++ |4:
++ | jh ->fff_fallback
++ |5: // Handle numbers or integers.
++ | clgr RA, TMPR1; jhe ->fff_resf0
++ | lg RB, -8(RA, BASE)
++ | ldy f1, -8(RA, BASE)
++ | checknumx RB, >6, jl
++ | jh ->fff_fallback
++ | cdfbr f1, RB
++ |6:
++ | cdbr f0, f1
++ | cjmp >7
++ | ldr f0, f1
++ |7:
++ | aghi RA, 8
++ | j <5
+ |.endmacro
+ |
+- | math_minmax math_min, cmovg, minsd
+- | math_minmax math_max, cmovl, maxsd
++ | math_minmax math_min, jnh
++ | math_minmax math_max, jnl
+ |
+ |//-- String library -----------------------------------------------------
+ |
+
+From e598c067c3deda9bb94b582f18fd239624f4ba9a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 4 Jan 2017 12:25:15 -0500
+Subject: [PATCH 193/260] Fixes for negative string.sub arguments and __index
+ metamethod calls.
+
+---
+ src/vm_s390x.dasc | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 56d5c026b..7e369f0bb 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -705,7 +705,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |3: // Call __index metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
+ | lg RA, L:RB->top
+- | stg PC, -24(PC) // [cont|PC]
++ | stg PC, -24(RA) // [cont|PC]
+ | lay PC, FRAME_CONT(RA)
+ | sgr PC, BASE
+ | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
+@@ -1603,7 +1603,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg STR:RB, 0(BASE)
+ | checkstr STR:RB, ->fff_fallback
+ | lg ITYPE, 8(BASE)
+- | llgfr RA, ITYPE // Must clear hiword for lea below.
++ | lgfr RA, ITYPE
+ | srag ITYPE, ITYPE, 47(r0)
+ | cghi ITYPE, LJ_TISNUM
+ | jne ->fff_fallback
+@@ -1641,8 +1641,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | j <3
+ |
+ |->fff_emptystr: // Range underflow.
+- | lghi RD, 0
+- | j <3
++ | lghi TMPR1, 0
++ | j <4
+ |
+ |.macro ffstring_op, name
+ | .ffunc_1 string_ .. name
+
+From 80191dcab7cd9f226cf6db190d43161c361b65d6 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 4 Jan 2017 15:34:30 -0500
+Subject: [PATCH 194/260] Implement cont_cat.
+
+Required to pass cat tests.
+---
+ src/vm_s390x.dasc | 28 ++++++++++++++++++++++++----
+ 1 file changed, 24 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 7e369f0bb..51c600247 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -651,8 +651,21 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endif
+ |
+ |->cont_cat: // BASE = base, RC = result, RB = mbase
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | llgc RA, PC_RB
++ | sllg RA, RA, 3(r0)
++ | aghi RB, -32
++ | la RA, 0(RA, BASE)
++ | sgr RA, RB
++ | je ->cont_ra
++ | lcgr RA, RA
++ | srlg RA, RA, 3(r0)
++ | lg L:CARG1, SAVE_L
++ | stg BASE, L:CARG1->base
++ | lgfr CARG3, RA
++ | lg RA, 0(RC)
++ | stg RA, 0(RB)
++ | lgr CARG2, RB
++ | j ->BC_CAT_Z
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+@@ -1347,8 +1360,15 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->vm_call_dispatch
+ |
+ |.ffunc_2 xpcall
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg LFUNC:RA, 8(BASE)
++ | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
++ | lg LFUNC:RB, 0(BASE) // Swap function and traceback.
++ | stg LFUNC:RA, 0(BASE)
++ | stg LFUNC:RB, 8(BASE)
++ | la RA, 24(BASE)
++ | aghi NARGS:RD, -2
++ | lghi PC, 24+FRAME_PCALL
++ | j <1
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+
+From faef0fb092115ac6153362f07e338220ca4da8fd Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 4 Jan 2017 15:54:21 -0500
+Subject: [PATCH 195/260] Fix for __newindex metamethod.
+
+---
+ src/vm_s390x.dasc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 51c600247..f411c439b 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -791,7 +791,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |3: // Call __newindex metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+ | lg RA, L:RB->top
+- | stg PC, -24(PC) // [cont|PC]
++ | stg PC, -24(RA) // [cont|PC]
+ | llgc RC, PC_RA
+ | // Copy value to third argument.
+ | sllg RB, RC, 3(r0)
+
+From dfb95646f83262514c6c23750d684f1957f25080 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 4 Jan 2017 16:05:55 -0500
+Subject: [PATCH 196/260] Implement call_tail.
+
+---
+ src/vm_s390x.dasc | 23 +++++++++++++++++++++--
+ 1 file changed, 21 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index f411c439b..84dffec2d 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1834,8 +1834,27 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lgr RA, BASE
++ | tmll PC, FRAME_TYPE
++ | jne >3
++ | llgc RB, PC_RA
++ | lcgr RB, RB
++ | sllg RB, RB, 3(r0)
++ | lay BASE, -16(RB, BASE) // base = base - (RB+2)*8
++ | j ->vm_call_dispatch // Resolve again for tailcall.
++ |3:
++ | lgr RB, PC
++ | nill RB, -8
++ | sgr BASE, RB
++ | j ->vm_call_dispatch // Resolve again for tailcall.
++ |
++ |5: // Grow stack for fallback handler.
++ | lghi CARG2, LUA_MINSTACK
++ | lgr CARG1, L:RB
++ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
++ | lg BASE, L:RB->base
++ | lghi RD, 0 // Simulate a return 0.
++ | j <1 // Dumb retry (goes through ff first).
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RD = nargs+1
+
+From fe42519c164b32db0878b19a7e9ef1cd9c67ab2a Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 5 Jan 2017 13:53:18 +0530
+Subject: [PATCH 197/260] Added String processing Functions
+
+---
+ test/test.lua | 28 ++++++++++++++++++++++++++++
+ 1 file changed, 28 insertions(+)
+
+diff --git a/test/test.lua b/test/test.lua
+index cd6c67f0c..e12cabf47 100644
+--- a/test/test.lua
++++ b/test/test.lua
+@@ -181,3 +181,31 @@ io.write("The date and time is ", os.date(),"\n")
+ io.write("The OS time is ", os.time(),"\n")
+ io.write("Lua started before ", os.clock(),"\n")
+ print("********************************************")
++
++print("****************String Processing Functions *******")
++string1 = "Lua";
++print("String Upper",string.upper(string1))
++print("String Lower",string.lower(string1))
++string = "Lua Tutorial"
++print("String Indices",string.find(string,"Tutorial"))
++print("String Reverse",string.reverse(string))
++string1 = "Lua"
++string2 = "Tutorial"
++number1 = 10
++number2 = 20
++print(string.format("Basic formatting %s %s",string1,string2))
++date = 2; month = 1; year = 2014
++print(string.format("Date formatting %02d/%02d/%03d", date, month, year))
++print("String to ASCII",string.byte("Lua"))
++print("ASCII for 3 character in word",string.byte("Lua",3))
++print("ASCII for 1 character from last in word",string.byte("Lua",-1))
++print("ASCII for 2 character in word",string.byte("Lua",2))
++print("ASCII for 2 character from last in word",string.byte("Lua",-2))
++print("ASCII Value to string",string.char(97))
++
++string1 = "Lua"
++string2 = "Tutorial"
++print("Concatenated string",string1..string2)
++print("Length of string1 is ",string.len(string1))
++print("Repeated String",string.rep(string1,3))
++print("********************************************")
+
+From dcb1dd74edcaa6399d4d77c21d3dd8a4dd42df34 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 5 Jan 2017 14:40:19 +0530
+Subject: [PATCH 198/260] Added table Manipulation Functions
+
+---
+ test/test.lua | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/test/test.lua b/test/test.lua
+index e12cabf47..0d5d7b9c7 100644
+--- a/test/test.lua
++++ b/test/test.lua
+@@ -209,3 +209,19 @@ print("Concatenated string",string1..string2)
+ print("Length of string1 is ",string.len(string1))
+ print("Repeated String",string.rep(string1,3))
+ print("********************************************")
++
++print("****************OS Functions *******")
++fruits = {"banana","orange","apple"}
++print("Table contents are ",fruits)
++print("Concatenated string ",table.concat(fruits))
++print("Concatenated string ",table.concat(fruits,", "))
++print("Concatenated string ",table.concat(fruits,", ", 2,3))
++print("Inserting new fruit Mango")
++table.insert(fruits,"mango")
++print("Fruit table now is",fruits)
++print("Concatenated string ",table.concat(fruits,", "))
++print("The maximum elements in table is",table.maxn(fruits))
++print("The maximum elements in table is",table.remove(fruits))
++fruits = {"banana","orange","apple","grapes"}
++print("The maximum elements in table is",table.sort(fruits))
++print("********************************************")
+
+From 206c650689c535d195c3b359c2ded65cd1b5663e Mon Sep 17 00:00:00 2001
+From: niravthakkar <thakkarniravb@gmail.com>
+Date: Thu, 5 Jan 2017 14:48:09 +0530
+Subject: [PATCH 199/260] Updated the memory parsing
+
+The order matters here, so just moved displacement check to end
+---
+ dynasm/dasm_s390x.lua | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index b3cda6fff..0c1263c52 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -329,11 +329,6 @@ local function split_memop(arg)
+ if b then
+ return 0, parse_reg(x), parse_reg(b)
+ end
+- -- Assuming that only displacement is passed, as either digit or label "45 or label1"
+- -- local d = match(arg,"[%w_]+")
+- -- if d then
+- -- return d, 0, 0
+- -- end
+ local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
+ if reg then
+ local r, tp = parse_reg(reg)
+@@ -341,6 +336,11 @@ local function split_memop(arg)
+ return format(tp.ctypefmt, tailr), 0, r
+ end
+ end
++ -- Assuming that only displacement is passed, as either digit or label "45 or label1"
++ local d = match(arg,"[%w_]+")
++ if d then
++ return d, 0, 0
++ end
+ -- TODO: handle values without registers?
+ -- TODO: handle registers without a displacement? -- done, above ,needs to be tested
+ werror("bad memory operand: "..arg)
+
+From 3da26860b754aadde8d23c2ebc74b0e7052ace6f Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Thu, 5 Jan 2017 14:50:23 +0530
+Subject: [PATCH 200/260] Updated file to display table contents
+
+---
+ test/test.lua | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/test/test.lua b/test/test.lua
+index 0d5d7b9c7..deaeaac15 100644
+--- a/test/test.lua
++++ b/test/test.lua
+@@ -212,13 +212,14 @@ print("********************************************")
+
+ print("****************OS Functions *******")
+ fruits = {"banana","orange","apple"}
+-print("Table contents are ",fruits)
++print("Table contents are ")
++for key,value in ipairs(fruits) do print(value) end
+ print("Concatenated string ",table.concat(fruits))
+ print("Concatenated string ",table.concat(fruits,", "))
+ print("Concatenated string ",table.concat(fruits,", ", 2,3))
+ print("Inserting new fruit Mango")
+ table.insert(fruits,"mango")
+-print("Fruit table now is",fruits)
++for key,value in ipairs(fruits) do print(value) end
+ print("Concatenated string ",table.concat(fruits,", "))
+ print("The maximum elements in table is",table.maxn(fruits))
+ print("The maximum elements in table is",table.remove(fruits))
+
+From 1b16e7ce3c529f9bf7aafe76448c769d865c1d90 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 5 Jan 2017 10:50:17 -0500
+Subject: [PATCH 201/260] Fix math.pow.
+
+The second floating point argument is placed into f2, not f1.
+Use the macros FARG{1,2} instead of using the registers directly.
+---
+ src/vm_s390x.dasc | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 84dffec2d..9efd5b099 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1062,8 +1062,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | .ffunc_2 name
+ | lg TMPR1, 0(BASE)
+ | lg TMPR2, 8(BASE)
+- | ld f0, 0(BASE)
+- | ld f1, 8(BASE)
++ | ld FARG1, 0(BASE)
++ | ld FARG2, 8(BASE)
+ | checknumtp TMPR1, ->fff_fallback
+ | checknumtp TMPR2, ->fff_fallback
+ |.endmacro
+@@ -1457,7 +1457,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |.ffunc math_log
+ | chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
+ | lg TMPR2, 0(BASE)
+- | ld f0, 0(BASE)
++ | ld FARG1, 0(BASE)
+ | checknumtp TMPR2, ->fff_fallback
+ | lgr RB, BASE
+ | brasl r14, extern log
+@@ -1497,7 +1497,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.ffunc_2 math_ldexp
+ | lg TMPR2, 0(BASE)
+- | ld f0, 0(BASE)
++ | ld FARG1, 0(BASE)
+ | lg CARG1, 8(BASE)
+ | checknumtp TMPR2, ->fff_fallback
+ | checkinttp CARG1, ->fff_fallback
+
+From c08fa1c1191f1774ff331136fcc2fe58e4a2dce0 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Fri, 6 Jan 2017 10:01:36 +0530
+Subject: [PATCH 202/260] Enabled math.pow ,math.min and math.max
+
+---
+ test/test.lua | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/test/test.lua b/test/test.lua
+index deaeaac15..5637250af 100644
+--- a/test/test.lua
++++ b/test/test.lua
+@@ -164,15 +164,15 @@ io.write("Math.deg",math.deg(math.pi),"\n")
+ io.write("Floor of 10.5055 is ", math.floor(10.5055),"\n")
+ io.write("Ceil of 10.5055 is ", math.ceil(10.5055),"\n")
+ io.write("Square root of 16 is ",math.sqrt(16),"\n")
+---io.write("10 power 2 is ",math.pow(10,2),"\n")
+---io.write("100 power 0.5 is ",math.pow(100,0.5),"\n")
++io.write("10 power 2 is ",math.pow(10,2),"\n")
++io.write("100 power 0.5 is ",math.pow(100,0.5),"\n")
+ io.write("Absolute value of -10 is ",math.abs(-10),"\n")
+
+ math.randomseed(os.time())
+ io.write("Random number between 1 and 100 is ",math.random(),"\n")
+ io.write("Random number between 1 and 100 is ",math.random(1,100),"\n")
+---io.write("Maximum in the input array is ",math.max(1,100,101,99,999),"\n")
+---io.write("Minimum in the input array is ",math.min(1,100,101,99,999),"\n")
++io.write("Maximum in the input array is ",math.max(1,100,101,99,999),"\n")
++io.write("Minimum in the input array is ",math.min(1,100,101,99,999),"\n")
+ print("********************************************")
+
+ print("****************OS Functions *******")
+
+From 60fb35cb68f6e97ff5e7058698ab8277277132b1 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 5 Jan 2017 11:02:03 -0500
+Subject: [PATCH 203/260] Implement rawget.
+
+---
+ src/vm_s390x.dasc | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 9efd5b099..d377738b4 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1189,8 +1189,19 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->fff_res1
+ |
+ |.ffunc_2 rawget
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg TAB:CARG2, 0(BASE)
++ | checktab TAB:CARG2, ->fff_fallback
++ | lgr RB, BASE // Save BASE.
++ | la CARG3, 8(BASE)
++ | lg CARG1, SAVE_L
++ | brasl r14, extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
++ | // cTValue * returned in r2 (CRET1).
++ | lgr BASE, RB // Restore BASE.
++ | // Copy table slot.
++ | lg RB, 0(CRET1)
++ | lg PC, -8(BASE)
++ | stg RB, -16(BASE)
++ | j ->fff_res1
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+
+From d90293f55ef08b03e997d597bf6bb6d5cc2c402c Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 5 Jan 2017 17:32:51 -0500
+Subject: [PATCH 204/260] Implement coroutines.
+
+TODO: delete LREG, caused problems while implementing this (x64
+doesn't have LREG).
+---
+ src/vm_s390x.dasc | 161 ++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 156 insertions(+), 5 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index d377738b4..5b2192468 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -512,6 +512,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | st RD, SAVE_NRES
+ | stg RD, SAVE_ERRF
+ | stg KBASE, L:RB->cframe
++ | lgr LREG, L:RB
+ | clm RD, 1, L:RB->status
+ | je >2 // Initial resume (like a call).
+ |
+@@ -1386,19 +1387,169 @@ static void build_subroutines(BuildCtx *ctx)
+ |.macro coroutine_resume_wrap, resume
+ |.if resume
+ |.ffunc_1 coroutine_resume
++ | lg L:RB, 0(BASE)
++ | lgr L:TMPR2, L:RB // Save type for checktptp.
++ | cleartp L:RB
+ |.else
+ |.ffunc coroutine_wrap_aux
++ | lg CFUNC:RB, -16(BASE)
++ | cleartp CFUNC:RB
++ | lg L:RB, CFUNC:RB->upvalue[0].gcr
++ | cleartp L:RB
+ |.endif
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg PC, -8(BASE)
++ | stg PC, SAVE_PC
++ | stg L:RB, TMP_STACK
++ |.if resume
++ | checktptp L:TMPR2, LJ_TTHREAD, ->fff_fallback
++ |.endif
++ | ltg TMPR2, L:RB->cframe; jne ->fff_fallback
++ | // TODO: replace with cli.
++ | llgc TMPR1, L:RB->status
++ | cghi TMPR1, (uint8_t)LUA_YIELD; jh ->fff_fallback
++ | lg RA, L:RB->top
++ | je >1 // Status != LUA_YIELD (i.e. 0)?
++ | cg RA, L:RB->base // Check for presence of initial func.
++ | je ->fff_fallback
++ | lg PC, -8(RA) // Move initial function up.
++ | stg PC, 0(RA)
++ | la RA, 8(RA)
++ |1:
++ | sllg TMPR1, NARGS:RD, 3(r0)
++ |.if resume
++ | lay PC, -16(TMPR1, RA) // Check stack space (-1-thread).
++ |.else
++ | lay PC, -8(TMPR1, RA) // Check stack space (-1).
++ |.endif
++ | clg PC, L:RB->maxstack; jh ->fff_fallback
++ | stg PC, L:RB->top
++ |
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ |.if resume
++ | la BASE, 8(BASE) // Keep resumed thread in stack for GC.
++ |.endif
++ | stg BASE, L:RB->top
++ |.if resume
++ | lay RB, -24(TMPR1, BASE) // RB = end of source for stack move.
++ |.else
++ | lay RB, -16(TMPR1, BASE) // RB = end of source for stack move.
++ |.endif
++ | sgr RB, PC // Relative to PC.
++ |
++ | cgr PC, RA
++ | je >3
++ |2: // Move args to coroutine.
++ | lg RC, 0(RB, PC)
++ | stg RC, -8(PC)
++ | // TODO: replace with branch on count/index?
++ | lay PC, -8(PC)
++ | cgr PC, RA
++ | jne <2
++ |3:
++ | lgr CARG2, RA
++ | lg L:CARG1, TMP_STACK
++ | lghi CARG3, 0
++ | lghi CARG4, 0
++ | brasl r14, ->vm_resume // (lua_State *L, TValue *base, 0, 0)
++ |
++ | lg L:RB, SAVE_L
++ | lg L:PC, TMP_STACK
++ | lg BASE, L:RB->base
++ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
++ | set_vmstate INTERP
++ |
++ | clfi CRET1, LUA_YIELD
++ | jh >8
++ |4:
++ | lg RA, L:PC->base
++ | lg KBASE, L:PC->top
++ | stg RA, L:PC->top // Clear coroutine stack.
++ | lgr PC, KBASE
++ | sgr PC, RA
++ | je >6 // No results?
++ | la RD, 0(PC, BASE)
++ | llgfr PC, PC
++ | srlg PC, PC, 3(r0)
++ | clg RD, L:RB->maxstack
++ | jh >9 // Need to grow stack?
++ |
++ | lgr RB, BASE
++ | sgr RB, RA
++ |5: // Move results from coroutine.
++ | lg RD, 0(RA)
++ | stg RD, 0(RA, RB)
++ | // TODO: branch on count/index?
++ | la RA, 8(RA)
++ | cgr RA, KBASE
++ | jne <5
++ |6:
++ |.if resume
++ | la RD, 2(PC) // nresults+1 = 1 + true + results.
++ | load_true ITYPE // Prepend true to results.
++ | stg ITYPE, -8(BASE)
++ |.else
++ | la RD, 1(PC) // nresults+1 = 1 + results.
++ |.endif
++ |7:
++ | lg PC, SAVE_PC
++ | st RD, SAVE_MULTRES
++ |.if resume
++ | lghi RA, -8
++ |.else
++ | lghi RA, 0
++ |.endif
++ | tmll PC, FRAME_TYPE
++ | je ->BC_RET_Z
++ | j ->vm_return
++ |
++ |8: // Coroutine returned with error (at co->top-1).
++ |.if resume
++ | load_false ITYPE // Prepend false to results.
++ | stg ITYPE, -8(BASE)
++ | lg RA, L:PC->top
++ | aghi RA, -8
++ | stg RA, L:PC->top // Clear error from coroutine stack.
++ | // Copy error message.
++ | lg RD, 0(RA)
++ | stg RD, 0(BASE)
++ | lghi RD, 1+2 // nresults+1 = 1 + false + error.
++ | j <7
++ |.else
++ | lgr CARG2, L:PC
++ | lgr CARG1, L:RB
++ | brasl r14, extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
++ | // Error function does not return.
++ |.endif
++ |
++ |9: // Handle stack expansion on return from yield.
++ | lg L:RA, TMP_STACK
++ | stg KBASE, L:RA->top // Undo coroutine stack clearing.
++ | lgr CARG2, PC
++ | lgr CARG1, L:RB
++ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
++ | lg L:PC, TMP_STACK
++ | lg BASE, L:RB->base
++ | j <4 // Retry the stack move.
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | lg L:RB, SAVE_L
++ | lg TMPR2, L:RB->cframe
++ | tmll TMPR2, CFRAME_RESUME
++ | je ->fff_fallback
++ | stg BASE, L:RB->base
++ | sllg TMPR1, NARGS:RD, 3(r0)
++ | lay RD, -8(TMPR1, BASE)
++ | stg RD, L:RB->top
++ | lghi RD, 0
++ | stg RD, L:RB->cframe
++ | lhi RA, LUA_YIELD
++ | stc RA, L:RB->status
++ | j ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+@@ -3906,7 +4057,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lay RD, -8(RD,BASE)
+ | stg BASE, L:RB->base
+ | lay RA, (8*LUA_MINSTACK)(RD)
+- | cg RA, L:RB->maxstack
++ | clg RA, L:RB->maxstack
+ | stg RD, L:RB->top
+ | lgr CARG1, L:RB // Caveat: CARG1 may be RA.
+ if (op != BC_FUNCC) {
+
+From a3501b062d18f25afef1707bdf1f94d1e50b8122 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 5 Jan 2017 23:33:10 -0500
+Subject: [PATCH 205/260] Various fixes for coroutines.
+
+Now passing the tests.
+---
+ src/vm_s390x.dasc | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 5b2192468..6b80f4115 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -337,7 +337,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vm_returnc:
+ | aghi RD, 1 // RD = nresults+1
+- | jo ->vm_unwind_yield // TODO: !!! NOT SURE, jz on x64, overflow? !!!
++ | je ->vm_unwind_yield
+ | st RD, SAVE_MULTRES
+ | tmll PC, FRAME_TYPE
+ | je ->BC_RET_Z // Handle regular return to Lua.
+@@ -519,7 +519,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | // Resume after yield (like a return).
+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+ | set_vmstate INTERP
+- | llgc RD, L:RB->status
++ | stc RD, L:RB->status
+ | lg BASE, L:RB->base
+ | lg RD, L:RB->top
+ | sgr RD, RA
+@@ -1542,13 +1542,13 @@ static void build_subroutines(BuildCtx *ctx)
+ | tmll TMPR2, CFRAME_RESUME
+ | je ->fff_fallback
+ | stg BASE, L:RB->base
+- | sllg TMPR1, NARGS:RD, 3(r0)
+- | lay RD, -8(TMPR1, BASE)
++ | sllg RD, NARGS:RD, 3(r0)
++ | lay RD, -8(RD, BASE)
+ | stg RD, L:RB->top
+ | lghi RD, 0
+ | stg RD, L:RB->cframe
+- | lhi RA, LUA_YIELD
+- | stc RA, L:RB->status
++ | lghi CRET1, LUA_YIELD
++ | stc CRET1, L:RB->status
+ | j ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+
+From 443814b6b32a9da3149374ca4b746a97fb5c16f7 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 6 Jan 2017 11:16:04 -0500
+Subject: [PATCH 206/260] Add more convert to/from fixed instructions to
+ DynASM.
+
+---
+ dynasm/dasm_s390x.lua | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 0c1263c52..7d95f788a 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1194,7 +1194,22 @@ map_op = {
+ -- RRF-e instructions
+ cfebr_3 = "0000b3980000RRF-e",
+ cfebra_4 = "0000b3980000RRF-e",
++ cfdbr_3 = "0000b3990000RRF-e",
++ cfdbra_4 = "0000b3990000RRF-e",
++ cfxbr_3 = "0000b39a0000RRF-e",
++ cfxbra_4 = "0000b39a0000RRF-e",
++ cgebr_3 = "0000b3a80000RRF-e",
++ cgebra_4 = "0000b3a80000RRF-e",
++ cgdbr_3 = "0000b3a90000RRF-e",
++ cgdbra_4 = "0000b3a90000RRF-e",
++ cgxbr_3 = "0000b3aa0000RRF-e",
++ cgxbra_4 = "0000b3aa0000RRF-e",
++ cefbra_4 = "0000b3940000RRF-e",
++ cdfbra_4 = "0000b3950000RRF-e",
++ cxfbra_4 = "0000b3960000RRF-e",
+ cegbra_4 = "0000b3a40000RRF-e",
++ cdgbra_4 = "0000b3a50000RRF-e",
++ cxgbra_4 = "0000b3a60000RRF-e",
+ -- RXE instructions
+ adb_2 = "ed000000001aRXE",
+ aeb_2 = "ed000000000aRXE",
+
+From 2e05e7ca679366085fe3a2398e1e7679f7c342bd Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 6 Jan 2017 11:16:33 -0500
+Subject: [PATCH 207/260] Implement math.floor/math.ceil.
+
+---
+ src/vm_s390x.dasc | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 6b80f4115..274ca11eb 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1611,6 +1611,15 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.macro math_round, func
+ | .ffunc math_ .. func
++ | lg RB, 0(BASE)
++ | ld f0, 0(BASE)
++ | checknumx RB, ->fff_resRB, je
++ | jh ->fff_fallback
++ | brasl r14, ->vm_ .. func
++ | cfdbr RB, 0, f0
++ | jo ->fff_resf0
++ | llgfr RB, RB
++ | j ->fff_resi
+ |.endmacro
+ |
+ | math_round floor
+
+From ed2d43d73cd8f25cf0acd7e8943537ee943dd650 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 6 Jan 2017 11:47:56 -0500
+Subject: [PATCH 208/260] Fix VARG.
+
+---
+ src/vm_s390x.dasc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 274ca11eb..a9bc10dfa 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -3618,8 +3618,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | jnl >3
+ | clgr TMPR1, BASE // No more vararg slots?
+ | jl <1
+- | lghi TMPR2, LJ_TNIL
+ |2: // Fill up remainder with nil.
++ | lghi TMPR2, LJ_TNIL // TODO: move out of loop. Add NIL range macro?
+ | stg TMPR2, 0(RA)
+ | la RA, 8(RA)
+ | clgr RA, RB
+
+From 5dec8c22116cb5fab7c7549930dee75c16ee2403 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 6 Jan 2017 13:53:31 -0500
+Subject: [PATCH 209/260] Fix vm_tsetr (needed by table.remove).
+
+The A argument was being loaded as 2-bytes instead of 1.
+---
+ src/vm_s390x.dasc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index a9bc10dfa..8fa928b71 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -815,7 +815,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // TValue * returned in r2 (CRET1).
+ | lgr RC, CRET1
+- | llgh RA, PC_RA
++ | llgc RA, PC_RA
+ | lgr BASE, RB // Restore BASE.
+ | j ->BC_TSETR_Z
+ |
+
+From bb98985db319889bd0350df82d1b0014d7015d18 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Fri, 6 Jan 2017 16:19:56 -0500
+Subject: [PATCH 210/260] Add partial FFI support.
+
+Interestingly, enough to pass all the FFI tests. So s390x now
+passes all the tests in LuaJIT-test-cleanup.
+---
+ src/Makefile | 2 +-
+ src/lj_arch.h | 1 -
+ src/lj_ccall.c | 35 +++++++++++
+ src/lj_ccall.h | 6 +-
+ src/lj_ccallback.c | 9 +++
+ src/lj_target.h | 2 +
+ src/lj_target_s390x.h | 139 +++++-------------------------------------
+ src/vm_s390x.dasc | 81 +++++++++++++++++++++---
+ 8 files changed, 139 insertions(+), 136 deletions(-)
+
+diff --git a/src/Makefile b/src/Makefile
+index 8ecd6183d..1450adc03 100644
+--- a/src/Makefile
++++ b/src/Makefile
+@@ -56,7 +56,7 @@ CCOPT_mips=
+ #
+ CCDEBUG=
+ # Uncomment the next line to generate debug information:
+-CCDEBUG= -g -O0
++#CCDEBUG= -g
+ #
+ CCWARN= -Wall
+ # Uncomment the next line to enable more warnings:
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index 81f4873ed..d17884e50 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -370,7 +370,6 @@
+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+ #define LJ_TARGET_GC64 1
+ #define LJ_ARCH_NOJIT 1 /* NYI */
+-#define LJ_ARCH_NOFFI 1 /* Disable FFI for now. */
+
+ #else
+ #error "No target architecture defined"
+diff --git a/src/lj_ccall.c b/src/lj_ccall.c
+index b599be33a..a6b0a8fdb 100644
+--- a/src/lj_ccall.c
++++ b/src/lj_ccall.c
+@@ -555,6 +555,41 @@
+ goto done; \
+ }
+
++#elif LJ_TARGET_S390X
++/* -- POSIX/s390x calling conventions --------------------------------------- */
++
++#define CCALL_HANDLE_STRUCTRET \
++ /* Return structs of size 1, 2, 4 or 8 in a GPR. */ \
++ cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \
++ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
++
++#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
++
++#define CCALL_HANDLE_COMPLEXRET2 \
++ if (!cc->retref) \
++ *(int64_t *)dp = *(int64_t *)sp; /* Copy complex float from GPRs. */
++
++#define CCALL_HANDLE_STRUCTARG \
++ /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \
++ if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \
++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
++ sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \
++ }
++
++#define CCALL_HANDLE_COMPLEXARG \
++ /* Pass complex float in a GPR and complex double by reference. */ \
++ if (sz != 2*sizeof(float)) { \
++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
++ sz = CTSIZE_PTR; \
++ }
++
++#define CCALL_HANDLE_REGARG \
++ if (isfp) { \
++ if (nfpr < maxgpr) { dp = &cc->fpr[nfpr++]; goto done; } \
++ } else { \
++ if (ngpr < CCALL_NARG_FPR) { dp = &cc->gpr[ngpr++]; goto done; } \
++ }
++
+ #else
+ #error "Missing calling convention definitions for this architecture"
+ #endif
+diff --git a/src/lj_ccall.h b/src/lj_ccall.h
+index 2a10a5e88..84e7926b1 100644
+--- a/src/lj_ccall.h
++++ b/src/lj_ccall.h
+@@ -136,7 +136,11 @@ typedef union FPRArg {
+ #define CCALL_SPS_FREE 0
+
+ typedef intptr_t GPRArg;
+-typedef double FPRArg;
++typedef union FPRArg {
++ double d;
++ float f;
++} FPRArg;
++
+ #else
+ #error "Missing calling convention definitions for this architecture"
+ #endif
+diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
+index 2ca6406c3..780400573 100644
+--- a/src/lj_ccallback.c
++++ b/src/lj_ccallback.c
+@@ -495,6 +495,15 @@ void lj_ccallback_mcode_free(CTState *cts)
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+ ((float *)dp)[1] = *(float *)dp;
+
++#elif LJ_TARGET_S390X
++
++#define CALLBACK_HANDLE_REGARG \
++ if (isfp) { \
++ if (nfpr < maxgpr) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
++ } else { \
++ if (ngpr < CCALL_NARG_FPR) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
++ }
++
+ #else
+ #error "Missing calling convention definitions for this architecture"
+ #endif
+diff --git a/src/lj_target.h b/src/lj_target.h
+index abea8d5b2..467860b81 100644
+--- a/src/lj_target.h
++++ b/src/lj_target.h
+@@ -144,6 +144,8 @@ typedef uint32_t RegCost;
+ #include "lj_target_ppc.h"
+ #elif LJ_TARGET_MIPS
+ #include "lj_target_mips.h"
++#elif LJ_TARGET_S390X
++#include "lj_target_s390x.h"
+ #else
+ #error "Missing include for target CPU"
+ #endif
+diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
+index 4e35891a3..6e0245fe1 100644
+--- a/src/lj_target_s390x.h
++++ b/src/lj_target_s390x.h
+@@ -1,26 +1,22 @@
+ /*
+-** Definitions for S390 CPUs.
++** Definitions for IBM z/Architecture (s390x) CPUs.
+ ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+-#ifndef _LJ_TARGET_S390_H
+-#define _LJ_TARGET_S390_H
++#ifndef _LJ_TARGET_S390X_H
++#define _LJ_TARGET_S390X_H
+
+ /* -- Registers IDs ------------------------------------------------------- */
+
+ #define GPRDEF(_) \
+ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
+- _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _R(15) \
+-#if LJ_SOFTFP
+-#define FPRDEF(_)
+-#else
++ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15)
+ #define FPRDEF(_) \
+ _(F0) _(F1) _(F2) _(F3) \
+ _(F4) _(F5) _(F6) _(F7) \
+ _(F8) _(F9) _(F10) _(F11) \
+ _(F12) _(F13) _(F14) _(F15)
+-#endif
+-#define VRIDDEF(_)
++// TODO: VREG?
+
+ #define RIDENUM(name) RID_##name,
+
+@@ -28,84 +24,28 @@ enum {
+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
+ RID_MAX,
+- RID_TMP = RID_LR,
+
+ /* Calling conventions. */
+- RID_RET = RID_R0,
+- RID_RETLO = RID_R0,
+- RID_RETHI = RID_R1,
+-#if LJ_SOFTFP
+- RID_FPRET = RID_R0,
+-#else
+- RID_FPRET = RID_D0,
+-#endif
++ RID_SP = RID_R15,
++ RID_RET = RID_R2,
++ RID_FPRET = RID_F0,
+
+ /* These definitions must match with the *.dasc file(s): */
+- RID_BASE = RID_R9, /* Interpreter BASE. */
+- RID_LPC = RID_R6, /* Interpreter PC. */
+- RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */
+- RID_LREG = RID_R8, /* Interpreter L. */
++ RID_BASE = RID_R7, /* Interpreter BASE. */
++ RID_LPC = RID_R9, /* Interpreter PC. */
++ RID_DISPATCH = RID_R10, /* Interpreter DISPATCH table. */
+
+ /* Register ranges [min, max) and number of registers. */
+ RID_MIN_GPR = RID_R0,
+- RID_MAX_GPR = RID_PC+1,
+- RID_MIN_FPR = RID_MAX_GPR,
+-#if LJ_SOFTFP
+- RID_MAX_FPR = RID_MIN_FPR,
+-#else
+- RID_MAX_FPR = RID_D15+1,
+-#endif
++ RID_MIN_FPR = RID_F0,
++ RID_MAX_GPR = RID_MIN_FPR,
++ RID_MAX_FPR = RID_MAX,
+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
+- RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
++ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
+ };
+
+-#define RID_NUM_KREF RID_NUM_GPR
+-#define RID_MIN_KREF RID_R0
+-
+ /* -- Register sets ------------------------------------------------------- */
+
+-/* Make use of all registers, except sp, lr and pc. */
+-#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1))
+-#define RSET_GPREVEN \
+- (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \
+- RID2RSET(RID_R8)|RID2RSET(RID_R10))
+-#define RSET_GPRODD \
+- (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \
+- RID2RSET(RID_R9)|RID2RSET(RID_R11))
+-#if LJ_SOFTFP
+-#define RSET_FPR 0
+-#else
+-#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
+-#endif
+-#define RSET_ALL (RSET_GPR|RSET_FPR)
+-#define RSET_INIT RSET_ALL
+-
+-/* ABI-specific register sets. lr is an implicit scratch register. */
+-#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12))
+-#ifdef __APPLE__
+-#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9))
+-#else
+-#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_
+-#endif
+-#if LJ_SOFTFP
+-#define RSET_SCRATCH_FPR 0
+-#else
+-#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))
+-#endif
+-#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
+-#define REGARG_FIRSTGPR RID_R0
+-#define REGARG_LASTGPR RID_R3
+-#define REGARG_NUMGPR 4
+-#if LJ_ABI_SOFTFP
+-#define REGARG_FIRSTFPR 0
+-#define REGARG_LASTFPR 0
+-#define REGARG_NUMFPR 0
+-#else
+-#define REGARG_FIRSTFPR RID_D0
+-#define REGARG_LASTFPR RID_D7
+-#define REGARG_NUMFPR 8
+-#endif
+-
+ /* -- Spill slots --------------------------------------------------------- */
+
+ /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+@@ -127,63 +67,14 @@ enum {
+
+ /* This definition must match with the *.dasc file(s). */
+ typedef struct {
+-#if !LJ_SOFTFP
+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
+-#endif
+ int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
+ int32_t spill[256]; /* Spill slots. */
+ } ExitState;
+
+-/* PC after instruction that caused an exit. Used to find the trace number. */
+-#define EXITSTATE_PCREG RID_PC
+-/* Highest exit + 1 indicates stack check. */
+-#define EXITSTATE_CHECKEXIT 1
+-
+ #define EXITSTUB_SPACING 4
+ #define EXITSTUBS_PER_GROUP 32
+
+ /* -- Instructions -------------------------------------------------------- */
+
+-/* Instruction fields. */
+-#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28))
+-#define ARMF_N(r) ((r) << 16)
+-#define ARMF_D(r) ((r) << 12)
+-#define ARMF_S(r) ((r) << 8)
+-#define ARMF_M(r) (r)
+-#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
+-#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
+-
+-typedef enum S390xIns {
+- S390I_SR = 0x1B00000000000000,
+- S390I_AR = 0x1A00000000000000,
+- S390I_NR = 0x1400000000000000,
+- S390I_XR = 0x1700000000000000,
+- S390I_MR = 0x1C00000000000000,
+- S390I_LR = 0x1800000000000000,
+- S390I_C = 0x5900000000000000,
+- S390I_LH = 0x4800000000000000,
+- S390I_BASR = 0x0D00000000000000,
+- S390I_MVCL = 0x0e00000000000000,
+- S390I_ST = 0x5000000000000000,
+- S390I_TM = 0x9100000000000000,
+- S390I_MP = 0xbd00009000000000,
+- S390I_CLR = 0x1500000000000000,
+-} S390xIns;
+-
+-typedef enum S390xShift {
+- S390SH_SLL, S390SH_SRL, S390SH_SRA
+-} S390xShift;
+-
+-/* S390x condition codes. */
+-typedef enum S390xCC {
+- /* Z- Zero , LZ - Less thena Zero , GZ - Greater than Zero
+- O - Overflow , NZ - Not Zero , ZC - Zero with carry
+- NZC - No Zero with carry , ZNC - Zero with No Carry
+- EQ - Equal , NE - Not Equal , LO - Loq , HI - High
+- */
+- CC_Z , CC_LZ , CC_GZ , CC_O ,
+- CC_NZ , CC_ZC , CC_NZC ,
+- CC_ZNC , CC_EQ , CC_NE , CC_LO , CC_HI
+-} S390xCC;
+-
+ #endif
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 8fa928b71..6ca7e1306 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -628,7 +628,6 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg PC, -24(RB) // Restore PC from [cont|PC].
+ | lg RA, -32(RB)
+ |.if FFI
+- | stg r0, 0(r0) // TODO: remove once tested.
+ | clfi RA, 1
+ | jle >1
+ |.endif
+@@ -641,13 +640,12 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.if FFI
+ |1:
+- | stg r0, 0(r0) // TODO: remove once tested.
+ | je ->cont_ffi_callback // cont = 1: return from FFI callback.
+ | // cont = 0: Tail call from C function.
+ | sgr RB, BASE
+ | srl RB, 3(r0)
+ | ahi RB, -3
+- | llgf RD, RB
++ | llgfr RD, RB
+ | j ->vm_call_tail
+ |.endif
+ |
+@@ -880,8 +878,17 @@ static void build_subroutines(BuildCtx *ctx)
+ | j <3
+ |
+ |->vmeta_equal_cd:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ |.if FFI
++ | lay PC, -4(PC)
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ | lgr CARG1, L:RB
++ | llgf CARG2, -4(PC)
++ | stg PC, SAVE_PC
++ | brasl r14, extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
++ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
++ | j <3
++ |.endif
+ |
+ |->vmeta_istype:
+ | lg L:RB, SAVE_L
+@@ -2165,9 +2172,58 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg r0, 0(r0)
+ |
+ |->vm_ffi_call: // Call C function via FFI.
++ | // Caveat: needs special frame unwinding, see below.
++ |.if FFI
++ | .type CCSTATE, CCallState, r10
++ | stmg r6, r15, 48(sp) // TODO: need to save r6, but might be better in separate store?
++ | lgr CCSTATE, CARG1
++ |
++ | // Readjust stack.
++ | sgf sp, CCSTATE->spadj
++ |
++ | // Copy stack slots.
++ | llgc r0, CCSTATE->nsp
++ | cghi r0, 0
++ | jle >3
++ | lay r1, (offsetof(CCallState, stack))(CCSTATE) // Source.
++ | lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
++ |1:
++ | cghi r0, 256
++ | jl >2
++ | mvc 0(256, r11), 0(r1)
++ | aghi r1, 256*8
++ | aghi r11, 256*8
++ | aghi r0, -256
++ | j <1
++ |2:
++ | cghi r0, 0
++ | je >3
++ | // TODO: exrl mvc rather than loop.
++ | mvc 0(8, r11), 0(r1)
++ | aghi r1, 8
++ | aghi r11, 8
++ | aghi r0, -1
++ | j <2
++ |3:
++ |
++ | lmg CARG1, CARG5, CCSTATE->gpr[0]
++ | // TODO: conditionally load FPRs?
++ | ld FARG1, CCSTATE->fpr[0]
++ | ld FARG2, CCSTATE->fpr[1]
++ | ld FARG3, CCSTATE->fpr[2]
++ | ld FARG4, CCSTATE->fpr[3]
++ |5:
++ | lg r1, CCSTATE->func // TODO: move further up?
++ | basr r14, r1
++ |
++ | stg CRET1, CCSTATE->gpr[0]
++ | stg f0, CCSTATE->fpr[0]
++ |
++ | agf sp, CCSTATE->spadj
++ | lmg r6, r15, 48(sp)
++ | br r14
++ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
+ |
+ |//-----------------------------------------------------------------------
+ }
+@@ -2767,8 +2823,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ break;
+ case BC_KCDATA:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ |.if FFI
++ | ins_AND // RA = dst, RD = cdata const (~)
++ | sllg RD, RD, 3(r0)
++ | sllg RA, RA, 3(r0)
++ | lg RD, 0(RD, KBASE)
++ | settp RD, LJ_TCDATA
++ | stg RD, 0(RA, BASE)
++ | ins_next
++ |.endif
+ break;
+ case BC_KSHORT:
+ | ins_AD // RA = dst, RD = signed int16 literal
+
+From 99b36689952311a61b628208c6e8d42b7a30d23f Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 9 Jan 2017 11:20:13 -0500
+Subject: [PATCH 211/260] Add stub dis_s390x.lua file to allow make install to
+ work.
+
+---
+ src/jit/dis_s390x.lua | 1 +
+ 1 file changed, 1 insertion(+)
+ create mode 100644 src/jit/dis_s390x.lua
+
+diff --git a/src/jit/dis_s390x.lua b/src/jit/dis_s390x.lua
+new file mode 100644
+index 000000000..3c63033bf
+--- /dev/null
++++ b/src/jit/dis_s390x.lua
+@@ -0,0 +1 @@
++-- Not yet implemented.
+
+From 4c738134dfb81776daf11bc1d10af3a1da58db97 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 9 Jan 2017 14:16:44 -0500
+Subject: [PATCH 212/260] Improve ins_NEXT performance.
+
+Prioritise critical path and reduce number of instructions. About
+10% improvement on md5 benchmark.
+---
+ src/vm_s390x.dasc | 22 ++++++++--------------
+ 1 file changed, 8 insertions(+), 14 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 6ca7e1306..723efe244 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -148,29 +148,23 @@
+ |.macro ins_A; .endmacro
+ |.macro ins_AD; .endmacro
+ |.macro ins_AJ; .endmacro
+-|.macro ins_ABC; .endmacro
+-|.macro ins_AB_; .endmacro
+-|.macro ins_A_C; .endmacro
++|.macro ins_ABC; srlg RB, RD, 8(r0); llgcr RC, RD; .endmacro
++|.macro ins_AB_; srlg RB, RD, 8(r0); .endmacro
++|.macro ins_A_C; llgcr RC, RD; .endmacro
+ |.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD
+ |
+ |// Instruction decode+dispatch.
+ | // TODO: tune this, right now we always decode RA-D even if they aren't used.
+ |.macro ins_NEXT
+-| llgf RD, 0(PC)
+ | // 32 63
+ | // [ B | C | A | OP ]
+ | // [ D | A | OP ]
+-| llghr RA, RD
+-| srlg RA, RA, 8(r0)
+-| llgcr OP, RD
+-| srlg RD, RD, 16(r0)
+-| lgr RB, RD
+-| srlg RB, RB, 8(r0)
+-| llgcr RC, RD
+-| la PC, 4(PC)
+-| llgfr TMPR1, OP
+-| sllg TMPR1, TMPR1, 3(r0) // TMPR1=OP*8
++| llgc OP, 3(PC)
++| llgh RD, 0(PC)
++| llgc RA, 2(PC)
++| sllg TMPR1, OP, 3(r0)
+ | lg TMPR1, 0(TMPR1, DISPATCH)
++| la PC, 4(PC)
+ | br TMPR1
+ |.endmacro
+ |
+
+From dae61f59d6870cd0078d9ff7210078b6b71d4e2b Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 9 Jan 2017 15:57:37 -0500
+Subject: [PATCH 213/260] Delete some unused function stubs.
+
+---
+ src/vm_s390x.dasc | 20 +-------------------
+ 1 file changed, 1 insertion(+), 19 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 723efe244..86909a9aa 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -426,10 +426,6 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg TMPR1, GL:RB->vmstate
+ | j ->vm_leave_unw
+ |
+- |->vm_unwind_rethrow:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
+- |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+ | // (void *cframe)
+ | nill CARG1, CFRAME_RAWMASK // Assumes high 48-bits set in CFRAME_RAWMASK.
+@@ -2124,21 +2120,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | vm_round vm_trunc, 5 // Round towards 0.
+ |
+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+- |->vm_mod:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
+- |
+- |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
+- |->vm_powi_sse:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
+- |
+- |//-----------------------------------------------------------------------
+- |//-- Miscellaneous functions --------------------------------------------
+- |//-----------------------------------------------------------------------
+- |
+- |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
+- |->vm_cpuid:
++ |->vm_mod: // NYI.
+ | stg r0, 0(r0)
+ | stg r0, 0(r0)
+ |
+
+From a8562b7f34d1bf5823ae207b9d509d571c2ec5e0 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 10 Jan 2017 10:50:41 -0500
+Subject: [PATCH 214/260] Allow displacements to be used directly without
+ register values.
+
+Allows sllg r1, r1, 3(r0,r0) to be written as sllg r1, r1, 3.
+---
+ dynasm/dasm_s390x.lua | 14 +-
+ src/vm_s390x.dasc | 440 +++++++++++++++++++++---------------------
+ 2 files changed, 226 insertions(+), 228 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 7d95f788a..b175593bb 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -324,11 +324,16 @@ local function split_memop(arg)
+ if d then
+ return d, 0, parse_reg(b)
+ end
+- -- Assuming the two registers are passed as "(r1,r2)", and displacement(d) is not specified
++ -- Assume the two registers are passed as "(r1,r2)", and displacement(d) is not specified. TODO: not sure if we want to do this, GAS doesn't.
+ local x, b = match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
+ if b then
+ return 0, parse_reg(x), parse_reg(b)
+ end
++ -- Accept a lone integer as a displacement. TODO: allow expressions/variables here? Interacts badly with the other rules currently.
++ local d = match(arg,"^(-?[%d]+)$")
++ if d then
++ return d, 0, 0
++ end
+ local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
+ if reg then
+ local r, tp = parse_reg(reg)
+@@ -336,13 +341,6 @@ local function split_memop(arg)
+ return format(tp.ctypefmt, tailr), 0, r
+ end
+ end
+- -- Assuming that only displacement is passed, as either digit or label "45 or label1"
+- local d = match(arg,"[%w_]+")
+- if d then
+- return d, 0, 0
+- end
+- -- TODO: handle values without registers?
+- -- TODO: handle registers without a displacement? -- done, above ,needs to be tested
+ werror("bad memory operand: "..arg)
+ return nil
+ end
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 86909a9aa..a44149839 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -148,8 +148,8 @@
+ |.macro ins_A; .endmacro
+ |.macro ins_AD; .endmacro
+ |.macro ins_AJ; .endmacro
+-|.macro ins_ABC; srlg RB, RD, 8(r0); llgcr RC, RD; .endmacro
+-|.macro ins_AB_; srlg RB, RD, 8(r0); .endmacro
++|.macro ins_ABC; srlg RB, RD, 8; llgcr RC, RD; .endmacro
++|.macro ins_AB_; srlg RB, RD, 8; .endmacro
+ |.macro ins_A_C; llgcr RC, RD; .endmacro
+ |.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD
+ |
+@@ -162,7 +162,7 @@
+ | llgc OP, 3(PC)
+ | llgh RD, 0(PC)
+ | llgc RA, 2(PC)
+-| sllg TMPR1, OP, 3(r0)
++| sllg TMPR1, OP, 3
+ | lg TMPR1, 0(TMPR1, DISPATCH)
+ | la PC, 4(PC)
+ | br TMPR1
+@@ -190,7 +190,7 @@
+ | lg PC, LFUNC:RB->pc
+ | llgf RA, 0(PC) // TODO: combine loads?
+ | llgcr OP, RA
+-| sllg TMPR1, OP, 3(r0)
++| sllg TMPR1, OP, 3
+ | la PC, 4(PC)
+ | lg TMPR1, 0(TMPR1, DISPATCH)
+ | br TMPR1
+@@ -211,7 +211,7 @@
+ |//-----------------------------------------------------------------------
+ |
+ |// Macros to clear or set tags.
+-|.macro cleartp, reg; sllg reg, reg, 17(r0); srlg reg, reg, 17(r0); .endmacro // TODO: use nihf instead? would introduce dependence on z9-109.
++|.macro cleartp, reg; sllg reg, reg, 17; srlg reg, reg, 17; .endmacro // TODO: use nihf instead? would introduce dependence on z9-109.
+ |.macro settp, reg, tp
+ | oihh reg, ((tp>>1) &0xffff)
+ | oihl reg, ((tp<<15)&0x8000)
+@@ -230,18 +230,18 @@
+ |
+ |// Macros to test operand types.
+ |.macro checktp_nc, reg, tp, target
+-| srag ITYPE, reg, 47(r0)
++| srag ITYPE, reg, 47
+ | clfi ITYPE, tp
+ | jne target
+ |.endmacro
+ |.macro checktp, reg, tp, target
+-| srag ITYPE, reg, 47(r0)
++| srag ITYPE, reg, 47
+ | cleartp reg
+ | clfi ITYPE, tp
+ | jne target
+ |.endmacro
+ |.macro checktptp, src, tp, target
+-| srag ITYPE, src, 47(r0)
++| srag ITYPE, src, 47
+ | clfi ITYPE, tp
+ | jne target
+ |.endmacro
+@@ -250,7 +250,7 @@
+ |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
+ |
+ |.macro checknumx, reg, target, jump
+-| srag ITYPE, reg, 47(r0)
++| srag ITYPE, reg, 47
+ | clfi ITYPE, LJ_TISNUM
+ | jump target
+ |.endmacro
+@@ -273,7 +273,7 @@
+ | // TODO: optimize this, was just lea PC, [PC+reg*4-BCBIAS_J*4].
+ | // Can't clobber TMPR1 or condition code.
+ | lgr TMPR2, TMPR1 // Workaround because TMPR2 == r0 and can't be used in lay.
+-| sllg TMPR1, reg, 2(r0)
++| sllg TMPR1, reg, 2
+ | lay PC, (-BCBIAS_J*4)(TMPR1, PC)
+ | lgr TMPR1, TMPR2
+ |.endmacro
+@@ -394,7 +394,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | cghi RA, 0
+ | je <5 // But check for LUA_MULTRET+1.
+ | sgr RA, RD // Negative result!
+- | sllg TMPR1, RA, 3(r0)
++ | sllg TMPR1, RA, 3
+ | lay BASE, 0(TMPR1, BASE) // Correct top.
+ | j <5
+ |
+@@ -459,7 +459,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vm_growstack_f: // Grow stack for fixarg Lua function.
+ | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
+- | sllg RD, NARGS:RD, 3(r0)
++ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ |1:
+ | llgc RA, (PC2PROTO(framesize)-4)(PC)
+@@ -477,7 +477,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | sgr RD, BASE
+- | srlg RD, RD, 3(r0)
++ | srlg RD, RD, 3
+ | aghi NARGS:RD, 1
+ | // BASE = new base, RB = LFUNC, RD = nargs+1
+ | ins_callt // Just retry the call.
+@@ -513,7 +513,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg BASE, L:RB->base
+ | lg RD, L:RB->top
+ | sgr RD, RA
+- | srlg RD, RD, 3(r0)
++ | srlg RD, RD, 3
+ | aghi RD, 1 // RD = nresults+1
+ | sgr RA, BASE // RA = resultofs
+ | lg PC, -8(BASE)
+@@ -558,7 +558,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ | lg RD, L:LREG->top
+ | sgr RD, RA
+- | srlg NARGS:RD, NARGS:RD, 3(r0) // TODO: support '3' on its own in dynasm.
++ | srlg NARGS:RD, NARGS:RD, 3 // TODO: support '3' on its own in dynasm.
+ | aghi NARGS:RD, 1 // RD = nargs+1
+ |
+ |->vm_call_dispatch:
+@@ -611,7 +611,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | nill PC, -8
+ | lgr RB, BASE
+ | sgr BASE, PC // Restore caller BASE.
+- | sllg TMPR1, RD, 3(r0)
++ | sllg TMPR1, RD, 3
+ | lghi TMPR2, LJ_TNIL
+ | stg TMPR2, -8(RA, TMPR1) // Ensure one valid arg.
+ | lgr RC, RA // ... in [RC]
+@@ -633,7 +633,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | je ->cont_ffi_callback // cont = 1: return from FFI callback.
+ | // cont = 0: Tail call from C function.
+ | sgr RB, BASE
+- | srl RB, 3(r0)
++ | srl RB, 3
+ | ahi RB, -3
+ | llgfr RD, RB
+ | j ->vm_call_tail
+@@ -641,13 +641,13 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->cont_cat: // BASE = base, RC = result, RB = mbase
+ | llgc RA, PC_RB
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | aghi RB, -32
+ | la RA, 0(RA, BASE)
+ | sgr RA, RB
+ | je ->cont_ra
+ | lcgr RA, RA
+- | srlg RA, RA, 3(r0)
++ | srlg RA, RA, 3
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgfr CARG3, RA
+@@ -679,11 +679,11 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vmeta_tgetv:
+ | llgc RC, PC_RC // Reload TValue *k from RC.
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | la RC, 0(RC, BASE)
+ |1:
+ | llgc RB, PC_RB // Reload TValue *t from RB.
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | la RB, 0(RB, BASE)
+ |2:
+ | lg L:CARG1, SAVE_L
+@@ -699,7 +699,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | je >3
+ |->cont_ra: // BASE = base, RC = result
+ | llgc RA, PC_RA
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg RB, 0(RC)
+ | stg RB, 0(RA, BASE)
+ | ins_next
+@@ -751,11 +751,11 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vmeta_tsetv:
+ | llgc RC, PC_RC // Reload TValue *k from RC.
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | la RC, 0(RC, BASE)
+ |1:
+ | llgc RB, PC_RB // Reload TValue *t from RB.
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | la RB, 0(RB, BASE)
+ |2:
+ | lg L:CARG1, SAVE_L
+@@ -771,7 +771,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | je >3
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
+ | llgc RA, PC_RA
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+ | stg RB, 0(RC)
+ |->cont_nop: // BASE = base, (RC = result)
+@@ -783,7 +783,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg PC, -24(RA) // [cont|PC]
+ | llgc RC, PC_RA
+ | // Copy value to third argument.
+- | sllg RB, RC, 3(r0)
++ | sllg RB, RC, 3
+ | lg RB, 0(RB, BASE)
+ | stg RB, 16(RA)
+ | la PC, FRAME_CONT(RA)
+@@ -811,9 +811,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vmeta_comp:
+ | llgh RD, PC_RD
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | llgc RA, PC_RA
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | la CARG2, 0(RA, BASE)
+@@ -840,7 +840,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |->cont_condt: // BASE = base, RC = result
+ | la PC, 4(PC)
+ | lg ITYPE, 0(RC)
+- | srag ITYPE, ITYPE, 47(r0)
++ | srag ITYPE, ITYPE, 47
+ | lghi TMPR2, LJ_TISTRUECOND
+ | clr ITYPE, TMPR2 // Branch if result is true.
+ | jl <5
+@@ -848,7 +848,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->cont_condf: // BASE = base, RC = result
+ | lg ITYPE, 0(RC)
+- | srag ITYPE, ITYPE, 47(r0)
++ | srag ITYPE, ITYPE, 47
+ | lghi TMPR2, LJ_TISTRUECOND
+ | clr ITYPE, TMPR2 // Branch if result is false.
+ | j <4
+@@ -897,8 +897,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | llgc RB, PC_RB
+ | llgc RC, PC_RC
+ |->vmeta_arith_vn:
+- | sllg RB, RB, 3(r0)
+- | sllg RC, RC, 3(r0)
++ | sllg RB, RB, 3
++ | sllg RC, RC, 3
+ | lay RB, 0(RB, BASE)
+ | lay RC, 0(RC, KBASE)
+ | j >1
+@@ -907,8 +907,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | llgc RC, PC_RC
+ | llgc RB, PC_RB
+ |->vmeta_arith_nv:
+- | sllg RC, RC, 3(r0)
+- | sllg RB, RB, 3(r0)
++ | sllg RC, RC, 3
++ | sllg RB, RB, 3
+ | lay TMPR1, 0(RC, KBASE)
+ | lay RC, 0(RB, BASE)
+ | lgr RB, TMPR1
+@@ -916,7 +916,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vmeta_unm:
+ | llgh RD, PC_RD
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | la RC, 0(RD, BASE)
+ | lgr RB, RC
+ | j >1
+@@ -925,13 +925,13 @@ static void build_subroutines(BuildCtx *ctx)
+ | llgc RB, PC_RB
+ | llgc RC, PC_RC
+ |->vmeta_arith_vv:
+- | sllg RC, RC, 3(r0)
+- | sllg RB, RB, 3(r0)
++ | sllg RC, RC, 3
++ | sllg RB, RB, 3
+ | lay RB, 0(RB, BASE)
+ | lay RC, 0(RC, BASE)
+ |1:
+ | llgc RA, PC_RA
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lay RA, 0(RA, BASE)
+ | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
+ | lgr CARG2, RA
+@@ -960,7 +960,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vmeta_len:
+ | llgh RD, PC_RD
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | la CARG2, 0(RD, BASE)
+@@ -974,7 +974,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | cghi RC, 0
+ | jne ->vmeta_binop // Binop call for compatibility.
+ | llgh RD, PC_RD
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | lg TAB:CARG1, 0(RD, BASE)
+ | cleartp TAB:CARG1
+ | j ->BC_LEN_Z
+@@ -993,7 +993,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lay CARG2, -16(RA)
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | lay CARG3, -8(RA, RD)
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+@@ -1023,7 +1023,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | llgc OP, PC_OP
+ | llgc RA, PC_RA
+ | llgh RD, PC_RD
+- | sllg TMPR1, OP, 3(r0)
++ | sllg TMPR1, OP, 3
+ | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) // Retry FORI or JFORI.
+ | br TMPR1
+ |
+@@ -1079,7 +1079,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.ffunc_1 assert
+ | lg RB, 0(BASE)
+- | srag ITYPE, RB, 47(r0)
++ | srag ITYPE, RB, 47
+ | clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback
+ | lg PC, -8(BASE)
+ | st RD, SAVE_MULTRES
+@@ -1101,7 +1101,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.ffunc_1 type
+ | lg RC, 0(BASE)
+- | srag RC, RC, 47(r0)
++ | srag RC, RC, 47
+ | lghi RB, LJ_TISNUM
+ | clgr RC, RB
+ | jnl >1
+@@ -1112,7 +1112,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |2:
+ | lg CFUNC:RB, -16(BASE)
+ | cleartp CFUNC:RB
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | lg STR:RC, ((char *)(&((GCfuncC *)0)->upvalue))(RC, CFUNC:RB)
+ | lg PC, -8(BASE)
+ | settp STR:RC, LJ_TSTR
+@@ -1161,7 +1161,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lhi TMPR2, -1
+ | xr ITYPE, TMPR2 // not ITYPE
+ | llgfr ITYPE, ITYPE
+- | sllg ITYPE, ITYPE, 3(r0)
++ | sllg ITYPE, ITYPE, 3
+ | lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH)
+ | j <2
+ |
+@@ -1305,7 +1305,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | cl RA, TAB:RB->asize; jhe >2 // Not in array part?
+ | lg RD, TAB:RB->array
+ | lgfr TMPR1, RA
+- | sllg TMPR1, TMPR1, 3(r0)
++ | sllg TMPR1, TMPR1, 3
+ | la RD, 0(TMPR1, RD)
+ |1:
+ | lg TMPR2, 0(RD)
+@@ -1361,7 +1361,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | // Note: this does a (harmless) copy of the function to the PC slot, too.
+ | lgr KBASE, RD
+ |2:
+- | sllg TMPR1, KBASE, 3(r0)
++ | sllg TMPR1, KBASE, 3
+ | lg RB, -24(TMPR1, RA)
+ | stg RB, -16(TMPR1, RA)
+ | aghi KBASE, -1
+@@ -1412,7 +1412,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg PC, 0(RA)
+ | la RA, 8(RA)
+ |1:
+- | sllg TMPR1, NARGS:RD, 3(r0)
++ | sllg TMPR1, NARGS:RD, 3
+ |.if resume
+ | lay PC, -16(TMPR1, RA) // Check stack space (-1-thread).
+ |.else
+@@ -1467,7 +1467,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | je >6 // No results?
+ | la RD, 0(PC, BASE)
+ | llgfr PC, PC
+- | srlg PC, PC, 3(r0)
++ | srlg PC, PC, 3
+ | clg RD, L:RB->maxstack
+ | jh >9 // Need to grow stack?
+ |
+@@ -1539,7 +1539,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | tmll TMPR2, CFRAME_RESUME
+ | je ->fff_fallback
+ | stg BASE, L:RB->base
+- | sllg RD, NARGS:RD, 3(r0)
++ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ | stg RD, L:RB->top
+ | lghi RD, 0
+@@ -1591,12 +1591,12 @@ static void build_subroutines(BuildCtx *ctx)
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
+ | llgc RA, PC_RA
+ | lcgr RA, RA
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+- | sllg TMPR1, RD, 3(r0)
++ | sllg TMPR1, RD, 3
+ | lghi TMPR2, LJ_TNIL
+ | stg TMPR2, -24(TMPR1, BASE)
+ | la RD, 1(RD)
+@@ -1701,7 +1701,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |.macro math_minmax, name, cjmp
+ | .ffunc name
+ | lghi RA, 2*8
+- | sllg TMPR1, RD, 3(r0)
++ | sllg TMPR1, RD, 3
+ | lg RB, 0(BASE)
+ | ld f0, 0(BASE)
+ | checkint RB, >4
+@@ -1792,7 +1792,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | checkstr STR:RB, ->fff_fallback
+ | lg ITYPE, 8(BASE)
+ | lgfr RA, ITYPE
+- | srag ITYPE, ITYPE, 47(r0)
++ | srag ITYPE, ITYPE, 47
+ | cghi ITYPE, LJ_TISNUM
+ | jne ->fff_fallback
+ | llgf RC, STR:RB->len
+@@ -1890,7 +1890,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |.macro .ffunc_bit_op, name, ins
+ | .ffunc_bit name, 2
+ | lgr TMPR1, NARGS:RD // Save for fallback.
+- | sllg RD, NARGS:RD, 3(r0)
++ | sllg RD, NARGS:RD, 3
+ | lay RD, -16(RD, BASE)
+ |1:
+ | clgr RD, BASE
+@@ -1974,7 +1974,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg PC, -8(BASE) // Fallback may overwrite PC.
+ | stg PC, SAVE_PC // Redundant (but a defined value).
+ | stg BASE, L:RB->base
+- | sllg RD, NARGS:RD, 3(r0)
++ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ | la RA, (8*LUA_MINSTACK)(RD) // Ensure enough space for handler.
+ | stg RD, L:RB->top
+@@ -1992,7 +1992,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |1:
+ | lg RA, L:RB->top
+ | sgr RA, BASE
+- | srlg RA, RA, 3(r0)
++ | srlg RA, RA, 3
+ | cghi RD, 0
+ | la NARGS:RD, 1(RA)
+ | lg LFUNC:RB, -16(BASE)
+@@ -2007,7 +2007,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | jne >3
+ | llgc RB, PC_RA
+ | lcgr RB, RB
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | lay BASE, -16(RB, BASE) // base = base - (RB+2)*8
+ | j ->vm_call_dispatch // Resolve again for tailcall.
+ |3:
+@@ -2030,7 +2030,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg L:RB, SAVE_L
+ | stg PC, SAVE_PC // Redundant (but a defined value).
+ | stg BASE, L:RB->base
+- | sllg RD, NARGS:RD, 3(r0)
++ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD, BASE)
+ | lgr CARG1, L:RB
+ | stg RD, L:RB->top
+@@ -2038,7 +2038,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg BASE, L:RB->base
+ | lg RD, L:RB->top
+ | sgr RD, BASE
+- | srlg RD, RD, 3(r0)
++ | srlg RD, RD, 3
+ | aghi NARGS:RD, 1
+ | lg r14, TMP_STACK // Restore return address.
+ | br r14
+@@ -2048,40 +2048,40 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |->cont_hook: // Continue from hook yield.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |->cont_stitch: // Trace stitching.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+@@ -2090,11 +2090,11 @@ static void build_subroutines(BuildCtx *ctx)
+ |// Called from an exit stub with the exit number on the stack.
+ |// The 16 bit exit number is stored with two (sign-extended) push imm8.
+ |->vm_exit_handler:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |->vm_exit_interp:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+@@ -2112,7 +2112,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | ldr f0, f2
+ | br r14
+ |1: // partial remainder (sanity check)
+- | stg r0, 0(r0)
++ | stg r0, 0
+ |.endmacro
+ |
+ | vm_round vm_floor, 7 // Round towards -inf.
+@@ -2121,16 +2121,16 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+ |->vm_mod: // NYI.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Assertions ---------------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->assert_bad_for_arg_type:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ #ifdef LUA_USE_ASSERT
+ #endif
+ |
+@@ -2140,12 +2140,12 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |// Handler for callback functions. Callback slot number in ah/al.
+ |->vm_ffi_callback:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ |
+ |->vm_ffi_call: // Call C function via FFI.
+ | // Caveat: needs special frame unwinding, see below.
+@@ -2239,14 +2239,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1, RD = src2, JMP with RD = target
+ | ins_AD
+- | sllg RA, RA, 3(r0)
+- | sllg RD, RD, 3(r0)
++ | sllg RA, RA, 3
++ | sllg RD, RD, 3
+ | ld f0, 0(RA, BASE)
+ | ld f1, 0(RD, BASE)
+ | lg RA, 0(RA, BASE)
+ | lg RD, 0(RD, BASE)
+- | srag ITYPE, RA, 47(r0)
+- | srag RB, RD, 47(r0)
++ | srag ITYPE, RA, 47
++ | srag RB, RD, 47
+ |
+ | clfi ITYPE, LJ_TISNUM; jne >7
+ | clfi RB, LJ_TISNUM; jne >8
+@@ -2283,15 +2283,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | ins_AD // RA = src1, RD = src2, JMP with RD = target
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | ld f1, 0(RD, BASE)
+ | lg RD, 0(RD, BASE)
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | ld f0, 0(RA, BASE)
+ | lg RA, 0(RA, BASE)
+ | la PC, 4(PC)
+- | srag RB, RD, 47(r0)
+- | srag ITYPE, RA, 47(r0)
++ | srag RB, RD, 47
++ | srag ITYPE, RA, 47
+ | clfi RB, LJ_TISNUM; jne >7
+ | clfi ITYPE, LJ_TISNUM; jne >8
+ | cr RD, RA
+@@ -2397,8 +2397,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | ins_AND // RA = src, RD = str const, JMP with RD = target
+- | sllg RA, RA, 3(r0)
+- | sllg RD, RD, 3(r0)
++ | sllg RA, RA, 3
++ | sllg RD, RD, 3
+ | lg RB, 0(RA, BASE)
+ | la PC, 4(PC)
+ | checkstr RB, >3
+@@ -2413,8 +2413,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | ins_AD // RA = src, RD = num const, JMP with RD = target
+- | sllg RA, RA, 3(r0)
+- | sllg RD, RD, 3(r0)
++ | sllg RA, RA, 3
++ | sllg RD, RD, 3
+ | ld f0, 0(RA, BASE)
+ | lg RB, 0(RA, BASE)
+ | ld f1, 0(RD, KBASE)
+@@ -2452,9 +2452,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+- | srag RB, RB, 47(r0)
++ | srag RB, RB, 47
+ | la PC, 4(PC)
+ | cr RB, RD
+ if (!LJ_HASFFI) goto iseqne_test;
+@@ -2481,14 +2481,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
+- | sllg RD, RD, 3(r0)
+- | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 3
++ | sllg RA, RA, 3
+ | lg ITYPE, 0(RD, BASE)
+ | la PC, 4(PC)
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | lgr RB, ITYPE
+ }
+- | srag ITYPE, ITYPE, 47(r0)
++ | srag ITYPE, ITYPE, 47
+ | clfi ITYPE, LJ_TISTRUECOND
+ if (op == BC_IST || op == BC_ISTC) {
+ | jhe >1
+@@ -2507,34 +2507,34 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_ISTYPE:
+ | ins_AD // RA = src, RD = -type
+ | lghr RD, RD // TODO: always sign extend RD?
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+- | srag RB, RB, 47(r0)
++ | srag RB, RB, 47
+ | agr RB, RD
+ | jne ->vmeta_istype
+ | ins_next
+ break;
+ case BC_ISNUM:
+ | ins_AD // RA = src, RD = -(TISNUM-1)
+- | sllg TMPR1, RA, 3(r0)
++ | sllg TMPR1, RA, 3
+ | lg TMPR1, 0(TMPR1, BASE)
+ | checknumtp TMPR1, ->vmeta_istype
+ | ins_next
+ break;
+ case BC_MOV:
+ | ins_AD // RA = dst, RD = src
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | lg RB, 0(RD, BASE)
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | stg RB, 0(RA, BASE)
+ | ins_next_
+ break;
+ case BC_NOT:
+ | ins_AD // RA = dst, RD = src
+- | sllg RD, RD, 3(r0)
+- | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 3
++ | sllg RA, RA, 3
+ | lg RB, 0(RD, BASE)
+- | srag RB, RB, 47(r0)
++ | srag RB, RB, 47
+ | load_false RC
+ | cghi RB, LJ_TTRUE
+ | je >1 // TODO: Maybe do something fancy to avoid the jump?
+@@ -2545,8 +2545,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_UNM:
+ | ins_AD // RA = dst, RD = src
+- | sllg RA, RA, 3(r0)
+- | sllg RD, RD, 3(r0)
++ | sllg RA, RA, 3
++ | sllg RD, RD, 3
+ | lg RB, 0(RD, BASE)
+ | checkint RB, >3
+ | lcr RB, RB; jo >2
+@@ -2565,12 +2565,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_LEN:
+ | ins_AD // RA = dst, RD = src
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | lg RD, 0(RD, BASE)
+ | checkstr RD, >2
+ | llgf RD, STR:RD->len
+ |1:
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | setint RD
+ | stg RD, 0(RA, BASE)
+ | ins_next
+@@ -2604,9 +2604,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+
+ |.macro ins_arithpre
+ | ins_ABC
+- | sllg RB, RB, 3(r0)
+- | sllg RC, RC, 3(r0)
+- | sllg RA, RA, 3(r0)
++ | sllg RB, RB, 3
++ | sllg RC, RC, 3
++ | sllg RA, RA, 3
+ |.endmacro
+ |
+ |.macro ins_arithfp, ins
+@@ -2745,8 +2745,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_POW:
+ | ins_ABC
+- | sllg RB, RB, 3(r0)
+- | sllg RC, RC, 3(r0)
++ | sllg RB, RB, 3
++ | sllg RC, RC, 3
+ | ld FARG1, 0(RB, BASE)
+ | ld FARG2, 0(RC, BASE)
+ | lg TMPR2, 0(RB, BASE)
+@@ -2757,7 +2757,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | brasl r14, extern pow // double pow(double x, double y), result in f0.
+ | llgc RA, PC_RA
+ | lgr BASE, RB
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | std f0, 0(RA, BASE)
+ | ins_next
+ break;
+@@ -2768,7 +2768,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg BASE, L:CARG1->base
+ | lgr CARG3, RC
+ | sgr CARG3, RB
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | la CARG2, 0(RC, BASE)
+ |->BC_CAT_Z:
+ | lgr L:RB, L:CARG1
+@@ -2779,9 +2779,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ltgr RC, CRET1
+ | jne ->vmeta_binop
+ | llgc RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | llgc RA, PC_RA
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg RC, 0(RB, BASE)
+ | stg RC, 0(RA, BASE)
+ | ins_next
+@@ -2791,18 +2791,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+
+ case BC_KSTR:
+ | ins_AND // RA = dst, RD = str const (~)
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | lg RD, 0(RD, KBASE)
+ | settp RD, LJ_TSTR
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_KCDATA:
+ |.if FFI
+ | ins_AND // RA = dst, RD = cdata const (~)
+- | sllg RD, RD, 3(r0)
+- | sllg RA, RA, 3(r0)
++ | sllg RD, RD, 3
++ | sllg RA, RA, 3
+ | lg RD, 0(RD, KBASE)
+ | settp RD, LJ_TCDATA
+ | stg RD, 0(RA, BASE)
+@@ -2814,22 +2814,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | // Assumes DUALNUM.
+ | lhr RD, RD // Sign-extend literal to 32-bits.
+ | setint RD
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_KNUM:
+ | ins_AD // RA = dst, RD = num const
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | ld f0, 0(RD, KBASE)
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | std f0, 0(RA, BASE)
+ | ins_next
+ break;
+ case BC_KPRI:
+ | ins_AD // RA = dst, RD = primitive type (~)
+- | sllg RA, RA, 3(r0)
+- | sllg RD, RD, 47(r0)
++ | sllg RA, RA, 3
++ | sllg RD, RD, 47
+ | lghi TMPR2, -1
+ | xgr RD, TMPR2 // not
+ | stg RD, 0(RA, BASE)
+@@ -2837,8 +2837,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_KNIL:
+ | ins_AD // RA = dst_start, RD = dst_end
+- | sllg RA, RA, 3(r0)
+- | sllg RD, RD, 3(r0)
++ | sllg RA, RA, 3
++ | sllg RD, RD, 3
+ | la RA, 8(RA, BASE)
+ | la RD, 0(RD, BASE)
+ | lghi RB, LJ_TNIL
+@@ -2855,8 +2855,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+
+ case BC_UGET:
+ | ins_AD // RA = dst, RD = upvalue #
+- | sllg RA, RA, 3(r0)
+- | sllg RD, RD, 3(r0)
++ | sllg RA, RA, 3
++ | sllg RD, RD, 3
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RD, LFUNC:RB)
+@@ -2871,13 +2871,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_AD // RA = upvalue #, RD = src
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+ | // TODO: (instead of next 2 instructions) tm UPVAL:RB->closed, 0xff
+ | llgc TMPR2, UPVAL:RB->closed
+ | tmll TMPR2, 0xff
+ | lg RB, UPVAL:RB->v
+- | sllg TMPR1, RD, 3(r0)
++ | sllg TMPR1, RD, 3
+ | lg RA, 0(TMPR1, BASE)
+ | stg RA, 0(RB)
+ | je >1
+@@ -2890,7 +2890,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ |
+ |2: // Upvalue is black. Check if new value is collectable and white.
+- | srag RD, RA, 47(r0)
++ | srag RD, RA, 47
+ | ahi RD, -LJ_TISGCV
+ | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
+ | jle <1
+@@ -2911,8 +2911,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_USETS:
+ | ins_AND // RA = upvalue #, RD = str const (~)
+ | lg LFUNC:RB, -16(BASE)
+- | sllg RA, RA, 3(r0)
+- | sllg RD, RD, 3(r0)
++ | sllg RA, RA, 3
++ | sllg RD, RD, 3
+ | cleartp LFUNC:RB
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+ | lg STR:RA, 0(RD, KBASE)
+@@ -2946,8 +2946,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_USETN:
+ | ins_AD // RA = upvalue #, RD = num const
+ | lg LFUNC:RB, -16(BASE)
+- | sllg RA, RA, 3(r0)
+- | sllg RD, RD, 3(r0)
++ | sllg RA, RA, 3
++ | sllg RD, RD, 3
+ | cleartp LFUNC:RB
+ | ld f0, 0(RD, KBASE)
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+@@ -2958,10 +2958,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_USETP:
+ | ins_AD // RA = upvalue #, RD = primitive type (~)
+ | lg LFUNC:RB, -16(BASE)
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | cleartp LFUNC:RB
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+- | sllg RD, RD, 47(r0)
++ | sllg RD, RD, 47
+ | lghi TMPR2, -1
+ | xgr RD, TMPR2
+ | lg RA, UPVAL:RB->v
+@@ -2975,7 +2975,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ltg TMPR2, L:RB->openupval
+ | je >1
+ | stg BASE, L:RB->base
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | la CARG2, 0(RA, BASE)
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_func_closeuv // (lua_State *L, TValue *level)
+@@ -2990,7 +2990,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg BASE, L:RB->base
+ | lg CARG3, -16(BASE)
+ | cleartp CARG3
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | lg CARG2, 0(RD, KBASE) // Fetch GCproto *.
+ | lgr CARG1, L:RB
+ | stg PC, SAVE_PC
+@@ -2999,7 +2999,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | // GCfuncL * returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | llgc RA, PC_RA
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | settp LFUNC:CRET1, LJ_TFUNC
+ | stg LFUNC:CRET1, 0(RA, BASE)
+ | ins_next
+@@ -3013,7 +3013,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg PC, SAVE_PC
+ | jhe >5
+ |1:
+- | srlg CARG3, RD, 11(r0)
++ | srlg CARG3, RD, 11
+ | llill TMPR2, 0x7ff
+ | nr RD, TMPR2
+ | cr RD, TMPR2
+@@ -3025,7 +3025,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | // Table * returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+ | llgc RA, PC_RA
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | settp TAB:CRET1, LJ_TTAB
+ | stg TAB:CRET1, 0(RA, BASE)
+ | ins_next
+@@ -3047,7 +3047,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg BASE, L:RB->base
+ | jhe >3
+ |2:
+- | sllg RD, RD, 3(r0)
++ | sllg RD, RD, 3
+ | lg TAB:CARG2, 0(RD, KBASE)
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_tab_dup // (lua_State *L, Table *kt)
+@@ -3055,7 +3055,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg BASE, L:RB->base
+ | llgc RA, PC_RA
+ | settp TAB:CRET1, LJ_TTAB
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | stg TAB:CRET1, 0(RA, BASE)
+ | ins_next
+ |3:
+@@ -3072,7 +3072,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | lg TAB:RB, LFUNC:RB->env
+- | sllg TMPR1, RD, 3(r0)
++ | sllg TMPR1, RD, 3
+ | lg STR:RC, 0(TMPR1, KBASE)
+ | j ->BC_TGETS_Z
+ break;
+@@ -3081,16 +3081,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg LFUNC:RB, -16(BASE)
+ | cleartp LFUNC:RB
+ | lg TAB:RB, LFUNC:RB->env
+- | sllg TMPR1, RD, 3(r0)
++ | sllg TMPR1, RD, 3
+ | lg STR:RC, 0(TMPR1, KBASE)
+ | j ->BC_TSETS_Z
+ break;
+
+ case BC_TGETV:
+ | ins_ABC // RA = dst, RB = table, RC = key
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | lg RC, 0(RC, BASE)
+ | checktab TAB:RB, ->vmeta_tgetv
+ |
+@@ -3099,14 +3099,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cl RC, TAB:RB->asize // Takes care of unordered, too.
+ | jhe ->vmeta_tgetv // Not in array part? Use fallback.
+ | llgfr RC, RC
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | // Get array slot.
+ | lg ITYPE, 0(RC)
+ | cghi ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
+ | je >2
+ |1:
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ |
+@@ -3126,11 +3126,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_TGETS:
+ | ins_ABC
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | lghi TMPR1, -1
+ | xgr RC, TMPR1
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | lg STR:RC, 0(RC, KBASE)
+ | checktab TAB:RB, ->vmeta_tgets
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
+@@ -3148,7 +3148,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cghi ITYPE, LJ_TNIL
+ | je >5 // Key found, but nil value?
+ |2:
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ |
+@@ -3170,19 +3170,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_TGETB:
+ | ins_ABC // RA = dst, RB = table, RC = byte literal
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | checktab TAB:RB, ->vmeta_tgetb
+ | cl RC, TAB:RB->asize
+ | jhe ->vmeta_tgetb
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | // Get array slot.
+ | lg ITYPE, 0(RC)
+ | cghi ITYPE, LJ_TNIL
+ | je >2
+ |1:
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ |
+@@ -3197,29 +3197,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_TGETR:
+ | ins_ABC // RA = dst, RB = table, RC = key
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | cleartp TAB:RB
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | llgf RC, 4(RC, BASE) // Load low word (big endian).
+ | cl RC, TAB:RB->asize
+ | jhe ->vmeta_tgetr // Not in array part? Use fallback.
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | // Get array slot.
+ |->BC_TGETR_Z:
+ | lg ITYPE, 0(RC)
+ |->BC_TGETR2_Z:
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | stg ITYPE, 0(RA, BASE)
+ | ins_next
+ break;
+
+ case BC_TSETV:
+ | ins_ABC // RA = src, RB = table, RC = key
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | lg RC, 0(RC, BASE)
+ | checktab TAB:RB, ->vmeta_tsetv
+ |
+@@ -3228,7 +3228,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cl RC, TAB:RB->asize // Takes care of unordered, too.
+ | jhe ->vmeta_tsetv
+ | llgfr RC, RC
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | lghi TMPR2, LJ_TNIL
+ | cg TMPR2, 0(RC)
+@@ -3238,7 +3238,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | tmll TMPR1, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2: // Set array slot.
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+ | stg RB, 0(RC)
+ | ins_next
+@@ -3263,11 +3263,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_TSETS:
+ | ins_ABC // RA = src, RB = table, RC = str const (~)
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | lghi TMPR2, -1
+ | xgr RC, TMPR2 // ~RC
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | lg STR:RC, 0(RC, KBASE)
+ | checktab TAB:RB, ->vmeta_tsets
+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
+@@ -3291,7 +3291,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | tmll TMPR2, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |3: // Set node value.
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg ITYPE, 0(RA, BASE)
+ | stg ITYPE, 0(TMPR1)
+ | ins_next
+@@ -3339,12 +3339,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_TSETB:
+ | ins_ABC // RA = src, RB = table, RC = byte literal
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | checktab TAB:RB, ->vmeta_tsetb
+ | cl RC, TAB:RB->asize
+ | jhe ->vmeta_tsetb
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | lghi TMPR2, LJ_TNIL
+ | cg TMPR2, 0(RC)
+@@ -3354,7 +3354,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | tmll TMPR1, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2: // Set array slot.
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg ITYPE, 0(RA, BASE)
+ | stg ITYPE, 0(RC)
+ | ins_next
+@@ -3374,10 +3374,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_TSETR:
+ | ins_ABC // RA = src, RB = table, RC = key
+- | sllg RB, RB, 3(r0)
++ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+ | cleartp TAB:RB
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | lg RC, 0(RC, BASE)
+ | llgc TMPR2, TAB:RB->marked
+ | tmll TMPR2, LJ_GC_BLACK // isblack(table)
+@@ -3386,11 +3386,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cl RC, TAB:RB->asize
+ | jhe ->vmeta_tsetr
+ | llgfr RC, RC
+- | sllg RC, RC, 3(r0)
++ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+ | // Set array slot.
+ |->BC_TSETR_Z:
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg ITYPE, 0(RA, BASE)
+ | stg ITYPE, 0(RC)
+ | ins_next
+@@ -3403,8 +3403,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_TSETM:
+ | ins_AD // RA = base (table at base-1), RD = num const (start index)
+ |1:
+- | sllg RA, RA, 3(r0)
+- | sllg TMPR1, RD, 3(r0)
++ | sllg RA, RA, 3
++ | sllg TMPR1, RD, 3
+ | llgf TMPR1, 4(TMPR1, KBASE) // Integer constant is in lo-word.
+ | la RA, 0(RA, BASE)
+ | lg TAB:RB, -8(RA) // Guaranteed to be a table.
+@@ -3420,7 +3420,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | clgf RD, TAB:RB->asize
+ | jh >5 // Doesn't fit into array part?
+ | sgr RD, TMPR1
+- | sllg TMPR1, TMPR1, 3(r0)
++ | sllg TMPR1, TMPR1, 3
+ | ag TMPR1, TAB:RB->array
+ |3: // Copy result slots to table.
+ | lg RB, 0(RA)
+@@ -3459,7 +3459,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ if (op == BC_CALLM) {
+ | agf NARGS:RD, SAVE_MULTRES
+ }
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg LFUNC:RB, 0(BASE, RA)
+ | checkfunc LFUNC:RB, ->vmeta_call_ra
+ | la BASE, 16(RA, BASE)
+@@ -3473,7 +3473,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+ case BC_CALLT:
+ | ins_AD // RA = base, RD = nargs+1
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | la RA, 16(RA, BASE)
+ | lgr KBASE, BASE // Use KBASE for move + vmeta_call hint.
+ | lg LFUNC:RB, -16(RA)
+@@ -3512,7 +3512,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | jne <4
+ | llgc RA, PC_RA
+ | lcgr RA, RA
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg LFUNC:KBASE, -32(RA, BASE) // Need to prepare KBASE.
+ | cleartp LFUNC:KBASE
+ | lg KBASE, LFUNC:KBASE->pc
+@@ -3534,7 +3534,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+
+ case BC_ITERC:
+ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | la RA, 16(RA, BASE) // fb = base+2
+ | lg RB, -32(RA) // Copy state. fb[0] = fb[-4].
+ | lg RC, -24(RA) // Copy control var. fb[1] = fb[-3].
+@@ -3553,7 +3553,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.if JIT
+ | // NYI: add hotloop, record BC_ITERN.
+ |.endif
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg TAB:RB, -16(RA, BASE)
+ | cleartp TAB:RB
+ | llgf RC, -4(RA, BASE) // Get index from control var. // TODO: ENDIANNESS DRAGONS.
+@@ -3562,7 +3562,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg ITYPE, TAB:RB->array
+ |1: // Traverse array part.
+ | clr RC, TMPR1; jhe >5 // Index points after array part?
+- | sllg RD, RC, 3(r0) // Warning: won't work if RD==RC!
++ | sllg RD, RC, 3 // Warning: won't work if RD==RC!
+ | lg TMPR2, 0(RD, ITYPE)
+ | cghi TMPR2, LJ_TNIL; je >4
+ | // Copy array slot to returned value.
+@@ -3609,7 +3609,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+
+ case BC_ISNEXT:
+ | ins_AD // RA = base, RD = target (points to ITERN)
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lg CFUNC:RB, -24(RA, BASE)
+ | checkfunc CFUNC:RB, >5
+ | lg TMPR1, -16(RA, BASE)
+@@ -3636,9 +3636,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_VARG:
+ | // TODO: some opportunities for branch on index in here.
+ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
+- | sllg RA, RA, 3(r0)
+- | sllg RB, RB, 3(r0)
+- | sllg RC, RC, 3(r0)
++ | sllg RA, RA, 3
++ | sllg RB, RB, 3
++ | sllg RC, RC, 3
+ | la TMPR1, (16+FRAME_VARG)(RC, BASE)
+ | la RA, 0(RA, BASE)
+ | sg TMPR1, -8(BASE)
+@@ -3673,7 +3673,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | slgr RC, TMPR1
+ | jno <3 // No vararg slots? (borrow or zero)
+ | llgfr RB, RC
+- | srlg RB, RB, 3(r0)
++ | srlg RB, RB, 3
+ | ahi RB, 1
+ | st RB, SAVE_MULTRES // MULTRES = #varargs+1
+ | lg L:RB, SAVE_L
+@@ -3717,7 +3717,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_RET: case BC_RET0: case BC_RET1:
+ | ins_AD // RA = results, RD = nresults+1
+ if (op != BC_RET0) {
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ }
+ |1:
+ | lg PC, -8(BASE)
+@@ -3758,7 +3758,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ }
+ | llgc RA, PC_RA
+ | lcgr RA, RA
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
+ | lg LFUNC:KBASE, -16(BASE)
+ | cleartp LFUNC:KBASE
+@@ -3772,7 +3772,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg TMPR1, -16(KBASE) // Note: relies on shifted base.
+ | la KBASE, 8(KBASE)
+ } else {
+- | sllg RC, RD, 3(r0) // RC used as temp.
++ | sllg RC, RD, 3 // RC used as temp.
+ | stg TMPR1, -24(RC, BASE)
+ }
+ | la RD, 1(RD)
+@@ -3813,7 +3813,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_IFORL:
+ vk = (op == BC_IFORL || op == BC_JFORL);
+ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | la RA, 0(RA, BASE)
+ | lg RB, FOR_IDX
+ | checkint RB, >9
+@@ -3822,7 +3822,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | checkint TMPR1, ->vmeta_for
+ | lg ITYPE, FOR_STEP
+ | chi ITYPE, 0; jl >5
+- | srag ITYPE, ITYPE, 47(r0)
++ | srag ITYPE, ITYPE, 47
+ | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
+ } else {
+ #ifdef LUA_USE_ASSERT
+@@ -3865,7 +3865,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |
+ |5: // Invert check for negative step.
+ if (!vk) {
+- | srag ITYPE, ITYPE, 47(r0)
++ | srag ITYPE, ITYPE, 47
+ | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
+ } else {
+ | ar RB, ITYPE; jo <1
+@@ -3946,7 +3946,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ #endif
+ case BC_IITERL:
+ | ins_AJ // RA = base, RD = target
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | la RA, 0(RA, BASE)
+ | lg RB, 0(RA)
+ | cghi RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
+@@ -3977,8 +3977,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_JLOOP:
+- | stg r0, 0(r0)
+- | stg r0, 0(r0)
++ | stg r0, 0
++ | stg r0, 0
+ break;
+
+ case BC_JMP:
+@@ -3998,7 +3998,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+
+ case BC_FUNCF:
+ |.if JIT
+- | stg r0, 0(r0)
++ | stg r0, 0
+ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
+@@ -4012,7 +4012,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
+ | lg KBASE, (PC2PROTO(k)-4)(PC)
+ | lg L:RB, SAVE_L
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | la RA, 0(RA, BASE) // Top of frame.
+ | clg RA, L:RB->maxstack
+ | jh ->vm_growstack_f
+@@ -4029,7 +4029,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |
+ |3: // Clear missing parameters.
+ | // TODO: optimize this. Some of this can be hoisted.
+- | sllg TMPR1, NARGS:RD, 3(r0)
++ | sllg TMPR1, NARGS:RD, 3
+ | lghi TMPR2, LJ_TNIL
+ | stg TMPR2, -8(TMPR1, BASE)
+ | la RD, 1(RD)
+@@ -4042,19 +4042,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ #if !LJ_HASJIT
+ break;
+ #endif
+- | stg r0, 0(r0) // NYI: compiled vararg functions
++ | stg r0, 0 // NYI: compiled vararg functions
+ break; /* NYI: compiled vararg functions. */
+
+ case BC_IFUNCV:
+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
+- | sllg TMPR1, NARGS:RD, 3(r0)
++ | sllg TMPR1, NARGS:RD, 3
+ | la RB, (FRAME_VARG+8)(TMPR1)
+ | la RD, 8(TMPR1, BASE)
+ | lg LFUNC:KBASE, -16(BASE)
+ | stg RB, -8(RD) // Store delta + FRAME_VARG.
+ | stg LFUNC:KBASE, -16(RD) // Store copy of LFUNC.
+ | lg L:RB, SAVE_L
+- | sllg RA, RA, 3(r0)
++ | sllg RA, RA, 3
+ | la RA, 0(RA, RD)
+ | cg RA, L:RB->maxstack
+ | jh ->vm_growstack_v // Need to grow stack.
+@@ -4101,7 +4101,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cleartp CFUNC:RB
+ | lg KBASE, CFUNC:RB->f
+ | lg L:RB, SAVE_L
+- | sllg RD, NARGS:RD, 3(r0)
++ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD,BASE)
+ | stg BASE, L:RB->base
+ | lay RA, (8*LUA_MINSTACK)(RD)
+@@ -4125,7 +4125,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg BASE, L:RB->base
+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+ | set_vmstate INTERP
+- | sllg TMPR1, RD, 3(r0)
++ | sllg TMPR1, RD, 3
+ | la RA, 0(TMPR1, BASE)
+ | lcgr RA, RA
+ | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
+
+From a038a0818990775c25c8145a9373603d44ce6db1 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 10 Jan 2017 11:15:26 -0500
+Subject: [PATCH 215/260] Fix SI (tm) action parsing.
+
+---
+ dynasm/dasm_s390x.lua | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index b175593bb..dc83c9f9f 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -312,6 +312,10 @@ local function is_int8(num)
+ return -128 <= num and num < 128
+ end
+
++local function is_uint8(num)
++ return 0 <= num and num < 256
++end
++
+ -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
+ -- If x is not specified then it is 0.
+ local function split_memop(arg)
+@@ -510,13 +514,12 @@ end
+ local function parse_imm8(imm)
+ local imm_val = tonumber(imm)
+ if imm_val then
+- if not is_int8(imm_val) then
++ if not is_int8(imm_val) and not is_uint8(imm_val) then
+ werror("Immediate value out of range: ", imm_val)
+ end
+- else
+- iact = function() waction("IMM8",nil,imm) end
++ return imm_val, nil
+ end
+- return imm_val, iact
++ return 0, function() waction("IMM8",nil,imm) end
+ end
+
+ local function parse_mask(mask)
+
+From d44390a2d38ff06ad516c2488d6c3bdc98234ff5 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 10 Jan 2017 11:44:27 -0500
+Subject: [PATCH 216/260] Use tm{,y} instructions where possible.
+
+---
+ src/vm_s390x.dasc | 72 +++++++++++++++--------------------------------
+ 1 file changed, 22 insertions(+), 50 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index a44149839..7e7915bf9 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1177,9 +1177,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg TAB:RA, TAB:RB->metatable
+ | lg PC, -8(BASE)
+ | stg TAB:TMPR1, -16(BASE) // Return original table.
+- | // TODO: change to tm
+- | llgc TMPR2, TAB:RB->marked
+- | tmll TMPR2, LJ_GC_BLACK // isblack(table)
++ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | je >1
+ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
+ | barrierback TAB:RB, RC
+@@ -2372,8 +2370,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg TAB:RB, TAB:RA->metatable
+ | cghi TAB:RB, 0
+ | je <2 // No metatable?
+- | llgc TMPR2, TAB:RB->nomm
+- | tmll TMPR2, 1<<MM_eq
++ | tm TAB:RB->nomm, 1<<MM_eq
+ | jne <2 // Or 'no __eq' flag set?
+ if (vk) {
+ | lghi RB, 0 // ne = 0 // TODO: should be 32-bit?
+@@ -2593,8 +2590,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | j <1
+ #if LJ_52
+ |9: // Check for __len.
+- | llgc TMPR2, TAB:RB->nomm
+- | tmll TMPR2, 1<<MM_len
++ | tm TAB:RB->nomm, 1<<MM_len
+ | jne <3
+ | j ->vmeta_len // 'no __len' flag NOT set: check.
+ #endif
+@@ -2873,18 +2869,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cleartp LFUNC:RB
+ | sllg RA, RA, 3
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+- | // TODO: (instead of next 2 instructions) tm UPVAL:RB->closed, 0xff
+- | llgc TMPR2, UPVAL:RB->closed
+- | tmll TMPR2, 0xff
++ | tm UPVAL:RB->closed, 0xff
+ | lg RB, UPVAL:RB->v
+ | sllg TMPR1, RD, 3
+ | lg RA, 0(TMPR1, BASE)
+ | stg RA, 0(RB)
+ | je >1
+ | // Check barrier for closed upvalue.
+- | // TODO: tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv)
+- | llgc TMPR2, TV2MARKOFS(RB)
+- | tmll TMPR2, LJ_GC_BLACK
++ | tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv)
+ | jne >2
+ |1:
+ | ins_next
+@@ -2892,12 +2884,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |2: // Upvalue is black. Check if new value is collectable and white.
+ | srag RD, RA, 47
+ | ahi RD, -LJ_TISGCV
+- | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
++ | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
+ | jle <1
+ | cleartp GCOBJ:RA
+- | // TODO: tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
+- | llgc TMPR2, GCOBJ:RA->gch.marked
+- | tmll TMPR2, LJ_GC_WHITES
++ | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
+ | je <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | lgr CARG2, RB
+@@ -2919,21 +2909,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg RD, UPVAL:RB->v
+ | settp STR:ITYPE, STR:RA, LJ_TSTR
+ | stg STR:ITYPE, 0(RD)
+- | // TODO: tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
+- | llgc TMPR2, UPVAL:RB->marked
+- | tmll TMPR2, LJ_GC_BLACK
++ | tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
+ | jne >2
+ |1:
+ | ins_next
+ |
+ |2: // Check if string is white and ensure upvalue is closed.
+- | // TODO: tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
+- | llgc TMPR2, GCOBJ:RA->gch.marked
+- | tmll TMPR2, LJ_GC_WHITES
++ | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
+ | je <1
+- | // TODO: tm UPVAL:RB->closed, 0xff
+- | llgc TMPR2, UPVAL:RB->closed
+- | tmll TMPR2, 0xff
++ | tm UPVAL:RB->closed, 0xff
+ | je <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | lgr RB, BASE
+@@ -3114,8 +3098,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1
+- | llgc TMPR2, TAB:TMPR1->nomm
+- | tmll TMPR2, 1<<MM_index
++ | tm TAB:TMPR1->nomm, 1<<MM_index
+ | je ->vmeta_tgetv // 'no __index' flag NOT set: check.
+ | j <1
+ |
+@@ -3163,8 +3146,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <2 // No metatable: done.
+- | llgc TMPR2, TAB:TMPR1->nomm
+- | tmll TMPR2, 1<<MM_index
++ | tm TAB:TMPR1->nomm, 1<<MM_index
+ | jne <2 // 'no __index' flag set: done.
+ | j ->vmeta_tgets // Caveat: preserve STR:RC.
+ break;
+@@ -3190,8 +3172,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1
+- | llgc TMPR2, TAB:TMPR1->nomm
+- | tmll TMPR2, 1<<MM_index
++ | tm TAB:TMPR1->nomm, 1<<MM_index
+ | je ->vmeta_tgetb // 'no __index' flag NOT set: check.
+ | j <1
+ break;
+@@ -3234,8 +3215,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cg TMPR2, 0(RC)
+ | je >3 // Previous value is nil?
+ |1:
+- | llgc TMPR1, TAB:RB->marked
+- | tmll TMPR1, LJ_GC_BLACK // isblack(table)
++ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2: // Set array slot.
+ | sllg RA, RA, 3
+@@ -3247,8 +3227,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1
+- | llgc TMPR2, TAB:TMPR1->nomm
+- | tmll TMPR2, 1<<MM_newindex
++ | tm TAB:TMPR1->nomm, 1<<MM_newindex
+ | je ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
+ | j <1
+ |
+@@ -3287,8 +3266,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cg TMPR2, 0(TMPR1)
+ | je >4 // Previous value is nil?
+ |2:
+- | llgc TMPR2, TAB:RB->marked
+- | tmll TMPR2, LJ_GC_BLACK // isblack(table)
++ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |3: // Set node value.
+ | sllg RA, RA, 3
+@@ -3300,8 +3278,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg TAB:ITYPE, TAB:RB->metatable
+ | cghi TAB:ITYPE, 0
+ | je <2
+- | llgc TMPR2, TAB:ITYPE->nomm
+- | tmll TMPR2, 1<<MM_newindex
++ | tm TAB:ITYPE->nomm, 1<<MM_newindex
+ | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ | j <2
+ |
+@@ -3315,8 +3292,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je >6 // No metatable: continue.
+- | llgc TMPR2, TAB:TMPR1->nomm
+- | tmll TMPR2, 1<<MM_newindex
++ | tm TAB:TMPR1->nomm, 1<<MM_newindex
+ | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+ | stg ITYPE, TMP_STACK
+@@ -3350,8 +3326,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cg TMPR2, 0(RC)
+ | je >3 // Previous value is nil?
+ |1:
+- | llgc TMPR1, TAB:RB->marked
+- | tmll TMPR1, LJ_GC_BLACK // isblack(table)
++ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2: // Set array slot.
+ | sllg RA, RA, 3
+@@ -3363,8 +3338,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg TAB:TMPR1, TAB:RB->metatable
+ | cghi TAB:TMPR1, 0
+ | je <1
+- | llgc TMPR2, TAB:TMPR1->nomm
+- | tmll TMPR2, 1<<MM_newindex
++ | tm TAB:TMPR1->nomm, 1<<MM_newindex
+ | je ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
+ | j <1
+ |
+@@ -3379,8 +3353,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cleartp TAB:RB
+ | sllg RC, RC, 3
+ | lg RC, 0(RC, BASE)
+- | llgc TMPR2, TAB:RB->marked
+- | tmll TMPR2, LJ_GC_BLACK // isblack(table)
++ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2:
+ | cl RC, TAB:RB->asize
+@@ -3409,8 +3382,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | la RA, 0(RA, BASE)
+ | lg TAB:RB, -8(RA) // Guaranteed to be a table.
+ | cleartp TAB:RB
+- | llgc TMPR2, TAB:RB->marked
+- | tmll TMPR2, LJ_GC_BLACK // isblack(table)
++ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jne >7
+ |2:
+ | llgf RD, SAVE_MULTRES
+
+From 102d848577a7a90823fda1c818973a44a8a8714f Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 10 Jan 2017 11:58:24 -0500
+Subject: [PATCH 217/260] Minor instruction changes.
+
+---
+ src/vm_s390x.dasc | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 7e7915bf9..d16abc05c 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -292,10 +292,7 @@
+ |
+ |// Move table write barrier back. Overwrites reg.
+ |.macro barrierback, tab, reg
+-| // TODO: more efficient way?
+-| llgc reg, tab->marked
+-| nill reg, (uint16_t)~LJ_GC_BLACK // black2gray(tab)
+-| stc reg, tab->marked
++| ni tab->marked, ~LJ_GC_BLACK // black2gray(tab)
+ | lg reg, (DISPATCH_GL(gc.grayagain))(DISPATCH)
+ | stg tab, (DISPATCH_GL(gc.grayagain))(DISPATCH)
+ | stg reg, tab->gclist
+@@ -316,8 +313,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
+- | lghi TMPR2, FRAME_P
+- | nr TMPR2, PC
++ | tmll PC, FRAME_P
+ | je ->cont_dispatch
+ |
+ | // Return from pcall or xpcall fast func.
+
+From 55af819e971feabc39ed82015b35436a97a96c17 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 10 Jan 2017 12:04:17 -0500
+Subject: [PATCH 218/260] Remove TODO.
+
+---
+ src/vm_s390x.dasc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index d16abc05c..4c8e3a327 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -554,7 +554,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ | lg RD, L:LREG->top
+ | sgr RD, RA
+- | srlg NARGS:RD, NARGS:RD, 3 // TODO: support '3' on its own in dynasm.
++ | srlg NARGS:RD, NARGS:RD, 3
+ | aghi NARGS:RD, 1 // RD = nargs+1
+ |
+ |->vm_call_dispatch:
+
+From 21655cf90b7b75841c169ea9458d5fc22dc78735 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 10 Jan 2017 12:52:02 -0500
+Subject: [PATCH 219/260] Add remaining (useful) SI instructions to DynASM.
+
+---
+ dynasm/dasm_s390x.lua | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index dc83c9f9f..bff135b30 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1251,8 +1251,12 @@ map_op = {
+ -- RIE-e
+ brxhg_3 = "ec0000000044RIE-e",
+ -- SI
++ cli_2 = "000095000000SI",
++ mvi_2 = "000092000000SI",
+ ni_2 = "000094000000SI",
+ tm_2 = "000091000000SI",
++ xi_2 = "000097000000SI",
++ oi_2 = "000096000000SI",
+ -- SIY
+ tmy_2 = "eb0000000051SIY",
+ -- RXF
+
+From 660ddd1db2ad7a00d1a7f3be55cf98dd968b668a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 10 Jan 2017 12:52:45 -0500
+Subject: [PATCH 220/260] Replace some instruction sequences with their SI
+ equivalents.
+
+---
+ src/vm_s390x.dasc | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 4c8e3a327..bb53757f6 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1395,9 +1395,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | checktptp L:TMPR2, LJ_TTHREAD, ->fff_fallback
+ |.endif
+ | ltg TMPR2, L:RB->cframe; jne ->fff_fallback
+- | // TODO: replace with cli.
+- | llgc TMPR1, L:RB->status
+- | cghi TMPR1, (uint8_t)LUA_YIELD; jh ->fff_fallback
++ | cli L:RB->status, LUA_YIELD; jh ->fff_fallback
+ | lg RA, L:RB->top
+ | je >1 // Status != LUA_YIELD (i.e. 0)?
+ | cg RA, L:RB->base // Check for presence of initial func.
+@@ -3250,8 +3248,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | n TMPR1, STR:RC->hash
+ | lgfr TMPR1, TMPR1
+ | mghi TMPR1, #NODE
+- | xr TMPR2, TMPR2
+- | stc TMPR2, TAB:RB->nomm // Clear metamethod cache.
++ | mvi TAB:RB->nomm, 0 // Clear metamethod cache.
+ | ag NODE:TMPR1, TAB:RB->node
+ | settp ITYPE, STR:RC, LJ_TSTR
+ |1:
+@@ -3596,8 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lghi TMPR2, BC_JMP
+ | stcy TMPR2, PC_OP
+ | branchPC RD
+- | lghi TMPR2, BC_ITERC
+- | stc TMPR2, 3(PC)
++ | mvi 3(PC), BC_ITERC
+ | j <1
+ break;
+
+
+From e933353feb13b26b437f7fbe5e02e7ccc7147be9 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 10 Jan 2017 14:12:06 -0500
+Subject: [PATCH 221/260] Use execute rather than loop for mvc and avoid jumps
+ in fast path.
+
+Not sure if this works, the tests don't exercise the stack code.
+---
+ src/Makefile | 2 +-
+ src/vm_s390x.dasc | 58 ++++++++++++++++++++++++-----------------------
+ 2 files changed, 31 insertions(+), 29 deletions(-)
+
+diff --git a/src/Makefile b/src/Makefile
+index 1450adc03..a6723e232 100644
+--- a/src/Makefile
++++ b/src/Makefile
+@@ -56,7 +56,7 @@ CCOPT_mips=
+ #
+ CCDEBUG=
+ # Uncomment the next line to generate debug information:
+-#CCDEBUG= -g
++CCDEBUG= -g
+ #
+ CCWARN= -Wall
+ # Uncomment the next line to enable more warnings:
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index bb53757f6..f8be28478 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2142,47 +2142,26 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vm_ffi_call: // Call C function via FFI.
+ | // Caveat: needs special frame unwinding, see below.
+ |.if FFI
+- | .type CCSTATE, CCallState, r10
+- | stmg r6, r15, 48(sp) // TODO: need to save r6, but might be better in separate store?
++ | .type CCSTATE, CCallState, r8
++ | stmg r6, r15, 48(sp)
+ | lgr CCSTATE, CARG1
++ | lg r7, CCSTATE->func // TODO: move further up?
+ |
+ | // Readjust stack.
+ | sgf sp, CCSTATE->spadj
+ |
+ | // Copy stack slots.
+- | llgc r0, CCSTATE->nsp
+- | cghi r0, 0
+- | jle >3
+- | lay r1, (offsetof(CCallState, stack))(CCSTATE) // Source.
+- | lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
++ | llgc r1, CCSTATE->nsp
++ | chi r1, 0
++ | jh >2
+ |1:
+- | cghi r0, 256
+- | jl >2
+- | mvc 0(256, r11), 0(r1)
+- | aghi r1, 256*8
+- | aghi r11, 256*8
+- | aghi r0, -256
+- | j <1
+- |2:
+- | cghi r0, 0
+- | je >3
+- | // TODO: exrl mvc rather than loop.
+- | mvc 0(8, r11), 0(r1)
+- | aghi r1, 8
+- | aghi r11, 8
+- | aghi r0, -1
+- | j <2
+- |3:
+- |
+ | lmg CARG1, CARG5, CCSTATE->gpr[0]
+ | // TODO: conditionally load FPRs?
+ | ld FARG1, CCSTATE->fpr[0]
+ | ld FARG2, CCSTATE->fpr[1]
+ | ld FARG3, CCSTATE->fpr[2]
+ | ld FARG4, CCSTATE->fpr[3]
+- |5:
+- | lg r1, CCSTATE->func // TODO: move further up?
+- | basr r14, r1
++ | basr r14, r7
+ |
+ | stg CRET1, CCSTATE->gpr[0]
+ | stg f0, CCSTATE->fpr[0]
+@@ -2190,6 +2169,29 @@ static void build_subroutines(BuildCtx *ctx)
+ | agf sp, CCSTATE->spadj
+ | lmg r6, r15, 48(sp)
+ | br r14
++ |
++ |2:
++ | lay r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
++ | lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
++ |3:
++ | chi r1, 256
++ | jl >4
++ | mvc 0(256, r11), 0(r10)
++ | la r10, 256*8(r10)
++ | la r11, 256*8(r11)
++ | ahi r1, -256
++ | j <3
++ |
++ |4:
++ | ahi r1, -1
++ | jl <1
++ | larl r9, >5
++ | ex r1, 0(r9) // TODO: exrl is faster but needs z10.
++ | j <1
++ |
++ |5:
++ | // exrl target
++ | mvc 0(1, r11), 0(r10)
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+
+From 86a24eac4affd8e61d0e7f7404a727b1196ac6f2 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 10 Jan 2017 16:11:30 -0500
+Subject: [PATCH 222/260] Various fixes for FFI calls.
+
+Fixes the following scenarios:
+ * Returning floating point value.
+ * More than 4 GPR arguments.
+---
+ src/lj_ccall.c | 13 +++++++++++--
+ src/vm_s390x.dasc | 7 ++++---
+ 2 files changed, 15 insertions(+), 5 deletions(-)
+
+diff --git a/src/lj_ccall.c b/src/lj_ccall.c
+index a6b0a8fdb..00c1c7cf8 100644
+--- a/src/lj_ccall.c
++++ b/src/lj_ccall.c
+@@ -585,9 +585,9 @@
+
+ #define CCALL_HANDLE_REGARG \
+ if (isfp) { \
+- if (nfpr < maxgpr) { dp = &cc->fpr[nfpr++]; goto done; } \
++ if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \
+ } else { \
+- if (ngpr < CCALL_NARG_FPR) { dp = &cc->gpr[ngpr++]; goto done; } \
++ if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
+ }
+
+ #else
+@@ -1074,6 +1074,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
+ }
+ #endif
++#if LJ_TARGET_S390X
++ /* Arguments need to be sign-/zero-extended to 64-bits. */
++ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)) && d->size <= 4) {
++ if (d->info & CTF_UNSIGNED)
++ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
++ else
++ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
++ }
++#endif
+ #if LJ_TARGET_X64 && LJ_ABI_WIN
+ if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
+ if (nfpr == ngpr)
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index f8be28478..f0289de60 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2164,21 +2164,22 @@ static void build_subroutines(BuildCtx *ctx)
+ | basr r14, r7
+ |
+ | stg CRET1, CCSTATE->gpr[0]
+- | stg f0, CCSTATE->fpr[0]
++ | std f0, CCSTATE->fpr[0]
+ |
+ | agf sp, CCSTATE->spadj
+ | lmg r6, r15, 48(sp)
+ | br r14
+ |
+ |2:
++ | sll r1, 3
+ | lay r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
+ | lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
+ |3:
+ | chi r1, 256
+ | jl >4
+ | mvc 0(256, r11), 0(r10)
+- | la r10, 256*8(r10)
+- | la r11, 256*8(r11)
++ | la r10, 256(r10)
++ | la r11, 256(r11)
+ | ahi r1, -256
+ | j <3
+ |
+
+From 938f964b6ac2ea6d920f5046658a989e6d29fcef Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 10 Jan 2017 16:56:43 -0500
+Subject: [PATCH 223/260] Fix single-precision floating point parameters passed
+ on stack.
+
+On the stack they are laid out the opposite way round to how they are laid out in registers, for some reason.
+---
+ src/lj_ccall.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/src/lj_ccall.c b/src/lj_ccall.c
+index 00c1c7cf8..87c4cac3e 100644
+--- a/src/lj_ccall.c
++++ b/src/lj_ccall.c
+@@ -1000,7 +1000,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
+ CTypeID did;
+ CType *d;
+ CTSize sz;
+- MSize n, isfp = 0, isva = 0;
++ MSize n, isfp = 0, isva = 0, onstack = 0;
+ void *dp, *rp = NULL;
+
+ if (fid) { /* Get argument type from field. */
+@@ -1040,6 +1040,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
+ CCALL_HANDLE_REGARG /* Handle register arguments. */
+
+ /* Otherwise pass argument on stack. */
++ onstack = 1;
+ if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
+ MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
+ nsp = (nsp + align) & ~align; /* Align argument on stack. */
+@@ -1076,8 +1077,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
+ #endif
+ #if LJ_TARGET_S390X
+ /* Arguments need to be sign-/zero-extended to 64-bits. */
+- if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)) && d->size <= 4) {
+- if (d->info & CTF_UNSIGNED)
++ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
++ (isfp && onstack)) && d->size <= 4) {
++ if (d->info & CTF_UNSIGNED || isfp)
+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
+ else
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+
+From db99c31890dbfffb163e6999a2cd0686e5adad16 Mon Sep 17 00:00:00 2001
+From: ketank-new <ketan22584@gmail.com>
+Date: Wed, 11 Jan 2017 17:04:09 +0530
+Subject: [PATCH 224/260] Added example for 'TM' instruction
+
+---
+ dynasm/Examples/test_z_inst.c | 21 ++++++++++++++++++++-
+ 1 file changed, 20 insertions(+), 1 deletion(-)
+
+diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
+index 8558aae42..4d6ee41ff 100644
+--- a/dynasm/Examples/test_z_inst.c
++++ b/dynasm/Examples/test_z_inst.c
+@@ -258,6 +258,24 @@ static void load_test(dasm_State *state)
+ }
+ */
+
++
++static void test_mask(dasm_State *state)
++{
++ dasm_State **Dst = &state;
++
++ |lay sp , -8(sp)
++ |stg r2, 4(sp)
++ |tm 4(sp),0x04
++ |je >2
++ |jne >1
++|1:
++ |ar r2,r3
++ |br r14
++|2:
++ |sr r2,r3
++ |br r14
++}
++
+ static void ssa(dasm_State *state) {
+ dasm_State **Dst = &state;
+
+@@ -428,7 +446,8 @@ test_table test[] = {
+ { 0, 0, 0, rsb, 0, "rsb"},
+ {12,10, 0, rre, 10, "rre"},
+ {16,10, 0, sqrt_rxe, 4,"sqrt_rxe"},
+- {16,10, 0, rxf, 116, "rxf"}
++ {16,10, 0, rxf, 116, "rxf"},
++ { 4, 3, 0, test_mask, 1,"test_mask"}
+ };
+
+ static void *jitcode(dasm_State **state, size_t *size)
+
+From f660d36138b7550145ed87d34612fb03597d2db5 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 10 Jan 2017 17:09:59 -0500
+Subject: [PATCH 225/260] Fix ffi calls with complex parameters.
+
+---
+ src/lj_ccall.c | 15 +++++++--------
+ 1 file changed, 7 insertions(+), 8 deletions(-)
+
+diff --git a/src/lj_ccall.c b/src/lj_ccall.c
+index 87c4cac3e..6c8d89692 100644
+--- a/src/lj_ccall.c
++++ b/src/lj_ccall.c
+@@ -563,11 +563,12 @@
+ cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \
+ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
+
+-#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
++#define CCALL_HANDLE_COMPLEXRET \
++ cc->retref = 1; /* Return all complex values by reference. */ \
++ cc->gpr[ngpr++] = (GPRArg)dp;
+
+ #define CCALL_HANDLE_COMPLEXRET2 \
+- if (!cc->retref) \
+- *(int64_t *)dp = *(int64_t *)sp; /* Copy complex float from GPRs. */
++ UNUSED(dp); /* Nothing to do. */
+
+ #define CCALL_HANDLE_STRUCTARG \
+ /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \
+@@ -577,11 +578,9 @@
+ }
+
+ #define CCALL_HANDLE_COMPLEXARG \
+- /* Pass complex float in a GPR and complex double by reference. */ \
+- if (sz != 2*sizeof(float)) { \
+- rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+- sz = CTSIZE_PTR; \
+- }
++ /* Pass complex numbers by reference. */ \
++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
++ sz = CTSIZE_PTR; \
+
+ #define CCALL_HANDLE_REGARG \
+ if (isfp) { \
+
+From 5c697cc772efe56f1471dca5326e53e571fa8aee Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 11 Jan 2017 12:13:12 -0500
+Subject: [PATCH 226/260] Fix ffi calls returning structs.
+
+---
+ src/lj_ccall.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/src/lj_ccall.c b/src/lj_ccall.c
+index 6c8d89692..ec3ef73eb 100644
+--- a/src/lj_ccall.c
++++ b/src/lj_ccall.c
+@@ -559,9 +559,8 @@
+ /* -- POSIX/s390x calling conventions --------------------------------------- */
+
+ #define CCALL_HANDLE_STRUCTRET \
+- /* Return structs of size 1, 2, 4 or 8 in a GPR. */ \
+- cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \
+- if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
++ cc->retref = 1; /* Return all structs by reference. */ \
++ cc->gpr[ngpr++] = (GPRArg)dp;
+
+ #define CCALL_HANDLE_COMPLEXRET \
+ cc->retref = 1; /* Return all complex values by reference. */ \
+@@ -579,6 +578,7 @@
+
+ #define CCALL_HANDLE_COMPLEXARG \
+ /* Pass complex numbers by reference. */ \
++ /* TODO: not sure why this is different to structs. */ \
+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+ sz = CTSIZE_PTR; \
+
+
+From 65906fbc49105dcb4819c0c311ff43858b920e1b Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 11 Jan 2017 12:46:30 -0500
+Subject: [PATCH 227/260] Fix typo in lj_ccallback.c
+
+For future reference only, we aren't using this bit of code yet.
+---
+ src/lj_ccallback.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
+index 780400573..f7be343fb 100644
+--- a/src/lj_ccallback.c
++++ b/src/lj_ccallback.c
+@@ -499,9 +499,9 @@ void lj_ccallback_mcode_free(CTState *cts)
+
+ #define CALLBACK_HANDLE_REGARG \
+ if (isfp) { \
+- if (nfpr < maxgpr) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
++ if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
+ } else { \
+- if (ngpr < CCALL_NARG_FPR) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
++ if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
+ }
+
+ #else
+
+From 084ab262803fb76f087f5707c1aefbf9bde4eb1a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 11 Jan 2017 14:42:24 -0500
+Subject: [PATCH 228/260] Delete LREG and replace its uses with RB.
+
+This frees up a register and fixes a bug where RB was not loaded
+correctly into LREG.
+---
+ src/vm_s390x.dasc | 33 +++++++++++++++------------------
+ 1 file changed, 15 insertions(+), 18 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index f0289de60..0e9709ff8 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -41,8 +41,7 @@
+ |.define KBASE, r8 // Constants of current Lua function.
+ |.define PC, r9 // Next PC.
+ |.define DISPATCH, r10 // Opcode dispatch table.
+-|.define LREG, r11 // Register holding lua_State (also in SAVE_L).
+-|.define ITYPE, r13 //
++|.define ITYPE, r11 //
+ |
+ |// The following temporaries are not saved across C calls, except for RD.
+ |.define RA, r1
+@@ -368,7 +367,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vm_leave_cp:
+ | lg RA, SAVE_CFRAME // Restore previous C frame.
+- | stg RA, L:LREG->cframe
++ | stg RA, L:RB->cframe
+ | lghi CRET1, 0 // Ok return status for vm_pcall.
+ |
+ |->vm_leave_unw:
+@@ -498,7 +497,6 @@ static void build_subroutines(BuildCtx *ctx)
+ | st RD, SAVE_NRES
+ | stg RD, SAVE_ERRF
+ | stg KBASE, L:RB->cframe
+- | lgr LREG, L:RB
+ | clm RD, 1, L:RB->status
+ | je >2 // Initial resume (like a call).
+ |
+@@ -543,16 +541,15 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
+ | aghi DISPATCH, GG_G2DISP
+ | stg sp, L:RB->cframe
+- | lgr L:LREG, L:RB // TODO: use RB instead of LREG here?
+ |
+- |2: // Entry point for vm_resume/vm_cpcall (RA = base, LREG = L, PC = ftype).
+- | stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
++ |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
++ | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH)
+ | set_vmstate INTERP
+- | lg BASE, L:LREG->base // BASE = old base (used in vmeta_call).
++ | lg BASE, L:RB->base // BASE = old base (used in vmeta_call).
+ | agr PC, RA
+ | sgr PC, BASE // PC = frame delta + frame type
+ |
+- | lg RD, L:LREG->top
++ | lg RD, L:RB->top
+ | sgr RD, RA
+ | srlg NARGS:RD, NARGS:RD, 3
+ | aghi NARGS:RD, 1 // RD = nargs+1
+@@ -569,23 +566,23 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vm_cpcall: // Setup protected C frame, call C.
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+ | saveregs
+- | lgr LREG, CARG1
+- | stg LREG, SAVE_L
+- | stg LREG, SAVE_PC // Any value outside of bytecode is ok.
++ | lgr L:RB, CARG1
++ | stg L:RB, SAVE_L
++ | stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
+ |
+- | lg KBASE, L:LREG->stack // Compute -savestack(L, L->top).
+- | sg KBASE, L:LREG->top
+- | lg DISPATCH, L:LREG->glref // Setup pointer to dispatch table.
++ | lg KBASE, L:RB->stack // Compute -savestack(L, L->top).
++ | sg KBASE, L:RB->top
++ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | lghi RA, 0
+ | stg RA, SAVE_ERRF // No error function.
+ | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
+ | aghi DISPATCH, GG_G2DISP
+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
+ |
+- | lg KBASE, L:LREG->cframe // Add our C frame to cframe chain.
++ | lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
+ | stg KBASE, SAVE_CFRAME
+- | stg sp, L:LREG->cframe
+- | stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
++ | stg sp, L:RB->cframe
++ | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH)
+ |
+ | basr r14, CARG4 // (lua_State *L, lua_CFunction func, void *ud)
+ | // TValue * (new base) or NULL returned in r2 (CRET1/).
+
+From c8db1b960caf9947991dc65346e841033bb5bc55 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 11 Jan 2017 16:16:51 -0500
+Subject: [PATCH 229/260] Clean up register allocations and comments slightly.
+
+Move RB from r12 to r13 so that it no longer overlaps with the
+GOT pointer (to avoid potential problems with PIC compilation).
+---
+ src/vm_s390x.dasc | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 0e9709ff8..404c4b325 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -8,8 +8,8 @@
+ |// r3-r5 | parameter | volatile |
+ |// r6 | parameter | saved |
+ |// r7-r11 | | saved |
+-|// r12 | GOT pointer (needed?) | saved |
+-|// r13 | literal pool (needed?) | saved |
++|// r12 | GOT pointer (needed?) | saved |
++|// r13 | literal pool (not needed) | saved |
+ |// r14 | return address | volatile |
+ |// r15 | stack pointer | saved |
+ |// f0,f2,f4,f6 | parameter and return value | volatile |
+@@ -41,13 +41,13 @@
+ |.define KBASE, r8 // Constants of current Lua function.
+ |.define PC, r9 // Next PC.
+ |.define DISPATCH, r10 // Opcode dispatch table.
+-|.define ITYPE, r11 //
++|.define ITYPE, r11 // Temporary used for type information.
+ |
+-|// The following temporaries are not saved across C calls, except for RD.
++|// The following temporaries are not saved across C calls, except for RB.
+ |.define RA, r1
+-|.define RB, r12
++|.define RB, r13 // Must be callee-save.
+ |.define RC, r5 // Overlaps CARG4.
+-|.define RD, r6 // Overlaps CARG5. Callee-saved.
++|.define RD, r6 // Overlaps CARG5.
+ |
+ |// Calling conventions. Also used as temporaries.
+ |.define CARG1, r2
+@@ -484,7 +484,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vm_resume: // Setup C frame and resume thread.
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+ | saveregs
+- | lgr L:RB, CARG1 // Caveat: CARG1 may be RA.
++ | lgr L:RB, CARG1
+ | stg CARG1, SAVE_L
+ | lgr RA, CARG2
+ | lghi PC, FRAME_CP
+@@ -752,7 +752,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | la RB, 0(RB, BASE)
+ |2:
+ | lg L:CARG1, SAVE_L
+- | stg BASE, L:CARG1->base // Caveat: CARG2/CARG3 may be BASE.
++ | stg BASE, L:CARG1->base
+ | lgr CARG2, RB
+ | lgr CARG3, RC
+ | lgr L:RB, L:CARG1
+@@ -4071,7 +4071,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lay RA, (8*LUA_MINSTACK)(RD)
+ | clg RA, L:RB->maxstack
+ | stg RD, L:RB->top
+- | lgr CARG1, L:RB // Caveat: CARG1 may be RA.
++ | lgr CARG1, L:RB
+ if (op != BC_FUNCC) {
+ | lgr CARG2, KBASE
+ }
+
+From a3bb1cee5d2b7c9b1ea1cfe4c89fe39f77a5e64e Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 11 Jan 2017 16:38:35 -0500
+Subject: [PATCH 230/260] Add and use branch on count instructions where
+ possible.
+
+---
+ dynasm/dasm_s390x.lua | 2 ++
+ src/vm_s390x.dasc | 24 ++++++------------------
+ 2 files changed, 8 insertions(+), 18 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index bff135b30..8bf7084d6 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1240,6 +1240,8 @@ map_op = {
+ mghi_2 = "0000a70d0000RI-a",
+ -- RI-b mode instructions
+ bras_2 = "0000a7050000RI-b",
++ brct_2 = "0000a7060000RI-b",
++ brctg_2 = "0000a7070000RI-b",
+ -- RI-c mode instructions
+ brc_2 = "0000a7040000RI-c",
+ -- RIL-c
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 404c4b325..918a52f76 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1085,9 +1085,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | la RA, 8(RA)
+ | lg RB, 0(RA)
+ | stg RB, -16(RA)
+- | ahi RD, -1
+- | jne <1
+- | // TODO: replace with branch on count (brctg).
++ | brct RD, <1
+ |2:
+ | llgf RD, SAVE_MULTRES
+ | j ->fff_res_
+@@ -3392,9 +3390,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | la RA, 8(RA)
+ | stg RB, 0(TMPR1)
+ | la TMPR1, 8(TMPR1)
+- | aghi RD, -1
+- | jne <3
+- | // TODO: replace decrement/branch with branch on count.
++ | brctg RD, <3
+ |4:
+ | ins_next
+ |
+@@ -3458,9 +3454,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | la RA, 8(RA)
+ | stg RB, 0(KBASE)
+ | la KBASE, 8(KBASE)
+- | // TODO: replace decrement/branch with brctg
+- | aghi NARGS:RD, -1
+- | jne <2
++ | brctg NARGS:RD, <2
+ |
+ | lg LFUNC:RB, -16(BASE)
+ |3:
+@@ -3698,9 +3692,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg RB, 0(KBASE, RA)
+ | stg RB, -16(KBASE)
+ | la KBASE, 8(KBASE)
+- | // TODO: replace with brctg RD, <2 once supported.
+- | aghi RD, -1
+- | jne <2
++ | brctg RD, <2
+ |3:
+ | llgf RD, SAVE_MULTRES // Note: MULTRES may be >256.
+ | llgc RB, PC_RB
+@@ -4037,9 +4029,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg KBASE, 0(RD)
+ | la RD, 8(RD)
+ | stg TMPR1, -16(RA) // Clear old fixarg slot (help the GC).
+- | aghi RB, -1
+- | jne <1
+- | // TODO: brctg instead of decrement/branch
++ | brctg RB, <1
+ |2:
+ if (op == BC_JFUNCV) {
+ | llgh RD, PC_RD
+@@ -4052,9 +4042,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |3: // Clear missing parameters.
+ | stg TMPR1, 0(RD) // TMPR1=LJ_TNIL (-1) here.
+ | la RD, 8(RD)
+- | aghi RB, -1
+- | jne <3
+- | // TODO: brctg instead of decrement/branch
++ | brctg RB, <3
+ | j <2
+ break;
+
+
+From 9e9a1cf0a5b06dfbafa2ba731774588a141fa021 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 11 Jan 2017 16:55:50 -0500
+Subject: [PATCH 231/260] Replace lay with la where possible.
+
+---
+ src/vm_s390x.dasc | 29 +++++++++++++++--------------
+ 1 file changed, 15 insertions(+), 14 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 918a52f76..dee93c6ab 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -22,6 +22,7 @@
+ |// clfi (compare logical immediate) [requires z9-109]
+ |// ldgr (load FPR from GPR) [requires z9-109 GA3]
+ |// lgdr (load GPR from FPR) [requires z9-109 GA3]
++|// lay (load address)  [requires z900 GA2]
+ |// ldy (load (long bfp)) [requires z900 GA2]
+ |// stdy (store (long bfp)) [requires z900 GA2]
+ |// TODO: alternative instructions?
+@@ -390,7 +391,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | je <5 // But check for LUA_MULTRET+1.
+ | sgr RA, RD // Negative result!
+ | sllg TMPR1, RA, 3
+- | lay BASE, 0(TMPR1, BASE) // Correct top.
++ | la BASE, 0(TMPR1, BASE) // Correct top.
+ | j <5
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+@@ -430,7 +431,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lghi RD, 1+1 // Really 1+2 results, incr. later.
+ | lg BASE, L:RB->base
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+- | lay DISPATCH, GG_G2DISP(DISPATCH)
++ | la DISPATCH, GG_G2DISP(DISPATCH)
+ | lg PC, -8(BASE) // Fetch PC of previous frame.
+ | load_false RA
+ | lg RB, 0(BASE)
+@@ -489,7 +490,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lgr RA, CARG2
+ | lghi PC, FRAME_CP
+ | lghi RD, 0
+- | lay KBASE, CFRAME_RESUME(sp)
++ | la KBASE, CFRAME_RESUME(sp)
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | aghi DISPATCH, GG_G2DISP
+ | stg RD, SAVE_PC // Any value outside of bytecode is ok.
+@@ -701,7 +702,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
+ | lg RA, L:RB->top
+ | stg PC, -24(RA) // [cont|PC]
+- | lay PC, FRAME_CONT(RA)
++ | la PC, FRAME_CONT(RA)
+ | sgr PC, BASE
+ | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
+ | lghi NARGS:RD, 2+1 // 2 args for func(t, k).
+@@ -892,8 +893,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vmeta_arith_vn:
+ | sllg RB, RB, 3
+ | sllg RC, RC, 3
+- | lay RB, 0(RB, BASE)
+- | lay RC, 0(RC, KBASE)
++ | la RB, 0(RB, BASE)
++ | la RC, 0(RC, KBASE)
+ | j >1
+ |
+ |->vmeta_arith_nvo:
+@@ -902,8 +903,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vmeta_arith_nv:
+ | sllg RC, RC, 3
+ | sllg RB, RB, 3
+- | lay TMPR1, 0(RC, KBASE)
+- | lay RC, 0(RB, BASE)
++ | la TMPR1, 0(RC, KBASE)
++ | la RC, 0(RB, BASE)
+ | lgr RB, TMPR1
+ | j >1
+ |
+@@ -920,12 +921,12 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vmeta_arith_vv:
+ | sllg RC, RC, 3
+ | sllg RB, RB, 3
+- | lay RB, 0(RB, BASE)
+- | lay RC, 0(RC, BASE)
++ | la RB, 0(RB, BASE)
++ | la RC, 0(RC, BASE)
+ |1:
+ | llgc RA, PC_RA
+ | sllg RA, RA, 3
+- | lay RA, 0(RA, BASE)
++ | la RA, 0(RA, BASE)
+ | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
+ | lgr CARG2, RA
+ | lgr CARG3, RB
+@@ -2167,8 +2168,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |2:
+ | sll r1, 3
+- | lay r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
+- | lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
++ | la r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
++ | la r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
+ |3:
+ | chi r1, 256
+ | jl >4
+@@ -4056,7 +4057,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | sllg RD, NARGS:RD, 3
+ | lay RD, -8(RD,BASE)
+ | stg BASE, L:RB->base
+- | lay RA, (8*LUA_MINSTACK)(RD)
++ | la RA, (8*LUA_MINSTACK)(RD)
+ | clg RA, L:RB->maxstack
+ | stg RD, L:RB->top
+ | lgr CARG1, L:RB
+
+From a9d61d0044ca017559ec6fab8b359ddff8e9f24a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Thu, 12 Jan 2017 14:29:55 -0500
+Subject: [PATCH 232/260] Implement debug.sethook().
+
+---
+ src/lj_frame.h | 7 +++-
+ src/vm_s390x.dasc | 81 +++++++++++++++++++++++++++++++++++++++--------
+ 2 files changed, 74 insertions(+), 14 deletions(-)
+
+diff --git a/src/lj_frame.h b/src/lj_frame.h
+index 0b90f1421..a8148150a 100644
+--- a/src/lj_frame.h
++++ b/src/lj_frame.h
+@@ -272,7 +272,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
+ #define CFRAME_OFS_PC 168
+ #define CFRAME_OFS_MULTRES 160
+ #define CFRAME_SIZE 240
+-#define CFRAME_SHIFT_MULTRES 3
++/*
++** TODO: it would be good if we always decoded param*8 like
++** the RISC architectures do. If so then SHIFT_MULTRES will
++** need to change to 3.
++*/
++#define CFRAME_SHIFT_MULTRES 0
+ #else
+ #error "Missing CFRAME_* definitions for this architecture"
+ #endif
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index dee93c6ab..b049a6c7a 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -188,12 +188,12 @@
+ |.macro ins_callt
+ | // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC
+ | lg PC, LFUNC:RB->pc
+-| llgf RA, 0(PC) // TODO: combine loads?
+-| llgcr OP, RA
+-| sllg TMPR1, OP, 3
++| llgc OP, 3(PC)
++| llgc RA, 2(PC)
++| sllg TMPR1, OP, 3
+ | la PC, 4(PC)
+-| lg TMPR1, 0(TMPR1, DISPATCH)
+-| br TMPR1
++| lg TMPR1, 0(TMPR1, DISPATCH)
++| br TMPR1
+ |.endmacro
+ |
+ |.macro ins_call
+@@ -2044,8 +2044,35 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg r0, 0
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+- | stg r0, 0
+- | stg r0, 0
++ | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
++ | tmll RD, HOOK_ACTIVE // Hook already active?
++ | jne >5
++ |
++ | tmll RD, LUA_MASKLINE|LUA_MASKCOUNT
++ | je >5
++ | ly TMPR2, (DISPATCH_GL(hookcount))(DISPATCH)
++ | ahi TMPR2, -1
++ | sty TMPR2, (DISPATCH_GL(hookcount))(DISPATCH)
++ | je >1
++ | tmll RD, LUA_MASKLINE
++ | je >5
++ |1:
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ | lgr CARG2, PC
++ | lgr CARG1, L:RB
++ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
++ | brasl r14, extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
++ |3:
++ | lg BASE, L:RB->base
++ |4:
++ | llgc RA, PC_RA
++ |5:
++ | llgc OP, PC_OP
++ | sllg TMPR1, OP, 3
++ | llgh RD, PC_RD
++ | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH)
++ | br TMPR1
+ |
+ |->cont_hook: // Continue from hook yield.
+ | stg r0, 0
+@@ -2056,12 +2083,40 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg r0, 0
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+- | stg r0, 0
+- | stg r0, 0
++ | stg PC, SAVE_PC
++ |.if JIT
++ | j >1
++ |.endif
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+- | stg r0, 0
+- | stg r0, 0
++ |.if JIT
++ | stg PC, SAVE_PC
++ | oill PC, 1 // Marker for hot call.
++ |1:
++ |.endif
++ | sllg RD, NARGS:RD, 3
++ | lay RD, -8(RD, BASE)
++ | lg L:RB, SAVE_L
++ | stg BASE, L:RB->base
++ | stg RD, L:RB->top
++ | lgr CARG2, PC
++ | lgr CARG1, L:RB
++ | brasl r14, extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
++ | // ASMFunction returned in r2 (CRET1).
++ | lghi TMPR2, 0
++ | stg TMPR2, SAVE_PC // Invalidate for subsequent line hook.
++ |.if JIT
++ | nill PC, -2
++ |.endif
++ | lg BASE, L:RB->base
++ | lg RD, L:RB->top
++ | sgr RD, BASE
++ | lgr RB, CRET1
++ | llgc RA, PC_RA
++ | srl RD, 3
++ | ahi NARGS:RD, 1
++ | llgfr RD, RD
++ | br RB
+ |
+ |->cont_stitch: // Trace stitching.
+ | stg r0, 0
+@@ -3422,7 +3477,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | agf NARGS:RD, SAVE_MULTRES
+ }
+ | sllg RA, RA, 3
+- | lg LFUNC:RB, 0(BASE, RA)
++ | lg LFUNC:RB, 0(RA, BASE)
+ | checkfunc LFUNC:RB, ->vmeta_call_ra
+ | la BASE, 16(RA, BASE)
+ | ins_call
+@@ -3659,7 +3714,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg BASE, L:RB->base
+- | llgf TMPR1, TMP_STACK_HI
++ | lgf TMPR1, TMP_STACK_HI
+ | lg RA, L:RB->top
+ | agr TMPR1, BASE
+ | j <6
+
+From 70b5f6b966a185e966b4e021d51bb597812cd6f2 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 16 Jan 2017 16:11:24 -0500
+Subject: [PATCH 233/260] Make TMPR1 r1 and RA r4.
+
+This avoids using the link register as a temporary.
+---
+ src/vm_s390x.dasc | 24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index b049a6c7a..3ab8904cc 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -45,7 +45,7 @@
+ |.define ITYPE, r11 // Temporary used for type information.
+ |
+ |// The following temporaries are not saved across C calls, except for RB.
+-|.define RA, r1
++|.define RA, r4 // Overlaps CARG3.
+ |.define RB, r13 // Must be callee-save.
+ |.define RC, r5 // Overlaps CARG4.
+ |.define RD, r6 // Overlaps CARG5.
+@@ -65,7 +65,7 @@
+ |.define CRET1, r2
+ |
+ |.define OP, r2
+-|.define TMPR1, r14
++|.define TMPR1, r1
+ |.define TMPR2, r0
+ |
+ |// Stack layout while in interpreter. Must match with lj_frame.h.
+@@ -534,7 +534,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | st CARG3, SAVE_NRES
+ | lgr L:RB, CARG1
+ | stg CARG1, SAVE_L
+- | lgr RA, CARG2
++ | lgr RA, CARG2 // Caveat: RA = CARG3.
+ |
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
+@@ -574,8 +574,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg KBASE, L:RB->stack // Compute -savestack(L, L->top).
+ | sg KBASE, L:RB->top
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+- | lghi RA, 0
+- | stg RA, SAVE_ERRF // No error function.
++ | lghi TMPR2, 0
++ | stg TMPR2, SAVE_ERRF // No error function.
+ | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
+ | aghi DISPATCH, GG_G2DISP
+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
+@@ -644,9 +644,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | srlg RA, RA, 3
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+- | lgfr CARG3, RA
+- | lg RA, 0(RC)
+- | stg RA, 0(RB)
++ | lgfr CARG3, RA // Caveat: RA == CARG3.
++ | lg TMPR2, 0(RC)
++ | stg TMPR2, 0(RB)
+ | lgr CARG2, RB
+ | j ->BC_CAT_Z
+ |
+@@ -811,7 +811,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | la CARG2, 0(RA, BASE)
+- | la CARG3, 0(RD, BASE)
++ | la CARG3, 0(RD, BASE) // Caveat: RA == CARG3
+ | lgr CARG1, L:RB
+ | llgc CARG4, PC_OP
+ | stg PC, SAVE_PC
+@@ -878,7 +878,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+ | llgfr CARG2, RA
+- | llgfr CARG3, RD
++ | llgfr CARG3, RD // Caveat: CARG3 == RA.
+ | lgr L:CARG1, L:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+@@ -929,7 +929,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | la RA, 0(RA, BASE)
+ | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
+ | lgr CARG2, RA
+- | lgr CARG3, RB
++ | lgr CARG3, RB // Caveat: CARG3 == RA.
+ | // lgr CARG4, RC // Caveat: CARG4 == RC (nop, so commented out).
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+@@ -988,7 +988,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg BASE, L:CARG1->base
+ | lay CARG2, -16(RA)
+ | sllg RD, RD, 3
+- | lay CARG3, -8(RA, RD)
++ | lay CARG3, -8(RA, RD) // Caveat: CARG3 == RA.
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | lgr RA, RB
+
+From 828bf8499908771a95f26745492edce98b21040b Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 16 Jan 2017 16:14:51 -0500
+Subject: [PATCH 234/260] Rename TMPR2 as TMPR0.
+
+TMPR2 was r0 and so cannot be used in address calculations (or
+shift values). Renaming it TMPR0 makes this more obvious.
+---
+ src/vm_s390x.dasc | 234 +++++++++++++++++++++++-----------------------
+ 1 file changed, 117 insertions(+), 117 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 3ab8904cc..e43e77453 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -64,9 +64,9 @@
+ |
+ |.define CRET1, r2
+ |
+-|.define OP, r2
++|.define TMPR0, r0
+ |.define TMPR1, r1
+-|.define TMPR2, r0
++|.define OP, r2
+ |
+ |// Stack layout while in interpreter. Must match with lj_frame.h.
+ |.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
+@@ -272,10 +272,10 @@
+ |.macro branchPC, reg
+ | // TODO: optimize this, was just lea PC, [PC+reg*4-BCBIAS_J*4].
+ | // Can't clobber TMPR1 or condition code.
+-| lgr TMPR2, TMPR1 // Workaround because TMPR2 == r0 and can't be used in lay.
++| lgr TMPR0, TMPR1 // Workaround because TMPR0 == r0 and can't be used in lay.
+ | sllg TMPR1, reg, 2
+ | lay PC, (-BCBIAS_J*4)(TMPR1, PC)
+-| lgr TMPR1, TMPR2
++| lgr TMPR1, TMPR0
+ |.endmacro
+ |
+ |// Set current VM state.
+@@ -574,8 +574,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg KBASE, L:RB->stack // Compute -savestack(L, L->top).
+ | sg KBASE, L:RB->top
+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+- | lghi TMPR2, 0
+- | stg TMPR2, SAVE_ERRF // No error function.
++ | lghi TMPR0, 0
++ | stg TMPR0, SAVE_ERRF // No error function.
+ | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
+ | aghi DISPATCH, GG_G2DISP
+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
+@@ -606,8 +606,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | lgr RB, BASE
+ | sgr BASE, PC // Restore caller BASE.
+ | sllg TMPR1, RD, 3
+- | lghi TMPR2, LJ_TNIL
+- | stg TMPR2, -8(RA, TMPR1) // Ensure one valid arg.
++ | lghi TMPR0, LJ_TNIL
++ | stg TMPR0, -8(RA, TMPR1) // Ensure one valid arg.
+ | lgr RC, RA // ... in [RC]
+ | lg PC, -24(RB) // Restore PC from [cont|PC].
+ | lg RA, -32(RB)
+@@ -645,8 +645,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+ | lgfr CARG3, RA // Caveat: RA == CARG3.
+- | lg TMPR2, 0(RC)
+- | stg TMPR2, 0(RB)
++ | lg TMPR0, 0(RC)
++ | stg TMPR0, 0(RB)
+ | lgr CARG2, RB
+ | j ->BC_CAT_Z
+ |
+@@ -728,8 +728,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
+ | stg STR:RC, TMP_STACK
+ | la RC, TMP_STACK
+- | llgc TMPR2, PC_OP
+- | cghi TMPR2, BC_GSET
++ | llgc TMPR0, PC_OP
++ | cghi TMPR0, BC_GSET
+ | jne >1
+ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
+ | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
+@@ -835,16 +835,16 @@ static void build_subroutines(BuildCtx *ctx)
+ | la PC, 4(PC)
+ | lg ITYPE, 0(RC)
+ | srag ITYPE, ITYPE, 47
+- | lghi TMPR2, LJ_TISTRUECOND
+- | clr ITYPE, TMPR2 // Branch if result is true.
++ | lghi TMPR0, LJ_TISTRUECOND
++ | clr ITYPE, TMPR0 // Branch if result is true.
+ | jl <5
+ | j <6
+ |
+ |->cont_condf: // BASE = base, RC = result
+ | lg ITYPE, 0(RC)
+ | srag ITYPE, ITYPE, 47
+- | lghi TMPR2, LJ_TISTRUECOND
+- | clr ITYPE, TMPR2 // Branch if result is false.
++ | lghi TMPR0, LJ_TISTRUECOND
++ | clr ITYPE, TMPR0 // Branch if result is false.
+ | j <4
+ |
+ |->vmeta_equal:
+@@ -1041,8 +1041,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.macro .ffunc_n, name, op
+ | .ffunc_1 name
+- | lg TMPR2, 0(BASE)
+- | checknumtp TMPR2, ->fff_fallback
++ | lg TMPR0, 0(BASE)
++ | checknumtp TMPR0, ->fff_fallback
+ | op f0, 0(BASE) // TODO: might be better to unconditionally load into f1.
+ |.endmacro
+ |
+@@ -1053,11 +1053,11 @@ static void build_subroutines(BuildCtx *ctx)
+ |.macro .ffunc_nn, name
+ | .ffunc_2 name
+ | lg TMPR1, 0(BASE)
+- | lg TMPR2, 8(BASE)
++ | lg TMPR0, 8(BASE)
+ | ld FARG1, 0(BASE)
+ | ld FARG2, 8(BASE)
+ | checknumtp TMPR1, ->fff_fallback
+- | checknumtp TMPR2, ->fff_fallback
++ | checknumtp TMPR0, ->fff_fallback
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses label 1.
+@@ -1099,8 +1099,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | jnl >1
+ | lgr RC, RB
+ |1:
+- | lghi TMPR2, -1
+- | xgr RC, TMPR2
++ | lghi TMPR0, -1
++ | xgr RC, TMPR0
+ |2:
+ | lg CFUNC:RB, -16(BASE)
+ | cleartp CFUNC:RB
+@@ -1120,8 +1120,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+ | lg TAB:RB, TAB:RB->metatable
+ |2:
+- | lghi TMPR2, LJ_TNIL
+- | stg TMPR2, -16(BASE)
++ | lghi TMPR0, LJ_TNIL
++ | stg TMPR0, -16(BASE)
+ | cghi TAB:RB, 0
+ | je ->fff_res1
+ | settp TAB:RC, TAB:RB, LJ_TTAB
+@@ -1150,8 +1150,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | clfi ITYPE, LJ_TISNUM; jh >7
+ | lhi ITYPE, LJ_TISNUM
+ |7:
+- | lhi TMPR2, -1
+- | xr ITYPE, TMPR2 // not ITYPE
++ | lhi TMPR0, -1
++ | xr ITYPE, TMPR0 // not ITYPE
+ | llgfr ITYPE, ITYPE
+ | sllg ITYPE, ITYPE, 3
+ | lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH)
+@@ -1162,8 +1162,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | lgr TAB:TMPR1, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+ | // Fast path: no mt for table yet and not clearing the mt.
+- | lghi TMPR2, 0
+- | cg TMPR2, TAB:RB->metatable; jne ->fff_fallback
++ | lghi TMPR0, 0
++ | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback
+ | lg TAB:RA, 8(BASE)
+ | checktab TAB:RA, ->fff_fallback
+ | stg TAB:RA, TAB:RB->metatable
+@@ -1213,8 +1213,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->fff_res1
+ |3: // Handle numbers inline, unless a number base metatable is present.
+ | clfi ITYPE, LJ_TISNUM; jh ->fff_fallback_1
+- | lghi TMPR2, 0
+- | cg TMPR2, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH)
++ | lghi TMPR0, 0
++ | cg TMPR0, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH)
+ | jne ->fff_fallback
+ | ffgccheck // Caveat: uses label 1.
+ | lg L:RB, SAVE_L
+@@ -1256,12 +1256,12 @@ static void build_subroutines(BuildCtx *ctx)
+ | lghi RD, 1+2
+ | j ->fff_res
+ |2: // Set missing 2nd arg to nil.
+- | lghi TMPR2, LJ_TNIL
+- | stg TMPR2, 8(BASE)
++ | lghi TMPR0, LJ_TNIL
++ | stg TMPR0, 8(BASE)
+ | j <1
+ |3: // End of traversal: return nil.
+- | lghi TMPR2, LJ_TNIL
+- | stg TMPR2, -16(BASE)
++ | lghi TMPR0, LJ_TNIL
++ | stg TMPR0, -16(BASE)
+ | j ->fff_res1
+ |
+ |.ffunc_1 pairs
+@@ -1269,7 +1269,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lgr TMPR1, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+ #if LJ_52
+- | ltg TMPR2, TAB:RB->metatable; jne ->fff_fallback
++ | ltg TMPR0, TAB:RB->metatable; jne ->fff_fallback
+ #endif
+ | lg CFUNC:RD, -16(BASE)
+ | cleartp CFUNC:RD
+@@ -1278,8 +1278,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg PC, -8(BASE)
+ | stg CFUNC:RD, -16(BASE)
+ | stg TMPR1, -8(BASE)
+- | lghi TMPR2, LJ_TNIL
+- | stg TMPR2, 0(BASE)
++ | lghi TMPR0, LJ_TNIL
++ | stg TMPR0, 0(BASE)
+ | lghi RD, 1+3
+ | j ->fff_res
+ |
+@@ -1298,13 +1298,13 @@ static void build_subroutines(BuildCtx *ctx)
+ | sllg TMPR1, TMPR1, 3
+ | la RD, 0(TMPR1, RD)
+ |1:
+- | lg TMPR2, 0(RD)
+- | cghi TMPR2, LJ_TNIL; je ->fff_res0
++ | lg TMPR0, 0(RD)
++ | cghi TMPR0, LJ_TNIL; je ->fff_res0
+ | // Copy array slot.
+- | stg TMPR2, -8(BASE)
++ | stg TMPR0, -8(BASE)
+ | j ->fff_res2
+ |2: // Check for empty hash part first. Otherwise call C function.
+- | lt TMPR2, TAB:RB->hmask; je ->fff_res0
++ | lt TMPR0, TAB:RB->hmask; je ->fff_res0
+ | lgr CARG1, TAB:RB
+ | lgr RB, BASE // Save BASE. // TODO: needed?
+ | lgfr CARG2, RA
+@@ -1322,8 +1322,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | lgr TMPR1, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+ #if LJ_52
+- | lghi TMPR2, 0
+- | cg TMPR2, TAB:RB->metatable; jne ->fff_fallback
++ | lghi TMPR0, 0
++ | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback
+ #endif
+ | lg CFUNC:RD, -16(BASE)
+ | cleartp CFUNC:RD
+@@ -1375,7 +1375,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |.if resume
+ |.ffunc_1 coroutine_resume
+ | lg L:RB, 0(BASE)
+- | lgr L:TMPR2, L:RB // Save type for checktptp.
++ | lgr L:TMPR0, L:RB // Save type for checktptp.
+ | cleartp L:RB
+ |.else
+ |.ffunc coroutine_wrap_aux
+@@ -1388,9 +1388,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg PC, SAVE_PC
+ | stg L:RB, TMP_STACK
+ |.if resume
+- | checktptp L:TMPR2, LJ_TTHREAD, ->fff_fallback
++ | checktptp L:TMPR0, LJ_TTHREAD, ->fff_fallback
+ |.endif
+- | ltg TMPR2, L:RB->cframe; jne ->fff_fallback
++ | ltg TMPR0, L:RB->cframe; jne ->fff_fallback
+ | cli L:RB->status, LUA_YIELD; jh ->fff_fallback
+ | lg RA, L:RB->top
+ | je >1 // Status != LUA_YIELD (i.e. 0)?
+@@ -1523,8 +1523,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.ffunc coroutine_yield
+ | lg L:RB, SAVE_L
+- | lg TMPR2, L:RB->cframe
+- | tmll TMPR2, CFRAME_RESUME
++ | lg TMPR0, L:RB->cframe
++ | tmll TMPR0, CFRAME_RESUME
+ | je ->fff_fallback
+ | stg BASE, L:RB->base
+ | sllg RD, NARGS:RD, 3
+@@ -1585,8 +1585,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |6: // Fill up results with nil.
+ | sllg TMPR1, RD, 3
+- | lghi TMPR2, LJ_TNIL
+- | stg TMPR2, -24(TMPR1, BASE)
++ | lghi TMPR0, LJ_TNIL
++ | stg TMPR0, -24(TMPR1, BASE)
+ | la RD, 1(RD)
+ | j <5
+ |
+@@ -1612,9 +1612,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.ffunc math_log
+ | chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
+- | lg TMPR2, 0(BASE)
++ | lg TMPR0, 0(BASE)
+ | ld FARG1, 0(BASE)
+- | checknumtp TMPR2, ->fff_fallback
++ | checknumtp TMPR0, ->fff_fallback
+ | lgr RB, BASE
+ | brasl r14, extern log
+ | lgr BASE, RB
+@@ -1652,10 +1652,10 @@ static void build_subroutines(BuildCtx *ctx)
+ | math_extern2 fmod
+ |
+ |.ffunc_2 math_ldexp
+- | lg TMPR2, 0(BASE)
++ | lg TMPR0, 0(BASE)
+ | ld FARG1, 0(BASE)
+ | lg CARG1, 8(BASE)
+- | checknumtp TMPR2, ->fff_fallback
++ | checknumtp TMPR0, ->fff_fallback
+ | checkinttp CARG1, ->fff_fallback
+ | lgfr CARG1, CARG1
+ | lgr RB, BASE
+@@ -1695,11 +1695,11 @@ static void build_subroutines(BuildCtx *ctx)
+ | checkint RB, >4
+ |1: // Handle integers.
+ | clgr RA, TMPR1; jhe ->fff_resRB
+- | lg TMPR2, -8(RA, BASE)
+- | checkint TMPR2, >3
+- | cr RB, TMPR2
++ | lg TMPR0, -8(RA, BASE)
++ | checkint TMPR0, >3
++ | cr RB, TMPR0
+ | cjmp >2
+- | lgr RB, TMPR2
++ | lgr RB, TMPR0
+ |2:
+ | aghi RA, 8
+ | j <1
+@@ -1707,7 +1707,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | jh ->fff_fallback
+ | // Convert intermediate result to number and continue below.
+ | cdfbr f0, RB
+- | ldgr f1, TMPR2
++ | ldgr f1, TMPR0
+ | j >6
+ |4:
+ | jh ->fff_fallback
+@@ -1737,7 +1737,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg STR:RB, 0(BASE)
+ | checkstr STR:RB, ->fff_fallback
+ | lg PC, -8(BASE)
+- | ltg TMPR2, STR:RB->len
++ | ltg TMPR0, STR:RB->len
+ | je ->fff_res0 // Return no results for empty string.
+ | llgc RB, STR:RB[1]
+ | j ->fff_resi
+@@ -1907,8 +1907,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->fff_resbit
+ |
+ |.ffunc_bit bit_bnot, 1
+- | lhi TMPR2, -1
+- | xr RB, TMPR2 // TODO: use xilf on newer models?
++ | lhi TMPR0, -1
++ | xr RB, TMPR0 // TODO: use xilf on newer models?
+ | j ->fff_resbit
+ |
+ |->fff_fallback_bit_op:
+@@ -1943,9 +1943,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | checkint RA, ->fff_fallback
+ | // TODO: shorter sequence of instructions to convert right rotate into left rotate.
+ | nill RA, 0x1f
+- | lghi TMPR2, 32
+- | sr TMPR2, RA
+- | lr RA, TMPR2
++ | lghi TMPR0, 32
++ | sr TMPR0, RA
++ | lr RA, TMPR0
+ | rll RB, RB, 0(RA)
+ | j ->fff_resbit
+ |
+@@ -2050,9 +2050,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ | tmll RD, LUA_MASKLINE|LUA_MASKCOUNT
+ | je >5
+- | ly TMPR2, (DISPATCH_GL(hookcount))(DISPATCH)
+- | ahi TMPR2, -1
+- | sty TMPR2, (DISPATCH_GL(hookcount))(DISPATCH)
++ | ly TMPR0, (DISPATCH_GL(hookcount))(DISPATCH)
++ | ahi TMPR0, -1
++ | sty TMPR0, (DISPATCH_GL(hookcount))(DISPATCH)
+ | je >1
+ | tmll RD, LUA_MASKLINE
+ | je >5
+@@ -2103,8 +2103,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
+ | // ASMFunction returned in r2 (CRET1).
+- | lghi TMPR2, 0
+- | stg TMPR2, SAVE_PC // Invalidate for subsequent line hook.
++ | lghi TMPR0, 0
++ | stg TMPR0, SAVE_PC // Invalidate for subsequent line hook.
+ |.if JIT
+ | nill PC, -2
+ |.endif
+@@ -2604,8 +2604,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |3:
+ | jh ->vmeta_unm
+ | // Toggle sign bit.
+- | llihh TMPR2, 0x8000
+- | xgr RB, TMPR2
++ | llihh TMPR0, 0x8000
++ | xgr RB, TMPR0
+ | j <1
+ break;
+ case BC_LEN:
+@@ -2793,10 +2793,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | sllg RC, RC, 3
+ | ld FARG1, 0(RB, BASE)
+ | ld FARG2, 0(RC, BASE)
+- | lg TMPR2, 0(RB, BASE)
+- | checknumtp TMPR2, ->vmeta_arith_vvo
+- | lg TMPR2, 0(RC, BASE)
+- | checknumtp TMPR2, ->vmeta_arith_vvo
++ | lg TMPR0, 0(RB, BASE)
++ | checknumtp TMPR0, ->vmeta_arith_vvo
++ | lg TMPR0, 0(RC, BASE)
++ | checknumtp TMPR0, ->vmeta_arith_vvo
+ | lgr RB, BASE // TODO: redundant, BASE is currently callee-saved.
+ | brasl r14, extern pow // double pow(double x, double y), result in f0.
+ | llgc RA, PC_RA
+@@ -2874,8 +2874,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_AD // RA = dst, RD = primitive type (~)
+ | sllg RA, RA, 3
+ | sllg RD, RD, 47
+- | lghi TMPR2, -1
+- | xgr RD, TMPR2 // not
++ | lghi TMPR0, -1
++ | xgr RD, TMPR0 // not
+ | stg RD, 0(RA, BASE)
+ | ins_next
+ break;
+@@ -2994,8 +2994,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cleartp LFUNC:RB
+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
+ | sllg RD, RD, 47
+- | lghi TMPR2, -1
+- | xgr RD, TMPR2
++ | lghi TMPR0, -1
++ | xgr RD, TMPR0
+ | lg RA, UPVAL:RB->v
+ | stg RD, 0(RA)
+ | ins_next
+@@ -3004,7 +3004,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_AD // RA = level, RD = target
+ | branchPC RD // Do this first to free RD.
+ | lg L:RB, SAVE_L
+- | ltg TMPR2, L:RB->openupval
++ | ltg TMPR0, L:RB->openupval
+ | je >1
+ | stg BASE, L:RB->base
+ | sllg RA, RA, 3
+@@ -3046,9 +3046,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | jhe >5
+ |1:
+ | srlg CARG3, RD, 11
+- | llill TMPR2, 0x7ff
+- | nr RD, TMPR2
+- | cr RD, TMPR2
++ | llill TMPR0, 0x7ff
++ | nr RD, TMPR0
++ | cr RD, TMPR0
+ | je >3
+ |2:
+ | lgr L:CARG1, L:RB
+@@ -3094,8 +3094,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lgr L:CARG1, L:RB
+ | brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
+ | llgh RD, PC_RD // Need to reload RD.
+- | lghi TMPR2, -1
+- | xgr RD, TMPR2 // not RD
++ | lghi TMPR0, -1
++ | xgr RD, TMPR0 // not RD
+ | j <2
+ break;
+
+@@ -3259,8 +3259,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | llgfr RC, RC
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+- | lghi TMPR2, LJ_TNIL
+- | cg TMPR2, 0(RC)
++ | lghi TMPR0, LJ_TNIL
++ | cg TMPR0, 0(RC)
+ | je >3 // Previous value is nil?
+ |1:
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+@@ -3292,8 +3292,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_ABC // RA = src, RB = table, RC = str const (~)
+ | sllg RB, RB, 3
+ | lg TAB:RB, 0(RB, BASE)
+- | lghi TMPR2, -1
+- | xgr RC, TMPR2 // ~RC
++ | lghi TMPR0, -1
++ | xgr RC, TMPR0 // ~RC
+ | sllg RC, RC, 3
+ | lg STR:RC, 0(RC, KBASE)
+ | checktab TAB:RB, ->vmeta_tsets
+@@ -3309,8 +3309,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cg ITYPE, NODE:TMPR1->key
+ | jne >5
+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
+- | lghi TMPR2, LJ_TNIL
+- | cg TMPR2, 0(TMPR1)
++ | lghi TMPR0, LJ_TNIL
++ | cg TMPR0, 0(TMPR1)
+ | je >4 // Previous value is nil?
+ |2:
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+@@ -3369,8 +3369,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | jhe ->vmeta_tsetb
+ | sllg RC, RC, 3
+ | ag RC, TAB:RB->array
+- | lghi TMPR2, LJ_TNIL
+- | cg TMPR2, 0(RC)
++ | lghi TMPR0, LJ_TNIL
++ | cg TMPR0, 0(RC)
+ | je >3 // Previous value is nil?
+ |1:
+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+@@ -3578,10 +3578,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |1: // Traverse array part.
+ | clr RC, TMPR1; jhe >5 // Index points after array part?
+ | sllg RD, RC, 3 // Warning: won't work if RD==RC!
+- | lg TMPR2, 0(RD, ITYPE)
+- | cghi TMPR2, LJ_TNIL; je >4
++ | lg TMPR0, 0(RD, ITYPE)
++ | cghi TMPR0, LJ_TNIL; je >4
+ | // Copy array slot to returned value.
+- | lgr RB, TMPR2
++ | lgr RB, TMPR0
+ | stg RB, 8(RA, BASE)
+ | // Return array index as a numeric key.
+ | setint ITYPE, RC
+@@ -3605,8 +3605,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | llgfr ITYPE, RC
+ | mghi ITYPE, #NODE
+ | ag NODE:ITYPE, TAB:RB->node
+- | lghi TMPR2, LJ_TNIL
+- | cg TMPR2, NODE:ITYPE->val; je >7
++ | lghi TMPR0, LJ_TNIL
++ | cg TMPR0, NODE:ITYPE->val; je >7
+ | ar TMPR1, RC
+ | ahi TMPR1, 1
+ | // Copy key and value from hash slot.
+@@ -3629,8 +3629,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | checkfunc CFUNC:RB, >5
+ | lg TMPR1, -16(RA, BASE)
+ | checktptp TMPR1, LJ_TTAB, >5
+- | lghi TMPR2, LJ_TNIL
+- | cg TMPR2, -8(RA, BASE); jne >5
++ | lghi TMPR0, LJ_TNIL
++ | cg TMPR0, -8(RA, BASE); jne >5
+ | llgc TMPR1, CFUNC:RB->ffid
+ | clfi TMPR1, (uint8_t)FF_next_N; jne >5
+ | branchPC RD
+@@ -3640,8 +3640,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |1:
+ | ins_next
+ |5: // Despecialize bytecode if any of the checks fail.
+- | lghi TMPR2, BC_JMP
+- | stcy TMPR2, PC_OP
++ | lghi TMPR0, BC_JMP
++ | stcy TMPR0, PC_OP
+ | branchPC RD
+ | mvi 3(PC), BC_ITERC
+ | j <1
+@@ -3672,8 +3672,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | clgr TMPR1, BASE // No more vararg slots?
+ | jl <1
+ |2: // Fill up remainder with nil.
+- | lghi TMPR2, LJ_TNIL // TODO: move out of loop. Add NIL range macro?
+- | stg TMPR2, 0(RA)
++ | lghi TMPR0, LJ_TNIL // TODO: move out of loop. Add NIL range macro?
++ | stg TMPR0, 0(RA)
+ | la RA, 8(RA)
+ | clgr RA, RB
+ | jl <2
+@@ -3681,8 +3681,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ |
+ |5: // Copy all varargs.
+- | lghi TMPR2, 1
+- | st TMPR2, SAVE_MULTRES // MULTRES = 0+1
++ | lghi TMPR0, 1
++ | st TMPR0, SAVE_MULTRES // MULTRES = 0+1
+ | lgr RC, BASE
+ | slgr RC, TMPR1
+ | jno <3 // No vararg slots? (borrow or zero)
+@@ -3840,8 +3840,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ #ifdef LUA_USE_ASSERT
+ | // lg TMPR1, FOR_STOP
+ | checkinttp TMPR1, ->assert_bad_for_arg_type
+- | lg TMPR2, FOR_STEP
+- | checkinttp TMPR2, ->assert_bad_for_arg_type
++ | lg TMPR0, FOR_STEP
++ | checkinttp TMPR0, ->assert_bad_for_arg_type
+ #endif
+ | lg ITYPE, FOR_STEP
+ | chi ITYPE, 0; jl >5
+@@ -3903,14 +3903,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | jhe ->vmeta_for
+ }
+ if (!vk) {
+- | lg TMPR2, FOR_STOP
+- | checknumtp TMPR2, ->vmeta_for
++ | lg TMPR0, FOR_STOP
++ | checknumtp TMPR0, ->vmeta_for
+ } else {
+ #ifdef LUA_USE_ASSERT
+- | lg TMPR2, FOR_STOP
+- | checknumtp TMPR2, ->assert_bad_for_arg_type
+- | lg TMPR2, FOR_STEP
+- | checknumtp TMPR2, ->assert_bad_for_arg_type
++ | lg TMPR0, FOR_STOP
++ | checknumtp TMPR0, ->assert_bad_for_arg_type
++ | lg TMPR0, FOR_STEP
++ | checknumtp TMPR0, ->assert_bad_for_arg_type
+ #endif
+ }
+ | lg RB, FOR_STEP
+@@ -4042,8 +4042,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |3: // Clear missing parameters.
+ | // TODO: optimize this. Some of this can be hoisted.
+ | sllg TMPR1, NARGS:RD, 3
+- | lghi TMPR2, LJ_TNIL
+- | stg TMPR2, -8(TMPR1, BASE)
++ | lghi TMPR0, LJ_TNIL
++ | stg TMPR0, -8(TMPR1, BASE)
+ | la RD, 1(RD)
+ | clgr RD, RA
+ | jle <3
+
+From a40e905aecd38b33573958d9c0685b81c678726d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 16 Jan 2017 16:28:21 -0500
+Subject: [PATCH 235/260] Implement return hooks.
+
+---
+ src/vm_s390x.dasc | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index e43e77453..6b86c0323 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2040,8 +2040,10 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg r0, 0
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+- | stg r0, 0
+- | stg r0, 0
++ | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
++ | tmll RD, HOOK_ACTIVE
++ | jne >5
++ | j >1
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+ | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
+
+From 695c59703c3d0409de78630e5177e135a607894a Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 17 Jan 2017 09:46:02 -0500
+Subject: [PATCH 236/260] Swap register assignments for BASE and RB.
+
+Feels more natural this way round. Puts the RA/RB/RC/RD temporaries in the
+range [r4,r7], and BASE now lives in r13, the register conventionally used
+as the literal pool pointer, which seems appropriate.
+---
+ src/vm_s390x.dasc | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 6b86c0323..b5d5db62a 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -38,15 +38,15 @@
+ |//-----------------------------------------------------------------------
+ |
+ |// Fixed register assignments for the interpreter, callee-saved.
+-|.define BASE, r7 // Base of current Lua stack frame.
+ |.define KBASE, r8 // Constants of current Lua function.
+ |.define PC, r9 // Next PC.
+ |.define DISPATCH, r10 // Opcode dispatch table.
+ |.define ITYPE, r11 // Temporary used for type information.
++|.define BASE, r13 // Base of current Lua stack frame.
+ |
+ |// The following temporaries are not saved across C calls, except for RB.
+ |.define RA, r4 // Overlaps CARG3.
+-|.define RB, r13 // Must be callee-save.
++|.define RB, r7 // Must be callee-save.
+ |.define RC, r5 // Overlaps CARG4.
+ |.define RD, r6 // Overlaps CARG5.
+ |
+
+From e151edea531cf9341cebb1763136529ca8a39bb1 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 17 Jan 2017 14:05:31 -0500
+Subject: [PATCH 237/260] Avoid saving/restoring floating point registers when
+ entering the interpreter.
+
+We only need to worry about doing this if we actually use those
+floating point registers.
+---
+ src/vm_s390x.dasc | 18 +-----------------
+ 1 file changed, 1 insertion(+), 17 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index b5d5db62a..cb58cb951 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -103,26 +103,10 @@
+ |.macro saveregs
+ | stmg r6, r15, SAVE_GPRS_P
+ | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
+-| // TODO: save backchain?
+-| std f8, SAVE_FPR8 // f8-f15 are callee-saved.
+-| std f9, SAVE_FPR9
+-| std f10, SAVE_FPR10
+-| std f11, SAVE_FPR11
+-| std f12, SAVE_FPR12
+-| std f13, SAVE_FPR13
+-| std f14, SAVE_FPR14
+-| std f15, SAVE_FPR15
++| // f8-f15 are also callee-save but are not currently used in the interpreter.
+ |.endmacro
+ |
+ |.macro restoreregs
+-| ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
+-| ld f9, SAVE_FPR9
+-| ld f10, SAVE_FPR10
+-| ld f11, SAVE_FPR11
+-| ld f12, SAVE_FPR12
+-| ld f13, SAVE_FPR13
+-| ld f14, SAVE_FPR14
+-| ld f15, SAVE_FPR15
+ | lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
+ |.endmacro
+ |
+
+From 77852cbe0ba1fc1b77f87ebaefae99936a3e9752 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 17 Jan 2017 14:14:25 -0500
+Subject: [PATCH 238/260] Don't bother saving BASE before a call unless it will
+ be modified.
+
+BASE is callee-saved anyway, so we don't need to save it in RB.
+---
+ src/vm_s390x.dasc | 30 +-----------------------------
+ 1 file changed, 1 insertion(+), 29 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index cb58cb951..dbcd08b07 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -22,7 +22,7 @@
+ |// clfi (compare logical immediate) [requires z9-109]
+ |// ldgr (load FPR from GPR) [requires z9-109 GA3]
+ |// lgdr (load GPR from FPR) [requires z9-109 GA3]
+-|// lay (load address)  [requires z900 GA2]
++|// lay (load address) [requires z900 GA2]
+ |// ldy (load (long bfp)) [requires z900 GA2]
+ |// stdy (store (long bfp)) [requires z900 GA2]
+ |// TODO: alternative instructions?
+@@ -695,12 +695,10 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vmeta_tgetr:
+ | lgr CARG1, TAB:RB
+- | lgr RB, BASE // Save BASE.
+ | lgfr CARG2, RC
+ | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // cTValue * or NULL returned in r2 (CRET1).
+ | llgc RA, PC_RA
+- | lgr BASE, RB // Restore BASE.
+ | ltgr RC, CRET1
+ | jne ->BC_TGETR_Z
+ | lghi ITYPE, LJ_TNIL
+@@ -775,14 +773,12 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg L:CARG1, SAVE_L
+ | lgr CARG2, TAB:RB
+ | stg BASE, L:CARG1->base
+- | lgr RB, BASE // Save BASE (TODO: BASE is callee-saved anyway on s390x).
+ | lgfr CARG3, RC
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // TValue * returned in r2 (CRET1).
+ | lgr RC, CRET1
+ | llgc RA, PC_RA
+- | lgr BASE, RB // Restore BASE.
+ | j ->BC_TSETR_Z
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+@@ -1163,12 +1159,10 @@ static void build_subroutines(BuildCtx *ctx)
+ |.ffunc_2 rawget
+ | lg TAB:CARG2, 0(BASE)
+ | checktab TAB:CARG2, ->fff_fallback
+- | lgr RB, BASE // Save BASE.
+ | la CARG3, 8(BASE)
+ | lg CARG1, SAVE_L
+ | brasl r14, extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ | // cTValue * returned in r2 (CRET1).
+- | lgr BASE, RB // Restore BASE.
+ | // Copy table slot.
+ | lg RB, 0(CRET1)
+ | lg PC, -8(BASE)
+@@ -1290,11 +1284,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | lt TMPR0, TAB:RB->hmask; je ->fff_res0
+ | lgr CARG1, TAB:RB
+- | lgr RB, BASE // Save BASE. // TODO: needed?
+ | lgfr CARG2, RA
+ | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // cTValue * or NULL returned in r2 (CRET1).
+- | lgr BASE, RB
+ | ltgr RD, CRET1
+ | jne <1
+ |->fff_res0:
+@@ -1599,24 +1591,18 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg TMPR0, 0(BASE)
+ | ld FARG1, 0(BASE)
+ | checknumtp TMPR0, ->fff_fallback
+- | lgr RB, BASE
+ | brasl r14, extern log
+- | lgr BASE, RB
+ | j ->fff_resf0
+ |
+ |.macro math_extern, func
+ | .ffunc_n math_ .. func
+- | lgr RB, BASE
+ | brasl r14, extern func
+- | lgr BASE, RB
+ | j ->fff_resf0
+ |.endmacro
+ |
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
+- | lgr RB, BASE
+ | brasl r14, extern func
+- | lgr BASE, RB
+ | j ->fff_resf0
+ |.endmacro
+ |
+@@ -1642,16 +1628,12 @@ static void build_subroutines(BuildCtx *ctx)
+ | checknumtp TMPR0, ->fff_fallback
+ | checkinttp CARG1, ->fff_fallback
+ | lgfr CARG1, CARG1
+- | lgr RB, BASE
+ | brasl r14, extern ldexp // (double, int)
+- | lgr BASE, RB
+ | j ->fff_resf0
+ |
+ |.ffunc_n math_frexp
+- | lgr RB, BASE
+ | la CARG1, TMP_STACK
+ | brasl r14, extern frexp
+- | lgr BASE, RB
+ | llgf RB, TMP_STACK
+ | lg PC, -8(BASE)
+ | stdy f0, -16(BASE)
+@@ -1661,10 +1643,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->fff_res
+ |
+ |.ffunc_n math_modf
+- | lgr RB, BASE
+ | lay CARG1, -16(BASE)
+ | brasl r14, extern modf // (double, double*)
+- | lgr BASE, RB
+ | lg PC, -8(BASE)
+ | stdy f0, -8(BASE)
+ | lghi RD, 1+2
+@@ -2615,11 +2595,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |3:
+ #endif
+ |->BC_LEN_Z:
+- | lgr RB, BASE // Save BASE.
+ | brasl r14, extern lj_tab_len // (GCtab *t)
+ | // Length of table returned in r2 (CRET1).
+ | lgr RD, CRET1
+- | lgr BASE, RB // Restore BASE.
+ | llgc RA, PC_RA
+ | j <1
+ #if LJ_52
+@@ -2783,10 +2761,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | checknumtp TMPR0, ->vmeta_arith_vvo
+ | lg TMPR0, 0(RC, BASE)
+ | checknumtp TMPR0, ->vmeta_arith_vvo
+- | lgr RB, BASE // TODO: redundant, BASE is currently callee-saved.
+ | brasl r14, extern pow // double pow(double x, double y), result in f0.
+ | llgc RA, PC_RA
+- | lgr BASE, RB
+ | sllg RA, RA, 3
+ | std f0, 0(RA, BASE)
+ | ins_next
+@@ -2925,10 +2901,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | je <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | lgr CARG2, RB
+- | lgr RB, BASE // Save BASE.
+ | lay GL:CARG1, GG_DISP2G(DISPATCH)
+ | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+- | lgr BASE, RB // Restore BASE.
+ | j <1
+ break;
+ #undef TV2MARKOFS
+@@ -2954,11 +2928,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | tm UPVAL:RB->closed, 0xff
+ | je <1
+ | // Crossed a write barrier. Move the barrier forward.
+- | lgr RB, BASE
+ | lgr CARG2, RD
+ | lay GL:CARG1, GG_DISP2G(DISPATCH)
+ | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+- | lgr BASE, RB // Restore BASE.
+ | j <1
+ break;
+ case BC_USETN:
+
+From 9d0846ea9182d1176b94dd21258bea2319aa92fd Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 17 Jan 2017 17:26:00 -0500
+Subject: [PATCH 239/260] Add debug frame information to the interpreter.
+
+Allows gdb to backtrace from inside the interpreter. Still need to
+add FFI support.
+---
+ src/vm_s390x.dasc | 43 +++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 43 insertions(+)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index dbcd08b07..358fb767f 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -4123,4 +4123,47 @@ static int build_backend(BuildCtx *ctx)
+ /* Emit pseudo frame-info for all assembler functions. */
+ static void emit_asm_debug(BuildCtx *ctx)
+ {
++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
++ switch (ctx->mode) {
++ case BUILD_elfasm:
++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
++ fprintf(ctx->fp,
++ ".Lframe0:\n"
++ "\t.long .LECIE0-.LSCIE0\n" /* CIE length */
++ ".LSCIE0:\n"
++ "\t.long 0xffffffff\n" /* CIE id (marks a CIE in .debug_frame) */
++ "\t.byte 0x1\n" /* CIE version */
++ "\t.string \"\"\n" /* augmentation string (none) */
++ "\t.uleb128 1\n" /* code alignment factor */
++ "\t.sleb128 -8\n" /* data alignment factor: saved-reg offsets below are in units of -8 */
++ "\t.byte 0xe\n" /* return address register (r14) */
++ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" /* def_cfa r15, 160 */
++ "\t.align 8\n"
++ ".LECIE0:\n\n");
++ fprintf(ctx->fp,
++ ".LSFDE0:\n"
++ "\t.long .LEFDE0-.LASFDE0\n" /* FDE length */
++ ".LASFDE0:\n"
++ "\t.long .Lframe0\n" /* CIE pointer */
++ "\t.quad .Lbegin\n" /* initial location */
++ "\t.quad %d\n" /* address range: fcofs = offset of vm_ffi_call in ctx->code */
++ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
++ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
++ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
++ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
++ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
++ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
++ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
++ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
++ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
++ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
++ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
++ "\t.align 8\n"
++ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160);
++ /* TODO: FFI, UNWIND */
++ break;
++ default: /* No other modes. */
++ break;
++ }
++
+ }
+
+From 0a18b89a04aa463b308e99710edc6c205bc87070 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 18 Jan 2017 12:38:53 -0500
+Subject: [PATCH 240/260] Implement the LUAJIT_UNWIND_EXTERNAL option.
+
+Still need to add support for FFI.
+---
+ src/lj_err.c | 3 +++
+ src/vm_s390x.dasc | 43 ++++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 45 insertions(+), 1 deletion(-)
+
+diff --git a/src/lj_err.c b/src/lj_err.c
+index 600e6ee65..ff94254ed 100644
+--- a/src/lj_err.c
++++ b/src/lj_err.c
+@@ -234,6 +234,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
+ return _URC_FATAL_PHASE1_ERROR;
+ UNUSED(uexclass);
+ cf = (void *)_Unwind_GetCFA(ctx);
++#ifdef LJ_TARGET_S390X
++ cf -= 160; /* CFA points 160 bytes above r15. */
++#endif
+ L = cframe_L(cf);
+ if ((actions & _UA_SEARCH_PHASE)) {
+ #if LJ_UNWIND_EXT
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 358fb767f..f2dd30b7c 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -4160,7 +4160,48 @@ static void emit_asm_debug(BuildCtx *ctx)
+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
+ "\t.align 8\n"
+ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160);
+- /* TODO: FFI, UNWIND */
++#if !LJ_NO_UNWIND
++ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
++ fprintf(ctx->fp,
++ ".Lframe1:\n"
++ "\t.long .LECIE1-.LSCIE1\n"
++ ".LSCIE1:\n"
++ "\t.long 0\n"
++ "\t.byte 0x1\n"
++ "\t.string \"zPR\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -8\n"
++ "\t.byte 0xe\n"
++ "\t.uleb128 6\n" /* augmentation length */
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.long lj_err_unwind_dwarf-.\n"
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n"
++ "\t.align 8\n"
++ ".LECIE1:\n\n");
++ fprintf(ctx->fp,
++ ".LSFDE2:\n"
++ "\t.long .LEFDE2-.LASFDE2\n"
++ ".LASFDE2:\n"
++ "\t.long .LASFDE2-.Lframe1\n"
++ "\t.long .Lbegin-.\n"
++ "\t.long %d\n"
++ "\t.uleb128 0\n" /* augmentation length */
++ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
++ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
++ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
++ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
++ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
++ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
++ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
++ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
++ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
++ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
++ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
++ "\t.align 8\n"
++ ".LEFDE2:\n\n", fcofs, CFRAME_SIZE+160);
++#endif
++ /* TODO: FFI */
+ break;
+ default: /* No other modes. */
+ break;
+
+From 34394c49b16d6204852ade5cb60f3e917e2f637b Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 18 Jan 2017 14:18:40 -0500
+Subject: [PATCH 241/260] Add file change missing from previous commit (needed
+ for EXT unwinding).
+
+---
+ src/lj_arch.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index d17884e50..3ce9a7ff8 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -362,7 +362,7 @@
+ #define LJ_ARCH_BITS 64
+ #define LJ_ARCH_ENDIAN LUAJIT_BE
+ #define LJ_TARGET_S390X 1
+-#define LJ_TARGET_EHRETREG 0
++#define LJ_TARGET_EHRETREG 0xe
+ #define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */
+ #define LJ_TARGET_MASKSHIFT 1
+ #define LJ_TARGET_MASKROT 1
+
+From e7e346f529c829ba7b73def74ce68c139f826e1d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 18 Jan 2017 15:08:58 -0500
+Subject: [PATCH 242/260] Implement support for unwinding through FFI stack
+ frames.
+
+The DWARF here is a little incomplete: unwinding won't work while
+executing the first two instructions in vm_ffi_call.
+---
+ src/vm_s390x.dasc | 68 ++++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 65 insertions(+), 3 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index f2dd30b7c..f58d36903 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2161,8 +2161,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |.if FFI
+ | .type CCSTATE, CCallState, r8
+ | stmg r6, r15, 48(sp)
++ | lgr r13, sp // Use r13 as frame pointer.
+ | lgr CCSTATE, CARG1
+- | lg r7, CCSTATE->func // TODO: move further up?
++ | lg r7, CCSTATE->func
+ |
+ | // Readjust stack.
+ | sgf sp, CCSTATE->spadj
+@@ -2183,7 +2184,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | stg CRET1, CCSTATE->gpr[0]
+ | std f0, CCSTATE->fpr[0]
+ |
+- | agf sp, CCSTATE->spadj
++ | lgr sp, r13
+ | lmg r6, r15, 48(sp)
+ | br r14
+ |
+@@ -4160,6 +4161,29 @@ static void emit_asm_debug(BuildCtx *ctx)
+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
+ "\t.align 8\n"
+ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160);
++#if LJ_HASFFI
++ fprintf(ctx->fp,
++ ".LSFDE1:\n"
++ "\t.long .LEFDE1-.LASFDE1\n"
++ ".LASFDE1:\n"
++ "\t.long .Lframe0\n"
++ "\t.quad lj_vm_ffi_call\n"
++ "\t.quad %d\n"
++ "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */
++ "\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */
++ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
++ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
++ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
++ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
++ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
++ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
++ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
++ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
++ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
++ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
++ "\t.align 8\n"
++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
++#endif
+ #if !LJ_NO_UNWIND
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+ fprintf(ctx->fp,
+@@ -4200,11 +4224,49 @@ static void emit_asm_debug(BuildCtx *ctx)
+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
+ "\t.align 8\n"
+ ".LEFDE2:\n\n", fcofs, CFRAME_SIZE+160);
++#if LJ_HASFFI
++ fprintf(ctx->fp,
++ ".Lframe2:\n"
++ "\t.long .LECIE2-.LSCIE2\n"
++ ".LSCIE2:\n"
++ "\t.long 0\n"
++ "\t.byte 0x1\n"
++ "\t.string \"zR\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -8\n"
++ "\t.byte 0xe\n"
++ "\t.uleb128 1\n" /* augmentation length */
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n"
++ "\t.align 8\n"
++ ".LECIE2:\n\n");
++ fprintf(ctx->fp,
++ ".LSFDE3:\n"
++ "\t.long .LEFDE3-.LASFDE3\n"
++ ".LASFDE3:\n"
++ "\t.long .LASFDE3-.Lframe2\n"
++ "\t.long lj_vm_ffi_call-.\n"
++ "\t.long %d\n"
++ "\t.uleb128 0\n" /* augmentation length */
++ "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */
++ "\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */
++ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
++ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
++ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
++ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
++ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
++ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
++ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
++ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
++ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
++ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
++ "\t.align 8\n"
++ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
++#endif
+ #endif
+ /* TODO: FFI */
+ break;
+ default: /* No other modes. */
+ break;
+ }
+-
+ }
+
+From 908528d80157e219654f094dff3ee154cd69e8fb Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 18 Jan 2017 15:39:39 -0500
+Subject: [PATCH 243/260] Revert "Avoid saving/restoring floating point
+ registers when entering the interpreter."
+
+This reverts commit e151edea531cf9341cebb1763136529ca8a39bb1.
+---
+ src/vm_s390x.dasc | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index f58d36903..d1cf95243 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -103,10 +103,26 @@
+ |.macro saveregs
+ | stmg r6, r15, SAVE_GPRS_P
+ | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
+-| // f8-f15 are also callee-save but are not currently used in the interpreter.
++| // TODO: save backchain?
++| std f8, SAVE_FPR8 // f8-f15 are callee-saved.
++| std f9, SAVE_FPR9
++| std f10, SAVE_FPR10
++| std f11, SAVE_FPR11
++| std f12, SAVE_FPR12
++| std f13, SAVE_FPR13
++| std f14, SAVE_FPR14
++| std f15, SAVE_FPR15
+ |.endmacro
+ |
+ |.macro restoreregs
++| ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
++| ld f9, SAVE_FPR9
++| ld f10, SAVE_FPR10
++| ld f11, SAVE_FPR11
++| ld f12, SAVE_FPR12
++| ld f13, SAVE_FPR13
++| ld f14, SAVE_FPR14
++| ld f15, SAVE_FPR15
+ | lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
+ |.endmacro
+ |
+
+From d475b5b93e30562abce91a7a3405356eb3ee6f74 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 18 Jan 2017 15:43:24 -0500
+Subject: [PATCH 244/260] Add some TODOs to the saveregs and restoreregs
+ macros.
+
+When unwinding the stack using the internal unwinder we may need to
+restore floating point registers clobbered by C calls. Since I'm
+not sure yet I'm going to be conservative and save/restore them
+for now. Most probably we want to, at the very least, avoid restoring
+them when cleanly exiting the interpreter.
+---
+ src/vm_s390x.dasc | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index d1cf95243..637a174b0 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -104,6 +104,7 @@
+ | stmg r6, r15, SAVE_GPRS_P
+ | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
+ | // TODO: save backchain?
++| // TODO: is it necessary to save all float registers?
+ | std f8, SAVE_FPR8 // f8-f15 are callee-saved.
+ | std f9, SAVE_FPR9
+ | std f10, SAVE_FPR10
+@@ -115,6 +116,7 @@
+ |.endmacro
+ |
+ |.macro restoreregs
++| // TODO: restore float registers only when unwinding?
+ | ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
+ | ld f9, SAVE_FPR9
+ | ld f10, SAVE_FPR10
+
+From 5f72b2313dd067bba89e58207b6b90e43f3f5d65 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 18 Jan 2017 16:18:56 -0500
+Subject: [PATCH 245/260] Remove unnecessary register moves.
+
+---
+ src/vm_s390x.dasc | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 637a174b0..41b11cca4 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -272,12 +272,9 @@
+ |.define PC_RD, -4(PC)
+ |
+ |.macro branchPC, reg
+-| // TODO: optimize this, was just lea PC, [PC+reg*4-BCBIAS_J*4].
+-| // Can't clobber TMPR1 or condition code.
+-| lgr TMPR0, TMPR1 // Workaround because TMPR0 == r0 and can't be used in lay.
++| // Must not clobber condition code.
+ | sllg TMPR1, reg, 2
+ | lay PC, (-BCBIAS_J*4)(TMPR1, PC)
+-| lgr TMPR1, TMPR0
+ |.endmacro
+ |
+ |// Set current VM state.
+
+From 7b6aa863c2f8277217adc9683c37ddb6b8689247 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Wed, 18 Jan 2017 16:36:32 -0500
+Subject: [PATCH 246/260] Remove TODOs for branch on index.
+
+It is probably not suitable (relies on even-odd register numbering).
+---
+ src/vm_s390x.dasc | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 41b11cca4..f1664de2d 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1418,7 +1418,6 @@ static void build_subroutines(BuildCtx *ctx)
+ |2: // Move args to coroutine.
+ | lg RC, 0(RB, PC)
+ | stg RC, -8(PC)
+- | // TODO: replace with branch on count/index?
+ | lay PC, -8(PC)
+ | cgr PC, RA
+ | jne <2
+@@ -1455,7 +1454,6 @@ static void build_subroutines(BuildCtx *ctx)
+ |5: // Move results from coroutine.
+ | lg RD, 0(RA)
+ | stg RD, 0(RA, RB)
+- | // TODO: branch on count/index?
+ | la RA, 8(RA)
+ | cgr RA, KBASE
+ | jne <5
+
+From f13d2314e0ac1490aee40d0c6ae399fe700c76d2 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 30 Jan 2017 15:54:47 -0500
+Subject: [PATCH 247/260] Use z10 instructions more frequently.
+
+It would be nice to support the base z/Architecture but it has
+quite a big impact on performance to do so. z10 gives us most
+of the desirable instructions. It should be possible to emulate the
+instructions if earlier machines were ever targeted.
+---
+ src/vm_s390x.dasc | 29 +++++++++++------------------
+ 1 file changed, 11 insertions(+), 18 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index f1664de2d..73966f83d 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -2,6 +2,9 @@
+ |// Bytecode interpreter, fast functions and helper functions.
+ |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+ |
++|// This assembly targets the instruction set available on z10 (and newer)
++|// machines.
++|
+ |// ELF ABI registers:
+ |// r0,r1 | | volatile |
+ |// r2 | parameter and return value | volatile |
+@@ -18,15 +21,6 @@
+ |// ar0,ar1 | TLS | volatile |
+ |// ar2-ar15 | | volatile |
+ |
+-|// Instructions used that are not in base z/Architecture:
+-|// clfi (compare logical immediate) [requires z9-109]
+-|// ldgr (load FPR from GPR) [requires z9-109 GA3]
+-|// lgdr (load GPR from FPR) [requires z9-109 GA3]
+-|// lay (load address) [requires z900 GA2]
+-|// ldy (load (long bfp)) [requires z900 GA2]
+-|// stdy (store (long bfp)) [requires z900 GA2]
+-|// TODO: alternative instructions?
+-|
+ |.arch s390x
+ |.section code_op, code_sub
+ |
+@@ -213,14 +207,14 @@
+ |//-----------------------------------------------------------------------
+ |
+ |// Macros to clear or set tags.
+-|.macro cleartp, reg; sllg reg, reg, 17; srlg reg, reg, 17; .endmacro // TODO: use nihf instead? would introduce dependence on z9-109.
++|.macro cleartp, reg
++| nihf reg, 0x7fff
++|.endmacro
+ |.macro settp, reg, tp
+-| oihh reg, ((tp>>1) &0xffff)
+-| oihl reg, ((tp<<15)&0x8000)
++| oihf reg, tp<<15
+ |.endmacro
+ |.macro settp, dst, reg, tp
+-| llihh dst, ((tp>>1) &0xffff)
+-| iihl dst, ((tp<<15)&0x8000)
++| llihf dst, tp<<15
+ | ogr dst, reg
+ |.endmacro
+ |.macro setint, reg
+@@ -1238,7 +1232,6 @@ static void build_subroutines(BuildCtx *ctx)
+ | brasl r14, extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+ | // Flag returned in r2 (CRET1).
+ | lg BASE, L:RB->base
+- | lgr RD, CRET1 // TODO: high bits needed? low bits load/test (ltr) enough?
+ | ltr RD, CRET1; je >3 // End of traversal?
+ | // Copy key and value to results.
+ | lg RB, 8(BASE)
+@@ -1323,7 +1316,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg PC, -8(BASE)
+ | stg CFUNC:RD, -16(BASE)
+ | stg TMPR1, -8(BASE)
+- | llihh RD, ((int)LJ_TISNUM)>>1 // mov64 RD, ((int64_t)LJ_TISNUM<<47) // TODO: write mov64-macro, use all of TISNUM (currently this is very fragile).
++ | llihf RD, LJ_TISNUM<<15
+ | stg RD, 0(BASE)
+ | lghi RD, 1+3
+ | j ->fff_res
+@@ -1768,7 +1761,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | jle >7
+ |3:
+ | sr TMPR1, RA // start > end?
+- | jnhe ->fff_emptystr // TODO: not sure about this, was jl in x64.
++ | jnhe ->fff_emptystr
+ | la RD, (#STR-1)(RA, STR:RB)
+ | ahi TMPR1, 1
+ |4:
+@@ -3444,11 +3437,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+
+ case BC_CALL: case BC_CALLM:
+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
++ | sllg RA, RA, 3
+ | lgr RD, RC
+ if (op == BC_CALLM) {
+ | agf NARGS:RD, SAVE_MULTRES
+ }
+- | sllg RA, RA, 3
+ | lg LFUNC:RB, 0(RA, BASE)
+ | checkfunc LFUNC:RB, ->vmeta_call_ra
+ | la BASE, 16(RA, BASE)
+
+From a06bfc99f7822c07f7b7af1f3bfcb10ec9290e16 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 30 Jan 2017 16:12:50 -0500
+Subject: [PATCH 248/260] Remove various TODOs.
+
+---
+ src/vm_s390x.dasc | 36 ++++++++++++++----------------------
+ 1 file changed, 14 insertions(+), 22 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 73966f83d..ce5e14c8c 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -97,8 +97,6 @@
+ |.macro saveregs
+ | stmg r6, r15, SAVE_GPRS_P
+ | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
+-| // TODO: save backchain?
+-| // TODO: is it necessary to save all float registers?
+ | std f8, SAVE_FPR8 // f8-f15 are callee-saved.
+ | std f9, SAVE_FPR9
+ | std f10, SAVE_FPR10
+@@ -110,7 +108,6 @@
+ |.endmacro
+ |
+ |.macro restoreregs
+-| // TODO: restore float registers only when unwinding?
+ | ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
+ | ld f9, SAVE_FPR9
+ | ld f10, SAVE_FPR10
+@@ -1032,7 +1029,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | .ffunc_1 name
+ | lg TMPR0, 0(BASE)
+ | checknumtp TMPR0, ->fff_fallback
+- | op f0, 0(BASE) // TODO: might be better to unconditionally load into f1.
++ | op f0, 0(BASE)
+ |.endmacro
+ |
+ |.macro .ffunc_n, name
+@@ -1877,8 +1874,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->fff_resbit
+ |
+ |.ffunc_bit bit_bnot, 1
+- | lhi TMPR0, -1
+- | xr RB, TMPR0 // TODO: use xilf on newer models?
++ | xilf RB, -1
+ | j ->fff_resbit
+ |
+ |->fff_fallback_bit_op:
+@@ -1891,7 +1887,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+ | nill RA, 0x1f // Limit shift to 5-bits.
+- | ins RB, 0(RA) // TODO: fix shift args in DynASM.
++ | ins RB, 0(RA)
+ | j ->fff_resbit
+ |.endmacro
+ |
+@@ -2119,7 +2115,6 @@ static void build_subroutines(BuildCtx *ctx)
+ |// Value to round is in f0. May clobber f0-f7 and r0. Return address is r14.
+ |.macro vm_round, name, mask
+ |->name:
+- | // TODO: handle edge cases?
+ | lghi r0, 1
+ | cdfbr f1, r0
+ | didbr f0, f2, f1, mask // f0=remainder, f2=quotient.
+@@ -2211,7 +2206,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | ahi r1, -1
+ | jl <1
+ | larl r9, >5
+- | ex r1, 0(r9) // TODO: exrl is faster but needs z10.
++ | ex r1, 0(r9)
+ | j <1
+ |
+ |5:
+@@ -2394,9 +2389,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | tm TAB:RB->nomm, 1<<MM_eq
+ | jne <2 // Or 'no __eq' flag set?
+ if (vk) {
+- | lghi RB, 0 // ne = 0 // TODO: should be 32-bit?
++ | lghi RB, 0 // ne = 0
+ } else {
+- | lghi RB, 1 // ne = 1 // TODO: should be 32-bit?
++ | lghi RB, 1 // ne = 1
+ }
+ | j ->vmeta_equal // Handle __eq metamethod.
+ } else {
+@@ -2524,7 +2519,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+
+ case BC_ISTYPE:
+ | ins_AD // RA = src, RD = -type
+- | lghr RD, RD // TODO: always sign extend RD?
++ | lghr RD, RD
+ | sllg RA, RA, 3
+ | lg RB, 0(RA, BASE)
+ | srag RB, RB, 47
+@@ -2555,7 +2550,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | srag RB, RB, 47
+ | load_false RC
+ | cghi RB, LJ_TTRUE
+- | je >1 // TODO: Maybe do something fancy to avoid the jump?
++ | je >1
+ | load_true RC
+ |1:
+ | stg RC, 0(RA, BASE)
+@@ -3133,7 +3128,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | l TMPR1, TAB:RB->hmask
+ | n TMPR1, STR:RC->hash
+ | lgfr TMPR1, TMPR1
+- | mghi TMPR1, #NODE // TODO: not sure about this one, original: imul TMPRd, #NODE
++ | mghi TMPR1, #NODE
+ | ag NODE:TMPR1, TAB:RB->node
+ | settp ITYPE, STR:RC, LJ_TSTR
+ |1:
+@@ -3310,7 +3305,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg ITYPE, TMP_STACK
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+- | la CARG3, TMP_STACK // TODO: lea CARG3, ITYPE... not sure.
++ | la CARG3, TMP_STACK
+ | lgr CARG2, TAB:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+@@ -3470,7 +3465,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | aghi NARGS:RD, -1
+ | je >3
+ |2: // Move args down.
+- | // TODO: mvc or something here?
+ | lg RB, 0(RA)
+ | la RA, 8(RA)
+ | stg RB, 0(KBASE)
+@@ -3536,7 +3530,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | sllg RA, RA, 3
+ | lg TAB:RB, -16(RA, BASE)
+ | cleartp TAB:RB
+- | llgf RC, -4(RA, BASE) // Get index from control var. // TODO: ENDIANNESS DRAGONS.
++ | llgf RC, -4(RA, BASE) // Get index from control var.
+ | llgf TMPR1, TAB:RB->asize
+ | la PC, 4(PC)
+ | lg ITYPE, TAB:RB->array
+@@ -3552,7 +3546,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | setint ITYPE, RC
+ | stg ITYPE, 0(RA, BASE)
+ | ahi RC, 1
+- | sty RC, -4(RA, BASE) // Update control var. // TODO: ENDIANNESS DRAGONS
++ | sty RC, -4(RA, BASE) // Update control var.
+ |2:
+ | llgh RD, PC_RD // Get target from ITERL.
+ | branchPC RD
+@@ -3579,7 +3573,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lg RC, NODE:ITYPE->val
+ | stg RB, 0(RA, BASE)
+ | stg RC, 8(RA, BASE)
+- | sty TMPR1, -4(RA, BASE) // TODO: ENDIANNESS DRAGONS
++ | sty TMPR1, -4(RA, BASE)
+ | j <2
+ |
+ |7: // Skip holes in hash part.
+@@ -3613,7 +3607,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_VARG:
+- | // TODO: some opportunities for branch on index in here.
+ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
+ | sllg RA, RA, 3
+ | sllg RB, RB, 3
+@@ -4091,7 +4084,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ } else {
+ | // (lua_State *L, lua_CFunction f)
+ | lg TMPR1, (DISPATCH_GL(wrapf))(DISPATCH)
+- | basr r14, TMPR1 // TODO: TMPR1==r14, is this ok?
++ | basr r14, TMPR1
+ }
+ | // nresults returned in r2 (CRET1).
+ | lgr RD, CRET1
+@@ -4270,7 +4263,6 @@ static void emit_asm_debug(BuildCtx *ctx)
+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+ #endif
+ #endif
+- /* TODO: FFI */
+ break;
+ default: /* No other modes. */
+ break;
+
+From 8141ca5d63f54cb9ba86b18d3b7cc377af17edfb Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 30 Jan 2017 16:24:06 -0500
+Subject: [PATCH 249/260] Simplify right rotations.
+
+---
+ src/vm_s390x.dasc | 7 +------
+ 1 file changed, 1 insertion(+), 6 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index ce5e14c8c..5ebd20c00 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -1899,7 +1899,6 @@ static void build_subroutines(BuildCtx *ctx)
+ | // Note: no inline conversion from number for 2nd argument!
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+- | // Note: no need to limit rotate to 5-bits (wraps).
+ | rll RB, RB, 0(RA)
+ | j ->fff_resbit
+ |
+@@ -1907,11 +1906,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | // Note: no inline conversion from number for 2nd argument!
+ | lg RA, 8(BASE)
+ | checkint RA, ->fff_fallback
+- | // TODO: shorter sequence of instructions to convert right rotate into left rotate.
+- | nill RA, 0x1f
+- | lghi TMPR0, 32
+- | sr TMPR0, RA
+- | lr RA, TMPR0
++ | lcr RA, RA // Right rotate equivalent to negative left rotate.
+ | rll RB, RB, 0(RA)
+ | j ->fff_resbit
+ |
+
+From 58460de58fd1fe3eae8a17597e0fa91a69bcff2e Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 30 Jan 2017 16:40:32 -0500
+Subject: [PATCH 250/260] Hoist some loop invariants.
+
+---
+ src/vm_s390x.dasc | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 5ebd20c00..939aea3b4 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -3614,6 +3614,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | je >5 // Copy all varargs?
+ | lay RB, -8(RA, RB)
+ | clgr TMPR1, BASE // No vararg slots?
++ | lghi TMPR0, LJ_TNIL
+ | jnl >2
+ |1: // Copy vararg slots to destination slots.
+ | lg RC, -16(TMPR1)
+@@ -3625,7 +3626,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | clgr TMPR1, BASE // No more vararg slots?
+ | jl <1
+ |2: // Fill up remainder with nil.
+- | lghi TMPR0, LJ_TNIL // TODO: move out of loop. Add NIL range macro?
+ | stg TMPR0, 0(RA)
+ | la RA, 8(RA)
+ | clgr RA, RB
+@@ -3993,13 +3993,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ }
+ |
+ |3: // Clear missing parameters.
+- | // TODO: optimize this. Some of this can be hoisted.
+ | sllg TMPR1, NARGS:RD, 3
+ | lghi TMPR0, LJ_TNIL
++ |4:
+ | stg TMPR0, -8(TMPR1, BASE)
++ | la TMPR1, 8(TMPR1)
+ | la RD, 1(RD)
+ | clgr RD, RA
+- | jle <3
++ | jle <4
+ | j <2
+ break;
+
+
+From 22a95498b687e185e22e72b4c2e798bd02677501 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Mon, 30 Jan 2017 16:44:15 -0500
+Subject: [PATCH 251/260] Delete old BUG comment.
+
+---
+ src/vm_s390x.dasc | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 939aea3b4..8238ae57a 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -4092,7 +4092,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lcgr RA, RA
+ | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
+ | lg PC, -8(BASE) // Fetch PC of caller.
+- | // BUG: PC seems to be -1 here sometimes. Not yet sure why.
+ | j ->vm_returnc
+ break;
+
+
+From e1e348315f125ee7da55d0ea4efaef8e8c31fc3d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 31 Jan 2017 13:58:25 -0500
+Subject: [PATCH 252/260] s/TMP_STACK/SAVE_TMP/
+
+More in line with the naming of the other stack variables.
+---
+ src/vm_s390x.dasc | 54 +++++++++++++++++++++++------------------------
+ 1 file changed, 27 insertions(+), 27 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index 8238ae57a..b3753a33b 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -88,8 +88,8 @@
+ |.define SAVE_FPR8, 176(sp)
+ |.define SAVE_PC, 168(sp)
+ |.define SAVE_MULTRES, 160(sp)
+-|.define TMP_STACK, 160(sp) // Overlaps SAVE_MULTRES
+-|.define TMP_STACK_HI, 164(sp) // High 32-bits (to avoid SAVE_MULTRES).
++|.define SAVE_TMP, 160(sp) // Overlaps SAVE_MULTRES
++|.define SAVE_TMP_HI, 164(sp) // High 32-bits (to avoid SAVE_MULTRES).
+ |
+ |// Callee save area (allocated by interpreter).
+ |.define CALLEESAVE, 000(sp) // <- sp in interpreter.
+@@ -644,8 +644,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vmeta_tgets:
+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
+- | stg STR:RC, TMP_STACK
+- | la RC, TMP_STACK
++ | stg STR:RC, SAVE_TMP
++ | la RC, SAVE_TMP
+ | llgc TMPR1, PC_OP
+ | cghi TMPR1, BC_GGET
+ | jne >1
+@@ -657,8 +657,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vmeta_tgetb:
+ | llgc RC, PC_RC
+ | setint RC
+- | stg RC, TMP_STACK
+- | la RC, TMP_STACK
++ | stg RC, SAVE_TMP
++ | la RC, SAVE_TMP
+ | j >1
+ |
+ |->vmeta_tgetv:
+@@ -714,8 +714,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->vmeta_tsets:
+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
+- | stg STR:RC, TMP_STACK
+- | la RC, TMP_STACK
++ | stg STR:RC, SAVE_TMP
++ | la RC, SAVE_TMP
+ | llgc TMPR0, PC_OP
+ | cghi TMPR0, BC_GSET
+ | jne >1
+@@ -727,8 +727,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vmeta_tsetb:
+ | llgc RC, PC_RC
+ | setint RC
+- | stg RC, TMP_STACK
+- | la RC, TMP_STACK
++ | stg RC, SAVE_TMP
++ | la RC, SAVE_TMP
+ | j >1
+ |
+ |->vmeta_tsetv:
+@@ -968,7 +968,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | la RA, 16(RA, BASE) // RA previously set to RA*8.
+ |->vmeta_call: // Resolve and call __call metamethod.
+ | // BASE = old base, RA = new base, RC = nargs+1, PC = return
+- | stg NARGS:RD, TMP_STACK // Save RA, RC for us (not sure about this).
++ | stg NARGS:RD, SAVE_TMP // Save RA, RC for us (not sure about this).
+ | lgr RB, RA
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+@@ -980,7 +980,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lgr RA, RB
+ | lg L:RB, SAVE_L
+ | lg BASE, L:RB->base
+- | lg NARGS:RD, TMP_STACK
++ | lg NARGS:RD, SAVE_TMP
+ | lg LFUNC:RB, -16(RA)
+ | aghi NARGS:RD, 1 // 32-bit on x64.
+ | // This is fragile. L->base must not move, KBASE must always be defined.
+@@ -1367,7 +1367,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endif
+ | lg PC, -8(BASE)
+ | stg PC, SAVE_PC
+- | stg L:RB, TMP_STACK
++ | stg L:RB, SAVE_TMP
+ |.if resume
+ | checktptp L:TMPR0, LJ_TTHREAD, ->fff_fallback
+ |.endif
+@@ -1413,13 +1413,13 @@ static void build_subroutines(BuildCtx *ctx)
+ | jne <2
+ |3:
+ | lgr CARG2, RA
+- | lg L:CARG1, TMP_STACK
++ | lg L:CARG1, SAVE_TMP
+ | lghi CARG3, 0
+ | lghi CARG4, 0
+ | brasl r14, ->vm_resume // (lua_State *L, TValue *base, 0, 0)
+ |
+ | lg L:RB, SAVE_L
+- | lg L:PC, TMP_STACK
++ | lg L:PC, SAVE_TMP
+ | lg BASE, L:RB->base
+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
+ | set_vmstate INTERP
+@@ -1487,12 +1487,12 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endif
+ |
+ |9: // Handle stack expansion on return from yield.
+- | lg L:RA, TMP_STACK
++ | lg L:RA, SAVE_TMP
+ | stg KBASE, L:RA->top // Undo coroutine stack clearing.
+ | lgr CARG2, PC
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+- | lg L:PC, TMP_STACK
++ | lg L:PC, SAVE_TMP
+ | lg BASE, L:RB->base
+ | j <4 // Retry the stack move.
+ |.endmacro
+@@ -1635,9 +1635,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | j ->fff_resf0
+ |
+ |.ffunc_n math_frexp
+- | la CARG1, TMP_STACK
++ | la CARG1, SAVE_TMP
+ | brasl r14, extern frexp
+- | llgf RB, TMP_STACK
++ | llgf RB, SAVE_TMP
+ | lg PC, -8(BASE)
+ | stdy f0, -16(BASE)
+ | setint RB
+@@ -1715,9 +1715,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | lg RB, 0(BASE)
+ | checkint RB, ->fff_fallback
+ | clfi RB, 255; jh ->fff_fallback
+- | strvh RB, TMP_STACK // Store [c,0].
++ | strvh RB, SAVE_TMP // Store [c,0].
+ | lghi TMPR1, 1
+- | la RD, TMP_STACK // Points to stack. Little-endian.
++ | la RD, SAVE_TMP // Points to stack. Little-endian.
+ |->fff_newstr:
+ | lg L:RB, SAVE_L
+ | stg BASE, L:RB->base
+@@ -1975,7 +1975,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RD = nargs+1
+- | stg r14, TMP_STACK // Save return address
++ | stg r14, SAVE_TMP // Save return address
+ | lg L:RB, SAVE_L
+ | stg PC, SAVE_PC // Redundant (but a defined value).
+ | stg BASE, L:RB->base
+@@ -1989,7 +1989,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | sgr RD, BASE
+ | srlg RD, RD, 3
+ | aghi NARGS:RD, 1
+- | lg r14, TMP_STACK // Restore return address.
++ | lg r14, SAVE_TMP // Restore return address.
+ | br r14
+ |
+ |//-----------------------------------------------------------------------
+@@ -3297,10 +3297,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | tm TAB:TMPR1->nomm, 1<<MM_newindex
+ | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+- | stg ITYPE, TMP_STACK
++ | stg ITYPE, SAVE_TMP
+ | lg L:CARG1, SAVE_L
+ | stg BASE, L:CARG1->base
+- | la CARG3, TMP_STACK
++ | la CARG3, SAVE_TMP
+ | lgr CARG2, TAB:RB
+ | stg PC, SAVE_PC
+ | brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+@@ -3661,13 +3661,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | stg RA, L:RB->top
+ | stg PC, SAVE_PC
+ | sgr TMPR1, BASE // Need delta, because BASE may change.
+- | st TMPR1, TMP_STACK_HI
++ | st TMPR1, SAVE_TMP_HI
+ | llgf CARG2, SAVE_MULTRES
+ | aghi CARG2, -1
+ | lgr CARG1, L:RB
+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
+ | lg BASE, L:RB->base
+- | lgf TMPR1, TMP_STACK_HI
++ | lgf TMPR1, SAVE_TMP_HI
+ | lg RA, L:RB->top
+ | agr TMPR1, BASE
+ | j <6
+
+From 08e97d4ea5139253fb284e6fadc6cec9dbd0d9c7 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 31 Jan 2017 14:02:38 -0500
+Subject: [PATCH 253/260] Remove out of date comments.
+
+---
+ src/vm_s390x.dasc | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
+index b3753a33b..21a1ff330 100644
+--- a/src/vm_s390x.dasc
++++ b/src/vm_s390x.dasc
+@@ -147,11 +147,7 @@
+ |.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD
+ |
+ |// Instruction decode+dispatch.
+-| // TODO: tune this, right now we always decode RA-D even if they aren't used.
+ |.macro ins_NEXT
+-| // 32 63
+-| // [ B | C | A | OP ]
+-| // [ D | A | OP ]
+ | llgc OP, 3(PC)
+ | llgh RD, 0(PC)
+ | llgc RA, 2(PC)
+
+From 60bc2fad2d157d6484c29e9b3abc5278c29be93d Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 31 Jan 2017 15:00:15 -0500
+Subject: [PATCH 254/260] Sort instructions in alphabetical order.
+
+---
+ dynasm/dasm_s390x.lua | 856 ++++++++++++++++++++----------------------
+ 1 file changed, 417 insertions(+), 439 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 8bf7084d6..ced4f9f78 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -587,245 +587,256 @@ end
+ -- Template strings for s390x instructions.
+ map_op = {
+ a_2 = "00005a000000RX-a",
+- ar_2 = "000000001a00RR",
+- ay_2 = "e3000000005aRXY-a",
++ ad_2 = "00006a000000RX-a",
++ adb_2 = "ed000000001aRXE",
++ adbr_2 = "0000b31a0000RRE",
++ adr_2 = "000000002a00RR",
++ ae_2 = "00007a000000RX-a",
++ aeb_2 = "ed000000000aRXE",
++ aebr_2 = "0000b30a0000RRE",
++ aer_2 = "000000003a00RR",
++ afi_2 = "c20900000000RIL-a",
+ ag_2 = "e30000000008RXY-a",
+- agr_2 = "0000b9080000RRE",
+ agf_2 = "e30000000018RXY-a",
++ agfi_2 = "c20800000000RIL-a",
+ agfr_2 = "0000b9180000RRE",
+- axbr_2 = "0000b34a0000RRE",
+- adbr_2 = "0000b31a0000RRE",
+- aebr_2 = "0000b30a0000RRE",
+ aghi_2 = "0000a70b0000RI-a",
++ agr_2 = "0000b9080000RRE",
+ ah_2 = "00004a000000RX-a",
+ ahi_2 = "0000a70a0000RI-a",
+ ahy_2 = "e3000000007aRXY-a",
+- afi_2 = "c20900000000RIL-a",
+- agfi_2 = "c20800000000RIL-a",
+ aih_2 = "cc0800000000RIL-a",
+ al_2 = "00005e000000RX-a",
+- alr_2 = "000000001e00RR",
+- aly_2 = "e3000000005eRXY-a",
+- alg_2 = "e3000000000aRXY-a",
+- algr_2 = "0000b90a0000RRE",
+- algf_2 = "e3000000001aRXY-a",
+- algfr_2 = "0000b91a0000RRE",
+- alfi_2 = "c20b00000000RIL-a",
+- algfi_2 = "c20a00000000RIL-a",
+ alc_2 = "e30000000098RXY-a",
+- alcr_2 = "0000b9980000RRE",
+ alcg_2 = "e30000000088RXY-a",
+ alcgr_2 = "0000b9880000RRE",
++ alcr_2 = "0000b9980000RRE",
++ alfi_2 = "c20b00000000RIL-a",
++ alg_2 = "e3000000000aRXY-a",
++ algf_2 = "e3000000001aRXY-a",
++ algfi_2 = "c20a00000000RIL-a",
++ algfr_2 = "0000b91a0000RRE",
++ algr_2 = "0000b90a0000RRE",
++ alr_2 = "000000001e00RR",
+ alsih_2 = "cc0a00000000RIL-a",
+ alsihn_2 = "cc0b00000000RIL-a",
+- axr_2 = "000000003600RR",
+- ad_2 = "00006a000000RX-a",
+- adr_2 = "000000002a00RR",
+- ae_2 = "00007a000000RX-a",
+- aer_2 = "000000003a00RR",
+- aw_2 = "00006e000000RX-a",
+- awr_2 = "000000002e00RR",
++ aly_2 = "e3000000005eRXY-a",
++ ap_2 = "fa0000000000SS-b",
++ ar_2 = "000000001a00RR",
+ au_2 = "00007e000000RX-a",
+ aur_2 = "000000003e00RR",
+- n_2 = "000054000000RX-a",
+- nr_2 = "000000001400RR",
+- ny_2 = "e30000000054RXY-a",
+- ng_2 = "e30000000080RXY-a",
+- ngr_2 = "0000b9800000RRE",
+- nihf_2 = "c00a00000000RIL-a",
+- nihh_2 = "0000a5040000RI-a",
+- nihl_2 = "0000a5050000RI-a",
+- nilf_2 = "c00b00000000RIL-a",
+- nilh_2 = "0000a5060000RI-a",
+- nill_2 = "0000a5070000RI-a",
++ aw_2 = "00006e000000RX-a",
++ awr_2 = "000000002e00RR",
++ axbr_2 = "0000b34a0000RRE",
++ axr_2 = "000000003600RR",
++ ay_2 = "e3000000005aRXY-a",
++ bakr_2 = "0000b2400000RRE",
+ bal_2 = "000045000000RX-a",
+ balr_2 = "000000000500RR",
+ bas_2 = "00004d000000RX-a",
+ basr_2 = "000000000d00RR",
+ bassm_2 = "000000000c00RR",
+- bsa_2 = "0000b25a0000RRE",
+- bsm_2 = "000000000b00RR",
+- bakr_2 = "0000b2400000RRE",
+- bsg_2 = "0000b2580000RRE",
++ bc_2 = "000047000000RX-b",
+ bc_2 = "000047000000RX-b",
+ bcr_2 = "000000000700RR",
+ bct_2 = "000046000000RX-a",
+- bctr_2 = "000000000600RR",
+ bctg_2 = "e30000000046RXY-a",
+ bctgr_2 = "0000b9460000RRE",
++ bctr_2 = "000000000600RR",
++ bras_2 = "0000a7050000RI-b",
++ brasl_2 = "c00500000000RIL-b",
++ brc_2 = "0000a7040000RI-c",
++ brcl_2 = "c00400000000RIL-c",
++ brcl_2 = "c00400000000RIL-c",
++ brct_2 = "0000a7060000RI-b",
++ brctg_2 = "0000a7070000RI-b",
++ brcth_2 = "cc0600000000RIL-b",
++ brxh_3 = "000084000000RSI",
++ brxhg_3 = "ec0000000044RIE-e",
++ bsa_2 = "0000b25a0000RRE",
++ bsg_2 = "0000b2580000RRE",
++ bsm_2 = "000000000b00RR",
+ bxh_3 = "000086000000RS-a",
+ bxhg_3 = "eb0000000044RSY-a",
+ bxle_3 = "000087000000RS-a",
+ bxleg_3 = "eb0000000045RSY-a",
+- brasl_2 = "c00500000000RIL-b",
+- brcl_2 = "c00400000000RIL-c",
+- brcth_2 = "cc0600000000RIL-b",
+- cksm_2 = "0000b2410000RRE",
+- km_2 = "0000b92e0000RRE",
+- kmf_2 = "0000b92a0000RRE",
+- kmc_2 = "0000b92f0000RRE",
+- kmo_2 = "0000b92b0000RRE",
+ c_2 = "000059000000RX-a",
+- cr_2 = "000000001900RR",
+- cy_2 = "e30000000059RXY-a",
+- cg_2 = "e30000000020RXY-a",
+- cgr_2 = "0000b9200000RRE",
+- cgf_2 = "e30000000030RXY-a",
+- cgfr_2 = "0000b9300000RRE",
+- cxbr_2 = "0000b3490000RRE",
+- cxtr_2 = "0000b3ec0000RRE",
+- cxr_2 = "0000b3690000RRE",
+- cdbr_2 = "0000b3190000RRE",
+- cdtr_2 = "0000b3e40000RRE",
+ cd_2 = "000069000000RX-a",
++ cdb_2 = "ed0000000019RXE",
++ cdbr_2 = "0000b3190000RRE",
++ cdfbr_2 = "0000b3950000RRE",
++ cdfbra_4 = "0000b3950000RRF-e",
++ cdfr_2 = "0000b3b50000RRE",
++ cdftr_2 = "0000b9510000RRE",
++ cdgbr_2 = "0000b3a50000RRE",
++ cdgbra_4 = "0000b3a50000RRF-e",
++ cdgr_2 = "0000b3c50000RRE",
++ cdgtr_2 = "0000b3f10000RRE",
+ cdr_2 = "000000002900RR",
+- cebr_2 = "0000b3090000RRE",
++ cds_3 = "0000bb000000RS-a",
++ cdsg_3 = "eb000000003eRSY-a",
++ cdstr_2 = "0000b3f30000RRE",
++ cdsy_3 = "eb0000000031RSY-a",
++ cdtr_2 = "0000b3e40000RRE",
++ cdutr_2 = "0000b3f20000RRE",
+ ce_2 = "000079000000RX-a",
++ ceb_2 = "ed0000000009RXE",
++ cebr_2 = "0000b3090000RRE",
++ cedtr_2 = "0000b3f40000RRE",
++ cefbr_2 = "0000b3940000RRE",
++ cefbra_4 = "0000b3940000RRF-e",
++ cefr_2 = "0000b3b40000RRE",
++ cegbr_2 = "0000b3a40000RRE",
++ cegbra_4 = "0000b3a40000RRF-e",
++ cegr_2 = "0000b3c40000RRE",
+ cer_2 = "000000003900RR",
+- kxbr_2 = "0000b3480000RRE",
+- kxtr_2 = "0000b3e80000RRE",
+- kdbr_2 = "0000b3180000RRE",
+- kdtr_2 = "0000b3e00000RRE",
+- kebr_2 = "0000b3080000RRE",
+- cs_3 = "0000ba000000RS-a",
+- csy_3 = "eb0000000014RSY-a",
+- csg_3 = "eb0000000030RSY-a",
+- csp_2 = "0000b2500000RRE",
+- cspg_2 = "0000b98a0000RRE",
+ cextr_2 = "0000b3fc0000RRE",
+- cedtr_2 = "0000b3f40000RRE",
+- cds_3 = "0000bb000000RS-a",
+- cdsy_3 = "eb0000000031RSY-a",
+- cdsg_3 = "eb000000003eRSY-a",
+- ch_2 = "000049000000RX-a",
+- chy_2 = "e30000000079RXY-a",
++ cfdbr_3 = "0000b3990000RRF-e",
++ cfdbra_4 = "0000b3990000RRF-e",
++ cfebr_3 = "0000b3980000RRF-e",
++ cfebra_4 = "0000b3980000RRF-e",
++ cfi_2 = "c20d00000000RIL-a",
++ cfxbr_3 = "0000b39a0000RRF-e",
++ cfxbra_4 = "0000b39a0000RRF-e",
++ cg_2 = "e30000000020RXY-a",
++ cgdbr_3 = "0000b3a90000RRF-e",
++ cgdbra_4 = "0000b3a90000RRF-e",
++ cgebr_3 = "0000b3a80000RRF-e",
++ cgebra_4 = "0000b3a80000RRF-e",
++ cgf_2 = "e30000000030RXY-a",
++ cgfi_2 = "c20c00000000RIL-a",
++ cgfr_2 = "0000b9300000RRE",
++ cgfrl_2 = "c60c00000000RIL-b",
+ cgh_2 = "e30000000034RXY-a",
+- chrl_2 = "c60500000000RIL-b",
++ cghi_2 = "0000a70f0000RI-a",
+ cghrl_2 = "c60400000000RIL-b",
++ cgr_2 = "0000b9200000RRE",
++ cgrl_2 = "c60800000000RIL-b",
++ cgxbr_3 = "0000b3aa0000RRF-e",
++ cgxbra_4 = "0000b3aa0000RRF-e",
++ ch_2 = "000049000000RX-a",
+ chf_2 = "e300000000cdRXY-a",
+ chhr_2 = "0000b9cd0000RRE",
++ chi_2 = "0000a70e0000RI-a",
+ chlr_2 = "0000b9dd0000RRE",
+- cfi_2 = "c20d00000000RIL-a",
+- cgfi_2 = "c20c00000000RIL-a",
++ chrl_2 = "c60500000000RIL-b",
++ chy_2 = "e30000000079RXY-a",
+ cih_2 = "cc0d00000000RIL-a",
++ cksm_2 = "0000b2410000RRE",
+ cl_2 = "000055000000RX-a",
+- clr_2 = "000000001500RR",
+- cly_2 = "e30000000055RXY-a",
++ clc_2 = "d50000000000SS-a",
++ clcl_2 = "000000000f00RR",
++ clcle_3 = "0000a9000000RS-a",
++ clclu_3 = "eb000000008fRSY-a",
++ clfi_2 = "c20f00000000RIL-a",
+ clg_2 = "e30000000021RXY-a",
+- clgr_2 = "0000b9210000RRE",
+ clgf_2 = "e30000000031RXY-a",
++ clgfi_2 = "c20e00000000RIL-a",
+ clgfr_2 = "0000b9310000RRE",
+- clmh_3 = "eb0000000020RSY-b",
+- clm_3 = "0000bd000000RS-b",
+- clmy_3 = "eb0000000021RSY-b",
++ clgfrl_2 = "c60e00000000RIL-b",
++ clghrl_2 = "c60600000000RIL-b",
++ clgr_2 = "0000b9210000RRE",
++ clgrl_2 = "c60a00000000RIL-b",
+ clhf_2 = "e300000000cfRXY-a",
+ clhhr_2 = "0000b9cf0000RRE",
+ clhlr_2 = "0000b9df0000RRE",
+- clfi_2 = "c20f00000000RIL-a",
+- clgfi_2 = "c20e00000000RIL-a",
++ clhrl_2 = "c60700000000RIL-b",
++ cli_2 = "000095000000SI",
+ clih_2 = "cc0f00000000RIL-a",
+- clcl_2 = "000000000f00RR",
+- clcle_3 = "0000a9000000RS-a",
+- clclu_3 = "eb000000008fRSY-a",
++ clm_3 = "0000bd000000RS-b",
++ clmh_3 = "eb0000000020RSY-b",
++ clmy_3 = "eb0000000021RSY-b",
++ clr_2 = "000000001500RR",
+ clrl_2 = "c60f00000000RIL-b",
+- clhrl_2 = "c60700000000RIL-b",
+- clgrl_2 = "c60a00000000RIL-b",
+- clghrl_2 = "c60600000000RIL-b",
+- clgfrl_2 = "c60e00000000RIL-b",
+ clst_2 = "0000b25d0000RRE",
++ cly_2 = "e30000000055RXY-a",
++ cmpsc_2 = "0000b2630000RRE",
++ cpya_2 = "0000b24d0000RRE",
++ cr_2 = "000000001900RR",
+ crl_2 = "c60d00000000RIL-b",
+- cgrl_2 = "c60800000000RIL-b",
+- cgfrl_2 = "c60c00000000RIL-b",
++ cs_3 = "0000ba000000RS-a",
++ csg_3 = "eb0000000030RSY-a",
++ csp_2 = "0000b2500000RRE",
++ cspg_2 = "0000b98a0000RRE",
++ csy_3 = "eb0000000014RSY-a",
++ cu41_2 = "0000b9b20000RRE",
++ cu42_2 = "0000b9b30000RRE",
++ cudtr_2 = "0000b3e20000RRE",
+ cuse_2 = "0000b2570000RRE",
+- cmpsc_2 = "0000b2630000RRE",
+- kimd_2 = "0000b93e0000RRE",
+- klmd_2 = "0000b93f0000RRE",
+- kmac_2 = "0000b91e0000RRE",
+- thdr_2 = "0000b3590000RRE",
+- thder_2 = "0000b3580000RRE",
++ cuxtr_2 = "0000b3ea0000RRE",
++ cvb_2 = "00004f000000RX-a",
++ cvbg_2 = "e3000000000eRXY-a",
++ cvby_2 = "e30000000006RXY-a",
++ cvd_2 = "00004e000000RX-a",
++ cvdg_2 = "e3000000002eRXY-a",
++ cvdy_2 = "e30000000026RXY-a",
++ cxbr_2 = "0000b3490000RRE",
+ cxfbr_2 = "0000b3960000RRE",
+- cxftr_2 = "0000b9590000RRE",
++ cxfbra_4 = "0000b3960000RRF-e",
+ cxfr_2 = "0000b3b60000RRE",
+- cdfbr_2 = "0000b3950000RRE",
+- cdftr_2 = "0000b9510000RRE",
+- cdfr_2 = "0000b3b50000RRE",
+- cefbr_2 = "0000b3940000RRE",
+- cefr_2 = "0000b3b40000RRE",
++ cxftr_2 = "0000b9590000RRE",
+ cxgbr_2 = "0000b3a60000RRE",
+- cxgtr_2 = "0000b3f90000RRE",
++ cxgbra_4 = "0000b3a60000RRF-e",
+ cxgr_2 = "0000b3c60000RRE",
+- cdgbr_2 = "0000b3a50000RRE",
+- cdgtr_2 = "0000b3f10000RRE",
+- cdgr_2 = "0000b3c50000RRE",
+- cegbr_2 = "0000b3a40000RRE",
+- cegr_2 = "0000b3c40000RRE",
++ cxgtr_2 = "0000b3f90000RRE",
++ cxr_2 = "0000b3690000RRE",
+ cxstr_2 = "0000b3fb0000RRE",
+- cdstr_2 = "0000b3f30000RRE",
++ cxtr_2 = "0000b3ec0000RRE",
+ cxutr_2 = "0000b3fa0000RRE",
+- cdutr_2 = "0000b3f20000RRE",
+- cvb_2 = "00004f000000RX-a",
+- cvby_2 = "e30000000006RXY-a",
+- cvbg_2 = "e3000000000eRXY-a",
+- cvd_2 = "00004e000000RX-a",
+- cvdy_2 = "e30000000026RXY-a",
+- cvdg_2 = "e3000000002eRXY-a",
+- cuxtr_2 = "0000b3ea0000RRE",
+- cudtr_2 = "0000b3e20000RRE",
+- cu42_2 = "0000b9b30000RRE",
+- cu41_2 = "0000b9b20000RRE",
+- cpya_2 = "0000b24d0000RRE",
++ cy_2 = "e30000000059RXY-a",
+ d_2 = "00005d000000RX-a",
+- dr_2 = "000000001d00RR",
+- dxbr_2 = "0000b34d0000RRE",
+- dxr_2 = "0000b22d0000RRE",
+- ddbr_2 = "0000b31d0000RRE",
+ dd_2 = "00006d000000RX-a",
++ ddb_2 = "ed000000001dRXE",
++ ddbr_2 = "0000b31d0000RRE",
+ ddr_2 = "000000002d00RR",
+- debr_2 = "0000b30d0000RRE",
+ de_2 = "00007d000000RX-a",
++ deb_2 = "ed000000000dRXE",
++ debr_2 = "0000b30d0000RRE",
+ der_2 = "000000003d00RR",
++ didbr_4 = "0000b35b0000RRF-b",
+ dl_2 = "e30000000097RXY-a",
+- dlr_2 = "0000b9970000RRE",
+ dlg_2 = "e30000000087RXY-a",
+ dlgr_2 = "0000b9870000RRE",
++ dlr_2 = "0000b9970000RRE",
++ dr_2 = "000000001d00RR",
+ dsg_2 = "e3000000000dRXY-a",
+- dsgr_2 = "0000b90d0000RRE",
+ dsgf_2 = "e3000000001dRXY-a",
+ dsgfr_2 = "0000b91d0000RRE",
+- x_2 = "000057000000RX-a",
+- xr_2 = "000000001700RR",
+- xy_2 = "e30000000057RXY-a",
+- xg_2 = "e30000000082RXY-a",
+- xgr_2 = "0000b9820000RRE",
+- xihf_2 = "c00600000000RIL-a",
+- xilf_2 = "c00700000000RIL-a",
+- ex_2 = "000044000000RX-a",
+- exrl_2 = "c60000000000RIL-b",
++ dsgr_2 = "0000b90d0000RRE",
++ dxbr_2 = "0000b34d0000RRE",
++ dxr_2 = "0000b22d0000RRE",
+ ear_2 = "0000b24f0000RRE",
+- esea_2 = "0000b99d0000RRE",
+- eextr_2 = "0000b3ed0000RRE",
+- eedtr_2 = "0000b3e50000RRE",
+ ecag_3 = "eb000000004cRSY-a",
++ ed_2 = "de0000000000SS-a",
++ edmk_2 = "df0000000000SS-a",
++ eedtr_2 = "0000b3e50000RRE",
++ eextr_2 = "0000b3ed0000RRE",
+ efpc_2 = "0000b38c0000RRE",
+- epar_2 = "0000b2260000RRE",
+ epair_2 = "0000b99a0000RRE",
++ epar_2 = "0000b2260000RRE",
+ epsw_2 = "0000b98d0000RRE",
+- esar_2 = "0000b2270000RRE",
+- esair_2 = "0000b99b0000RRE",
+- esxtr_2 = "0000b3ef0000RRE",
+- esdtr_2 = "0000b3e70000RRE",
+ ereg_2 = "0000b2490000RRE",
+ eregg_2 = "0000b90e0000RRE",
++ esair_2 = "0000b99b0000RRE",
++ esar_2 = "0000b2270000RRE",
++ esdtr_2 = "0000b3e70000RRE",
++ esea_2 = "0000b99d0000RRE",
+ esta_2 = "0000b24a0000RRE",
++ esxtr_2 = "0000b3ef0000RRE",
++ ex_2 = "000044000000RX-a",
++ exrl_2 = "c60000000000RIL-b",
++ fidr_2 = "0000b37f0000RRE",
++ fier_2 = "0000b3770000RRE",
++ fixr_2 = "0000b3670000RRE",
+ flogr_2 = "0000b9830000RRE",
+ hdr_2 = "000000002400RR",
+ her_2 = "000000003400RR",
+ iac_2 = "0000b2240000RRE",
+ ic_2 = "000043000000RX-a",
+- icy_2 = "e30000000073RXY-a",
+- icmh_3 = "eb0000000080RSY-b",
+ icm_3 = "0000bf000000RS-b",
++ icmh_3 = "eb0000000080RSY-b",
+ icmy_3 = "eb0000000081RSY-b",
++ icy_2 = "e30000000073RXY-a",
+ iihf_2 = "c00800000000RIL-a",
+ iihh_2 = "0000a5000000RI-a",
+ iihl_2 = "0000a5010000RI-a",
+@@ -835,439 +846,406 @@ map_op = {
+ ipm_2 = "0000b2220000RRE",
+ iske_2 = "0000b2290000RRE",
+ ivsk_2 = "0000b2230000RRE",
++ kdbr_2 = "0000b3180000RRE",
++ kdtr_2 = "0000b3e00000RRE",
++ kebr_2 = "0000b3080000RRE",
++ kimd_2 = "0000b93e0000RRE",
++ klmd_2 = "0000b93f0000RRE",
++ km_2 = "0000b92e0000RRE",
++ kmac_2 = "0000b91e0000RRE",
++ kmc_2 = "0000b92f0000RRE",
++ kmf_2 = "0000b92a0000RRE",
++ kmo_2 = "0000b92b0000RRE",
++ kxbr_2 = "0000b3480000RRE",
++ kxtr_2 = "0000b3e80000RRE",
+ l_2 = "000058000000RX-a",
+- lr_2 = "000000001800RR",
+- ly_2 = "e30000000058RXY-a",
+- lg_2 = "e30000000004RXY-a",
+- lgr_2 = "0000b9040000RRE",
+- lgf_2 = "e30000000014RXY-a",
+- lgfr_2 = "0000b9140000RRE",
+- lghi_2 = "0000a7090000RI-a",
+- lxr_2 = "0000b3650000RRE",
+- ld_2 = "000068000000RX-a",
+- ldr_2 = "000000002800RR",
+- ldy_2 = "ed0000000065RXY-a",
+- le_2 = "000078000000RX-a",
+- ler_2 = "000000003800RR",
+- ley_2 = "ed0000000064RXY-a",
+- lam_3 = "00009a000000RS-a",
+- lamy_3 = "eb000000009aRSY-a",
+ la_2 = "000041000000RX-a",
+- lay_2 = "e30000000071RXY-a",
+- lae_2 = "000051000000RX-a",
+- laey_2 = "e30000000075RXY-a",
+- larl_2 = "c00000000000RIL-b",
+ laa_3 = "eb00000000f8RSY-a",
+ laag_3 = "eb00000000e8RSY-a",
+ laal_3 = "eb00000000faRSY-a",
+ laalg_3 = "eb00000000eaRSY-a",
++ lae_2 = "000051000000RX-a",
++ laey_2 = "e30000000075RXY-a",
++ lam_3 = "00009a000000RS-a",
++ lamy_3 = "eb000000009aRSY-a",
+ lan_3 = "eb00000000f4RSY-a",
+ lang_3 = "eb00000000e4RSY-a",
+- lax_3 = "eb00000000f7RSY-a",
+- laxg_3 = "eb00000000e7RSY-a",
+ lao_3 = "eb00000000f6RSY-a",
+ laog_3 = "eb00000000e6RSY-a",
+- lt_2 = "e30000000012RXY-a",
+- ltr_2 = "000000001200RR",
+- ltg_2 = "e30000000002RXY-a",
+- ltgr_2 = "0000b9020000RRE",
+- ltgf_2 = "e30000000032RXY-a",
+- ltgfr_2 = "0000b9120000RRE",
+- ltxbr_2 = "0000b3420000RRE",
+- ltxtr_2 = "0000b3de0000RRE",
+- ltxr_2 = "0000b3620000RRE",
+- ltdbr_2 = "0000b3120000RRE",
+- ltdtr_2 = "0000b3d60000RRE",
+- ltdr_2 = "000000002200RR",
+- ltebr_2 = "0000b3020000RRE",
+- lter_2 = "000000003200RR",
++ larl_2 = "c00000000000RIL-b",
++ lax_3 = "eb00000000f7RSY-a",
++ laxg_3 = "eb00000000e7RSY-a",
++ lay_2 = "e30000000071RXY-a",
+ lb_2 = "e30000000076RXY-a",
+- lbr_2 = "0000b9260000RRE",
+- lgb_2 = "e30000000077RXY-a",
+- lgbr_2 = "0000b9060000RRE",
+ lbh_2 = "e300000000c0RXY-a",
+- lcr_2 = "000000001300RR",
+- lcgr_2 = "0000b9030000RRE",
+- lcgfr_2 = "0000b9130000RRE",
+- lcxbr_2 = "0000b3430000RRE",
+- lcxr_2 = "0000b3630000RRE",
++ lbr_2 = "0000b9260000RRE",
+ lcdbr_2 = "0000b3130000RRE",
+- lcdr_2 = "000000002300RR",
+ lcdfr_2 = "0000b3730000RRE",
++ lcdr_2 = "000000002300RR",
+ lcebr_2 = "0000b3030000RRE",
+ lcer_2 = "000000003300RR",
++ lcgfr_2 = "0000b9130000RRE",
++ lcgr_2 = "0000b9030000RRE",
++ lcr_2 = "000000001300RR",
+ lctl_3 = "0000b7000000RS-a",
+ lctlg_3 = "eb000000002fRSY-a",
+- fixr_2 = "0000b3670000RRE",
+- fidr_2 = "0000b37f0000RRE",
+- fier_2 = "0000b3770000RRE",
++ lcxbr_2 = "0000b3430000RRE",
++ lcxr_2 = "0000b3630000RRE",
++ ld_2 = "000068000000RX-a",
++ ldebr_2 = "0000b3040000RRE",
++ lder_2 = "0000b3240000RRE",
+ ldgr_2 = "0000b3c10000RRE",
++ ldr_2 = "000000002800RR",
++ ldxbr_2 = "0000b3450000RRE",
++ ldxr_2 = "000000002500RR",
++ ldy_2 = "ed0000000065RXY-a",
++ le_2 = "000078000000RX-a",
++ ledbr_2 = "0000b3440000RRE",
++ ledr_2 = "000000003500RR",
++ ler_2 = "000000003800RR",
++ lexbr_2 = "0000b3460000RRE",
++ lexr_2 = "0000b3660000RRE",
++ ley_2 = "ed0000000064RXY-a",
++ lfh_2 = "e300000000caRXY-a",
++ lg_2 = "e30000000004RXY-a",
++ lgb_2 = "e30000000077RXY-a",
++ lgbr_2 = "0000b9060000RRE",
+ lgdr_2 = "0000b3cd0000RRE",
+- lh_2 = "000048000000RX-a",
+- lhr_2 = "0000b9270000RRE",
+- lhy_2 = "e30000000078RXY-a",
++ lgf_2 = "e30000000014RXY-a",
++ lgfi_2 = "c00100000000RIL-a",
++ lgfr_2 = "0000b9140000RRE",
++ lgfrl_2 = "c40c00000000RIL-b",
+ lgh_2 = "e30000000015RXY-a",
++ lghi_2 = "0000a7090000RI-a",
+ lghr_2 = "0000b9070000RRE",
++ lghrl_2 = "c40400000000RIL-b",
++ lgr_2 = "0000b9040000RRE",
++ lgrl_2 = "c40800000000RIL-b",
++ lh_2 = "000048000000RX-a",
+ lhh_2 = "e300000000c4RXY-a",
+ lhi_2 = "0000a7080000RI-a",
++ lhr_2 = "0000b9270000RRE",
+ lhrl_2 = "c40500000000RIL-b",
+- lghrl_2 = "c40400000000RIL-b",
+- lfh_2 = "e300000000caRXY-a",
+- lgfi_2 = "c00100000000RIL-a",
+- lxdbr_2 = "0000b3050000RRE",
+- lxdr_2 = "0000b3250000RRE",
+- lxebr_2 = "0000b3060000RRE",
+- lxer_2 = "0000b3260000RRE",
+- ldebr_2 = "0000b3040000RRE",
+- lder_2 = "0000b3240000RRE",
+- llgf_2 = "e30000000016RXY-a",
+- llgfr_2 = "0000b9160000RRE",
++ lhy_2 = "e30000000078RXY-a",
+ llc_2 = "e30000000094RXY-a",
++ llch_2 = "e300000000c2RXY-a",
+ llcr_2 = "0000b9940000RRE",
+ llgc_2 = "e30000000090RXY-a",
+ llgcr_2 = "0000b9840000RRE",
+- llch_2 = "e300000000c2RXY-a",
+- llh_2 = "e30000000095RXY-a",
+- llhr_2 = "0000b9950000RRE",
++ llgf_2 = "e30000000016RXY-a",
++ llgfr_2 = "0000b9160000RRE",
++ llgfrl_2 = "c40e00000000RIL-b",
+ llgh_2 = "e30000000091RXY-a",
+ llghr_2 = "0000b9850000RRE",
++ llghrl_2 = "c40600000000RIL-b",
++ llgt_2 = "e30000000017RXY-a",
++ llgtr_2 = "0000b9170000RRE",
++ llh_2 = "e30000000095RXY-a",
+ llhh_2 = "e300000000c6RXY-a",
++ llhr_2 = "0000b9950000RRE",
+ llhrl_2 = "c40200000000RIL-b",
+- llghrl_2 = "c40600000000RIL-b",
+ llihf_2 = "c00e00000000RIL-a",
+ llihh_2 = "0000a50c0000RI-a",
+ llihl_2 = "0000a50d0000RI-a",
+ llilf_2 = "c00f00000000RIL-a",
+ llilh_2 = "0000a50e0000RI-a",
+ llill_2 = "0000a50f0000RI-a",
+- llgfrl_2 = "c40e00000000RIL-b",
+- llgt_2 = "e30000000017RXY-a",
+- llgtr_2 = "0000b9170000RRE",
+ lm_3 = "000098000000RS-a",
+- lmy_3 = "eb0000000098RSY-a",
+ lmg_3 = "eb0000000004RSY-a",
+ lmh_3 = "eb0000000096RSY-a",
+- lnr_2 = "000000001100RR",
+- lngr_2 = "0000b9010000RRE",
+- lngfr_2 = "0000b9110000RRE",
+- lnxbr_2 = "0000b3410000RRE",
+- lnxr_2 = "0000b3610000RRE",
++ lmy_3 = "eb0000000098RSY-a",
+ lndbr_2 = "0000b3110000RRE",
+- lndr_2 = "000000002100RR",
+ lndfr_2 = "0000b3710000RRE",
++ lndr_2 = "000000002100RR",
+ lnebr_2 = "0000b3010000RRE",
+ lner_2 = "000000003100RR",
++ lngfr_2 = "0000b9110000RRE",
++ lngr_2 = "0000b9010000RRE",
++ lnr_2 = "000000001100RR",
++ lnxbr_2 = "0000b3410000RRE",
++ lnxr_2 = "0000b3610000RRE",
+ loc_3 = "eb00000000f2RSY-b",
+ locg_3 = "eb00000000e2RSY-b",
+- lpq_2 = "e3000000008fRXY-a",
+- lpr_2 = "000000001000RR",
+- lpgr_2 = "0000b9000000RRE",
+- lpgfr_2 = "0000b9100000RRE",
+- lpxbr_2 = "0000b3400000RRE",
+- lpxr_2 = "0000b3600000RRE",
+ lpdbr_2 = "0000b3100000RRE",
+- lpdr_2 = "000000002000RR",
+ lpdfr_2 = "0000b3700000RRE",
++ lpdr_2 = "000000002000RR",
+ lpebr_2 = "0000b3000000RRE",
+ lper_2 = "000000003000RR",
++ lpgfr_2 = "0000b9100000RRE",
++ lpgr_2 = "0000b9000000RRE",
++ lpq_2 = "e3000000008fRXY-a",
++ lpr_2 = "000000001000RR",
++ lpxbr_2 = "0000b3400000RRE",
++ lpxr_2 = "0000b3600000RRE",
++ lr_2 = "000000001800RR",
+ lra_2 = "0000b1000000RX-a",
+- lray_2 = "e30000000013RXY-a",
+ lrag_2 = "e30000000003RXY-a",
++ lray_2 = "e30000000013RXY-a",
++ lrdr_2 = "000000002500RR",
++ lrer_2 = "000000003500RR",
+ lrl_2 = "c40d00000000RIL-b",
+- lgrl_2 = "c40800000000RIL-b",
+- lgfrl_2 = "c40c00000000RIL-b",
+- lrvh_2 = "e3000000001fRXY-a",
+ lrv_2 = "e3000000001eRXY-a",
+- lrvr_2 = "0000b91f0000RRE",
+ lrvg_2 = "e3000000000fRXY-a",
+ lrvgr_2 = "0000b90f0000RRE",
+- ldxbr_2 = "0000b3450000RRE",
+- ldxr_2 = "000000002500RR",
+- lrdr_2 = "000000002500RR",
+- lexbr_2 = "0000b3460000RRE",
+- lexr_2 = "0000b3660000RRE",
+- ledbr_2 = "0000b3440000RRE",
+- ledr_2 = "000000003500RR",
+- lrer_2 = "000000003500RR",
++ lrvh_2 = "e3000000001fRXY-a",
++ lrvr_2 = "0000b91f0000RRE",
++ lt_2 = "e30000000012RXY-a",
++ ltdbr_2 = "0000b3120000RRE",
++ ltdr_2 = "000000002200RR",
++ ltdtr_2 = "0000b3d60000RRE",
++ ltebr_2 = "0000b3020000RRE",
++ lter_2 = "000000003200RR",
++ ltg_2 = "e30000000002RXY-a",
++ ltgf_2 = "e30000000032RXY-a",
++ ltgfr_2 = "0000b9120000RRE",
++ ltgr_2 = "0000b9020000RRE",
++ ltr_2 = "000000001200RR",
++ ltxbr_2 = "0000b3420000RRE",
++ ltxr_2 = "0000b3620000RRE",
++ ltxtr_2 = "0000b3de0000RRE",
+ lura_2 = "0000b24b0000RRE",
+ lurag_2 = "0000b9050000RRE",
+- lzxr_2 = "0000b3760000RRE",
++ lxdbr_2 = "0000b3050000RRE",
++ lxdr_2 = "0000b3250000RRE",
++ lxebr_2 = "0000b3060000RRE",
++ lxer_2 = "0000b3260000RRE",
++ lxr_2 = "0000b3650000RRE",
++ ly_2 = "e30000000058RXY-a",
+ lzdr_2 = "0000b3750000RRE",
+ lzer_2 = "0000b3740000RRE",
+- msta_2 = "0000b2470000RRE",
+- mvcl_2 = "000000000e00RR",
+- mvcle_3 = "0000a8000000RS-a",
+- mvclu_3 = "eb000000008eRSY-a",
+- mvpg_2 = "0000b2540000RRE",
+- mvst_2 = "0000b2550000RRE",
++ lzxr_2 = "0000b3760000RRE",
+ m_2 = "00005c000000RX-a",
+- mfy_2 = "e3000000005cRXY-a",
+- mr_2 = "000000001c00RR",
+- mxbr_2 = "0000b34c0000RRE",
+- mxr_2 = "000000002600RR",
+- mdbr_2 = "0000b31c0000RRE",
++ madb_3 = "ed000000001eRXF",
++ maeb_3 = "ed000000000eRXF",
++ maebr_3 = "0000b30e0000RRD",
++ maer_3 = "0000b32e0000RRD",
+ md_2 = "00006c000000RX-a",
+- mdr_2 = "000000002c00RR",
+- mxdbr_2 = "0000b3070000RRE",
+- mxd_2 = "000067000000RX-a",
+- mxdr_2 = "000000002700RR",
+- meebr_2 = "0000b3170000RRE",
+- meer_2 = "0000b3370000RRE",
+- mdebr_2 = "0000b30c0000RRE",
++ mdb_2 = "ed000000001cRXE",
++ mdbr_2 = "0000b31c0000RRE",
+ mde_2 = "00007c000000RX-a",
++ mdeb_2 = "ed000000000cRXE",
++ mdebr_2 = "0000b30c0000RRE",
+ mder_2 = "000000003c00RR",
++ mdr_2 = "000000002c00RR",
+ me_2 = "00007c000000RX-a",
++ meeb_2 = "ed0000000017RXE",
++ meebr_2 = "0000b3170000RRE",
++ meer_2 = "0000b3370000RRE",
+ mer_2 = "000000003c00RR",
++ mfy_2 = "e3000000005cRXY-a",
++ mghi_2 = "0000a70d0000RI-a",
+ mh_2 = "00004c000000RX-a",
++ mhi_2 = "0000a70c0000RI-a",
+ mhy_2 = "e3000000007cRXY-a",
++ ml_2 = "e30000000096RXY-a",
+ mlg_2 = "e30000000086RXY-a",
+ mlgr_2 = "0000b9860000RRE",
+- ml_2 = "e30000000096RXY-a",
+ mlr_2 = "0000b9960000RRE",
++ mr_2 = "000000001c00RR",
+ ms_2 = "000071000000RX-a",
+- msr_2 = "0000b2520000RRE",
+- msy_2 = "e30000000051RXY-a",
++ msfi_2 = "c20100000000RIL-a",
+ msg_2 = "e3000000000cRXY-a",
+- msgr_2 = "0000b90c0000RRE",
+ msgf_2 = "e3000000001cRXY-a",
+- msgfr_2 = "0000b91c0000RRE",
+- msfi_2 = "c20100000000RIL-a",
+ msgfi_2 = "c20000000000RIL-a",
+- maer_3 = "0000b32e0000RRD",
++ msgfr_2 = "0000b91c0000RRE",
++ msgr_2 = "0000b90c0000RRE",
++ msr_2 = "0000b2520000RRE",
++ msta_2 = "0000b2470000RRE",
++ msy_2 = "e30000000051RXY-a",
++ mvc_2 = "d20000000000SS-a",
++ mvcin_2 = "e80000000000SS-a",
++ mvcl_2 = "000000000e00RR",
++ mvcle_3 = "0000a8000000RS-a",
++ mvclu_3 = "eb000000008eRSY-a",
++ mvghi_2 = "e54800000000SIL",
+ mvhhi_2 = "e54400000000SIL",
+ mvhi_2 = "e54c00000000SIL",
+- mvghi_2 = "e54800000000SIL",
++ mvi_2 = "000092000000SI",
++ mvn_2 = "d10000000000SS-a",
++ mvpg_2 = "0000b2540000RRE",
++ mvst_2 = "0000b2550000RRE",
++ mvz_2 = "d30000000000SS-a",
++ mxbr_2 = "0000b34c0000RRE",
++ mxd_2 = "000067000000RX-a",
++ mxdb_2 = "ed0000000007RXE",
++ mxdbr_2 = "0000b3070000RRE",
++ mxdr_2 = "000000002700RR",
++ mxr_2 = "000000002600RR",
++ n_2 = "000054000000RX-a",
++ nc_2 = "d40000000000SS-a",
++ ng_2 = "e30000000080RXY-a",
++ ngr_2 = "0000b9800000RRE",
++ ni_2 = "000094000000SI",
++ nihf_2 = "c00a00000000RIL-a",
++ nihh_2 = "0000a5040000RI-a",
++ nihl_2 = "0000a5050000RI-a",
++ nilf_2 = "c00b00000000RIL-a",
++ nilh_2 = "0000a5060000RI-a",
++ nill_2 = "0000a5070000RI-a",
++ nr_2 = "000000001400RR",
++ ny_2 = "e30000000054RXY-a",
+ o_2 = "000056000000RX-a",
+- or_2 = "000000001600RR",
+- oy_2 = "e30000000056RXY-a",
++ oc_2 = "d60000000000SS-a",
+ og_2 = "e30000000081RXY-a",
+ ogr_2 = "0000b9810000RRE",
++ oi_2 = "000096000000SI",
+ oihf_2 = "c00c00000000RIL-a",
+ oihh_2 = "0000a5080000RI-a",
+ oihl_2 = "0000a5090000RI-a",
+ oilf_2 = "c00d00000000RIL-a",
+ oilh_2 = "0000a50a0000RI-a",
+ oill_2 = "0000a50b0000RI-a",
+- pgin_2 = "0000b22e0000RRE",
+- pgout_2 = "0000b22f0000RRE",
++ or_2 = "000000001600RR",
++ oy_2 = "e30000000056RXY-a",
++ palb_2 = "0000b2480000RRE",
+ pcc_2 = "0000b92c0000RRE",
+ pckmo_2 = "0000b9280000RRE",
+- pfmf_2 = "0000b9af0000RRE",
+- ptf_2 = "0000b9a20000RRE",
+- popcnt_2 = "0000b9e10000RRE",
+ pfd_2 = "e30000000036m",
+ pfdrl_2 = "c60200000000RIL-c",
++ pfmf_2 = "0000b9af0000RRE",
++ pgin_2 = "0000b22e0000RRE",
++ pgout_2 = "0000b22f0000RRE",
++ popcnt_2 = "0000b9e10000RRE",
+ pt_2 = "0000b2280000RRE",
++ ptf_2 = "0000b9a20000RRE",
+ pti_2 = "0000b99e0000RRE",
+- palb_2 = "0000b2480000RRE",
+- rrbe_2 = "0000b22a0000RRE",
+- rrbm_2 = "0000b9ae0000RRE",
+ rll_3 = "eb000000001dRSY-a",
+ rllg_3 = "eb000000001cRSY-a",
+- srst_2 = "0000b25e0000RRE",
+- srstu_2 = "0000b9be0000RRE",
++ rrbe_2 = "0000b22a0000RRE",
++ rrbm_2 = "0000b9ae0000RRE",
++ s_2 = "00005b000000RX-a",
+ sar_2 = "0000b24e0000RRE",
+- sfpc_2 = "0000b3840000RRE",
++ sd_2 = "00006b000000RX-a",
++ sdb_2 = "ed000000001bRXE",
++ sdbr_2 = "0000b31b0000RRE",
++ sdr_2 = "000000002b00RR",
++ se_2 = "00007b000000RX-a",
++ seb_2 = "ed000000000bRXE",
++ sebr_2 = "0000b30b0000RRE",
++ ser_2 = "000000003b00RR",
+ sfasr_2 = "0000b3850000RRE",
+- spm_2 = "000000000400RR",
+- ssar_2 = "0000b2250000RRE",
+- ssair_2 = "0000b99f0000RRE",
+- slda_2 = "00008f000000RS-a",
+- sldl_2 = "00008d000000RS-a",
++ sfpc_2 = "0000b3840000RRE",
++ sg_2 = "e30000000009RXY-a",
++ sgf_2 = "e30000000019RXY-a",
++ sgfr_2 = "0000b9190000RRE",
++ sgr_2 = "0000b9090000RRE",
++ sh_2 = "00004b000000RX-a",
++ shy_2 = "e3000000007bRXY-a",
++ sl_2 = "00005f000000RX-a",
+ sla_2 = "00008b000000RS-a",
+- slak_3 = "eb00000000ddRSY-a",
+ slag_3 = "eb000000000bRSY-a",
++ slak_3 = "eb00000000ddRSY-a",
++ slb_2 = "e30000000099RXY-a",
++ slbg_2 = "e30000000089RXY-a",
++ slbgr_2 = "0000b9890000RRE",
++ slbr_2 = "0000b9990000RRE",
++ slda_2 = "00008f000000RS-a",
++ sldl_2 = "00008d000000RS-a",
++ slfi_2 = "c20500000000RIL-a",
++ slg_2 = "e3000000000bRXY-a",
++ slgf_2 = "e3000000001bRXY-a",
++ slgfi_2 = "c20400000000RIL-a",
++ slgfr_2 = "0000b91b0000RRE",
++ slgr_2 = "0000b90b0000RRE",
+ sll_2 = "000089000000RS-a",
+- sllk_3 = "eb00000000dfRSY-a",
+ sllg_3 = "eb000000000dRSY-a",
+- srda_2 = "00008e000000RS-a",
+- srdl_2 = "00008c000000RS-a",
+- sra_2 = "00008a000000RS-a",
+- srak_3 = "eb00000000dcRSY-a",
+- srag_3 = "eb000000000aRSY-a",
+- srl_2 = "000088000000RS-a",
+- srlk_3 = "eb00000000deRSY-a",
+- srlg_3 = "eb000000000cRSY-a",
+- sqxbr_2 = "0000b3160000RRE",
+- sqxr_2 = "0000b3360000RRE",
++ sllk_3 = "eb00000000dfRSY-a",
++ slr_2 = "000000001f00RR",
++ sly_2 = "e3000000005fRXY-a",
++ spm_2 = "000000000400RR",
++ sqdb_2 = "ed0000000015RXE",
+ sqdbr_2 = "0000b3150000RRE",
+ sqdr_2 = "0000b2440000RRE",
++ sqeb_2 = "ed0000000014RXE",
+ sqebr_2 = "0000b3140000RRE",
+ sqer_2 = "0000b2450000RRE",
++ sqxbr_2 = "0000b3160000RRE",
++ sqxr_2 = "0000b3360000RRE",
++ sr_2 = "000000001b00RR",
++ sra_2 = "00008a000000RS-a",
++ srag_3 = "eb000000000aRSY-a",
++ srak_3 = "eb00000000dcRSY-a",
++ srda_2 = "00008e000000RS-a",
++ srdl_2 = "00008c000000RS-a",
++ srl_2 = "000088000000RS-a",
++ srlg_3 = "eb000000000cRSY-a",
++ srlk_3 = "eb00000000deRSY-a",
++ srst_2 = "0000b25e0000RRE",
++ srstu_2 = "0000b9be0000RRE",
++ ssair_2 = "0000b99f0000RRE",
++ ssar_2 = "0000b2250000RRE",
+ st_2 = "000050000000RX-a",
+- sty_2 = "e30000000050RXY-a",
+- stg_2 = "e30000000024RXY-a",
+- std_2 = "000060000000RX-a",
+- stdy_2 = "ed0000000067RXY-a",
+- ste_2 = "000070000000RX-a",
+- stey_2 = "ed0000000066RXY-a",
+ stam_3 = "00009b000000RS-a",
+ stamy_3 = "eb000000009bRSY-a",
+ stc_2 = "000042000000RX-a",
+- stcy_2 = "e30000000072RXY-a",
+ stch_2 = "e300000000c3RXY-a",
+- stcmh_3 = "eb000000002cRSY-b",
+ stcm_3 = "0000be000000RS-b",
++ stcmh_3 = "eb000000002cRSY-b",
+ stcmy_3 = "eb000000002dRSY-b",
+- stctl_3 = "0000b6000000RS-a",
+ stctg_3 = "eb0000000025RSY-a",
++ stctl_3 = "0000b6000000RS-a",
++ stcy_2 = "e30000000072RXY-a",
++ std_2 = "000060000000RX-a",
++ stdy_2 = "ed0000000067RXY-a",
++ ste_2 = "000070000000RX-a",
++ stey_2 = "ed0000000066RXY-a",
++ stfh_2 = "e300000000cbRXY-a",
++ stfl_1 = "0000b2b10000S",
++ stg_2 = "e30000000024RXY-a",
++ stgrl_2 = "c40b00000000RIL-b",
+ sth_2 = "000040000000RX-a",
+- sthy_2 = "e30000000070RXY-a",
+ sthh_2 = "e300000000c7RXY-a",
+ sthrl_2 = "c40700000000RIL-b",
+- stfh_2 = "e300000000cbRXY-a",
++ sthy_2 = "e30000000070RXY-a",
+ stm_3 = "000090000000RS-a",
+- stmy_3 = "eb0000000090RSY-a",
+ stmg_3 = "eb0000000024RSY-a",
+ stmh_3 = "eb0000000026RSY-a",
++ stmy_3 = "eb0000000090RSY-a",
+ stoc_3 = "eb00000000f3RSY-b",
+ stocg_3 = "eb00000000e3RSY-b",
+ stpq_2 = "e3000000008eRXY-a",
+ strl_2 = "c40f00000000RIL-b",
+- stgrl_2 = "c40b00000000RIL-b",
+- strvh_2 = "e3000000003fRXY-a",
+ strv_2 = "e3000000003eRXY-a",
+ strvg_2 = "e3000000002fRXY-a",
++ strvh_2 = "e3000000003fRXY-a",
+ stura_2 = "0000b2460000RRE",
+ sturg_2 = "0000b9250000RRE",
+- s_2 = "00005b000000RX-a",
+- sr_2 = "000000001b00RR",
+- sy_2 = "e3000000005bRXY-a",
+- sg_2 = "e30000000009RXY-a",
+- sgr_2 = "0000b9090000RRE",
+- sgf_2 = "e30000000019RXY-a",
+- sgfr_2 = "0000b9190000RRE",
+- sxbr_2 = "0000b34b0000RRE",
+- sdbr_2 = "0000b31b0000RRE",
+- sebr_2 = "0000b30b0000RRE",
+- sh_2 = "00004b000000RX-a",
+- shy_2 = "e3000000007bRXY-a",
+- sl_2 = "00005f000000RX-a",
+- slr_2 = "000000001f00RR",
+- sly_2 = "e3000000005fRXY-a",
+- slg_2 = "e3000000000bRXY-a",
+- slgr_2 = "0000b90b0000RRE",
+- slgf_2 = "e3000000001bRXY-a",
+- slgfr_2 = "0000b91b0000RRE",
+- slfi_2 = "c20500000000RIL-a",
+- slgfi_2 = "c20400000000RIL-a",
+- slb_2 = "e30000000099RXY-a",
+- slbr_2 = "0000b9990000RRE",
+- slbg_2 = "e30000000089RXY-a",
+- slbgr_2 = "0000b9890000RRE",
+- sxr_2 = "000000003700RR",
+- sd_2 = "00006b000000RX-a",
+- sdr_2 = "000000002b00RR",
+- se_2 = "00007b000000RX-a",
+- ser_2 = "000000003b00RR",
++ sty_2 = "e30000000050RXY-a",
+ su_2 = "00007f000000RX-a",
+ sur_2 = "000000003f00RR",
++ svc_1 = "000000000a00I",
+ sw_2 = "00006f000000RX-a",
+ swr_2 = "000000002f00RR",
++ sxbr_2 = "0000b34b0000RRE",
++ sxr_2 = "000000003700RR",
++ sy_2 = "e3000000005bRXY-a",
+ tar_2 = "0000b24c0000RRE",
+ tb_2 = "0000b22c0000RRE",
++ thder_2 = "0000b3580000RRE",
++ thdr_2 = "0000b3590000RRE",
++ tm_2 = "000091000000SI",
+ tmhh_2 = "0000a7020000RI-a",
+ tmhl_2 = "0000a7030000RI-a",
+ tmlh_2 = "0000a7000000RI-a",
+ tmll_2 = "0000a7010000RI-a",
++ tmy_2 = "eb0000000051SIY",
++ tr_2 = "dc0000000000SS-a",
+ trace_3 = "000099000000RS-a",
+ tracg_3 = "eb000000000fRSY-a",
+ tre_2 = "0000b2a50000RRE",
+-
+-
+- -- SS-a instructions
+- clc_2 = "d50000000000SS-a",
+- ed_2 = "de0000000000SS-a",
+- edmk_2 = "df0000000000SS-a",
+- mvc_2 = "d20000000000SS-a",
+- mvcin_2 = "e80000000000SS-a",
+- mvn_2 = "d10000000000SS-a",
+- mvz_2 = "d30000000000SS-a",
+- nc_2 = "d40000000000SS-a",
+- oc_2 = "d60000000000SS-a",
+- tr_2 = "dc0000000000SS-a",
+ trt_2 = "dd0000000000SS-a",
+ trtr_2 = "d00000000000SS-a",
+ unpka_2 = "ea0000000000SS-a",
+ unpku_2 = "e20000000000SS-a",
++ x_2 = "000057000000RX-a",
+ xc_2 = "d70000000000SS-a",
+- ap_2 = "fa0000000000SS-b",
+- -- RRF-e instructions
+- cfebr_3 = "0000b3980000RRF-e",
+- cfebra_4 = "0000b3980000RRF-e",
+- cfdbr_3 = "0000b3990000RRF-e",
+- cfdbra_4 = "0000b3990000RRF-e",
+- cfxbr_3 = "0000b39a0000RRF-e",
+- cfxbra_4 = "0000b39a0000RRF-e",
+- cgebr_3 = "0000b3a80000RRF-e",
+- cgebra_4 = "0000b3a80000RRF-e",
+- cgdbr_3 = "0000b3a90000RRF-e",
+- cgdbra_4 = "0000b3a90000RRF-e",
+- cgxbr_3 = "0000b3aa0000RRF-e",
+- cgxbra_4 = "0000b3aa0000RRF-e",
+- cefbra_4 = "0000b3940000RRF-e",
+- cdfbra_4 = "0000b3950000RRF-e",
+- cxfbra_4 = "0000b3960000RRF-e",
+- cegbra_4 = "0000b3a40000RRF-e",
+- cdgbra_4 = "0000b3a50000RRF-e",
+- cxgbra_4 = "0000b3a60000RRF-e",
+- -- RXE instructions
+- adb_2 = "ed000000001aRXE",
+- aeb_2 = "ed000000000aRXE",
+- cdb_2 = "ed0000000019RXE",
+- ceb_2 = "ed0000000009RXE",
+- ddb_2 = "ed000000001dRXE",
+- deb_2 = "ed000000000dRXE",
+- mdb_2 = "ed000000001cRXE",
+- mdeb_2 = "ed000000000cRXE",
+- meeb_2 = "ed0000000017RXE",
+- mxdb_2 = "ed0000000007RXE",
+- sqdb_2 = "ed0000000015RXE",
+- sqeb_2 = "ed0000000014RXE",
+- sdb_2 = "ed000000001bRXE",
+- seb_2 = "ed000000000bRXE",
+- -- RRF-b instructions
+- didbr_4 = "0000b35b0000RRF-b",
+- -- S mode instructions
+- stfl_1 = "0000b2b10000S",
+- -- I- mode instructions
+- svc_1 = "000000000a00I",
+- -- RI-a mode instructions
+- -- TODO: change "i" to "RI-a"
+- chi_2 = "0000a70e0000RI-a",
+- cghi_2 = "0000a70f0000RI-a",
+- mhi_2 = "0000a70c0000RI-a",
+- mghi_2 = "0000a70d0000RI-a",
+- -- RI-b mode instructions
+- bras_2 = "0000a7050000RI-b",
+- brct_2 = "0000a7060000RI-b",
+- brctg_2 = "0000a7070000RI-b",
+- -- RI-c mode instructions
+- brc_2 = "0000a7040000RI-c",
+- -- RIL-c
+- brcl_2 = "c00400000000RIL-c",
+- -- RX-b mode instructions
+- bc_2 = "000047000000RX-b",
+- -- RSI
+- brxh_3 = "000084000000RSI",
+- -- RIE-e
+- brxhg_3 = "ec0000000044RIE-e",
+- -- SI
+- cli_2 = "000095000000SI",
+- mvi_2 = "000092000000SI",
+- ni_2 = "000094000000SI",
+- tm_2 = "000091000000SI",
++ xg_2 = "e30000000082RXY-a",
++ xgr_2 = "0000b9820000RRE",
+ xi_2 = "000097000000SI",
+- oi_2 = "000096000000SI",
+- -- SIY
+- tmy_2 = "eb0000000051SIY",
+- -- RXF
+- madb_3 = "ed000000001eRXF",
+- maeb_3 = "ed000000000eRXF",
+- -- RRD
+- maebr_3 = "0000b30e0000RRD",
+- -- RS-b
+- clm_3 = "0000bd000000RS-b"
++ xihf_2 = "c00600000000RIL-a",
++ xilf_2 = "c00700000000RIL-a",
++ xr_2 = "000000001700RR",
++ xy_2 = "e30000000057RXY-a",
+ }
+ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+
+From 7352e706fb0fe48959a769f594dfd88f312a13e3 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 31 Jan 2017 15:30:54 -0500
+Subject: [PATCH 255/260] Use real encoding names for extended mnemonics.
+
+---
+ dynasm/dasm_s390x.lua | 54 ++++++++++++++++++-------------------------
+ 1 file changed, 22 insertions(+), 32 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index ced4f9f78..62aa7bc39 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1251,13 +1251,13 @@ for cond,c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+ -- TODO: replace 'B' with correct encoding.
+ -- brc
+- map_op["j"..cond.."_1"] = "0000"..tohex(0xa7040000+shl(c, 20)).."w"
++ map_op["j"..cond.."_1"] = "0000"..tohex(0xa7040000+shl(c, 20)).."RI-c"
+ -- brcl
+- map_op["jg"..cond.."_1"] = tohex(0xc0040000+shl(c, 20)).."0000".."x"
++ map_op["jg"..cond.."_1"] = tohex(0xc0040000+shl(c, 20)).."0000".."RIL-c"
+ -- bc
+- map_op["b"..cond.."_1"] = "0000"..tohex(0x47000000+shl(c, 20)).."y"
++ map_op["b"..cond.."_1"] = "0000"..tohex(0x47000000+shl(c, 20)).."RX-b"
+ -- bcr
+- map_op["b"..cond.."r_1"] = "0000"..tohex(0x0700+shl(c, 4)).."z"
++ map_op["b"..cond.."r_1"] = "0000"..tohex(0x0700+shl(c, 4)).."RR"
+ end
+ ------------------------------------------------------------------------------
+ -- Handle opcodes defined with template strings.
+@@ -1271,7 +1271,10 @@ local function parse_template(params, template, nparams, pos)
+ -- Process each character.
+ local p = sub(template, 13)
+ if p == "RR" then
+- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
++ if #params > 1 then
++ op2 = op2 + shl(parse_reg(params[1]),4)
++ end
++ op2 = op2 + parse_reg(params[#params])
+ wputhw(op2)
+ elseif p == "RRE" then
+ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
+@@ -1286,7 +1289,6 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1); wputhw(op2);
+ if a then a() end
+- elseif p == "RX-b" then
+ elseif p == "RXY-a" then
+ local d, x, b, a = parse_mem_bxy(params[2])
+ op0 = op0 + shl(parse_reg(params[1]), 4) + x
+@@ -1294,8 +1296,6 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + band(shr(d, 4), 0xff00)
+ wputhw(op0); wputhw(op1); wputhw(op2)
+ if a then a() end
+- elseif p == "m" then
+-
+ elseif p == "RIL-a" then
+ op0 = op0 + shl(parse_reg(params[1]), 4)
+ wputhw(op0);
+@@ -1395,18 +1395,25 @@ local function parse_template(params, template, nparams, pos)
+ local mode, n, s = parse_label(params[2])
+ waction("REL_"..mode, n, s)
+ elseif p == "RI-c" then
+- op1 = op1 + shl(parse_num(params[1]),4)
++ if #params > 1 then
++ op1 = op1 + shl(parse_num(params[1]), 4)
++ end
+ wputhw(op1)
+- local mode, n, s = parse_label(params[2])
++ local mode, n, s = parse_label(params[#params])
+ waction("REL_"..mode, n, s)
+ elseif p == "RIL-c" then
+- op0 = op0 + shl(parse_num(params[1]),4)
+- wputhhw(op0)
+- local mode, n, s = parse_label(params[2])
++ if #params > 1 then
++ op0 = op0 + shl(parse_num(params[1]), 4)
++ end
++ wputhw(op0)
++ local mode, n, s = parse_label(params[#params])
+ waction("REL_"..mode, n, s)
+ elseif p == "RX-b" then
+- local d, x, b, a = parse_mem_bx(params[2])
+- op1 = op1 + shl(parse_num(params[1]), 4) + x
++ local d, x, b, a = parse_mem_bx(params[#params])
++ if #params > 1 then
++ op1 = op1 + shl(parse_num(params[1]), 4)
++ end
++ op1 = op1 + x
+ op2 = op2 + shl(b, 12) + d
+ wputhw(op1);wputhw(op2);
+ if a then a() end
+@@ -1442,23 +1449,6 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op1)
+ op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
+ wputhw(op2)
+- elseif p == "w" then
+- local mode, n, s = parse_label(params[1])
+- wputhw(op1)
+- waction("REL_"..mode, n, s)
+- elseif p == "x" then
+- local mode, n, s = parse_label(params[1])
+- wputhw(op0)
+- waction("REL_"..mode, n, s)
+- elseif p == "y" then
+- local d, x, b, a = parse_mem_bx(params[1])
+- op1 = op1 + x
+- op2 = op2 + shl(b, 12) + d
+- wputhw(op1); wputhw(op2);
+- if a then a() end -- a() emits action.
+- elseif p == "z" then
+- op2 = op2 + parse_reg(params[1])
+- wputhw(op2)
+ elseif p == "RS-b" then
+ local m = parse_mask(params[2])
+ local d, b, a = parse_mem_b(params[3])
+
+From 09017733b8be56005617d9639386b9dfb036b52f Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 31 Jan 2017 15:42:48 -0500
+Subject: [PATCH 256/260] Re-arrange instruction encodings so they are in
+ alphabetical order.
+
+---
+ dynasm/dasm_s390x.lua | 251 +++++++++++++++++++++---------------------
+ 1 file changed, 125 insertions(+), 126 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 62aa7bc39..96ab88fe4 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -1270,32 +1270,33 @@ local function parse_template(params, template, nparams, pos)
+
+ -- Process each character.
+ local p = sub(template, 13)
+- if p == "RR" then
+- if #params > 1 then
+- op2 = op2 + shl(parse_reg(params[1]),4)
+- end
+- op2 = op2 + parse_reg(params[#params])
+- wputhw(op2)
+- elseif p == "RRE" then
+- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
+- wputhw(op1); wputhw(op2)
++ if p == "I" then
++ local imm_val, a = parse_imm8(params[1])
++ op2 = op2 + imm_val;
++ wputhw(op2);
++ if a then a() end
+ elseif p == "RI-a" then
+ op1 = op1 + shl(parse_reg(params[1]),4)
+ wputhw(op1);
+ parse_imm16(params[2])
+- elseif p == "RX-a" then
+- local d, x, b, a = parse_mem_bx(params[2])
+- op1 = op1 + shl(parse_reg(params[1]), 4) + x
+- op2 = op2 + shl(b, 12) + d
+- wputhw(op1); wputhw(op2);
+- if a then a() end
+- elseif p == "RXY-a" then
+- local d, x, b, a = parse_mem_bxy(params[2])
+- op0 = op0 + shl(parse_reg(params[1]), 4) + x
+- op1 = op1 + shl(b, 12) + band(d, 0xfff)
+- op2 = op2 + band(shr(d, 4), 0xff00)
+- wputhw(op0); wputhw(op1); wputhw(op2)
+- if a then a() end
++ elseif p == "RI-b" then
++ op1 = op1 + shl(parse_reg(params[1]),4)
++ wputhw(op1)
++ local mode, n, s = parse_label(params[2])
++ waction("REL_"..mode, n, s)
++ elseif p == "RI-c" then
++ if #params > 1 then
++ op1 = op1 + shl(parse_num(params[1]), 4)
++ end
++ wputhw(op1)
++ local mode, n, s = parse_label(params[#params])
++ waction("REL_"..mode, n, s)
++ elseif p == "RIE-e" then
++ op0 = op0 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
++ wputhw1(op0)
++ local mode, n, s = parse_label(params[3])
++ waction("REL_"..mode, n, s)
++ wputhw(op2)
+ elseif p == "RIL-a" then
+ op0 = op0 + shl(parse_reg(params[1]), 4)
+ wputhw(op0);
+@@ -1305,6 +1306,37 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op0);
+ local mode, n, s = parse_label(params[2])
+ waction("REL_"..mode, n, s)
++ elseif p == "RIL-c" then
++ if #params > 1 then
++ op0 = op0 + shl(parse_num(params[1]), 4)
++ end
++ wputhw(op0)
++ local mode, n, s = parse_label(params[#params])
++ waction("REL_"..mode, n, s)
++ elseif p == "RR" then
++ if #params > 1 then
++ op2 = op2 + shl(parse_reg(params[1]),4)
++ end
++ op2 = op2 + parse_reg(params[#params])
++ wputhw(op2)
++ elseif p == "RRD" then
++ wputhw(op1)
++ op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
++ wputhw(op2)
++ elseif p == "RRE" then
++ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
++ wputhw(op1); wputhw(op2)
++ elseif p == "RRF-b" then
++ wputhw(op1);
++ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + shl(parse_mask(params[4]),8)
++ wputhw(op2)
++ elseif p == "RRF-e" then
++ wputhw(op1)
++ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[2]),12) + parse_reg(params[3])
++ if params[4] then
++ op2 = op2 + shl(parse_mask2(params[4]),8)
++ end
++ wputhw(op2)
+ elseif p == "RS-a" then
+ if (params[3]) then
+ local d, b, a = parse_mem_b(params[3])
+@@ -1317,6 +1349,18 @@ local function parse_template(params, template, nparams, pos)
+ end
+ wputhw(op1); wputhw(op2)
+ if a then a() end
++ elseif p == "RS-b" then
++ local m = parse_mask(params[2])
++ local d, b, a = parse_mem_b(params[3])
++ op1 = op1 + shl(parse_reg(params[1]), 4) + m
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1); wputhw(op2)
++ if a then a() end
++ elseif p == "RSI" then
++ op1 = op1 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
++ wputhw(op1)
++ local mode, n, s = parse_label(params[3])
++ waction("REL_"..mode, n, s)
+ elseif p == "RSY-a" then
+ local d, b, a = parse_mem_by(params[3])
+ op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+@@ -1324,110 +1368,50 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + band(shr(d, 4), 0xff00)
+ wputhw(op0); wputhw(op1); wputhw(op2)
+ if a then a() end -- a() emits action.
+- elseif p == "SS-a" then
+- local d1, l1, b1, d1a, l1a = parse_mem_lb(params[1])
+- local d2, b2, d2a = parse_mem_b(params[2])
+- op0 = op0 + l1
+- op1 = op1 + shl(b1, 12) + d1
+- op2 = op2 + shl(b2, 12) + d2
+- wputhw(op0)
+- if l1a then l1a() end
+- wputhw(op1)
+- if d1a then d1a() end
+- wputhw(op2)
+- if d2a then d2a() end
+- elseif p == "SS-b" then
+- local high_l=true;
+- local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1],high_l)
+- high_l=false;
+- local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2],high_l)
+- op0 = op0 + shl(l1,4) + l2
+- op1 = op1 + shl(b1, 12) + d1
+- op2 = op2 + shl(b2, 12) + d2
+- wputhw(op0)
+- if l1a then l1a() end
+- if l2a then l2a() end
+- wputhw(op1)
+- if d1a then d1a() end
+- wputhw(op2)
+- if d2a then d2a() end
+- elseif p == "SIL" then
+- wputhw(op0)
+- local d, b, a = parse_mem_b(params[1])
+- op1 = op1 + shl(b, 12) + d
+- wputhw(op1)
++ elseif p == "RX-a" then
++ local d, x, b, a = parse_mem_bx(params[2])
++ op1 = op1 + shl(parse_reg(params[1]), 4) + x
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1); wputhw(op2);
+ if a then a() end
+- parse_imm16(params[2])
+- elseif p == "RRF-e" then
+- wputhw(op1)
+- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[2]),12) + parse_reg(params[3])
+- if params[4] then
+- op2 = op2 + shl(parse_mask2(params[4]),8)
++ elseif p == "RX-b" then
++ local d, x, b, a = parse_mem_bx(params[#params])
++ if #params > 1 then
++ op1 = op1 + shl(parse_num(params[1]), 4)
+ end
+- wputhw(op2)
++ op1 = op1 + x
++ op2 = op2 + shl(b, 12) + d
++ wputhw(op1);wputhw(op2);
++ if a then a() end
+ elseif p == "RXE" then
+ local d, x, b, a = parse_mem_bx(params[2])
+ op0 = op0 + shl(parse_reg(params[1]), 4) + x
+ op1 = op1 + shl(b, 12) + d
+- -- m3 is not present, so assumed its not part of the instruction since its not passed as a prameter
+ wputhw(op0);
+ wputhw(op1);
+ if a then a() end
+ wputhw(op2);
+- elseif p == "RRF-b" then
+- wputhw(op1);
+- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + shl(parse_mask(params[4]),8)
++ elseif p == "RXF" then
++ local d, x, b, a = parse_mem_bx(params[3])
++ op0 = op0 + shl(parse_reg(params[2]),4) + x
++ op1 = op1 + shl(b, 12) + d
++ wputhw(op0); wputhw(op1);
++ if a then a() end
++ op2 = op2 + shl(parse_reg(params[1]),12)
+ wputhw(op2)
+- elseif p =="S" then
++ elseif p == "RXY-a" then
++ local d, x, b, a = parse_mem_bxy(params[2])
++ op0 = op0 + shl(parse_reg(params[1]), 4) + x
++ op1 = op1 + shl(b, 12) + band(d, 0xfff)
++ op2 = op2 + band(shr(d, 4), 0xff00)
++ wputhw(op0); wputhw(op1); wputhw(op2)
++ if a then a() end
++ elseif p == "S" then
+ wputhw(op1);
+ local d, b, a = parse_mem_b(params[1])
+ op2 = op2 + shl(b,12) + d;
+ wputhw(op2)
+ if a then a() end
+- elseif p =="I" then
+- local imm_val, a = parse_imm8(params[1])
+- op2 = op2 + imm_val;
+- wputhw(op2);
+- if a then a() end
+- elseif p == "RI-b" then
+- op1 = op1 + shl(parse_reg(params[1]),4)
+- wputhw(op1)
+- local mode, n, s = parse_label(params[2])
+- waction("REL_"..mode, n, s)
+- elseif p == "RI-c" then
+- if #params > 1 then
+- op1 = op1 + shl(parse_num(params[1]), 4)
+- end
+- wputhw(op1)
+- local mode, n, s = parse_label(params[#params])
+- waction("REL_"..mode, n, s)
+- elseif p == "RIL-c" then
+- if #params > 1 then
+- op0 = op0 + shl(parse_num(params[1]), 4)
+- end
+- wputhw(op0)
+- local mode, n, s = parse_label(params[#params])
+- waction("REL_"..mode, n, s)
+- elseif p == "RX-b" then
+- local d, x, b, a = parse_mem_bx(params[#params])
+- if #params > 1 then
+- op1 = op1 + shl(parse_num(params[1]), 4)
+- end
+- op1 = op1 + x
+- op2 = op2 + shl(b, 12) + d
+- wputhw(op1);wputhw(op2);
+- if a then a() end
+- elseif p == "RSI" then
+- op1 = op1 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
+- wputhw(op1)
+- local mode, n, s = parse_label(params[3])
+- waction("REL_"..mode, n, s)
+- elseif p == "RIE-e" then
+- op0 = op0 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
+- wputhw1(op0)
+- local mode, n, s = parse_label(params[3])
+- waction("REL_"..mode, n, s)
+- wputhw(op2)
+ elseif p == "SI" then
+ local imm_val, a = parse_imm8(params[2])
+ op1 = op1 + imm_val
+@@ -1437,25 +1421,13 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + shl(b,12) + d
+ wputhw(op2)
+ if a then a() end
+- elseif p == "RXF" then
+- local d, x, b, a = parse_mem_bx(params[3])
+- op0 = op0 + shl(parse_reg(params[2]),4) + x
++ elseif p == "SIL" then
++ wputhw(op0)
++ local d, b, a = parse_mem_b(params[1])
+ op1 = op1 + shl(b, 12) + d
+- wputhw(op0); wputhw(op1);
+- if a then a() end
+- op2 = op2 + shl(parse_reg(params[1]),12)
+- wputhw(op2)
+- elseif p == "RRD" then
+ wputhw(op1)
+- op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
+- wputhw(op2)
+- elseif p == "RS-b" then
+- local m = parse_mask(params[2])
+- local d, b, a = parse_mem_b(params[3])
+- op1 = op1 + shl(parse_reg(params[1]), 4) + m
+- op2 = op2 + shl(b, 12) + d
+- wputhw(op1); wputhw(op2)
+ if a then a() end
++ parse_imm16(params[2])
+ elseif p == "SIY" then
+ local imm8,iact = parse_imm8(params[2])
+ op0 = op0 + shl(imm8, 8)
+@@ -1466,6 +1438,33 @@ local function parse_template(params, template, nparams, pos)
+ op2 = op2 + band(shr(d, 4), 0xff00)
+ wputhw(op1); wputhw(op2)
+ if a then a() end
++ elseif p == "SS-a" then
++ local d1, l1, b1, d1a, l1a = parse_mem_lb(params[1])
++ local d2, b2, d2a = parse_mem_b(params[2])
++ op0 = op0 + l1
++ op1 = op1 + shl(b1, 12) + d1
++ op2 = op2 + shl(b2, 12) + d2
++ wputhw(op0)
++ if l1a then l1a() end
++ wputhw(op1)
++ if d1a then d1a() end
++ wputhw(op2)
++ if d2a then d2a() end
++ elseif p == "SS-b" then
++ local high_l=true;
++ local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1],high_l)
++ high_l=false;
++ local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2],high_l)
++ op0 = op0 + shl(l1,4) + l2
++ op1 = op1 + shl(b1, 12) + d1
++ op2 = op2 + shl(b2, 12) + d2
++ wputhw(op0)
++ if l1a then l1a() end
++ if l2a then l2a() end
++ wputhw(op1)
++ if d1a then d1a() end
++ wputhw(op2)
++ if d2a then d2a() end
+ else
+ werror("unrecognized encoding")
+ end
+
+From 72ba386d14e0c0f63784f9cf52c7bff24bbad5b4 Mon Sep 17 00:00:00 2001
+From: Michael Munday <munday@ca.ibm.com>
+Date: Tue, 31 Jan 2017 16:13:18 -0500
+Subject: [PATCH 257/260] Various minor style changes.
+
+---
+ dynasm/dasm_s390x.lua | 91 +++++++++++++++++++++----------------------
+ 1 file changed, 45 insertions(+), 46 deletions(-)
+
+diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
+index 96ab88fe4..222eb74f8 100644
+--- a/dynasm/dasm_s390x.lua
++++ b/dynasm/dasm_s390x.lua
+@@ -49,7 +49,7 @@ local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+ -- Action name -> action number.
+ local map_action = {}
+ local max_action = 0
+-for n,name in ipairs(action_names) do
++for n, name in ipairs(action_names) do
+ map_action[name] = n-1
+ max_action = n
+ end
+@@ -68,7 +68,7 @@ local secpos = 1
+ -- Dump action names and numbers.
+ local function dumpactions(out)
+ out:write("DynASM encoding engine action codes:\n")
+- for n,name in ipairs(action_names) do
++ for n, name in ipairs(action_names) do
+ local num = map_action[name]
+ out:write(format(" %-10s %02X %d\n", name, num, num))
+ end
+@@ -89,7 +89,7 @@ local function writeactions(out, name)
+ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
+ out:write("static const unsigned short ", name, "[", nn, "] = {")
+ local esc = false -- also need to escape for action arguments
+- for i = 1,nn do
++ for i = 1, nn do
+ assert(out:write("\n 0x", sub(tohex(actlist[i]), 5, 8)))
+ if i ~= nn then assert(out:write(",")) end
+ local name = action_names[actlist[i]+1]
+@@ -160,7 +160,7 @@ local function dumpglobals(out, lvl)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("Global labels:\n")
+- for i=20,next_global-1 do
++ for i=20, next_global-1 do
+ out:write(format(" %s\n", t[i]))
+ end
+ out:write("\n")
+@@ -171,7 +171,7 @@ local function writeglobals(out, prefix)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("enum {\n")
+- for i=20,next_global-1 do
++ for i=20, next_global-1 do
+ out:write(" ", prefix, t[i], ",\n")
+ end
+ out:write(" ", prefix, "_MAX\n};\n")
+@@ -182,7 +182,7 @@ local function writeglobalnames(out, name)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("static const char *const ", name, "[] = {\n")
+- for i=20,next_global-1 do
++ for i=20, next_global-1 do
+ out:write(" \"", t[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+@@ -206,7 +206,7 @@ end})
+ -- Dump extern labels.
+ local function dumpexterns(out, lvl)
+ out:write("Extern labels:\n")
+- for i=0,next_extern-1 do
++ for i=0, next_extern-1 do
+ out:write(format(" %s\n", map_extern_[i]))
+ end
+ out:write("\n")
+@@ -215,7 +215,7 @@ end
+ -- Write extern label names.
+ local function writeexternnames(out, name)
+ out:write("static const char *const ", name, "[] = {\n")
+- for i=0,next_extern-1 do
++ for i=0, next_extern-1 do
+ out:write(" \"", map_extern_[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+@@ -443,7 +443,7 @@ local function parse_mem_lb(arg)
+ return dval, lval, parse_reg(b), dact, lact
+ end
+
+-local function parse_mem_l2b(arg,high_l)
++local function parse_mem_l2b(arg, high_l)
+ local reg = "r1?[0-9]"
+ local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$")
+ if not d then
+@@ -474,7 +474,7 @@ local function parse_mem_l2b(arg,high_l)
+ if high_l then
+ lact = function() waction("LEN4HR", nil, l) end
+ else
+- lact = function() waction("LEN4LR",nil,l) end
++ lact = function() waction("LEN4LR", nil, l) end
+ end
+ end
+ return dval, lval, parse_reg(b), dact, lact
+@@ -519,7 +519,7 @@ local function parse_imm8(imm)
+ end
+ return imm_val, nil
+ end
+- return 0, function() waction("IMM8",nil,imm) end
++ return 0, function() waction("IMM8", nil, imm) end
+ end
+
+ local function parse_mask(mask)
+@@ -1247,7 +1247,7 @@ map_op = {
+ xr_2 = "000000001700RR",
+ xy_2 = "e30000000057RXY-a",
+ }
+-for cond,c in pairs(map_cond) do
++for cond, c in pairs(map_cond) do
+ -- Extended mnemonics for branches.
+ -- TODO: replace 'B' with correct encoding.
+ -- brc
+@@ -1272,15 +1272,15 @@ local function parse_template(params, template, nparams, pos)
+ local p = sub(template, 13)
+ if p == "I" then
+ local imm_val, a = parse_imm8(params[1])
+- op2 = op2 + imm_val;
+- wputhw(op2);
++ op2 = op2 + imm_val
++ wputhw(op2)
+ if a then a() end
+ elseif p == "RI-a" then
+- op1 = op1 + shl(parse_reg(params[1]),4)
+- wputhw(op1);
++ op1 = op1 + shl(parse_reg(params[1]), 4)
++ wputhw(op1)
+ parse_imm16(params[2])
+ elseif p == "RI-b" then
+- op1 = op1 + shl(parse_reg(params[1]),4)
++ op1 = op1 + shl(parse_reg(params[1]), 4)
+ wputhw(op1)
+ local mode, n, s = parse_label(params[2])
+ waction("REL_"..mode, n, s)
+@@ -1292,7 +1292,7 @@ local function parse_template(params, template, nparams, pos)
+ local mode, n, s = parse_label(params[#params])
+ waction("REL_"..mode, n, s)
+ elseif p == "RIE-e" then
+- op0 = op0 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
++ op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+ wputhw1(op0)
+ local mode, n, s = parse_label(params[3])
+ waction("REL_"..mode, n, s)
+@@ -1303,7 +1303,7 @@ local function parse_template(params, template, nparams, pos)
+ parse_imm32(params[2])
+ elseif p == "RIL-b" then
+ op0 = op0 + shl(parse_reg(params[1]), 4)
+- wputhw(op0);
++ wputhw(op0)
+ local mode, n, s = parse_label(params[2])
+ waction("REL_"..mode, n, s)
+ elseif p == "RIL-c" then
+@@ -1315,26 +1315,26 @@ local function parse_template(params, template, nparams, pos)
+ waction("REL_"..mode, n, s)
+ elseif p == "RR" then
+ if #params > 1 then
+- op2 = op2 + shl(parse_reg(params[1]),4)
++ op2 = op2 + shl(parse_reg(params[1]), 4)
+ end
+ op2 = op2 + parse_reg(params[#params])
+ wputhw(op2)
+ elseif p == "RRD" then
+ wputhw(op1)
+- op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
++ op2 = op2 + shl(parse_reg(params[1]), 12) + shl(parse_reg(params[2]), 4) + parse_reg(params[3])
+ wputhw(op2)
+ elseif p == "RRE" then
+- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
++ op2 = op2 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+ wputhw(op1); wputhw(op2)
+ elseif p == "RRF-b" then
+- wputhw(op1);
+- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + shl(parse_mask(params[4]),8)
++ wputhw(op1)
++ op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_reg(params[2]), 12) + parse_reg(params[3]) + shl(parse_mask(params[4]), 8)
+ wputhw(op2)
+ elseif p == "RRF-e" then
+ wputhw(op1)
+- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[2]),12) + parse_reg(params[3])
++ op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_mask(params[2]), 12) + parse_reg(params[3])
+ if params[4] then
+- op2 = op2 + shl(parse_mask2(params[4]),8)
++ op2 = op2 + shl(parse_mask2(params[4]), 8)
+ end
+ wputhw(op2)
+ elseif p == "RS-a" then
+@@ -1357,7 +1357,7 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op1); wputhw(op2)
+ if a then a() end
+ elseif p == "RSI" then
+- op1 = op1 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
++ op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+ wputhw(op1)
+ local mode, n, s = parse_label(params[3])
+ waction("REL_"..mode, n, s)
+@@ -1372,7 +1372,7 @@ local function parse_template(params, template, nparams, pos)
+ local d, x, b, a = parse_mem_bx(params[2])
+ op1 = op1 + shl(parse_reg(params[1]), 4) + x
+ op2 = op2 + shl(b, 12) + d
+- wputhw(op1); wputhw(op2);
++ wputhw(op1); wputhw(op2)
+ if a then a() end
+ elseif p == "RX-b" then
+ local d, x, b, a = parse_mem_bx(params[#params])
+@@ -1381,23 +1381,22 @@ local function parse_template(params, template, nparams, pos)
+ end
+ op1 = op1 + x
+ op2 = op2 + shl(b, 12) + d
+- wputhw(op1);wputhw(op2);
++ wputhw(op1); wputhw(op2)
+ if a then a() end
+ elseif p == "RXE" then
+ local d, x, b, a = parse_mem_bx(params[2])
+ op0 = op0 + shl(parse_reg(params[1]), 4) + x
+ op1 = op1 + shl(b, 12) + d
+- wputhw(op0);
+- wputhw(op1);
++ wputhw(op0); wputhw(op1)
+ if a then a() end
+ wputhw(op2);
+ elseif p == "RXF" then
+ local d, x, b, a = parse_mem_bx(params[3])
+- op0 = op0 + shl(parse_reg(params[2]),4) + x
++ op0 = op0 + shl(parse_reg(params[2]), 4) + x
+ op1 = op1 + shl(b, 12) + d
+- wputhw(op0); wputhw(op1);
++ wputhw(op0); wputhw(op1)
+ if a then a() end
+- op2 = op2 + shl(parse_reg(params[1]),12)
++ op2 = op2 + shl(parse_reg(params[1]), 12)
+ wputhw(op2)
+ elseif p == "RXY-a" then
+ local d, x, b, a = parse_mem_bxy(params[2])
+@@ -1409,7 +1408,7 @@ local function parse_template(params, template, nparams, pos)
+ elseif p == "S" then
+ wputhw(op1);
+ local d, b, a = parse_mem_b(params[1])
+- op2 = op2 + shl(b,12) + d;
++ op2 = op2 + shl(b, 12) + d
+ wputhw(op2)
+ if a then a() end
+ elseif p == "SI" then
+@@ -1418,7 +1417,7 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op1)
+ if a then a() end
+ local d, b, a = parse_mem_b(params[1])
+- op2 = op2 + shl(b,12) + d
++ op2 = op2 + shl(b, 12) + d
+ wputhw(op2)
+ if a then a() end
+ elseif p == "SIL" then
+@@ -1429,9 +1428,9 @@ local function parse_template(params, template, nparams, pos)
+ if a then a() end
+ parse_imm16(params[2])
+ elseif p == "SIY" then
+- local imm8,iact = parse_imm8(params[2])
++ local imm8, iact = parse_imm8(params[2])
+ op0 = op0 + shl(imm8, 8)
+- wputhw(op0);
++ wputhw(op0)
+ if iact then iact() end
+ local d, b, a = parse_mem_by(params[1])
+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
+@@ -1451,11 +1450,11 @@ local function parse_template(params, template, nparams, pos)
+ wputhw(op2)
+ if d2a then d2a() end
+ elseif p == "SS-b" then
+- local high_l=true;
+- local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1],high_l)
+- high_l=false;
+- local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2],high_l)
+- op0 = op0 + shl(l1,4) + l2
++ local high_l = true
++ local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1], high_l)
++ high_l = false
++ local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2], high_l)
++ op0 = op0 + shl(l1, 4) + l2
+ op1 = op1 + shl(b1, 12) + d1
+ op2 = op2 + shl(b2, 12) + d2
+ wputhw(op0)
+@@ -1529,7 +1528,7 @@ end
+ -- Pseudo-opcodes for data storage.
+ map_op[".long_*"] = function(params)
+ if not params then return "imm..." end
+- for _,p in ipairs(params) do
++ for _, p in ipairs(params) do
+ local n = tonumber(p)
+ if not n then werror("bad immediate `"..p.."'") end
+ if n < 0 then n = n + 2^32 end
+@@ -1545,7 +1544,7 @@ map_op[".align_1"] = function(params)
+ if align then
+ local x = align
+ -- Must be a power of 2 in the range (2 ... 256).
+- for i=1,8 do
++ for i=1, 8 do
+ x = x / 2
+ if x == 1 then
+ waction("ALIGN", align-1, nil, 1) -- Action halfword is 2**n-1.
+@@ -1588,7 +1587,7 @@ local function dumptypes(out, lvl)
+ for name in pairs(map_type) do t[#t+1] = name end
+ sort(t)
+ out:write("Type definitions:\n")
+- for _,name in ipairs(t) do
++ for _, name in ipairs(t) do
+ local tp = map_type[name]
+ local reg = tp.reg or ""
+ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
+
+From d8f783f466ef985e017a2497ea602804b1212533 Mon Sep 17 00:00:00 2001
+From: preetikhorjuvenkar <preetikhorjuvenkar29@gmail.com>
+Date: Wed, 14 Feb 2018 10:44:31 +0000
+Subject: [PATCH 259/260] Deleting an empty file. Also minor changes to the
+ test.lua file
+
+---
+ src/jit/dis_s390x.lua | 1 -
+ test/test.lua | 12 +++++++-----
+ 2 files changed, 7 insertions(+), 6 deletions(-)
+ delete mode 100644 src/jit/dis_s390x.lua
+
+diff --git a/src/jit/dis_s390x.lua b/src/jit/dis_s390x.lua
+deleted file mode 100644
+index 3c63033bf..000000000
+--- a/src/jit/dis_s390x.lua
++++ /dev/null
+@@ -1 +0,0 @@
+--- Not yet implemented.
+diff --git a/test/test.lua b/test/test.lua
+index 5637250af..2a5dd9d85 100644
+--- a/test/test.lua
++++ b/test/test.lua
+@@ -210,19 +210,21 @@ print("Length of string1 is ",string.len(string1))
+ print("Repeated String",string.rep(string1,3))
+ print("********************************************")
+
+-print("****************OS Functions *******")
++print("****************Table Functions *******")
+ fruits = {"banana","orange","apple"}
+ print("Table contents are ")
+ for key,value in ipairs(fruits) do print(value) end
+ print("Concatenated string ",table.concat(fruits))
+ print("Concatenated string ",table.concat(fruits,", "))
+ print("Concatenated string ",table.concat(fruits,", ", 2,3))
+-print("Inserting new fruit Mango")
++print("Inserting new fruit mango")
+ table.insert(fruits,"mango")
+ for key,value in ipairs(fruits) do print(value) end
+ print("Concatenated string ",table.concat(fruits,", "))
+ print("The maximum elements in table is",table.maxn(fruits))
+-print("The maximum elements in table is",table.remove(fruits))
+-fruits = {"banana","orange","apple","grapes"}
+-print("The maximum elements in table is",table.sort(fruits))
++print("Table contents after sorting are ",table.sort(fruits))
++for key,value in ipairs(fruits) do print(value) end
++print("Removing the last element from the table:" ,table.remove(fruits))
++print("Table contents are ")
++for key,value in ipairs(fruits) do print(value) end
+ print("********************************************")