diff --git a/source/build-aux/texinfo.tex b/source/build-aux/texinfo.tex
index c9baa11dfed88b67bdc3685c12dbc3ce2ea00e23..e70ea2858471eec90713418aa5551a2d09362163 100644
--- a/source/build-aux/texinfo.tex
+++ b/source/build-aux/texinfo.tex
@@ -3,7 +3,7 @@
 % Load plain if necessary, i.e., if running under initex.
 \expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi
 %
-\def\texinfoversion{2015-08-27.20}
+\def\texinfoversion{2015-09-04.11}
 %
 % Copyright 1985, 1986, 1988, 1990, 1991, 1992, 1993, 1994, 1995,
 % 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
@@ -4667,6 +4667,28 @@ end
 \let\indexlbrace\relax
 \let\indexrbrace\relax
 
+{\catcode`\@=0
+\catcode`\\=13
+  @gdef@backslashdisappear{@def\{}}
+}
+
+{
+\catcode`\<=13
+\catcode`\-=13
+  \gdef\indexnonalnumdisappear{%
+    \backslashdisappear
+    \def-{}%
+    \def<{}%
+  }
+
+  \gdef\indexnonalnumreappear{%
+    \useindexbackslash
+    \let-\normaldash
+    \let<\normalless
+  }
+}
+
+
 % \indexnofonts is used when outputting the strings to sort the index
 % by, and when constructing control sequence names.  It eliminates all
 % control sequences and just writes whatever the best ASCII sort string
@@ -4828,13 +4850,23 @@ end
 % Definition for writing index entry text.
 \def\sortas#1{\ignorespaces}%
 
-% Definition for writing index entry sort key.  @sortas{} must occur at
+% Definition for writing index entry sort key.  @sortas{} should occur at
 % the beginning of the index entry, like
-% @cindex @sortas{september} \september
-\def\indexwritesortas#1#2\endofindexentry{#1}
+%     @cindex @sortas{september} \september
+% The \ignorespaces takes care of following space, but there's no way
+% to remove space before it.
+{
+\catcode`\-=13
+\gdef\indexwritesortas{%
+  \begingroup
+  \indexnonalnumreappear
+  \indexwritesortasxxx}
+\gdef\indexwritesortasxxx#1{%
+  \xdef\indexsortkey{#1}\endgroup}
+}
 
 
-% Write the entry in \toks0 to the index file:
+% Write the entry in \toks0 to the index file.
 %
 \def\dosubindwrite{%
   % Put the index entry in the margin if desired.
@@ -4849,10 +4881,15 @@ end
   % Get the string to sort by, by processing the index entry with all
   % font commands turned off.
   {\indexnofonts
-   \def\endofindexentry{}%
+   \xdef\indexsortkey{}%
    \let\sortas=\indexwritesortas
-   \edef\temp{\the\toks0 \noexpand\endofindexentry}%
-   \xdef\indexsortkey{\temp}%
+   \indexnonalnumdisappear
+   \edef\temp{\the\toks0}%
+   \setbox\dummybox = \hbox{\temp}% Make sure to execute any \sortas
+   \ifx\indexsortkey\empty
+     \xdef\indexsortkey{\temp}%
+     \ifx\indexsortkey\empty\xdef\indexsortkey{ }\fi
+   \fi
   }%
   %
   % Set up the complete index entry, with both the sort key and
@@ -4866,6 +4903,7 @@ end
   }%
   \temp
 }
+\newbox\dummybox % used above
 
 % Take care of unwanted page breaks/skips around a whatsit:
 %
@@ -5026,11 +5064,10 @@ end
 % These macros are used by the sorted index file itself.
 % Change them to control the appearance of the index.
 
-\let\normalhyphen=-
 {\catcode`\/=13 \catcode`\-=13 \catcode`\^=13 \catcode`\~=13 \catcode`\_=13
 \catcode`\|=13 \catcode`\<=13 \catcode`\>=13 \catcode`\+=13 \catcode`\"=13
 \catcode`\$=3
-\gdef\initialfonts{%
+\gdef\initialglyphs{%
   % Some changes for non-alphabetic characters.  Using the glyphs from the
   % math fonts looks more consistent than the typewriter font used elsewhere
   % for these characters.
@@ -5040,12 +5077,11 @@ end
   % Can't get bold backslash so don't use bold forward slash
   \catcode`\/=13
   \def/{{\secrmnotbold \normalslash}}%
-  \catcode`\-=13
-  \def-{{\normalhyphen\normalhyphen}}%
+  \def-{{\normaldash\normaldash}}% en dash `--'
   \let^=\normalcaret
   \let~=\normaltilde
   \def\_{%
-    \leavevmode \kern.07em \vbox{\hrule width.33em height.06ex}\kern .07em }
+    \leavevmode \kern.07em \vbox{\hrule width.33em height.06ex}\kern .07em }%
   \def|{$\vert$}%
   \def<{$\less$}%
   \def>{$\gtr$}%
@@ -5054,7 +5090,7 @@ end
 
 \def\initial{%
   \bgroup
-  \initialfonts
+  \initialglyphs
   \initialx
 }
 
@@ -5261,7 +5297,7 @@ end
 }
 
 % The double-column output routine for all double-column pages except
-% the last.
+% the last, which is done by \balancecolumns.
 %
 \def\doublecolumnout{%
   \splittopskip=\topskip \splitmaxdepth=\maxdepth
@@ -5343,7 +5379,8 @@ end
   \pagegoal = \vsize
 }
 %
-% Only called for the last of the double column material.
+% Only called for the last of the double column material.  \doublecolumnout
+% does the others.
 \def\balancecolumns{%
   \setbox0 = \vbox{\unvbox255}% like \box255 but more efficient, see p.120.
   \dimen@ = \ht0
@@ -5351,26 +5388,33 @@ end
   \advance\dimen@ by-\baselineskip
   \ifdim\dimen@<14\baselineskip
     % Don't split a short final column in two.
-    \global\setbox1 = \copy0
-    \global\setbox3 = \vbox{}%
+    \setbox2=\vbox{}%
   \else
-  \divide\dimen@ by 2 % target to split to
-  %debug\message{final 2-column material height=\the\ht0, target=\the\dimen@.}%
-  \splittopskip = \topskip
-  % Loop until we get a decent breakpoint.
-  {%
-    \vbadness = 10000
-    \loop
-      \global\setbox3 = \copy0
-      \global\setbox1 = \vsplit3 to \dimen@
-    \ifdim\ht3>\dimen@
-      \global\advance\dimen@ by 1pt
-    \repeat
-  }%
+    \divide\dimen@ by 2 % target to split to
+    \dimen@ii = \dimen@
+    \splittopskip = \topskip
+    % Loop until the second column is no higher than the first
+    {%
+      \vbadness = 10000
+      \loop
+        \global\setbox3 = \copy0
+        \global\setbox1 = \vsplit3 to \dimen@
+      \ifdim\ht3>\dimen@
+        \global\advance\dimen@ by 1pt
+      \repeat
+    }%
+    \multiply\dimen@ii by 4
+    \divide\dimen@ii by 5
+    \ifdim\ht3<\dimen@ii
+      % Column heights are too different, so don't make their bottoms
+      % flush with each other
+      \setbox0=\vbox to\dimen@{\unvbox1\vfill}%
+      \setbox2=\vbox to\dimen@{\unvbox3\vfill}%
+    \else
+      \setbox0=\vbox to\dimen@{\unvbox1}%
+      \setbox2=\vbox to\dimen@{\unvbox3}%
+    \fi
   \fi
-  %debug\message{split to \the\dimen@, column heights: \the\ht1, \the\ht3.}%
-  \setbox0=\vbox to\dimen@{\unvbox1\vfill}%
-  \setbox2=\vbox to\dimen@{\unvbox3\vfill}%
   %
   \pagesofar
 }
@@ -10321,6 +10365,8 @@ directory should work if nowhere else does.}
 \catcode`\>=\active \def\activegtr{{\tt \gtr}}\let> = \activegtr
 \catcode`\+=\active \def+{{\tt \char 43}}
 \catcode`\$=\active \def${\ifusingit{{\sl\$}}\normaldollar}%$ font-lock fix
+\catcode`\-=\active \let-=\normaldash
+
 
 % used for headline/footline in the output routine, in case the page
 % breaks in the middle of an @tex block.
@@ -10376,8 +10422,7 @@ directory should work if nowhere else does.}
 @gdef@otherbackslash{@let\=@realbackslash}
 
 % Same as @turnoffactive except outputs \ as {\tt\char`\\} instead of
-% the literal character `\'.  Also revert - to its normal character, in
-% case the active - from code has slipped in.
+% the literal character `\'.
 %
 {@catcode`- = @active
  @gdef@normalturnoffactive{%
diff --git a/source/libs/README b/source/libs/README
index 5d18f22e8a5b43d012a2021c8cebf142a47f01ce..d552850500bd42d5990e9a363e0792e5d27ecdee 100644
--- a/source/libs/README
+++ b/source/libs/README
@@ -1,4 +1,4 @@
-$Id: README 38252 2015-08-31 07:58:01Z peter $
+$Id: README 38316 2015-09-07 10:07:39Z peter $
 Public domain.  Originally created by Karl Berry, 2005.
 
 Libraries we compile for TeX Live.
@@ -24,7 +24,7 @@ gmp 6.0.0 - checked 25mar14
 graphite2 1.3.1 - checked 31aug15
   http://sourceforge.net/projects/silgraphite/files/graphite2/
 
-harfbuzz 1.0.2 - checked 20aug15
+harfbuzz 1.0.3 - checked 2sep15
   http://www.freedesktop.org/software/harfbuzz/release/
 
 icu 55.1 (release) - checked 12apr15
@@ -40,8 +40,8 @@ libpng 1.6.18 - checked 1aug15
 lua 5.2.3 - checked 11mar15
   http://www.lua.org/ftp/
 
-luajit 2.0.4 - checked 12jun15
-  http://luajit.org/download/LuaJIT-2.0.4.tar.gz
+luajit 2.1.0-beta1 - checked 7sep15
+  http://luajit.org/download/LuaJIT-2.1.0-beta1.tar.gz
 
 mpfr 3.1.3 - checked 20jun15
   http://ftp.gnu.org/gnu/mpfr/
diff --git a/source/libs/luajit/ChangeLog b/source/libs/luajit/ChangeLog
index 782d9c0b104e1d7a0e200534f4a0fc38b8ad1e89..ddf72b85fd14f94afdbd30e3d83605901e048b7a 100644
--- a/source/libs/luajit/ChangeLog
+++ b/source/libs/luajit/ChangeLog
@@ -1,3 +1,9 @@
+2015-09-07  Peter Breitenlohner  <peb@mppmu.mpg.de>
+
+	Import LuaJIT-2.1.0-beta1.
+	* version.ac: Adjusted.
+	* Makefile.am, configure.ac, m4/lj-system.m4: Adapted.
+
 2015-07-06  Peter Breitenlohner  <peb@mppmu.mpg.de>
 
 	* Makefile.am: Better dependencies for 'make check'.
diff --git a/source/libs/luajit/LuaJIT-2.0.4-PATCHES/patch-06-ppc-darwin b/source/libs/luajit/LuaJIT-2.0.4-PATCHES/patch-06-ppc-darwin
deleted file mode 100644
index 35ed495231a0470f7c53f6939541a2e55ab2e2c7..0000000000000000000000000000000000000000
--- a/source/libs/luajit/LuaJIT-2.0.4-PATCHES/patch-06-ppc-darwin
+++ /dev/null
@@ -1,26 +0,0 @@
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/host/buildvm.c LuaJIT-2.0.4/src/host/buildvm.c
---- LuaJIT-2.0.4.orig/src/host/buildvm.c	2015-05-14 20:30:00.000000000 +0200
-+++ LuaJIT-2.0.4/src/host/buildvm.c	2015-06-12 11:06:32.000000000 +0200
-@@ -113,7 +113,7 @@
-       name[0] = '@';
-     else
-       *p = '\0';
--#elif (LJ_TARGET_PPC  || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE
-+#elif ((LJ_TARGET_PPC && !LJ_TARGET_OSX)  || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE
-     /* Keep @plt. */
- #else
-     *p = '\0';
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lj_arch.h LuaJIT-2.0.4/src/lj_arch.h
---- LuaJIT-2.0.4.orig/src/lj_arch.h	2015-05-14 20:30:00.000000000 +0200
-+++ LuaJIT-2.0.4/src/lj_arch.h	2015-06-12 11:06:32.000000000 +0200
-@@ -301,7 +301,7 @@
- #if __GNUC__ < 4
- #error "Need at least GCC 4.0 or newer"
- #endif
--#elif LJ_TARGET_ARM
-+#elif LJ_TARGET_ARM || LJ_TARGET_PPC
- #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
- #error "Need at least GCC 4.2 or newer"
- #endif
-Only in LuaJIT-2.0.4/src: lj_arch.h.orig
-Only in LuaJIT-2.0.4/src: Makefile.orig
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_bit.c b/source/libs/luajit/LuaJIT-2.0.4/src/lib_bit.c
deleted file mode 100644
index 583e04b0c0171b257d3ecbecec75940ab365a6c4..0000000000000000000000000000000000000000
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lib_bit.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
-** Bit manipulation library.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
-*/
-
-#define lib_bit_c
-#define LUA_LIB
-
-#include "lua.h"
-#include "lauxlib.h"
-#include "lualib.h"
-
-#include "lj_obj.h"
-#include "lj_err.h"
-#include "lj_str.h"
-#include "lj_lib.h"
-
-/* ------------------------------------------------------------------------ */
-
-#define LJLIB_MODULE_bit
-
-LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_unary IR_TOBIT)
-{
-  lj_lib_checknumber(L, 1);
-  return FFH_RETRY;
-}
-LJLIB_ASM_(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
-LJLIB_ASM_(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
-
-LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
-{
-  lj_lib_checknumber(L, 1);
-  lj_lib_checkbit(L, 2);
-  return FFH_RETRY;
-}
-LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
-LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
-LJLIB_ASM_(bit_rol)		LJLIB_REC(bit_shift IR_BROL)
-LJLIB_ASM_(bit_ror)		LJLIB_REC(bit_shift IR_BROR)
-
-LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
-{
-  int i = 0;
-  do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
-  return FFH_RETRY;
-}
-LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
-LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
-
-/* ------------------------------------------------------------------------ */
-
-LJLIB_CF(bit_tohex)
-{
-  uint32_t b = (uint32_t)lj_lib_checkbit(L, 1);
-  int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2);
-  const char *hexdigits = "0123456789abcdef";
-  char buf[8];
-  if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
-  if (n > 8) n = 8;
-  for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
-  lua_pushlstring(L, buf, (size_t)n);
-  return 1;
-}
-
-/* ------------------------------------------------------------------------ */
-
-#include "lj_libdef.h"
-
-LUALIB_API int luaopen_bit(lua_State *L)
-{
-  LJ_LIB_REG(L, LUA_BITLIBNAME, bit);
-  return 1;
-}
-
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_lex.c b/source/libs/luajit/LuaJIT-2.0.4/src/lj_lex.c
deleted file mode 100644
index e1dc3cdfde2654720ea1e957a40c2570f6a9c5c0..0000000000000000000000000000000000000000
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_lex.c
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
-** Lexical analyzer.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
-**
-** Major portions taken verbatim or adapted from the Lua interpreter.
-** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
-*/
-
-#define lj_lex_c
-#define LUA_CORE
-
-#include "lj_obj.h"
-#include "lj_gc.h"
-#include "lj_err.h"
-#include "lj_str.h"
-#if LJ_HASFFI
-#include "lj_tab.h"
-#include "lj_ctype.h"
-#include "lj_cdata.h"
-#include "lualib.h"
-#endif
-#include "lj_state.h"
-#include "lj_lex.h"
-#include "lj_parse.h"
-#include "lj_char.h"
-#include "lj_strscan.h"
-
-/* Lua lexer token names. */
-static const char *const tokennames[] = {
-#define TKSTR1(name)		#name,
-#define TKSTR2(name, sym)	#sym,
-TKDEF(TKSTR1, TKSTR2)
-#undef TKSTR1
-#undef TKSTR2
-  NULL
-};
-
-/* -- Buffer handling ----------------------------------------------------- */
-
-#define char2int(c)		((int)(uint8_t)(c))
-#define next(ls) \
-  (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
-#define save_and_next(ls)	(save(ls, ls->current), next(ls))
-#define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')
-#define END_OF_STREAM		(-1)
-
-static int fillbuf(LexState *ls)
-{
-  size_t sz;
-  const char *buf = ls->rfunc(ls->L, ls->rdata, &sz);
-  if (buf == NULL || sz == 0) return END_OF_STREAM;
-  ls->n = (MSize)sz - 1;
-  ls->p = buf;
-  return char2int(*(ls->p++));
-}
-
-static LJ_NOINLINE void save_grow(LexState *ls, int c)
-{
-  MSize newsize;
-  if (ls->sb.sz >= LJ_MAX_STR/2)
-    lj_lex_error(ls, 0, LJ_ERR_XELEM);
-  newsize = ls->sb.sz * 2;
-  lj_str_resizebuf(ls->L, &ls->sb, newsize);
-  ls->sb.buf[ls->sb.n++] = (char)c;
-}
-
-static LJ_AINLINE void save(LexState *ls, int c)
-{
-  if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz))
-    save_grow(ls, c);
-  else
-    ls->sb.buf[ls->sb.n++] = (char)c;
-}
-
-static void inclinenumber(LexState *ls)
-{
-  int old = ls->current;
-  lua_assert(currIsNewline(ls));
-  next(ls);  /* skip `\n' or `\r' */
-  if (currIsNewline(ls) && ls->current != old)
-    next(ls);  /* skip `\n\r' or `\r\n' */
-  if (++ls->linenumber >= LJ_MAX_LINE)
-    lj_lex_error(ls, ls->token, LJ_ERR_XLINES);
-}
-
-/* -- Scanner for terminals ----------------------------------------------- */
-
-/* Parse a number literal. */
-static void lex_number(LexState *ls, TValue *tv)
-{
-  StrScanFmt fmt;
-  int c, xp = 'e';
-  lua_assert(lj_char_isdigit(ls->current));
-  if ((c = ls->current) == '0') {
-    save_and_next(ls);
-    if ((ls->current | 0x20) == 'x') xp = 'p';
-  }
-  while (lj_char_isident(ls->current) || ls->current == '.' ||
-	 ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) {
-    c = ls->current;
-    save_and_next(ls);
-  }
-  save(ls, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv,
-	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
-	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
-  if (LJ_DUALNUM && fmt == STRSCAN_INT) {
-    setitype(tv, LJ_TISNUM);
-  } else if (fmt == STRSCAN_NUM) {
-    /* Already in correct format. */
-#if LJ_HASFFI
-  } else if (fmt != STRSCAN_ERROR) {
-    lua_State *L = ls->L;
-    GCcdata *cd;
-    lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG);
-    if (!ctype_ctsG(G(L))) {
-      ptrdiff_t oldtop = savestack(L, L->top);
-      luaopen_ffi(L);  /* Load FFI library on-demand. */
-      L->top = restorestack(L, oldtop);
-    }
-    if (fmt == STRSCAN_IMAG) {
-      cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
-      ((double *)cdataptr(cd))[0] = 0;
-      ((double *)cdataptr(cd))[1] = numV(tv);
-    } else {
-      cd = lj_cdata_new_(L, fmt==STRSCAN_I64 ? CTID_INT64 : CTID_UINT64, 8);
-      *(uint64_t *)cdataptr(cd) = tv->u64;
-    }
-    lj_parse_keepcdata(ls, tv, cd);
-#endif
-  } else {
-    lua_assert(fmt == STRSCAN_ERROR);
-    lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
-  }
-}
-
-static int skip_sep(LexState *ls)
-{
-  int count = 0;
-  int s = ls->current;
-  lua_assert(s == '[' || s == ']');
-  save_and_next(ls);
-  while (ls->current == '=') {
-    save_and_next(ls);
-    count++;
-  }
-  return (ls->current == s) ? count : (-count) - 1;
-}
-
-static void read_long_string(LexState *ls, TValue *tv, int sep)
-{
-  save_and_next(ls);  /* skip 2nd `[' */
-  if (currIsNewline(ls))  /* string starts with a newline? */
-    inclinenumber(ls);  /* skip it */
-  for (;;) {
-    switch (ls->current) {
-    case END_OF_STREAM:
-      lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
-      break;
-    case ']':
-      if (skip_sep(ls) == sep) {
-	save_and_next(ls);  /* skip 2nd `]' */
-	goto endloop;
-      }
-      break;
-    case '\n':
-    case '\r':
-      save(ls, '\n');
-      inclinenumber(ls);
-      if (!tv) lj_str_resetbuf(&ls->sb);  /* avoid wasting space */
-      break;
-    default:
-      if (tv) save_and_next(ls);
-      else next(ls);
-      break;
-    }
-  } endloop:
-  if (tv) {
-    GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep),
-				      ls->sb.n - 2*(2 + (MSize)sep));
-    setstrV(ls->L, tv, str);
-  }
-}
-
-static void read_string(LexState *ls, int delim, TValue *tv)
-{
-  save_and_next(ls);
-  while (ls->current != delim) {
-    switch (ls->current) {
-    case END_OF_STREAM:
-      lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
-      continue;
-    case '\n':
-    case '\r':
-      lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
-      continue;
-    case '\\': {
-      int c = next(ls);  /* Skip the '\\'. */
-      switch (c) {
-      case 'a': c = '\a'; break;
-      case 'b': c = '\b'; break;
-      case 'f': c = '\f'; break;
-      case 'n': c = '\n'; break;
-      case 'r': c = '\r'; break;
-      case 't': c = '\t'; break;
-      case 'v': c = '\v'; break;
-      case 'x':  /* Hexadecimal escape '\xXX'. */
-	c = (next(ls) & 15u) << 4;
-	if (!lj_char_isdigit(ls->current)) {
-	  if (!lj_char_isxdigit(ls->current)) goto err_xesc;
-	  c += 9 << 4;
-	}
-	c += (next(ls) & 15u);
-	if (!lj_char_isdigit(ls->current)) {
-	  if (!lj_char_isxdigit(ls->current)) goto err_xesc;
-	  c += 9;
-	}
-	break;
-      case 'z':  /* Skip whitespace. */
-	next(ls);
-	while (lj_char_isspace(ls->current))
-	  if (currIsNewline(ls)) inclinenumber(ls); else next(ls);
-	continue;
-      case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue;
-      case '\\': case '\"': case '\'': break;
-      case END_OF_STREAM: continue;
-      default:
-	if (!lj_char_isdigit(c))
-	  goto err_xesc;
-	c -= '0';  /* Decimal escape '\ddd'. */
-	if (lj_char_isdigit(next(ls))) {
-	  c = c*10 + (ls->current - '0');
-	  if (lj_char_isdigit(next(ls))) {
-	    c = c*10 + (ls->current - '0');
-	    if (c > 255) {
-	    err_xesc:
-	      lj_lex_error(ls, TK_string, LJ_ERR_XESC);
-	    }
-	    next(ls);
-	  }
-	}
-	save(ls, c);
-	continue;
-      }
-      save(ls, c);
-      next(ls);
-      continue;
-      }
-    default:
-      save_and_next(ls);
-      break;
-    }
-  }
-  save_and_next(ls);  /* skip delimiter */
-  setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2));
-}
-
-/* -- Main lexical scanner ------------------------------------------------ */
-
-static int llex(LexState *ls, TValue *tv)
-{
-  lj_str_resetbuf(&ls->sb);
-  for (;;) {
-    if (lj_char_isident(ls->current)) {
-      GCstr *s;
-      if (lj_char_isdigit(ls->current)) {  /* Numeric literal. */
-	lex_number(ls, tv);
-	return TK_number;
-      }
-      /* Identifier or reserved word. */
-      do {
-	save_and_next(ls);
-      } while (lj_char_isident(ls->current));
-      s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);
-      setstrV(ls->L, tv, s);
-      if (s->reserved > 0)  /* Reserved word? */
-	return TK_OFS + s->reserved;
-      return TK_name;
-    }
-    switch (ls->current) {
-    case '\n':
-    case '\r':
-      inclinenumber(ls);
-      continue;
-    case ' ':
-    case '\t':
-    case '\v':
-    case '\f':
-      next(ls);
-      continue;
-    case '-':
-      next(ls);
-      if (ls->current != '-') return '-';
-      /* else is a comment */
-      next(ls);
-      if (ls->current == '[') {
-	int sep = skip_sep(ls);
-	lj_str_resetbuf(&ls->sb);  /* `skip_sep' may dirty the buffer */
-	if (sep >= 0) {
-	  read_long_string(ls, NULL, sep);  /* long comment */
-	  lj_str_resetbuf(&ls->sb);
-	  continue;
-	}
-      }
-      /* else short comment */
-      while (!currIsNewline(ls) && ls->current != END_OF_STREAM)
-	next(ls);
-      continue;
-    case '[': {
-      int sep = skip_sep(ls);
-      if (sep >= 0) {
-	read_long_string(ls, tv, sep);
-	return TK_string;
-      } else if (sep == -1) {
-	return '[';
-      } else {
-	lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
-	continue;
-      }
-      }
-    case '=':
-      next(ls);
-      if (ls->current != '=') return '='; else { next(ls); return TK_eq; }
-    case '<':
-      next(ls);
-      if (ls->current != '=') return '<'; else { next(ls); return TK_le; }
-    case '>':
-      next(ls);
-      if (ls->current != '=') return '>'; else { next(ls); return TK_ge; }
-    case '~':
-      next(ls);
-      if (ls->current != '=') return '~'; else { next(ls); return TK_ne; }
-    case ':':
-      next(ls);
-      if (ls->current != ':') return ':'; else { next(ls); return TK_label; }
-    case '"':
-    case '\'':
-      read_string(ls, ls->current, tv);
-      return TK_string;
-    case '.':
-      save_and_next(ls);
-      if (ls->current == '.') {
-	next(ls);
-	if (ls->current == '.') {
-	  next(ls);
-	  return TK_dots;   /* ... */
-	}
-	return TK_concat;   /* .. */
-      } else if (!lj_char_isdigit(ls->current)) {
-	return '.';
-      } else {
-	lex_number(ls, tv);
-	return TK_number;
-      }
-    case END_OF_STREAM:
-      return TK_eof;
-    default: {
-      int c = ls->current;
-      next(ls);
-      return c;  /* Single-char tokens (+ - / ...). */
-    }
-    }
-  }
-}
-
-/* -- Lexer API ----------------------------------------------------------- */
-
-/* Setup lexer state. */
-int lj_lex_setup(lua_State *L, LexState *ls)
-{
-  int header = 0;
-  ls->L = L;
-  ls->fs = NULL;
-  ls->n = 0;
-  ls->p = NULL;
-  ls->vstack = NULL;
-  ls->sizevstack = 0;
-  ls->vtop = 0;
-  ls->bcstack = NULL;
-  ls->sizebcstack = 0;
-  ls->token = 0;
-  ls->lookahead = TK_eof;  /* No look-ahead token. */
-  ls->linenumber = 1;
-  ls->lastline = 1;
-  lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF);
-  next(ls);  /* Read-ahead first char. */
-  if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb &&
-      char2int(ls->p[1]) == 0xbf) {  /* Skip UTF-8 BOM (if buffered). */
-    ls->n -= 2;
-    ls->p += 2;
-    next(ls);
-    header = 1;
-  }
-  if (ls->current == '#') {  /* Skip POSIX #! header line. */
-    do {
-      next(ls);
-      if (ls->current == END_OF_STREAM) return 0;
-    } while (!currIsNewline(ls));
-    inclinenumber(ls);
-    header = 1;
-  }
-  if (ls->current == LUA_SIGNATURE[0]) {  /* Bytecode dump. */
-    if (header) {
-      /*
-      ** Loading bytecode with an extra header is disabled for security
-      ** reasons. This may circumvent the usual check for bytecode vs.
-      ** Lua code by looking at the first char. Since this is a potential
-      ** security violation no attempt is made to echo the chunkname either.
-      */
-      setstrV(L, L->top++, lj_err_str(L, LJ_ERR_BCBAD));
-      lj_err_throw(L, LUA_ERRSYNTAX);
-    }
-    return 1;
-  }
-  return 0;
-}
-
-/* Cleanup lexer state. */
-void lj_lex_cleanup(lua_State *L, LexState *ls)
-{
-  global_State *g = G(L);
-  lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
-  lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
-  lj_str_freebuf(g, &ls->sb);
-}
-
-void lj_lex_next(LexState *ls)
-{
-  ls->lastline = ls->linenumber;
-  if (LJ_LIKELY(ls->lookahead == TK_eof)) {  /* No lookahead token? */
-    ls->token = llex(ls, &ls->tokenval);  /* Get next token. */
-  } else {  /* Otherwise return lookahead token. */
-    ls->token = ls->lookahead;
-    ls->lookahead = TK_eof;
-    ls->tokenval = ls->lookaheadval;
-  }
-}
-
-LexToken lj_lex_lookahead(LexState *ls)
-{
-  lua_assert(ls->lookahead == TK_eof);
-  ls->lookahead = llex(ls, &ls->lookaheadval);
-  return ls->lookahead;
-}
-
-const char *lj_lex_token2str(LexState *ls, LexToken token)
-{
-  if (token > TK_OFS)
-    return tokennames[token-TK_OFS-1];
-  else if (!lj_char_iscntrl(token))
-    return lj_str_pushf(ls->L, "%c", token);
-  else
-    return lj_str_pushf(ls->L, "char(%d)", token);
-}
-
-void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...)
-{
-  const char *tok;
-  va_list argp;
-  if (token == 0) {
-    tok = NULL;
-  } else if (token == TK_name || token == TK_string || token == TK_number) {
-    save(ls, '\0');
-    tok = ls->sb.buf;
-  } else {
-    tok = lj_lex_token2str(ls, token);
-  }
-  va_start(argp, em);
-  lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp);
-  va_end(argp);
-}
-
-void lj_lex_init(lua_State *L)
-{
-  uint32_t i;
-  for (i = 0; i < TK_RESERVED; i++) {
-    GCstr *s = lj_str_newz(L, tokennames[i]);
-    fixstring(s);  /* Reserved words are never collected. */
-    s->reserved = (uint8_t)(i+1);
-  }
-}
-
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_str.h b/source/libs/luajit/LuaJIT-2.0.4/src/lj_str.h
deleted file mode 100644
index 99697051d9b5fb0987275699875381ced1ac5ac3..0000000000000000000000000000000000000000
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_str.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
-** String handling.
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
-*/
-
-#ifndef _LJ_STR_H
-#define _LJ_STR_H
-
-#include <stdarg.h>
-
-#include "lj_obj.h"
-
-/* String interning. */
-LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
-LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
-LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
-LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
-
-#define lj_str_newz(L, s)	(lj_str_new(L, s, strlen(s)))
-#define lj_str_newlit(L, s)	(lj_str_new(L, "" s, sizeof(s)-1))
-
-/* Type conversions. */
-LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
-LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
-LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
-LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
-LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o);
-
-#define LJ_STR_INTBUF		(1+10)
-#define LJ_STR_NUMBUF		LUAI_MAXNUMBER2STR
-
-/* String formatting. */
-LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
-LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
-#if defined(__GNUC__)
-  __attribute__ ((format (printf, 2, 3)))
-#endif
-  ;
-
-/* Resizable string buffers. Struct definition in lj_obj.h. */
-LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
-
-#define lj_str_initbuf(sb)	((sb)->buf = NULL, (sb)->sz = 0)
-#define lj_str_resetbuf(sb)	((sb)->n = 0)
-#define lj_str_resizebuf(L, sb, size) \
-  ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
-   (sb)->sz = (size))
-#define lj_str_freebuf(g, sb)	lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
-
-#endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/vm_ppcspe.dasc b/source/libs/luajit/LuaJIT-2.0.4/src/vm_ppcspe.dasc
deleted file mode 100644
index 53ea2d96581c182d82666d14e27785dbcff6afd2..0000000000000000000000000000000000000000
--- a/source/libs/luajit/LuaJIT-2.0.4/src/vm_ppcspe.dasc
+++ /dev/null
@@ -1,3691 +0,0 @@
-|// Low-level VM code for PowerPC/e500 CPUs.
-|// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
-|
-|.arch ppc
-|.section code_op, code_sub
-|
-|.actionlist build_actionlist
-|.globals GLOB_
-|.globalnames globnames
-|.externnames extnames
-|
-|// Note: The ragged indentation of the instructions is intentional.
-|//       The starting columns indicate data dependencies.
-|
-|//-----------------------------------------------------------------------
-|
-|// Fixed register assignments for the interpreter.
-|// Don't use: r1 = sp, r2 and r13 = reserved and/or small data area ptr
-|
-|// The following must be C callee-save (but BASE is often refetched).
-|.define BASE,		r14	// Base of current Lua stack frame.
-|.define KBASE,		r15	// Constants of current Lua function.
-|.define PC,		r16	// Next PC.
-|.define DISPATCH,	r17	// Opcode dispatch table.
-|.define LREG,		r18	// Register holding lua_State (also in SAVE_L).
-|.define MULTRES,	r19	// Size of multi-result: (nresults+1)*8.
-|
-|// Constants for vectorized type-comparisons (hi+low GPR). C callee-save.
-|.define TISNUM,	r22
-|.define TISSTR,	r23
-|.define TISTAB,	r24
-|.define TISFUNC,	r25
-|.define TISNIL,	r26
-|.define TOBIT,		r27
-|.define ZERO,		TOBIT	// Zero in lo word.
-|
-|// The following temporaries are not saved across C calls, except for RA.
-|.define RA,		r20	// Callee-save.
-|.define RB,		r10
-|.define RC,		r11
-|.define RD,		r12
-|.define INS,		r7	// Overlaps CARG5.
-|
-|.define TMP0,		r0
-|.define TMP1,		r8
-|.define TMP2,		r9
-|.define TMP3,		r6	// Overlaps CARG4.
-|
-|// Saved temporaries.
-|.define SAVE0,		r21
-|
-|// Calling conventions.
-|.define CARG1,		r3
-|.define CARG2,		r4
-|.define CARG3,		r5
-|.define CARG4,		r6	// Overlaps TMP3.
-|.define CARG5,		r7	// Overlaps INS.
-|
-|.define CRET1,		r3
-|.define CRET2,		r4
-|
-|// Stack layout while in interpreter. Must match with lj_frame.h.
-|.define SAVE_LR,	188(sp)
-|.define CFRAME_SPACE,	184	// Delta for sp.
-|// Back chain for sp:	184(sp)	<-- sp entering interpreter
-|.define SAVE_r31,	176(sp)	// 64 bit register saves.
-|.define SAVE_r30,	168(sp)
-|.define SAVE_r29,	160(sp)
-|.define SAVE_r28,	152(sp)
-|.define SAVE_r27,	144(sp)
-|.define SAVE_r26,	136(sp)
-|.define SAVE_r25,	128(sp)
-|.define SAVE_r24,	120(sp)
-|.define SAVE_r23,	112(sp)
-|.define SAVE_r22,	104(sp)
-|.define SAVE_r21,	96(sp)
-|.define SAVE_r20,	88(sp)
-|.define SAVE_r19,	80(sp)
-|.define SAVE_r18,	72(sp)
-|.define SAVE_r17,	64(sp)
-|.define SAVE_r16,	56(sp)
-|.define SAVE_r15,	48(sp)
-|.define SAVE_r14,	40(sp)
-|.define SAVE_CR,	36(sp)
-|.define UNUSED1,	32(sp)
-|.define SAVE_ERRF,	28(sp)	// 32 bit C frame info.
-|.define SAVE_NRES,	24(sp)
-|.define SAVE_CFRAME,	20(sp)
-|.define SAVE_L,	16(sp)
-|.define SAVE_PC,	12(sp)
-|.define SAVE_MULTRES,	8(sp)
-|// Next frame lr:	4(sp)
-|// Back chain for sp:	0(sp)	<-- sp while in interpreter
-|
-|.macro save_, reg; evstdd reg, SAVE_..reg; .endmacro
-|.macro rest_, reg; evldd reg, SAVE_..reg; .endmacro
-|
-|.macro saveregs
-|  stwu sp, -CFRAME_SPACE(sp)
-|  save_ r14; save_ r15; save_ r16; save_ r17; save_ r18; save_ r19
-|  mflr r0; mfcr r12
-|  save_ r20; save_ r21; save_ r22; save_ r23; save_ r24; save_ r25
-|  stw  r0, SAVE_LR; stw r12, SAVE_CR
-|  save_ r26; save_ r27; save_ r28; save_ r29; save_ r30; save_ r31
-|.endmacro
-|
-|.macro restoreregs
-|  lwz r0, SAVE_LR; lwz r12, SAVE_CR
-|  rest_ r14; rest_ r15; rest_ r16; rest_ r17; rest_ r18; rest_ r19
-|  mtlr r0; mtcrf 0x38, r12
-|  rest_ r20; rest_ r21; rest_ r22; rest_ r23; rest_ r24; rest_ r25
-|  rest_ r26; rest_ r27; rest_ r28; rest_ r29; rest_ r30; rest_ r31
-|  addi sp, sp, CFRAME_SPACE
-|.endmacro
-|
-|// Type definitions. Some of these are only used for documentation.
-|.type L,		lua_State,	LREG
-|.type GL,		global_State
-|.type TVALUE,		TValue
-|.type GCOBJ,		GCobj
-|.type STR,		GCstr
-|.type TAB,		GCtab
-|.type LFUNC,		GCfuncL
-|.type CFUNC,		GCfuncC
-|.type PROTO,		GCproto
-|.type UPVAL,		GCupval
-|.type NODE,		Node
-|.type NARGS8,		int
-|.type TRACE,		GCtrace
-|
-|//-----------------------------------------------------------------------
-|
-|// These basic macros should really be part of DynASM.
-|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
-|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
-|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
-|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
-|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
-|
-|// Trap for not-yet-implemented parts.
-|.macro NYI; tw 4, sp, sp; .endmacro
-|
-|//-----------------------------------------------------------------------
-|
-|// Access to frame relative to BASE.
-|.define FRAME_PC,	-8
-|.define FRAME_FUNC,	-4
-|
-|// Instruction decode.
-|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
-|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro
-|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro
-|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro
-|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro
-|
-|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro
-|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro
-|
-|// Instruction fetch.
-|.macro ins_NEXT1
-|  lwz INS, 0(PC)
-|   addi PC, PC, 4
-|.endmacro
-|// Instruction decode+dispatch.
-|.macro ins_NEXT2
-|  decode_OP4 TMP1, INS
-|   decode_RB8 RB, INS
-|   decode_RD8 RD, INS
-|  lwzx TMP0, DISPATCH, TMP1
-|   decode_RA8 RA, INS
-|   decode_RC8 RC, INS
-|  mtctr TMP0
-|  bctr
-|.endmacro
-|.macro ins_NEXT
-|  ins_NEXT1
-|  ins_NEXT2
-|.endmacro
-|
-|// Instruction footer.
-|.if 1
-|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
-|  .define ins_next, ins_NEXT
-|  .define ins_next_, ins_NEXT
-|  .define ins_next1, ins_NEXT1
-|  .define ins_next2, ins_NEXT2
-|.else
-|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
-|  // Affects only certain kinds of benchmarks (and only with -j off).
-|  .macro ins_next
-|    b ->ins_next
-|  .endmacro
-|  .macro ins_next1
-|  .endmacro
-|  .macro ins_next2
-|    b ->ins_next
-|  .endmacro
-|  .macro ins_next_
-|  ->ins_next:
-|    ins_NEXT
-|  .endmacro
-|.endif
-|
-|// Call decode and dispatch.
-|.macro ins_callt
-|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
-|  lwz PC, LFUNC:RB->pc
-|  lwz INS, 0(PC)
-|   addi PC, PC, 4
-|  decode_OP4 TMP1, INS
-|   decode_RA8 RA, INS
-|  lwzx TMP0, DISPATCH, TMP1
-|   add RA, RA, BASE
-|  mtctr TMP0
-|  bctr
-|.endmacro
-|
-|.macro ins_call
-|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
-|  stw PC, FRAME_PC(BASE)
-|  ins_callt
-|.endmacro
-|
-|//-----------------------------------------------------------------------
-|
-|// Macros to test operand types.
-|.macro checknum, reg; evcmpltu reg, TISNUM; .endmacro
-|.macro checkstr, reg; evcmpeq reg, TISSTR; .endmacro
-|.macro checktab, reg; evcmpeq reg, TISTAB; .endmacro
-|.macro checkfunc, reg; evcmpeq reg, TISFUNC; .endmacro
-|.macro checknil, reg; evcmpeq reg, TISNIL; .endmacro
-|.macro checkok, label; blt label; .endmacro
-|.macro checkfail, label; bge label; .endmacro
-|.macro checkanyfail, label; bns label; .endmacro
-|.macro checkallok, label; bso label; .endmacro
-|
-|.macro branch_RD
-|  srwi TMP0, RD, 1
-|  add PC, PC, TMP0
-|  addis PC, PC, -(BCBIAS_J*4 >> 16)
-|.endmacro
-|
-|// Assumes DISPATCH is relative to GL.
-#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
-#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
-|
-#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
-|
-|.macro hotloop
-|  NYI
-|.endmacro
-|
-|.macro hotcall
-|  NYI
-|.endmacro
-|
-|// Set current VM state. Uses TMP0.
-|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
-|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
-|
-|// Move table write barrier back. Overwrites mark and tmp.
-|.macro barrierback, tab, mark, tmp
-|  lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)
-|  // Assumes LJ_GC_BLACK is 0x04.
-|   rlwinm mark, mark, 0, 30, 28		// black2gray(tab)
-|  stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH)
-|   stb mark, tab->marked
-|  stw tmp, tab->gclist
-|.endmacro
-|
-|//-----------------------------------------------------------------------
-
-/* Generate subroutines used by opcodes and other parts of the VM. */
-/* The .code_sub section should be last to help static branch prediction. */
-static void build_subroutines(BuildCtx *ctx)
-{
-  |.code_sub
-  |
-  |//-----------------------------------------------------------------------
-  |//-- Return handling ----------------------------------------------------
-  |//-----------------------------------------------------------------------
-  |
-  |->vm_returnp:
-  |  // See vm_return. Also: TMP2 = previous base.
-  |  andi. TMP0, PC, FRAME_P
-  |   evsplati TMP1, LJ_TTRUE
-  |  beq ->cont_dispatch
-  |
-  |  // Return from pcall or xpcall fast func.
-  |  lwz PC, FRAME_PC(TMP2)		// Fetch PC of previous frame.
-  |  mr BASE, TMP2			// Restore caller base.
-  |  // Prepending may overwrite the pcall frame, so do it at the end.
-  |   stwu TMP1, FRAME_PC(RA)		// Prepend true to results.
-  |
-  |->vm_returnc:
-  |  addi RD, RD, 8			// RD = (nresults+1)*8.
-  |   andi. TMP0, PC, FRAME_TYPE
-  |  cmpwi cr1, RD, 0
-  |  li CRET1, LUA_YIELD
-  |  beq cr1, ->vm_unwind_c_eh
-  |  mr MULTRES, RD
-  |   beq ->BC_RET_Z			// Handle regular return to Lua.
-  |
-  |->vm_return:
-  |  // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
-  |  // TMP0 = PC & FRAME_TYPE
-  |  cmpwi TMP0, FRAME_C
-  |   rlwinm TMP2, PC, 0, 0, 28
-  |    li_vmstate C
-  |   sub TMP2, BASE, TMP2		// TMP2 = previous base.
-  |  bne ->vm_returnp
-  |
-  |  addic. TMP1, RD, -8
-  |   stw TMP2, L->base
-  |   lwz TMP2, SAVE_NRES
-  |    subi BASE, BASE, 8
-  |    st_vmstate
-  |   slwi TMP2, TMP2, 3
-  |  beq >2
-  |1:
-  |  addic. TMP1, TMP1, -8
-  |   evldd TMP0, 0(RA)
-  |    addi RA, RA, 8
-  |   evstdd TMP0, 0(BASE)
-  |    addi BASE, BASE, 8
-  |  bne <1
-  |
-  |2:
-  |  cmpw TMP2, RD			// More/less results wanted?
-  |  bne >6
-  |3:
-  |  stw BASE, L->top			// Store new top.
-  |
-  |->vm_leave_cp:
-  |  lwz TMP0, SAVE_CFRAME		// Restore previous C frame.
-  |   li CRET1, 0			// Ok return status for vm_pcall.
-  |  stw TMP0, L->cframe
-  |
-  |->vm_leave_unw:
-  |  restoreregs
-  |  blr
-  |
-  |6:
-  |  ble >7				// Less results wanted?
-  |  // More results wanted. Check stack size and fill up results with nil.
-  |  lwz TMP1, L->maxstack
-  |  cmplw BASE, TMP1
-  |  bge >8
-  |  evstdd TISNIL, 0(BASE)
-  |  addi RD, RD, 8
-  |  addi BASE, BASE, 8
-  |  b <2
-  |
-  |7:  // Less results wanted.
-  |   sub TMP0, RD, TMP2
-  |  cmpwi TMP2, 0			// LUA_MULTRET+1 case?
-  |   sub TMP0, BASE, TMP0		// Subtract the difference.
-  |  iseleq BASE, BASE, TMP0		// Either keep top or shrink it.
-  |  b <3
-  |
-  |8:  // Corner case: need to grow stack for filling up results.
-  |  // This can happen if:
-  |  // - A C function grows the stack (a lot).
-  |  // - The GC shrinks the stack in between.
-  |  // - A return back from a lua_call() with (high) nresults adjustment.
-  |  stw BASE, L->top			// Save current top held in BASE (yes).
-  |   mr SAVE0, RD
-  |  mr CARG2, TMP2
-  |  mr CARG1, L
-  |  bl extern lj_state_growstack	// (lua_State *L, int n)
-  |    lwz TMP2, SAVE_NRES
-  |   mr RD, SAVE0
-  |    slwi TMP2, TMP2, 3
-  |  lwz BASE, L->top			// Need the (realloced) L->top in BASE.
-  |  b <2
-  |
-  |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
-  |  // (void *cframe, int errcode)
-  |  mr sp, CARG1
-  |  mr CRET1, CARG2
-  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
-  |  lwz L, SAVE_L
-  |   li TMP0, ~LJ_VMST_C
-  |  lwz GL:TMP1, L->glref
-  |   stw TMP0, GL:TMP1->vmstate
-  |  b ->vm_leave_unw
-  |
-  |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
-  |  // (void *cframe)
-  |  rlwinm sp, CARG1, 0, 0, 29
-  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
-  |  lwz L, SAVE_L
-  |     evsplati TISNUM, LJ_TISNUM+1	// Setup type comparison constants.
-  |     evsplati TISFUNC, LJ_TFUNC
-  |     lus TOBIT, 0x4338
-  |     evsplati TISTAB, LJ_TTAB
-  |     li TMP0, 0
-  |  lwz BASE, L->base
-  |     evmergelo TOBIT, TOBIT, TMP0
-  |   lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
-  |     evsplati TISSTR, LJ_TSTR
-  |  li TMP1, LJ_TFALSE
-  |     evsplati TISNIL, LJ_TNIL
-  |    li_vmstate INTERP
-  |  lwz PC, FRAME_PC(BASE)		// Fetch PC of previous frame.
-  |  la RA, -8(BASE)			// Results start at BASE-8.
-  |   addi DISPATCH, DISPATCH, GG_G2DISP
-  |  stw TMP1, 0(RA)			// Prepend false to error message.
-  |  li RD, 16				// 2 results: false + error message.
-  |    st_vmstate
-  |  b ->vm_returnc
-  |
-  |//-----------------------------------------------------------------------
-  |//-- Grow stack for calls -----------------------------------------------
-  |//-----------------------------------------------------------------------
-  |
-  |->vm_growstack_c:			// Grow stack for C function.
-  |  li CARG2, LUA_MINSTACK
-  |  b >2
-  |
-  |->vm_growstack_l:			// Grow stack for Lua function.
-  |  // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
-  |  add RC, BASE, RC
-  |   sub RA, RA, BASE
-  |  stw BASE, L->base
-  |   addi PC, PC, 4			// Must point after first instruction.
-  |  stw RC, L->top
-  |   srwi CARG2, RA, 3
-  |2:
-  |  // L->base = new base, L->top = top
-  |   stw PC, SAVE_PC
-  |  mr CARG1, L
-  |  bl extern lj_state_growstack	// (lua_State *L, int n)
-  |  lwz BASE, L->base
-  |  lwz RC, L->top
-  |  lwz LFUNC:RB, FRAME_FUNC(BASE)
-  |  sub RC, RC, BASE
-  |  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
-  |  ins_callt				// Just retry the call.
-  |
-  |//-----------------------------------------------------------------------
-  |//-- Entry points into the assembler VM ---------------------------------
-  |//-----------------------------------------------------------------------
-  |
-  |->vm_resume:				// Setup C frame and resume thread.
-  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
-  |  saveregs
-  |  mr L, CARG1
-  |    lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
-  |  mr BASE, CARG2
-  |    lbz TMP1, L->status
-  |   stw L, SAVE_L
-  |  li PC, FRAME_CP
-  |  addi TMP0, sp, CFRAME_RESUME
-  |    addi DISPATCH, DISPATCH, GG_G2DISP
-  |   stw CARG3, SAVE_NRES
-  |    cmplwi TMP1, 0
-  |   stw CARG3, SAVE_ERRF
-  |  stw TMP0, L->cframe
-  |   stw CARG3, SAVE_CFRAME
-  |   stw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
-  |    beq >3
-  |
-  |  // Resume after yield (like a return).
-  |  mr RA, BASE
-  |   lwz BASE, L->base
-  |    evsplati TISNUM, LJ_TISNUM+1	// Setup type comparison constants.
-  |   lwz TMP1, L->top
-  |    evsplati TISFUNC, LJ_TFUNC
-  |    lus TOBIT, 0x4338
-  |    evsplati TISTAB, LJ_TTAB
-  |  lwz PC, FRAME_PC(BASE)
-  |    li TMP2, 0
-  |    evsplati TISSTR, LJ_TSTR
-  |   sub RD, TMP1, BASE
-  |    evmergelo TOBIT, TOBIT, TMP2
-  |    stb CARG3, L->status
-  |  andi. TMP0, PC, FRAME_TYPE
-  |    li_vmstate INTERP
-  |   addi RD, RD, 8
-  |    evsplati TISNIL, LJ_TNIL
-  |   mr MULTRES, RD
-  |    st_vmstate
-  |  beq ->BC_RET_Z
-  |  b ->vm_return
-  |
-  |->vm_pcall:				// Setup protected C frame and enter VM.
-  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
-  |  saveregs
-  |  li PC, FRAME_CP
-  |  stw CARG4, SAVE_ERRF
-  |  b >1
-  |
-  |->vm_call:				// Setup C frame and enter VM.
-  |  // (lua_State *L, TValue *base, int nres1)
-  |  saveregs
-  |  li PC, FRAME_C
-  |
-  |1:  // Entry point for vm_pcall above (PC = ftype).
-  |  lwz TMP1, L:CARG1->cframe
-  |   stw CARG3, SAVE_NRES
-  |    mr L, CARG1
-  |   stw CARG1, SAVE_L
-  |    mr BASE, CARG2
-  |  stw sp, L->cframe			// Add our C frame to cframe chain.
-  |    lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
-  |   stw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
-  |  stw TMP1, SAVE_CFRAME
-  |    addi DISPATCH, DISPATCH, GG_G2DISP
-  |
-  |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
-  |  lwz TMP2, L->base			// TMP2 = old base (used in vmeta_call).
-  |    evsplati TISNUM, LJ_TISNUM+1	// Setup type comparison constants.
-  |   lwz TMP1, L->top
-  |    evsplati TISFUNC, LJ_TFUNC
-  |  add PC, PC, BASE
-  |    evsplati TISTAB, LJ_TTAB
-  |    lus TOBIT, 0x4338
-  |    li TMP0, 0
-  |  sub PC, PC, TMP2			// PC = frame delta + frame type
-  |    evsplati TISSTR, LJ_TSTR
-  |   sub NARGS8:RC, TMP1, BASE
-  |    evmergelo TOBIT, TOBIT, TMP0
-  |    li_vmstate INTERP
-  |    evsplati TISNIL, LJ_TNIL
-  |    st_vmstate
-  |
-  |->vm_call_dispatch:
-  |  // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
-  |  li TMP0, -8
-  |  evlddx LFUNC:RB, BASE, TMP0
-  |  checkfunc LFUNC:RB
-  |  checkfail ->vmeta_call
-  |
-  |->vm_call_dispatch_f:
-  |  ins_call
-  |  // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
-  |
-  |->vm_cpcall:				// Setup protected C frame, call C.
-  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
-  |  saveregs
-  |  mr L, CARG1
-  |   lwz TMP0, L:CARG1->stack
-  |  stw CARG1, SAVE_L
-  |   lwz TMP1, L->top
-  |  stw CARG1, SAVE_PC			// Any value outside of bytecode is ok.
-  |   sub TMP0, TMP0, TMP1		// Compute -savestack(L, L->top).
-  |    lwz TMP1, L->cframe
-  |    stw sp, L->cframe		// Add our C frame to cframe chain.
-  |  li TMP2, 0
-  |   stw TMP0, SAVE_NRES		// Neg. delta means cframe w/o frame.
-  |  stw TMP2, SAVE_ERRF		// No error function.
-  |    stw TMP1, SAVE_CFRAME
-  |  mtctr CARG4
-  |  bctrl			// (lua_State *L, lua_CFunction func, void *ud)
-  |  mr. BASE, CRET1
-  |   lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
-  |    li PC, FRAME_CP
-  |   addi DISPATCH, DISPATCH, GG_G2DISP
-  |  bne <3				// Else continue with the call.
-  |  b ->vm_leave_cp			// No base? Just remove C frame.
-  |
-  |//-----------------------------------------------------------------------
-  |//-- Metamethod handling ------------------------------------------------
-  |//-----------------------------------------------------------------------
-  |
-  |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
-  |// stack, so BASE doesn't need to be reloaded across these calls.
-  |
-  |//-- Continuation dispatch ----------------------------------------------
-  |
-  |->cont_dispatch:
-  |  // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
-  |  lwz TMP0, -12(BASE)		// Continuation.
-  |   mr RB, BASE
-  |   mr BASE, TMP2			// Restore caller BASE.
-  |    lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
-  |  cmplwi TMP0, 0
-  |     lwz PC, -16(RB)			// Restore PC from [cont|PC].
-  |  beq >1
-  |   subi TMP2, RD, 8
-  |    lwz TMP1, LFUNC:TMP1->pc
-  |   evstddx TISNIL, RA, TMP2		// Ensure one valid arg.
-  |    lwz KBASE, PC2PROTO(k)(TMP1)
-  |  // BASE = base, RA = resultptr, RB = meta base
-  |  mtctr TMP0
-  |  bctr				// Jump to continuation.
-  |
-  |1:  // Tail call from C function.
-  |  subi TMP1, RB, 16
-  |  sub RC, TMP1, BASE
-  |  b ->vm_call_tail
-  |
-  |->cont_cat:				// RA = resultptr, RB = meta base
-  |  lwz INS, -4(PC)
-  |   subi CARG2, RB, 16
-  |  decode_RB8 SAVE0, INS
-  |   evldd TMP0, 0(RA)
-  |  add TMP1, BASE, SAVE0
-  |   stw BASE, L->base
-  |  cmplw TMP1, CARG2
-  |   sub CARG3, CARG2, TMP1
-  |  decode_RA8 RA, INS
-  |   evstdd TMP0, 0(CARG2)
-  |  bne ->BC_CAT_Z
-  |   evstddx TMP0, BASE, RA
-  |  b ->cont_nop
-  |
-  |//-- Table indexing metamethods -----------------------------------------
-  |
-  |->vmeta_tgets1:
-  |  evmergelo STR:RC, TISSTR, STR:RC
-  |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
-  |   decode_RB8 RB, INS
-  |  evstdd STR:RC, 0(CARG3)
-  |   add CARG2, BASE, RB
-  |  b >1
-  |
-  |->vmeta_tgets:
-  |  evmergelo TAB:RB, TISTAB, TAB:RB
-  |  la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
-  |   evmergelo STR:RC, TISSTR, STR:RC
-  |  evstdd TAB:RB, 0(CARG2)
-  |   la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
-  |   evstdd STR:RC, 0(CARG3)
-  |  b >1
-  |
-  |->vmeta_tgetb:			// TMP0 = index
-  |  efdcfsi TMP0, TMP0
-  |   decode_RB8 RB, INS
-  |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
-  |   add CARG2, BASE, RB
-  |  evstdd TMP0, 0(CARG3)
-  |  b >1
-  |
-  |->vmeta_tgetv:
-  |  decode_RB8 RB, INS
-  |   decode_RC8 RC, INS
-  |  add CARG2, BASE, RB
-  |   add CARG3, BASE, RC
-  |1:
-  |  stw BASE, L->base
-  |  mr CARG1, L
-  |  stw PC, SAVE_PC
-  |  bl extern lj_meta_tget		// (lua_State *L, TValue *o, TValue *k)
-  |  // Returns TValue * (finished) or NULL (metamethod).
-  |  cmplwi CRET1, 0
-  |  beq >3
-  |  evldd TMP0, 0(CRET1)
-  |  evstddx TMP0, BASE, RA
-  |  ins_next
-  |
-  |3:  // Call __index metamethod.
-  |  // BASE = base, L->top = new base, stack = cont/func/t/k
-  |  subfic TMP1, BASE, FRAME_CONT
-  |  lwz BASE, L->top
-  |  stw PC, -16(BASE)			// [cont|PC]
-  |   add PC, TMP1, BASE
-  |  lwz LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
-  |   li NARGS8:RC, 16			// 2 args for func(t, k).
-  |  b ->vm_call_dispatch_f
-  |
-  |//-----------------------------------------------------------------------
-  |
-  |->vmeta_tsets1:
-  |  evmergelo STR:RC, TISSTR, STR:RC
-  |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
-  |   decode_RB8 RB, INS
-  |  evstdd STR:RC, 0(CARG3)
-  |   add CARG2, BASE, RB
-  |  b >1
-  |
-  |->vmeta_tsets:
-  |  evmergelo TAB:RB, TISTAB, TAB:RB
-  |  la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
-  |   evmergelo STR:RC, TISSTR, STR:RC
-  |  evstdd TAB:RB, 0(CARG2)
-  |   la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
-  |   evstdd STR:RC, 0(CARG3)
-  |  b >1
-  |
-  |->vmeta_tsetb:			// TMP0 = index
-  |  efdcfsi TMP0, TMP0
-  |   decode_RB8 RB, INS
-  |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
-  |   add CARG2, BASE, RB
-  |  evstdd TMP0, 0(CARG3)
-  |  b >1
-  |
-  |->vmeta_tsetv:
-  |  decode_RB8 RB, INS
-  |   decode_RC8 RC, INS
-  |  add CARG2, BASE, RB
-  |   add CARG3, BASE, RC
-  |1:
-  |  stw BASE, L->base
-  |  mr CARG1, L
-  |  stw PC, SAVE_PC
-  |  bl extern lj_meta_tset		// (lua_State *L, TValue *o, TValue *k)
-  |  // Returns TValue * (finished) or NULL (metamethod).
-  |  cmplwi CRET1, 0
-  |   evlddx TMP0, BASE, RA
-  |  beq >3
-  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
-  |   evstdd TMP0, 0(CRET1)
-  |  ins_next
-  |
-  |3:  // Call __newindex metamethod.
-  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
-  |  subfic TMP1, BASE, FRAME_CONT
-  |  lwz BASE, L->top
-  |  stw PC, -16(BASE)			// [cont|PC]
-  |   add PC, TMP1, BASE
-  |  lwz LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
-  |   li NARGS8:RC, 24			// 3 args for func(t, k, v)
-  |  evstdd TMP0, 16(BASE)		// Copy value to third argument.
-  |  b ->vm_call_dispatch_f
-  |
-  |//-- Comparison metamethods ---------------------------------------------
-  |
-  |->vmeta_comp:
-  |  mr CARG1, L
-  |   subi PC, PC, 4
-  |  add CARG2, BASE, RA
-  |   stw PC, SAVE_PC
-  |  add CARG3, BASE, RD
-  |   stw BASE, L->base
-  |  decode_OP1 CARG4, INS
-  |  bl extern lj_meta_comp  // (lua_State *L, TValue *o1, *o2, int op)
-  |  // Returns 0/1 or TValue * (metamethod).
-  |3:
-  |  cmplwi CRET1, 1
-  |  bgt ->vmeta_binop
-  |4:
-  |  lwz INS, 0(PC)
-  |   addi PC, PC, 4
-  |  decode_RD4 TMP2, INS
-  |  addis TMP3, PC, -(BCBIAS_J*4 >> 16)
-  |  add TMP2, TMP2, TMP3
-  |  isellt PC, PC, TMP2
-  |->cont_nop:
-  |  ins_next
-  |
-  |->cont_ra:				// RA = resultptr
-  |  lwz INS, -4(PC)
-  |   evldd TMP0, 0(RA)
-  |  decode_RA8 TMP1, INS
-  |   evstddx TMP0, BASE, TMP1
-  |  b ->cont_nop
-  |
-  |->cont_condt:			// RA = resultptr
-  |  lwz TMP0, 0(RA)
-  |   li TMP1, LJ_TTRUE
-  |  cmplw TMP1, TMP0			// Branch if result is true.
-  |  b <4
-  |
-  |->cont_condf:			// RA = resultptr
-  |  lwz TMP0, 0(RA)
-  |   li TMP1, LJ_TFALSE
-  |  cmplw TMP0, TMP1			// Branch if result is false.
-  |  b <4
-  |
-  |->vmeta_equal:
-  |  // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
-  |  subi PC, PC, 4
-  |   stw BASE, L->base
-  |  mr CARG1, L
-  |   stw PC, SAVE_PC
-  |  bl extern lj_meta_equal  // (lua_State *L, GCobj *o1, *o2, int ne)
-  |  // Returns 0/1 or TValue * (metamethod).
-  |  b <3
-  |
-  |//-- Arithmetic metamethods ---------------------------------------------
-  |
-  |->vmeta_arith_vn:
-  |  add CARG3, BASE, RB
-  |  add CARG4, KBASE, RC
-  |  b >1
-  |
-  |->vmeta_arith_nv:
-  |  add CARG3, KBASE, RC
-  |  add CARG4, BASE, RB
-  |  b >1
-  |
-  |->vmeta_unm:
-  |  add CARG3, BASE, RD
-  |  mr CARG4, CARG3
-  |  b >1
-  |
-  |->vmeta_arith_vv:
-  |  add CARG3, BASE, RB
-  |  add CARG4, BASE, RC
-  |1:
-  |  add CARG2, BASE, RA
-  |   stw BASE, L->base
-  |  mr CARG1, L
-  |   stw PC, SAVE_PC
-  |  decode_OP1 CARG5, INS		// Caveat: CARG5 overlaps INS.
-  |  bl extern lj_meta_arith  // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
-  |  // Returns NULL (finished) or TValue * (metamethod).
-  |  cmplwi CRET1, 0
-  |  beq ->cont_nop
-  |
-  |  // Call metamethod for binary op.
-  |->vmeta_binop:
-  |  // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
-  |  sub TMP1, CRET1, BASE
-  |   stw PC, -16(CRET1)		// [cont|PC]
-  |   mr TMP2, BASE
-  |  addi PC, TMP1, FRAME_CONT
-  |   mr BASE, CRET1
-  |  li NARGS8:RC, 16			// 2 args for func(o1, o2).
-  |  b ->vm_call_dispatch
-  |
-  |->vmeta_len:
-#if LJ_52
-  |  mr SAVE0, CARG1
-#endif
-  |  add CARG2, BASE, RD
-  |   stw BASE, L->base
-  |  mr CARG1, L
-  |   stw PC, SAVE_PC
-  |  bl extern lj_meta_len		// (lua_State *L, TValue *o)
-  |  // Returns NULL (retry) or TValue * (metamethod base).
-#if LJ_52
-  |  cmplwi CRET1, 0
-  |  bne ->vmeta_binop			// Binop call for compatibility.
-  |  mr CARG1, SAVE0
-  |  b ->BC_LEN_Z
-#else
-  |  b ->vmeta_binop			// Binop call for compatibility.
-#endif
-  |
-  |//-- Call metamethod ----------------------------------------------------
-  |
-  |->vmeta_call:			// Resolve and call __call metamethod.
-  |  // TMP2 = old base, BASE = new base, RC = nargs*8
-  |  mr CARG1, L
-  |   stw TMP2, L->base			// This is the callers base!
-  |  subi CARG2, BASE, 8
-  |   stw PC, SAVE_PC
-  |  add CARG3, BASE, RC
-  |   mr SAVE0, NARGS8:RC
-  |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
-  |  lwz LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
-  |   addi NARGS8:RC, SAVE0, 8		// Got one more argument now.
-  |  ins_call
-  |
-  |->vmeta_callt:			// Resolve __call for BC_CALLT.
-  |  // BASE = old base, RA = new base, RC = nargs*8
-  |  mr CARG1, L
-  |   stw BASE, L->base
-  |  subi CARG2, RA, 8
-  |   stw PC, SAVE_PC
-  |  add CARG3, RA, RC
-  |   mr SAVE0, NARGS8:RC
-  |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
-  |  lwz TMP1, FRAME_PC(BASE)
-  |   addi NARGS8:RC, SAVE0, 8		// Got one more argument now.
-  |   lwz LFUNC:RB, FRAME_FUNC(RA)	// Guaranteed to be a function here.
-  |  b ->BC_CALLT_Z
-  |
-  |//-- Argument coercion for 'for' statement ------------------------------
-  |
-  |->vmeta_for:
-  |  mr CARG1, L
-  |   stw BASE, L->base
-  |  mr CARG2, RA
-  |   stw PC, SAVE_PC
-  |  mr SAVE0, INS
-  |  bl extern lj_meta_for	// (lua_State *L, TValue *base)
-  |.if JIT
-  |   decode_OP1 TMP0, SAVE0
-  |.endif
-  |  decode_RA8 RA, SAVE0
-  |.if JIT
-  |   cmpwi TMP0, BC_JFORI
-  |.endif
-  |  decode_RD8 RD, SAVE0
-  |.if JIT
-  |   beq =>BC_JFORI
-  |.endif
-  |  b =>BC_FORI
-  |
-  |//-----------------------------------------------------------------------
-  |//-- Fast functions -----------------------------------------------------
-  |//-----------------------------------------------------------------------
-  |
-  |.macro .ffunc, name
-  |->ff_ .. name:
-  |.endmacro
-  |
-  |.macro .ffunc_1, name
-  |->ff_ .. name:
-  |  cmplwi NARGS8:RC, 8
-  |   evldd CARG1, 0(BASE)
-  |  blt ->fff_fallback
-  |.endmacro
-  |
-  |.macro .ffunc_2, name
-  |->ff_ .. name:
-  |  cmplwi NARGS8:RC, 16
-  |   evldd CARG1, 0(BASE)
-  |   evldd CARG2, 8(BASE)
-  |  blt ->fff_fallback
-  |.endmacro
-  |
-  |.macro .ffunc_n, name
-  |  .ffunc_1 name
-  |  checknum CARG1
-  |  checkfail ->fff_fallback
-  |.endmacro
-  |
-  |.macro .ffunc_nn, name
-  |  .ffunc_2 name
-  |  evmergehi TMP0, CARG1, CARG2
-  |  checknum TMP0
-  |  checkanyfail ->fff_fallback
-  |.endmacro
-  |
-  |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
-  |.macro ffgccheck
-  |  lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
-  |  lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
-  |  cmplw TMP0, TMP1
-  |  bgel ->fff_gcstep
-  |.endmacro
-  |
-  |//-- Base library: checks -----------------------------------------------
-  |
-  |.ffunc assert
-  |  cmplwi NARGS8:RC, 8
-  |   evldd TMP0, 0(BASE)
-  |  blt ->fff_fallback
-  |  evaddw TMP1, TISNIL, TISNIL	// Synthesize LJ_TFALSE.
-  |  la RA, -8(BASE)
-  |   evcmpltu cr1, TMP0, TMP1
-  |    lwz PC, FRAME_PC(BASE)
-  |  bge cr1, ->fff_fallback
-  |   evstdd TMP0, 0(RA)
-  |  addi RD, NARGS8:RC, 8		// Compute (nresults+1)*8.
-  |   beq ->fff_res			// Done if exactly 1 argument.
-  |  li TMP1, 8
-  |  subi RC, RC, 8
-  |1:
-  |  cmplw TMP1, RC
-  |   evlddx TMP0, BASE, TMP1
-  |   evstddx TMP0, RA, TMP1
-  |    addi TMP1, TMP1, 8
-  |  bne <1
-  |  b ->fff_res
-  |
-  |.ffunc type
-  |  cmplwi NARGS8:RC, 8
-  |   lwz CARG1, 0(BASE)
-  |  blt ->fff_fallback
-  |    li TMP2, ~LJ_TNUMX
-  |  cmplw CARG1, TISNUM
-  |  not TMP1, CARG1
-  |  isellt TMP1, TMP2, TMP1
-  |  slwi TMP1, TMP1, 3
-  |   la TMP2, CFUNC:RB->upvalue
-  |  evlddx STR:CRET1, TMP2, TMP1
-  |  b ->fff_restv
-  |
-  |//-- Base library: getters and setters ---------------------------------
-  |
-  |.ffunc_1 getmetatable
-  |  checktab CARG1
-  |   evmergehi TMP1, CARG1, CARG1
-  |  checkfail >6
-  |1:  // Field metatable must be at same offset for GCtab and GCudata!
-  |  lwz TAB:RB, TAB:CARG1->metatable
-  |2:
-  |  evmr CRET1, TISNIL
-  |   cmplwi TAB:RB, 0
-  |  lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
-  |   beq ->fff_restv
-  |  lwz TMP0, TAB:RB->hmask
-  |   evmergelo CRET1, TISTAB, TAB:RB	// Use metatable as default result.
-  |  lwz TMP1, STR:RC->hash
-  |  lwz NODE:TMP2, TAB:RB->node
-  |   evmergelo STR:RC, TISSTR, STR:RC
-  |  and TMP1, TMP1, TMP0		// idx = str->hash & tab->hmask
-  |  slwi TMP0, TMP1, 5
-  |  slwi TMP1, TMP1, 3
-  |  sub TMP1, TMP0, TMP1
-  |  add NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
-  |3:  // Rearranged logic, because we expect _not_ to find the key.
-  |  evldd TMP0, NODE:TMP2->key
-  |   evldd TMP1, NODE:TMP2->val
-  |  evcmpeq TMP0, STR:RC
-  |   lwz NODE:TMP2, NODE:TMP2->next
-  |  checkallok >5
-  |   cmplwi NODE:TMP2, 0
-  |   beq ->fff_restv			// Not found, keep default result.
-  |   b <3
-  |5:
-  |  checknil TMP1
-  |  checkok ->fff_restv		// Ditto for nil value.
-  |  evmr CRET1, TMP1			// Return value of mt.__metatable.
-  |  b ->fff_restv
-  |
-  |6:
-  |  cmpwi TMP1, LJ_TUDATA
-  |   not TMP1, TMP1
-  |  beq <1
-  |  checknum CARG1
-  |   slwi TMP1, TMP1, 2
-  |   li TMP2, 4*~LJ_TNUMX
-  |  isellt TMP1, TMP2, TMP1
-  |   la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
-  |  lwzx TAB:RB, TMP2, TMP1
-  |  b <2
-  |
-  |.ffunc_2 setmetatable  // Inline path only; every other case goes to fff_fallback.
-  |  // Fast path: no mt for table yet and not clearing the mt.
-  |  evmergehi TMP0, TAB:CARG1, TAB:CARG2  // TMP0 = high words of both arguments, tested together.
-  |  checktab TMP0
-  |  checkanyfail ->fff_fallback
-  |  lwz TAB:TMP1, TAB:CARG1->metatable
-  |  cmplwi TAB:TMP1, 0
-  |   lbz TMP3, TAB:CARG1->marked
-  |  bne ->fff_fallback  // Table already has a metatable.
-  |   andi. TMP0, TMP3, LJ_GC_BLACK	// isblack(table)
-  |    stw TAB:CARG2, TAB:CARG1->metatable
-  |   beq ->fff_restv  // Table not black: no barrier needed.
-  |  barrierback TAB:CARG1, TMP3, TMP0
-  |  b ->fff_restv
-  |
-  |.ffunc rawget  // Looks up the key at 8(BASE) in the table at 0(BASE) through lj_tab_get.
-  |  cmplwi NARGS8:RC, 16
-  |   evldd CARG2, 0(BASE)
-  |  blt ->fff_fallback  // Fewer than 2 arguments (NARGS8 < 16).
-  |  checktab CARG2
-  |   la CARG3, 8(BASE)  // CARG3 = address of the key slot.
-  |  checkfail ->fff_fallback
-  |   mr CARG1, L
-  |  bl extern lj_tab_get  // (lua_State *L, GCtab *t, cTValue *key)
-  |  // Returns cTValue *.
-  |  evldd CRET1, 0(CRET1)  // Load the TValue the returned pointer refers to.
-  |  b ->fff_restv
-  |
-  |//-- Base library: conversions ------------------------------------------
-  |
-  |.ffunc tonumber
-  |  // Only handles the number case inline (without a base argument).
-  |  cmplwi NARGS8:RC, 8
-  |   evldd CARG1, 0(BASE)
-  |  bne ->fff_fallback			// Exactly one argument.
-  |  checknum CARG1
-  |  checkok ->fff_restv  // Check passed: hand the argument back as the result.
-  |  b ->fff_fallback
-  |
-  |.ffunc_1 tostring
-  |  // Only handles the string or number case inline.
-  |  checkstr CARG1
-  |  // A __tostring method in the string base metatable is ignored.
-  |  checkok ->fff_restv		// String argument: return it as is.
-  |  // Handle numbers inline, unless a number base metatable is present.
-  |  lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
-  |  checknum CARG1
-  |  cmplwi cr1, TMP0, 0
-  |   stw BASE, L->base			// Add frame since C call can throw.
-  |  crand 4*cr0+eq, 4*cr0+lt, 4*cr1+eq  // cr0.eq = cr0.lt (from checknum) AND base metatable == 0.
-  |   stw PC, SAVE_PC			// Redundant (but a defined value).
-  |  bne ->fff_fallback
-  |  ffgccheck
-  |  mr CARG1, L
-  |  mr CARG2, BASE  // Address of argument 1.
-  |  bl extern lj_str_fromnum		// (lua_State *L, lua_Number *np)
-  |  // Returns GCstr *.
-  |  evmergelo STR:CRET1, TISSTR, STR:CRET1  // Pair TISSTR with the returned pointer.
-  |  b ->fff_restv
-  |
-  |//-- Base library: iterators -------------------------------------------
-  |
-  |.ffunc next  // Calls lj_tab_next with the key slot at 8(BASE), then returns key/value or nil.
-  |  cmplwi NARGS8:RC, 8
-  |   evldd CARG2, 0(BASE)
-  |  blt ->fff_fallback  // No arguments.
-  |   evstddx TISNIL, BASE, NARGS8:RC	// Set missing 2nd arg to nil.
-  |  checktab TAB:CARG2
-  |   lwz PC, FRAME_PC(BASE)
-  |  checkfail ->fff_fallback
-  |   stw BASE, L->base			// Add frame since C call can throw.
-  |  mr CARG1, L
-  |   stw BASE, L->top			// Dummy frame length is ok.
-  |  la CARG3, 8(BASE)  // CARG3 = address of the key slot.
-  |   stw PC, SAVE_PC
-  |  bl extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
-  |  // Returns 0 at end of traversal.
-  |  cmplwi CRET1, 0
-  |   evmr CRET1, TISNIL
-  |  beq ->fff_restv			// End of traversal: return nil.
-  |  evldd TMP0, 8(BASE)		// Copy key and value to results.
-  |   la RA, -8(BASE)
-  |  evldd TMP1, 16(BASE)
-  |  evstdd TMP0, 0(RA)
-  |   li RD, (2+1)*8  // Two results.
-  |  evstdd TMP1, 8(RA)
-  |  b ->fff_res
-  |
-  |.ffunc_1 pairs  // Results: upvalue[0] at RA, the table left in place at BASE, nil at 8(BASE).
-  |  checktab TAB:CARG1
-  |   lwz PC, FRAME_PC(BASE)
-  |  checkfail ->fff_fallback
-#if LJ_52
-  |   lwz TAB:TMP2, TAB:CARG1->metatable
-  |  evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
-  |   cmplwi TAB:TMP2, 0
-  |  la RA, -8(BASE)
-  |   bne ->fff_fallback  // Table has a metatable: not handled inline.
-#else
-  |  evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
-  |  la RA, -8(BASE)
-#endif
-  |   evstdd TISNIL, 8(BASE)  // Third result = nil.
-  |  li RD, (3+1)*8  // Three results.
-  |  evstdd CFUNC:TMP0, 0(RA)  // First result = upvalue[0] of this function.
-  |  b ->fff_res
-  |
-  |.ffunc_2 ipairs_aux  // Iterator step: returns (i+1, t[i+1]) or no results at the end.
-  |  checktab TAB:CARG1
-  |   lwz PC, FRAME_PC(BASE)
-  |  checkfail ->fff_fallback
-  |  checknum CARG2
-  |    lus TMP3, 0x3ff0  // High word of the double 1.0.
-  |  checkfail ->fff_fallback
-  |  efdctsi TMP2, CARG2  // TMP2 = (int)i.
-  |   lwz TMP0, TAB:CARG1->asize
-  |    evmergelo TMP3, TMP3, ZERO  // TMP3 = 1.0 (high word 0x3ff00000, low word ZERO).
-  |   lwz TMP1, TAB:CARG1->array
-  |  efdadd CARG2, CARG2, TMP3  // CARG2 = i + 1 as a double.
-  |  addi TMP2, TMP2, 1  // TMP2 = i + 1 as an integer.
-  |   la RA, -8(BASE)
-  |  cmplw TMP0, TMP2
-  |   slwi TMP3, TMP2, 3  // Byte offset of array slot i+1.
-  |  evstdd CARG2, 0(RA)  // First result = new index.
-  |  ble >2				// Not in array part?
-  |  evlddx TMP1, TMP1, TMP3
-  |1:  // TMP1 = candidate value.
-  |  checknil TMP1
-  |   li RD, (0+1)*8
-  |  checkok ->fff_res			// End of iteration, return 0 results.
-  |   li RD, (2+1)*8
-  |  evstdd TMP1, 8(RA)  // Second result = value.
-  |  b ->fff_res
-  |2:  // Check for empty hash part first. Otherwise call C function.
-  |  lwz TMP0, TAB:CARG1->hmask
-  |  cmplwi TMP0, 0
-  |   li RD, (0+1)*8
-  |  beq ->fff_res  // hmask == 0: return 0 results.
-  |   mr CARG2, TMP2
-  |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
-  |  // Returns cTValue * or NULL.
-  |  cmplwi CRET1, 0
-  |   li RD, (0+1)*8
-  |  beq ->fff_res  // NULL: return 0 results.
-  |  evldd TMP1, 0(CRET1)
-  |  b <1
-  |
-  |.ffunc_1 ipairs  // Results: upvalue[0] at RA, the table left in place at BASE, an all-zero value at 8(BASE).
-  |  checktab TAB:CARG1
-  |   lwz PC, FRAME_PC(BASE)
-  |  checkfail ->fff_fallback
-#if LJ_52
-  |   lwz TAB:TMP2, TAB:CARG1->metatable
-  |  evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
-  |   cmplwi TAB:TMP2, 0
-  |  la RA, -8(BASE)
-  |   bne ->fff_fallback  // Table has a metatable: not handled inline.
-#else
-  |  evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
-  |  la RA, -8(BASE)
-#endif
-  |    evsplati TMP1, 0  // Both words of TMP1 = 0.
-  |  li RD, (3+1)*8  // Three results.
-  |    evstdd TMP1, 8(BASE)  // Third result = the all-zero value.
-  |  evstdd CFUNC:TMP0, 0(RA)  // First result = upvalue[0] of this function.
-  |  b ->fff_res
-  |
-  |//-- Base library: catch errors ----------------------------------------
-  |
-  |.ffunc pcall  // Shifts BASE past the callee and dispatches with a FRAME_PCALL-based PC.
-  |  cmplwi NARGS8:RC, 8
-  |   lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
-  |  blt ->fff_fallback  // No arguments.
-  |   mr TMP2, BASE  // TMP2 = old BASE, passed on to vm_call_dispatch.
-  |   la BASE, 8(BASE)
-  |  // Remember active hook before pcall.
-  |  rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31  // TMP3 = single bit taken from hookmask.
-  |   subi NARGS8:RC, NARGS8:RC, 8  // One argument fewer for the callee.
-  |  addi PC, TMP3, 8+FRAME_PCALL  // PC = 8 + FRAME_PCALL + hook bit.
-  |  b ->vm_call_dispatch
-  |
-  |.ffunc_2 xpcall  // Like pcall, but swaps the two argument slots and skips 16 bytes.
-  |  lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
-  |   mr TMP2, BASE  // TMP2 = old BASE, used for the stores below.
-  |  checkfunc CARG2			// Traceback must be a function.
-  |  checkfail ->fff_fallback
-  |   la BASE, 16(BASE)
-  |  // Remember active hook before pcall.
-  |  rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31  // TMP3 = single bit taken from hookmask.
-  |   evstdd CARG2, 0(TMP2)		// Swap function and traceback.
-  |  subi NARGS8:RC, NARGS8:RC, 16  // Two arguments fewer for the callee.
-  |   evstdd CARG1, 8(TMP2)
-  |  addi PC, TMP3, 16+FRAME_PCALL  // PC = 16 + FRAME_PCALL + hook bit.
-  |  b ->vm_call_dispatch
-  |
-  |//-- Coroutine library --------------------------------------------------
-  |
-  |.macro coroutine_resume_wrap, resume  // Shared body: resume=1 emits coroutine_resume, resume=0 emits coroutine_wrap_aux.
-  |.if resume
-  |.ffunc_1 coroutine_resume
-  |  evmergehi TMP0, L:CARG1, L:CARG1  // TMP0 = high word of argument 1, checked against LJ_TTHREAD below.
-  |.else
-  |.ffunc coroutine_wrap_aux
-  |  lwz L:CARG1, CFUNC:RB->upvalue[0].gcr  // Coroutine comes from upvalue[0] of this function.
-  |.endif
-  |.if resume
-  |  cmpwi TMP0, LJ_TTHREAD
-  |  bne ->fff_fallback
-  |.endif
-  |  lbz TMP0, L:CARG1->status
-  |   lwz TMP1, L:CARG1->cframe
-  |    lwz CARG2, L:CARG1->top
-  |  cmplwi cr0, TMP0, LUA_YIELD
-  |    lwz TMP2, L:CARG1->base
-  |   cmplwi cr1, TMP1, 0
-  |   lwz TMP0, L:CARG1->maxstack
-  |    cmplw cr7, CARG2, TMP2
-  |   lwz PC, FRAME_PC(BASE)
-  |  crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq		// st>LUA_YIELD || cframe!=0
-  |   add TMP2, CARG2, NARGS8:RC  // TMP2 = co->top + nargs*8.
-  |  crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq	// base==top && st!=LUA_YIELD
-  |   cmplw cr1, TMP2, TMP0
-  |  cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt
-  |   stw PC, SAVE_PC
-  |  cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt		// cond1 || cond2 || stackov
-  |   stw BASE, L->base
-  |  blt cr6, ->fff_fallback
-  |1:
-  |.if resume
-  |  addi BASE, BASE, 8			// Keep resumed thread in stack for GC.
-  |  subi NARGS8:RC, NARGS8:RC, 8
-  |  subi TMP2, TMP2, 8
-  |.endif
-  |  stw TMP2, L:CARG1->top
-  |  li TMP1, 0  // TMP1 = byte offset for the copy loop.
-  |  stw BASE, L->top
-  |2:  // Move args to coroutine.
-  |  cmpw TMP1, NARGS8:RC
-  |   evlddx TMP0, BASE, TMP1
-  |  beq >3
-  |   evstddx TMP0, CARG2, TMP1
-  |  addi TMP1, TMP1, 8
-  |  b <2
-  |3:
-  |  li CARG3, 0
-  |   mr L:SAVE0, L:CARG1  // Keep the coroutine pointer in SAVE0 for use after the call.
-  |  li CARG4, 0
-  |  bl ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
-  |  // Returns thread status.
-  |4:
-  |  lwz TMP2, L:SAVE0->base
-  |   cmplwi CRET1, LUA_YIELD
-  |  lwz TMP3, L:SAVE0->top
-  |    li_vmstate INTERP
-  |  lwz BASE, L->base
-  |    st_vmstate
-  |   bgt >8  // Status above LUA_YIELD: error path.
-  |  sub RD, TMP3, TMP2  // RD = co->top - co->base = result bytes.
-  |   lwz TMP0, L->maxstack
-  |  cmplwi RD, 0
-  |   add TMP1, BASE, RD
-  |  beq >6				// No results?
-  |  cmplw TMP1, TMP0
-  |   li TMP1, 0
-  |  bgt >9				// Need to grow stack?
-  |
-  |  subi TMP3, RD, 8  // TMP3 = offset of the last result.
-  |   stw TMP2, L:SAVE0->top		// Clear coroutine stack.
-  |5:  // Move results from coroutine.
-  |  cmplw TMP1, TMP3
-  |   evlddx TMP0, TMP2, TMP1
-  |   evstddx TMP0, BASE, TMP1
-  |    addi TMP1, TMP1, 8
-  |  bne <5
-  |6:
-  |  andi. TMP0, PC, FRAME_TYPE  // cr0 is consumed by the beq at 7:.
-  |.if resume
-  |  li TMP1, LJ_TTRUE
-  |   la RA, -8(BASE)
-  |  stw TMP1, -8(BASE)			// Prepend true to results.
-  |  addi RD, RD, 16  // Result bytes + true slot + 8, matching RD = (nresults+1)*8.
-  |.else
-  |  mr RA, BASE
-  |  addi RD, RD, 8  // Result bytes + 8, matching RD = (nresults+1)*8.
-  |.endif
-  |7:
-  |    stw PC, SAVE_PC
-  |   mr MULTRES, RD
-  |  beq ->BC_RET_Z  // No FRAME_TYPE bits set in PC.
-  |  b ->vm_return
-  |
-  |8:  // Coroutine returned with error (at co->top-1).
-  |.if resume
-  |  andi. TMP0, PC, FRAME_TYPE  // cr0 is consumed by the beq at 7:.
-  |  la TMP3, -8(TMP3)
-  |   li TMP1, LJ_TFALSE
-  |  evldd TMP0, 0(TMP3)
-  |   stw TMP3, L:SAVE0->top		// Remove error from coroutine stack.
-  |    li RD, (2+1)*8
-  |   stw TMP1, -8(BASE)		// Prepend false to results.
-  |    la RA, -8(BASE)
-  |  evstdd TMP0, 0(BASE)		// Copy error message.
-  |  b <7
-  |.else
-  |  mr CARG1, L
-  |  mr CARG2, L:SAVE0
-  |  bl extern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co); presumably does not return - TODO confirm.
-  |.endif
-  |
-  |9:  // Handle stack expansion on return from yield.
-  |  mr CARG1, L
-  |  srwi CARG2, RD, 3  // Result count = bytes / 8.
-  |  bl extern lj_state_growstack	// (lua_State *L, int n)
-  |  li CRET1, 0  // Status 0, so the retry at 4: takes the non-error path.
-  |  b <4
-  |.endmacro
-  |
-  |  coroutine_resume_wrap 1		// coroutine.resume
-  |  coroutine_resume_wrap 0		// coroutine.wrap
-  |
-  |.ffunc coroutine_yield  // Inline only when L->cframe has CFRAME_RESUME set.
-  |  lwz TMP0, L->cframe
-  |   add TMP1, BASE, NARGS8:RC  // TMP1 = BASE + nargs*8.
-  |   stw BASE, L->base
-  |  andi. TMP0, TMP0, CFRAME_RESUME
-  |   stw TMP1, L->top
-  |    li CRET1, LUA_YIELD
-  |  beq ->fff_fallback  // CFRAME_RESUME bit clear.
-  |   stw ZERO, L->cframe
-  |    stb CRET1, L->status  // L->status = LUA_YIELD.
-  |  b ->vm_leave_unw
-  |
-  |//-- Math library -------------------------------------------------------
-  |
-  |.ffunc_n math_abs  // Falls straight into the shared single-result return path.
-  |  efdabs CRET1, CARG1
-  |  // Fallthrough.
-  |
-  |->fff_restv:  // Shared exit: store one TValue result and return.
-  |  // CRET1 = TValue result.
-  |  lwz PC, FRAME_PC(BASE)
-  |   la RA, -8(BASE)
-  |  evstdd CRET1, 0(RA)
-  |->fff_res1:  // Shared exit: one result already stored at RA.
-  |  // RA = results, PC = return.
-  |  li RD, (1+1)*8
-  |->fff_res:  // Shared exit: any number of results.
-  |  // RA = results, RD = (nresults+1)*8, PC = return.
-  |  andi. TMP0, PC, FRAME_TYPE
-  |   mr MULTRES, RD
-  |  bne ->vm_return  // FRAME_TYPE bits set in PC: leave via vm_return.
-  |  lwz INS, -4(PC)  // Instruction word preceding PC.
-  |  decode_RB8 RB, INS
-  |5:
-  |  cmplw RB, RD			// More results expected?
-  |   decode_RA8 TMP0, INS
-  |  bgt >6
-  |  ins_next1
-  |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
-  |   sub BASE, RA, TMP0
-  |  ins_next2
-  |
-  |6:  // Fill up results with nil.
-  |  subi TMP1, RD, 8  // Offset of the first missing result slot.
-  |   addi RD, RD, 8
-  |  evstddx TISNIL, RA, TMP1
-  |  b <5
-  |
-  |.macro math_extern, func  // Emits math_<func>: one argument, calls the external func.
-  |  .ffunc math_ .. func
-  |  cmplwi NARGS8:RC, 8
-  |   evldd CARG2, 0(BASE)
-  |  blt ->fff_fallback  // No arguments.
-  |  checknum CARG2
-  |   evmergehi CARG1, CARG2, CARG2  // CARG1 = high word; the argument is passed as the CARG1/CARG2 pair.
-  |  checkfail ->fff_fallback
-  |  bl extern func@plt
-  |  evmergelo CRET1, CRET1, CRET2  // Combine the CRET1/CRET2 words into one 64-bit value.
-  |  b ->fff_restv
-  |.endmacro
-  |
-  |.macro math_extern2, func  // Emits math_<func>: two arguments, calls the external func.
-  |  .ffunc math_ .. func
-  |  cmplwi NARGS8:RC, 16
-  |   evldd CARG2, 0(BASE)
-  |   evldd CARG4, 8(BASE)
-  |  blt ->fff_fallback  // Fewer than 2 arguments.
-  |  evmergehi CARG1, CARG4, CARG2  // High words of both arguments, so one checknum tests both.
-  |  checknum CARG1
-  |   evmergehi CARG3, CARG4, CARG4  // CARG3 = high word of argument 2.
-  |  checkanyfail ->fff_fallback
-  |  bl extern func@plt
-  |  evmergelo CRET1, CRET1, CRET2  // Combine the CRET1/CRET2 words into one 64-bit value.
-  |  b ->fff_restv
-  |.endmacro
-  |
-  |.macro math_round, func  // Emits math_<func> on top of the local vm_<func>_hilo helper.
-  |  .ffunc math_ .. func
-  |  cmplwi NARGS8:RC, 8
-  |   evldd CARG2, 0(BASE)
-  |  blt ->fff_fallback  // No arguments.
-  |  checknum CARG2
-  |   evmergehi CARG1, CARG2, CARG2  // CARG1 = high word, as the _hilo entry expects.
-  |  checkfail ->fff_fallback
-  |   lwz PC, FRAME_PC(BASE)  // fff_res1 expects PC = return.
-  |  bl ->vm_..func.._hilo;
-  |  la RA, -8(BASE)
-  |  evstdd CRET2, 0(RA)  // The helper leaves its result in CRET2.
-  |  b ->fff_res1
-  |.endmacro
-  |
-  |  math_round floor  // math_floor
-  |  math_round ceil  // math_ceil
-  |
-  |  math_extern sqrt
-  |
-  |.ffunc math_log  // Same shape as math_extern, but requires exactly one argument.
-  |  cmplwi NARGS8:RC, 8
-  |   evldd CARG2, 0(BASE)
-  |  bne ->fff_fallback                 // Need exactly 1 argument.
-  |  checknum CARG2
-  |   evmergehi CARG1, CARG2, CARG2  // CARG1 = high word of the argument.
-  |  checkfail ->fff_fallback
-  |  bl extern log@plt
-  |  evmergelo CRET1, CRET1, CRET2  // Combine the CRET1/CRET2 words into one 64-bit value.
-  |  b ->fff_restv
-  |
-  |  math_extern log10
-  |  math_extern exp
-  |  math_extern sin
-  |  math_extern cos
-  |  math_extern tan
-  |  math_extern asin
-  |  math_extern acos
-  |  math_extern atan
-  |  math_extern sinh
-  |  math_extern cosh
-  |  math_extern tanh
-  |  math_extern2 pow
-  |  math_extern2 atan2
-  |  math_extern2 fmod
-  |
-  |->ff_math_deg:  // Same code as math_rad; only the upvalue[0] factor differs per function.
-  |.ffunc_n math_rad
-  |  evldd CARG2, CFUNC:RB->upvalue[0]
-  |  efdmul CRET1, CARG1, CARG2  // Result = argument * upvalue[0].
-  |  b ->fff_restv
-  |
-  |.ffunc math_ldexp  // Calls external ldexp with argument 2 converted to an integer.
-  |  cmplwi NARGS8:RC, 16
-  |   evldd CARG2, 0(BASE)
-  |   evldd CARG4, 8(BASE)
-  |  blt ->fff_fallback  // Fewer than 2 arguments.
-  |  evmergehi CARG1, CARG4, CARG2  // High words of both arguments, so one checknum tests both.
-  |  checknum CARG1
-  |  checkanyfail ->fff_fallback
-  |  efdctsi CARG3, CARG4  // CARG3 = argument 2 converted to a signed integer.
-  |  bl extern ldexp@plt
-  |  evmergelo CRET1, CRET1, CRET2  // Combine the CRET1/CRET2 words into one 64-bit value.
-  |  b ->fff_restv
-  |
-  |.ffunc math_frexp  // Two results: the frexp return value and the integer it wrote to tmptv.
-  |  cmplwi NARGS8:RC, 8
-  |   evldd CARG2, 0(BASE)
-  |  blt ->fff_fallback  // No arguments.
-  |  checknum CARG2
-  |   evmergehi CARG1, CARG2, CARG2  // CARG1 = high word of the argument.
-  |  checkfail ->fff_fallback
-  |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)  // Out parameter points at the tmptv scratch slot.
-  |   lwz PC, FRAME_PC(BASE)
-  |  bl extern frexp@plt
-  |   lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH)  // Read back the integer written through CARG3.
-  |  evmergelo CRET1, CRET1, CRET2  // Combine the CRET1/CRET2 words into one 64-bit value.
-  |   efdcfsi CRET2, TMP1  // Convert that integer to a double.
-  |   la RA, -8(BASE)
-  |  evstdd CRET1, 0(RA)
-  |  li RD, (2+1)*8  // Two results.
-  |   evstdd CRET2, 8(RA)
-  |  b ->fff_res
-  |
-  |.ffunc math_modf  // Two results: modf writes one through CARG3, the return value is the other.
-  |  cmplwi NARGS8:RC, 8
-  |   evldd CARG2, 0(BASE)
-  |  blt ->fff_fallback  // No arguments.
-  |  checknum CARG2
-  |   evmergehi CARG1, CARG2, CARG2  // CARG1 = high word of the argument.
-  |  checkfail ->fff_fallback
-  |  la CARG3, -8(BASE)  // modf stores directly into the first result slot.
-  |   lwz PC, FRAME_PC(BASE)
-  |  bl extern modf@plt
-  |  evmergelo CRET1, CRET1, CRET2  // Combine the CRET1/CRET2 words into one 64-bit value.
-  |   la RA, -8(BASE)
-  |  evstdd CRET1, 0(BASE)  // Second result slot (RA+8) = modf return value.
-  |  li RD, (2+1)*8  // Two results.
-  |  b ->fff_res
-  |
-  |.macro math_minmax, name, cmpop  // Emits a loop that folds all arguments into CARG1 using cmpop.
-  |  .ffunc_1 name
-  |  checknum CARG1
-  |   li TMP1, 8  // TMP1 = byte offset of the next argument.
-  |  checkfail ->fff_fallback
-  |1:
-  |  evlddx CARG2, BASE, TMP1  // Loaded before the bounds test at the bge below.
-  |  cmplw cr1, TMP1, NARGS8:RC
-  |   checknum CARG2
-  |  bge cr1, ->fff_restv		// Ok, since CRET1 = CARG1.
-  |   checkfail ->fff_fallback
-  |  cmpop CARG2, CARG1
-  |   addi TMP1, TMP1, 8
-  |  crmove 4*cr0+lt, 4*cr0+gt  // Copy cmpop's cr0.gt result into cr0.lt.
-  |  evsel CARG1, CARG2, CARG1  // Choose CARG2 or CARG1 according to cr0.
-  |  b <1
-  |.endmacro
-  |
-  |  math_minmax math_min, efdtstlt
-  |  math_minmax math_max, efdtstgt
-  |
-  |//-- String library -----------------------------------------------------
-  |
-  |.ffunc_1 string_len  // Returns str->len converted to a double.
-  |  checkstr STR:CARG1
-  |  checkfail ->fff_fallback
-  |  lwz TMP0, STR:CARG1->len
-  |  efdcfsi CRET1, TMP0  // Signed integer to double.
-  |  b ->fff_restv
-  |
-  |.ffunc string_byte			// Only handle the 1-arg case here.
-  |  cmplwi NARGS8:RC, 8
-  |   evldd STR:CARG1, 0(BASE)
-  |  bne ->fff_fallback			// Need exactly 1 argument.
-  |  checkstr STR:CARG1
-  |   la RA, -8(BASE)
-  |  checkfail ->fff_fallback
-  |  lwz TMP0, STR:CARG1->len
-  |   li RD, (0+1)*8  // RD for the 0-result case.
-  |    lbz TMP1, STR:CARG1[1]		// Access is always ok (NUL at end).
-  |   li TMP2, (1+1)*8  // RD for the 1-result case.
-  |  cmplwi TMP0, 0
-  |   lwz PC, FRAME_PC(BASE)
-  |    efdcfsi CRET1, TMP1  // Loaded byte converted to a double.
-  |  iseleq RD, RD, TMP2  // len == 0: 0 results, otherwise 1 result.
-  |    evstdd CRET1, 0(RA)  // Stored unconditionally; RD decides whether it is returned.
-  |  b ->fff_res
-  |
-  |.ffunc string_char			// Only handle the 1-arg case here.
-  |  ffgccheck
-  |  cmplwi NARGS8:RC, 8
-  |   evldd CARG1, 0(BASE)
-  |  bne ->fff_fallback			// Exactly 1 argument.
-  |  checknum CARG1
-  |   la CARG2, DISPATCH_GL(tmptv)(DISPATCH)  // Build the 1-byte string in the tmptv scratch slot.
-  |  checkfail ->fff_fallback
-  |  efdctsiz TMP0, CARG1  // Double to signed integer.
-  |   li CARG3, 1  // Length = 1.
-  |  cmplwi TMP0, 255
-  |   stb TMP0, 0(CARG2)
-  |  bgt ->fff_fallback  // Unsigned compare: anything above 255 falls back.
-  |->fff_newstr:  // Shared tail: CARG2 = char *str, CARG3 = length.
-  |  mr CARG1, L
-  |  stw BASE, L->base
-  |  stw PC, SAVE_PC
-  |  bl extern lj_str_new		// (lua_State *L, char *str, size_t l)
-  |  // Returns GCstr *.
-  |  lwz BASE, L->base
-  |   evmergelo STR:CRET1, TISSTR, STR:CRET1  // Pair TISSTR with the returned pointer.
-  |  b ->fff_restv
-  |
-  |.ffunc string_sub  // Normalises start/end inline, then creates the substring via fff_newstr.
-  |  ffgccheck
-  |  cmplwi NARGS8:RC, 16
-  |   evldd CARG3, 16(BASE)
-  |   evldd STR:CARG1, 0(BASE)
-  |  blt ->fff_fallback  // Fewer than 2 arguments.
-  |   evldd CARG2, 8(BASE)
-  |   li TMP2, -1  // Default end = -1.
-  |  beq >1  // Exactly 2 arguments: keep the default end.
-  |  checknum CARG3
-  |  checkfail ->fff_fallback
-  |  efdctsiz TMP2, CARG3  // TMP2 = end as an integer.
-  |1:
-  |  checknum CARG2
-  |  checkfail ->fff_fallback
-  |  checkstr STR:CARG1
-  |   efdctsiz TMP1, CARG2  // TMP1 = start as an integer.
-  |  checkfail ->fff_fallback
-  |   lwz TMP0, STR:CARG1->len
-  |  cmplw TMP0, TMP2			// len < end? (unsigned compare)
-  |   add TMP3, TMP2, TMP0  // TMP3 = end + len, used at 5:.
-  |  blt >5
-  |2:
-  |  cmpwi TMP1, 0			// start <= 0?
-  |   add TMP3, TMP1, TMP0  // TMP3 = start + len, used at 7:.
-  |  ble >7
-  |3:
-  |  sub. CARG3, TMP2, TMP1
-  |    addi CARG2, STR:CARG1, #STR-1
-  |   addi CARG3, CARG3, 1  // Length = end - start + 1.
-  |    add CARG2, CARG2, TMP1  // CARG2 = str + #STR - 1 + start.
-  |  isellt CARG3, r0, CARG3  // Length 0 when end - start was negative.
-  |  b ->fff_newstr
-  |
-  |5:  // Negative end or overflow.
-  |  cmpw TMP0, TMP2
-  |   addi TMP3, TMP3, 1
-  |  iselgt TMP2, TMP3, TMP0		// end = end > len ? len : end+len+1
-  |  b <2
-  |
-  |7:  // Negative start or underflow.
-  |   cmpwi cr1, TMP3, 0
-  |  iseleq TMP1, r0, TMP3
-  |   isel TMP1, r0, TMP1, 4*cr1+lt
-  |  addi TMP1, TMP1, 1			// start = 1 + (start ? start+len : 0)
-  |  b <3
-  |
-  |.ffunc string_rep			// Only handle the 1-char case inline.
-  |  ffgccheck
-  |  cmplwi NARGS8:RC, 16
-  |   evldd CARG1, 0(BASE)
-  |   evldd CARG2, 8(BASE)
-  |  bne ->fff_fallback			// Exactly 2 arguments.
-  |  checknum CARG2
-  |  checkfail ->fff_fallback
-  |  checkstr STR:CARG1
-  |   efdctsiz CARG3, CARG2  // CARG3 = repeat count as an integer.
-  |  checkfail ->fff_fallback
-  |   lwz TMP0, STR:CARG1->len
-  |  cmpwi CARG3, 0
-  |   lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
-  |  ble >2				// Count <= 0? (or non-int)
-  |   cmplwi TMP0, 1
-  |  subi TMP2, CARG3, 1  // TMP2 = index of the last byte to fill.
-  |   blt >2				// Zero length string?
-  |  cmplw cr1, TMP1, CARG3
-  |   bne ->fff_fallback		// Fallback for > 1-char strings.
-  |   lbz TMP0, STR:CARG1[1]
-  |   lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
-  |  blt cr1, ->fff_fallback  // tmpbuf.sz < count.
-  |1:  // Fill buffer with char. Yes, this is suboptimal code (do you care?).
-  |  cmplwi TMP2, 0
-  |   stbx TMP0, CARG2, TMP2
-  |   subi TMP2, TMP2, 1
-  |  bne <1
-  |  b ->fff_newstr  // CARG2 = tmpbuf.buf, CARG3 = count.
-  |2:  // Return empty string.
-  |  la STR:CRET1, DISPATCH_GL(strempty)(DISPATCH)
-  |  evmergelo CRET1, TISSTR, STR:CRET1
-  |  b ->fff_restv
-  |
-  |.ffunc string_reverse  // Copies the string backwards into tmpbuf, then creates it via fff_newstr.
-  |  ffgccheck
-  |  cmplwi NARGS8:RC, 8
-  |   evldd CARG1, 0(BASE)
-  |  blt ->fff_fallback  // No arguments.
-  |  checkstr STR:CARG1
-  |   lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
-  |  checkfail ->fff_fallback
-  |  lwz CARG3, STR:CARG1->len
-  |   la CARG1, #STR(STR:CARG1)  // CARG1 = str + #STR, the source for the byte loads below.
-  |   lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
-  |   li TMP2, 0  // TMP2 = source index, counting up.
-  |  cmplw TMP1, CARG3
-  |   subi TMP3, CARG3, 1  // TMP3 = destination index, counting down.
-  |  blt ->fff_fallback  // tmpbuf.sz < len.
-  |1:  // Reverse string copy.
-  |  cmpwi TMP3, 0
-  |   lbzx TMP1, CARG1, TMP2
-  |  blt ->fff_newstr  // Destination index went negative: done.
-  |   stbx TMP1, CARG2, TMP3
-  |  subi TMP3, TMP3, 1
-  |  addi TMP2, TMP2, 1
-  |  b <1
-  |
-  |.macro ffstring_case, name, lo  // lo = first code of the 26-character range whose case is flipped.
-  |  .ffunc name
-  |  ffgccheck
-  |  cmplwi NARGS8:RC, 8
-  |   evldd CARG1, 0(BASE)
-  |  blt ->fff_fallback  // No arguments.
-  |  checkstr STR:CARG1
-  |   lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
-  |  checkfail ->fff_fallback
-  |  lwz CARG3, STR:CARG1->len
-  |   la CARG1, #STR(STR:CARG1)  // CARG1 = str + #STR, the source for the byte loads below.
-  |   lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
-  |  cmplw TMP1, CARG3
-  |   li TMP2, 0  // TMP2 = byte index.
-  |  blt ->fff_fallback  // tmpbuf.sz < len.
-  |1:  // ASCII case conversion.
-  |  cmplw TMP2, CARG3
-  |   lbzx TMP1, CARG1, TMP2
-  |  bge ->fff_newstr  // Index reached len: done.
-  |   subi TMP0, TMP1, lo
-  |    xori TMP3, TMP1, 0x20  // Same character with bit 0x20 flipped.
-  |   cmplwi TMP0, 26  // Unsigned: true only for lo <= ch < lo+26.
-  |   isellt TMP1, TMP3, TMP1
-  |   stbx TMP1, CARG2, TMP2
-  |  addi TMP2, TMP2, 1
-  |  b <1
-  |.endmacro
-  |
-  |ffstring_case string_lower, 65  // Range starts at 65 ('A').
-  |ffstring_case string_upper, 97  // Range starts at 97 ('a').
-  |
-  |//-- Table library ------------------------------------------------------
-  |
-  |.ffunc_1 table_getn  // Returns lj_tab_len(t) converted to a double.
-  |  checktab CARG1
-  |  checkfail ->fff_fallback
-  |  bl extern lj_tab_len		// (GCtab *t)
-  |  // Returns uint32_t (but less than 2^31).
-  |  efdcfsi CRET1, CRET1  // Signed conversion is fine given the range noted above.
-  |  b ->fff_restv
-  |
-  |//-- Bit library --------------------------------------------------------
-  |
-  |.macro .ffunc_bit, name  // Prologue for bit_<name>: adds TOBIT to argument 1.
-  |  .ffunc_n bit_..name
-  |  efdadd CARG1, CARG1, TOBIT  // Callers then use CARG1 as an integer operand.
-  |.endmacro
-  |
-  |.ffunc_bit tobit
-  |->fff_resbit:  // Shared exit: convert the integer in CARG1 to a double and return it.
-  |  efdcfsi CRET1, CARG1
-  |  b ->fff_restv
-  |
-  |.macro .ffunc_bit_op, name, ins  // Emits bit_<name>: folds all arguments into CARG1 with ins.
-  |  .ffunc_bit name
-  |   li TMP1, 8  // TMP1 = byte offset of the next argument.
-  |1:
-  |  evlddx CARG2, BASE, TMP1  // Loaded before the bounds test at the bge below.
-  |  cmplw cr1, TMP1, NARGS8:RC
-  |   checknum CARG2
-  |  bge cr1, ->fff_resbit  // All arguments consumed.
-  |   checkfail ->fff_fallback
-  |  efdadd CARG2, CARG2, TOBIT  // Same TOBIT conversion as for argument 1.
-  |  ins CARG1, CARG1, CARG2
-  |   addi TMP1, TMP1, 8
-  |  b <1
-  |.endmacro
-  |
-  |.ffunc_bit_op band, and
-  |.ffunc_bit_op bor, or
-  |.ffunc_bit_op bxor, xor
-  |
-  |.ffunc_bit bswap  // Builds the byte-reversed word in TMP0 with one rotate and two inserts.
-  |  rotlwi TMP0, CARG1, 8
-  |  rlwimi TMP0, CARG1, 24, 0, 7
-  |  rlwimi TMP0, CARG1, 24, 16, 23
-  |  efdcfsi CRET1, TMP0  // Integer result back to a double.
-  |  b ->fff_restv
-  |
-  |.ffunc_bit bnot  // Returns the bitwise complement of the converted argument.
-  |  not TMP0, CARG1
-  |  efdcfsi CRET1, TMP0  // Integer result back to a double.
-  |  b ->fff_restv
-  |
-  |.macro .ffunc_bit_sh, name, ins, shmod  // shmod: 1 = keep low 5 bits of the count, 2 = negate it, else unchanged.
-  |  .ffunc_nn bit_..name
-  |  efdadd CARG2, CARG2, TOBIT
-  |   efdadd CARG1, CARG1, TOBIT
-  |.if shmod == 1
-  |  rlwinm CARG2, CARG2, 0, 27, 31  // Count &= 31.
-  |.elif shmod == 2
-  |  neg CARG2, CARG2  // Negated count lets the left-rotate instruction rotate right.
-  |.endif
-  |  ins TMP0, CARG1, CARG2
-  |  efdcfsi CRET1, TMP0  // Integer result back to a double.
-  |  b ->fff_restv
-  |.endmacro
-  |
-  |.ffunc_bit_sh lshift, slw, 1
-  |.ffunc_bit_sh rshift, srw, 1
-  |.ffunc_bit_sh arshift, sraw, 1
-  |.ffunc_bit_sh rol, rotlw, 0
-  |.ffunc_bit_sh ror, rotlw, 2
-  |
-  |//-----------------------------------------------------------------------
-  |
-  |->fff_fallback:			// Call fast function fallback handler.
-  |  // BASE = new base, RB = CFUNC, RC = nargs*8
-  |  lwz TMP3, CFUNC:RB->f  // TMP3 = handler address, called through CTR below.
-  |    add TMP1, BASE, NARGS8:RC
-  |   lwz PC, FRAME_PC(BASE)		// Fallback may overwrite PC.
-  |    addi TMP0, TMP1, 8*LUA_MINSTACK
-  |     lwz TMP2, L->maxstack
-  |   stw PC, SAVE_PC			// Redundant (but a defined value).
-  |  cmplw TMP0, TMP2
-  |     stw BASE, L->base
-  |    stw TMP1, L->top
-  |   mr CARG1, L
-  |  bgt >5				// Need to grow stack.
-  |  mtctr TMP3
-  |  bctrl				// (lua_State *L)
-  |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
-  |  lwz BASE, L->base
-  |  cmpwi CRET1, 0
-  |   slwi RD, CRET1, 3  // RD = return value * 8, the form fff_res expects.
-  |   la RA, -8(BASE)
-  |  bgt ->fff_res			// Returned nresults+1?
-  |1:  // Returned 0 or -1: retry fast path.
-  |  lwz TMP0, L->top
-  |   lwz LFUNC:RB, FRAME_FUNC(BASE)
-  |  sub NARGS8:RC, TMP0, BASE  // Recompute nargs*8 from L->top.
-  |  bne ->vm_call_tail			// Returned -1?
-  |  ins_callt				// Returned 0: retry fast path.
-  |
-  |// Reconstruct previous base for vmeta_call during tailcall.
-  |->vm_call_tail:
-  |  andi. TMP0, PC, FRAME_TYPE
-  |   rlwinm TMP1, PC, 0, 0, 28  // TMP1 = PC with the low 3 bits cleared.
-  |  bne >3  // FRAME_TYPE bits set: use that value as the delta.
-  |  lwz INS, -4(PC)
-  |  decode_RA8 TMP1, INS
-  |  addi TMP1, TMP1, 8  // Otherwise delta = RA*8 + 8 from the preceding instruction.
-  |3:
-  |  sub TMP2, BASE, TMP1  // TMP2 = previous base.
-  |  b ->vm_call_dispatch		// Resolve again for tailcall.
-  |
-  |5:  // Grow stack for fallback handler.
-  |  li CARG2, LUA_MINSTACK
-  |  bl extern lj_state_growstack	// (lua_State *L, int n)
-  |  lwz BASE, L->base
-  |  cmpw TMP0, TMP0			// Set 4*cr0+eq to force retry.
-  |  b <1
-  |
-  |->fff_gcstep:			// Call GC step function.
-  |  // BASE = new base, RC = nargs*8
-  |  mflr SAVE0  // Keep the return address across the C call.
-  |   stw BASE, L->base
-  |  add TMP0, BASE, NARGS8:RC
-  |   stw PC, SAVE_PC			// Redundant (but a defined value).
-  |  stw TMP0, L->top
-  |  mr CARG1, L
-  |  bl extern lj_gc_step		// (lua_State *L)
-  |   lwz BASE, L->base
-  |  mtlr SAVE0
-  |    lwz TMP0, L->top
-  |   sub NARGS8:RC, TMP0, BASE  // Recompute nargs*8 from the reloaded top/base.
-  |   lwz CFUNC:RB, FRAME_FUNC(BASE)  // Reload RB from the frame.
-  |  blr
-  |
-  |//-----------------------------------------------------------------------
-  |//-- Special dispatch targets -------------------------------------------
-  |//-----------------------------------------------------------------------
-  |
-  |->vm_record:				// Dispatch target for recording phase.
-  |.if JIT
-  |  NYI
-  |.endif
-  |
-  |->vm_rethook:			// Dispatch target for return hooks.
-  |  lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
-  |  andi. TMP0, TMP3, HOOK_ACTIVE	// Hook already active?
-  |  beq >1  // Not active: go to label 1 inside vm_inshook.
-  |5:  // Re-dispatch to static ins.
-  |  addi TMP1, TMP1, GG_DISP2STATIC	// Assumes decode_OP4 TMP1, INS.
-  |  lwzx TMP0, DISPATCH, TMP1
-  |  mtctr TMP0
-  |  bctr
-  |
-  |->vm_inshook:			// Dispatch target for instr/line hooks.
-  |  lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
-  |  lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH)
-  |  andi. TMP0, TMP3, HOOK_ACTIVE	// Hook already active?
-  |   rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0
-  |  bne <5  // Active: re-dispatch without calling the hook.
-  |
-  |   cmpwi cr1, TMP0, 0
-  |  addic. TMP2, TMP2, -1  // Decrement hookcount; cr0 reflects the new value.
-  |   beq cr1, <5
-  |  stw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
-  |  beq >1  // Count reached zero: call the hook.
-  |   bge cr1, <5
-  |1:
-  |  mr CARG1, L
-  |   stw MULTRES, SAVE_MULTRES
-  |  mr CARG2, PC
-  |   stw BASE, L->base
-  |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
-  |  bl extern lj_dispatch_ins		// (lua_State *L, const BCIns *pc)
-  |3:
-  |  lwz BASE, L->base
-  |4:  // Re-dispatch to static ins.
-  |  lwz INS, -4(PC)  // Re-fetch the instruction word preceding PC and decode all operands.
-  |  decode_OP4 TMP1, INS
-  |   decode_RB8 RB, INS
-  |  addi TMP1, TMP1, GG_DISP2STATIC
-  |   decode_RD8 RD, INS
-  |  lwzx TMP0, DISPATCH, TMP1
-  |   decode_RA8 RA, INS
-  |   decode_RC8 RC, INS
-  |  mtctr TMP0
-  |  bctr
-  |
-  |->cont_hook:				// Continue from hook yield.
-  |  addi PC, PC, 4
-  |  lwz MULTRES, -20(RB)		// Restore MULTRES for *M ins.
-  |  b <4
-  |
-  |->vm_hotloop:			// Hot loop counter underflow.
-  |.if JIT
-  |  NYI
-  |.endif
-  |
-  |->vm_callhook:			// Dispatch target for call hooks.
-  |  mr CARG2, PC
-  |.if JIT
-  |  b >1  // Skip the vm_hotcall-only instruction below.
-  |.endif
-  |
-  |->vm_hotcall:			// Hot call counter underflow.
-  |.if JIT
-  |  ori CARG2, PC, 1  // Same as vm_callhook, but with bit 0 of the pc argument set.
-  |1:
-  |.endif
-  |  add TMP0, BASE, RC
-  |   stw PC, SAVE_PC
-  |  mr CARG1, L
-  |   stw BASE, L->base
-  |  sub RA, RA, BASE  // Keep RA as an offset from BASE across the call.
-  |   stw TMP0, L->top
-  |  bl extern lj_dispatch_call		// (lua_State *L, const BCIns *pc)
-  |  // Returns ASMFunction.
-  |  lwz BASE, L->base
-  |   lwz TMP0, L->top
-  |   stw ZERO, SAVE_PC			// Invalidate for subsequent line hook.
-  |  sub NARGS8:RC, TMP0, BASE
-  |  add RA, BASE, RA  // Rebase RA on the reloaded BASE.
-  |  lwz LFUNC:RB, FRAME_FUNC(BASE)
-  |  mtctr CRET1  // Jump to the returned ASMFunction.
-  |  bctr
-  |
-  |//-----------------------------------------------------------------------
-  |//-- Trace exit handler -------------------------------------------------
-  |//-----------------------------------------------------------------------
-  |
-  |->vm_exit_handler:  // Label only; emits just NYI when JIT is enabled.
-  |.if JIT
-  |  NYI
-  |.endif
-  |->vm_exit_interp:  // Label only; emits just NYI when JIT is enabled.
-  |.if JIT
-  |  NYI
-  |.endif
-  |
-  |//-----------------------------------------------------------------------
-  |//-- Math helper functions ----------------------------------------------
-  |//-----------------------------------------------------------------------
-  |
-  |// FP value rounding. Called by math.floor/math.ceil fast functions
-  |// and from JIT code.
-  |//
-  |// This can be inlined if the CPU has the frin/friz/frip/frim instructions.
-  |// The alternative hard-float approaches have a deep dependency chain.
-  |// The resulting latency is at least 3x-7x the double-precision FP latency
-  |// (e500v2: 6cy, e600: 5cy, Cell: 10cy) or around 20-70 cycles.
-  |//
-  |// The soft-float approach is tedious, but much faster (e500v2: ~11cy/~6cy).
-  |// However it relies on a fast way to transfer the FP value to GPRs
-  |// (e500v2: 0cy for lo-word, 1cy for hi-word).
-  |//
-  |.macro vm_round, name, mode  // mode: 0 = floor, 1 = ceil, 2 = trunc. Emits name_efd and name_hilo.
-  |  // Used temporaries: TMP0, TMP1, TMP2, TMP3.
-  |->name.._efd:			// Input: CARG2, output: CRET2
-  |  evmergehi CARG1, CARG2, CARG2  // Derive the hi word, then fall into the _hilo entry.
-  |->name.._hilo:
-  |  // Input: CARG1 (hi), CARG2 (hi, lo), output: CRET2
-  |  rlwinm TMP2, CARG1, 12, 21, 31  // TMP2 = 11-bit biased exponent field of hi.
-  |  addic. TMP2, TMP2, -1023		// exp = exponent(x) - 1023
-  |   li TMP1, -1
-  |  cmplwi cr1, TMP2, 51		// 0 <= exp <= 51?
-  |   subfic TMP0, TMP2, 52
-  |  bgt cr1, >1  // Unsigned compare: exp < 0 and exp > 51 both go to 1:.
-  |   lus TMP3, 0xfff0
-  |  slw TMP0, TMP1, TMP0		// lomask = -1 << (52-exp)
-  |   sraw TMP1, TMP3, TMP2		// himask = (int32_t)0xfff00000 >> exp
-  |.if mode == 2		// trunc(x):
-  |  evmergelo TMP0, TMP1, TMP0
-  |  evand CRET2, CARG2, TMP0		// hi &= himask, lo &= lomask
-  |.else
-  |  andc TMP2, CARG2, TMP0
-  |   andc TMP3, CARG1, TMP1
-  |  or TMP2, TMP2, TMP3		// ztest = (hi&~himask) | (lo&~lomask)
-  |   srawi TMP3, CARG1, 31		// signmask = (int32_t)hi >> 31
-  |.if mode == 0		// floor(x):
-  |  and. TMP2, TMP2, TMP3		// iszero = ((ztest & signmask) == 0)
-  |.else			// ceil(x):
-  |  andc. TMP2, TMP2, TMP3		// iszero = ((ztest & ~signmask) == 0)
-  |.endif
-  |  and CARG2, CARG2, TMP0		// lo &= lomask
-  |  and CARG1, CARG1, TMP1		// hi &= himask
-  |   subc TMP0, CARG2, TMP0  // Sets the carry consumed by sube below.
-  |  iseleq TMP0, CARG2, TMP0		// lo = iszero ? lo : lo-lomask
-  |   sube TMP1, CARG1, TMP1
-  |  iseleq TMP1, CARG1, TMP1		// hi = iszero ? hi : hi-himask+carry
-  |  evmergelo CRET2, TMP1, TMP0
-  |.endif
-  |  blr
-  |1:  // exp < 0 or exp > 51; cr0 still holds the sign of exp from addic. above.
-  |  bgtlr				// Already done if >=2^52, +-inf or nan.
-  |.if mode == 2		// trunc(x):
-  |  rlwinm TMP1, CARG1, 0, 0, 0	// hi = sign(x)
-  |  li TMP0, 0
-  |  evmergelo CRET2, TMP1, TMP0
-  |.else
-  |  rlwinm TMP2, CARG1, 0, 1, 31  // hi with the sign bit cleared.
-  |  srawi TMP0, CARG1, 31		// signmask = (int32_t)hi >> 31
-  |  or TMP2, TMP2, CARG2		// ztest = abs(hi) | lo
-  |   lus TMP1, 0x3ff0  // High word of 1.0.
-  |.if mode == 0		// floor(x):
-  |  and. TMP2, TMP2, TMP0		// iszero = ((ztest & signmask) == 0)
-  |.else			// ceil(x):
-  |  andc. TMP2, TMP2, TMP0		// iszero = ((ztest & ~signmask) == 0)
-  |.endif
-  |   li TMP0, 0
-  |  iseleq TMP1, r0, TMP1
-  |  rlwimi CARG1, TMP1, 0, 1, 31	// hi = sign(x) | (iszero ? 0.0 : 1.0)
-  |  evmergelo CRET2, CARG1, TMP0
-  |.endif
-  |  blr
-  |.endmacro
-  |
-  |->vm_floor:  // Wrapper around vm_floor_hilo taking and returning the value as a hi/lo pair.
-  |  mflr CARG3  // Keep the return address across the local call.
-  |  evmergelo CARG2, CARG1, CARG2  // Pack hi (CARG1) and lo (CARG2) into one 64-bit CARG2.
-  |  bl ->vm_floor_hilo
-  |  mtlr CARG3
-  |  evmergehi CRET1, CRET2, CRET2  // CRET1 = hi word of the 64-bit result in CRET2.
-  |  blr
-  |
-  |  vm_round vm_floor, 0
-  |  vm_round vm_ceil,  1
-  |.if JIT
-  |  vm_round vm_trunc, 2
-  |.else
-  |->vm_trunc_efd:  // Labels only, no code, when JIT is off.
-  |->vm_trunc_hilo:
-  |.endif
-  |
-  |//-----------------------------------------------------------------------
-  |//-- Miscellaneous functions --------------------------------------------
-  |//-----------------------------------------------------------------------
-  |
-  |//-----------------------------------------------------------------------
-  |//-- FFI helper functions -----------------------------------------------
-  |//-----------------------------------------------------------------------
-  |
-  |->vm_ffi_call:  // Label only; emits just NYI when FFI is enabled.
-  |.if FFI
-  |  NYI
-  |.endif
-  |
-  |//-----------------------------------------------------------------------
-}
-
-/* Generate the code for a single instruction. */
-static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-{
-  int vk = 0;
-  |=>defop:
-
-  switch (op) {
-
-  /* -- Comparison ops ---------------------------------------------------- */
-
-  /* Remember: all ops branch for a true comparison, fall through otherwise. */
-
-  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
-    |  // RA = src1*8, RD = src2*8, JMP with RD = target
-    |  evlddx TMP0, BASE, RA
-    |   addi PC, PC, 4  // Step over the following instruction word; it is read back via -4(PC).
-    |  evlddx TMP1, BASE, RD
-    |   addis TMP3, PC, -(BCBIAS_J*4 >> 16)
-    |   lwz TMP2, -4(PC)
-    |  evmergehi RB, TMP0, TMP1  // RB = high words of both operands, so one checknum tests both.
-    |   decode_RD4 TMP2, TMP2
-    |  checknum RB
-    |   add TMP2, TMP2, TMP3  // TMP2 = PC to use if the branch is taken.
-    |  checkanyfail ->vmeta_comp
-    |  efdcmplt TMP0, TMP1  // Result lands in cr0.gt, which iselgt tests below.
-    if (op == BC_ISLE || op == BC_ISGT) {
-      |  efdcmpeq cr1, TMP0, TMP1
-      |  cror 4*cr0+gt, 4*cr0+gt, 4*cr1+gt  // cr0.gt = less-than OR equal.
-    }
-    if (op == BC_ISLT || op == BC_ISLE) {
-      |  iselgt PC, TMP2, PC  // Take the target when the comparison holds.
-    } else {
-      |  iselgt PC, PC, TMP2  // ISGE/ISGT: take the target when it does not hold.
-    }
-    |  ins_next
-    break;
-
-  case BC_ISEQV: case BC_ISNEV:
-    vk = op == BC_ISEQV;
-    |  // RA = src1*8, RD = src2*8, JMP with RD = target
-    |  evlddx CARG2, BASE, RA
-    |   addi PC, PC, 4  // Step over the following instruction word; it is read back via -4(PC).
-    |  evlddx CARG3, BASE, RD
-    |   addis TMP3, PC, -(BCBIAS_J*4 >> 16)
-    |   lwz TMP2, -4(PC)
-    |  evmergehi RB, CARG2, CARG3  // RB = high words of both operands, so one checknum tests both.
-    |   decode_RD4 TMP2, TMP2
-    |  checknum RB
-    |   add TMP2, TMP2, TMP3  // TMP2 = PC to use if the branch is taken.
-    |  checkanyfail >5
-    |  efdcmpeq CARG2, CARG3  // Both numbers: compare as doubles, result in cr0.gt.
-    if (vk) {
-      |  iselgt PC, TMP2, PC
-    } else {
-      |  iselgt PC, PC, TMP2
-    }
-    |1:
-    |  ins_next
-    |
-    |5:  // Either or both types are not numbers.
-    |  evcmpeq CARG2, CARG3
-    |   not TMP3, RB
-    |   cmplwi cr1, TMP3, ~LJ_TISPRI		// Primitive?
-    |  crorc 4*cr7+lt, 4*cr0+so, 4*cr0+lt	// 1: Same tv or different type.
-    |   cmplwi cr6, TMP3, ~LJ_TISTABUD		// Table or userdata?
-    |  crandc 4*cr7+gt, 4*cr0+lt, 4*cr1+gt	// 2: Same type and primitive.
-    |   mr SAVE0, PC  // Keep the current PC; it is restored before vmeta_equal.
-    if (vk) {
-      |  isel PC, TMP2, PC, 4*cr7+gt
-    } else {
-      |  isel TMP2, PC, TMP2, 4*cr7+gt
-    }
-    |  cror 4*cr7+lt, 4*cr7+lt, 4*cr7+gt	// 1 or 2.
-    if (vk) {
-      |  isel PC, TMP2, PC, 4*cr0+so
-    } else {
-      |  isel PC, PC, TMP2, 4*cr0+so
-    }
-    |  blt cr7, <1			// Done if 1 or 2.
-    |  blt cr6, <1			// Done if not tab/ud.
-    |
-    |  // Different tables or userdatas. Need to check __eq metamethod.
-    |  // Field metatable must be at same offset for GCtab and GCudata!
-    |  lwz TAB:TMP2, TAB:CARG2->metatable
-    |   li CARG4, 1-vk			// ne = 0 or 1.
-    |  cmplwi TAB:TMP2, 0
-    |  beq <1				// No metatable?
-    |  lbz TMP2, TAB:TMP2->nomm
-    |  andi. TMP2, TMP2, 1<<MM_eq
-    |  bne <1				// Or 'no __eq' flag set?
-    |  mr PC, SAVE0			// Restore old PC.
-    |  b ->vmeta_equal			// Handle __eq metamethod.
-    break;
-
-  case BC_ISEQS: case BC_ISNES:
-    vk = op == BC_ISEQS;
-    |  // RA = src*8, RD = str_const*8 (~), JMP with RD = target
-    |  evlddx TMP0, BASE, RA
-    |   srwi RD, RD, 1
-    |    lwz INS, 0(PC)
-    |   subfic RD, RD, -4
-    |    addi PC, PC, 4
-    |   lwzx STR:TMP1, KBASE, RD	// KBASE-4-str_const*4
-    |    addis TMP3, PC, -(BCBIAS_J*4 >> 16)
-    |    decode_RD4 TMP2, INS
-    |   evmergelo STR:TMP1, TISSTR, STR:TMP1
-    |    add TMP2, TMP2, TMP3
-    |  evcmpeq TMP0, STR:TMP1
-    if (vk) {
-      |  isel PC, TMP2, PC, 4*cr0+so
-    } else {
-      |  isel PC, PC, TMP2, 4*cr0+so
-    }
-    |  ins_next
-    break;
-
-  case BC_ISEQN: case BC_ISNEN:
-    vk = op == BC_ISEQN;
-    |  // RA = src*8, RD = num_const*8, JMP with RD = target
-    |  evlddx TMP0, BASE, RA
-    |   addi PC, PC, 4
-    |  evlddx TMP1, KBASE, RD
-    |   addis TMP3, PC, -(BCBIAS_J*4 >> 16)
-    |   lwz INS, -4(PC)
-    |  checknum TMP0
-    |  checkfail >5
-    |  efdcmpeq TMP0, TMP1
-    |1:
-    |   decode_RD4 TMP2, INS
-    |   add TMP2, TMP2, TMP3
-    if (vk) {
-      |  iselgt PC, TMP2, PC
-      |5:
-    } else {
-      |  iselgt PC, PC, TMP2
-    }
-    |3:
-    |  ins_next
-    if (!vk) {
-      |5:
-      |  decode_RD4 TMP2, INS
-      |  add PC, TMP2, TMP3
-      |  b <3
-    }
-    break;
-
-  case BC_ISEQP: case BC_ISNEP:
-    vk = op == BC_ISEQP;
-    |  // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
-    |  lwzx TMP0, BASE, RA
-    |   srwi TMP1, RD, 3
-    |    lwz INS, 0(PC)
-    |    addi PC, PC, 4
-    |   not TMP1, TMP1
-    |    addis TMP3, PC, -(BCBIAS_J*4 >> 16)
-    |  cmplw TMP0, TMP1
-    |    decode_RD4 TMP2, INS
-    |    add TMP2, TMP2, TMP3
-    if (vk) {
-      |  iseleq PC, TMP2, PC
-    } else {
-      |  iseleq PC, PC, TMP2
-    }
-    |  ins_next
-    break;
-
-  /* -- Unary test and copy ops ------------------------------------------- */
-
-  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
-    |  // RA = dst*8 or unused, RD = src*8, JMP with RD = target
-    |  evlddx TMP0, BASE, RD
-    |   evaddw TMP1, TISNIL, TISNIL	// Synthesize LJ_TFALSE.
-    |   lwz INS, 0(PC)
-    |  evcmpltu TMP0, TMP1
-    |   addi PC, PC, 4
-    if (op == BC_IST || op == BC_ISF) {
-      |  addis TMP3, PC, -(BCBIAS_J*4 >> 16)
-      |  decode_RD4 TMP2, INS
-      |  add TMP2, TMP2, TMP3
-      if (op == BC_IST) {
-	|  isellt PC, TMP2, PC
-      } else {
-	|  isellt PC, PC, TMP2
-      }
-    } else {
-      if (op == BC_ISTC) {
-	|  checkfail >1
-      } else {
-	|  checkok >1
-      }
-      |  addis PC, PC, -(BCBIAS_J*4 >> 16)
-      |  decode_RD4 TMP2, INS
-      |   evstddx TMP0, BASE, RA
-      |  add PC, PC, TMP2
-      |1:
-    }
-    |  ins_next
-    break;
-
-  /* -- Unary ops --------------------------------------------------------- */
-
-  case BC_MOV:
-    |  // RA = dst*8, RD = src*8
-    |  ins_next1
-    |  evlddx TMP0, BASE, RD
-    |  evstddx TMP0, BASE, RA
-    |  ins_next2
-    break;
-  case BC_NOT:
-    |  // RA = dst*8, RD = src*8
-    |  ins_next1
-    |  lwzx TMP0, BASE, RD
-    |  subfic TMP1, TMP0, LJ_TTRUE
-    |  adde TMP0, TMP0, TMP1
-    |  stwx TMP0, BASE, RA
-    |  ins_next2
-    break;
-  case BC_UNM:
-    |  // RA = dst*8, RD = src*8
-    |  evlddx TMP0, BASE, RD
-    |  checknum TMP0
-    |  checkfail ->vmeta_unm
-    |  efdneg TMP0, TMP0
-    |  ins_next1
-    |  evstddx TMP0, BASE, RA
-    |  ins_next2
-    break;
-  case BC_LEN:
-    |  // RA = dst*8, RD = src*8
-    |  evlddx CARG1, BASE, RD
-    |  checkstr CARG1
-    |  checkfail >2
-    |  lwz CRET1, STR:CARG1->len
-    |1:
-    |  ins_next1
-    |  efdcfsi TMP0, CRET1
-    |  evstddx TMP0, BASE, RA
-    |  ins_next2
-    |2:
-    |  checktab CARG1
-    |  checkfail ->vmeta_len
-#if LJ_52
-    |  lwz TAB:TMP2, TAB:CARG1->metatable
-    |  cmplwi TAB:TMP2, 0
-    |  bne >9
-    |3:
-#endif
-    |->BC_LEN_Z:
-    |  bl extern lj_tab_len		// (GCtab *t)
-    |  // Returns uint32_t (but less than 2^31).
-    |  b <1
-#if LJ_52
-    |9:
-    |  lbz TMP0, TAB:TMP2->nomm
-    |  andi. TMP0, TMP0, 1<<MM_len
-    |  bne <3				// 'no __len' flag set: done.
-    |  b ->vmeta_len
-#endif
-    break;
-
-  /* -- Binary ops -------------------------------------------------------- */
-
-    |.macro ins_arithpre, t0, t1
-    |  // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
-    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
-    ||switch (vk) {
-    ||case 0:
-    |   evlddx t0, BASE, RB
-    |    checknum t0
-    |   evlddx t1, KBASE, RC
-    |    checkfail ->vmeta_arith_vn
-    ||  break;
-    ||case 1:
-    |   evlddx t1, BASE, RB
-    |    checknum t1
-    |   evlddx t0, KBASE, RC
-    |    checkfail ->vmeta_arith_nv
-    ||  break;
-    ||default:
-    |   evlddx t0, BASE, RB
-    |   evlddx t1, BASE, RC
-    |    evmergehi TMP2, t0, t1
-    |    checknum TMP2
-    |    checkanyfail ->vmeta_arith_vv
-    ||  break;
-    ||}
-    |.endmacro
-    |
-    |.macro ins_arith, ins
-    |  ins_arithpre TMP0, TMP1
-    |  ins_next1
-    |  ins TMP0, TMP0, TMP1
-    |  evstddx TMP0, BASE, RA
-    |  ins_next2
-    |.endmacro
-
-  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
-    |  ins_arith efdadd
-    break;
-  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-    |  ins_arith efdsub
-    break;
-  case BC_MULVN: case BC_MULNV: case BC_MULVV:
-    |  ins_arith efdmul
-    break;
-  case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
-    |  ins_arith efddiv
-    break;
-  case BC_MODVN:
-    |  ins_arithpre RD, SAVE0
-    |->BC_MODVN_Z:
-    |  efddiv CARG2, RD, SAVE0
-    |  bl ->vm_floor_efd		// floor(b/c)
-    |  efdmul TMP0, CRET2, SAVE0
-    |  ins_next1
-    |  efdsub TMP0, RD, TMP0		// b - floor(b/c)*c
-    |  evstddx TMP0, BASE, RA
-    |  ins_next2
-    break;
-  case BC_MODNV: case BC_MODVV:
-    |  ins_arithpre RD, SAVE0
-    |  b ->BC_MODVN_Z			// Avoid 3 copies. It's slow anyway.
-    break;
-  case BC_POW:
-    |  evlddx CARG2, BASE, RB
-    |  evlddx CARG4, BASE, RC
-    |  evmergehi CARG1, CARG4, CARG2
-    |  checknum CARG1
-    |   evmergehi CARG3, CARG4, CARG4
-    |  checkanyfail ->vmeta_arith_vv
-    |  bl extern pow@plt
-    |  evmergelo CRET2, CRET1, CRET2
-    |  evstddx CRET2, BASE, RA
-    |  ins_next
-    break;
-
-  case BC_CAT:
-    |  // RA = dst*8, RB = src_start*8, RC = src_end*8
-    |  sub CARG3, RC, RB
-    |   stw BASE, L->base
-    |  add CARG2, BASE, RC
-    |  mr SAVE0, RB
-    |->BC_CAT_Z:
-    |   stw PC, SAVE_PC
-    |  mr CARG1, L
-    |  srwi CARG3, CARG3, 3
-    |  bl extern lj_meta_cat		// (lua_State *L, TValue *top, int left)
-    |  // Returns NULL (finished) or TValue * (metamethod).
-    |  cmplwi CRET1, 0
-    |   lwz BASE, L->base
-    |  bne ->vmeta_binop
-    |  evlddx TMP0, BASE, SAVE0		// Copy result from RB to RA.
-    |  evstddx TMP0, BASE, RA
-    |  ins_next
-    break;
-
-  /* -- Constant ops ------------------------------------------------------ */
-
-  case BC_KSTR:
-    |  // RA = dst*8, RD = str_const*8 (~)
-    |  ins_next1
-    |  srwi TMP1, RD, 1
-    |  subfic TMP1, TMP1, -4
-    |  lwzx TMP0, KBASE, TMP1		// KBASE-4-str_const*4
-    |  evmergelo TMP0, TISSTR, TMP0
-    |  evstddx TMP0, BASE, RA
-    |  ins_next2
-    break;
-  case BC_KCDATA:
-    |.if FFI
-    |  // RA = dst*8, RD = cdata_const*8 (~)
-    |  ins_next1
-    |  srwi TMP1, RD, 1
-    |  subfic TMP1, TMP1, -4
-    |  lwzx TMP0, KBASE, TMP1		// KBASE-4-cdata_const*4
-    |  li TMP2, LJ_TCDATA
-    |  evmergelo TMP0, TMP2, TMP0
-    |  evstddx TMP0, BASE, RA
-    |  ins_next2
-    |.endif
-    break;
-  case BC_KSHORT:
-    |  // RA = dst*8, RD = int16_literal*8
-    |  srwi TMP1, RD, 3
-    |  extsh TMP1, TMP1
-    |  ins_next1
-    |  efdcfsi TMP0, TMP1
-    |  evstddx TMP0, BASE, RA
-    |  ins_next2
-    break;
-  case BC_KNUM:
-    |  // RA = dst*8, RD = num_const*8
-    |  evlddx TMP0, KBASE, RD
-    |  ins_next1
-    |  evstddx TMP0, BASE, RA
-    |  ins_next2
-    break;
-  case BC_KPRI:
-    |  // RA = dst*8, RD = primitive_type*8 (~)
-    |  srwi TMP1, RD, 3
-    |  not TMP0, TMP1
-    |  ins_next1
-    |  stwx TMP0, BASE, RA
-    |  ins_next2
-    break;
-  case BC_KNIL:
-    |  // RA = base*8, RD = end*8
-    |  evstddx TISNIL, BASE, RA
-    |   addi RA, RA, 8
-    |1:
-    |  evstddx TISNIL, BASE, RA
-    |  cmpw RA, RD
-    |   addi RA, RA, 8
-    |  blt <1
-    |  ins_next_
-    break;
-
-  /* -- Upvalue and function ops ------------------------------------------ */
-
-  case BC_UGET:
-    |  // RA = dst*8, RD = uvnum*8
-    |  ins_next1
-    |  lwz LFUNC:RB, FRAME_FUNC(BASE)
-    |   srwi RD, RD, 1
-    |   addi RD, RD, offsetof(GCfuncL, uvptr)
-    |  lwzx UPVAL:RB, LFUNC:RB, RD
-    |  lwz TMP1, UPVAL:RB->v
-    |  evldd TMP0, 0(TMP1)
-    |  evstddx TMP0, BASE, RA
-    |  ins_next2
-    break;
-  case BC_USETV:
-    |  // RA = uvnum*8, RD = src*8
-    |  lwz LFUNC:RB, FRAME_FUNC(BASE)
-    |    srwi RA, RA, 1
-    |    addi RA, RA, offsetof(GCfuncL, uvptr)
-    |   evlddx TMP1, BASE, RD
-    |  lwzx UPVAL:RB, LFUNC:RB, RA
-    |  lbz TMP3, UPVAL:RB->marked
-    |   lwz CARG2, UPVAL:RB->v
-    |  andi. TMP3, TMP3, LJ_GC_BLACK	// isblack(uv)
-    |    lbz TMP0, UPVAL:RB->closed
-    |   evmergehi TMP2, TMP1, TMP1
-    |   evstdd TMP1, 0(CARG2)
-    |    cmplwi cr1, TMP0, 0
-    |  cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
-    |   subi TMP2, TMP2, (LJ_TISNUM+1)
-    |  bne >2				// Upvalue is closed and black?
-    |1:
-    |  ins_next
-    |
-    |2:  // Check if new value is collectable.
-    |  cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1)
-    |  bge <1				// tvisgcv(v)
-    |  lbz TMP3, GCOBJ:TMP1->gch.marked
-    |  andi. TMP3, TMP3, LJ_GC_WHITES	// iswhite(v)
-    |   la CARG1, GG_DISP2G(DISPATCH)
-    |  // Crossed a write barrier. Move the barrier forward.
-    |  beq <1
-    |  bl extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
-    |  b <1
-    break;
-  case BC_USETS:
-    |  // RA = uvnum*8, RD = str_const*8 (~)
-    |  lwz LFUNC:RB, FRAME_FUNC(BASE)
-    |   srwi TMP1, RD, 1
-    |    srwi RA, RA, 1
-    |   subfic TMP1, TMP1, -4
-    |    addi RA, RA, offsetof(GCfuncL, uvptr)
-    |   lwzx STR:TMP1, KBASE, TMP1	// KBASE-4-str_const*4
-    |  lwzx UPVAL:RB, LFUNC:RB, RA
-    |   evmergelo STR:TMP1, TISSTR, STR:TMP1
-    |  lbz TMP3, UPVAL:RB->marked
-    |   lwz CARG2, UPVAL:RB->v
-    |  andi. TMP3, TMP3, LJ_GC_BLACK	// isblack(uv)
-    |   lbz TMP3, STR:TMP1->marked
-    |   lbz TMP2, UPVAL:RB->closed
-    |   evstdd STR:TMP1, 0(CARG2)
-    |  bne >2
-    |1:
-    |  ins_next
-    |
-    |2:  // Check if string is white and ensure upvalue is closed.
-    |  andi. TMP3, TMP3, LJ_GC_WHITES	// iswhite(str)
-    |   cmplwi cr1, TMP2, 0
-    |  cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
-    |   la CARG1, GG_DISP2G(DISPATCH)
-    |  // Crossed a write barrier. Move the barrier forward.
-    |  beq <1
-    |  bl extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
-    |  b <1
-    break;
-  case BC_USETN:
-    |  // RA = uvnum*8, RD = num_const*8
-    |  ins_next1
-    |  lwz LFUNC:RB, FRAME_FUNC(BASE)
-    |   srwi RA, RA, 1
-    |   addi RA, RA, offsetof(GCfuncL, uvptr)
-    |    evlddx TMP0, KBASE, RD
-    |  lwzx UPVAL:RB, LFUNC:RB, RA
-    |  lwz TMP1, UPVAL:RB->v
-    |  evstdd TMP0, 0(TMP1)
-    |  ins_next2
-    break;
-  case BC_USETP:
-    |  // RA = uvnum*8, RD = primitive_type*8 (~)
-    |  ins_next1
-    |  lwz LFUNC:RB, FRAME_FUNC(BASE)
-    |   srwi RA, RA, 1
-    |   addi RA, RA, offsetof(GCfuncL, uvptr)
-    |    srwi TMP0, RD, 3
-    |  lwzx UPVAL:RB, LFUNC:RB, RA
-    |    not TMP0, TMP0
-    |  lwz TMP1, UPVAL:RB->v
-    |  stw TMP0, 0(TMP1)
-    |  ins_next2
-    break;
-
-  case BC_UCLO:
-    |  // RA = level*8, RD = target
-    |  lwz TMP1, L->openupval
-    |  branch_RD			// Do this first since RD is not saved.
-    |   stw BASE, L->base
-    |  cmplwi TMP1, 0
-    |   mr CARG1, L
-    |  beq >1
-    |   add CARG2, BASE, RA
-    |  bl extern lj_func_closeuv	// (lua_State *L, TValue *level)
-    |  lwz BASE, L->base
-    |1:
-    |  ins_next
-    break;
-
-  case BC_FNEW:
-    |  // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
-    |  srwi TMP1, RD, 1
-    |   stw BASE, L->base
-    |  subfic TMP1, TMP1, -4
-    |   stw PC, SAVE_PC
-    |  lwzx CARG2, KBASE, TMP1		// KBASE-4-tab_const*4
-    |   mr CARG1, L
-    |  lwz CARG3, FRAME_FUNC(BASE)
-    |  // (lua_State *L, GCproto *pt, GCfuncL *parent)
-    |  bl extern lj_func_newL_gc
-    |  // Returns GCfuncL *.
-    |  lwz BASE, L->base
-    |  evmergelo LFUNC:CRET1, TISFUNC, LFUNC:CRET1
-    |  evstddx LFUNC:CRET1, BASE, RA
-    |  ins_next
-    break;
-
-  /* -- Table ops --------------------------------------------------------- */
-
-  case BC_TNEW:
-  case BC_TDUP:
-    |  // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
-    |  lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
-    |   mr CARG1, L
-    |  lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
-    |   stw BASE, L->base
-    |  cmplw TMP0, TMP1
-    |   stw PC, SAVE_PC
-    |  bge >5
-    |1:
-    if (op == BC_TNEW) {
-      |  rlwinm CARG2, RD, 29, 21, 31
-      |  rlwinm CARG3, RD, 18, 27, 31
-      |  cmpwi CARG2, 0x7ff
-      |   li TMP1, 0x801
-      |  iseleq CARG2, TMP1, CARG2
-      |  bl extern lj_tab_new  // (lua_State *L, int32_t asize, uint32_t hbits)
-      |  // Returns Table *.
-    } else {
-      |  srwi TMP1, RD, 1
-      |  subfic TMP1, TMP1, -4
-      |  lwzx CARG2, KBASE, TMP1		// KBASE-4-tab_const*4
-      |  bl extern lj_tab_dup  // (lua_State *L, Table *kt)
-      |  // Returns Table *.
-    }
-    |  lwz BASE, L->base
-    |  evmergelo TAB:CRET1, TISTAB, TAB:CRET1
-    |  evstddx TAB:CRET1, BASE, RA
-    |  ins_next
-    |5:
-    |  mr SAVE0, RD
-    |  bl extern lj_gc_step_fixtop  // (lua_State *L)
-    |  mr RD, SAVE0
-    |  mr CARG1, L
-    |  b <1
-    break;
-
-  case BC_GGET:
-    |  // RA = dst*8, RD = str_const*8 (~)
-  case BC_GSET:
-    |  // RA = src*8, RD = str_const*8 (~)
-    |  lwz LFUNC:TMP2, FRAME_FUNC(BASE)
-    |   srwi TMP1, RD, 1
-    |  lwz TAB:RB, LFUNC:TMP2->env
-    |   subfic TMP1, TMP1, -4
-    |   lwzx STR:RC, KBASE, TMP1	// KBASE-4-str_const*4
-    if (op == BC_GGET) {
-      |  b ->BC_TGETS_Z
-    } else {
-      |  b ->BC_TSETS_Z
-    }
-    break;
-
-  case BC_TGETV:
-    |  // RA = dst*8, RB = table*8, RC = key*8
-    |  evlddx TAB:RB, BASE, RB
-    |   evlddx RC, BASE, RC
-    |  checktab TAB:RB
-    |  checkfail ->vmeta_tgetv
-    |  checknum RC
-    |  checkfail >5
-    |  // Convert number key to integer
-    |  efdctsi TMP2, RC
-    |   lwz TMP0, TAB:RB->asize
-    |  efdcfsi TMP1, TMP2
-    |   cmplw cr0, TMP0, TMP2
-    |  efdcmpeq cr1, RC, TMP1
-    |   lwz TMP1, TAB:RB->array
-    |  crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
-    |   slwi TMP2, TMP2, 3
-    |  ble ->vmeta_tgetv		// Integer key and in array part?
-    |  evlddx TMP1, TMP1, TMP2
-    |  checknil TMP1
-    |  checkok >2
-    |1:
-    |  evstddx TMP1, BASE, RA
-    |  ins_next
-    |
-    |2:  // Check for __index if table value is nil.
-    |  lwz TAB:TMP2, TAB:RB->metatable
-    |  cmplwi TAB:TMP2, 0
-    |  beq <1				// No metatable: done.
-    |  lbz TMP0, TAB:TMP2->nomm
-    |  andi. TMP0, TMP0, 1<<MM_index
-    |  bne <1				// 'no __index' flag set: done.
-    |  b ->vmeta_tgetv
-    |
-    |5:
-    |  checkstr STR:RC			// String key?
-    |  checkok ->BC_TGETS_Z
-    |  b ->vmeta_tgetv
-    break;
-  case BC_TGETS:
-    |  // RA = dst*8, RB = table*8, RC = str_const*8 (~)
-    |  evlddx TAB:RB, BASE, RB
-    |   srwi TMP1, RC, 1
-    |  checktab TAB:RB
-    |   subfic TMP1, TMP1, -4
-    |   lwzx STR:RC, KBASE, TMP1	// KBASE-4-str_const*4
-    |  checkfail ->vmeta_tgets1
-    |->BC_TGETS_Z:
-    |  // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
-    |  lwz TMP0, TAB:RB->hmask
-    |  lwz TMP1, STR:RC->hash
-    |  lwz NODE:TMP2, TAB:RB->node
-    |   evmergelo STR:RC, TISSTR, STR:RC
-    |  and TMP1, TMP1, TMP0		// idx = str->hash & tab->hmask
-    |  slwi TMP0, TMP1, 5
-    |  slwi TMP1, TMP1, 3
-    |  sub TMP1, TMP0, TMP1
-    |  add NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
-    |1:
-    |  evldd TMP0, NODE:TMP2->key
-    |   evldd TMP1, NODE:TMP2->val
-    |  evcmpeq TMP0, STR:RC
-    |  checkanyfail >4
-    |   checknil TMP1
-    |   checkok >5			// Key found, but nil value?
-    |3:
-    |   evstddx TMP1, BASE, RA
-    |  ins_next
-    |
-    |4:  // Follow hash chain.
-    |  lwz NODE:TMP2, NODE:TMP2->next
-    |  cmplwi NODE:TMP2, 0
-    |  bne <1
-    |  // End of hash chain: key not found, nil result.
-    |   evmr TMP1, TISNIL
-    |
-    |5:  // Check for __index if table value is nil.
-    |  lwz TAB:TMP2, TAB:RB->metatable
-    |  cmplwi TAB:TMP2, 0
-    |  beq <3				// No metatable: done.
-    |  lbz TMP0, TAB:TMP2->nomm
-    |  andi. TMP0, TMP0, 1<<MM_index
-    |  bne <3				// 'no __index' flag set: done.
-    |  b ->vmeta_tgets
-    break;
-  case BC_TGETB:
-    |  // RA = dst*8, RB = table*8, RC = index*8
-    |  evlddx TAB:RB, BASE, RB
-    |   srwi TMP0, RC, 3
-    |  checktab TAB:RB
-    |  checkfail ->vmeta_tgetb
-    |  lwz TMP1, TAB:RB->asize
-    |   lwz TMP2, TAB:RB->array
-    |  cmplw TMP0, TMP1
-    |  bge ->vmeta_tgetb
-    |  evlddx TMP1, TMP2, RC
-    |  checknil TMP1
-    |  checkok >5
-    |1:
-    |  ins_next1
-    |  evstddx TMP1, BASE, RA
-    |  ins_next2
-    |
-    |5:  // Check for __index if table value is nil.
-    |  lwz TAB:TMP2, TAB:RB->metatable
-    |  cmplwi TAB:TMP2, 0
-    |  beq <1				// No metatable: done.
-    |  lbz TMP2, TAB:TMP2->nomm
-    |  andi. TMP2, TMP2, 1<<MM_index
-    |  bne <1				// 'no __index' flag set: done.
-    |  b ->vmeta_tgetb			// Caveat: preserve TMP0!
-    break;
-
-  case BC_TSETV:
-    |  // RA = src*8, RB = table*8, RC = key*8
-    |  evlddx TAB:RB, BASE, RB
-    |   evlddx RC, BASE, RC
-    |  checktab TAB:RB
-    |  checkfail ->vmeta_tsetv
-    |  checknum RC
-    |  checkfail >5
-    |  // Convert number key to integer
-    |  efdctsi TMP2, RC
-    |    evlddx SAVE0, BASE, RA
-    |   lwz TMP0, TAB:RB->asize
-    |  efdcfsi TMP1, TMP2
-    |   cmplw cr0, TMP0, TMP2
-    |  efdcmpeq cr1, RC, TMP1
-    |   lwz TMP1, TAB:RB->array
-    |  crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
-    |   slwi TMP0, TMP2, 3
-    |  ble ->vmeta_tsetv		// Integer key and in array part?
-    |   lbz TMP3, TAB:RB->marked
-    |  evlddx TMP2, TMP1, TMP0
-    |  checknil TMP2
-    |  checkok >3
-    |1:
-    |  andi. TMP2, TMP3, LJ_GC_BLACK	// isblack(table)
-    |   evstddx SAVE0, TMP1, TMP0
-    |  bne >7
-    |2:
-    |  ins_next
-    |
-    |3:  // Check for __newindex if previous value is nil.
-    |  lwz TAB:TMP2, TAB:RB->metatable
-    |  cmplwi TAB:TMP2, 0
-    |  beq <1				// No metatable: done.
-    |  lbz TMP2, TAB:TMP2->nomm
-    |  andi. TMP2, TMP2, 1<<MM_newindex
-    |  bne <1				// 'no __newindex' flag set: done.
-    |  b ->vmeta_tsetv
-    |
-    |5:
-    |  checkstr STR:RC			// String key?
-    |  checkok ->BC_TSETS_Z
-    |  b ->vmeta_tsetv
-    |
-    |7:  // Possible table write barrier for the value. Skip valiswhite check.
-    |  barrierback TAB:RB, TMP3, TMP0
-    |  b <2
-    break;
-  case BC_TSETS:
-    |  // RA = src*8, RB = table*8, RC = str_const*8 (~)
-    |  evlddx TAB:RB, BASE, RB
-    |   srwi TMP1, RC, 1
-    |  checktab TAB:RB
-    |   subfic TMP1, TMP1, -4
-    |   lwzx STR:RC, KBASE, TMP1	// KBASE-4-str_const*4
-    |  checkfail ->vmeta_tsets1
-    |->BC_TSETS_Z:
-    |  // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
-    |  lwz TMP0, TAB:RB->hmask
-    |  lwz TMP1, STR:RC->hash
-    |  lwz NODE:TMP2, TAB:RB->node
-    |   evmergelo STR:RC, TISSTR, STR:RC
-    |    stb ZERO, TAB:RB->nomm		// Clear metamethod cache.
-    |  and TMP1, TMP1, TMP0		// idx = str->hash & tab->hmask
-    |    evlddx SAVE0, BASE, RA
-    |  slwi TMP0, TMP1, 5
-    |  slwi TMP1, TMP1, 3
-    |  sub TMP1, TMP0, TMP1
-    |    lbz TMP3, TAB:RB->marked
-    |  add NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
-    |1:
-    |  evldd TMP0, NODE:TMP2->key
-    |   evldd TMP1, NODE:TMP2->val
-    |  evcmpeq TMP0, STR:RC
-    |  checkanyfail >5
-    |   checknil TMP1
-    |   checkok >4			// Key found, but nil value?
-    |2:
-    |  andi. TMP0, TMP3, LJ_GC_BLACK	// isblack(table)
-    |    evstdd SAVE0, NODE:TMP2->val
-    |  bne >7
-    |3:
-    |  ins_next
-    |
-    |4:  // Check for __newindex if previous value is nil.
-    |  lwz TAB:TMP1, TAB:RB->metatable
-    |  cmplwi TAB:TMP1, 0
-    |  beq <2				// No metatable: done.
-    |  lbz TMP0, TAB:TMP1->nomm
-    |  andi. TMP0, TMP0, 1<<MM_newindex
-    |  bne <2				// 'no __newindex' flag set: done.
-    |  b ->vmeta_tsets
-    |
-    |5:  // Follow hash chain.
-    |  lwz NODE:TMP2, NODE:TMP2->next
-    |  cmplwi NODE:TMP2, 0
-    |  bne <1
-    |  // End of hash chain: key not found, add a new one.
-    |
-    |  // But check for __newindex first.
-    |  lwz TAB:TMP1, TAB:RB->metatable
-    |   la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
-    |   stw PC, SAVE_PC
-    |   mr CARG1, L
-    |  cmplwi TAB:TMP1, 0
-    |   stw BASE, L->base
-    |  beq >6				// No metatable: continue.
-    |  lbz TMP0, TAB:TMP1->nomm
-    |  andi. TMP0, TMP0, 1<<MM_newindex
-    |  beq ->vmeta_tsets		// 'no __newindex' flag NOT set: check.
-    |6:
-    |  mr CARG2, TAB:RB
-    |  evstdd STR:RC, 0(CARG3)
-    |  bl extern lj_tab_newkey		// (lua_State *L, GCtab *t, TValue *k)
-    |  // Returns TValue *.
-    |  lwz BASE, L->base
-    |  evstdd SAVE0, 0(CRET1)
-    |  b <3				// No 2nd write barrier needed.
-    |
-    |7:  // Possible table write barrier for the value. Skip valiswhite check.
-    |  barrierback TAB:RB, TMP3, TMP0
-    |  b <3
-    break;
-  case BC_TSETB:
-    |  // RA = src*8, RB = table*8, RC = index*8
-    |  evlddx TAB:RB, BASE, RB
-    |   srwi TMP0, RC, 3
-    |  checktab TAB:RB
-    |  checkfail ->vmeta_tsetb
-    |  lwz TMP1, TAB:RB->asize
-    |   lwz TMP2, TAB:RB->array
-    |    lbz TMP3, TAB:RB->marked
-    |  cmplw TMP0, TMP1
-    |   evlddx SAVE0, BASE, RA
-    |  bge ->vmeta_tsetb
-    |  evlddx TMP1, TMP2, RC
-    |  checknil TMP1
-    |  checkok >5
-    |1:
-    |  andi. TMP0, TMP3, LJ_GC_BLACK	// isblack(table)
-    |   evstddx SAVE0, TMP2, RC
-    |  bne >7
-    |2:
-    |  ins_next
-    |
-    |5:  // Check for __newindex if previous value is nil.
-    |  lwz TAB:TMP1, TAB:RB->metatable
-    |  cmplwi TAB:TMP1, 0
-    |  beq <1				// No metatable: done.
-    |  lbz TMP1, TAB:TMP1->nomm
-    |  andi. TMP1, TMP1, 1<<MM_newindex
-    |  bne <1				// 'no __newindex' flag set: done.
-    |  b ->vmeta_tsetb			// Caveat: preserve TMP0!
-    |
-    |7:  // Possible table write barrier for the value. Skip valiswhite check.
-    |  barrierback TAB:RB, TMP3, TMP0
-    |  b <2
-    break;
-
-  case BC_TSETM:
-    |  // RA = base*8 (table at base-1), RD = num_const*8 (start index)
-    |  add RA, BASE, RA
-    |1:
-    |   add TMP3, KBASE, RD
-    |  lwz TAB:CARG2, -4(RA)		// Guaranteed to be a table.
-    |    addic. TMP0, MULTRES, -8
-    |   lwz TMP3, 4(TMP3)		// Integer constant is in lo-word.
-    |    srwi CARG3, TMP0, 3
-    |    beq >4				// Nothing to copy?
-    |  add CARG3, CARG3, TMP3
-    |  lwz TMP2, TAB:CARG2->asize
-    |   slwi TMP1, TMP3, 3
-    |    lbz TMP3, TAB:CARG2->marked
-    |  cmplw CARG3, TMP2
-    |   add TMP2, RA, TMP0
-    |   lwz TMP0, TAB:CARG2->array
-    |  bgt >5
-    |   add TMP1, TMP1, TMP0
-    |    andi. TMP0, TMP3, LJ_GC_BLACK	// isblack(table)
-    |3:  // Copy result slots to table.
-    |   evldd TMP0, 0(RA)
-    |  addi RA, RA, 8
-    |  cmpw cr1, RA, TMP2
-    |   evstdd TMP0, 0(TMP1)
-    |    addi TMP1, TMP1, 8
-    |  blt cr1, <3
-    |  bne >7
-    |4:
-    |  ins_next
-    |
-    |5:  // Need to resize array part.
-    |   stw BASE, L->base
-    |  mr CARG1, L
-    |   stw PC, SAVE_PC
-    |  mr SAVE0, RD
-    |  bl extern lj_tab_reasize		// (lua_State *L, GCtab *t, int nasize)
-    |  // Must not reallocate the stack.
-    |  mr RD, SAVE0
-    |  b <1
-    |
-    |7:  // Possible table write barrier for any value. Skip valiswhite check.
-    |  barrierback TAB:CARG2, TMP3, TMP0
-    |  b <4
-    break;
-
-  /* -- Calls and vararg handling ----------------------------------------- */
-
-  case BC_CALLM:
-    |  // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
-    |  add NARGS8:RC, NARGS8:RC, MULTRES
-    |  // Fall through. Assumes BC_CALL follows.
-    break;
-  case BC_CALL:
-    |  // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
-    |  evlddx LFUNC:RB, BASE, RA
-    |   mr TMP2, BASE
-    |   add BASE, BASE, RA
-    |    subi NARGS8:RC, NARGS8:RC, 8
-    |  checkfunc LFUNC:RB
-    |   addi BASE, BASE, 8
-    |  checkfail ->vmeta_call
-    |  ins_call
-    break;
-
-  case BC_CALLMT:
-    |  // RA = base*8, (RB = 0,) RC = extra_nargs*8
-    |  add NARGS8:RC, NARGS8:RC, MULTRES
-    |  // Fall through. Assumes BC_CALLT follows.
-    break;
-  case BC_CALLT:
-    |  // RA = base*8, (RB = 0,) RC = (nargs+1)*8
-    |  evlddx LFUNC:RB, BASE, RA
-    |   add RA, BASE, RA
-    |    lwz TMP1, FRAME_PC(BASE)
-    |    subi NARGS8:RC, NARGS8:RC, 8
-    |  checkfunc LFUNC:RB
-    |   addi RA, RA, 8
-    |  checkfail ->vmeta_callt
-    |->BC_CALLT_Z:
-    |  andi. TMP0, TMP1, FRAME_TYPE	// Caveat: preserve cr0 until the crand.
-    |   lbz TMP3, LFUNC:RB->ffid
-    |    xori TMP2, TMP1, FRAME_VARG
-    |    cmplwi cr1, NARGS8:RC, 0
-    |  bne >7
-    |1:
-    |  stw LFUNC:RB, FRAME_FUNC(BASE)	// Copy function down, but keep PC.
-    |  li TMP2, 0
-    |   cmplwi cr7, TMP3, 1		// (> FF_C) Calling a fast function?
-    |    beq cr1, >3
-    |2:
-    |  addi TMP3, TMP2, 8
-    |   evlddx TMP0, RA, TMP2
-    |  cmplw cr1, TMP3, NARGS8:RC
-    |   evstddx TMP0, BASE, TMP2
-    |  mr TMP2, TMP3
-    |  bne cr1, <2
-    |3:
-    |  crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt
-    |  beq >5
-    |4:
-    |  ins_callt
-    |
-    |5:  // Tailcall to a fast function with a Lua frame below.
-    |  lwz INS, -4(TMP1)
-    |  decode_RA8 RA, INS
-    |  sub TMP1, BASE, RA
-    |  lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1)
-    |  lwz TMP1, LFUNC:TMP1->pc
-    |  lwz KBASE, PC2PROTO(k)(TMP1)	// Need to prepare KBASE.
-    |  b <4
-    |
-    |7:  // Tailcall from a vararg function.
-    |  andi. TMP0, TMP2, FRAME_TYPEP
-    |  bne <1				// Vararg frame below?
-    |  sub BASE, BASE, TMP2		// Relocate BASE down.
-    |  lwz TMP1, FRAME_PC(BASE)
-    |  andi. TMP0, TMP1, FRAME_TYPE
-    |  b <1
-    break;
-
-  case BC_ITERC:
-    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
-    |  subi RA, RA, 24			// evldd doesn't support neg. offsets.
-    |   mr TMP2, BASE
-    |  evlddx LFUNC:RB, BASE, RA
-    |   add BASE, BASE, RA
-    |   evldd TMP0, 8(BASE)
-    |    evldd TMP1, 16(BASE)
-    |  evstdd LFUNC:RB, 24(BASE)	// Copy callable.
-    |  checkfunc LFUNC:RB
-    |   evstdd TMP0, 32(BASE)		// Copy state.
-    |     li NARGS8:RC, 16		// Iterators get 2 arguments.
-    |    evstdd TMP1, 40(BASE)		// Copy control var.
-    |     addi BASE, BASE, 32
-    |  checkfail ->vmeta_call
-    |  ins_call
-    break;
-
-  case BC_ITERN:
-    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
-    |.if JIT
-    |  // NYI: add hotloop, record BC_ITERN.
-    |.endif
-    |  add RA, BASE, RA
-    |  lwz TAB:RB, -12(RA)
-    |  lwz RC, -4(RA)			// Get index from control var.
-    |  lwz TMP0, TAB:RB->asize
-    |  lwz TMP1, TAB:RB->array
-    |   addi PC, PC, 4
-    |1:  // Traverse array part.
-    |  cmplw RC, TMP0
-    |   slwi TMP3, RC, 3
-    |  bge >5				// Index points after array part?
-    |  evlddx TMP2, TMP1, TMP3
-    |  checknil TMP2
-    |     lwz INS, -4(PC)
-    |  checkok >4
-    |   efdcfsi TMP0, RC
-    |    addi RC, RC, 1
-    |     addis TMP3, PC, -(BCBIAS_J*4 >> 16)
-    |  evstdd TMP2, 8(RA)
-    |     decode_RD4 TMP1, INS
-    |    stw RC, -4(RA)			// Update control var.
-    |     add PC, TMP1, TMP3
-    |   evstdd TMP0, 0(RA)
-    |3:
-    |  ins_next
-    |
-    |4:  // Skip holes in array part.
-    |  addi RC, RC, 1
-    |  b <1
-    |
-    |5:  // Traverse hash part.
-    |  lwz TMP1, TAB:RB->hmask
-    |  sub RC, RC, TMP0
-    |   lwz TMP2, TAB:RB->node
-    |6:
-    |  cmplw RC, TMP1			// End of iteration? Branch to ITERL+1.
-    |   slwi TMP3, RC, 5
-    |  bgt <3
-    |   slwi RB, RC, 3
-    |   sub TMP3, TMP3, RB
-    |  evlddx RB, TMP2, TMP3
-    |   add NODE:TMP3, TMP2, TMP3
-    |  checknil RB
-    |     lwz INS, -4(PC)
-    |  checkok >7
-    |   evldd TMP3, NODE:TMP3->key
-    |     addis TMP2, PC, -(BCBIAS_J*4 >> 16)
-    |  evstdd RB, 8(RA)
-    |    add RC, RC, TMP0
-    |     decode_RD4 TMP1, INS
-    |   evstdd TMP3, 0(RA)
-    |    addi RC, RC, 1
-    |     add PC, TMP1, TMP2
-    |    stw RC, -4(RA)			// Update control var.
-    |  b <3
-    |
-    |7:  // Skip holes in hash part.
-    |  addi RC, RC, 1
-    |  b <6
-    break;
-
-  case BC_ISNEXT:
-    |  // RA = base*8, RD = target (points to ITERN)
-    |  add RA, BASE, RA
-    |   li TMP2, -24
-    |  evlddx CFUNC:TMP1, RA, TMP2
-    |   lwz TMP2, -16(RA)
-    |    lwz TMP3, -8(RA)
-    |  evmergehi TMP0, CFUNC:TMP1, CFUNC:TMP1
-    |   cmpwi cr0, TMP2, LJ_TTAB
-    |  cmpwi cr1, TMP0, LJ_TFUNC
-    |    cmpwi cr6, TMP3, LJ_TNIL
-    |  bne cr1, >5
-    |  lbz TMP1, CFUNC:TMP1->ffid
-    |   crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq
-    |  cmpwi cr7, TMP1, FF_next_N
-    |    srwi TMP0, RD, 1
-    |  crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
-    |    add TMP3, PC, TMP0
-    |  bne cr0, >5
-    |  lus TMP1, 0xfffe
-    |  ori TMP1, TMP1, 0x7fff
-    |  stw ZERO, -4(RA)			// Initialize control var.
-    |  stw TMP1, -8(RA)
-    |    addis PC, TMP3, -(BCBIAS_J*4 >> 16)
-    |1:
-    |  ins_next
-    |5:  // Despecialize bytecode if any of the checks fail.
-    |  li TMP0, BC_JMP
-    |   li TMP1, BC_ITERC
-    |  stb TMP0, -1(PC)
-    |    addis PC, TMP3, -(BCBIAS_J*4 >> 16)
-    |   stb TMP1, 3(PC)
-    |  b <1
-    break;
-
-  case BC_VARG:
-    |  // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
-    |  lwz TMP0, FRAME_PC(BASE)
-    |  add RC, BASE, RC
-    |   add RA, BASE, RA
-    |  addi RC, RC, FRAME_VARG
-    |   add TMP2, RA, RB
-    |  subi TMP3, BASE, 8		// TMP3 = vtop
-    |  sub RC, RC, TMP0			// RC = vbase
-    |  // Note: RC may now be even _above_ BASE if nargs was < numparams.
-    |  cmplwi cr1, RB, 0
-    |   sub. TMP1, TMP3, RC
-    |  beq cr1, >5			// Copy all varargs?
-    |   subi TMP2, TMP2, 16
-    |   ble >2				// No vararg slots?
-    |1:  // Copy vararg slots to destination slots.
-    |  evldd TMP0, 0(RC)
-    |   addi RC, RC, 8
-    |  evstdd TMP0, 0(RA)
-    |  cmplw RA, TMP2
-    |   cmplw cr1, RC, TMP3
-    |  bge >3				// All destination slots filled?
-    |    addi RA, RA, 8
-    |   blt cr1, <1			// More vararg slots?
-    |2:  // Fill up remainder with nil.
-    |  evstdd TISNIL, 0(RA)
-    |  cmplw RA, TMP2
-    |   addi RA, RA, 8
-    |  blt <2
-    |3:
-    |  ins_next
-    |
-    |5:  // Copy all varargs.
-    |  lwz TMP0, L->maxstack
-    |   li MULTRES, 8			// MULTRES = (0+1)*8
-    |  ble <3				// No vararg slots?
-    |  add TMP2, RA, TMP1
-    |  cmplw TMP2, TMP0
-    |   addi MULTRES, TMP1, 8
-    |  bgt >7
-    |6:
-    |  evldd TMP0, 0(RC)
-    |   addi RC, RC, 8
-    |  evstdd TMP0, 0(RA)
-    |  cmplw RC, TMP3
-    |   addi RA, RA, 8
-    |  blt <6				// More vararg slots?
-    |  b <3
-    |
-    |7:  // Grow stack for varargs.
-    |  mr CARG1, L
-    |   stw RA, L->top
-    |  sub SAVE0, RC, BASE		// Need delta, because BASE may change.
-    |   stw BASE, L->base
-    |  sub RA, RA, BASE
-    |   stw PC, SAVE_PC
-    |  srwi CARG2, TMP1, 3
-    |  bl extern lj_state_growstack	// (lua_State *L, int n)
-    |  lwz BASE, L->base
-    |  add RA, BASE, RA
-    |  add RC, BASE, SAVE0
-    |  subi TMP3, BASE, 8
-    |  b <6
-    break;
-
-  /* -- Returns ----------------------------------------------------------- */
-
-  case BC_RETM:
-    |  // RA = results*8, RD = extra_nresults*8
-    |  add RD, RD, MULTRES		// MULTRES >= 8, so RD >= 8.
-    |  // Fall through. Assumes BC_RET follows.
-    break;
-
-  case BC_RET:
-    |  // RA = results*8, RD = (nresults+1)*8
-    |  lwz PC, FRAME_PC(BASE)
-    |   add RA, BASE, RA
-    |    mr MULTRES, RD
-    |1:
-    |  andi. TMP0, PC, FRAME_TYPE
-    |   xori TMP1, PC, FRAME_VARG
-    |  bne ->BC_RETV_Z
-    |
-    |->BC_RET_Z:
-    |  // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
-    |   lwz INS, -4(PC)
-    |  cmpwi RD, 8
-    |   subi TMP2, BASE, 8
-    |   subi RC, RD, 8
-    |   decode_RB8 RB, INS
-    |  beq >3
-    |   li TMP1, 0
-    |2:
-    |  addi TMP3, TMP1, 8
-    |   evlddx TMP0, RA, TMP1
-    |  cmpw TMP3, RC
-    |   evstddx TMP0, TMP2, TMP1
-    |  beq >3
-    |  addi TMP1, TMP3, 8
-    |   evlddx TMP0, RA, TMP3
-    |  cmpw TMP1, RC
-    |   evstddx TMP0, TMP2, TMP3
-    |  bne <2
-    |3:
-    |5:
-    |  cmplw RB, RD
-    |   decode_RA8 RA, INS
-    |  bgt >6
-    |   sub BASE, TMP2, RA
-    |  lwz LFUNC:TMP1, FRAME_FUNC(BASE)
-    |  ins_next1
-    |  lwz TMP1, LFUNC:TMP1->pc
-    |  lwz KBASE, PC2PROTO(k)(TMP1)
-    |  ins_next2
-    |
-    |6:  // Fill up results with nil.
-    |  subi TMP1, RD, 8
-    |   addi RD, RD, 8
-    |  evstddx TISNIL, TMP2, TMP1
-    |  b <5
-    |
-    |->BC_RETV_Z:  // Non-standard return case.
-    |  andi. TMP2, TMP1, FRAME_TYPEP
-    |  bne ->vm_return
-    |  // Return from vararg function: relocate BASE down.
-    |  sub BASE, BASE, TMP1
-    |  lwz PC, FRAME_PC(BASE)
-    |  b <1
-    break;
-
-  case BC_RET0: case BC_RET1:
-    |  // RA = results*8, RD = (nresults+1)*8
-    |  lwz PC, FRAME_PC(BASE)
-    |   add RA, BASE, RA
-    |    mr MULTRES, RD
-    |  andi. TMP0, PC, FRAME_TYPE
-    |   xori TMP1, PC, FRAME_VARG
-    |  bne ->BC_RETV_Z
-    |
-    |  lwz INS, -4(PC)
-    |   subi TMP2, BASE, 8
-    |  decode_RB8 RB, INS
-    if (op == BC_RET1) {
-      |  evldd TMP0, 0(RA)
-      |  evstdd TMP0, 0(TMP2)
-    }
-    |5:
-    |  cmplw RB, RD
-    |   decode_RA8 RA, INS
-    |  bgt >6
-    |   sub BASE, TMP2, RA
-    |  lwz LFUNC:TMP1, FRAME_FUNC(BASE)
-    |  ins_next1
-    |  lwz TMP1, LFUNC:TMP1->pc
-    |  lwz KBASE, PC2PROTO(k)(TMP1)
-    |  ins_next2
-    |
-    |6:  // Fill up results with nil.
-    |  subi TMP1, RD, 8
-    |   addi RD, RD, 8
-    |  evstddx TISNIL, TMP2, TMP1
-    |  b <5
-    break;
-
-  /* -- Loops and branches ------------------------------------------------ */
-
-  case BC_FORL:
-    |.if JIT
-    |  hotloop
-    |.endif
-    |  // Fall through. Assumes BC_IFORL follows.
-    break;
-
-  case BC_JFORI:
-  case BC_JFORL:
-#if !LJ_HASJIT
-    break;
-#endif
-  case BC_FORI:
-  case BC_IFORL:
-    |  // RA = base*8, RD = target (after end of loop or start of loop)
-    vk = (op == BC_IFORL || op == BC_JFORL);
-    |  add RA, BASE, RA
-    |  evldd TMP1, FORL_IDX*8(RA)
-    |  evldd TMP3, FORL_STEP*8(RA)
-    |  evldd TMP2, FORL_STOP*8(RA)
-    if (!vk) {
-      |  evcmpgtu cr0, TMP1, TISNUM
-      |  evcmpgtu cr7, TMP3, TISNUM
-      |  evcmpgtu cr1, TMP2, TISNUM
-      |  cror 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
-      |  cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
-      |  blt ->vmeta_for
-    }
-    if (vk) {
-      |  efdadd TMP1, TMP1, TMP3
-      |  evstdd TMP1, FORL_IDX*8(RA)
-    }
-    |   evcmpgts TMP3, TISNIL
-    |  evstdd TMP1, FORL_EXT*8(RA)
-    |   bge >2
-    |  efdcmpgt TMP1, TMP2
-    |1:
-    if (op != BC_JFORL) {
-      |  srwi RD, RD, 1
-      |  add RD, PC, RD
-      if (op == BC_JFORI) {
-	|  addis PC, RD, -(BCBIAS_J*4 >> 16)
-      } else {
-	|  addis RD, RD, -(BCBIAS_J*4 >> 16)
-      }
-    }
-    if (op == BC_FORI) {
-      |  iselgt PC, RD, PC
-    } else if (op == BC_IFORL) {
-      |  iselgt PC, PC, RD
-    } else {
-      |  ble =>BC_JLOOP
-    }
-    |  ins_next
-    |2:
-    |  efdcmpgt TMP2, TMP1
-    |  b <1
-    break;
-
-  case BC_ITERL:
-    |.if JIT
-    |  hotloop
-    |.endif
-    |  // Fall through. Assumes BC_IITERL follows.
-    break;
-
-  case BC_JITERL:
-#if !LJ_HASJIT
-    break;
-#endif
-  case BC_IITERL:
-    |  // RA = base*8, RD = target
-    |  evlddx TMP1, BASE, RA
-    |   subi RA, RA, 8
-    |  checknil TMP1
-    |  checkok >1			// Stop if iterator returned nil.
-    if (op == BC_JITERL) {
-      |  NYI
-    } else {
-      |  branch_RD			// Otherwise save control var + branch.
-      |  evstddx TMP1, BASE, RA
-    }
-    |1:
-    |  ins_next
-    break;
-
-  case BC_LOOP:
-    |  // RA = base*8, RD = target (loop extent)
-    |  // Note: RA/RD is only used by trace recorder to determine scope/extent
-    |  // This opcode does NOT jump, it's only purpose is to detect a hot loop.
-    |.if JIT
-    |  hotloop
-    |.endif
-    |  // Fall through. Assumes BC_ILOOP follows.
-    break;
-
-  case BC_ILOOP:
-    |  // RA = base*8, RD = target (loop extent)
-    |  ins_next
-    break;
-
-  case BC_JLOOP:
-    |.if JIT
-    |  NYI
-    |.endif
-    break;
-
-  case BC_JMP:
-    |  // RA = base*8 (only used by trace recorder), RD = target
-    |  branch_RD
-    |  ins_next
-    break;
-
-  /* -- Function headers -------------------------------------------------- */
-
-  case BC_FUNCF:
-    |.if JIT
-    |  hotcall
-    |.endif
-  case BC_FUNCV:  /* NYI: compiled vararg functions. */
-    |  // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
-    break;
-
-  case BC_JFUNCF:
-#if !LJ_HASJIT
-    break;
-#endif
-  case BC_IFUNCF:
-    |  // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
-    |  lwz TMP2, L->maxstack
-    |   lbz TMP1, -4+PC2PROTO(numparams)(PC)
-    |    lwz KBASE, -4+PC2PROTO(k)(PC)
-    |  cmplw RA, TMP2
-    |   slwi TMP1, TMP1, 3
-    |  bgt ->vm_growstack_l
-    |  ins_next1
-    |2:
-    |  cmplw NARGS8:RC, TMP1		// Check for missing parameters.
-    |  ble >3
-    if (op == BC_JFUNCF) {
-      |  NYI
-    } else {
-      |  ins_next2
-    }
-    |
-    |3:  // Clear missing parameters.
-    |  evstddx TISNIL, BASE, NARGS8:RC
-    |  addi NARGS8:RC, NARGS8:RC, 8
-    |  b <2
-    break;
-
-  case BC_JFUNCV:
-#if !LJ_HASJIT
-    break;
-#endif
-    |  NYI  // NYI: compiled vararg functions
-    break;  /* NYI: compiled vararg functions. */
-
-  case BC_IFUNCV:
-    |  // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
-    |  lwz TMP2, L->maxstack
-    |   add TMP1, BASE, RC
-    |  add TMP0, RA, RC
-    |   stw LFUNC:RB, 4(TMP1)		// Store copy of LFUNC.
-    |   addi TMP3, RC, 8+FRAME_VARG
-    |    lwz KBASE, -4+PC2PROTO(k)(PC)
-    |  cmplw TMP0, TMP2
-    |   stw TMP3, 0(TMP1)		// Store delta + FRAME_VARG.
-    |  bge ->vm_growstack_l
-    |  lbz TMP2, -4+PC2PROTO(numparams)(PC)
-    |   mr RA, BASE
-    |   mr RC, TMP1
-    |  ins_next1
-    |  cmpwi TMP2, 0
-    |   addi BASE, TMP1, 8
-    |  beq >3
-    |1:
-    |  cmplw RA, RC			// Less args than parameters?
-    |   evldd TMP0, 0(RA)
-    |  bge >4
-    |    evstdd TISNIL, 0(RA)		// Clear old fixarg slot (help the GC).
-    |    addi RA, RA, 8
-    |2:
-    |  addic. TMP2, TMP2, -1
-    |   evstdd TMP0, 8(TMP1)
-    |    addi TMP1, TMP1, 8
-    |  bne <1
-    |3:
-    |  ins_next2
-    |
-    |4:  // Clear missing parameters.
-    |  evmr TMP0, TISNIL
-    |  b <2
-    break;
-
-  case BC_FUNCC:
-  case BC_FUNCCW:
-    |  // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
-    if (op == BC_FUNCC) {
-      |  lwz TMP3, CFUNC:RB->f
-    } else {
-      |  lwz TMP3, DISPATCH_GL(wrapf)(DISPATCH)
-    }
-    |   add TMP1, RA, NARGS8:RC
-    |   lwz TMP2, L->maxstack
-    |    add RC, BASE, NARGS8:RC
-    |   stw BASE, L->base
-    |   cmplw TMP1, TMP2
-    |    stw RC, L->top
-    |     li_vmstate C
-    |  mtctr TMP3
-    if (op == BC_FUNCCW) {
-      |  lwz CARG2, CFUNC:RB->f
-    }
-    |  mr CARG1, L
-    |   bgt ->vm_growstack_c		// Need to grow stack.
-    |     st_vmstate
-    |  bctrl				// (lua_State *L [, lua_CFunction f])
-    |  // Returns nresults.
-    |  lwz TMP1, L->top
-    |   slwi RD, CRET1, 3
-    |  lwz BASE, L->base
-    |    li_vmstate INTERP
-    |  lwz PC, FRAME_PC(BASE)		// Fetch PC of caller.
-    |   sub RA, TMP1, RD		// RA = L->top - nresults*8
-    |    st_vmstate
-    |  b ->vm_returnc
-    break;
-
-  /* ---------------------------------------------------------------------- */
-
-  default:
-    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
-    exit(2);
-    break;
-  }
-}
-
-static int build_backend(BuildCtx *ctx)
-{
-  int op;
-
-  dasm_growpc(Dst, BC__MAX);
-
-  build_subroutines(ctx);
-
-  |.code_op
-  for (op = 0; op < BC__MAX; op++)
-    build_ins(ctx, (BCOp)op, op);
-
-  return BC__MAX;
-}
-
-/* Emit pseudo frame-info for all assembler functions. */
-static void emit_asm_debug(BuildCtx *ctx)
-{
-  int i;
-  switch (ctx->mode) {
-  case BUILD_elfasm:
-    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
-    fprintf(ctx->fp,
-	".Lframe0:\n"
-	"\t.long .LECIE0-.LSCIE0\n"
-	".LSCIE0:\n"
-	"\t.long 0xffffffff\n"
-	"\t.byte 0x1\n"
-	"\t.string \"\"\n"
-	"\t.uleb128 0x1\n"
-	"\t.sleb128 -4\n"
-	"\t.byte 65\n"
-	"\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
-	"\t.align 2\n"
-	".LECIE0:\n\n");
-    fprintf(ctx->fp,
-	".LSFDE0:\n"
-	"\t.long .LEFDE0-.LASFDE0\n"
-	".LASFDE0:\n"
-	"\t.long .Lframe0\n"
-	"\t.long .Lbegin\n"
-	"\t.long %d\n"
-	"\t.byte 0xe\n\t.uleb128 %d\n"
-	"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
-	"\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n",
-	(int)ctx->codesz, CFRAME_SIZE);
-    for (i = 14; i <= 31; i++)
-      fprintf(ctx->fp,
-	"\t.byte %d\n\t.uleb128 %d\n"
-	"\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n",
-	0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i));
-    fprintf(ctx->fp,
-	"\t.align 2\n"
-	".LEFDE0:\n\n");
-    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
-    fprintf(ctx->fp,
-	".Lframe1:\n"
-	"\t.long .LECIE1-.LSCIE1\n"
-	".LSCIE1:\n"
-	"\t.long 0\n"
-	"\t.byte 0x1\n"
-	"\t.string \"zPR\"\n"
-	"\t.uleb128 0x1\n"
-	"\t.sleb128 -4\n"
-	"\t.byte 65\n"
-	"\t.uleb128 6\n"			/* augmentation length */
-	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.long lj_err_unwind_dwarf-.\n"
-	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
-	"\t.align 2\n"
-	".LECIE1:\n\n");
-    fprintf(ctx->fp,
-	".LSFDE1:\n"
-	"\t.long .LEFDE1-.LASFDE1\n"
-	".LASFDE1:\n"
-	"\t.long .LASFDE1-.Lframe1\n"
-	"\t.long .Lbegin-.\n"
-	"\t.long %d\n"
-	"\t.uleb128 0\n"			/* augmentation length */
-	"\t.byte 0xe\n\t.uleb128 %d\n"
-	"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
-	"\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n",
-	(int)ctx->codesz, CFRAME_SIZE);
-    for (i = 14; i <= 31; i++)
-      fprintf(ctx->fp,
-	"\t.byte %d\n\t.uleb128 %d\n"
-	"\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n",
-	0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i));
-    fprintf(ctx->fp,
-	"\t.align 2\n"
-	".LEFDE1:\n\n");
-    break;
-  default:
-    break;
-  }
-}
-
diff --git a/source/libs/luajit/LuaJIT-2.0.4-PATCHES/ChangeLog b/source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/ChangeLog
similarity index 88%
rename from source/libs/luajit/LuaJIT-2.0.4-PATCHES/ChangeLog
rename to source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/ChangeLog
index fb756444ba100b3e331db89cd1a3bb5c99a78f84..5ef8ddb84fa78b47aeb27c8ecb6774b4fa42ff52 100644
--- a/source/libs/luajit/LuaJIT-2.0.4-PATCHES/ChangeLog
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/ChangeLog
@@ -1,3 +1,11 @@
+2015-09-07  Peter Breitenlohner  <peb@mppmu.mpg.de>
+
+	Import LuaJIT-2.1.0-beta1.
+
+	* patch-01-LuaJITTeX: Adapted.
+	* patch-05-LuaJITTeX: Adapted.
+	* patch-06-ppc-darwin: Adapted.
+
 2015-06-12  Peter Breitenlohner  <peb@mppmu.mpg.de>
 
 	Import LuaJIT-2.0.4.
diff --git a/source/libs/luajit/LuaJIT-2.0.4-PATCHES/patch-01-LuaJITTeX b/source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/patch-01-LuaJITTeX
similarity index 69%
rename from source/libs/luajit/LuaJIT-2.0.4-PATCHES/patch-01-LuaJITTeX
rename to source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/patch-01-LuaJITTeX
index db988a6a46383eadf45070cfaa943e6561a215fd..4853f00cc52bfcc0299f021b23aede744eb3a793 100644
--- a/source/libs/luajit/LuaJIT-2.0.4-PATCHES/patch-01-LuaJITTeX
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/patch-01-LuaJITTeX
@@ -1,6 +1,6 @@
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lauxlib.h LuaJIT-2.0.4/src/lauxlib.h
---- LuaJIT-2.0.4.orig/src/lauxlib.h	2015-05-14 20:30:00.000000000 +0200
-+++ LuaJIT-2.0.4/src/lauxlib.h	2015-06-12 11:04:08.000000000 +0200
+diff -ur LuaJIT-2.1.0-beta1.orig/src/lauxlib.h LuaJIT-2.1.0-beta1/src/lauxlib.h
+--- LuaJIT-2.1.0-beta1.orig/src/lauxlib.h	2015-08-25 23:35:00.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/lauxlib.h	2015-09-04 08:42:39.000000000 +0200
 @@ -86,6 +86,32 @@
  				int level);
  
@@ -34,9 +34,9 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lauxlib.h LuaJIT-2.0.4/src/lauxlib.h
  /*
  ** ===============================================================
  ** some useful macros
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lib_init.c LuaJIT-2.0.4/src/lib_init.c
---- LuaJIT-2.0.4.orig/src/lib_init.c	2015-05-14 20:30:00.000000000 +0200
-+++ LuaJIT-2.0.4/src/lib_init.c	2015-06-12 11:04:08.000000000 +0200
+diff -ur LuaJIT-2.1.0-beta1.orig/src/lib_init.c LuaJIT-2.1.0-beta1/src/lib_init.c
+--- LuaJIT-2.1.0-beta1.orig/src/lib_init.c	2015-08-25 23:35:00.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/lib_init.c	2015-09-04 08:42:39.000000000 +0200
 @@ -26,6 +26,7 @@
    { LUA_DBLIBNAME,	luaopen_debug },
    { LUA_BITLIBNAME,	luaopen_bit },
@@ -45,10 +45,10 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lib_init.c LuaJIT-2.0.4/src/lib_init
    { NULL,		NULL }
  };
  
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lib_package.c LuaJIT-2.0.4/src/lib_package.c
---- LuaJIT-2.0.4.orig/src/lib_package.c	2015-05-14 20:30:00.000000000 +0200
-+++ LuaJIT-2.0.4/src/lib_package.c	2015-06-12 11:04:08.000000000 +0200
-@@ -354,6 +354,29 @@
+diff -ur LuaJIT-2.1.0-beta1.orig/src/lib_package.c LuaJIT-2.1.0-beta1/src/lib_package.c
+--- LuaJIT-2.1.0-beta1.orig/src/lib_package.c	2015-08-25 23:35:00.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/lib_package.c	2015-09-04 08:42:39.000000000 +0200
+@@ -362,6 +362,29 @@
    return 1;  /* library loaded successfully */
  }
  
@@ -78,7 +78,7 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lib_package.c LuaJIT-2.0.4/src/lib_p
  static int lj_cf_package_loader_croot(lua_State *L)
  {
    const char *filename;
-@@ -373,6 +396,21 @@
+@@ -381,6 +404,21 @@
    return 1;
  }
  
@@ -100,9 +100,9 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lib_package.c LuaJIT-2.0.4/src/lib_p
  static int lj_cf_package_loader_preload(lua_State *L)
  {
    const char *name = luaL_checkstring(L, 1);
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lua.h LuaJIT-2.0.4/src/lua.h
---- LuaJIT-2.0.4.orig/src/lua.h	2015-05-14 20:30:00.000000000 +0200
-+++ LuaJIT-2.0.4/src/lua.h	2015-06-12 11:04:08.000000000 +0200
+diff -ur LuaJIT-2.1.0-beta1.orig/src/lua.h LuaJIT-2.1.0-beta1/src/lua.h
+--- LuaJIT-2.1.0-beta1.orig/src/lua.h	2015-08-25 23:35:00.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/lua.h	2015-09-04 08:42:39.000000000 +0200
 @@ -348,6 +348,16 @@
  		       const char *chunkname, const char *mode);
  
@@ -120,9 +120,9 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lua.h LuaJIT-2.0.4/src/lua.h
  struct lua_Debug {
    int event;
    const char *name;	/* (n) */
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lualib.h LuaJIT-2.0.4/src/lualib.h
---- LuaJIT-2.0.4.orig/src/lualib.h	2015-05-14 20:30:00.000000000 +0200
-+++ LuaJIT-2.0.4/src/lualib.h	2015-06-12 11:04:08.000000000 +0200
+diff -ur LuaJIT-2.1.0-beta1.orig/src/lualib.h LuaJIT-2.1.0-beta1/src/lualib.h
+--- LuaJIT-2.1.0-beta1.orig/src/lualib.h	2015-08-25 23:35:00.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/lualib.h	2015-09-04 08:42:39.000000000 +0200
 @@ -22,6 +22,8 @@
  #define LUA_JITLIBNAME	"jit"
  #define LUA_FFILIBNAME	"ffi"
@@ -141,10 +141,10 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lualib.h LuaJIT-2.0.4/src/lualib.h
  LUALIB_API void luaL_openlibs(lua_State *L);
  
  #ifndef lua_assert
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/Makefile LuaJIT-2.0.4/src/Makefile
---- LuaJIT-2.0.4.orig/src/Makefile	2015-05-14 20:30:00.000000000 +0200
-+++ LuaJIT-2.0.4/src/Makefile	2015-06-12 11:04:08.000000000 +0200
-@@ -100,7 +100,7 @@
+diff -ur LuaJIT-2.1.0-beta1.orig/src/Makefile LuaJIT-2.1.0-beta1/src/Makefile
+--- LuaJIT-2.1.0-beta1.orig/src/Makefile	2015-08-25 23:35:00.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/Makefile	2015-09-04 08:42:39.000000000 +0200
+@@ -97,7 +97,7 @@
  # enabled by default. Some other features that *might* break some existing
  # code (e.g. __pairs or os.execute() return values) can be enabled here.
  # Note: this does not provide full compatibility with Lua 5.2 at this time.
@@ -153,7 +153,7 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/Makefile LuaJIT-2.0.4/src/Makefile
  #
  # Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter.
  #XCFLAGS+= -DLUAJIT_DISABLE_JIT
-@@ -448,7 +448,7 @@
+@@ -456,7 +456,7 @@
  LJVM_BOUT= $(LJVM_S)
  LJVM_MODE= elfasm
  
@@ -162,15 +162,14 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/Makefile LuaJIT-2.0.4/src/Makefile
  	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
  LJLIB_C= $(LJLIB_O:.o=.c)
  
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/Makefile.dep LuaJIT-2.0.4/src/Makefile.dep
---- LuaJIT-2.0.4.orig/src/Makefile.dep	2015-05-14 20:30:00.000000000 +0200
-+++ LuaJIT-2.0.4/src/Makefile.dep	2015-06-12 11:04:08.000000000 +0200
+diff -ur LuaJIT-2.1.0-beta1.orig/src/Makefile.dep LuaJIT-2.1.0-beta1/src/Makefile.dep
+--- LuaJIT-2.1.0-beta1.orig/src/Makefile.dep	2015-08-25 23:35:00.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/Makefile.dep	2015-09-04 08:46:10.000000000 +0200
 @@ -6,6 +6,7 @@
   lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
   lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
-  lj_lib.h lj_libdef.h
+  lj_strfmt.h lj_lib.h lj_libdef.h
 +lbitlib.o: lbitlib.c lua.h luaconf.h lauxlib.h lualib.h
  lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
-  lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
- lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
-Only in LuaJIT-2.0.4/src: Makefile.orig
+  lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
+  lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
diff --git a/source/libs/luajit/LuaJIT-2.0.4-PATCHES/patch-05-LuaJITTeX b/source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/patch-05-LuaJITTeX
similarity index 77%
rename from source/libs/luajit/LuaJIT-2.0.4-PATCHES/patch-05-LuaJITTeX
rename to source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/patch-05-LuaJITTeX
index 6d091a020921f879f3b538f87e2756e3c558cd14..3a05a52d896bbb485275b11df28b2eb3d64cdcac 100644
--- a/source/libs/luajit/LuaJIT-2.0.4-PATCHES/patch-05-LuaJITTeX
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/patch-05-LuaJITTeX
@@ -1,7 +1,7 @@
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lj_def.h LuaJIT-2.0.4/src/lj_def.h
---- LuaJIT-2.0.4.orig/src/lj_def.h	2015-05-14 20:30:00.000000000 +0200
-+++ LuaJIT-2.0.4/src/lj_def.h	2015-06-12 11:05:39.000000000 +0200
-@@ -62,7 +62,7 @@
+diff -ur LuaJIT-2.1.0-beta1.orig/src/lj_def.h LuaJIT-2.1.0-beta1/src/lj_def.h
+--- LuaJIT-2.1.0-beta1.orig/src/lj_def.h	2015-08-25 23:35:00.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/lj_def.h	2015-09-04 08:51:52.000000000 +0200
+@@ -66,7 +66,7 @@
  #define LJ_MAX_BCINS	(1<<26)		/* Max. # of bytecode instructions. */
  #define LJ_MAX_SLOTS	250		/* Max. # of slots in a Lua func. */
  #define LJ_MAX_LOCVAR	200		/* Max. # of local variables. */
@@ -9,11 +9,11 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lj_def.h LuaJIT-2.0.4/src/lj_def.h
 +#define LJ_MAX_UPVAL	249		/* Max. # of upvalues. */
  
  #define LJ_MAX_IDXCHAIN	100		/* __index/__newindex chain limit. */
- #define LJ_STACK_EXTRA	5		/* Extra stack space (metamethods). */
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lj_str.c LuaJIT-2.0.4/src/lj_str.c
---- LuaJIT-2.0.4.orig/src/lj_str.c	2015-05-14 20:30:00.000000000 +0200
-+++ LuaJIT-2.0.4/src/lj_str.c	2015-06-12 11:05:39.000000000 +0200
-@@ -90,6 +90,16 @@
+ #define LJ_STACK_EXTRA	(5+2*LJ_FR2)	/* Extra stack space (metamethods). */
+diff -ur LuaJIT-2.1.0-beta1.orig/src/lj_str.c LuaJIT-2.1.0-beta1/src/lj_str.c
+--- LuaJIT-2.1.0-beta1.orig/src/lj_str.c	2015-08-25 23:35:00.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/lj_str.c	2015-09-04 08:51:52.000000000 +0200
+@@ -118,6 +118,16 @@
    g->strhash = newhash;
  }
  
@@ -30,7 +30,7 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lj_str.c LuaJIT-2.0.4/src/lj_str.c
  /* Intern a string and return string object. */
  GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
  {
-@@ -98,27 +108,44 @@
+@@ -126,27 +136,44 @@
    GCobj *o;
    MSize len = (MSize)lenx;
    MSize a, b, h = len;
@@ -92,9 +92,10 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lj_str.c LuaJIT-2.0.4/src/lj_str.c
    /* Check if the string has already been interned. */
    o = gcref(g->strhash[h & g->strmask]);
    if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
-diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lua.h LuaJIT-2.0.4/src/lua.h
---- LuaJIT-2.0.4.orig/src/lua.h	2015-06-12 11:04:08.000000000 +0200
-+++ LuaJIT-2.0.4/src/lua.h	2015-06-12 11:05:39.000000000 +0200
+Only in LuaJIT-2.1.0-beta1/src: lj_str.c.orig
+diff -ur LuaJIT-2.1.0-beta1.orig/src/lua.h LuaJIT-2.1.0-beta1/src/lua.h
+--- LuaJIT-2.1.0-beta1.orig/src/lua.h	2015-09-04 08:42:39.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/lua.h	2015-09-04 08:51:52.000000000 +0200
 @@ -103,6 +103,9 @@
  typedef LUA_INTEGER lua_Integer;
  
@@ -105,4 +106,3 @@ diff -ur -x lbitlib.c LuaJIT-2.0.4.orig/src/lua.h LuaJIT-2.0.4/src/lua.h
  
  /*
  ** state manipulation
-Only in LuaJIT-2.0.4/src: Makefile.orig
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/patch-06-ppc-darwin b/source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/patch-06-ppc-darwin
new file mode 100644
index 0000000000000000000000000000000000000000..17f7e4a1c08041d6258f1460dbea01cee50aa3df
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1-PATCHES/patch-06-ppc-darwin
@@ -0,0 +1,24 @@
+diff -ur LuaJIT-2.1.0-beta1.orig/src/host/buildvm.c LuaJIT-2.1.0-beta1/src/host/buildvm.c
+--- LuaJIT-2.1.0-beta1.orig/src/host/buildvm.c	2015-08-25 23:35:00.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/host/buildvm.c	2015-09-04 09:03:00.000000000 +0200
+@@ -113,7 +113,7 @@
+       name[0] = '@';
+     else
+       *p = '\0';
+-#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
++#elif LJ_TARGET_PPC && !LJ_TARGET_OSX && !LJ_TARGET_CONSOLE
+     /* Keep @plt etc. */
+ #else
+     *p = '\0';
+diff -ur LuaJIT-2.1.0-beta1.orig/src/lj_arch.h LuaJIT-2.1.0-beta1/src/lj_arch.h
+--- LuaJIT-2.1.0-beta1.orig/src/lj_arch.h	2015-08-25 23:35:00.000000000 +0200
++++ LuaJIT-2.1.0-beta1/src/lj_arch.h	2015-09-04 08:58:07.000000000 +0200
+@@ -324,7 +324,7 @@
+ #if __GNUC__ < 4
+ #error "Need at least GCC 4.0 or newer"
+ #endif
+-#elif LJ_TARGET_ARM
++#elif LJ_TARGET_ARM || LJ_TARGET_PPC
+ #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
+ #error "Need at least GCC 4.2 or newer"
+ #endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/COPYRIGHT b/source/libs/luajit/LuaJIT-2.1.0-beta1/COPYRIGHT
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/COPYRIGHT
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/COPYRIGHT
diff --git a/source/libs/luajit/LuaJIT-2.0.4/Makefile b/source/libs/luajit/LuaJIT-2.1.0-beta1/Makefile
similarity index 87%
rename from source/libs/luajit/LuaJIT-2.0.4/Makefile
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/Makefile
index 0cbe741a424cb65797e7adf071ff96c862d81cdb..8ce773e3936ce075e33ba4a66834839a589a50c7 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/Makefile
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/Makefile
@@ -14,9 +14,10 @@
 ##############################################################################
 
 MAJVER=  2
-MINVER=  0
-RELVER=  4
-VERSION= $(MAJVER).$(MINVER).$(RELVER)
+MINVER=  1
+RELVER=  0
+PREREL=  -beta1
+VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL)
 ABIVER=  5.1
 
 ##############################################################################
@@ -83,8 +84,9 @@ FILE_SO= libluajit.so
 FILE_MAN= luajit.1
 FILE_PC= luajit.pc
 FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
-FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua dis_arm.lua \
-	      dis_ppc.lua dis_mips.lua dis_mipsel.lua bcsave.lua vmdef.lua
+FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
+	      dis_x86.lua dis_x64.lua dis_arm.lua dis_ppc.lua \
+	      dis_mips.lua dis_mipsel.lua vmdef.lua
 
 ifeq (,$(findstring Windows,$(OS)))
   ifeq (Darwin,$(shell uname -s))
@@ -109,7 +111,7 @@ install: $(INSTALL_DEP)
 	$(MKDIR) $(INSTALL_DIRS)
 	cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
 	cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
-	$(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
+	$(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
 	cd src && test -f $(FILE_SO) && \
 	  $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
 	  $(LDCONFIG) $(INSTALL_LIB) && \
@@ -121,12 +123,18 @@ install: $(INSTALL_DEP)
 	  $(RM) $(FILE_PC).tmp
 	cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
 	cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
-	$(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
 	@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
+	@echo ""
+	@echo "Note: the development releases deliberately do NOT install a symlink for luajit"
+	@echo "You can do this now by running this command (with sudo):"
+	@echo ""
+	@echo "  $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
+	@echo ""
+
 
 uninstall:
 	@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
-	$(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
+	$(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
 	for file in $(FILES_JITLIB); do \
 	  $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
 	  done
diff --git a/source/libs/luajit/LuaJIT-2.0.4/README b/source/libs/luajit/LuaJIT-2.1.0-beta1/README
similarity index 86%
rename from source/libs/luajit/LuaJIT-2.0.4/README
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/README
index 44366af516c484b544e8ad840c823b063ba3597e..ca70dd8eddf684a24e964e7272db336201005bfb 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/README
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/README
@@ -1,5 +1,5 @@
-README for LuaJIT 2.0.4
------------------------
+README for LuaJIT 2.1.0-beta1
+-----------------------------
 
 LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/bluequad-print.css b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/bluequad-print.css
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/bluequad-print.css
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/bluequad-print.css
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/bluequad.css b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/bluequad.css
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/bluequad.css
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/bluequad.css
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/changes.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/changes.html
similarity index 80%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/changes.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/changes.html
index d7b83ce68a6e4ec4e383b4cd9efccabccd969590..64dc4c2adc12b819f1b0b4e37e53dff17d0fa39e 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/changes.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/changes.html
@@ -44,6 +44,8 @@ div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -72,6 +74,59 @@ to see whether newer versions are available.
 </p>
 
 <div class="major" style="background: #d0d0ff;">
+<h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 &mdash; 2015-08-25</h2>
+<p>
+This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0.
+Please take a look at the commit history for more details.
+</p>
+<ul>
+<li>Changes to the VM core:
+<ul>
+<li>Add low-overhead profiler (<tt>-jp</tt>).</li>
+<li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li>
+<li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li>
+<li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li>
+<li>Parse binary number literals (<tt>0bxxx</tt>).</li>
+</ul></li>
+<li>Improvements to the JIT compiler:
+<ul>
+<li>Add trace stitching (disabled for now).</li>
+<li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li>
+<li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li>
+<li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li>
+<li>Compile string concatenations (<tt>BC_CAT</tt>).</li>
+<li>Compile <tt>__concat</tt> metamethod.</li>
+<li>Various minor optimizations.</li>
+</ul></li>
+<li>Internal Changes:
+<ul>
+<li>Add support for embedding LuaJIT bytecode for builtins.</li>
+<li>Replace various builtins with embedded bytecode.</li>
+<li>Refactor string buffers and string formatting.</li>
+<li>Remove obsolete non-truncating number to integer conversions.</li>
+</ul></li>
+<li>Ports:
+<ul>
+<li>Add Xbox One port (<tt>LJ_GC64</tt> mode).</li>
+<li>ARM64: Add port of the interpreter (<tt>LJ_GC64</tt> mode).</li>
+<li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt> mode.</li>
+<li>x86/x64: Drop internal x87 math functions. Use libm functions.</li>
+<li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li>
+<li>PPC/e500: Drop support for this architecture.</li>
+</ul></li>
+<li>FFI library:
+<ul>
+<li>FFI: Add 64 bit bitwise operations.</li>
+<li>FFI: Compile VLA/VLS and large cdata allocations with default initialization.</li>
+<li>FFI: Compile conversions from functions to function pointers.</li>
+<li>FFI: Compile lightuserdata to <tt>void *</tt> conversion.</li>
+<li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li>
+<li>FFI: Add <tt>ffi.typeinfo()</tt>.</li>
+</ul></li>
+</ul>
+</div>
+
+<div class="major" style="background: #ffffd0;">
 <h2 id="LuaJIT-2.0.4">LuaJIT 2.0.4 &mdash; 2015-05-14</h2>
 <ul>
 <li>Fix stack check in narrowing optimization.</li>
@@ -735,235 +790,6 @@ This matches the behavior of Lua 5.1, but not the specification.</li>
 no point in listing differences over earlier versions.</li>
 </ul>
 </div>
-
-<div class="major" style="background: #ffff80;">
-<h2 id="LuaJIT-1.1.8">LuaJIT 1.1.8 &mdash; 2012-04-16</h2>
-<ul>
-<li>Merged with Lua 5.1.5. Also integrated fixes for all
-<a href="http://www.lua.org/bugs.html#5.1.5"><span class="ext">&raquo;</span>&nbsp;<span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.5</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.7">LuaJIT 1.1.7 &mdash; 2011-05-05</h2>
-<ul>
-<li>Added fixes for the
-<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.6">LuaJIT 1.1.6 &mdash; 2010-03-28</h2>
-<ul>
-<li>Added fixes for the
-<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
-<li>Removed wrong GC check in <tt>jit_createstate()</tt>.
-Thanks to Tim Mensch.</li>
-<li>Fixed bad assertions while compiling <tt>table.insert()</tt> and
-<tt>table.remove()</tt>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.5">LuaJIT 1.1.5 &mdash; 2008-10-25</h2>
-<ul>
-<li>Merged with Lua 5.1.4. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.3"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.3</a>.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.4">LuaJIT 1.1.4 &mdash; 2008-02-05</h2>
-<ul>
-<li>Merged with Lua 5.1.3. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.2"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.2</a>.</li>
-<li>Fixed possible (but unlikely) stack corruption while compiling
-<tt>k^x</tt> expressions.</li>
-<li>Fixed DynASM template for cmpss instruction.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.3">LuaJIT 1.1.3 &mdash; 2007-05-24</h2>
-<ul>
-<li>Merged with Lua 5.1.2. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.1</a>.</li>
-<li>Merged pending Lua 5.1.x fixes: "return -nil" bug, spurious count hook call.</li>
-<li>Remove a (sometimes) wrong assertion in <tt>luaJIT_findpc()</tt>.</li>
-<li>DynASM now allows labels for displacements and <tt>.aword</tt>.</li>
-<li>Fix some compiler warnings for DynASM glue (internal API change).</li>
-<li>Correct naming for SSSE3 (temporarily known as SSE4) in DynASM and x86 disassembler.</li>
-<li>The loadable debug modules now handle redirection to stdout
-(e.g. <tt>-j&nbsp;trace=-</tt>).</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.2">LuaJIT 1.1.2 &mdash; 2006-06-24</h2>
-<ul>
-<li>Fix MSVC inline assembly: use only local variables with
-<tt>lua_number2int()</tt>.</li>
-<li>Fix "attempt to call a thread value" bug on Mac OS X:
-make values of consts used as lightuserdata keys unique
-to avoid joining by the compiler/linker.</li>
-</ul>
-
-<h2 id="LuaJIT-1.1.1">LuaJIT 1.1.1 &mdash; 2006-06-20</h2>
-<ul>
-<li>Merged with Lua 5.1.1. Fixes all
-<a href="http://www.lua.org/bugs.html#5.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1</a>.</li>
-<li>Enforce (dynamic) linker error for EXE/DLL version mismatches.</li>
-<li>Minor changes to DynASM: faster pre-processing, smaller encoding
-for some immediates.</li>
-</ul>
-<p>
-This release is in sync with Coco 1.1.1 (see the
-<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
-</p>
-
-<h2 id="LuaJIT-1.1.0">LuaJIT 1.1.0 &mdash; 2006-03-13</h2>
-<ul>
-<li>Merged with Lua 5.1 (final).</li>
-
-<li>New JIT call frame setup:
-<ul>
-<li>The C stack is kept 16 byte aligned (faster).
-Mandatory for Mac OS X on Intel, too.</li>
-<li>Faster calling conventions for internal C helper functions.</li>
-<li>Better instruction scheduling for function prologue, OP_CALL and
-OP_RETURN.</li>
-</ul></li>
-
-<li>Miscellaneous optimizations:
-<ul>
-<li>Faster loads of FP constants. Remove narrow-to-wide store-to-load
-forwarding stalls.</li>
-<li>Use (scalar) SSE2 ops (if the CPU supports it) to speed up slot moves
-and FP to integer conversions.</li>
-<li>Optimized the two-argument form of <tt>OP_CONCAT</tt> (<tt>a..b</tt>).</li>
-<li>Inlined <tt>OP_MOD</tt> (<tt>a%b</tt>).
-With better accuracy than the C variant, too.</li>
-<li>Inlined <tt>OP_POW</tt> (<tt>a^b</tt>). Unroll <tt>x^k</tt> or
-use <tt>k^x = 2^(log2(k)*x)</tt> or call <tt>pow()</tt>.</li>
-</ul></li>
-
-<li>Changes in the optimizer:
-<ul>
-<li>Improved hinting for table keys derived from table values
-(<tt>t1[t2[x]]</tt>).</li>
-<li>Lookup hinting now works with arbitrary object types and
-supports index chains, too.</li>
-<li>Generate type hints for arithmetic and comparison operators,
-OP_LEN, OP_CONCAT and OP_FORPREP.</li>
-<li>Remove several hint definitions in favour of a generic COMBINE hint.</li>
-<li>Complete rewrite of <tt>jit.opt_inline</tt> module
-(ex <tt>jit.opt_lib</tt>).</li>
-</ul></li>
-
-<li>Use adaptive deoptimization:
-<ul>
-<li>If runtime verification of a contract fails, the affected
-instruction is recompiled and patched on-the-fly.
-Regular programs will trigger deoptimization only occasionally.</li>
-<li>This avoids generating code for uncommon fallback cases
-most of the time. Generated code is up to 30% smaller compared to
-LuaJIT&nbsp;1.0.3.</li>
-<li>Deoptimization is used for many opcodes and contracts:
-<ul>
-<li>OP_CALL, OP_TAILCALL: type mismatch for callable.</li>
-<li>Inlined calls: closure mismatch, parameter number and type mismatches.</li>
-<li>OP_GETTABLE, OP_SETTABLE: table or key type and range mismatches.</li>
-<li>All arithmetic and comparison operators, OP_LEN, OP_CONCAT,
-OP_FORPREP: operand type and range mismatches.</li>
-</ul></li>
-<li>Complete redesign of the debug and traceback info
-(bytecode &harr; mcode) to support deoptimization.
-Much more flexible and needs only 50% of the space.</li>
-<li>The modules <tt>jit.trace</tt>, <tt>jit.dumphints</tt> and
-<tt>jit.dump</tt> handle deoptimization.</li>
-</ul></li>
-
-<li>Inlined many popular library functions
-(for commonly used arguments only):
-<ul>
-<li>Most <tt>math.*</tt> functions (the 18 most used ones)
-[2x-10x faster].</li>
-<li><tt>string.len</tt>, <tt>string.sub</tt> and <tt>string.char</tt>
-[2x-10x faster].</li>
-<li><tt>table.insert</tt>, <tt>table.remove</tt> and <tt>table.getn</tt>
-[3x-5x faster].</li>
-<li><tt>coroutine.yield</tt> and <tt>coroutine.resume</tt>
-[3x-5x faster].</li>
-<li><tt>pairs</tt>, <tt>ipairs</tt> and the corresponding iterators
-[8x-15x faster].</li>
-</ul></li>
-
-<li>Changes in the core and loadable modules and the stand-alone executable:
-<ul>
-<li>Added <tt>jit.version</tt>, <tt>jit.version_num</tt>
-and <tt>jit.arch</tt>.</li>
-<li>Reorganized some internal API functions (<tt>jit.util.*mcode*</tt>).</li>
-<li>The <tt>-j dump</tt> output now shows JSUB names, too.</li>
-<li>New x86 disassembler module written in pure Lua. No dependency
-on ndisasm anymore. Flexible API, very compact (500 lines)
-and complete (x87, MMX, SSE, SSE2, SSE3, SSSE3, privileged instructions).</li>
-<li><tt>luajit -v</tt> prints the LuaJIT version and copyright
-on a separate line.</li>
-</ul></li>
-
-<li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li>
-<li>Miscellaneous doc changes. Added a section about
-<a href="install.html#embedding">embedding LuaJIT</a>.</li>
-</ul>
-<p>
-This release is in sync with Coco 1.1.0 (see the
-<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
-</p>
-</div>
-
-<div class="major" style="background: #ffffd0;">
-<h2 id="LuaJIT-1.0.3">LuaJIT 1.0.3 &mdash; 2005-09-08</h2>
-<ul>
-<li>Even more docs.</li>
-<li>Unified closure checks in <tt>jit.*</tt>.</li>
-<li>Fixed some range checks in <tt>jit.util.*</tt>.</li>
-<li>Fixed __newindex call originating from <tt>jit_settable_str()</tt>.</li>
-<li>Merged with Lua 5.1 alpha (including early bug fixes).</li>
-</ul>
-<p>
-This is the first public release of LuaJIT.
-</p>
-
-<h2 id="LuaJIT-1.0.2">LuaJIT 1.0.2 &mdash; 2005-09-02</h2>
-<ul>
-<li>Add support for flushing the Valgrind translation cache <br>
-(<tt>MYCFLAGS= -DUSE_VALGRIND</tt>).</li>
-<li>Add support for freeing executable mcode memory to the <tt>mmap()</tt>-based
-variant for POSIX systems.</li>
-<li>Reorganized the C&nbsp;function signature handling in
-<tt>jit.opt_lib</tt>.</li>
-<li>Changed to index-based hints for inlining C&nbsp;functions.
-Still no support in the backend for inlining.</li>
-<li>Hardcode <tt>HEAP_CREATE_ENABLE_EXECUTE</tt> value if undefined.</li>
-<li>Misc. changes to the <tt>jit.*</tt> modules.</li>
-<li>Misc. changes to the Makefiles.</li>
-<li>Lots of new docs.</li>
-<li>Complete doc reorg.</li>
-</ul>
-<p>
-Not released because Lua 5.1 alpha came out today.
-</p>
-
-<h2 id="LuaJIT-1.0.1">LuaJIT 1.0.1 &mdash; 2005-08-31</h2>
-<ul>
-<li>Missing GC step in <tt>OP_CONCAT</tt>.</li>
-<li>Fix result handling for C &ndash;> JIT calls.</li>
-<li>Detect CPU feature bits.</li>
-<li>Encode conditional moves (<tt>fucomip</tt>) only when supported.</li>
-<li>Add fallback instructions for FP compares.</li>
-<li>Add support for <tt>LUA_COMPAT_VARARG</tt>. Still disabled by default.</li>
-<li>MSVC needs a specific place for the <tt>CALLBACK</tt> attribute
-(David Burgess).</li>
-<li>Misc. doc updates.</li>
-</ul>
-<p>
-Interim non-public release.
-Special thanks to Adam D. Moss for reporting most of the bugs.
-</p>
-
-<h2 id="LuaJIT-1.0.0">LuaJIT 1.0.0 &mdash; 2005-08-29</h2>
-<p>
-This is the initial non-public release of LuaJIT.
-</p>
-</div>
 <br class="flush">
 </div>
 <div id="foot">
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/contact.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/contact.html
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/contact.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/contact.html
index 0ef01a7c1597bc25fc49809d4d023135bde8d0e0..d92c3e38fb320d7155b9efb3a85a45eb795c5acd 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/contact.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/contact.html
@@ -41,6 +41,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_c_api.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_c_api.html
similarity index 99%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/ext_c_api.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_c_api.html
index 65981806da04cd9b33185b8b43ec004da181b32f..91dd9efbd84719c156945f09f9292c53da3e577d 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_c_api.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_c_api.html
@@ -41,6 +41,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a class="current" href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi.html
similarity index 99%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi.html
index 77b8e26c567e6521b699c3fa12ca61f42a62feb5..1ff22363484c57483a819ff0820d92360ccdb5ef 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi.html
@@ -41,6 +41,8 @@
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi_api.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi_api.html
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi_api.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi_api.html
index 8f577e9fea67e347ada405fc199a925f5e8710a9..b095c05ff3d7240aa16e436c8f01f2232c737a69 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi_api.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi_api.html
@@ -46,6 +46,8 @@ td.abiparam { font-weight: bold; width: 6em; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -466,6 +468,8 @@ otherwise. The following parameters are currently defined:
 <td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
 <tr class="odd">
 <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
+<tr class="even">
+<td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
 </table>
 
 <h3 id="ffi_os"><tt>ffi.os</tt></h3>
@@ -542,8 +546,8 @@ corresponding ctype.
 The parser for Lua source code treats numeric literals with the
 suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
 integers. Case doesn't matter, but uppercase is recommended for
-readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal
-(<tt>0x2aLL</tt>) literals.
+readability. It handles decimal (<tt>42LL</tt>), hexadecimal
+(<tt>0x2aLL</tt>) and binary (<tt>0b101010LL</tt>) literals.
 </p>
 <p>
 The imaginary part of complex numbers can be specified by suffixing
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi_semantics.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi_semantics.html
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi_semantics.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi_semantics.html
index c267b555668e8f9ca9736f019b37a8ebe5b48110..889d44d8235bf05f47f8fc077cf52d30957532db 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi_semantics.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi_semantics.html
@@ -46,6 +46,8 @@ td.convop { font-style: italic; width: 40%; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -730,6 +732,22 @@ You'll have to explicitly convert a 64&nbsp;bit integer to a Lua
 number (e.g. for regular floating-point calculations) with
 <tt>tonumber()</tt>. But note this may incur a precision loss.</li>
 
+<li><b>64&nbsp;bit bitwise operations</b>: the rules for 64&nbsp;bit
+arithmetic operators apply analogously.<br>
+
+Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt>
+converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and
+returns a Lua number.<br>
+
+For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the
+conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to
+<em>all</em> arguments, if <em>any</em> argument is a cdata number.<br>
+
+For all other operations, only the first argument is used to determine
+the output type. This implies that a cdata number as a shift count for
+shifts and rotates is accepted, but that alone does <em>not</em> cause
+a cdata number output.</li>
+
 </ul>
 
 <h3 id="cdata_comp">Comparisons of cdata objects</h3>
@@ -1205,9 +1223,8 @@ suboptimal performance, especially when used in inner loops:
 <li>Vector operations.</li>
 <li>Table initializers.</li>
 <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
-<li>Allocations of variable-length arrays or structs.</li>
-<li>Allocations of C&nbsp;types with a size &gt; 128&nbsp;bytes or an
-alignment &gt; 8&nbsp;bytes.</li>
+<li>Non-default initialization of VLA/VLS or large C&nbsp;types
+(&gt; 128&nbsp;bytes or &gt; 16 array elements).</li>
 <li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
 <li>Pointer differences for element sizes that are not a power of
 two.</li>
@@ -1224,7 +1241,6 @@ value.</li>
 Other missing features:
 </p>
 <ul>
-<li>Bit operations for 64&nbsp;bit types.</li>
 <li>Arithmetic for <tt>complex</tt> numbers.</li>
 <li>Passing structs by value to vararg C&nbsp;functions.</li>
 <li><a href="extensions.html#exceptions">C++ exception interoperability</a>
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi_tutorial.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi_tutorial.html
similarity index 99%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi_tutorial.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi_tutorial.html
index 8f99bfb1bbd24f6108353b983753d48a941c25a3..e3f01460a803b9e443dd735fdb9c96c556fc9398 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_ffi_tutorial.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_ffi_tutorial.html
@@ -48,6 +48,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_jit.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_jit.html
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/ext_jit.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_jit.html
index 27351dd64b8382d5dce9b1156abb789208ba82af..a569dd570e46b82a3824d9731ca3ed4d04088639 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/ext_jit.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_jit.html
@@ -41,6 +41,8 @@
 <a class="current" href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -151,7 +153,7 @@ Contains the target OS name:
 <h3 id="jit_arch"><tt>jit.arch</tt></h3>
 <p>
 Contains the target architecture name:
-"x86", "x64", "arm", "ppc", "ppcspe", or "mips".
+"x86", "x64", "arm", "ppc", or "mips".
 </p>
 
 <h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_profiler.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_profiler.html
new file mode 100644
index 0000000000000000000000000000000000000000..be63662b7553c2a91aaa54faf68c83f65ed945c2
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/ext_profiler.html
@@ -0,0 +1,365 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Profiler</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2015, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Profiler</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a class="current" href="ext_profiler.html">Profiler</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT has an integrated statistical profiler with very low overhead. It
+allows sampling the currently executing stack and other parameters at
+regular intervals.
+</p>
+<p>
+The integrated profiler can be accessed from three levels:
+</p>
+<ul>
+<li>The <a href="#hl_profiler">bundled high-level profiler</a>, invoked by the
+<a href="#j_p"><tt>-jp</tt></a> command line option.</li>
+<li>A <a href="#ll_lua_api">low-level Lua API</a> to control the profiler.</li>
+<li>A <a href="#ll_c_api">low-level C API</a> to control the profiler.</li>
+</ul>
+
+<h2 id="hl_profiler">High-Level Profiler</h2>
+<p>
+The bundled high-level profiler offers basic profiling functionality. It
+generates simple textual summaries or source code annotations. It can be
+accessed with the <a href="#j_p"><tt>-jp</tt></a> command line option
+or from Lua code by loading the underlying <tt>jit.p</tt> module.
+</p>
+<p>
+To cut to the chase &mdash; run this to get a CPU usage profile by
+function name:
+</p>
+<pre class="code">
+luajit -jp myapp.lua
+</pre>
+<p>
+It's <em>not</em> a stated goal of the bundled profiler to add every
+possible option or to cater for special profiling needs. The low-level
+profiler APIs are documented below. They may be used by third-party
+authors to implement advanced functionality, e.g. IDE integration or
+graphical profilers.
+</p>
+<p>
+Note: Sampling works for both interpreted and JIT-compiled code. The
+results for JIT-compiled code may sometimes be surprising. LuaJIT
+heavily optimizes and inlines Lua code &mdash; there's no simple
+one-to-one correspondence between source code lines and the sampled
+machine code.
+</p>
+
+<h3 id="j_p"><tt>-jp=[options[,output]]</tt></h3>
+<p>
+The <tt>-jp</tt> command line option starts the high-level profiler.
+When the application run by the command line terminates, the profiler
+stops and writes the results to <tt>stdout</tt> or to the specified
+<tt>output</tt> file.
+</p>
+<p>
+The <tt>options</tt> argument specifies how the profiling is to be
+performed:
+</p>
+<ul>
+<li><tt>f</tt> &mdash; Stack dump: function name, otherwise module:line.
+This is the default mode.</li>
+<li><tt>F</tt> &mdash; Stack dump: ditto, but dump module:name.</li>
+<li><tt>l</tt> &mdash; Stack dump: module:line.</li>
+<li><tt>&lt;number&gt;</tt> &mdash; Stack dump depth (callee &larr;
+caller). Default: 1.</li>
+<li><tt>-&lt;number&gt;</tt> &mdash; Inverse stack dump depth (caller
+&rarr; callee).</li>
+<li><tt>s</tt> &mdash; Split stack dump after first stack level. Implies
+depth&nbsp;&ge;&nbsp;2 or depth&nbsp;&le;&nbsp;-2.</li>
+<li><tt>p</tt> &mdash; Show full path for module names.</li>
+<li><tt>v</tt> &mdash; Show VM states.</li>
+<li><tt>z</tt> &mdash; Show <a href="#jit_zone">zones</a>.</li>
+<li><tt>r</tt> &mdash; Show raw sample counts. Default: show percentages.</li>
+<li><tt>a</tt> &mdash; Annotate excerpts from source code files.</li>
+<li><tt>A</tt> &mdash; Annotate complete source code files.</li>
+<li><tt>G</tt> &mdash; Produce raw output suitable for graphical tools.</li>
+<li><tt>m&lt;number&gt;</tt> &mdash; Minimum sample percentage to be shown.
+Default: 3%.</li>
+<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds.
+Default: 10ms.<br>
+Note: The actual sampling precision is OS-dependent.</li>
+</ul>
+<p>
+The default output for <tt>-jp</tt> is a list of the most CPU consuming
+spots in the application. Increasing the stack dump depth with (say)
+<tt>-jp=2</tt> may help to point out the main callers or callees of
+hotspots. But sample aggregation is still flat per unique stack dump.
+</p>
+<p>
+To get a two-level view (split view) of callers/callees, use
+<tt>-jp=s</tt> or <tt>-jp=-s</tt>. The percentages shown for the second
+level are relative to the first level.
+</p>
+<p>
+To see how much time is spent in each line relative to a function, use
+<tt>-jp=fl</tt>.
+</p>
+<p>
+To see how much time is spent in different VM states or
+<a href="#jit_zone">zones</a>, use <tt>-jp=v</tt> or <tt>-jp=z</tt>.
+</p>
+<p>
+Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level
+views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time
+spent in a VM state or zone vs. hotspots. This can be used to answer
+questions like "Which time consuming functions are only interpreted?" or
+"What's the garbage collector overhead for a specific function?".
+</p>
+<p>
+Multiple options can be combined &mdash; but not all combinations make
+sense, see above. E.g. <tt>-jp=3si4m1</tt> samples three stack levels
+deep in 4ms intervals and shows a split view of the CPU consuming
+functions and their callers with a 1% threshold.
+</p>
+<p>
+Source code annotations produced by <tt>-jp=a</tt> or <tt>-jp=A</tt> are
+always flat and at the line level. Obviously, the source code files need
+to be readable by the profiler script.
+</p>
+<p>
+The high-level profiler can also be started and stopped from Lua code with:
+</p>
+<pre class="code">
+require("jit.p").start(options, output)
+...
+require("jit.p").stop()
+</pre>
+
+<h3 id="jit_zone"><tt>jit.zone</tt> &mdash; Zones</h3>
+<p>
+Zones can be used to provide information about different parts of an
+application to the high-level profiler. E.g. a game could make use of an
+<tt>"AI"</tt> zone, a <tt>"PHYS"</tt> zone, etc. Zones are hierarchical,
+organized as a stack.
+</p>
+<p>
+The <tt>jit.zone</tt> module needs to be loaded explicitly:
+</p>
+<pre class="code">
+local zone = require("jit.zone")
+</pre>
+<ul>
+<li><tt>zone("name")</tt> pushes a named zone to the zone stack.</li>
+<li><tt>zone()</tt> pops the current zone from the zone stack and
+returns its name.</li>
+<li><tt>zone:get()</tt> returns the current zone name or <tt>nil</tt>.</li>
+<li><tt>zone:flush()</tt> flushes the zone stack.</li>
+</ul>
+<p>
+To show the time spent in each zone use <tt>-jp=z</tt>. To show the time
+spent relative to hotspots use e.g. <tt>-jp=zf</tt> or <tt>-jp=fz</tt>.
+</p>
+
+<h2 id="ll_lua_api">Low-level Lua API</h2>
+<p>
+The <tt>jit.profile</tt> module gives access to the low-level API of the
+profiler from Lua code. This module needs to be loaded explicitly:</p>
+<pre class="code">
+local profile = require("jit.profile")
+</pre>
+<p>
+This module can be used to implement your own higher-level profiler.
+A typical profiling run starts the profiler, captures stack dumps in
+the profiler callback, adds them to a hash table to aggregate the number
+of samples, stops the profiler and then analyzes all of the captured
+stack dumps. Other parameters can be sampled in the profiler callback,
+too. But it's important not to spend too much time in the callback,
+since this may skew the statistics.
+</p>
+
+<h3 id="profile_start"><tt>profile.start(mode, cb)</tt>
+&mdash; Start profiler</h3>
+<p>
+This function starts the profiler. The <tt>mode</tt> argument is a
+string holding options:
+</p>
+<ul>
+<li><tt>f</tt> &mdash; Profile with precision down to the function level.</li>
+<li><tt>l</tt> &mdash; Profile with precision down to the line level.</li>
+<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds (default
+10ms).<br>
+Note: The actual sampling precision is OS-dependent.
+</li>
+</ul>
+<p>
+The <tt>cb</tt> argument is a callback function which is called with
+three arguments: <tt>(thread, samples, vmstate)</tt>. The callback is
+called on a separate coroutine; the <tt>thread</tt> argument is the
+state that holds the stack to sample for profiling. Note: do
+<em>not</em> modify the stack of that state or call functions on it.
+</p>
+<p>
+<tt>samples</tt> gives the number of accumulated samples since the last
+callback (usually 1).
+</p>
+<p>
+<tt>vmstate</tt> holds the VM state at the time the profiling timer
+triggered. This may or may not correspond to the state of the VM when
+the profiling callback is called. The state is either <tt>'N'</tt>
+native (compiled) code, <tt>'I'</tt> interpreted code, <tt>'C'</tt>
+C&nbsp;code, <tt>'G'</tt> the garbage collector, or <tt>'J'</tt> the JIT
+compiler.
+</p>
+
+<h3 id="profile_stop"><tt>profile.stop()</tt>
+&mdash; Stop profiler</h3>
+<p>
+This function stops the profiler.
+</p>
+
+<h3 id="profile_dump"><tt>dump = profile.dumpstack([thread,] fmt, depth)</tt>
+&mdash; Dump stack</h3>
+<p>
+This function allows taking stack dumps in an efficient manner. It
+returns a string with a stack dump for the <tt>thread</tt> (coroutine),
+formatted according to the <tt>fmt</tt> argument:
+</p>
+<ul>
+<li><tt>p</tt> &mdash; Preserve the full path for module names. Otherwise
+only the file name is used.</li>
+<li><tt>f</tt> &mdash; Dump the function name if it can be derived. Otherwise
+use module:line.</li>
+<li><tt>F</tt> &mdash; Ditto, but dump module:name.</li>
+<li><tt>l</tt> &mdash; Dump module:line.</li>
+<li><tt>Z</tt> &mdash; Zap the following characters for the last dumped
+frame.</li>
+<li>All other characters are added verbatim to the output string.</li>
+</ul>
+<p>
+The <tt>depth</tt> argument gives the number of frames to dump, starting
+at the topmost frame of the thread. A negative number dumps the frames in
+inverse order.
+</p>
+<p>
+The first example prints a list of the current module names and line
+numbers of up to 10 frames in separate lines. The second example prints
+semicolon-separated module:line entries for all frames (up to 100) in inverse
+order:
+</p>
+<pre class="code">
+print(profile.dumpstack(thread, "l\n", 10))
+print(profile.dumpstack(thread, "lZ;", -100))
+</pre>
+
+<h2 id="ll_c_api">Low-level C API</h2>
+<p>
+The profiler can be controlled directly from C&nbsp;code, e.g. for
+use by IDEs. The declarations are in <tt>"luajit.h"</tt> (see
+<a href="ext_c_api.html">Lua/C API</a> extensions).
+</p>
+
+<h3 id="luaJIT_profile_start"><tt>luaJIT_profile_start(L, mode, cb, data)</tt>
+&mdash; Start profiler</h3>
+<p>
+This function starts the profiler. <a href="#profile_start">See
+above</a> for a description of the <tt>mode</tt> argument.
+</p>
+<p>
+The <tt>cb</tt> argument is a callback function with the following
+declaration:
+</p>
+<pre class="code">
+typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
+                                        int samples, int vmstate);
+</pre>
+<p>
+<tt>data</tt> is available for use by the callback. <tt>L</tt> is the
+state that holds the stack to sample for profiling. Note: do
+<em>not</em> modify this stack or call functions on this stack &mdash;
+use a separate coroutine for this purpose. <a href="#profile_start">See
+above</a> for a description of <tt>samples</tt> and <tt>vmstate</tt>.
+</p>
+
+<h3 id="luaJIT_profile_stop"><tt>luaJIT_profile_stop(L)</tt>
+&mdash; Stop profiler</h3>
+<p>
+This function stops the profiler.
+</p>
+
+<h3 id="luaJIT_profile_dumpstack"><tt>p = luaJIT_profile_dumpstack(L, fmt, depth, len)</tt>
+&mdash; Dump stack</h3>
+<p>
+This function allows taking stack dumps in an efficient manner.
+<a href="#profile_dump">See above</a> for a description of <tt>fmt</tt>
+and <tt>depth</tt>.
+</p>
+<p>
+This function returns a <tt>const&nbsp;char&nbsp;*</tt> pointing to a
+private string buffer of the profiler. The <tt>int&nbsp;*len</tt>
+argument returns the length of the output string. The buffer is
+overwritten on the next call and deallocated when the profiler stops.
+You either need to consume the content immediately or copy it for later
+use.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2015 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/extensions.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/extensions.html
similarity index 89%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/extensions.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/extensions.html
index e9cd136cc4f653e197b46b598b41120dfe4461d3..84ca5ce46bf886e7d491a4475377a3efea024002 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/extensions.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/extensions.html
@@ -58,6 +58,8 @@ td.excinterop {
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -113,6 +115,9 @@ bit.lshift bit.rshift bit.arshift bit.rol  bit.ror  bit.bswap
 This module is a LuaJIT built-in &mdash; you don't need to download or
 install Lua BitOp. The Lua BitOp site has full documentation for all
 <a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
+The FFI adds support for
+<a href="ext_ffi_semantics.html#cdata_arith">64&nbsp;bit bitwise operations</a>,
+using the same API functions.
 </p>
 <p>
 Please make sure to <tt>require</tt> the module before using any of
@@ -146,6 +151,11 @@ LuaJIT adds some
 <a href="ext_c_api.html">extra functions to the Lua/C API</a>.
 </p>
 
+<h3 id="profiler">Profiler</h3>
+<p>
+LuaJIT has an <a href="ext_profiler.html">integrated profiler</a>.
+</p>
+
 <h2 id="library">Enhanced Standard Library Functions</h2>
 
 <h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3>
@@ -173,7 +183,7 @@ in <tt>"-inf"</tt>.
 <h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3>
 <p>
 All string-to-number conversions consistently convert integer and
-floating-point inputs in decimal and hexadecimal on all platforms.
+floating-point inputs in decimal, hexadecimal and binary on all platforms.
 <tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous
 problems with poor C library implementations. The builtin conversion
 function provides full precision according to the IEEE-754 standard, it
@@ -197,6 +207,36 @@ for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
 minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
 bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
 </p>
+<p>
+Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
+a different, incompatible bytecode format for ports that use this mode (e.g.
+ARM64). This may be rectified in the future.
+</p>
+
+<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
+<p>
+An extra library function <tt>table.new()</tt> can be made available via
+<tt>require("table.new")</tt>. This creates a pre-sized table, just like
+the C API equivalent <tt>lua_createtable()</tt>. This is useful for big
+tables if the final table size is known and automatic table resizing is
+too expensive.
+</p>
+
+<h3 id="table_clear"><tt>table.clear(tab)</tt> clears a table</h3>
+<p>
+An extra library function <tt>table.clear()</tt> can be made available
+via <tt>require("table.clear")</tt>. This clears all keys and values
+from a table, but preserves the allocated array/hash sizes. This is
+useful when a table, which is linked from multiple places, needs to be
+cleared and/or when recycling a table for use by the same context. This
+avoids managing backlinks, saves an allocation and the overhead of
+incremental array/hash part growth.
+</p>
+<p>
+Please note this function is meant for very specific situations. In most
+cases it's better to replace the (usually single) link with a new table
+and let the GC do its work.
+</p>
 
 <h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3>
 <p>
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/faq.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/faq.html
similarity index 99%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/faq.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/faq.html
index 9902f0903c0850c95f76f7ac2675cec4b04786ab..aebaef579a4a872389095805675c6209ba92eb93 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/faq.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/faq.html
@@ -44,6 +44,8 @@ dd { margin-left: 1.5em; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/img/contact.png b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/img/contact.png
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/img/contact.png
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/img/contact.png
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/install.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/install.html
similarity index 97%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/install.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/install.html
index 7a878b1cf8f4683ba197db3eb50321c7071d96a2..b5df697b6709cf6869cbde799b60df3878bb6d1f 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/install.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/install.html
@@ -69,6 +69,8 @@ td.compatno {
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -122,7 +124,7 @@ operating systems, CPUs and compilers:
 <td class="compatos">GCC 4.x</td>
 <td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td>
 <td class="compatos">GCC 4.x</td>
-<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0</td>
+<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
 </tr>
 <tr class="odd">
 <td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td>
@@ -132,18 +134,18 @@ operating systems, CPUs and compilers:
 <td class="compatos compatno">&nbsp;</td>
 </tr>
 <tr class="even">
-<td class="compatcpu"><a href="#cross2">PPC</a></td>
-<td class="compatos">GCC 4.3+</td>
-<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
+<td class="compatcpu"><a href="#cross2">ARM64</a></td>
+<td class="compatos">GCC 4.8+</td>
+<td class="compatos compatno">&nbsp;</td>
+<td class="compatos">Clang 3.5+</td>
 <td class="compatos compatno">&nbsp;</td>
-<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
 </tr>
 <tr class="odd">
-<td class="compatcpu"><a href="#cross2">PPC/e500v2</a></td>
-<td class="compatos">GCC 4.3+</td>
+<td class="compatcpu"><a href="#cross2">PPC</a></td>
 <td class="compatos">GCC 4.3+</td>
+<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
 <td class="compatos compatno">&nbsp;</td>
-<td class="compatos compatno">&nbsp;</td>
+<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
 </tr>
 <tr class="even">
 <td class="compatcpu"><a href="#cross2">MIPS</a></td>
@@ -383,10 +385,11 @@ make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
 # ARM hard-float ABI with VFP (armhf, requires recent toolchain)
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
 
+# ARM64 (requires x64 host)
+make CROSS=aarch64-linux-
+
 # PPC
 make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
-# PPC/e500v2 (fast interpreter only)
-make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-
 
 # MIPS big-endian
 make HOST_CC="gcc -m32" CROSS=mips-linux-
@@ -513,6 +516,16 @@ the following commands:
 cd src
 xedkbuild
 </pre>
+<p>
+To cross-compile for <b id="xboxone">Xbox One</b> from a Windows host,
+open a "Visual Studio .NET Command Prompt" (64&nbsp;bit host compiler),
+<tt>cd</tt> to the directory where you've unpacked the sources and run
+the following commands:
+</p>
+<pre class="code">
+cd src
+xb1build
+</pre>
 
 <h2 id="embed">Embedding LuaJIT</h2>
 <p>
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/luajit.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/luajit.html
similarity index 97%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/luajit.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/luajit.html
index 45507c122b494bbc1c31446e5258c6058b174f8d..8a653e2d1a147d707c5ab1e7e6931410470936fa 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/luajit.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/luajit.html
@@ -126,6 +126,8 @@ table.feature small {
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -164,13 +166,13 @@ LuaJIT is Copyright &copy; 2005-2015 Mike Pall, released under the
 <tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
 </table>
 <table class="feature os os3">
-<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td></tr>
+<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr>
 </table>
 <table class="feature compiler">
 <tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr>
 </table>
 <table class="feature cpu">
-<tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr>
+<tr><td>x86</td><td>x64</td><td>ARM</td><td>ARM64</td><td>PPC</td><td>MIPS</td></tr>
 </table>
 <table class="feature fcompat">
 <tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/running.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/running.html
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/running.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/running.html
index c6e1c296ebad79fc8614f285187cc3e443b1733d..5ee67c9587c5f12bdfcd868b85b46ed47387693a 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/running.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/running.html
@@ -63,6 +63,8 @@ td.param_default {
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a href="status.html">Status</a>
@@ -178,6 +180,7 @@ Here are the available LuaJIT control commands:
 <li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
 <li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
 <li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
+<li id="j_p"><tt>-jp</tt> &mdash; Starts the <a href="ext_profiler.html">integrated profiler</a>.</li>
 </ul>
 <p>
 The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
diff --git a/source/libs/luajit/LuaJIT-2.0.4/doc/status.html b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/status.html
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/doc/status.html
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/doc/status.html
index b4bbec793ed5223630f0aa6d87fa0a2c93bdc2e2..91ed9cba33af2969a0a93487e9ca5e6a55598506 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/doc/status.html
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/doc/status.html
@@ -44,6 +44,8 @@ ul li { padding-bottom: 0.3em; }
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
 <a class="current" href="status.html">Status</a>
diff --git a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_arm.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_arm.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_arm.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_arm.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_arm.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_arm.lua
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_arm.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_arm.lua
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_arm64.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_arm64.h
new file mode 100644
index 0000000000000000000000000000000000000000..d912e61dda712bf7637195a5515b7ef027063dd1
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_arm64.h
@@ -0,0 +1,518 @@
+/*
+** DynASM ARM64 encoding engine.
+** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH		"arm64"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d)	0
+#endif
+
+/* Action definitions. */
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC,
+  DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
+  DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS		25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK		0x00000000
+#define DASM_S_NOMEM		0x01000000
+#define DASM_S_PHASE		0x02000000
+#define DASM_S_MATCH_SEC	0x03000000
+#define DASM_S_RANGE_I		0x11000000
+#define DASM_S_RANGE_SEC	0x12000000
+#define DASM_S_RANGE_LG		0x13000000
+#define DASM_S_RANGE_PC		0x14000000
+#define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_UNDEF_LG		0x21000000
+#define DASM_S_UNDEF_PC		0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
+#define DASM_SEC2POS(sec)	((sec)<<24)
+#define DASM_POS2SEC(pos)	((pos)>>24)
+#define DASM_POS2PTR(D, pos)	(D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+  int *rbuf;		/* Biased buffer pointer (negative section bias). */
+  int *buf;		/* True buffer pointer. */
+  size_t bsize;		/* Buffer size in bytes. */
+  int pos;		/* Biased buffer position. */
+  int epos;		/* End of biased buffer position - max single put. */
+  int ofs;		/* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;			/* Allocated size of this structure. */
+  dasm_ActList actionlist;	/* Current actionlist pointer. */
+  int *lglabels;		/* Local/global chain/pos ptrs. */
+  size_t lgsize;		/* Size of the lglabels array in bytes. */
+  int *pclabels;		/* PC label chains/pos ptrs. */
+  size_t pcsize;		/* Size of the pclabels array in bytes. */
+  void **globals;		/* Array of globals (bias -10). */
+  dasm_Section *section;	/* Pointer to active section. */
+  size_t codesize;		/* Total size of all code sections. */
+  int maxsection;		/* 0 <= sectionidx < maxsection. */
+  int status;			/* Status code. */
+  dasm_Section sections[1];	/* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms)	(sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. Label arrays start empty; dasm_setup() sets status/actionlist. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  size_t psz = 0;
+  int i;
+  Dst_REF = NULL;  /* No state yet, so the grow below starts from size 0. */
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;  /* Remembered so dasm_free() can pass it to DASM_M_FREE. */
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  for (i = 0; i < maxsection; i++) {
+    D->sections[i].buf = NULL;  /* Need this for pass3. */
+    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+    D->sections[i].bsize = 0;
+    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
+  }
+}
+
+/* Free DynASM state: every section buffer, both label arrays, then the state itself. */
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  for (i = 0; i < D->maxsection; i++)
+    if (D->sections[i].buf)  /* Sections that were never written have no buffer. */
+      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);  /* Last, since D is dereferenced above. */
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *D = Dst_REF;
+  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));  /* 10 local slots + maxgl globals. */
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  size_t osz = D->pcsize;  /* Old size in bytes, before growing. */
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);  /* Zero only the new tail. */
+}
+
+/* Setup encoder: bind the action list, clear status and labels, rewind all sections. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];  /* Section 0 is the initially active section. */
+  memset((void *)D->lglabels, 0, D->lgsize);  /* Unlike pclabels, not NULL-checked. */
+  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);  /* Biased start position of section i. */
+    D->sections[i].ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS  /* Pass 1 checks: store status|action list offset in D->status and return. */
+#define CK(x, st) \
+  do { if (!(x)) { \
+    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else  /* Checks compiled out: both macros expand to no-ops. */
+#define CK(x, st)	((void)0)
+#define CKPL(kind, st)	((void)0)
+#endif
+
+static int dasm_imm12(unsigned int n)  /* Encode n for DASM_IMM12, or return -1. */
+{
+  if ((n >> 12) == 0)  /* Fits into the low 12 bits: use as-is. */
+    return n;
+  else if ((n & 0xff000fff) == 0)  /* Only bits 12..23 are set. */
+    return (n >> 12) | 0x1000;  /* Shifted-down value, marked by bit 12. */
+  else
+    return -1;  /* Not representable. */
+}
+
+static int dasm_ffs(unsigned long long x)  /* Index of the highest set bit of x, -1 for 0. */
+{
+  int n = -1;
+  while (x) { x >>= 1; n++; }  /* Counts the shifts needed to clear x. */
+  return n;  /* dasm_imm13() passes isolated bits (v & -v), so highest == lowest there. */
+}
+
+static int dasm_imm13(int lo, int hi)  /* Encode 64 bit hi:lo for DASM_IMM13W/X, or -1. */
+{
+  int inv = 0, w = 64, s = 0xfff, xa, xb;
+  unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
+  unsigned long long m = 1ULL, a, b, c;
+  if (n & 1) { n = ~n; inv = 1; }  /* Normalize so that bit 0 is clear. */
+  a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b);  /* Each is an isolated lowest set bit. */
+  xa = dasm_ffs(a); xb = dasm_ffs(b);  /* Bit indexes of a and b (-1 if zero). */
+  if (c) {  /* Set bits remain above the first run: w = distance between run starts. */
+    w = dasm_ffs(c) - xa;
+    if (w == 32) m = 0x0000000100000001UL;
+    else if (w == 16) m = 0x0001000100010001UL;
+    else if (w == 8) m = 0x0101010101010101UL;
+    else if (w == 4) m = 0x1111111111111111UL;
+    else if (w == 2) m = 0x5555555555555555UL;
+    else return -1;  /* Only these power-of-two distances are accepted. */
+    s = (-2*w & 0x3f) - 1;
+  } else if (!a) {
+    return -1;  /* n is zero, i.e. the input was all zeros or all ones. */
+  } else if (xb == -1) {
+    xb = 64;  /* b is zero here: treat it as one bit past the top. */
+  }
+  if ((b-a) * m != n) return -1;  /* n must equal the run (b-a) replicated by m. */
+  if (inv) {
+    return ((w - xb) << 6) | (s+w+xa-xb);
+  } else {
+    return ((w - xa) << 6) | (s+xb-xa);
+  }
+  return -1;  /* Not reached: both branches above return. */
+}
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+  va_list ap;
+  dasm_State *D = Dst_REF;
+  dasm_ActList p = D->actionlist + start;
+  dasm_Section *sec = D->section;
+  int pos = sec->pos, ofs = sec->ofs;
+  int *b;
+
+  if (pos >= sec->epos) {  /* Grow the section buffer before this put can overrun it. */
+    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+  }
+
+  b = sec->rbuf;
+  b[pos++] = start;  /* Every put begins with its action list offset. */
+
+  va_start(ap, start);
+  while (1) {
+    unsigned int ins = *p++;
+    unsigned int action = (ins >> 16);
+    if (action >= DASM__MAX) {
+      ofs += 4;  /* Not an action: a plain 4 byte instruction word. */
+    } else {
+      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;  /* REL_PC and later take an int arg. */
+      switch (action) {
+      case DASM_STOP: goto stop;
+      case DASM_SECTION:
+	n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+	D->section = &D->sections[n]; goto stop;
+      case DASM_ESC: p++; ofs += 4; break;  /* Next word is emitted verbatim. */
+      case DASM_REL_EXT: break;
+      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;  /* Worst-case padding; shrunk in pass 2. */
+      case DASM_REL_LG:
+	n = (ins & 2047) - 10; pl = D->lglabels + n;
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+	pl += 10; n = *pl;
+	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
+	goto linkrel;
+      case DASM_REL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putrel:
+	n = *pl;
+	if (n < 0) {  /* Label exists. Get label pos and store it. */
+	  b[pos] = -n;
+	} else {
+      linkrel:
+	  b[pos] = n;  /* Else link to rel chain, anchored at label. */
+	  *pl = pos;
+	}
+	pos++;
+	break;
+      case DASM_LABEL_LG:
+	pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+      case DASM_LABEL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putlabel:
+	n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
+	while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+	}
+	*pl = -pos;  /* Label exists now. */
+	b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_IMM:
+	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);  /* Bits dropped by the scale shift must be 0. */
+	n >>= ((ins>>10)&31);
+#ifdef DASM_CHECKS
+	if ((ins & 0x8000))  /* Flag bit selects the signed range check. */
+	  CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+	else
+	  CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+      case DASM_IMM6:
+	CK((n >> 6) == 0, RANGE_I);
+	b[pos++] = n;
+	break;
+      case DASM_IMM12:
+	CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+	b[pos++] = n;  /* Raw value is stored; it is encoded again in pass 3. */
+	break;
+      case DASM_IMM13W:
+	CK(dasm_imm13(n, n) != -1, RANGE_I);  /* 32 bit value used for both halves. */
+	b[pos++] = n;
+	break;
+      case DASM_IMM13X: {
+	int m = va_arg(ap, int);  /* Second vararg: the high 32 bits. */
+	CK(dasm_imm13(n, m) != -1, RANGE_I);
+	b[pos++] = n;
+	b[pos++] = m;
+	break;
+	}
+      case DASM_IMML: {
+#ifdef DASM_CHECKS
+	int scale = (p[-2] >> 30);  /* Top two bits of the preceding action list word. */
+	CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
+	   (unsigned int)(n+256) < 512, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+	}
+      }
+    }
+  }
+stop:
+  va_end(ap);
+  sec->pos = pos;  /* Write the local cursors back to the section. */
+  sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;  /* Running adjustment, then start offset of the next section. */
+
+#ifdef DASM_CHECKS
+  *szp = 0;
+  if (D->status != DASM_S_OK) return D->status;
+  {
+    int pc;
+    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;  /* Pending rel chain: label never defined. */
+  }
+#endif
+
+  { /* Handle globals not defined in this translation unit. */
+    int idx;
+    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+    }
+  }
+
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+
+    while (pos != lastpos) {
+      dasm_ActList p = D->actionlist + b[pos++];  /* Replay one dasm_put() record. */
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: p++; break;
+	case DASM_REL_EXT: break;
+	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;  /* Take back unneeded padding. */
+	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;  /* Adjust the pass 1 estimate. */
+	case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
+	case DASM_IMML: pos++; break;
+	case DASM_IMM13X: pos += 2; break;  /* Two buffer slots: low and high word. */
+	}
+      }
+      stop: (void)0;
+    }
+    ofs += sec->ofs;  /* Next section starts right after current section. */
+  }
+
+  D->codesize = ofs;  /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS  /* Pass 3 checks: return status|action list offset from the function. */
+#define CK(x, st) \
+  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else  /* Checks compiled out. */
+#define CK(x, st)	((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+	unsigned int ins = *p++;
+	unsigned int action = (ins >> 16);
+	int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: *cp++ = *p++; break;
+	case DASM_REL_EXT:
+	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
+	  goto patchrel;
+	case DASM_ALIGN:
+	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
+	  break;
+	case DASM_REL_LG:
+	  CK(n >= 0, UNDEF_LG);
+	case DASM_REL_PC:
+	  CK(n >= 0, UNDEF_PC);
+	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
+	patchrel:
+	  if (!(ins & 0xf800)) {  /* B, BL */
+	    CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL);
+	    cp[-1] |= ((n >> 2) & 0x03ffffff);
+	  } else if ((ins & 0x800)) {  /* B.cond, CBZ, CBNZ, LDR* literal */
+	    CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x00ffffe0);
+	  } else if ((ins & 0x3000) == 0x2000) {  /* ADR */
+	    CK(((n+0x00100000) >> 21) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
+	  } else if ((ins & 0x3000) == 0x3000) {  /* ADRP */
+	    cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
+	  } else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
+	    CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 3) & 0x0007ffe0);
+	  }
+	  break;
+	case DASM_LABEL_LG:
+	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  break;
+	case DASM_LABEL_PC: break;
+	case DASM_IMM:
+	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+	  break;
+	case DASM_IMM6:
+	  cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
+	  break;
+	case DASM_IMM12:
+	  cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
+	  break;
+	case DASM_IMM13W:
+	  cp[-1] |= (dasm_imm13(n, n) << 10);
+	  break;
+	case DASM_IMM13X:
+	  cp[-1] |= (dasm_imm13(n, *b++) << 10);
+	  break;
+	case DASM_IMML: {
+	  int scale = (p[-2] >> 30);
+	  cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
+	    ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
+	  break;
+	  }
+	default: *cp++ = ins; break;
+	}
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset. Returns -1 for an undefined label, -2 if unused or out of range. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  if (pc*sizeof(int) < D->pcsize) {
+    int pos = D->pclabels[pc];
+    if (pos < 0) return *DASM_POS2PTR(D, -pos);  /* Defined: offset stored at the label's buffer pos. */
+    if (pos > 0) return -1;  /* Undefined. */
+  }
+  return -2;  /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int i;
+    for (i = 1; i <= 9; i++) {  /* Slots 1..9 of lglabels. */
+      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }  /* Rel chain still pending. */
+      D->lglabels[i] = 0;  /* Forget the label before the next step. */
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0 &&  /* Negative secmatch skips the section check. */
+      D->section != &D->sections[secmatch])
+    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+  return D->status;
+}
+#endif
+
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_arm64.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_arm64.lua
new file mode 100644
index 0000000000000000000000000000000000000000..9766e475b0faa55e1f91eddd44cef26471f93c89
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_arm64.lua
@@ -0,0 +1,1166 @@
+------------------------------------------------------------------------------
+-- DynASM ARM64 module.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+  arch =	"arm",
+  description =	"DynASM ARM64 module",
+  version =	"1.3.0",
+  vernum =	 10300,
+  release =	"2014-12-03",
+  author =	"Mike Pall",
+  license =	"MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort, insert = table.concat, table.sort, table.insert
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local ror, tohex = bit.ror, bit.tohex
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  "STOP", "SECTION", "ESC", "REL_EXT",
+  "ALIGN", "REL_LG", "LABEL_LG",
+  "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+  map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Print each action name with its code in hex and decimal.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for _,aname in ipairs(action_names) do
+    local code = map_action[aname]
+    out:write(format("  %-10s %02X  %d\n", aname, code, code))
+  end
+  out:write("\n")
+end
+
+-- Write the action list to `out` as a static C array called `name`.
+local function writeactions(out, name)
+  local nn = #actlist
+  if nn == 0 then nn = 1; actlist[1] = map_action.STOP end -- Never emit [0].
+  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+  for i = 1,nn-1 do
+    assert(out:write("0x", tohex(actlist[i]), ",\n"))
+  end
+  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) -- Last: no comma.
+end
+
+------------------------------------------------------------------------------
+
+-- Append one raw word (unsigned 32 bit integer) to the action list.
+local function wputxw(n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  insert(actlist, n)
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+  local w = assert(map_action[action], "bad action name `"..action.."'")
+  wputxw(w * 0x10000 + (val or 0)) -- Action number above bit 16, value below.
+  if a then actargs[#actargs+1] = a end -- Extra argument for dasm_put().
+  if a or num then secpos = secpos + (num or 1) end -- Positions consumed.
+end
+
+-- Emit pending actions as a dasm_put() call (C code or buffer pos overflow).
+local function wflush(term)
+  if #actlist == actargs[1] then return end -- Nothing to flush.
+  if not term then waction("STOP") end -- Terminate list, unless `term` is set.
+  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+  secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put a data word; words <= 0xfffff get an ESC action in front of them.
+local function wputw(n)
+  if n <= 0x000fffff then waction("ESC") end -- Same range as action words.
+  wputxw(n)
+end
+
+-- Reserve position for word. Returns the reserved action list index.
+local function wpos()
+  local pos = #actlist+1
+  actlist[pos] = "" -- Placeholder, to be overwritten by wputpos().
+  return pos
+end
+
+-- Store word to reserved position.
+local function wputpos(pos, n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  if n <= 0x000fffff then -- Same escaping rule as in wputw().
+    insert(actlist, pos+1, n) -- The word itself goes right after the slot ...
+    n = map_action.ESC * 0x10000 -- ... and the slot gets the ESC action.
+  end
+  actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20 -- parse_label() hands out 1-9 and 11-19 to local labels.
+local map_global = setmetatable({}, { __index = function(t, name)
+  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+  local n = next_global
+  if n > 2047 then werror("too many global labels") end
+  next_global = n + 1
+  t[name] = n -- Cached: __index only fires on the first lookup of a name.
+  return n
+end})
+
+-- List all global label names, ordered by label number.
+local function dumpglobals(out, lvl)
+  local bynum = {}
+  for label, num in pairs(map_global) do bynum[num] = label end
+  out:write("Global labels:\n")
+  for num = 20, next_global-1 do
+    out:write(format("  %s\n", bynum[num]))
+  end
+  out:write("\n")
+end
+
+-- Emit a C enum with one `prefix`ed entry per global label, plus a _MAX entry.
+local function writeglobals(out, prefix)
+  local bynum = {}
+  for label, num in pairs(map_global) do bynum[num] = label end
+  out:write("enum {\n")
+  for num = 20, next_global-1 do
+    out:write("  ", prefix, bynum[num], ",\n")
+  end
+  out:write("  ", prefix, "_MAX\n};\n")
+end
+
+-- Emit a NULL-terminated C string array `name` holding the global label names.
+local function writeglobalnames(out, name)
+  local bynum = {}
+  for label, num in pairs(map_global) do bynum[num] = label end
+  out:write("static const char *const ", name, "[] = {\n")
+  for num = 20, next_global-1 do
+    out:write('  "', bynum[num], '",\n')
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0
+local map_extern_ = {} -- Reverse map: extern label number -> name.
+local map_extern = setmetatable({}, { __index = function(t, name)
+  -- No restrictions on the name for now.
+  local n = next_extern
+  if n > 2047 then werror("too many extern labels") end
+  next_extern = n + 1
+  t[name] = n -- Cached: __index only fires on the first lookup of a name.
+  map_extern_[n] = name
+  return n
+end})
+
+-- List all extern label names, ordered by label number.
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  for num = 0, next_extern-1 do
+    out:write(format("  %s\n", map_extern_[num]))
+  end
+  out:write("\n")
+end
+
+-- Emit a NULL-terminated C string array `name` holding the extern label names.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  for num = 0, next_extern-1 do
+    out:write('  "', map_extern_[num], '",\n')
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+
+-- Ext. register name -> int. name.
+local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
+
+-- Int. register name -> ext. name.
+local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
+
+local map_type = {}		-- Type name -> { ctype, reg }
+local ctypenum = 0		-- Type number (for Dt... macros).
+
+-- Reverse defines for registers: int. name -> ext. name, else `s` unchanged.
+function _M.revdef(s)
+  return map_reg_rev[s] or s
+end
+
+local map_shift = { lsl = 0, lsr = 1, asr = 2, }
+
+local map_extend = {
+  uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
+  sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
+}
+
+local map_cond = {
+  eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
+  hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
+  hs = 2, lo = 3,
+}
+
+------------------------------------------------------------------------------
+
+local parse_reg_type
+
+-- Parse a register name, a `.type` name or `type:reg`. Returns the register
+-- number and type entry (if any). r31 needs a "@" prefix unless it's FP.
+local function parse_reg(expr)
+  if not expr then werror("expected register name") end
+  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    -- The pattern admits 0..39, so FP registers must be bounded at 31, too.
+    if r <= 30 or (r == 31 and (ok31 ~= "" or (rt ~= "w" and rt ~= "x"))) then
+      if not parse_reg_type then parse_reg_type = rt -- 1st reg sets the size.
+      elseif parse_reg_type ~= rt then werror("register size mismatch") end
+      return r, tp
+    end
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_reg_base(expr) -- Base register, pre-shifted to bit 5.
+  if expr == "sp" then return 0x3e0 end -- Register number 31, shifted by 5.
+  local base, tp = parse_reg(expr)
+  if parse_reg_type ~= "x" then werror("bad register type") end
+  parse_reg_type = false -- Don't let the base constrain other operands.
+  return shl(base, 5), tp
+end
+
+local parse_ctx = {} -- Environment for chunks compiled by loadenv().
+
+local loadenv = setfenv and function(s) -- Variant used when setfenv exists.
+  local code = loadstring(s, "")
+  if code then setfenv(code, parse_ctx) end
+  return code
+end or function(s) -- No setfenv: load() takes the environment directly.
+  return load(s, "", nil, parse_ctx)
+end
+
+-- Evaluate a constant operand (number or simple arithmetic), else nil.
+local function parse_number(n)
+  local value = tonumber(n)
+  if not value then
+    local chunk = loadenv("return "..n)
+    local ok, result
+    if chunk then ok, result = pcall(chunk) end
+    if ok then value = result end
+  end
+  return value
+end
+
+local function parse_imm(imm, bits, shift, scale, signed) -- Generic immediate.
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then -- Constant known at assembly time: encode it right here.
+    local m = sar(n, scale)
+    if shl(m, scale) == n then -- Must be a multiple of 2^scale.
+      if signed then
+	local s = sar(m, bits-1) -- 0 or -1 if m fits into `bits` signed bits.
+	if s == 0 then return shl(m, shift)
+	elseif s == -1 then return shl(m + shl(1, bits), shift) end
+      else
+	if sar(m, bits) == 0 then return shl(m, shift) end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  else -- Not a constant: the IMM action carries signed/scale/bits/shift.
+    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+    return 0
+  end
+end
+
+local function parse_imm12(imm) -- Immediate for template char "I".
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    if shr(n, 12) == 0 then -- Fits into 12 bits as is.
+      return shl(n, 10)
+    elseif band(n, 0xff000fff) == 0 then -- Only bits 12-23 may be set.
+      return shr(n, 2) + 0x00400000 -- (n>>12)<<10, plus bit 22.
+    end
+    werror("out of range immediate `"..imm.."'")
+  else -- Not a constant: resolved by the IMM12 action.
+    waction("IMM12", 0, imm)
+    return 0
+  end
+end
+
+local function parse_imm13(imm) -- Immediate for template char "i".
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  local r64 = parse_reg_type == "x"
+  if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
+    local inv = false
+    if band(n, 1) == 1 then n = bit.bnot(n); inv = true end -- Make bit 0 zero.
+    local t = {}
+    for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end -- Bits, LSB first.
+    local b = table.concat(t)
+    b = b..(r64 and (inv and "1" or "0"):rep(32) or b) -- Extend to 64 chars.
+    local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
+    if p0 then
+      local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a -- Element width.
+      if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
+	local s = band(-2*w, 0x3f) - 1
+	if w == 64 then s = s + 0x1000 end
+	if inv then
+	  return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
+	else
+	  return shl(w-#p0, 16) + shl(s+#p1, 10)
+	end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif r64 then -- Runtime value, passed as low and high 32 bit halves.
+    waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
+    actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
+    return 0
+  else -- Runtime value for a 32 bit operation.
+    waction("IMM13W", 0, imm)
+    return 0
+  end
+end
+
+local function parse_imm6(imm) -- Immediate for template char "T" (0..63).
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    if n >= 0 and n <= 63 then -- Low 5 bits go to bit 19, bit 5 to bit 31.
+      return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0)
+    end
+    werror("out of range immediate `"..imm.."'")
+  else -- Not a constant: resolved by the IMM6 action.
+    waction("IMM6", 0, imm)
+    return 0
+  end
+end
+
+local function parse_imm_load(imm, scale) -- `imm` comes without the "#".
+  local n = parse_number(imm)
+  if n then
+    local m = sar(n, scale) -- Offset in units of 2^scale.
+    if shl(m, scale) == n and m >= 0 and m < 0x1000 then
+      return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
+    elseif n >= -256 and n < 256 then
+      return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
+    end
+    werror("out of range immediate `"..imm.."'")
+  else -- Not a constant: resolved by the IMML action.
+    waction("IMML", 0, imm)
+    return 0
+  end
+end
+
+local function parse_fpimm(imm) -- FP immediate for template char "F".
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = parse_number(imm)
+  if n then
+    local m, e = math.frexp(n) -- n = m * 2^e with 0.5 <= |m| < 1.
+    local s, e2 = 0, band(e-2, 7) -- 3 bit exponent field.
+    if m < 0 then m = -m; s = 0x00100000 end -- Sign goes to bit 20.
+    m = m*32-16 -- Must end up as an integer in 0..15 (4 bit fraction).
+    if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then
+      return s + shl(e2, 17) + shl(m, 13)
+    end
+    werror("out of range immediate `"..imm.."'")
+  else -- Only constants are supported here.
+    werror("NYI fpimm action")
+  end
+end
+
+local function parse_shift(expr) -- Operand of the form "lsl|lsr|asr #n".
+  local s, s2 = match(expr, "^(%S+)%s*(.*)$")
+  s = map_shift[s]
+  if not s then werror("expected shift operand") end
+  return parse_imm(s2, 6, 10, 0, false) + shl(s, 22) -- Amount + shift type.
+end
+
+local function parse_lslx16(expr) -- "lsl #n" with n a multiple of 16.
+  local n = match(expr, "^lsl%s*#(%d+)$")
+  n = tonumber(n)
+  if not n then werror("expected shift operand") end
+  if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then
+    werror("bad shift amount") -- Allowed: 0/16/32/48 for x, 0/16 otherwise.
+  end
+  return shl(n, 17) -- I.e. n/16 ends up at bit 21.
+end
+
+local function parse_extend(expr) -- Extend name, optionally followed by "#n".
+  local s, s2 = match(expr, "^(%S+)%s*(.*)$")
+  if s == "lsl" then -- Treated like uxtx (x regs) or uxtw (others).
+    s = parse_reg_type == "x" and 3 or 2
+  else
+    s = map_extend[s]
+  end
+  if not s then werror("expected extend operand") end
+  return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13)
+end
+
+local function parse_cond(expr, inv) -- inv = 1 flips the lowest condition bit.
+  local c = map_cond[expr]
+  if not c then werror("expected condition operand") end
+  return shl(bit.bxor(c, inv), 12) -- Condition code goes to bit 12.
+end
+
+local function parse_load(params, nparams, n, op) -- Address operand(s) for "L".
+  if params[n+2] then werror("too many operands") end
+  local pn, p2 = params[n], params[n+1]
+  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") -- "[...]" plus optional "!".
+  if not p1 then
+    if not p2 then
+      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+      if reg and tailr ~= "" then
+	local base, tp = parse_reg_base(reg)
+	if tp then -- Typed register: the offset is a C expr built by ctypefmt.
+	  waction("IMML", 0, format(tp.ctypefmt, tailr))
+	  return op + base
+	end
+      end
+    end
+    werror("expected address operand")
+  end
+  local scale = shr(op, 30) -- Top two opcode bits, used as offset scale.
+  if p2 then -- "[base], #imm" form.
+    if wb == "!" then werror("bad use of '!'") end
+    op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
+  elseif wb == "!" then -- "[base, #imm]!" form.
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+    if not p1a then werror("bad use of '!'") end
+    op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
+  else -- "[base]", "[base, #imm]" or "[base, reg {, extend {#n}}]".
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
+    op = op + parse_reg_base(p1a)
+    if p2a ~= "" then
+      local imm = match(p2a, "^,%s*#(.*)$")
+      if imm then
+	op = op + parse_imm_load(imm, scale)
+      else
+	local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
+	op = op + shl(parse_reg(p2b), 16) + 0x00200800
+	if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
+	  werror("bad index register type")
+	end
+	if p3b == "" then -- No extend/shift given: only x index regs allowed.
+	  if parse_reg_type ~= "x" then werror("bad index register type") end
+	  op = op + 0x6000
+	else
+	  if p3s == "" or p3s == "#0" then
+	  elseif p3s == "#"..scale then -- Only "#0" or "#<scale>" are accepted.
+	    op = op + 0x1000
+	  else
+	    werror("bad scale")
+	  end
+	  if parse_reg_type == "x" then
+	    if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
+	    elseif p3b == "sxtx" then op = op + 0xe000
+	    else
+	      werror("bad extend/shift specifier")
+	    end
+	  else
+	    if p3b == "uxtw" then op = op + 0x4000
+	    elseif p3b == "sxtw" then op = op + 0xc000
+	    else
+	      werror("bad extend/shift specifier")
+	    end
+	  end
+	end
+      end
+    else
+      if wb == "!" then werror("bad use of '!'") end -- NOTE(review): unreachable?
+      op = op + 0x01000000 -- Plain "[base]": same form as a zero scaled offset.
+    end
+  end
+  return op
+end
+
+local function parse_load_pair(params, nparams, n, op) -- Address for "P".
+  if params[n+2] then werror("too many operands") end
+  local pn, p2 = params[n], params[n+1]
+  local scale = shr(op, 30) == 0 and 2 or 3 -- Offset is in units of 2^scale.
+  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") -- "[...]" plus optional "!".
+  if not p1 then
+    if not p2 then
+      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+      if reg and tailr ~= "" then
+	local base, tp = parse_reg_base(reg)
+	if tp then -- Typed register: same IMM layout as the parse_imm() below.
+	  waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
+	  return op + base + 0x01000000
+	end
+      end
+    end
+    werror("expected address operand")
+  end
+  if p2 then -- "[base], #imm" form.
+    if wb == "!" then werror("bad use of '!'") end
+    op = op + 0x00800000
+  else -- "[base]", "[base, #imm]" or "[base, #imm]!".
+    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
+    if p1a then p1, p2 = p1a, p2a else p2 = "#0" end -- Default offset is 0.
+    op = op + (wb == "!" and 0x01800000 or 0x01000000)
+  end
+  return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
+end
+
+local function parse_label(label, def) -- Returns mode, number [, pc expr].
+  local prefix = sub(label, 1, 2)
+  -- =>label (pc label reference)
+  if prefix == "=>" then
+    return "PC", 0, sub(label, 3)
+  end
+  -- ->name (global label reference)
+  if prefix == "->" then
+    return "LG", map_global[sub(label, 3)]
+  end
+  if def then -- Label definition: only the forms valid on the left of ":".
+    -- [1-9] (local label definition)
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else -- Label reference.
+    -- [<>][1-9] (local label reference)
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir then -- Fwd: 1-9, Bkwd: 11-19.
+      return "LG", lnum + (dir == ">" and 0 or 10)
+    end
+    -- extern label (extern label reference)
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'")
+end
+
+local function branch_type(op) -- Result is added to the REL_* action value.
+  if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL
+  elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
+	 band(op, 0x3b000000) == 0x18000000 then
+    return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
+  elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ
+  elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR
+  elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP
+  else -- The 1-arg band() above normalizes the constant like the left side.
+    assert(false, "unknown branch type")
+  end
+end
+
+------------------------------------------------------------------------------
+
+local map_op, op_template
+
+local function op_alias(opname, f) -- Build a handler forwarding to `opname`.
+  return function(params, nparams)
+    if not params then return "-> "..opname:sub(1, -3) end -- Strip "_N".
+    f(params, nparams) -- Rewrite the operands in place ...
+    op_template(params, map_op[opname], nparams) -- ... then encode the target.
+  end
+end
+
+local function alias_bfx(p) -- Rewrites operand 4 from "#b" to "#(a)+(b)-1".
+  p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1" -- a: operand 3 sans "#".
+end
+
+-- Operand rewrite used by the sbfiz/bfi/ubfiz aliases: operand 3 "#a" becomes
+-- "#-(a)%32" (w regs) or "#-(a)%64" (others), operand 4 "#b" becomes "#(b)-1".
+-- Parses p[1] first to learn the register type.
+local function alias_bfiz(p)
+  parse_reg(p[1])
+  local regsize = parse_reg_type == "w" and "32" or "64"
+  local first, second = p[3]:sub(2), p[4]:sub(2)
+  p[3] = "#-("..first..")%"..regsize
+  p[4] = "#("..second..")-1"
+end
+
+-- lsl with an immediate shift "#sh" is forwarded to ubfm_4 with the operands
+-- "#-(sh)%32", "#31-(sh)" for w regs and "#-(sh)%64", "#63-(sh)" otherwise.
+-- The operand list is rewritten in place; p[1] is parsed for the register
+-- type only.
+local alias_lslimm = op_alias("ubfm_4", function(p)
+  parse_reg(p[1])
+  local amount = p[3]:sub(2)
+  local is32 = parse_reg_type == "w"
+  p[3] = "#-("..amount..")%"..(is32 and "32" or "64")
+  p[4] = "#"..(is32 and "31" or "63").."-("..amount..")"
+end)
+
+-- Template strings for ARM instructions.
+map_op = {
+  -- Basic data processing instructions.
+  add_3  = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
+  add_4  = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
+  adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
+  adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
+  cmn_2  = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
+  cmn_3  = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",
+
+  sub_3  = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
+  sub_4  = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
+  subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
+  subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
+  cmp_2  = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
+  cmp_3  = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",
+
+  neg_2  = "4b0003e0DMg",
+  neg_3  = "4b0003e0DMSg",
+  negs_2 = "6b0003e0DMg",
+  negs_3 = "6b0003e0DMSg",
+
+  adc_3  = "1a000000DNMg",
+  adcs_3 = "3a000000DNMg",
+  sbc_3  = "5a000000DNMg",
+  sbcs_3 = "7a000000DNMg",
+  ngc_2  = "5a0003e0DMg",
+  ngcs_2 = "7a0003e0DMg",
+
+  and_3  = "0a000000DNMg|12000000pDNig",
+  and_4  = "0a000000DNMSg",
+  orr_3  = "2a000000DNMg|32000000pDNig",
+  orr_4  = "2a000000DNMSg",
+  eor_3  = "4a000000DNMg|52000000pDNig",
+  eor_4  = "4a000000DNMSg",
+  ands_3 = "6a000000DNMg|72000000DNig",
+  ands_4 = "6a000000DNMSg",
+  tst_2  = "6a00001fNMg|7200001fNig",
+  tst_3  = "6a00001fNMSg",
+
+  bic_3  = "0a200000DNMg",
+  bic_4  = "0a200000DNMSg",
+  orn_3  = "2a200000DNMg",
+  orn_4  = "2a200000DNMSg",
+  eon_3  = "4a200000DNMg",
+  eon_4  = "4a200000DNMSg",
+  bics_3 = "6a200000DNMg",
+  bics_4 = "6a200000DNMSg",
+
+  movn_2 = "12800000DWg",
+  movn_3 = "12800000DWRg",
+  movz_2 = "52800000DWg",
+  movz_3 = "52800000DWRg",
+  movk_2 = "72800000DWg",
+  movk_3 = "72800000DWRg",
+
+  -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
+  mov_2  = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
+  mov_3  = "2a0003e0DMSg",
+  mvn_2  = "2a2003e0DMg",
+  mvn_3  = "2a2003e0DMSg",
+
+  adr_2  = "10000000DBx",
+  adrp_2 = "90000000DBx",
+
+  csel_4  = "1a800000DNMCg",
+  csinc_4 = "1a800400DNMCg",
+  csinv_4 = "5a800000DNMCg",
+  csneg_4 = "5a800400DNMCg",
+  cset_2  = "1a9f07e0Dcg",
+  csetm_2 = "5a9f03e0Dcg",
+  cinc_3  = "1a800400DNmcg",
+  cinv_3  = "5a800000DNmcg",
+  cneg_3  = "5a800400DNmcg",
+
+  ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
+  ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
+
+  madd_4 = "1b000000DNMAg",
+  msub_4 = "1b008000DNMAg",
+  mul_3  = "1b007c00DNMg",
+  mneg_3 = "1b00fc00DNMg",
+
+  smaddl_4 = "9b200000DxNMwAx",
+  smsubl_4 = "9b208000DxNMwAx",
+  smull_3  = "9b207c00DxNMw",
+  smnegl_3 = "9b20fc00DxNMw",
+  smulh_3  = "9b407c00DNMx",
+  umaddl_4 = "9ba00000DxNMwAx",
+  umsubl_4 = "9ba08000DxNMwAx",
+  umull_3  = "9ba07c00DxNMw",
+  umnegl_3 = "9ba0fc00DxNMw",
+  umulh_3  = "9bc07c00DNMx",
+
+  udiv_3 = "1ac00800DNMg",
+  sdiv_3 = "1ac00c00DNMg",
+
+  -- Bit operations.
+  sbfm_4 = "13000000DN12w|93400000DN12x",
+  bfm_4  = "33000000DN12w|b3400000DN12x",
+  ubfm_4 = "53000000DN12w|d3400000DN12x",
+  extr_4 = "13800000DNM2w|93c00000DNM2x",
+
+  sxtb_2 = "13001c00DNw|93401c00DNx",
+  sxth_2 = "13003c00DNw|93403c00DNx",
+  sxtw_2 = "93407c00DxNw",
+  uxtb_2 = "53001c00DNw",
+  uxth_2 = "53003c00DNw",
+
+  sbfx_4  = op_alias("sbfm_4", alias_bfx),
+  bfxil_4 = op_alias("bfm_4", alias_bfx),
+  ubfx_4  = op_alias("ubfm_4", alias_bfx),
+  sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
+  bfi_4   = op_alias("bfm_4", alias_bfiz),
+  ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
+
+  lsl_3  = function(params, nparams)
+    if params and params[3]:byte() == 35 then
+      return alias_lslimm(params, nparams)
+    else
+      return op_template(params, "1ac02000DNMg", nparams)
+    end
+  end,
+  lsr_3  = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
+  asr_3  = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
+  ror_3  = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
+
+  clz_2   = "5ac01000DNg",
+  cls_2   = "5ac01400DNg",
+  rbit_2  = "5ac00000DNg",
+  rev_2   = "5ac00800DNw|dac00c00DNx",
+  rev16_2 = "5ac00400DNg",
+  rev32_2 = "dac00800DNx",
+
+  -- Loads and stores.
+  ["strb_*"]  = "38000000DwL",
+  ["ldrb_*"]  = "38400000DwL",
+  ["ldrsb_*"] = "38c00000DwL|38800000DxL",
+  ["strh_*"]  = "78000000DwL",
+  ["ldrh_*"]  = "78400000DwL",
+  ["ldrsh_*"] = "78c00000DwL|78800000DxL",
+  ["str_*"]   = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
+  ["ldr_*"]   = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
+  ["ldrsw_*"] = "98000000DxB|b8800000DxL",
+  -- NOTE: ldur etc. are handled by ldr et al.
+
+  ["stp_*"]   = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
+  ["ldp_*"]   = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
+  ["ldpsw_*"] = "68400000DAxP",
+
+  -- Branches.
+  b_1    = "14000000B",
+  bl_1   = "94000000B",
+  blr_1  = "d63f0000Nx",
+  br_1   = "d61f0000Nx",
+  ret_0  = "d65f03c0",
+  ret_1  = "d65f0000Nx",
+  -- b.cond is added below.
+  cbz_2  = "34000000DBg",
+  cbnz_2 = "35000000DBg",
+  tbz_3  = "36000000DTBw|36000000DTBx",
+  tbnz_3 = "37000000DTBw|37000000DTBx",
+
+  -- Miscellaneous instructions.
+  -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
+  -- TODO: sys, sysl, ic, dc, at, tlbi
+  -- TODO: hint, yield, wfe, wfi, sev, sevl
+  -- TODO: clrex, dsb, dmb, isb
+  nop_0  = "d503201f",
+  brk_0  = "d4200000",
+  brk_1  = "d4200000W",
+
+  -- Floating point instructions.
+  fmov_2  = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
+  fabs_2  = "1e20c000DNf",
+  fneg_2  = "1e214000DNf",
+  fsqrt_2 = "1e21c000DNf",
+
+  fcvt_2  = "1e22c000DdNs|1e624000DsNd",
+
+  -- TODO: half-precision and fixed-point conversions.
+  fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
+  fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
+  fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
+  fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
+  fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
+  fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
+  fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
+  fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
+  fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
+  fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
+
+  scvtf_2  = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
+  ucvtf_2  = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
+
+  frintn_2 = "1e244000DNf",
+  frintp_2 = "1e24c000DNf",
+  frintm_2 = "1e254000DNf",
+  frintz_2 = "1e25c000DNf",
+  frinta_2 = "1e264000DNf",
+  frintx_2 = "1e274000DNf",
+  frinti_2 = "1e27c000DNf",
+
+  fadd_3   = "1e202800DNMf",
+  fsub_3   = "1e203800DNMf",
+  fmul_3   = "1e200800DNMf",
+  fnmul_3  = "1e208800DNMf",
+  fdiv_3   = "1e201800DNMf",
+
+  fmadd_4  = "1f000000DNMAf",
+  fmsub_4  = "1f008000DNMAf",
+  fnmadd_4 = "1f200000DNMAf",
+  fnmsub_4 = "1f208000DNMAf",
+
+  fmax_3   = "1e204800DNMf",
+  fmaxnm_3 = "1e206800DNMf",
+  fmin_3   = "1e205800DNMf",
+  fminnm_3 = "1e207800DNMf",
+
+  fcmp_2   = "1e202000NMf|1e202008NZf",
+  fcmpe_2  = "1e202010NMf|1e202018NZf",
+
+  fccmp_4  = "1e200400NMVCf",
+  fccmpe_4 = "1e200410NMVCf",
+
+  fcsel_4  = "1e200c00DNMCf",
+
+  -- TODO: crc32*, aes*, sha*, pmull
+  -- TODO: SIMD instructions.
+}
+
+for cond,c in pairs(map_cond) do -- Add one b<cond> op per condition name.
+  map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B" -- Condition in low bits.
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+local function parse_template(params, template, nparams, pos)
+  local op = tonumber(sub(template, 1, 8), 16) -- 8 hex digits: base opcode.
+  local n = 1 -- Index of the next operand in params.
+  local rtt = {} -- NOTE(review): appears to be unused in this function.
+
+  parse_reg_type = false
+
+  -- Process each character.
+  for p in gmatch(sub(template, 9), ".") do
+    local q = params[n]
+    if p == "D" then -- Register at bit 0.
+      op = op + parse_reg(q); n = n + 1
+    elseif p == "N" then -- Register at bit 5.
+      op = op + shl(parse_reg(q), 5); n = n + 1
+    elseif p == "M" then -- Register at bit 16.
+      op = op + shl(parse_reg(q), 16); n = n + 1
+    elseif p == "A" then -- Register at bit 10.
+      op = op + shl(parse_reg(q), 10); n = n + 1
+    elseif p == "m" then -- Previous operand again, at bit 16.
+      op = op + shl(parse_reg(params[n-1]), 16)
+
+    elseif p == "p" then -- Let the current operand be "sp".
+      if q == "sp" then params[n] = "@x31" end
+    elseif p == "g" then -- x or w registers; x sets bit 31.
+      if parse_reg_type == "x" then
+	op = op + 0x80000000
+      elseif parse_reg_type ~= "w" then
+	werror("bad register type")
+      end
+      parse_reg_type = false
+    elseif p == "f" then -- d or s registers; d sets bit 22.
+      if parse_reg_type == "d" then
+	op = op + 0x00400000
+      elseif parse_reg_type ~= "s" then
+	werror("bad register type")
+      end
+      parse_reg_type = false
+    elseif p == "x" or p == "w" or p == "d" or p == "s" then -- Exact type.
+      if parse_reg_type ~= p then
+	werror("register size mismatch")
+      end
+      parse_reg_type = false
+
+    elseif p == "L" then
+      op = parse_load(params, nparams, n, op)
+    elseif p == "P" then
+      op = parse_load_pair(params, nparams, n, op)
+
+    elseif p == "B" then -- Label operand: emits a REL_* action.
+      local mode, v, s = parse_label(q, false); n = n + 1
+      local m = branch_type(op)
+      waction("REL_"..mode, v+m, s, 1)
+
+    elseif p == "I" then
+      op = op + parse_imm12(q); n = n + 1
+    elseif p == "i" then
+      op = op + parse_imm13(q); n = n + 1
+    elseif p == "W" then -- 16 bit immediate at bit 5.
+      op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
+    elseif p == "T" then
+      op = op + parse_imm6(q); n = n + 1
+    elseif p == "1" then -- 6 bit immediate at bit 16.
+      op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
+    elseif p == "2" then -- 6 bit immediate at bit 10.
+      op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
+    elseif p == "5" then -- 5 bit immediate at bit 16.
+      op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
+    elseif p == "V" then -- 4 bit immediate at bit 0.
+      op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
+    elseif p == "F" then
+      op = op + parse_fpimm(q); n = n + 1
+    elseif p == "Z" then -- Literal zero operand, nothing to encode.
+      if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
+      n = n + 1
+
+    elseif p == "S" then
+      op = op + parse_shift(q); n = n + 1
+    elseif p == "X" then
+      op = op + parse_extend(q); n = n + 1
+    elseif p == "R" then
+      op = op + parse_lslx16(q); n = n + 1
+    elseif p == "C" then -- Condition code as given.
+      op = op + parse_cond(q, 0); n = n + 1
+    elseif p == "c" then -- Condition code with the lowest bit flipped.
+      op = op + parse_cond(q, 1); n = n + 1
+
+    else
+      assert(false)
+    end
+  end
+  wputpos(pos, op) -- Store the finished word into the reserved slot `pos`.
+end
+
+function op_template(params, template, nparams) -- Encode one template op.
+  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 3 positions.
+  if secpos+3 > maxsecpos then wflush() end
+  local pos = wpos()
+  local lpos, apos, spos = #actlist, #actargs, secpos -- State for rollback.
+
+  local ok, err
+  for t in gmatch(template, "[^|]+") do -- Try each "|" alternative in turn.
+    ok, err = pcall(parse_template, params, t, nparams, pos)
+    if ok then return end -- The first alternative that parses wins.
+    secpos = spos -- Otherwise undo what the failed attempt emitted.
+    actlist[lpos+1] = nil
+    actlist[lpos+2] = nil
+    actlist[lpos+3] = nil
+    actargs[apos+1] = nil
+    actargs[apos+2] = nil
+    actargs[apos+3] = nil
+  end
+  error(err, 0) -- All alternatives failed: report the last error.
+end
+
+map_op[".template__"] = op_template
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+  if not params then return "cvar" end -- No params: return operand description.
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeactions(out, name) end) -- Writer gets `out` later.
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end -- No params: return operand description.
+  local prefix = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobals(out, prefix) end) -- Writer gets `out` later.
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end -- No params: return operand description.
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobalnames(out, name) end) -- Writer gets `out` later.
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end -- No params: return operand description.
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeexternnames(out, name) end) -- Writer gets `out` later.
+end
+
+------------------------------------------------------------------------------
+
+-- Label definition pseudo-opcode (the trailing colon form is converted to it).
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  if secpos+1 > maxsecpos then wflush() end
+  local kind, num, expr = parse_label(params[1], true)
+  if kind == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..kind, num, expr, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Data storage pseudo-opcode: every operand is written out with wputw().
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _,operand in ipairs(params) do
+    local word = tonumber(operand)
+    if not word then werror("bad immediate `"..operand.."'") end
+    -- Negative values are biased by 2^32 before being written.
+    wputw(word < 0 and word + 2^32 or word)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode; anything but an accepted power of two is an error.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  if align then
+    -- Accept only the powers of two 2, 4, ... 256.
+    local pow2 = 2
+    for _=1,8 do
+      if pow2 == align then
+        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+        return
+      end
+      pow2 = pow2 * 2
+    end
+  end
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+-- Operands: name, ctype and an optional reg (.type_2 is the form without reg).
+-- Without params it returns the operand description string for that arity.
+-- Malformed names and names already present in map_type are reported.
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    if nparams == 2 then return "name, ctype" end
+    return "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  if map_type[name] then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Make sizeof() of the C type available as #name (kept in map_archdef).
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Register the new type and emit its Dt<num> shortcut define.
+  local num = ctypenum + 1
+  local fmt = format("Dt%X(%%s)", num)
+  map_type[name] = { ctype = ctype, ctypefmt = fmt, reg = reg }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Write a name-sorted listing of all entries in map_type to `out`.
+-- The lvl argument is accepted but not used in this function.
+local function dumptypes(out, lvl)
+  local names = {}
+  for name in pairs(map_type) do names[#names+1] = name end
+  sort(names)
+  out:write("Type definitions:\n")
+  for i=1,#names do
+    local tp = map_type[names[i]]
+    out:write(format("  %-20s %-20s %s\n", names[i], tp.ctype, tp.reg or ""))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section: emit a SECTION action, then flush.
+_M.section = function(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description: a banner line, then the action dump.
+_M.dumparch = function(out)
+  local banner = "DynASM %s version %s, released %s\n\n"
+  out:write(format(banner, _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements: types, then globals, then externs.
+_M.dumpdef = function(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Store the callbacks passed in by the DynASM core; hand back wflush.
+_M.passcb = function(wl, we, wf, ww)
+  wline, werror = wl, we; wfatal, wwarn = wf, ww
+  return wflush
+end
+
+-- Setup the arch-specific module: keep arch and opt in g_arch and g_opt.
+_M.setup = function(arch, opt)
+  g_arch = arch; g_opt = opt
+end
+
+-- Merge maps: map_op falls back to map_coreop, map_def to map_archdef.
+_M.mergemaps = function(map_coreop, map_def)
+  local core_mt, arch_mt = { __index = map_coreop }, { __index = map_archdef }
+  setmetatable(map_op, core_mt)
+  return map_op, setmetatable(map_def, arch_mt)
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_mips.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_mips.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_mips.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_mips.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_mips.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_mips.lua
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_mips.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_mips.lua
diff --git a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_ppc.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_ppc.h
similarity index 97%
rename from source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_ppc.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_ppc.h
index 7df493654351ecb4c26b3652d008154763f53b2c..332c64dc68ab0bad34428eae18ac26c3d41f3491 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_ppc.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_ppc.h
@@ -1,5 +1,5 @@
 /*
-** DynASM PPC encoding engine.
+** DynASM PPC/PPC64 encoding engine.
 ** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
@@ -21,7 +21,7 @@ enum {
   /* The following actions need a buffer position. */
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   /* The following actions also have an argument. */
-  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
   DASM__MAX
 };
 
@@ -244,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...)
 #endif
 	b[pos++] = n;
 	break;
+      case DASM_IMMSH:
+	CK((n >> 6) == 0, RANGE_I);
+	b[pos++] = n;
+	break;
       }
     }
   }
@@ -299,7 +303,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 	case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
-	case DASM_IMM: pos++; break;
+	case DASM_IMM: case DASM_IMMSH: pos++; break;
 	}
       }
       stop: (void)0;
@@ -366,6 +370,9 @@ int dasm_encode(Dst_DECL, void *buffer)
 	case DASM_IMM:
 	  cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
 	  break;
+	case DASM_IMMSH:
+	  cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
+	  break;
 	default: *cp++ = ins; break;
 	}
       }
diff --git a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_ppc.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_ppc.lua
similarity index 63%
rename from source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_ppc.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_ppc.lua
index 91f4ff9a4c8b917b0b30119d1099fa6bfde22f03..278f09526dd0fd2c03a9900148423fc3d930eb48 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_ppc.lua
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_ppc.lua
@@ -1,8 +1,10 @@
 ------------------------------------------------------------------------------
--- DynASM PPC module.
+-- DynASM PPC/PPC64 module.
 --
 -- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
+--
+-- Support for various extensions contributed by Caio Souza Oliveira.
 ------------------------------------------------------------------------------
 
 -- Module information:
@@ -11,7 +13,7 @@ local _info = {
   description =	"DynASM PPC module",
   version =	"1.3.0",
   vernum =	 10300,
-  release =	"2011-05-05",
+  release =	"2015-01-14",
   author =	"Mike Pall",
   license =	"MIT",
 }
@@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn
 local action_names = {
   "STOP", "SECTION", "ESC", "REL_EXT",
   "ALIGN", "REL_LG", "LABEL_LG",
-  "REL_PC", "LABEL_PC", "IMM",
+  "REL_PC", "LABEL_PC", "IMM", "IMMSH"
 }
 
 -- Maximum number of section buffer positions for dasm_put().
@@ -228,8 +230,18 @@ local map_cond = {
 
 ------------------------------------------------------------------------------
 
+local map_op, op_template
+
+local function op_alias(opname, f)
+  -- Handler for an alias of template `opname`; f(params, nparams) runs first.
+  return function(params, nparams)
+    if params then f(params, nparams) else return "-> "..opname:sub(1, -3) end
+    op_template(params, map_op[opname], nparams)
+  end
+end
+
 -- Template strings for PPC instructions.
-local map_op = {
+map_op = {
   tdi_3 =	"08000000ARI",
   twi_3 =	"0c000000ARI",
   mulli_3 =	"1c000000RRI",
@@ -297,6 +309,250 @@ local map_op = {
   std_2 =	"f8000000RD",
   stdu_2 =	"f8000001RD",
 
+  subi_3 =	op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end),
+  subis_3 =	op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end),
+  subic_3 =	op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end),
+  ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end),
+
+  rotlwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = "0"; p[5] = "31"
+  end),
+  rotrwi_3 =	op_alias("rlwinm_5", function(p)
+    p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31"
+  end),
+  rotlw_3 =	op_alias("rlwnm_5", function(p)
+    p[4] = "0"; p[5] = "31"
+  end),
+  slwi_3 =	op_alias("rlwinm_5", function(p)
+    p[5] = "31-("..p[3]..")"; p[4] = "0"
+  end),
+  srwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31"
+  end),
+  clrlwi_3 =	op_alias("rlwinm_5", function(p)
+    p[4] = p[3]; p[3] = "0"; p[5] = "31"
+  end),
+  clrrwi_3 =	op_alias("rlwinm_5", function(p)
+    p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0"
+  end),
+
+  -- Primary opcode 4:
+  mulhhwu_3 =		"10000010RRR.",
+  machhwu_3 =		"10000018RRR.",
+  mulhhw_3 =		"10000050RRR.",
+  nmachhw_3 =		"1000005cRRR.",
+  machhwsu_3 =		"10000098RRR.",
+  machhws_3 =		"100000d8RRR.",
+  nmachhws_3 =		"100000dcRRR.",
+  mulchwu_3 =		"10000110RRR.",
+  macchwu_3 =		"10000118RRR.",
+  mulchw_3 =		"10000150RRR.",
+  macchw_3 =		"10000158RRR.",
+  nmacchw_3 =		"1000015cRRR.",
+  macchwsu_3 =		"10000198RRR.",
+  macchws_3 =		"100001d8RRR.",
+  nmacchws_3 =		"100001dcRRR.",
+  mullhw_3 =		"10000350RRR.",
+  maclhw_3 =		"10000358RRR.",
+  nmaclhw_3 =		"1000035cRRR.",
+  maclhwsu_3 =		"10000398RRR.",
+  maclhws_3 =		"100003d8RRR.",
+  nmaclhws_3 =		"100003dcRRR.",
+  machhwuo_3 =		"10000418RRR.",
+  nmachhwo_3 =		"1000045cRRR.",
+  machhwsuo_3 =		"10000498RRR.",
+  machhwso_3 =		"100004d8RRR.",
+  nmachhwso_3 =		"100004dcRRR.",
+  macchwuo_3 =		"10000518RRR.",
+  macchwo_3 =		"10000558RRR.",
+  nmacchwo_3 =		"1000055cRRR.",
+  macchwsuo_3 =		"10000598RRR.",
+  macchwso_3 =		"100005d8RRR.",
+  nmacchwso_3 =		"100005dcRRR.",
+  maclhwo_3 =		"10000758RRR.",
+  nmaclhwo_3 =		"1000075cRRR.",
+  maclhwsuo_3 =		"10000798RRR.",
+  maclhwso_3 =		"100007d8RRR.",
+  nmaclhwso_3 =		"100007dcRRR.",
+
+  vaddubm_3 =		"10000000VVV",
+  vmaxub_3 =		"10000002VVV",
+  vrlb_3 =		"10000004VVV",
+  vcmpequb_3 =		"10000006VVV",
+  vmuloub_3 =		"10000008VVV",
+  vaddfp_3 =		"1000000aVVV",
+  vmrghb_3 =		"1000000cVVV",
+  vpkuhum_3 =		"1000000eVVV",
+  vmhaddshs_4 =		"10000020VVVV",
+  vmhraddshs_4 =	"10000021VVVV",
+  vmladduhm_4 =		"10000022VVVV",
+  vmsumubm_4 =		"10000024VVVV",
+  vmsummbm_4 =		"10000025VVVV",
+  vmsumuhm_4 =		"10000026VVVV",
+  vmsumuhs_4 =		"10000027VVVV",
+  vmsumshm_4 =		"10000028VVVV",
+  vmsumshs_4 =		"10000029VVVV",
+  vsel_4 =		"1000002aVVVV",
+  vperm_4 =		"1000002bVVVV",
+  vsldoi_4 =		"1000002cVVVP",
+  vpermxor_4 =		"1000002dVVVV",
+  vmaddfp_4 =		"1000002eVVVV~",
+  vnmsubfp_4 =		"1000002fVVVV~",
+  vaddeuqm_4 =		"1000003cVVVV",
+  vaddecuq_4 =		"1000003dVVVV",
+  vsubeuqm_4 =		"1000003eVVVV",
+  vsubecuq_4 =		"1000003fVVVV",
+  vadduhm_3 =		"10000040VVV",
+  vmaxuh_3 =		"10000042VVV",
+  vrlh_3 =		"10000044VVV",
+  vcmpequh_3 =		"10000046VVV",
+  vmulouh_3 =		"10000048VVV",
+  vsubfp_3 =		"1000004aVVV",
+  vmrghh_3 =		"1000004cVVV",
+  vpkuwum_3 =		"1000004eVVV",
+  vadduwm_3 =		"10000080VVV",
+  vmaxuw_3 =		"10000082VVV",
+  vrlw_3 =		"10000084VVV",
+  vcmpequw_3 =		"10000086VVV",
+  vmulouw_3 =		"10000088VVV",
+  vmuluwm_3 =		"10000089VVV",
+  vmrghw_3 =		"1000008cVVV",
+  vpkuhus_3 =		"1000008eVVV",
+  vaddudm_3 =		"100000c0VVV",
+  vmaxud_3 =		"100000c2VVV",
+  vrld_3 =		"100000c4VVV",
+  vcmpeqfp_3 =		"100000c6VVV",
+  vcmpequd_3 =		"100000c7VVV",
+  vpkuwus_3 =		"100000ceVVV",
+  vadduqm_3 =		"10000100VVV",
+  vmaxsb_3 =		"10000102VVV",
+  vslb_3 =		"10000104VVV",
+  vmulosb_3 =		"10000108VVV",
+  vrefp_2 =		"1000010aV-V",
+  vmrglb_3 =		"1000010cVVV",
+  vpkshus_3 =		"1000010eVVV",
+  vaddcuq_3 =		"10000140VVV",
+  vmaxsh_3 =		"10000142VVV",
+  vslh_3 =		"10000144VVV",
+  vmulosh_3 =		"10000148VVV",
+  vrsqrtefp_2 =		"1000014aV-V",
+  vmrglh_3 =		"1000014cVVV",
+  vpkswus_3 =		"1000014eVVV",
+  vaddcuw_3 =		"10000180VVV",
+  vmaxsw_3 =		"10000182VVV",
+  vslw_3 =		"10000184VVV",
+  vmulosw_3 =		"10000188VVV",
+  vexptefp_2 =		"1000018aV-V",
+  vmrglw_3 =		"1000018cVVV",
+  vpkshss_3 =		"1000018eVVV",
+  vmaxsd_3 =		"100001c2VVV",
+  vsl_3 =		"100001c4VVV",
+  vcmpgefp_3 =		"100001c6VVV",
+  vlogefp_2 =		"100001caV-V",
+  vpkswss_3 =		"100001ceVVV",
+  vadduhs_3 =		"10000240VVV",
+  vminuh_3 =		"10000242VVV",
+  vsrh_3 =		"10000244VVV",
+  vcmpgtuh_3 =		"10000246VVV",
+  vmuleuh_3 =		"10000248VVV",
+  vrfiz_2 =		"1000024aV-V",
+  vsplth_3 =		"1000024cVV3",
+  vupkhsh_2 =		"1000024eV-V",
+  vminuw_3 =		"10000282VVV",
+  vminud_3 =		"100002c2VVV",
+  vcmpgtud_3 =		"100002c7VVV",
+  vrfim_2 =		"100002caV-V",
+  vcmpgtsb_3 =		"10000306VVV",
+  vcfux_3 =		"1000030aVVA~",
+  vaddshs_3 =		"10000340VVV",
+  vminsh_3 =		"10000342VVV",
+  vsrah_3 =		"10000344VVV",
+  vcmpgtsh_3 =		"10000346VVV",
+  vmulesh_3 =		"10000348VVV",
+  vcfsx_3 =		"1000034aVVA~",
+  vspltish_2 =		"1000034cVS",
+  vupkhpx_2 =		"1000034eV-V",
+  vaddsws_3 =		"10000380VVV",
+  vminsw_3 =		"10000382VVV",
+  vsraw_3 =		"10000384VVV",
+  vcmpgtsw_3 =		"10000386VVV",
+  vmulesw_3 =		"10000388VVV",
+  vctuxs_3 =		"1000038aVVA~",
+  vspltisw_2 =		"1000038cVS",
+  vminsd_3 =		"100003c2VVV",
+  vsrad_3 =		"100003c4VVV",
+  vcmpbfp_3 =		"100003c6VVV",
+  vcmpgtsd_3 =		"100003c7VVV",
+  vctsxs_3 =		"100003caVVA~",
+  vupklpx_2 =		"100003ceV-V",
+  vsububm_3 =		"10000400VVV",
+  ["bcdadd._4"] =	"10000401VVVy.",
+  vavgub_3 =		"10000402VVV",
+  vand_3 =		"10000404VVV",
+  ["vcmpequb._3"] =	"10000406VVV",
+  vmaxfp_3 =		"1000040aVVV",
+  vsubuhm_3 =		"10000440VVV",
+  ["bcdsub._4"] =	"10000441VVVy.",
+  vavguh_3 =		"10000442VVV",
+  vandc_3 =		"10000444VVV",
+  ["vcmpequh._3"] =	"10000446VVV",
+  vminfp_3 =		"1000044aVVV",
+  vpkudum_3 =		"1000044eVVV",
+  vsubuwm_3 =		"10000480VVV",
+  vavguw_3 =		"10000482VVV",
+  vor_3 =		"10000484VVV",
+  ["vcmpequw._3"] =	"10000486VVV",
+  vpmsumw_3 =		"10000488VVV",
+  ["vcmpeqfp._3"] =	"100004c6VVV",
+  ["vcmpequd._3"] =	"100004c7VVV",
+  vpkudus_3 =		"100004ceVVV",
+  vavgsb_3 =		"10000502VVV",
+  vavgsh_3 =		"10000542VVV",
+  vorc_3 =		"10000544VVV",
+  vbpermq_3 =		"1000054cVVV",
+  vpksdus_3 =		"1000054eVVV",
+  vavgsw_3 =		"10000582VVV",
+  vsld_3 =		"100005c4VVV",
+  ["vcmpgefp._3"] =	"100005c6VVV",
+  vpksdss_3 =		"100005ceVVV",
+  vsububs_3 =		"10000600VVV",
+  mfvscr_1 =		"10000604V--",
+  vsum4ubs_3 =		"10000608VVV",
+  vsubuhs_3 =		"10000640VVV",
+  mtvscr_1 =		"10000644--V",
+  ["vcmpgtuh._3"] =	"10000646VVV",
+  vsum4shs_3 =		"10000648VVV",
+  vupkhsw_2 =		"1000064eV-V",
+  vsubuws_3 =		"10000680VVV",
+  vshasigmaw_4 =	"10000682VVYp",
+  veqv_3 =		"10000684VVV",
+  vsum2sws_3 =		"10000688VVV",
+  vmrgow_3 =		"1000068cVVV",
+  vshasigmad_4 =	"100006c2VVYp",
+  vsrd_3 =		"100006c4VVV",
+  ["vcmpgtud._3"] =	"100006c7VVV",
+  vupklsw_2 =		"100006ceV-V",
+  vupkslw_2 =		"100006ceV-V",
+  vsubsbs_3 =		"10000700VVV",
+  vclzb_2 =		"10000702V-V",
+  vpopcntb_2 =		"10000703V-V",
+  ["vcmpgtsb._3"] =	"10000706VVV",
+  vsum4sbs_3 =		"10000708VVV",
+  vsubshs_3 =		"10000740VVV",
+  vclzh_2 =		"10000742V-V",
+  vpopcnth_2 =		"10000743V-V",
+  ["vcmpgtsh._3"] =	"10000746VVV",
+  vsubsws_3 =		"10000780VVV",
+  vclzw_2 =		"10000782V-V",
+  vpopcntw_2 =		"10000783V-V",
+  ["vcmpgtsw._3"] =	"10000786VVV",
+  vsumsws_3 =		"10000788VVV",
+  vmrgew_3 =		"1000078cVVV",
+  vclzd_2 =		"100007c2V-V",
+  vpopcntd_2 =		"100007c3V-V",
+  ["vcmpbfp._3"] =	"100007c6VVV",
+  ["vcmpgtsd._3"] =	"100007c7VVV",
+
   -- Primary opcode 19:
   mcrf_2 =	"4c000000XX",
   isync_0 =	"4c00012c",
@@ -316,6 +572,8 @@ local map_op = {
   bclrl_2 =	"4c000021AA",
   bcctr_2 =	"4c000420AA",
   bcctrl_2 =	"4c000421AA",
+  bctar_2 =	"4c000460AA",
+  bctarl_2 =	"4c000461AA",
   blr_0 =	"4e800020",
   blrl_0 =	"4e800021",
   bctr_0 =	"4e800420",
@@ -327,6 +585,7 @@ local map_op = {
   cmpd_3 =	"7c200000XRR",
   cmpd_2 =	"7c200000-RR",
   tw_3 =	"7c000008ARR",
+  lvsl_3 =	"7c00000cVRR",
   subfc_3 =	"7c000010RRR.",
   subc_3 =	"7c000010RRR~.",
   mulhdu_3 =	"7c000012RRR.",
@@ -351,50 +610,68 @@ local map_op = {
   cmplw_2 =	"7c000040-RR",
   cmpld_3 =	"7c200040XRR",
   cmpld_2 =	"7c200040-RR",
+  lvsr_3 =	"7c00004cVRR",
   subf_3 =	"7c000050RRR.",
   sub_3 =	"7c000050RRR~.",
+  lbarx_3 =	"7c000068RR0R",
   ldux_3 =	"7c00006aRR0R",
   dcbst_2 =	"7c00006c-RR",
   lwzux_3 =	"7c00006eRR0R",
   cntlzd_2 =	"7c000074RR~",
   andc_3 =	"7c000078RR~R.",
   td_3 =	"7c000088ARR",
+  lvewx_3 =	"7c00008eVRR",
   mulhd_3 =	"7c000092RRR.",
+  addg6s_3 =	"7c000094RRR",
   mulhw_3 =	"7c000096RRR.",
+  dlmzb_3 =	"7c00009cRR~R.",
   ldarx_3 =	"7c0000a8RR0R",
   dcbf_2 =	"7c0000ac-RR",
   lbzx_3 =	"7c0000aeRR0R",
+  lvx_3 =	"7c0000ceVRR",
   neg_2 =	"7c0000d0RR.",
+  lharx_3 =	"7c0000e8RR0R",
   lbzux_3 =	"7c0000eeRR0R",
   popcntb_2 =	"7c0000f4RR~",
   not_2 =	"7c0000f8RR~%.",
   nor_3 =	"7c0000f8RR~R.",
+  stvebx_3 =	"7c00010eVRR",
   subfe_3 =	"7c000110RRR.",
   sube_3 =	"7c000110RRR~.",
   adde_3 =	"7c000114RRR.",
   stdx_3 =	"7c00012aRR0R",
-  stwcx_3 =	"7c00012cRR0R.",
+  ["stwcx._3"] =	"7c00012dRR0R.",
   stwx_3 =	"7c00012eRR0R",
   prtyw_2 =	"7c000134RR~",
+  stvehx_3 =	"7c00014eVRR",
   stdux_3 =	"7c00016aRR0R",
+  ["stqcx._3"] =	"7c00016dR:R0R.",
   stwux_3 =	"7c00016eRR0R",
   prtyd_2 =	"7c000174RR~",
+  stvewx_3 =	"7c00018eVRR",
   subfze_2 =	"7c000190RR.",
   addze_2 =	"7c000194RR.",
-  stdcx_3 =	"7c0001acRR0R.",
+  ["stdcx._3"] =	"7c0001adRR0R.",
   stbx_3 =	"7c0001aeRR0R",
+  stvx_3 =	"7c0001ceVRR",
   subfme_2 =	"7c0001d0RR.",
   mulld_3 =	"7c0001d2RRR.",
   addme_2 =	"7c0001d4RR.",
   mullw_3 =	"7c0001d6RRR.",
   dcbtst_2 =	"7c0001ec-RR",
   stbux_3 =	"7c0001eeRR0R",
+  bpermd_3 =	"7c0001f8RR~R",
+  lvepxl_3 =	"7c00020eVRR",
   add_3 =	"7c000214RRR.",
+  lqarx_3 =	"7c000228R:R0R",
   dcbt_2 =	"7c00022c-RR",
   lhzx_3 =	"7c00022eRR0R",
+  cdtbcd_2 =	"7c000234RR~",
   eqv_3 =	"7c000238RR~R.",
+  lvepx_3 =	"7c00024eVRR",
   eciwx_3 =	"7c00026cRR0R",
   lhzux_3 =	"7c00026eRR0R",
+  cbcdtd_2 =	"7c000274RR~",
   xor_3 =	"7c000278RR~R.",
   mfspefscr_1 =	"7c0082a6R",
   mfxer_1 =	"7c0102a6R",
@@ -404,8 +681,12 @@ local map_op = {
   lhax_3 =	"7c0002aeRR0R",
   mftb_1 =	"7c0c42e6R",
   mftbu_1 =	"7c0d42e6R",
+  lvxl_3 =	"7c0002ceVRR",
   lwaux_3 =	"7c0002eaRR0R",
   lhaux_3 =	"7c0002eeRR0R",
+  popcntw_2 =	"7c0002f4RR~",
+  divdeu_3 =	"7c000312RRR.",
+  divweu_3 =	"7c000316RRR.",
   sthx_3 =	"7c00032eRR0R",
   orc_3 =	"7c000338RR~R.",
   ecowx_3 =	"7c00036cRR0R",
@@ -420,10 +701,14 @@ local map_op = {
   mtctr_1 =	"7c0903a6R",
   dcbi_2 =	"7c0003ac-RR",
   nand_3 =	"7c0003b8RR~R.",
+  dsn_2 =	"7c0003c6-RR",
+  stvxl_3 =	"7c0003ceVRR",
   divd_3 =	"7c0003d2RRR.",
   divw_3 =	"7c0003d6RRR.",
+  popcntd_2 =	"7c0003f4RR~",
   cmpb_3 =	"7c0003f8RR~R.",
   mcrxr_1 =	"7c000400X",
+  lbdx_3 =	"7c000406RRR",
   subfco_3 =	"7c000410RRR.",
   subco_3 =	"7c000410RRR~.",
   addco_3 =	"7c000414RRR.",
@@ -433,16 +718,20 @@ local map_op = {
   lfsx_3 =	"7c00042eFR0R",
   srw_3 =	"7c000430RR~R.",
   srd_3 =	"7c000436RR~R.",
+  lhdx_3 =	"7c000446RRR",
   subfo_3 =	"7c000450RRR.",
   subo_3 =	"7c000450RRR~.",
   lfsux_3 =	"7c00046eFR0R",
+  lwdx_3 =	"7c000486RRR",
   lswi_3 =	"7c0004aaRR0A",
   sync_0 =	"7c0004ac",
   lwsync_0 =	"7c2004ac",
   ptesync_0 =	"7c4004ac",
   lfdx_3 =	"7c0004aeFR0R",
+  lddx_3 =	"7c0004c6RRR",
   nego_2 =	"7c0004d0RR.",
   lfdux_3 =	"7c0004eeFR0R",
+  stbdx_3 =	"7c000506RRR",
   subfeo_3 =	"7c000510RRR.",
   subeo_3 =	"7c000510RRR~.",
   addeo_3 =	"7c000514RRR.",
@@ -450,27 +739,42 @@ local map_op = {
   stswx_3 =	"7c00052aRR0R",
   stwbrx_3 =	"7c00052cRR0R",
   stfsx_3 =	"7c00052eFR0R",
+  sthdx_3 =	"7c000546RRR",
+  ["stbcx._3"] =	"7c00056dRRR",
   stfsux_3 =	"7c00056eFR0R",
+  stwdx_3 =	"7c000586RRR",
   subfzeo_2 =	"7c000590RR.",
   addzeo_2 =	"7c000594RR.",
   stswi_3 =	"7c0005aaRR0A",
+  ["sthcx._3"] =	"7c0005adRRR",
   stfdx_3 =	"7c0005aeFR0R",
+  stddx_3 =	"7c0005c6RRR",
   subfmeo_2 =	"7c0005d0RR.",
   mulldo_3 =	"7c0005d2RRR.",
   addmeo_2 =	"7c0005d4RR.",
   mullwo_3 =	"7c0005d6RRR.",
   dcba_2 =	"7c0005ec-RR",
   stfdux_3 =	"7c0005eeFR0R",
+  stvepxl_3 =	"7c00060eVRR",
   addo_3 =	"7c000614RRR.",
   lhbrx_3 =	"7c00062cRR0R",
+  lfdpx_3 =	"7c00062eF:RR",
   sraw_3 =	"7c000630RR~R.",
   srad_3 =	"7c000634RR~R.",
+  lfddx_3 =	"7c000646FRR",
+  stvepx_3 =	"7c00064eVRR",
   srawi_3 =	"7c000670RR~A.",
   sradi_3 =	"7c000674RR~H.",
   eieio_0 =	"7c0006ac",
   lfiwax_3 =	"7c0006aeFR0R",
+  divdeuo_3 =	"7c000712RRR.",
+  divweuo_3 =	"7c000716RRR.",
   sthbrx_3 =	"7c00072cRR0R",
+  stfdpx_3 =	"7c00072eF:RR",
   extsh_2 =	"7c000734RR~.",
+  stfddx_3 =	"7c000746FRR",
+  divdeo_3 =	"7c000752RRR.",
+  divweo_3 =	"7c000756RRR.",
   extsb_2 =	"7c000774RR~.",
   divduo_3 =	"7c000792RRR.",
   divwou_3 =	"7c000796RRR.",
@@ -481,6 +785,40 @@ local map_op = {
   divwo_3 =	"7c0007d6RRR.",
   dcbz_2 =	"7c0007ec-RR",
 
+  ["tbegin._1"] =	"7c00051d1",
+  ["tbegin._0"] =	"7c00051d",
+  ["tend._1"] =		"7c00055dY",
+  ["tend._0"] =		"7c00055d",
+  ["tendall._0"] =	"7e00055d",
+  tcheck_1 =		"7c00059cX",
+  ["tsr._1"] =		"7c0005dd1",
+  ["tsuspend._0"] =	"7c0005dd",
+  ["tresume._0"] =	"7c2005dd",
+  ["tabortwc._3"] =	"7c00061dARR",
+  ["tabortdc._3"] =	"7c00065dARR",
+  ["tabortwci._3"] =	"7c00069dARS",
+  ["tabortdci._3"] =	"7c0006ddARS",
+  ["tabort._1"] =	"7c00071d-R-",
+  ["treclaim._1"] =	"7c00075d-R",
+  ["trechkpt._0"] =	"7c0007dd",
+
+  lxsiwzx_3 =	"7c000018QRR",
+  lxsiwax_3 =	"7c000098QRR",
+  mfvsrd_2 =	"7c000066-Rq",
+  mfvsrwz_2 =	"7c0000e6-Rq",
+  stxsiwx_3 =	"7c000118QRR",
+  mtvsrd_2 =	"7c000166QR",
+  mtvsrwa_2 =	"7c0001a6QR",
+  lxvdsx_3 =	"7c000298QRR",
+  lxsspx_3 =	"7c000418QRR",
+  lxsdx_3 =	"7c000498QRR",
+  stxsspx_3 =	"7c000518QRR",
+  stxsdx_3 =	"7c000598QRR",
+  lxvw4x_3 =	"7c000618QRR",
+  lxvd2x_3 =	"7c000698QRR",
+  stxvw4x_3 =	"7c000718QRR",
+  stxvd2x_3 =	"7c000798QRR",
+
   -- Primary opcode 30:
   rldicl_4 =	"78000000RR~HM.",
   rldicr_4 =	"78000004RR~HM.",
@@ -489,6 +827,34 @@ local map_op = {
   rldcl_4 =	"78000010RR~RM.",
   rldcr_4 =	"78000012RR~RM.",
 
+  rotldi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = "0"
+  end),
+  rotrdi_3 =	op_alias("rldicl_4", function(p)
+    p[3] = "64-("..p[3]..")"; p[4] = "0"
+  end),
+  rotld_3 =	op_alias("rldcl_4", function(p)
+    p[4] = "0"
+  end),
+  sldi_3 =	op_alias("rldicr_4", function(p)
+    p[4] = "63-("..p[3]..")"
+  end),
+  srdi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = p[3]; p[3] = "64-("..p[3]..")"
+  end),
+  clrldi_3 =	op_alias("rldicl_4", function(p)
+    p[4] = p[3]; p[3] = "0"
+  end),
+  clrrdi_3 =	op_alias("rldicr_4", function(p)
+    p[4] = "63-("..p[3]..")"; p[3] = "0"
+  end),
+
+  -- Primary opcode 56:
+  lq_2 =	"e0000000R:D", -- NYI: displacement must be divisible by 16.
+
+  -- Primary opcode 57:
+  lfdp_2 =	"e4000000F:D", -- NYI: displacement must be divisible by 4.
+
   -- Primary opcode 59:
   fdivs_3 =	"ec000024FFF.",
   fsubs_3 =	"ec000028FFF.",
@@ -501,6 +867,200 @@ local map_op = {
   fmadds_4 =	"ec00003aFFFF~.",
   fnmsubs_4 =	"ec00003cFFFF~.",
   fnmadds_4 =	"ec00003eFFFF~.",
+  fcfids_2 =	"ec00069cF-F.",
+  fcfidus_2 =	"ec00079cF-F.",
+
+  dadd_3 =	"ec000004FFF.",
+  dqua_4 =	"ec000006FFFZ.",
+  dmul_3 =	"ec000044FFF.",
+  drrnd_4 =	"ec000046FFFZ.",
+  dscli_3 =	"ec000084FF6.",
+  dquai_4 =	"ec000086SF~FZ.",
+  dscri_3 =	"ec0000c4FF6.",
+  drintx_4 =	"ec0000c61F~FZ.",
+  dcmpo_3 =	"ec000104XFF",
+  dtstex_3 =	"ec000144XFF",
+  dtstdc_3 =	"ec000184XF6",
+  dtstdg_3 =	"ec0001c4XF6",
+  drintn_4 =	"ec0001c61F~FZ.",
+  dctdp_2 =	"ec000204F-F.",
+  dctfix_2 =	"ec000244F-F.",
+  ddedpd_3 =	"ec000284ZF~F.",
+  dxex_2 =	"ec0002c4F-F.",
+  dsub_3 =	"ec000404FFF.",
+  ddiv_3 =	"ec000444FFF.",
+  dcmpu_3 =	"ec000504XFF",
+  dtstsf_3 =	"ec000544XFF",
+  drsp_2 =	"ec000604F-F.",
+  dcffix_2 =	"ec000644F-F.",
+  denbcd_3 =	"ec000684YF~F.",
+  diex_3 =	"ec0006c4FFF.",
+
+  -- Primary opcode 60:
+  xsaddsp_3 =		"f0000000QQQ",
+  xsmaddasp_3 =		"f0000008QQQ",
+  xxsldwi_4 =		"f0000010QQQz",
+  xsrsqrtesp_2 =	"f0000028Q-Q",
+  xssqrtsp_2 =		"f000002cQ-Q",
+  xxsel_4 =		"f0000030QQQQ",
+  xssubsp_3 =		"f0000040QQQ",
+  xsmaddmsp_3 =		"f0000048QQQ",
+  xxpermdi_4 =		"f0000050QQQz",
+  xsresp_2 =		"f0000068Q-Q",
+  xsmulsp_3 =		"f0000080QQQ",
+  xsmsubasp_3 =		"f0000088QQQ",
+  xxmrghw_3 =		"f0000090QQQ",
+  xsdivsp_3 =		"f00000c0QQQ",
+  xsmsubmsp_3 =		"f00000c8QQQ",
+  xsadddp_3 =		"f0000100QQQ",
+  xsmaddadp_3 =		"f0000108QQQ",
+  xscmpudp_3 =		"f0000118XQQ",
+  xscvdpuxws_2 =	"f0000120Q-Q",
+  xsrdpi_2 =		"f0000124Q-Q",
+  xsrsqrtedp_2 =	"f0000128Q-Q",
+  xssqrtdp_2 =		"f000012cQ-Q",
+  xssubdp_3 =		"f0000140QQQ",
+  xsmaddmdp_3 =		"f0000148QQQ",
+  xscmpodp_3 =		"f0000158XQQ",
+  xscvdpsxws_2 =	"f0000160Q-Q",
+  xsrdpiz_2 =		"f0000164Q-Q",
+  xsredp_2 =		"f0000168Q-Q",
+  xsmuldp_3 =		"f0000180QQQ",
+  xsmsubadp_3 =		"f0000188QQQ",
+  xxmrglw_3 =		"f0000190QQQ",
+  xsrdpip_2 =		"f00001a4Q-Q",
+  xstsqrtdp_2 =		"f00001a8X-Q",
+  xsrdpic_2 =		"f00001acQ-Q",
+  xsdivdp_3 =		"f00001c0QQQ",
+  xsmsubmdp_3 =		"f00001c8QQQ",
+  xsrdpim_2 =		"f00001e4Q-Q",
+  xstdivdp_3 =		"f00001e8XQQ",
+  xvaddsp_3 =		"f0000200QQQ",
+  xvmaddasp_3 =		"f0000208QQQ",
+  xvcmpeqsp_3 =		"f0000218QQQ",
+  xvcvspuxws_2 =	"f0000220Q-Q",
+  xvrspi_2 =		"f0000224Q-Q",
+  xvrsqrtesp_2 =	"f0000228Q-Q",
+  xvsqrtsp_2 =		"f000022cQ-Q",
+  xvsubsp_3 =		"f0000240QQQ",
+  xvmaddmsp_3 =		"f0000248QQQ",
+  xvcmpgtsp_3 =		"f0000258QQQ",
+  xvcvspsxws_2 =	"f0000260Q-Q",
+  xvrspiz_2 =		"f0000264Q-Q",
+  xvresp_2 =		"f0000268Q-Q",
+  xvmulsp_3 =		"f0000280QQQ",
+  xvmsubasp_3 =		"f0000288QQQ",
+  xxspltw_3 =		"f0000290QQg~",
+  xvcmpgesp_3 =		"f0000298QQQ",
+  xvcvuxwsp_2 =		"f00002a0Q-Q",
+  xvrspip_2 =		"f00002a4Q-Q",
+  xvtsqrtsp_2 =		"f00002a8X-Q",
+  xvrspic_2 =		"f00002acQ-Q",
+  xvdivsp_3 =		"f00002c0QQQ",
+  xvmsubmsp_3 =		"f00002c8QQQ",
+  xvcvsxwsp_2 =		"f00002e0Q-Q",
+  xvrspim_2 =		"f00002e4Q-Q",
+  xvtdivsp_3 =		"f00002e8XQQ",
+  xvadddp_3 =		"f0000300QQQ",
+  xvmaddadp_3 =		"f0000308QQQ",
+  xvcmpeqdp_3 =		"f0000318QQQ",
+  xvcvdpuxws_2 =	"f0000320Q-Q",
+  xvrdpi_2 =		"f0000324Q-Q",
+  xvrsqrtedp_2 =	"f0000328Q-Q",
+  xvsqrtdp_2 =		"f000032cQ-Q",
+  xvsubdp_3 =		"f0000340QQQ",
+  xvmaddmdp_3 =		"f0000348QQQ",
+  xvcmpgtdp_3 =		"f0000358QQQ",
+  xvcvdpsxws_2 =	"f0000360Q-Q",
+  xvrdpiz_2 =		"f0000364Q-Q",
+  xvredp_2 =		"f0000368Q-Q",
+  xvmuldp_3 =		"f0000380QQQ",
+  xvmsubadp_3 =		"f0000388QQQ",
+  xvcmpgedp_3 =		"f0000398QQQ",
+  xvcvuxwdp_2 =		"f00003a0Q-Q",
+  xvrdpip_2 =		"f00003a4Q-Q",
+  xvtsqrtdp_2 =		"f00003a8X-Q",
+  xvrdpic_2 =		"f00003acQ-Q",
+  xvdivdp_3 =		"f00003c0QQQ",
+  xvmsubmdp_3 =		"f00003c8QQQ",
+  xvcvsxwdp_2 =		"f00003e0Q-Q",
+  xvrdpim_2 =		"f00003e4Q-Q",
+  xvtdivdp_3 =		"f00003e8XQQ",
+  xsnmaddasp_3 =	"f0000408QQQ",
+  xxland_3 =		"f0000410QQQ",
+  xscvdpsp_2 =		"f0000424Q-Q",
+  xscvdpspn_2 =		"f000042cQ-Q",
+  xsnmaddmsp_3 =	"f0000448QQQ",
+  xxlandc_3 =		"f0000450QQQ",
+  xsrsp_2 =		"f0000464Q-Q",
+  xsnmsubasp_3 =	"f0000488QQQ",
+  xxlor_3 =		"f0000490QQQ",
+  xscvuxdsp_2 =		"f00004a0Q-Q",
+  xsnmsubmsp_3 =	"f00004c8QQQ",
+  xxlxor_3 =		"f00004d0QQQ",
+  xscvsxdsp_2 =		"f00004e0Q-Q",
+  xsmaxdp_3 =		"f0000500QQQ",
+  xsnmaddadp_3 =	"f0000508QQQ",
+  xxlnor_3 =		"f0000510QQQ",
+  xscvdpuxds_2 =	"f0000520Q-Q",
+  xscvspdp_2 =		"f0000524Q-Q",
+  xscvspdpn_2 =		"f000052cQ-Q",
+  xsmindp_3 =		"f0000540QQQ",
+  xsnmaddmdp_3 =	"f0000548QQQ",
+  xxlorc_3 =		"f0000550QQQ",
+  xscvdpsxds_2 =	"f0000560Q-Q",
+  xsabsdp_2 =		"f0000564Q-Q",
+  xscpsgndp_3 =		"f0000580QQQ",
+  xsnmsubadp_3 =	"f0000588QQQ",
+  xxlnand_3 =		"f0000590QQQ",
+  xscvuxddp_2 =		"f00005a0Q-Q",
+  xsnabsdp_2 =		"f00005a4Q-Q",
+  xsnmsubmdp_3 =	"f00005c8QQQ",
+  xxleqv_3 =		"f00005d0QQQ",
+  xscvsxddp_2 =		"f00005e0Q-Q",
+  xsnegdp_2 =		"f00005e4Q-Q",
+  xvmaxsp_3 =		"f0000600QQQ",
+  xvnmaddasp_3 =	"f0000608QQQ",
+  ["xvcmpeqsp._3"] =	"f0000618QQQ",
+  xvcvspuxds_2 =	"f0000620Q-Q",
+  xvcvdpsp_2 =		"f0000624Q-Q",
+  xvminsp_3 =		"f0000640QQQ",
+  xvnmaddmsp_3 =	"f0000648QQQ",
+  ["xvcmpgtsp._3"] =	"f0000658QQQ",
+  xvcvspsxds_2 =	"f0000660Q-Q",
+  xvabssp_2 =		"f0000664Q-Q",
+  xvcpsgnsp_3 =		"f0000680QQQ",
+  xvnmsubasp_3 =	"f0000688QQQ",
+  ["xvcmpgesp._3"] =	"f0000698QQQ",
+  xvcvuxdsp_2 =		"f00006a0Q-Q",
+  xvnabssp_2 =		"f00006a4Q-Q",
+  xvnmsubmsp_3 =	"f00006c8QQQ",
+  xvcvsxdsp_2 =		"f00006e0Q-Q",
+  xvnegsp_2 =		"f00006e4Q-Q",
+  xvmaxdp_3 =		"f0000700QQQ",
+  xvnmaddadp_3 =	"f0000708QQQ",
+  ["xvcmpeqdp._3"] =	"f0000718QQQ",
+  xvcvdpuxds_2 =	"f0000720Q-Q",
+  xvcvspdp_2 =		"f0000724Q-Q",
+  xvmindp_3 =		"f0000740QQQ",
+  xvnmaddmdp_3 =	"f0000748QQQ",
+  ["xvcmpgtdp._3"] =	"f0000758QQQ",
+  xvcvdpsxds_2 =	"f0000760Q-Q",
+  xvabsdp_2 =		"f0000764Q-Q",
+  xvcpsgndp_3 =		"f0000780QQQ",
+  xvnmsubadp_3 =	"f0000788QQQ",
+  ["xvcmpgedp._3"] =	"f0000798QQQ",
+  xvcvuxddp_2 =		"f00007a0Q-Q",
+  xvnabsdp_2 =		"f00007a4Q-Q",
+  xvnmsubmdp_3 =	"f00007c8QQQ",
+  xvcvsxddp_2 =		"f00007e0Q-Q",
+  xvnegdp_2 =		"f00007e4Q-Q",
+
+  -- Primary opcode 61:
+  stfdp_2 =	"f4000000F:D", -- NYI: displacement must be divisible by 4.
+
+  -- Primary opcode 62:
+  stq_2 =	"f8000002R:D", -- NYI: displacement must be divisible by 4.
 
   -- Primary opcode 63:
   fdiv_3 =	"fc000024FFF.",
@@ -526,8 +1086,12 @@ local map_op = {
   frsp_2 =	"fc000018F-F.",
   fctiw_2 =	"fc00001cF-F.",
   fctiwz_2 =	"fc00001eF-F.",
+  ftdiv_2 =	"fc000100X-F.",
+  fctiwu_2 =	"fc00011cF-F.",
+  fctiwuz_2 =	"fc00011eF-F.",
   mtfsfi_2 =	"fc00010cAA", -- NYI: upshift.
   fnabs_2 =	"fc000110F-F.",
+  ftsqrt_2 =	"fc000140X-F.",
   fabs_2 =	"fc000210F-F.",
   frin_2 =	"fc000310F-F.",
   friz_2 =	"fc000350F-F.",
@@ -537,7 +1101,38 @@ local map_op = {
   -- NYI: mtfsf, mtfsb0, mtfsb1.
   fctid_2 =	"fc00065cF-F.",
   fctidz_2 =	"fc00065eF-F.",
+  fmrgow_3 =	"fc00068cFFF",
   fcfid_2 =	"fc00069cF-F.",
+  fctidu_2 =	"fc00075cF-F.",
+  fctiduz_2 =	"fc00075eF-F.",
+  fmrgew_3 =	"fc00078cFFF",
+  fcfidu_2 =	"fc00079cF-F.",
+
+  daddq_3 =	"fc000004F:F:F:.",
+  dquaq_4 =	"fc000006F:F:F:Z.",
+  dmulq_3 =	"fc000044F:F:F:.",
+  drrndq_4 =	"fc000046F:F:F:Z.",
+  dscliq_3 =	"fc000084F:F:6.",
+  dquaiq_4 =	"fc000086SF:~F:Z.",
+  dscriq_3 =	"fc0000c4F:F:6.",
+  drintxq_4 =	"fc0000c61F:~F:Z.",
+  dcmpoq_3 =	"fc000104XF:F:",
+  dtstexq_3 =	"fc000144XF:F:",
+  dtstdcq_3 =	"fc000184XF:6",
+  dtstdgq_3 =	"fc0001c4XF:6",
+  drintnq_4 =	"fc0001c61F:~F:Z.",
+  dctqpq_2 =	"fc000204F:-F:.",
+  dctfixq_2 =	"fc000244F:-F:.",
+  ddedpdq_3 =	"fc000284ZF:~F:.",
+  dxexq_2 =	"fc0002c4F:-F:.",
+  dsubq_3 =	"fc000404F:F:F:.",
+  ddivq_3 =	"fc000444F:F:F:.",
+  dcmpuq_3 =	"fc000504XF:F:",
+  dtstsfq_3 =	"fc000544XF:F:",
+  drdpq_2 =	"fc000604F:-F:.",
+  dcffixq_2 =	"fc000644F:-F:.",
+  denbcdq_3 =	"fc000684YF:~F:.",
+  diexq_3 =	"fc0006c4F:FF:.",
 
   -- Primary opcode 4, SPE APU extension:
   evaddw_3 =		"10000200RRR",
@@ -822,7 +1417,7 @@ local map_op = {
 do
   local t = {}
   for k,v in pairs(map_op) do
-    if sub(v, -1) == "." then
+    if type(v) == "string" and sub(v, -1) == "." then
       local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
       t[sub(k, 1, -3).."."..sub(k, -2)] = v2
     end
@@ -884,6 +1479,24 @@ local function parse_fpr(expr)
   werror("bad register name `"..expr.."'")
 end
 
+local function parse_vr(expr) -- Parse a vector register name of the form "v<N>" and return N.
+  local r = match(expr, "^v([1-3]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 31 then return r end -- The pattern also admits v32..v39; only 0..31 is accepted.
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_vs(expr) -- Parse a register name of the form "vs<N>" and return N.
+  local r = match(expr, "^vs([1-6]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 63 then return r end -- The pattern also admits vs64..vs69; only 0..63 is accepted.
+  end
+  werror("bad register name `"..expr.."'")
+end
+
 local function parse_cr(expr)
   local r = match(expr, "^cr([0-7])$")
   if r then return tonumber(r) end
@@ -900,8 +1513,30 @@ local function parse_cond(expr)
   werror("bad condition bit name `"..expr.."'")
 end
 
+local parse_ctx = {} -- Environment table given to every chunk compiled by loadenv.
+
+local loadenv = setfenv and function(s) -- Variant used when setfenv is available.
+  local code = loadstring(s, "") -- nil when `s` does not compile.
+  if code then setfenv(code, parse_ctx) end
+  return code
+end or function(s) -- Variant without setfenv: hand the environment to load() directly.
+  return load(s, "", nil, parse_ctx)
+end
+
+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
+local function parse_number(n) -- Returns a number, or nil if `n` is not a constant numeric expression.
+  local x = tonumber(n)
+  if x then return x end -- Plain numeric literal.
+  local code = loadenv("return "..n) -- Compile `n` as a Lua expression in the parse_ctx environment.
+  if code then
+    local ok, y = pcall(code)
+    if ok and type(y) == "number" then return y end -- Strings, booleans, tables etc. are not immediates.
+  end
+  return nil
+end
+
 local function parse_imm(imm, bits, shift, scale, signed)
-  local n = tonumber(imm)
+  local n = parse_number(imm)
   if n then
     local m = sar(n, scale)
     if shl(m, scale) == n then
@@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
       end
     end
     werror("out of range immediate `"..imm.."'")
-  elseif match(imm, "^r([1-3]?[0-9])$") or
+  elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
+	 match(imm, "^vs([1-6]?[0-9])$") or
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
   else
@@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed)
 end
 
 local function parse_shiftmask(imm, isshift)
-  local n = tonumber(imm)
+  local n = parse_number(imm)
   if n then
     if shr(n, 6) == 0 then
-      local lsb = band(imm, 31)
-      local msb = imm - lsb
+      local lsb = band(n, 31)
+      local msb = n - lsb
       return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
     end
     werror("out of range immediate `"..imm.."'")
@@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift)
 	 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
     werror("expected immediate operand, got register")
   else
-    werror("NYI: parameterized 64 bit shift/mask")
+    waction("IMMSH", isshift and 1 or 0, imm)
+    return 0;
   end
 end
 
@@ -1011,7 +1648,7 @@ end
 ------------------------------------------------------------------------------
 
 -- Handle opcodes defined with template strings.
-map_op[".template__"] = function(params, template, nparams)
+op_template = function(params, template, nparams)
   if not params then return sub(template, 9) end
   local op = tonumber(sub(template, 1, 8), 16)
   local n, rs = 1, 26
@@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams)
       rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
     elseif p == "F" then
       rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
+    elseif p == "V" then
+      rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
+    elseif p == "Q" then
+      local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
+      local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
+      op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
+    elseif p == "q" then
+      local vs = parse_vs(params[n]); n = n + 1
+      op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
     elseif p == "A" then
       rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
     elseif p == "S" then
@@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams)
       rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
     elseif p == "X" then
       rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
+    elseif p == "1" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
+    elseif p == "g" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
+    elseif p == "3" then
+      rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
+    elseif p == "P" then
+      rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+    elseif p == "p" then
+      op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+    elseif p == "6" then
+      rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
+    elseif p == "Y" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
+    elseif p == "y" then
+      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
+    elseif p == "Z" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
+    elseif p == "z" then
+      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
     elseif p == "W" then
       op = op + parse_cr(params[n]); n = n + 1
     elseif p == "G" then
@@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams)
       local lo = band(op, mm)
       local hi = band(op, shl(mm, 5))
       op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
+    elseif p == ":" then
+      if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
     elseif p == "-" then
       rs = rs - 5
     elseif p == "." then
@@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams)
   wputpos(pos, op)
 end
 
+map_op[".template__"] = op_template
+
 ------------------------------------------------------------------------------
 
 -- Pseudo-opcode to mark the position where the action list is to be emitted.
diff --git a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_proto.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_proto.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_proto.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_proto.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_x64.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_x64.lua
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_x64.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_x64.lua
diff --git a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_x86.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_x86.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_x86.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_x86.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_x86.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_x86.lua
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/dynasm/dasm_x86.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dasm_x86.lua
diff --git a/source/libs/luajit/LuaJIT-2.0.4/dynasm/dynasm.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dynasm.lua
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/dynasm/dynasm.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/dynasm/dynasm.lua
diff --git a/source/libs/luajit/LuaJIT-2.0.4/etc/luajit.1 b/source/libs/luajit/LuaJIT-2.1.0-beta1/etc/luajit.1
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/etc/luajit.1
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/etc/luajit.1
diff --git a/source/libs/luajit/LuaJIT-2.0.4/etc/luajit.pc b/source/libs/luajit/LuaJIT-2.1.0-beta1/etc/luajit.pc
similarity index 89%
rename from source/libs/luajit/LuaJIT-2.0.4/etc/luajit.pc
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/etc/luajit.pc
index a652b40d461f5cfb8d898fb5cb2c6bb120adfb64..c99057f40c351d8689b016b52162df497cc684d1 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/etc/luajit.pc
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/etc/luajit.pc
@@ -1,8 +1,8 @@
 # Package information for LuaJIT to be used by pkg-config.
 majver=2
-minver=0
-relver=4
-version=${majver}.${minver}.${relver}
+minver=1
+relver=0
+version=${majver}.${minver}.${relver}-beta1
 abiver=5.1
 
 prefix=/usr/local
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/Makefile b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/Makefile
similarity index 94%
rename from source/libs/luajit/LuaJIT-2.0.4/src/Makefile
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/Makefile
index 00381e82f723106b8b3f9251abd49627750fb77c..7f86e98ec038ff72b4c37b610f4c69547835172e 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/Makefile
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/Makefile
@@ -11,8 +11,8 @@
 ##############################################################################
 
 MAJVER=  2
-MINVER=  0
-RELVER=  4
+MINVER=  1
+RELVER=  0
 ABIVER=  5.1
 NODOTABIVER= 51
 
@@ -42,17 +42,14 @@ CCOPT= -O2 -fomit-frame-pointer
 #
 # Target-specific compiler options:
 #
-# x86 only: it's recommended to compile at least for i686. Better yet,
-# compile for an architecture that has SSE2, too (-msse -msse2).
-#
 # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
 # the binaries to a different machine you could also use: -march=native
 #
-CCOPT_x86= -march=i686
+CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
 CCOPT_x64=
 CCOPT_arm=
+CCOPT_arm64=
 CCOPT_ppc=
-CCOPT_ppcspe=
 CCOPT_mips=
 #
 CCDEBUG=
@@ -165,7 +162,8 @@ XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT
 #   make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
 #   make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
 
-CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
 LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
 
 HOST_CC= $(CC)
@@ -204,6 +202,7 @@ TARGET_XLDFLAGS=
 TARGET_XLIBS= -lm
 TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
+TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
 TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
 TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
 TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
@@ -218,12 +217,17 @@ else
 ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
   TARGET_LJARCH= arm
 else
+ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
+  TARGET_LJARCH= arm64
+else
 ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
+  ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
+    TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE
+  else
+    TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE
+  endif
   TARGET_LJARCH= ppc
 else
-ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH)))
-  TARGET_LJARCH= ppcspe
-else
 ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
   ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
     TARGET_ARCH= -D__MIPSEL__=1
@@ -242,9 +246,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
   TARGET_SYS= PS3
   TARGET_ARCH+= -D__CELLOS_LV2__
   TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
-endif
-ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
-  TARGET_ARCH+= -DLUAJIT_NO_UNWIND
+  TARGET_XLIBS+= -lpthread
 endif
 
 TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
@@ -313,6 +315,9 @@ ifeq (iOS,$(TARGET_SYS))
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
+  ifeq (arm64,$(TARGET_LJARCH))
+    TARGET_XCFLAGS+= -fno-omit-frame-pointer
+  endif
 else
   ifneq (SunOS,$(TARGET_SYS))
     ifneq (PS3,$(TARGET_SYS))
@@ -397,17 +402,18 @@ ifeq (,$(findstring LJ_ABI_SOFTFP 1,$(TARGET_TESTARCH)))
 else
   TARGET_ARCH+= -DLJ_ABI_SOFTFP=1
 endif
+ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D NO_UNWIND
+  TARGET_ARCH+= -DLUAJIT_NO_UNWIND
+endif
 DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
 ifeq (Windows,$(TARGET_SYS))
   DASM_AFLAGS+= -D WIN
 endif
-ifeq (x86,$(TARGET_LJARCH))
-  ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
-    DASM_AFLAGS+= -D SSE
-  endif
-else
 ifeq (x64,$(TARGET_LJARCH))
-  DASM_ARCH= x86
+  ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
+    DASM_ARCH= x86
+  endif
 else
 ifeq (arm,$(TARGET_LJARCH))
   ifeq (iOS,$(TARGET_SYS))
@@ -421,13 +427,15 @@ ifeq (ppc,$(TARGET_LJARCH))
   ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
     DASM_AFLAGS+= -D ROUND
   endif
-  ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH)))
+  ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH)))
     DASM_AFLAGS+= -D GPR64
   endif
   ifeq (PS3,$(TARGET_SYS))
     DASM_AFLAGS+= -D PPE -D TOC
   endif
-endif
+  ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH)))
+    DASM_ARCH= ppc64
+  endif
 endif
 endif
 endif
@@ -443,7 +451,7 @@ BUILDVM_X= $(BUILDVM_T)
 HOST_O= $(MINILUA_O) $(BUILDVM_O)
 HOST_T= $(MINILUA_T) $(BUILDVM_T)
 
-LJVM_S= lj_vm.s
+LJVM_S= lj_vm.S
 LJVM_O= lj_vm.o
 LJVM_BOUT= $(LJVM_S)
 LJVM_MODE= elfasm
@@ -452,10 +460,11 @@ LJLIB_O= lib_base.o lib_math.o lbitlib.o lib_bit.o lib_string.o lib_table.o \
 	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
 LJLIB_C= $(LJLIB_O:.o=.c)
 
-LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
+LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
-	  lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
+	  lj_strfmt.o lj_api.o lj_profile.o \
+	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
 	  lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
@@ -575,6 +584,10 @@ amalg:
 clean:
 	$(HOST_RM) $(ALL_RM)
 
+libbc:
+	./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
+	$(MAKE) all
+
 depend:
 	@for file in $(ALL_HDRGEN); do \
 	  test -f $$file || touch $$file; \
@@ -589,7 +602,7 @@ depend:
 	  test -s $$file || $(HOST_RM) $$file; \
 	  done
 
-.PHONY: default all amalg clean depend
+.PHONY: default all amalg clean libbc depend
 
 ##############################################################################
 # Rules for generated files.
@@ -646,10 +659,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
 	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
 	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
 
-%.o: %.s
+%.o: %.S
 	$(E) "ASM       $@"
-	$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
-	$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
+	$(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $<
+	$(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $<
 
 $(LUAJIT_O):
 	$(E) "CC        $@"
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/Makefile.dep b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/Makefile.dep
similarity index 57%
rename from source/libs/luajit/LuaJIT-2.0.4/src/Makefile.dep
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/Makefile.dep
index 17c52421944b8e2e6bb5efe07dd893dcea9a7e47..ccad82c713779de0f88b9e4818f91b03cf2bd163 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/Makefile.dep
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/Makefile.dep
@@ -5,44 +5,48 @@ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
  lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
  lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
- lj_lib.h lj_libdef.h
+ lj_strfmt.h lj_lib.h lj_libdef.h
 lbitlib.o: lbitlib.c lua.h luaconf.h lauxlib.h lualib.h
 lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
+ lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
+ lj_ffdef.h lj_lib.h lj_libdef.h
 lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
  lj_libdef.h
 lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
  lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
- lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+ lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \
+ lj_libdef.h
 lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
 lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \
- lj_ffdef.h lj_lib.h lj_libdef.h
-lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
- lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
- lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \
- lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \
- lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
+ lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
+ lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
 lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
 lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
+ lj_libdef.h
 lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
 lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
- lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \
- lj_lib.h lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
+ lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
 lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \
- lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
 lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
 lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
  lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h
+ lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
 lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
  lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
@@ -51,17 +55,20 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
  lj_bcdef.h
 lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \
- lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
+ lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \
+ lj_strfmt.h
 lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \
- lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
+ lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
+ lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
+lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h
 lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_carith.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
+ lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
 lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \
+ lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h
 lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
@@ -72,107 +79,116 @@ lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
  lj_ccallback.h
 lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
- lj_cdata.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
 lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
 lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_clib.h
+ lj_cdata.h lj_clib.h lj_strfmt.h
 lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \
- lj_bc.h lj_vm.h lj_char.h lj_strscan.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h
 lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \
- lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \
- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
+ lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \
+ lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
+ lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
- lj_crecord.h
+ lj_crecord.h lj_strfmt.h
 lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
+ lj_ccallback.h
 lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \
- lj_bc.h lj_vm.h lj_jit.h lj_ir.h
+ lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
+ lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
 lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \
- lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \
- lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \
- lj_vm.h luajit.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
+ lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
+ lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h
 lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
  lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
  lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
- lj_traceerr.h lj_vm.h
+ lj_traceerr.h lj_vm.h lj_strfmt.h
 lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
  lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
  lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
- lj_vm.h lj_strscan.h lj_recdef.h
+ lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
 lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h lj_vm.h
 lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
- lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \
- lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
+ lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
 lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \
- lj_ir.h lj_dispatch.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
+ lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
 lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
- lj_vm.h lj_strscan.h lj_lib.h
+ lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
+ lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
 lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \
- lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
+ lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
+ lj_strfmt.h
 lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
- lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h
+ lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
+ lj_bcdump.h lj_lib.h
 lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \
- lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
 lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
  lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
 lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
- lj_vm.h lj_strscan.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
+ lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
 lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
 lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_jit.h lj_iropt.h
 lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
- lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \
- lj_strscan.h lj_folddef.h
+ lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \
+ lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h
 lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
+ lj_vm.h
 lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
+ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h
 lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
  lj_traceerr.h lj_vm.h lj_strscan.h
 lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
 lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \
- lj_iropt.h lj_vm.h
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
+ lj_jit.h lj_ircall.h lj_iropt.h lj_vm.h
 lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \
- lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
+ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
+ lj_vm.h lj_vmevent.h
+lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
+ lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
 lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
- lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \
- lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \
- lj_ffrecord.h lj_snap.h lj_vm.h
+ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
+ lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
 lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
  lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
  lj_target_*.h lj_ctype.h lj_cdata.h
 lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \
- lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
+ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h
 lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h
+ lj_err.h lj_errmsg.h lj_str.h lj_char.h
+lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
 lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_char.h lj_strscan.h
 lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -190,21 +206,22 @@ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_vm.h
 ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \
- lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \
- lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \
- lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \
- lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \
- lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \
- luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \
- lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
- lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \
- lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \
- lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \
- lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \
- lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
- lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \
- lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \
+ lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
+ lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
+ lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \
+ lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
+ lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
+ lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
+ lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
+ lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
+ lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
+ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
+ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
+ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
+ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
+ lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
  lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
  lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
  lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
@@ -221,7 +238,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
 host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
  luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
 host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
- lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h
+ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
+ host/buildvm_libbc.h
 host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
  luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
 host/minilua.o: host/minilua.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/host/README b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/README
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/host/README
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/README
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm.c
similarity index 96%
rename from source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm.c
index 11f666076c4cf41984570418251d264d747236a9..8584e614d098c52a3882f1ffd3ac6660f6842cd4 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm.c
@@ -59,10 +59,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
 #include "../dynasm/dasm_x86.h"
 #elif LJ_TARGET_ARM
 #include "../dynasm/dasm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "../dynasm/dasm_arm64.h"
 #elif LJ_TARGET_PPC
 #include "../dynasm/dasm_ppc.h"
-#elif LJ_TARGET_PPCSPE
-#include "../dynasm/dasm_ppc.h"
 #elif LJ_TARGET_MIPS
 #include "../dynasm/dasm_mips.h"
 #else
@@ -113,8 +113,8 @@ static const char *sym_decorate(BuildCtx *ctx,
       name[0] = '@';
     else
       *p = '\0';
-#elif ((LJ_TARGET_PPC && !LJ_TARGET_OSX)  || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE
-    /* Keep @plt. */
+#elif LJ_TARGET_PPC && !LJ_TARGET_OSX && !LJ_TARGET_CONSOLE
+    /* Keep @plt etc. */
 #else
     *p = '\0';
 #endif
@@ -179,6 +179,7 @@ static int build_code(BuildCtx *ctx)
   ctx->nreloc = 0;
 
   ctx->globnames = globnames;
+  ctx->extnames = extnames;
   ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
   ctx->nrelocsym = 0;
   for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
@@ -320,20 +321,20 @@ static void emit_vmdef(BuildCtx *ctx)
   char buf[80];
   int i;
   fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
-  fprintf(ctx->fp, "module(...)\n\n");
+  fprintf(ctx->fp, "return {\n\n");
 
   fprintf(ctx->fp, "bcnames = \"");
   for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
-  fprintf(ctx->fp, "\"\n\n");
+  fprintf(ctx->fp, "\",\n\n");
 
   fprintf(ctx->fp, "irnames = \"");
   for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
-  fprintf(ctx->fp, "\"\n\n");
+  fprintf(ctx->fp, "\",\n\n");
 
   fprintf(ctx->fp, "irfpm = { [0]=");
   for (i = 0; irfpm_names[i]; i++)
     fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
   fprintf(ctx->fp, "irfield = { [0]=");
   for (i = 0; irfield_names[i]; i++) {
@@ -343,17 +344,17 @@ static void emit_vmdef(BuildCtx *ctx)
     if (p) *p = '.';
     fprintf(ctx->fp, "\"%s\", ", buf);
   }
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
   fprintf(ctx->fp, "ircall = {\n[0]=");
   for (i = 0; ircall_names[i]; i++)
     fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 
   fprintf(ctx->fp, "traceerr = {\n[0]=");
   for (i = 0; trace_errors[i]; i++)
     fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
-  fprintf(ctx->fp, "}\n\n");
+  fprintf(ctx->fp, "},\n\n");
 }
 
 /* -- Argument parsing ---------------------------------------------------- */
@@ -490,6 +491,7 @@ int main(int argc, char **argv)
   case BUILD_vmdef:
     emit_vmdef(ctx);
     emit_lib(ctx);
+    fprintf(ctx->fp, "}\n\n");
     break;
   case BUILD_ffdef:
   case BUILD_libdef:
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm.h
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm.h
index b26218502e38d11a863b0ead6ee8de2bfe671610..5588555307eaa4e544a066f7f8a03ddfd007766b 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm.h
@@ -82,6 +82,7 @@ typedef struct BuildCtx {
   const char *beginsym;
   /* Strings generated by DynASM. */
   const char *const *globnames;
+  const char *const *extnames;
   const char *dasm_ident;
   const char *dasm_arch;
   /* Relocations. */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm_asm.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_asm.c
similarity index 85%
rename from source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm_asm.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_asm.c
index 2c9a2d482e6d7d6b70935767c34e5b19c7247491..9b7ae53a26afe2bedb4b6081e243a8a3422caea4 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm_asm.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_asm.c
@@ -51,8 +51,8 @@ static const char *const jccnames[] = {
   "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
 };
 
-/* Emit relocation for the incredibly stupid OSX assembler. */
-static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n,
+/* Emit x86/x64 text relocations. */
+static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
 				const char *sym)
 {
   const char *opname = NULL;
@@ -71,6 +71,20 @@ err:
     exit(1);
   }
   emit_asm_bytes(ctx, cp, n);
+  if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
+    /* Various fixups for external symbols outside of our binary. */
+    if (ctx->mode == BUILD_elfasm) {
+      if (LJ_32)
+	fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym);
+      fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym);
+      if (LJ_32)
+	fprintf(ctx->fp, "#endif\n");
+      return;
+    } else if (LJ_32 && ctx->mode == BUILD_machasm) {
+      fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym);
+      return;
+    }
+  }
   fprintf(ctx->fp, "\t%s %s\n", opname, sym);
 }
 #else
@@ -107,7 +121,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
 	    ins, sym);
     exit(1);
   }
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_ARM64
+  if ((ins >> 26) == 0x25u) {
+    fprintf(ctx->fp, "\tbl %s\n", sym);
+  } else {
+    fprintf(stderr,
+	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
+	    ins, sym);
+    exit(1);
+  }
+#elif LJ_TARGET_PPC
 #if LJ_TARGET_PS3
 #define TOCPREFIX "."
 #else
@@ -117,6 +140,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
     fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
 	    (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
   } else if ((ins >> 26) == 18) {
+#if LJ_ARCH_PPC64
+    const char *suffix = strchr(sym, '@');
+    if (suffix && suffix[1] == 'h') {
+      fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
+    } else if (suffix && suffix[1] == 'l') {
+      fprintf(ctx->fp, "\tld 12, %s\n", sym);
+    } else
+#endif
     fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
   } else {
     fprintf(stderr,
@@ -214,6 +245,9 @@ void emit_asm(BuildCtx *ctx)
   int i, rel;
 
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+#if LJ_ARCH_PPC64
+  fprintf(ctx->fp, "\t.abiversion 2\n");
+#endif
   fprintf(ctx->fp, "\t.text\n");
   emit_asm_align(ctx, 4);
 
@@ -254,8 +288,9 @@ void emit_asm(BuildCtx *ctx)
       BuildReloc *r = &ctx->reloc[rel];
       int n = r->ofs - ofs;
 #if LJ_TARGET_X86ORX64
-      if (ctx->mode == BUILD_machasm && r->type != 0) {
-	emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+      if (r->type != 0 &&
+	  (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) {
+	emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
       } else {
 	emit_asm_bytes(ctx, ctx->code+ofs, n);
 	emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
@@ -289,10 +324,7 @@ void emit_asm(BuildCtx *ctx)
 #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
     fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
 #endif
-#if LJ_TARGET_PPCSPE
-    /* Soft-float ABI + SPE. */
-    fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
-#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
+#if LJ_TARGET_PPC && !LJ_TARGET_PS3
     /* Hard-float ABI. */
     fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
 #endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm_fold.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_fold.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm_fold.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_fold.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm_lib.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_lib.c
similarity index 87%
rename from source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm_lib.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_lib.c
index c37301d462c8d0ea9ca94855f185e243e27787d8..e928673d4e08d12334a6bbc0640a66fc16995cc0 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm_lib.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_lib.c
@@ -5,7 +5,9 @@
 
 #include "buildvm.h"
 #include "lj_obj.h"
+#include "lj_bc.h"
 #include "lj_lib.h"
+#include "buildvm_libbc.h"
 
 /* Context for library definitions. */
 static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
   regfunc = REGFUNC_OK;
 }
 
+static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
+{
+  uint32_t v = *p++;
+  if (v >= 0x80) {
+    int sh = 0; v &= 0x7f;
+    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
+  }
+  *vv = v;
+  return p;
+}
+
+static void libdef_fixupbc(uint8_t *p)
+{
+  uint32_t i, sizebc;
+  p += 4;
+  p = libdef_uleb128(p, &sizebc);
+  p = libdef_uleb128(p, &sizebc);
+  p = libdef_uleb128(p, &sizebc);
+  for (i = 0; i < sizebc; i++, p += 4) {
+    uint8_t op = p[libbc_endian ? 3 : 0];
+    uint8_t ra = p[libbc_endian ? 2 : 1];
+    uint8_t rc = p[libbc_endian ? 1 : 2];
+    uint8_t rb = p[libbc_endian ? 0 : 3];
+    if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
+      op = BC_ISNUM; rc++;
+    }
+    p[LJ_ENDIAN_SELECT(0, 3)] = op;
+    p[LJ_ENDIAN_SELECT(1, 2)] = ra;
+    p[LJ_ENDIAN_SELECT(2, 1)] = rc;
+    p[LJ_ENDIAN_SELECT(3, 0)] = rb;
+  }
+}
+
+static void libdef_lua(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(arg);
+  if (ctx->mode == BUILD_libdef) {
+    int i;
+    for (i = 0; libbc_map[i].name != NULL; i++) {
+      if (!strcmp(libbc_map[i].name, p)) {
+	int ofs = libbc_map[i].ofs;
+	int len = libbc_map[i+1].ofs - ofs;
+	obuf[2]++;  /* Bump hash table size. */
+	*optr++ = LIBINIT_LUA;
+	libdef_name(p, 0);
+	memcpy(optr, libbc_code + ofs, len);
+	libdef_fixupbc(optr);
+	optr += len;
+	return;
+      }
+    }
+    fprintf(stderr, "Error: missing libbc definition for %s\n", p);
+    exit(1);
+  }
+}
+
 static uint32_t find_rec(char *name)
 {
   char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
   { "CF(",	")",		libdef_func,		LIBINIT_CF },
   { "ASM(",	")",		libdef_func,		LIBINIT_ASM },
   { "ASM_(",	")",		libdef_func,		LIBINIT_ASM_ },
+  { "LUA(",	")",		libdef_lua,		0 },
   { "REC(",	")",		libdef_rec,		0 },
   { "PUSH(",	")",		libdef_push,		0 },
   { "SET(",	")",		libdef_set,		0 },
@@ -373,7 +432,7 @@ void emit_lib(BuildCtx *ctx)
       "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
       ffasmfunc);
   } else if (ctx->mode == BUILD_vmdef) {
-    fprintf(ctx->fp, "}\n\n");
+    fprintf(ctx->fp, "},\n\n");
   } else if (ctx->mode == BUILD_bcdef) {
     int i;
     fprintf(ctx->fp, "\n};\n\n");
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_libbc.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_libbc.h
new file mode 100644
index 0000000000000000000000000000000000000000..45f8f8cbdf25bf1d76b03e62bb8b97219f74ba3b
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_libbc.h
@@ -0,0 +1,45 @@
+/* This is a generated file. DO NOT EDIT! */
+
+static const int libbc_endian = 0;
+
+static const uint8_t libbc_code[] = {
+#if LJ_FR2
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
+0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
+0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
+8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
+0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
+0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
+2,0,76,3,2,0,75,0,1,0,0,2,0
+#else
+0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
+0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
+0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
+128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
+0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
+0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
+0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
+8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
+0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
+0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
+2,0,76,3,2,0,75,0,1,0,0,2,0
+#endif
+};
+
+static const struct { const char *name; int ofs; } libbc_map[] = {
+{"math_deg",0},
+{"math_rad",25},
+{"string_len",50},
+{"table_foreachi",69},
+{"table_foreach",136},
+{"table_getn",207},
+{"table_remove",226},
+{NULL,355}
+};
+
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm_peobj.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_peobj.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/host/buildvm_peobj.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/buildvm_peobj.c
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/genlibbc.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/genlibbc.lua
new file mode 100644
index 0000000000000000000000000000000000000000..4398d8e74d935ec364fa6a0f35a2bf301454485a
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/genlibbc.lua
@@ -0,0 +1,197 @@
+----------------------------------------------------------------------------
+-- Lua script to dump the bytecode of the library functions written in Lua.
+-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
+----------------------------------------------------------------------------
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+
+local ffi = require("ffi")
+local bit = require("bit")
+local vmdef = require("jit.vmdef")
+local bcnames = vmdef.bcnames
+
+local format = string.format
+
+local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
+
+local function usage(arg)
+  io.stderr:write("Usage: ", arg and arg[0] or "genlibbc",
+		  " [-o buildvm_libbc.h] lib_*.c\n")
+  os.exit(1)
+end
+
+local function parse_arg(arg)
+  local outfile = "-"
+  if not (arg and arg[1]) then
+    usage(arg)
+  end
+  if arg[1] == "-o" then
+    outfile = arg[2]
+    if not outfile then usage(arg) end
+    table.remove(arg, 1)
+    table.remove(arg, 1)
+  end
+  return outfile
+end
+
+local function read_files(names)
+  local src = ""
+  for _,name in ipairs(names) do
+    local fp = assert(io.open(name))
+    src = src .. fp:read("*a")
+    fp:close()
+  end
+  return src
+end
+
+local function transform_lua(code)
+  local fixup = {}
+  local n = -30000
+  code = string.gsub(code, "CHECK_(%w*)%((.-)%)", function(tp, var)
+    n = n + 1
+    fixup[n] = { "CHECK", tp }
+    return format("%s=%d", var, n)
+  end)
+  code = string.gsub(code, "PAIRS%((.-)%)", function(var)
+    fixup.PAIRS = true
+    return format("nil, %s, 0", var)
+  end)
+  return "return "..code, fixup
+end
+
+local function read_uleb128(p)
+  local v = p[0]; p = p + 1
+  if v >= 128 then
+    local sh = 7; v = v - 128
+    repeat
+      local r = p[0]
+      v = v + bit.lshift(bit.band(r, 127), sh)
+      sh = sh + 7
+      p = p + 1
+    until r < 128
+  end
+  return p, v
+end
+
+-- ORDER LJ_T
+local name2itype = {
+  str = 5, func = 9, tab = 12, int = 14, num = 15
+}
+
+local BC = {}
+for i=0,#bcnames/6-1 do
+  BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i
+end
+local xop, xra = isbe and 3 or 0, isbe and 2 or 1
+local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
+
+local function fixup_dump(dump, fixup)
+  local buf = ffi.new("uint8_t[?]", #dump+1, dump)
+  local p = buf+5
+  local n, sizebc
+  p, n = read_uleb128(p)
+  local start = p
+  p = p + 4
+  p = read_uleb128(p)
+  p = read_uleb128(p)
+  p, sizebc = read_uleb128(p)
+  local rawtab = {}
+  for i=0,sizebc-1 do
+    local op = p[xop]
+    if op == BC.KSHORT then
+      local rd = p[xrc] + 256*p[xrb]
+      rd = bit.arshift(bit.lshift(rd, 16), 16)
+      local f = fixup[rd]
+      if f then
+	if f[1] == "CHECK" then
+	  local tp = f[2]
+	  if tp == "tab" then rawtab[p[xra]] = true end
+	  p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
+	  p[xrb] = 0
+	  p[xrc] = name2itype[tp]
+	else
+	  error("unhandled fixup type: "..f[1])
+	end
+      end
+    elseif op == BC.TGETV then
+      if rawtab[p[xrb]] then
+	p[xop] = BC.TGETR
+      end
+    elseif op == BC.TSETV then
+      if rawtab[p[xrb]] then
+	p[xop] = BC.TSETR
+      end
+    elseif op == BC.ITERC then
+      if fixup.PAIRS then
+	p[xop] = BC.ITERN
+      end
+    end
+    p = p + 4
+  end
+  return ffi.string(start, n)
+end
+
+local function find_defs(src)
+  local defs = {}
+  for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
+    local env = {}
+    local tcode, fixup = transform_lua(code)
+    local func = assert(load(tcode, "", nil, env))()
+    defs[name] = fixup_dump(string.dump(func, true), fixup)
+    defs[#defs+1] = name
+  end
+  return defs
+end
+
+local function gen_header(defs)
+  local t = {}
+  local function w(x) t[#t+1] = x end
+  w("/* This is a generated file. DO NOT EDIT! */\n\n")
+  w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
+  local s = ""
+  for _,name in ipairs(defs) do
+    s = s .. defs[name]
+  end
+  w("static const uint8_t libbc_code[] = {\n")
+  local n = 0
+  for i=1,#s do
+    local x = string.byte(s, i)
+    w(x); w(",")
+    n = n + (x < 10 and 2 or (x < 100 and 3 or 4))
+    if n >= 75 then n = 0; w("\n") end
+  end
+  w("0\n};\n\n")
+  w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
+  local m = 0
+  for _,name in ipairs(defs) do
+    w('{"'); w(name); w('",'); w(m) w('},\n')
+    m = m + #defs[name]
+  end
+  w("{NULL,"); w(m); w("}\n};\n\n")
+  return table.concat(t)
+end
+
+local function write_file(name, data)
+  if name == "-" then
+    assert(io.write(data))
+    assert(io.flush())
+  else
+    local fp = io.open(name)
+    if fp then
+      local old = fp:read("*a")
+      fp:close()
+      if data == old then return end
+    end
+    fp = assert(io.open(name, "w"))
+    assert(fp:write(data))
+    assert(fp:close())
+  end
+end
+
+local outfile = parse_arg(arg)
+local src = read_files(arg)
+local defs = find_defs(src)
+local hdr = gen_header(defs)
+write_file(outfile, hdr)
+
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/host/genminilua.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/genminilua.lua
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/host/genminilua.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/genminilua.lua
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/host/minilua.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/minilua.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/host/minilua.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/host/minilua.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/jit/bc.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/bc.lua
similarity index 96%
rename from source/libs/luajit/LuaJIT-2.0.4/src/jit/bc.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/bc.lua
index 46a40892e857c5f7b464eaea370416f292b3e4e7..320039ff3532c810538f94d1853fc7a30533d0bf 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/jit/bc.lua
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/bc.lua
@@ -41,7 +41,7 @@
 
 -- Cache some library functions and objects.
 local jit = require("jit")
-assert(jit.version_num == 20004, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local bit = require("bit")
@@ -179,13 +179,12 @@ local function bcliston(outfile)
 end
 
 -- Public module functions.
-module(...)
-
-line = bcline
-dump = bcdump
-targets = bctargets
-
-on = bcliston
-off = bclistoff
-start = bcliston -- For -j command line option.
+return {
+  line = bcline,
+  dump = bcdump,
+  targets = bctargets,
+  on = bcliston,
+  off = bclistoff,
+  start = bcliston -- For -j command line option.
+}
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/jit/bcsave.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/bcsave.lua
similarity index 96%
rename from source/libs/luajit/LuaJIT-2.0.4/src/jit/bcsave.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/bcsave.lua
index 0319b3d25665cc6cccf5796d917888327be84eff..1fe9d38969727d94c3ab3bbf7dda39937a20d2cd 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/jit/bcsave.lua
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/bcsave.lua
@@ -11,7 +11,7 @@
 ------------------------------------------------------------------------------
 
 local jit = require("jit")
-assert(jit.version_num == 20004, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local bit = require("bit")
 
 -- Symbol name prefix for LuaJIT bytecode.
@@ -63,7 +63,7 @@ local map_type = {
 }
 
 local map_arch = {
-  x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true,
+  x86 = true, x64 = true, arm = true, arm64 = true, ppc = true,
   mips = true, mipsel = true,
 }
 
@@ -202,7 +202,7 @@ typedef struct {
   local is64, isbe = false, false
   if ctx.arch == "x64" then
     is64 = true
-  elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
+  elseif ctx.arch == "ppc" or ctx.arch == "mips" then
     isbe = true
   end
 
@@ -237,7 +237,7 @@ typedef struct {
   hdr.eendian = isbe and 2 or 1
   hdr.eversion = 1
   hdr.type = f16(1)
-  hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch])
+  hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
   if ctx.arch == "mips" or ctx.arch == "mipsel" then
     hdr.flags = 0x50001006
   end
@@ -477,13 +477,13 @@ typedef struct {
 } mach_obj_64;
 typedef struct {
   mach_fat_header fat;
-  mach_fat_arch fat_arch[4];
+  mach_fat_arch fat_arch[2];
   struct {
     mach_header hdr;
     mach_segment_command seg;
     mach_section sec;
     mach_symtab_command sym;
-  } arch[4];
+  } arch[2];
   mach_nlist sym_entry;
   uint8_t space[4096];
 } mach_fat_obj;
@@ -494,6 +494,8 @@ typedef struct {
     is64, align, mobj = true, 8, "mach_obj_64"
   elseif ctx.arch == "arm" then
     isfat, mobj = true, "mach_fat_obj"
+  elseif ctx.arch == "arm64" then
+    is64, align, isfat, mobj = true, 8, true, "mach_fat_obj"
   else
     check(ctx.arch == "x86", "unsupported architecture for OSX")
   end
@@ -503,8 +505,8 @@ typedef struct {
   -- Create Mach-O object and fill in header.
   local o = ffi.new(mobj)
   local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
-  local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch]
-  local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch]
+  local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
+  local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
   if isfat then
     o.fat.magic = be32(0xcafebabe)
     o.fat.nfat_arch = be32(#cpusubtype)
@@ -653,7 +655,7 @@ end
 ------------------------------------------------------------------------------
 
 -- Public module functions.
-module(...)
-
-start = docmd -- Process -b command line option.
+return {
+  start = docmd -- Process -b command line option.
+}
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_arm.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_arm.lua
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_arm.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_arm.lua
index 59be715a988d700af6be4fe4a4475d9162c7d9cf..dfcbeeecf14b2d9552bb2d96d9c65f584da40387 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_arm.lua
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_arm.lua
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
 end
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   ctx.code = code
   ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
 end
 
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 16 then return map_gpr[r] end
   return "d"..(r-16)
 end
 
 -- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_mips.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_mips.lua
similarity index 96%
rename from source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_mips.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_mips.lua
index acdd2be1663ea10154aae7dcdb83229d121501a4..9466f45c2c2da7f6913b6a9a19a5146cc3731f9e 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_mips.lua
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_mips.lua
@@ -384,7 +384,7 @@ local function disass_block(ctx, ofs, len)
 end
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   ctx.code = code
   ctx.addr = addr or 0
@@ -396,33 +396,33 @@ local function create_(code, addr, out)
   return ctx
 end
 
-local function create_el_(code, addr, out)
-  local ctx = create_(code, addr, out)
+local function create_el(code, addr, out)
+  local ctx = create(code, addr, out)
   ctx.get = get_le
   return ctx
 end
 
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 
-local function disass_el_(code, addr, out)
-  create_el_(code, addr, out):disass()
+local function disass_el(code, addr, out)
+  create_el(code, addr, out):disass()
 end
 
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 32 then return map_gpr[r] end
   return "f"..(r-32)
 end
 
 -- Public module functions.
-module(...)
-
-create = create_
-create_el = create_el_
-disass = disass_
-disass_el = disass_el_
-regname = regname_
+return {
+  create = create,
+  create_el = create_el,
+  disass = disass,
+  disass_el = disass_el,
+  regname = regname
+}
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_mipsel.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_mipsel.lua
similarity index 75%
rename from source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_mipsel.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_mipsel.lua
index dd9d26ae60784825b5fed6e3febb8f0985201058..f06ffe855e320c2c3a45753408aa6ea2853a1243 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_mipsel.lua
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_mipsel.lua
@@ -8,13 +8,10 @@
 -- MIPS disassembler module. All the interesting stuff is there.
 ------------------------------------------------------------------------------
 
-local require = require
-
-module(...)
-
-local dis_mips = require(_PACKAGE.."dis_mips")
-
-create = dis_mips.create_el
-disass = dis_mips.disass_el
-regname = dis_mips.regname
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+  create = dis_mips.create_el,
+  disass = dis_mips.disass_el,
+  regname = dis_mips.regname
+}
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_ppc.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_ppc.lua
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_ppc.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_ppc.lua
index d05c431116a74c5fbb68a12137cd8cfe2af96e18..e077d7acdd98371ef1f03d93a90ca99b02f5cb69 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_ppc.lua
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_ppc.lua
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
 end
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   ctx.code = code
   ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
 end
 
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 32 then return map_gpr[r] end
   return "f"..(r-32)
 end
 
 -- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_x64.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_x64.lua
similarity index 76%
rename from source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_x64.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_x64.lua
index a80981bd5cb903b375fb20f176267759f5c829d6..15d55243a85ecb3776b8ea469b94b65e6ce1f893 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_x64.lua
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_x64.lua
@@ -8,13 +8,10 @@
 -- x86/x64 disassembler module. All the interesting stuff is there.
 ------------------------------------------------------------------------------
 
-local require = require
-
-module(...)
-
-local dis_x86 = require(_PACKAGE.."dis_x86")
-
-create = dis_x86.create64
-disass = dis_x86.disass64
-regname = dis_x86.regname64
+local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
+return {
+  create = dis_x86.create64,
+  disass = dis_x86.disass64,
+  regname = dis_x86.regname64
+}
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_x86.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_x86.lua
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_x86.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_x86.lua
index 078d6094dcb0b049e63a45937bfd5b105ae16094..6bc38066fe3bc251a47112e5526608e630d4c22f 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dis_x86.lua
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dis_x86.lua
@@ -28,6 +28,8 @@ local type = type
 local sub, byte, format = string.sub, string.byte, string.format
 local match, gmatch, gsub = string.match, string.gmatch, string.gsub
 local lower, rep = string.lower, string.rep
+local bit = require("bit")
+local tohex = bit.tohex
 
 -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
 local map_opc1_32 = {
@@ -532,7 +534,7 @@ local function putpat(ctx, name, pat)
 	local lo = imm % 0x1000000
 	x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
       else
-	x = format("0x%08x", imm)
+	x = "0x"..tohex(imm)
       end
     elseif p == "R" then
       local r = byte(code, pos-1, pos-1)%8
@@ -782,7 +784,7 @@ local function disass_block(ctx, ofs, len)
 end
 
 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
   local ctx = {}
   ctx.code = code
   ctx.addr = (addr or 0) - 1
@@ -796,8 +798,8 @@ local function create_(code, addr, out)
   return ctx
 end
 
-local function create64_(code, addr, out)
-  local ctx = create_(code, addr, out)
+local function create64(code, addr, out)
+  local ctx = create(code, addr, out)
   ctx.x64 = true
   ctx.map1 = map_opc1_64
   ctx.aregs = map_regs.Q
@@ -805,32 +807,32 @@ local function create64_(code, addr, out)
 end
 
 -- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
-  create_(code, addr, out):disass()
+local function disass(code, addr, out)
+  create(code, addr, out):disass()
 end
 
-local function disass64_(code, addr, out)
-  create64_(code, addr, out):disass()
+local function disass64(code, addr, out)
+  create64(code, addr, out):disass()
 end
 
 -- Return register name for RID.
-local function regname_(r)
+local function regname(r)
   if r < 8 then return map_regs.D[r+1] end
   return map_regs.X[r-7]
 end
 
-local function regname64_(r)
+local function regname64(r)
   if r < 16 then return map_regs.Q[r+1] end
   return map_regs.X[r-15]
 end
 
 -- Public module functions.
-module(...)
-
-create = create_
-create64 = create64_
-disass = disass_
-disass64 = disass64_
-regname = regname_
-regname64 = regname64_
+return {
+  create = create,
+  create64 = create64,
+  disass = disass,
+  disass64 = disass64,
+  regname = regname,
+  regname64 = regname64
+}
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dump.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dump.lua
similarity index 96%
rename from source/libs/luajit/LuaJIT-2.0.4/src/jit/dump.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dump.lua
index d15c528ea808e0f6261fe0487a169db7b90ab7b9..5f858492bc4ba1b32a5ad73678647a821782941c 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/jit/dump.lua
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/dump.lua
@@ -55,7 +55,7 @@
 
 -- Cache some library functions and objects.
 local jit = require("jit")
-assert(jit.version_num == 20004, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
 local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
 local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
 local bit = require("bit")
-local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex
 local sub, gsub, format = string.sub, string.gsub, string.format
 local byte, char, rep = string.byte, string.char, string.rep
 local type, tostring = type, tostring
@@ -91,6 +91,7 @@ local function fillsymtab_tr(tr, nexit)
   end
   for i=0,nexit-1 do
     local addr = traceexitstub(tr, i)
+    if addr < 0 then addr = addr + 2^32 end
     t[addr] = tostring(i)
   end
   local addr = traceexitstub(tr, nexit)
@@ -104,7 +105,10 @@ local function fillsymtab(tr, nexit)
     local ircall = vmdef.ircall
     for i=0,#ircall do
       local addr = ircalladdr(i)
-      if addr ~= 0 then t[addr] = ircall[i] end
+      if addr ~= 0 then
+	if addr < 0 then addr = addr + 2^32 end
+	t[addr] = ircall[i]
+      end
     end
   end
   if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -118,6 +122,7 @@ local function fillsymtab(tr, nexit)
 	nexit = 1000000
 	break
       end
+      if addr < 0 then addr = addr + 2^32 end
       t[addr] = tostring(i)
     end
     nexitsym = nexit
@@ -136,6 +141,7 @@ local function dump_mcode(tr)
   local mcode, addr, loop = tracemc(tr)
   if not mcode then return end
   if not disass then disass = require("jit.dis_"..jit.arch) end
+  if addr < 0 then addr = addr + 2^32 end
   out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
   local ctx = disass.create(mcode, addr, dumpwrite)
   ctx.hexdump = 0
@@ -270,8 +276,7 @@ local litname = {
   ["CONV  "] = setmetatable({}, { __index = function(t, mode)
     local s = irtype[band(mode, 31)]
     s = irtype[band(shr(mode, 5), 31)].."."..s
-    if band(mode, 0x400) ~= 0 then s = s.." trunc"
-    elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
+    if band(mode, 0x800) ~= 0 then s = s.." sext" end
     local c = shr(mode, 14)
     if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
     t[mode] = s
@@ -280,6 +285,8 @@ local litname = {
   ["FLOAD "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FPMATH"] = vmdef.irfpm,
+  ["BUFHDR"] = { [0] = "RESET", "APPEND" },
+  ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
 }
 
 local function ctlsub(c)
@@ -607,7 +614,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
       end
     else
       for i=1,ngpr do
-	out:write(format(" %08x", regs[i]))
+	out:write(" ", tohex(regs[i]))
 	if i % 8 == 0 then out:write("\n") end
       end
     end
@@ -691,9 +698,9 @@ local function dumpon(opt, outfile)
 end
 
 -- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+  on = dumpon,
+  off = dumpoff,
+  start = dumpon -- For -j command line option.
+}
 
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/p.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/p.lua
new file mode 100644
index 0000000000000000000000000000000000000000..97d4ccdf87c13586a97dde8ebdd306a9860c901c
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/p.lua
@@ -0,0 +1,310 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module is a simple command line interface to the built-in
+-- low-overhead profiler of LuaJIT.
+--
+-- The lower-level API of the profiler is accessible via the "jit.profile"
+-- module or the luaJIT_profile_* C API.
+--
+-- Example usage:
+--
+--   luajit -jp myapp.lua
+--   luajit -jp=s myapp.lua
+--   luajit -jp=-s myapp.lua
+--   luajit -jp=vl myapp.lua
+--   luajit -jp=G,profile.txt myapp.lua
+--
+-- The following dump features are available:
+--
+--   f  Stack dump: function name, otherwise module:line. Default mode.
+--   F  Stack dump: ditto, but always prepend module.
+--   l  Stack dump: module:line.
+--   <number> stack dump depth (callee < caller). Default: 1.
+--   -<number> Inverse stack dump depth (caller > callee).
+--   s  Split stack dump after first stack level. Implies abs(depth) >= 2.
+--   p  Show full path for module names.
+--   v  Show VM states. Can be combined with stack dumps, e.g. vf or fv.
+--   z  Show zones. Can be combined with stack dumps, e.g. zf or fz.
+--   r  Show raw sample counts. Default: show percentages.
+--   a  Annotate excerpts from source code files.
+--   A  Annotate complete source code files.
+--   G  Produce raw output suitable for graphical tools (e.g. flame graphs).
+--   m<number> Minimum sample percentage to be shown. Default: 3.
+--   i<number> Sampling interval in milliseconds. Default: 10.
+--
+----------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
+local profile = require("jit.profile")
+local vmdef = require("jit.vmdef")
+local math = math
+local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
+local sort, format = table.sort, string.format
+local stdout = io.stdout
+local zone -- Load jit.zone module on demand.
+
+-- Output file handle.
+local out
+
+------------------------------------------------------------------------------
+
+local prof_ud
+local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth
+local prof_ann, prof_count1, prof_count2, prof_samples
+
+local map_vmmode = {
+  N = "Compiled",
+  I = "Interpreted",
+  C = "C code",
+  G = "Garbage Collector",
+  J = "JIT Compiler",
+}
+
+-- Profiler callback, passed to profile.start().
+--   th       thread handed on to profile.dumpstack().
+--   samples  number of samples this call accounts for.
+--   vmmode   VM state code, translated via map_vmmode for "v" output.
+-- Adds samples to prof_samples, to prof_count1[primary] and, when a secondary
+-- key exists, to prof_count2[primary][secondary].
+local function prof_cb(th, samples, vmmode)
+  prof_samples = prof_samples + samples
+  local stack, stack2, state
+  -- State key: VM mode name or innermost zone, if states were requested.
+  if prof_states == "v" then
+    state = map_vmmode[vmmode] or vmmode
+  elseif prof_states then
+    state = zone:get() or "(none)"
+  end
+  -- Stack key(s), if a stack dump format is set.
+  if prof_fmt then
+    local dump = profile.dumpstack(th, prof_fmt, prof_depth)
+    stack = dump:gsub("%[builtin#(%d+)%]", function(id)
+      return vmdef.ffnames[tonumber(id)]
+    end)
+    if prof_split == 2 then
+      -- Cut the dump at the first " < " or " > " separator.
+      local head, tail = stack:match("(.-) [<>] (.*)")
+      if tail then stack, stack2 = head, tail end
+    elseif prof_split == 3 then
+      -- Second level is a separate "l" format dump of depth 1.
+      stack2 = profile.dumpstack(th, "l", 1)
+    end
+  end
+  -- Pick primary and secondary key.
+  local primary, secondary
+  if prof_split == 1 then
+    if state then primary, secondary = state, stack end
+  elseif stack then
+    primary, secondary = stack, stack2 or state
+  end
+  -- Coalesce samples in one or two levels.
+  if primary then
+    prof_count1[primary] = (prof_count1[primary] or 0) + samples
+    if secondary then
+      local sub = prof_count2[primary]
+      if not sub then sub = {}; prof_count2[primary] = sub end
+      sub[secondary] = (sub[secondary] or 0) + samples
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Show top N list: entries of count1 by descending sample count, cut off
+-- below prof_min percent. count2 optionally maps a key to its sub-counts.
+local function prof_top(count1, count2, samples, indent)
+  local keys = {}
+  for key in pairs(count1) do keys[#keys+1] = key end
+  sort(keys, function(a, b) return count1[a] > count1[b] end)
+  for _, key in ipairs(keys) do
+    local hits = count1[key]
+    local pct = floor(hits*100/samples + 0.5)
+    if pct < prof_min then break end
+    local text
+    if not prof_raw then
+      text = format("%s%2d%%  %s\n", indent, pct, key)
+    elseif prof_raw == "r" then
+      text = format("%s%5d  %s\n", indent, hits, key)
+    else
+      text = format("%s %d\n", key, hits)
+    end
+    out:write(text)
+    local nested = count2 and count2[key]
+    if nested then
+      local arrow = "  -- "
+      if prof_split ~= 3 and prof_split ~= 1 then
+	arrow = prof_depth < 0 and "  -> " or "  <- "
+      end
+      prof_top(nested, nil, hits, arrow)
+    end
+  end
+end
+
+-- Annotate source code: write each sampled file to out with per-line counts.
+-- count1 maps "file:line" keys (or bare names without a line) to samples.
+local function prof_annotate(count1, samples)
+  local files, ms = {}, 0 -- files: name -> {line -> value}, plus a name array.
+  for k, v in pairs(count1) do
+    local pct = floor(v*100/samples + 0.5)
+    ms = math.max(ms, v)
+    if pct >= prof_min then
+      local file, line = k:match("^(.*):(%d+)$")
+      if not file then file = k; line = 0 end -- No ":line" suffix: avoid a nil table index.
+      local fl = files[file]
+      if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
+      fl[tonumber(line)] = prof_raw and v or pct
+    end
+  end
+  sort(files)
+  local fmtv, fmtn = " %3d%% | %s\n", "      | %s\n"
+  if prof_raw then
+    local n = math.max(5, math.ceil(math.log10(ms))) -- Column width for raw counts.
+    fmtv = "%"..n.."d | %s\n"
+    fmtn = (" "):rep(n).." | %s\n"
+  end
+  local ann = prof_ann -- Context lines around a hit; 0 means show whole files.
+  for _, file in ipairs(files) do
+    local f0 = file:byte()
+    if f0 == 40 or f0 == 91 then -- Name starts with "(" or "[".
+      out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
+      break
+    end
+    local fp, err = io.open(file)
+    if not fp then
+      out:write(format("====== ERROR: %s: %s\n", file, err))
+      break
+    end
+    out:write(format("\n====== %s ======\n", file))
+    local fl = files[file]
+    local n, show = 1, false -- show: false, or the last line number that keeps output on.
+    if ann ~= 0 then
+      for i=1,ann do
+	if fl[i] then show = i; out:write("@@ 1 @@\n"); break end -- Must be a number: used in show+ann below.
+      end
+    end
+    for line in fp:lines() do
+      if line:byte() == 27 then -- First byte is ESC.
+	out:write("[Cannot annotate bytecode file]\n")
+	break
+      end
+      local v = fl[n]
+      if ann ~= 0 then
+	local v2 = fl[n+ann]
+	if show then
+	  if v2 then show = n+ann elseif v then show = n
+	  elseif show+ann < n then show = false end
+	elseif v2 then
+	  show = n+ann
+	  out:write(format("@@ %d @@\n", n))
+	end
+	if not show then goto next end
+      end
+      if v then
+	out:write(format(fmtv, v, line))
+      else
+	out:write(format(fmtn, line))
+      end
+    ::next::
+      n = n + 1
+    end
+    fp:close()
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Finish profiling and dump result. Guarded by prof_ud, which is now cleared
+-- on every path, so the proxy's __gc finalizer cannot report a second time.
+local function prof_finish()
+  if prof_ud then
+    profile.stop()
+    local samples = prof_samples
+    if samples == 0 then
+      if prof_raw ~= true then out:write("[No samples collected]\n") end
+    elseif prof_ann then
+      prof_annotate(prof_count1, samples)
+    else
+      prof_top(prof_count1, prof_count2, samples, "")
+    end
+    prof_count1 = nil
+    prof_count2 = nil
+    prof_ud = nil
+    if out ~= stdout then out:close() end -- Flush and release the file opened by start().
+  end
+end
+
+-- Start profiling: parse the mode string into the prof_* settings, then start the profiler.
+local function prof_start(mode)
+  local interval = ""
+  mode = mode:gsub("i%d*", function(s) interval = s; return "" end) -- Strip "i<number>" first so its digits are not read as a depth.
+  prof_min = 3
+  mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end) -- Strip "m<number>" likewise.
+  prof_depth = 1
+  mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end) -- Any remaining signed number is the depth.
+  local m = {} -- Set of the remaining single-character options.
+  for c in mode:gmatch(".") do m[c] = c end
+  prof_states = m.z or m.v -- "z", "v" or nil.
+  if prof_states == "z" then zone = require("jit.zone") end -- Loaded on demand.
+  local scope = m.l or m.f or m.F or (prof_states and "" or "f") -- "" if only states were requested.
+  local flags = (m.p or "")
+  prof_raw = m.r
+  if m.s then
+    prof_split = 2
+    if prof_depth == -1 or m["-"] then prof_depth = -2 -- Splitting needs abs(depth) >= 2.
+    elseif prof_depth == 1 then prof_depth = 2 end
+  elseif mode:find("[fF].*l") then -- "f" or "F" followed by "l".
+    scope = "l"
+    prof_split = 3
+  else
+    prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0 -- 1 puts the state key first.
+  end
+  prof_ann = m.A and 0 or (m.a and 3) -- "A" yields 0 (true in Lua), "a" yields 3, otherwise nil.
+  if prof_ann then
+    scope = "l"
+    prof_fmt = "pl"
+    prof_split = 0
+    prof_depth = 1
+  elseif m.G and scope ~= "" then -- Raw output for graphical tools.
+    prof_fmt = flags..scope.."Z;"
+    prof_depth = -100
+    prof_raw = true
+    prof_min = 0
+  elseif scope == "" then
+    prof_fmt = false -- No stack dump; prof_cb then uses the state key only.
+  else
+    local sc = prof_split == 3 and m.f or m.F or scope
+    prof_fmt = flags..sc..(prof_depth >= 0 and "Z < " or "Z > ")
+  end
+  prof_count1 = {}
+  prof_count2 = {}
+  prof_samples = 0
+  profile.start(scope:lower()..interval, prof_cb)
+  prof_ud = newproxy(true) -- Userdata proxy with its own metatable.
+  getmetatable(prof_ud).__gc = prof_finish -- Finish and dump when the proxy is collected.
+end
+
+------------------------------------------------------------------------------
+
+local function start(mode, outfile) -- outfile falls back to $LUAJIT_PROFILEFILE; mode to "f".
+  local path = outfile or os.getenv("LUAJIT_PROFILEFILE")
+  if path and path ~= "-" then
+    out = assert(io.open(path, "w"))
+  else
+    out = stdout -- No file given, or "-" explicitly requests stdout.
+  end
+  prof_start(mode or "f")
+end
+
+-- Public module functions.
+return {
+  start = start, -- For -j command line option.
+  stop = prof_finish
+}
+
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/jit/v.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/v.lua
similarity index 94%
rename from source/libs/luajit/LuaJIT-2.0.4/src/jit/v.lua
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/v.lua
index 32666fd1a2cc0f26f22d65be41d0ee6946578fd0..157c34bc6431fbb643b05e838acb1a06094cd452 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/jit/v.lua
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/v.lua
@@ -59,7 +59,7 @@
 
 -- Cache some library functions and objects.
 local jit = require("jit")
-assert(jit.version_num == 20004, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
       if ltype == "interpreter" then
 	out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
 	  tr, startex, startloc))
+      elseif ltype == "stitch" then
+	out:write(format("[TRACE %3s %s%s %s %s]\n",
+	  tr, startex, startloc, ltype, fmtfunc(func, pc)))
       elseif link == tr or link == 0 then
 	out:write(format("[TRACE %3s %s%s %s]\n",
 	  tr, startex, startloc, ltype))
@@ -159,9 +162,9 @@ local function dumpon(outfile)
 end
 
 -- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+  on = dumpon,
+  off = dumpoff,
+  start = dumpon -- For -j command line option.
+}
 
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/zone.lua b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/zone.lua
new file mode 100644
index 0000000000000000000000000000000000000000..69f0f169147f0894697e899f56a7d7601a111e8a
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/jit/zone.lua
@@ -0,0 +1,45 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler zones.
+--
+-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module implements a simple hierarchical zone model.
+--
+-- Example usage:
+--
+--   local zone = require("jit.zone")
+--   zone("AI")
+--   ...
+--     zone("A*")
+--     ...
+--     print(zone:get()) --> "A*"
+--     ...
+--     zone()
+--   ...
+--   print(zone:get()) --> "AI"
+--   ...
+--   zone()
+--
+----------------------------------------------------------------------------
+
+local remove = table.remove
+
+-- The module table doubles as the zone stack (array part) and carries the
+-- flush/get methods; calling it pushes a zone or, without argument, pops one.
+return setmetatable({
+  flush = function(stack) -- Drop all zones, top first.
+    for top=#stack,1,-1 do stack[top] = nil end
+  end,
+  get = function(stack) return stack[#stack] end -- Innermost zone or nil.
+}, {
+  __call = function(stack, name)
+    if not name then
+      local popped = assert(remove(stack), "empty zone stack")
+      return popped
+    end
+    stack[#stack+1] = name
+  end
+})
+
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lauxlib.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lauxlib.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lauxlib.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lauxlib.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_aux.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_aux.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_aux.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_aux.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_base.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_base.c
similarity index 90%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_base.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_base.c
index 17b9525d667ef61ba88cd3fb732f24e0fbc8574a..887fea7a58a233ef5df8c3b9f820f93720b2c675 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lib_base.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_base.c
@@ -32,6 +32,7 @@
 #include "lj_dispatch.h"
 #include "lj_char.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 #include "lj_lib.h"
 
 /* -- Base library: checks ------------------------------------------------ */
@@ -86,10 +87,11 @@ static int ffh_pairs(lua_State *L, MMS mm)
   cTValue *mo = lj_meta_lookup(L, o, mm);
   if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
     L->top = o+1;  /* Only keep one argument. */
-    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    copyTV(L, L->base-1-LJ_FR2, mo);  /* Replace callable. */
     return FFH_TAILCALL;
   } else {
     if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
+    if (LJ_FR2) { copyTV(L, o-1, o); o--; }
     setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
     if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
     return FFH_RES(3);
@@ -100,7 +102,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
 #endif
 
 LJLIB_PUSH(lastcl)
-LJLIB_ASM(pairs)
+LJLIB_ASM(pairs)		LJLIB_REC(xpairs 0)
 {
   return ffh_pairs(L, MM_pairs);
 }
@@ -113,7 +115,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux)	LJLIB_REC(.)
 }
 
 LJLIB_PUSH(lastcl)
-LJLIB_ASM(ipairs)		LJLIB_REC(.)
+LJLIB_ASM(ipairs)		LJLIB_REC(xpairs 1)
 {
   return ffh_pairs(L, MM_ipairs);
 }
@@ -131,11 +133,11 @@ LJLIB_ASM(setmetatable)		LJLIB_REC(.)
     lj_err_caller(L, LJ_ERR_PROTMT);
   setgcref(t->metatable, obj2gco(mt));
   if (mt) { lj_gc_objbarriert(L, t, mt); }
-  settabV(L, L->base-1, t);
+  settabV(L, L->base-1-LJ_FR2, t);
   return FFH_RES(1);
 }
 
-LJLIB_CF(getfenv)
+LJLIB_CF(getfenv)		LJLIB_REC(.)
 {
   GCfunc *fn;
   cTValue *o = L->base;
@@ -144,6 +146,7 @@ LJLIB_CF(getfenv)
     o = lj_debug_frame(L, level, &level);
     if (o == NULL)
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
+    if (LJ_FR2) o--;
   }
   fn = &gcval(o)->fn;
   settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
@@ -165,6 +168,7 @@ LJLIB_CF(setfenv)
     o = lj_debug_frame(L, level, &level);
     if (o == NULL)
       lj_err_arg(L, 1, LJ_ERR_INVLVL);
+    if (LJ_FR2) o--;
   }
   fn = &gcval(o)->fn;
   if (!isluafunc(fn))
@@ -257,7 +261,7 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
   if (base == 10) {
     TValue *o = lj_lib_checkany(L, 1);
     if (lj_strscan_numberobj(o)) {
-      copyTV(L, L->base-1, o);
+      copyTV(L, L->base-1-LJ_FR2, o);
       return FFH_RES(1);
     }
 #if LJ_HASFFI
@@ -270,11 +274,11 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
 	    ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
 	  int32_t i;
 	  lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
-	  setintV(L->base-1, i);
+	  setintV(L->base-1-LJ_FR2, i);
 	  return FFH_RES(1);
 	}
 	lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
-		       (uint8_t *)&(L->base-1)->n, o, 0);
+		       (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0);
 	return FFH_RES(1);
       }
     }
@@ -290,45 +294,29 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
       while (lj_char_isspace((unsigned char)(*ep))) ep++;
       if (*ep == '\0') {
 	if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
-	  setintV(L->base-1, (int32_t)ul);
+	  setintV(L->base-1-LJ_FR2, (int32_t)ul);
 	else
-	  setnumV(L->base-1, (lua_Number)ul);
+	  setnumV(L->base-1-LJ_FR2, (lua_Number)ul);
 	return FFH_RES(1);
       }
     }
   }
-  setnilV(L->base-1);
+  setnilV(L->base-1-LJ_FR2);
   return FFH_RES(1);
 }
 
-LJLIB_PUSH("nil")
-LJLIB_PUSH("false")
-LJLIB_PUSH("true")
 LJLIB_ASM(tostring)		LJLIB_REC(.)
 {
   TValue *o = lj_lib_checkany(L, 1);
   cTValue *mo;
   L->top = o+1;  /* Only keep one argument. */
   if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
-    copyTV(L, L->base-1, mo);  /* Replace callable. */
+    copyTV(L, L->base-1-LJ_FR2, mo);  /* Replace callable. */
     return FFH_TAILCALL;
-  } else {
-    GCstr *s;
-    if (tvisnumber(o)) {
-      s = lj_str_fromnumber(L, o);
-    } else if (tvispri(o)) {
-      s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
-    } else {
-      if (tvisfunc(o) && isffunc(funcV(o)))
-	lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
-      else
-	lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
-      /* Note: lua_pushfstring calls the GC which may invalidate o. */
-      s = strV(L->top-1);
-    }
-    setstrV(L, L->base-1, s);
-    return FFH_RES(1);
   }
+  lj_gc_check(L);
+  setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base));
+  return FFH_RES(1);
 }
 
 /* -- Base library: throw and catch errors -------------------------------- */
@@ -440,7 +428,7 @@ LJLIB_CF(dofile)
 
 LJLIB_CF(gcinfo)
 {
-  setintV(L->top++, (G(L)->gc.total >> 10));
+  setintV(L->top++, (int32_t)(G(L)->gc.total >> 10));
   return 1;
 }
 
@@ -506,21 +494,13 @@ LJLIB_CF(print)
   }
   shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
   for (i = 0; i < nargs; i++) {
+    cTValue *o = &L->base[i];
+    char buf[STRFMT_MAXBUF_NUM];
     const char *str;
     size_t size;
-    cTValue *o = &L->base[i];
-    if (shortcut && tvisstr(o)) {
-      str = strVdata(o);
-      size = strV(o)->len;
-    } else if (shortcut && tvisint(o)) {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, intV(o));
-      size = (size_t)(buf+LJ_STR_INTBUF-p);
-      str = p;
-    } else if (shortcut && tvisnum(o)) {
-      char buf[LJ_STR_NUMBUF];
-      size = lj_str_bufnum(buf, o);
-      str = buf;
+    MSize len;
+    if (shortcut && (str = lj_strfmt_wstrnum(buf, o, &len)) != NULL) {
+      size = len;
     } else {
       copyTV(L, L->top+1, o);
       copyTV(L, L->top, L->top-1);
@@ -558,7 +538,7 @@ LJLIB_CF(coroutine_status)
   if (co == L) s = "running";
   else if (co->status == LUA_YIELD) s = "suspended";
   else if (co->status != 0) s = "dead";
-  else if (co->base > tvref(co->stack)+1) s = "normal";
+  else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
   else if (co->top == co->base) s = "dead";
   else s = "suspended";
   lua_pushstring(L, s);
@@ -600,8 +580,8 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
       (co->status == 0 && co->top == co->base)) {
     ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
     if (wrap) lj_err_caller(L, em);
-    setboolV(L->base-1, 0);
-    setstrV(L, L->base, lj_err_str(L, em));
+    setboolV(L->base-1-LJ_FR2, 0);
+    setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
     return FFH_RES(2);
   }
   lj_state_growstack(co, (MSize)(L->top - L->base));
@@ -642,9 +622,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
 
 LJLIB_CF(coroutine_wrap)
 {
+  GCfunc *fn;
   lj_cf_coroutine_create(L);
-  lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
-  setpc_wrap_aux(L, funcV(L->top-1));
+  fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
+  setpc_wrap_aux(L, fn);
   return 1;
 }
 
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_bit.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_bit.c
new file mode 100644
index 0000000000000000000000000000000000000000..55cb2a845a89c93365e74c467c81acd98db21029
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_bit.c
@@ -0,0 +1,210 @@
+/*
+** Bit manipulation library.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_bit_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_strscan.h"
+#include "lj_strfmt.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lj_cconv.h"
+#include "lj_carith.h"
+#endif
+#include "lj_ff.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_bit
+
+#if LJ_HASFFI
+/* Store x as a new 8 byte cdata of type id in the single result slot. */
+static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
+{
+  GCcdata *cd = lj_cdata_new_(L, id, 8);
+  *(uint64_t *)cdataptr(cd) = x;
+  setcdataV(L, L->base-1-LJ_FR2, cd);
+  return FFH_RES(1);
+}
+#else
+/*
+** Check that argument narg is present and accepted by lj_strscan_numberobj()
+** and return it as a 32 bit integer. Raises an argument type error otherwise.
+*/
+static int32_t bit_checkbit(lua_State *L, int narg)
+{
+  TValue *o = L->base + narg-1;
+  if (!(o < L->top && lj_strscan_numberobj(o)))
+    lj_err_argt(L, narg, LUA_TNUMBER);
+  if (LJ_LIKELY(tvisint(o))) {
+    return intV(o);
+  } else {
+    int32_t i = lj_num2bit(numV(o));
+    if (LJ_DUALNUM) setintV(o, i);  /* Write the converted value back. */
+    return i;
+  }
+}
+#endif
+
+/* bit.tobit(x). The FFI build stores the checked value truncated to int32_t. */
+LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_tobit)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id));
+  return FFH_RES(1);
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+/*
+** bit.bnot(x). A non-zero type ID from lj_carith_check64() selects the boxed
+** 64 bit result; otherwise FFH_RETRY is returned.
+*/
+LJLIB_ASM(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, ~x) : FFH_RETRY;
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+/* bit.bswap(x). Same structure as bit.bnot, using lj_bswap64(). */
+LJLIB_ASM(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
+#else
+  lj_lib_checknumber(L, 1);
+  return FFH_RETRY;
+#endif
+}
+
+/*
+** bit.lshift(x, n). With a non-zero type ID for x, the 64 bit operation is
+** selected by the fast function ID relative to FF_bit_lshift, so the shift
+** and rotate entries listed after this body can presumably share it.
+*/
+LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
+{
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;
+  uint64_t x = lj_carith_check64(L, 1, &id);
+  int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
+  if (id) {
+    x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
+    return bit_result64(L, id, x);
+  }
+  if (id2) setintV(L->base+1, sh);  /* Store converted count back in arg 2. */
+  return FFH_RETRY;
+#else
+  lj_lib_checknumber(L, 1);
+  bit_checkbit(L, 2);
+  return FFH_RETRY;
+#endif
+}
+LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
+LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
+LJLIB_ASM_(bit_rol)		LJLIB_REC(bit_shift IR_BROL)
+LJLIB_ASM_(bit_ror)		LJLIB_REC(bit_shift IR_BROR)
+
+/*
+** bit.band(...). All arguments are checked; if that leaves a non-zero type
+** ID, every argument is converted to that type and folded with AND, OR or
+** XOR, selected by the fast function ID relative to FF_bit_bor.
+*/
+LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
+{
+#if LJ_HASFFI
+  CTypeID id = 0;
+  TValue *o = L->base, *top = L->top;
+  int i = 0;
+  do { lj_carith_check64(L, ++i, &id); } while (++o < top);  /* Arg 1 always. */
+  if (id) {
+    CTState *cts = ctype_cts(L);
+    CType *ct = ctype_get(cts, id);
+    int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
+    uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;  /* All ones only for AND. */
+    o = L->base;
+    do {
+      lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
+      if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
+    } while (++o < top);
+    return bit_result64(L, id, y);
+  }
+  return FFH_RETRY;
+#else
+  int i = 0;
+  do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
+  return FFH_RETRY;
+#endif
+}
+LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
+LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)
+
+/* ------------------------------------------------------------------------ */
+
+/*
+** bit.tohex(x [,n]). Formats x as hex with precision |n|, masking x to its
+** low 4*|n| bits for narrow widths; a negative n selects STRFMT_F_UPPER.
+** Without n the width is 8, or 16 if lj_carith_check64() reported a type ID.
+** |n| is capped at 254.
+*/
+LJLIB_CF(bit_tohex)		LJLIB_REC(.)
+{
+#if LJ_HASFFI
+  CTypeID id = 0, id2 = 0;
+  uint64_t b = lj_carith_check64(L, 1, &id);
+  int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
+				  (int32_t)lj_carith_check64(L, 2, &id2);
+#else
+  uint32_t b = (uint32_t)bit_checkbit(L, 1);
+  int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
+#endif
+  SBuf *sb = lj_buf_tmp_(L);
+  SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
+  /* Negate in unsigned arithmetic: -n would overflow for INT32_MIN. */
+  if (n < 0) { n = (int32_t)(~(uint32_t)n+1u); sf |= STRFMT_F_UPPER; }
+  /* Cap the width: the precision field below only keeps (n+1)&255. */
+  if ((uint32_t)n > 254) n = 254;
+  sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
+#if LJ_HASFFI
+  if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
+#else
+  if (n < 8) b &= (1u << 4*n)-1;
+#endif
+  sb = lj_strfmt_putfxint(sb, sf, b);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+/* Open the library: registers the bit functions under LUA_BITLIBNAME. */
+LUALIB_API int luaopen_bit(lua_State *L)
+{
+  LJ_LIB_REG(L, LUA_BITLIBNAME, bit);
+  return 1;
+}
+
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_debug.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_debug.c
similarity index 99%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_debug.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_debug.c
index e87c35cfe945332c53f1ba5bed974a1c44c729a0..b610fb4dcddd7b62653e198d3dc9aee8ab9e57d7 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lib_debug.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_debug.c
@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry)
   return 1;
 }
 
-LJLIB_CF(debug_getmetatable)
+LJLIB_CF(debug_getmetatable)	LJLIB_REC(.)
 {
   lj_lib_checkany(L, 1);
   if (!lua_getmetatable(L, 1)) {
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_ffi.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_ffi.c
similarity index 95%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_ffi.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_ffi.c
index f6df39d67f9a7eecd9113c616a61bc00dbdeda15..b2b2d37ff7bf1e98bba0d82f4a50b3d21289fedc 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lib_ffi.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_ffi.c
@@ -29,6 +29,7 @@
 #include "lj_ccall.h"
 #include "lj_ccallback.h"
 #include "lj_clib.h"
+#include "lj_strfmt.h"
 #include "lj_ff.h"
 #include "lj_lib.h"
 
@@ -137,7 +138,7 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
       }
     }
     copyTV(L, base, L->top);
-    tv = L->top-1;
+    tv = L->top-1-LJ_FR2;
   }
   return lj_meta_tailcall(L, tv);
 }
@@ -318,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring)
       }
     }
   }
-  lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
+  lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
 checkgc:
   lj_gc_check(L);
   return 1;
@@ -507,7 +508,7 @@ LJLIB_CF(ffi_new)	LJLIB_REC(.)
   if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
     cd = lj_cdata_new(cts, id, sz);
   else
-    cd = lj_cdata_newv(cts, id, sz, ctype_align(info));
+    cd = lj_cdata_newv(L, id, sz, ctype_align(info));
   setcdataV(L, o-1, cd);  /* Anchor the uninitialized cdata. */
   lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
 		   o, (MSize)(L->top - o));  /* Initialize cdata. */
@@ -558,6 +559,31 @@ LJLIB_CF(ffi_typeof)	LJLIB_REC(.)
   return 1;
 }
 
+/* Internal and unsupported API: info table for a ctype ID, else no result. */
+LJLIB_CF(ffi_typeinfo)
+{
+  CTState *cts = ctype_cts(L);
+  CTypeID id = (CTypeID)ffi_checkint(L, 1);
+  if (id > 0 && id < cts->top) {  /* Only IDs strictly between 0 and top. */
+    CType *ct = ctype_get(cts, id);
+    GCtab *t;
+    lua_createtable(L, 0, 4);  /* Increment hash size if fields are added. */
+    t = tabV(L->top-1);
+    setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info);
+    if (ct->size != CTSIZE_INVALID)  /* "size" is omitted when invalid. */
+      setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size);
+    if (ct->sib)  /* "sib" is omitted when zero. */
+      setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
+    if (gcref(ct->name)) {  /* "name" is omitted when the type has none. */
+      GCstr *s = gco2str(gcref(ct->name));
+      setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
+    }
+    lj_gc_check(L);
+    return 1;  /* The table created above. */
+  }
+  return 0;  /* ID out of range: no results. */
+}
+
 LJLIB_CF(ffi_istype)	LJLIB_REC(.)
 {
   CTState *cts = ctype_cts(L);
@@ -725,6 +751,9 @@ LJLIB_CF(ffi_abi)	LJLIB_REC(.)
   case H_(4ab624a8,4ab624a8): b = 1; break;  /* win */
 #endif
   case H_(3af93066,1f001464): b = 1; break;  /* le/be */
+#if LJ_GC64
+  case H_(9e89d2c9,13c83c92): b = 1; break;  /* gc64 */
+#endif
   default:
     break;
   }
@@ -768,19 +797,11 @@ LJLIB_CF(ffi_gc)	LJLIB_REC(.)
   GCcdata *cd = ffi_checkcdata(L, 1);
   TValue *fin = lj_lib_checkany(L, 2);
   CTState *cts = ctype_cts(L);
-  GCtab *t = cts->finalizer;
   CType *ct = ctype_raw(cts, cd->ctypeid);
   if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
 	ctype_isrefarray(ct->info)))
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
-  if (gcref(t->metatable)) {  /* Update finalizer table, if still enabled. */
-    copyTV(L, lj_tab_set(L, t, L->base), fin);
-    lj_gc_anybarriert(L, t);
-    if (!tvisnil(fin))
-      cd->marked |= LJ_GC_CDATA_FIN;
-    else
-      cd->marked &= ~LJ_GC_CDATA_FIN;
-  }
+  lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
   L->top = L->base+1;  /* Pass through the cdata object. */
   return 1;
 }
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_init.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_init.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_init.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_init.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_io.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_io.c
similarity index 93%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_io.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_io.c
index 037aa28e562c514867dbcb5f1b26e20f6183bd2a..2aa834746e1f2f4771db0d0a528b426680fd35af 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lib_io.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_io.c
@@ -19,8 +19,10 @@
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_state.h"
+#include "lj_strfmt.h"
 #include "lj_ff.h"
 #include "lj_lib.h"
 
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
   IOFileUD *iof = io_file_new(L);
   iof->fp = fopen(fname, mode);
   if (iof->fp == NULL)
-    luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno)));
+    luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
   return iof;
 }
 
@@ -97,7 +99,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
     int stat = -1;
 #if LJ_TARGET_POSIX
     stat = pclose(iof->fp);
-#elif LJ_TARGET_WINDOWS
+#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE
     stat = _pclose(iof->fp);
 #else
     lua_assert(0);
@@ -145,7 +147,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
   MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
   char *buf;
   for (;;) {
-    buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    buf = lj_buf_tmp(L, m);
     if (fgets(buf+n, m-n, fp) == NULL) break;
     n += (MSize)strlen(buf+n);
     ok |= n;
@@ -161,7 +163,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
 {
   MSize m, n;
   for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
-    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    char *buf = lj_buf_tmp(L, m);
     n += (MSize)fread(buf+n, 1, m-n, fp);
     if (n != m) {
       setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
@@ -174,7 +176,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
 static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
 {
   if (m) {
-    char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+    char *buf = lj_buf_tmp(L, m);
     MSize n = (MSize)fread(buf, 1, m, fp);
     setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
     lj_gc_check(L);
@@ -230,19 +232,12 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
   cTValue *tv;
   int status = 1;
   for (tv = L->base+start; tv < L->top; tv++) {
-    if (tvisstr(tv)) {
-      MSize len = strV(tv)->len;
-      status = status && (fwrite(strVdata(tv), 1, len, fp) == len);
-    } else if (tvisint(tv)) {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, intV(tv));
-      size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
-      status = status && (fwrite(p, 1, len, fp) == len);
-    } else if (tvisnum(tv)) {
-      status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
-    } else {
+    char buf[STRFMT_MAXBUF_NUM];
+    MSize len;
+    const char *p = lj_strfmt_wstrnum(buf, tv, &len);
+    if (!p)
       lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
-    }
+    status = status && (fwrite(p, 1, len, fp) == len);
   }
   if (LJ_52 && status) {
     L->top = L->base+1;
@@ -278,6 +273,15 @@ static int io_file_iter(lua_State *L)
   return n;
 }
 
+static int io_file_lines(lua_State *L)
+{
+  int n = (int)(L->top - L->base);  /* Every stack value becomes an upvalue. */
+  if (n > LJ_MAX_UPVAL)  /* Guard added with this helper for both callers. */
+    lj_err_caller(L, LJ_ERR_UNPACK);
+  lua_pushcclosure(L, io_file_iter, n);  /* Iterator closure is the result. */
+  return 1;
+}
+
 /* -- I/O file methods ---------------------------------------------------- */
 
 #define LJLIB_MODULE_io_method
@@ -361,8 +365,7 @@ LJLIB_CF(io_method_setvbuf)
 LJLIB_CF(io_method_lines)
 {
   io_tofile(L);
-  lua_pushcclosure(L, io_file_iter, (int)(L->top - L->base));
-  return 1;
+  return io_file_lines(L);
 }
 
 LJLIB_CF(io_method___gc)
@@ -405,7 +408,7 @@ LJLIB_CF(io_open)
 
 LJLIB_CF(io_popen)
 {
-#if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS
+#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE)
   const char *fname = strdata(lj_lib_checkstr(L, 1));
   GCstr *s = lj_lib_optstr(L, 2);
   const char *mode = s ? strdata(s) : "r";
@@ -492,8 +495,7 @@ LJLIB_CF(io_lines)
   } else {  /* io.lines() iterates over stdin. */
     setudataV(L, L->base, IOSTDF_UD(L, GCROOT_IO_INPUT));
   }
-  lua_pushcclosure(L, io_file_iter, (int)(L->top - L->base));
-  return 1;
+  return io_file_lines(L);
 }
 
 LJLIB_CF(io_type)
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_jit.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_jit.c
similarity index 82%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_jit.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_jit.c
index 96525faf271199af60fdc69752f18b712f308114..178ef249df3feacefa9464f182ac9050ebe91053 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lib_jit.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_jit.c
@@ -10,13 +10,17 @@
 #include "lauxlib.h"
 #include "lualib.h"
 
-#include "lj_arch.h"
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_debug.h"
 #include "lj_str.h"
 #include "lj_tab.h"
+#include "lj_state.h"
 #include "lj_bc.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
 #if LJ_HASJIT
 #include "lj_ir.h"
 #include "lj_jit.h"
@@ -24,6 +28,7 @@
 #include "lj_iropt.h"
 #include "lj_target.h"
 #endif
+#include "lj_trace.h"
 #include "lj_dispatch.h"
 #include "lj_vm.h"
 #include "lj_vmevent.h"
@@ -279,7 +284,7 @@ static GCtrace *jit_checktrace(lua_State *L)
 /* Names of link types. ORDER LJ_TRLINK */
 static const char *const jit_trlinkname[] = {
   "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
-  "interpreter", "return"
+  "interpreter", "return", "stitch"
 };
 
 /* local info = jit.util.traceinfo(tr) */
@@ -332,6 +337,13 @@ LJLIB_CF(jit_util_tracek)
       slot = ir->op2;
       ir = &T->ir[ir->op1];
     }
+#if LJ_HASFFI
+    if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
+      ptrdiff_t oldtop = savestack(L, L->top);
+      luaopen_ffi(L);  /* Load FFI library on-demand. */
+      L->top = restorestack(L, oldtop);
+    }
+#endif
     lj_ir_kvalue(L, L->top-2, ir);
     setintV(L->top-1, (int32_t)irt_type(ir->t));
     if (slot == -1)
@@ -416,6 +428,12 @@ LJLIB_CF(jit_util_ircalladdr)
 
 #include "lj_libdef.h"
 
+static int luaopen_jit_util(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, jit_util);  /* No global name is passed here. */
+  return 1;  /* One result, as with the other luaopen_* functions here. */
+}
+
 /* -- jit.opt module ------------------------------------------------------ */
 
 #if LJ_HASJIT
@@ -513,6 +531,104 @@ LJLIB_CF(jit_opt_start)
 
 #endif
 
+/* -- jit.profile module -------------------------------------------------- */
+
+#if LJ_HASPROFILE
+
+#define LJLIB_MODULE_jit_profile
+
+/* Not loaded by default, use: local profile = require("jit.profile") */
+
+static const char KEY_PROFILE_THREAD = 't';  /* Address keys the thread. */
+static const char KEY_PROFILE_FUNC = 'f';  /* Address keys the callback. */
+
+static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
+				 int vmstate)
+{
+  TValue key;
+  cTValue *tv;
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  tv = lj_tab_get(L, tabV(registry(L)), &key);  /* Anchored callback, if any. */
+  if (tvisfunc(tv)) {  /* Nothing to do unless a function is registered. */
+    char vmst = (char)vmstate;
+    int status;
+    setfuncV(L2, L2->top++, funcV(tv));  /* Call is set up on L2, not on L. */
+    setthreadV(L2, L2->top++, L);
+    setintV(L2->top++, samples);
+    setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));  /* 1-char string. */
+    status = lua_pcall(L2, 3, 0, 0);  /* callback(thread, samples, vmstate) */
+    if (status) {  /* Callback failed: run panic handler if set, then exit. */
+      if (G(L2)->panic) G(L2)->panic(L2);
+      exit(EXIT_FAILURE);
+    }
+    lj_trace_abort(G(L2));
+  }
+}
+
+/* profile.start(mode, cb): anchor cb and a new thread, start the profiler. */
+LJLIB_CF(jit_profile_start)
+{
+  GCtab *registry = tabV(registry(L));
+  GCstr *mode = lj_lib_optstr(L, 1);  /* Optional; "" is passed if absent. */
+  GCfunc *func = lj_lib_checkfunc(L, 2);
+  lua_State *L2 = lua_newthread(L);  /* Thread that runs profiler callback. */
+  TValue key;
+  /* Anchor thread and function in registry. */
+  setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
+  setthreadV(L, lj_tab_set(L, registry, &key), L2);
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  setfuncV(L, lj_tab_set(L, registry, &key), func);
+  lj_gc_anybarriert(L, registry);
+  luaJIT_profile_start(L, mode ? strdata(mode) : "",
+		       (luaJIT_profile_callback)jit_profile_callback, L2);
+  return 0;  /* No Lua results. */
+}
+
+/* profile.stop(): stop the profiler, then clear both registry anchors. */
+LJLIB_CF(jit_profile_stop)
+{
+  GCtab *registry;
+  TValue key;
+  luaJIT_profile_stop(L);  /* Stopped before the anchors are cleared. */
+  registry = tabV(registry(L));
+  setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
+  setnilV(lj_tab_set(L, registry, &key));
+  setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
+  setnilV(lj_tab_set(L, registry, &key));
+  lj_gc_anybarriert(L, registry);
+  return 0;  /* No Lua results. */
+}
+
+/* dump = profile.dumpstack([thread,] fmt, depth) */
+LJLIB_CF(jit_profile_dumpstack)
+{
+  lua_State *L2 = L;  /* Default: dump the calling thread. */
+  int arg = 0;
+  size_t len;
+  int depth;
+  GCstr *fmt;
+  const char *p;
+  if (L->top > L->base && tvisthread(L->base)) {  /* Optional thread arg. */
+    L2 = threadV(L->base);
+    arg = 1;  /* fmt and depth move one position to the right. */
+  }
+  fmt = lj_lib_checkstr(L, arg+1);
+  depth = lj_lib_checkint(L, arg+2);
+  p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len);
+  lua_pushlstring(L, p, len);  /* Result is pushed on L, not on L2. */
+  return 1;
+}
+
+#include "lj_libdef.h"
+
+static int luaopen_jit_profile(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, jit_profile);  /* No global name is passed here. */
+  return 1;  /* One result, as with the other luaopen_* functions here. */
+}
+
+#endif
+
 /* -- JIT compiler initialization ----------------------------------------- */
 
 #if LJ_HASJIT
@@ -538,23 +654,17 @@ static uint32_t jit_cpudetect(lua_State *L)
   uint32_t features[4];
   if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
 #if !LJ_HASJIT
-#define JIT_F_CMOV	1
 #define JIT_F_SSE2	2
 #endif
-    flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
     flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
 #if LJ_HASJIT
     flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
     flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
     if (vendor[2] == 0x6c65746e) {  /* Intel. */
-      if ((features[0] & 0x0ff00f00) == 0x00000f00)  /* P4. */
-	flags |= JIT_F_P4;  /* Currently unused. */
-      else if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
+      if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
 	flags |= JIT_F_LEA_AGU;
     } else if (vendor[2] == 0x444d4163) {  /* AMD. */
       uint32_t fam = (features[0] & 0x0ff00f00);
-      if (fam == 0x00000f00)  /* K8. */
-	flags |= JIT_F_SPLIT_XMM;
       if (fam >= 0x00000f00)  /* K8, K10. */
 	flags |= JIT_F_PREFER_IMUL;
     }
@@ -562,14 +672,8 @@ static uint32_t jit_cpudetect(lua_State *L)
   }
   /* Check for required instruction set support on x86 (unnecessary on x64). */
 #if LJ_TARGET_X86
-#if !defined(LUAJIT_CPU_NOCMOV)
-  if (!(flags & JIT_F_CMOV))
-    luaL_error(L, "CPU not supported");
-#endif
-#if defined(LUAJIT_CPU_SSE2)
   if (!(flags & JIT_F_SSE2))
-    luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
-#endif
+    luaL_error(L, "CPU with SSE2 required");
 #endif
 #elif LJ_TARGET_ARM
 #if LJ_HASJIT
@@ -591,6 +695,8 @@ static uint32_t jit_cpudetect(lua_State *L)
 	   ver >= 60 ? JIT_F_ARMV6_ : 0;
   flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
 #endif
+#elif LJ_TARGET_ARM64
+  /* No optional CPU features to detect (for now). */
 #elif LJ_TARGET_PPC
 #if LJ_HASJIT
 #if LJ_ARCH_SQRT
@@ -600,8 +706,6 @@ static uint32_t jit_cpudetect(lua_State *L)
   flags |= JIT_F_ROUND;
 #endif
 #endif
-#elif LJ_TARGET_PPCSPE
-  /* Nothing to do. */
 #elif LJ_TARGET_MIPS
 #if LJ_HASJIT
   /* Compile-time MIPS CPU detection. */
@@ -631,11 +735,7 @@ static void jit_init(lua_State *L)
   uint32_t flags = jit_cpudetect(L);
 #if LJ_HASJIT
   jit_State *J = L2J(L);
-#if LJ_TARGET_X86
-  /* Silently turn off the JIT compiler on CPUs without SSE2. */
-  if ((flags & JIT_F_SSE2))
-#endif
-    J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
+  J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
   memcpy(J->param, jit_param_default, sizeof(J->param));
   lj_dispatch_update(G(L));
 #else
@@ -645,19 +745,23 @@ static void jit_init(lua_State *L)
 
 LUALIB_API int luaopen_jit(lua_State *L)
 {
+  jit_init(L);
   lua_pushliteral(L, LJ_OS_NAME);
   lua_pushliteral(L, LJ_ARCH_NAME);
   lua_pushinteger(L, LUAJIT_VERSION_NUM);
   lua_pushliteral(L, LUAJIT_VERSION);
   LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
+#if LJ_HASPROFILE
+  lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile,
+		tabref(L->env));
+#endif
 #ifndef LUAJIT_DISABLE_JITUTIL
-  LJ_LIB_REG(L, "jit.util", jit_util);
+  lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env));
 #endif
 #if LJ_HASJIT
   LJ_LIB_REG(L, "jit.opt", jit_opt);
 #endif
   L->top -= 2;
-  jit_init(L);
   return 1;
 }
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_math.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_math.c
similarity index 96%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_math.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_math.c
index 40f29142d7051d6bc750029973175396e4624d7f..78838fcd6c00dbf8210b82b4343deeb0a7dddfce 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lib_math.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_math.c
@@ -47,12 +47,6 @@ LJLIB_ASM_(math_tanh)		LJLIB_REC(math_htrig IRCALL_tanh)
 LJLIB_ASM_(math_frexp)
 LJLIB_ASM_(math_modf)		LJLIB_REC(.)
 
-LJLIB_PUSH(57.29577951308232)
-LJLIB_ASM_(math_deg)		LJLIB_REC(math_degrad)
-
-LJLIB_PUSH(0.017453292519943295)
-LJLIB_ASM_(math_rad)		LJLIB_REC(math_degrad)
-
 LJLIB_ASM(math_log)		LJLIB_REC(math_log)
 {
   double x = lj_lib_checknum(L, 1);
@@ -63,12 +57,15 @@ LJLIB_ASM(math_log)		LJLIB_REC(math_log)
 #else
     x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
 #endif
-    setnumV(L->base-1, x*y);  /* Do NOT join the expression to x / y. */
+    setnumV(L->base-1-LJ_FR2, x*y);  /* Do NOT join the expression to x / y. */
     return FFH_RES(1);
   }
   return FFH_RETRY;
 }
 
+LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
+LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
+
 LJLIB_ASM(math_atan2)		LJLIB_REC(.)
 {
   lj_lib_checknum(L, 1);
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_os.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_os.c
similarity index 91%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_os.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_os.c
index bb5a141e3cd1ba01235ebd392932ec413711563e..7b5873a518a4f13457a5a66937116e349a35e2d8 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lib_os.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_os.c
@@ -17,7 +17,10 @@
 #include "lualib.h"
 
 #include "lj_obj.h"
+#include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
 #include "lj_lib.h"
 
 #if LJ_TARGET_POSIX
@@ -188,7 +191,7 @@ LJLIB_CF(os_date)
 #endif
   }
   if (stm == NULL) {  /* Invalid date? */
-    setnilV(L->top-1);
+    setnilV(L->top++);
   } else if (strcmp(s, "*t") == 0) {
     lua_createtable(L, 0, 9);  /* 9 = number of fields */
     setfield(L, "sec", stm->tm_sec);
@@ -200,23 +203,25 @@ LJLIB_CF(os_date)
     setfield(L, "wday", stm->tm_wday+1);
     setfield(L, "yday", stm->tm_yday+1);
     setboolfield(L, "isdst", stm->tm_isdst);
-  } else {
-    char cc[3];
-    luaL_Buffer b;
-    cc[0] = '%'; cc[2] = '\0';
-    luaL_buffinit(L, &b);
-    for (; *s; s++) {
-      if (*s != '%' || *(s + 1) == '\0') {  /* No conversion specifier? */
-	luaL_addchar(&b, *s);
-      } else {
-	size_t reslen;
-	char buff[200];  /* Should be big enough for any conversion result. */
-	cc[1] = *(++s);
-	reslen = strftime(buff, sizeof(buff), cc, stm);
-	luaL_addlstring(&b, buff, reslen);
+  } else if (*s) {
+    SBuf *sb = &G(L)->tmpbuf;
+    MSize sz = 0;
+    const char *q;
+    for (q = s; *q; q++)
+      sz += (*q == '%') ? 30 : 1;  /* Overflow doesn't matter. */
+    setsbufL(sb, L);
+    for (;;) {
+      char *buf = lj_buf_need(sb, sz);
+      size_t len = strftime(buf, sbufsz(sb), s, stm);
+      if (len) {
+	setstrV(L, L->top++, lj_str_new(L, buf, len));
+	lj_gc_check(L);
+	break;
       }
+      sz += (sz|1);
     }
-    luaL_pushresult(&b);
+  } else {
+    setstrV(L, L->top++, &G(L)->strempty);
   }
   return 1;
 }
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_package.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_package.c
similarity index 97%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_package.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_package.c
index a668193b1c93afb944c1824d52e9af39e72cd378..f8d69bfea82f698fceffac9bf43663fdf8da5673 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lib_package.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_package.c
@@ -96,9 +96,17 @@ static void setprogdir(lua_State *L)
 static void pusherror(lua_State *L)
 {
   DWORD error = GetLastError();
+#if LJ_TARGET_XBOXONE
+  wchar_t wbuffer[128];
+  char buffer[128*2];
+  if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
+      NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) &&
+      WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL))
+#else
   char buffer[128];
   if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
       NULL, error, 0, buffer, sizeof(buffer), NULL))
+#endif
     lua_pushstring(L, buffer);
   else
     lua_pushfstring(L, "system error %d\n", error);
@@ -111,7 +119,7 @@ static void ll_unloadlib(void *lib)
 
 static void *ll_load(lua_State *L, const char *path, int gl)
 {
-  HINSTANCE lib = LoadLibraryA(path);
+  HINSTANCE lib = LoadLibraryExA(path, NULL, 0);
   if (lib == NULL) pusherror(L);
   UNUSED(gl);
   return lib;
@@ -226,7 +234,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
       const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
       lua_pop(L, 1);
       if (bcdata) {
-	if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+	if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
 	  return PACKAGE_ERR_LOAD;
 	return 0;
       }
@@ -421,7 +429,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
   if (lua_isnil(L, -1)) {  /* Not found? */
     const char *bcname = mksymname(L, name, SYMPREFIX_BC);
     const char *bcdata = ll_bcsym(NULL, bcname);
-    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+    if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
       lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
   }
   return 1;
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_string.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_string.c
similarity index 62%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_string.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_string.c
index c6168edbb67ad9e0d27a59cb7bdc7c124267896c..a6d9986ac6370afc900198eddb7b419324c55603 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lib_string.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_string.c
@@ -6,8 +6,6 @@
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 */
 
-#include <stdio.h>
-
 #define lib_string_c
 #define LUA_LIB
 
@@ -18,6 +16,7 @@
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
@@ -25,17 +24,19 @@
 #include "lj_ff.h"
 #include "lj_bcdump.h"
 #include "lj_char.h"
+#include "lj_strfmt.h"
 #include "lj_lib.h"
 
 /* ------------------------------------------------------------------------ */
 
 #define LJLIB_MODULE_string
 
-LJLIB_ASM(string_len)		LJLIB_REC(.)
-{
-  lj_lib_checkstr(L, 1);
-  return FFH_RETRY;
-}
+LJLIB_LUA(string_len) /*
+  function(s)
+    CHECK_str(s)
+    return #s
+  end
+*/
 
 LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
 {
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte)		LJLIB_REC(string_range 0)
   lj_state_checkstack(L, (MSize)n);
   p = (const unsigned char *)strdata(s) + start;
   for (i = 0; i < n; i++)
-    setintV(L->base + i-1, p[i]);
+    setintV(L->base + i-1-LJ_FR2, p[i]);
   return FFH_RES(n);
 }
 
-LJLIB_ASM(string_char)
+LJLIB_ASM(string_char)		LJLIB_REC(.)
 {
   int i, nargs = (int)(L->top - L->base);
-  char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs);
+  char *buf = lj_buf_tmp(L, (MSize)nargs);
   for (i = 1; i <= nargs; i++) {
     int32_t k = lj_lib_checkint(L, i);
     if (!checku8(k))
       lj_err_arg(L, i, LJ_ERR_BADVAL);
     buf[i-1] = (char)k;
   }
-  setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs));
+  setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
   return FFH_RES(1);
 }
 
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub)		LJLIB_REC(string_range 1)
   return FFH_RETRY;
 }
 
-LJLIB_ASM(string_rep)
+LJLIB_CF(string_rep)		LJLIB_REC(.)
 {
   GCstr *s = lj_lib_checkstr(L, 1);
-  int32_t k = lj_lib_checkint(L, 2);
+  int32_t rep = lj_lib_checkint(L, 2);
   GCstr *sep = lj_lib_optstr(L, 3);
-  int32_t len = (int32_t)s->len;
-  global_State *g = G(L);
-  int64_t tlen;
-  const char *src;
-  char *buf;
-  if (k <= 0) {
-  empty:
-    setstrV(L, L->base-1, &g->strempty);
-    return FFH_RES(1);
+  SBuf *sb = lj_buf_tmp_(L);
+  if (sep && rep > 1) {
+    GCstr *s2 = lj_buf_cat2str(L, sep, s);
+    lj_buf_reset(sb);
+    lj_buf_putstr(sb, s);
+    s = s2;
+    rep--;
   }
-  if (sep) {
-    tlen = (int64_t)len + sep->len;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-    tlen *= k;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-  } else {
-    tlen = (int64_t)k * len;
-    if (tlen > LJ_MAX_STR)
-      lj_err_caller(L, LJ_ERR_STROV);
-  }
-  if (tlen == 0) goto empty;
-  buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
-  src = strdata(s);
-  if (sep) {
-    tlen -= sep->len;  /* Ignore trailing separator. */
-    if (k > 1) {  /* Paste one string and one separator. */
-      int32_t i;
-      i = 0; while (i < len) *buf++ = src[i++];
-      src = strdata(sep); len = sep->len;
-      i = 0; while (i < len) *buf++ = src[i++];
-      src = g->tmpbuf.buf; len += s->len; k--;  /* Now copy that k-1 times. */
-    }
-  }
-  do {
-    int32_t i = 0;
-    do { *buf++ = src[i++]; } while (i < len);
-  } while (--k > 0);
-  setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
-  return FFH_RES(1);
+  sb = lj_buf_putstr_rep(sb, s, rep);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));
+  lj_gc_check(L);
+  return 1;
 }
 
-LJLIB_ASM(string_reverse)
+LJLIB_ASM(string_reverse)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
 {
-  GCstr *s = lj_lib_checkstr(L, 1);
-  lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
+  lj_lib_checkstr(L, 1);
   return FFH_RETRY;
 }
-LJLIB_ASM_(string_lower)
-LJLIB_ASM_(string_upper)
+LJLIB_ASM_(string_lower)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
+LJLIB_ASM_(string_upper)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
 
 /* ------------------------------------------------------------------------ */
 
-static int writer_buf(lua_State *L, const void *p, size_t size, void *b)
+static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
 {
-  luaL_addlstring((luaL_Buffer *)b, (const char *)p, size);
+  lj_buf_putmem((SBuf *)sb, p, (MSize)size);
   UNUSED(L);
   return 0;
 }
@@ -153,12 +124,12 @@ LJLIB_CF(string_dump)
 {
   GCfunc *fn = lj_lib_checkfunc(L, 1);
   int strip = L->base+1 < L->top && tvistruecond(L->base+1);
-  luaL_Buffer b;
+  SBuf *sb = lj_buf_tmp_(L);  /* Assumes lj_bcwrite() doesn't use tmpbuf. */
   L->top = L->base+1;
-  luaL_buffinit(L, &b);
-  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
+  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))  /* sb is handed to writer_buf as its data pointer. */
     lj_err_caller(L, LJ_ERR_STRDUMP);
-  luaL_pushresult(&b);
+  setstrV(L, L->top-1, lj_buf_str(L, sb));  /* Result overwrites slot 1 (L->top was set to L->base+1 above). */
+  lj_gc_check(L);  /* NOTE(review): presumably a GC step point after creating the string -- confirm. */
   return 1;
 }
 
@@ -183,7 +154,6 @@ typedef struct MatchState {
 } MatchState;
 
 #define L_ESC		'%'
-#define SPECIALS	"^$*+?.([%-"
 
 static int check_capture(MatchState *ms, int l)
 {
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
   return s;
 }
 
-static const char *lmemfind(const char *s1, size_t l1,
-			    const char *s2, size_t l2)
-{
-  if (l2 == 0) {
-    return s1;  /* empty strings are everywhere */
-  } else if (l2 > l1) {
-    return NULL;  /* avoids a negative `l1' */
-  } else {
-    const char *init;  /* to search for a `*s2' inside `s1' */
-    l2--;  /* 1st char will be checked by `memchr' */
-    l1 = l1-l2;  /* `s2' cannot be found after that */
-    while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
-      init++;   /* 1st char is already checked */
-      if (memcmp(init, s2+1, l2) == 0) {
-	return init-1;
-      } else {  /* correct `l1' and `s1' to try again */
-	l1 -= (size_t)(init-s1);
-	s1 = init;
-      }
-    }
-    return NULL;  /* not found */
-  }
-}
-
 static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
 {
   if (i >= ms->level) {
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
   return nlevels;  /* number of strings pushed */
 }
 
-static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
-{
-  /* relative string position: negative means back from end */
-  if (pos < 0) pos += (ptrdiff_t)len + 1;
-  return (pos >= 0) ? pos : 0;
-}
-
 static int str_find_aux(lua_State *L, int find)
 {
-  size_t l1, l2;
-  const char *s = luaL_checklstring(L, 1, &l1);
-  const char *p = luaL_checklstring(L, 2, &l2);
-  ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1;
-  if (init < 0) {
-    init = 0;
-  } else if ((size_t)(init) > l1) {
+  GCstr *s = lj_lib_checkstr(L, 1);  /* Arg #1: subject string. */
+  GCstr *p = lj_lib_checkstr(L, 2);  /* Arg #2: pattern or fixed string. */
+  int32_t start = lj_lib_optint(L, 3, 1);  /* Arg #3: start position, fallback value 1. */
+  MSize st;
+  if (start < 0) start += (int32_t)s->len; else start--;  /* Negative: offset from the end; otherwise 1-based -> 0-based. */
+  if (start < 0) start = 0;  /* Clamp to the start of the string. */
+  st = (MSize)start;
+  if (st > s->len) {  /* Start offset lies beyond the end of the string. */
 #if LJ_52
     setnilV(L->top-1);
     return 1;
 #else
-    init = (ptrdiff_t)l1;
+    st = s->len;
 #endif
   }
-  if (find && (lua_toboolean(L, 4) ||  /* explicit request? */
-      strpbrk(p, SPECIALS) == NULL)) {  /* or no special characters? */
-    /* do a plain search */
-    const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2);
-    if (s2) {
-      lua_pushinteger(L, s2-s+1);
-      lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
+  if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||  /* Arg #4 present and truthy? */
+	       !lj_str_haspattern(p))) {  /* Search for fixed string. */
+    const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
+    if (q) {
+      setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);  /* 1-based start of the hit. */
+      setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);  /* 1-based end of the hit. */
       return 2;
     }
-  } else {
+  } else {  /* Search for pattern. */
     MatchState ms;
-    int anchor = (*p == '^') ? (p++, 1) : 0;
-    const char *s1=s+init;
+    const char *pstr = strdata(p);
+    const char *sstr = strdata(s) + st;
+    int anchor = 0;
+    if (*pstr == '^') { pstr++; anchor = 1; }  /* Leading '^': skip it; the loop below then tries only one position. */
     ms.L = L;
-    ms.src_init = s;
-    ms.src_end = s+l1;
-    do {
-      const char *res;
+    ms.src_init = strdata(s);
+    ms.src_end = strdata(s) + s->len;
+    do {  /* Loop through string and try to match the pattern. */
+      const char *q;
       ms.level = ms.depth = 0;
-      if ((res=match(&ms, s1, p)) != NULL) {
+      q = match(&ms, sstr, pstr);
+      if (q) {
 	if (find) {
-	  lua_pushinteger(L, s1-s+1);  /* start */
-	  lua_pushinteger(L, res-s);   /* end */
-	  return push_captures(&ms, NULL, 0) + 2;
+	  setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));  /* Match start (1-based). */
+	  setintV(L->top++, (int32_t)(q-strdata(s)));  /* Match end. */
+	  return push_captures(&ms, NULL, NULL) + 2;  /* +2 for the two positions pushed above. */
 	} else {
-	  return push_captures(&ms, s1, res);
+	  return push_captures(&ms, sstr, q);
 	}
       }
-    } while (s1++ < ms.src_end && !anchor);
+    } while (sstr++ < ms.src_end && !anchor);
   }
-  lua_pushnil(L);  /* not found */
+  setnilV(L->top-1);  /* Not found. */
   return 1;
 }
 
-LJLIB_CF(string_find)
+LJLIB_CF(string_find)		LJLIB_REC(.)
 {
   return str_find_aux(L, 1);
 }
@@ -698,221 +640,91 @@ LJLIB_CF(string_gsub)
 
 /* ------------------------------------------------------------------------ */
 
-/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
-#define MAX_FMTITEM	512
-/* valid flags in a format specification */
-#define FMT_FLAGS	"-+ #0"
-/*
-** maximum size of each format specification (such as '%-099.99d')
-** (+10 accounts for %99.99x plus margin of error)
-*/
-#define MAX_FMTSPEC	(sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
-
-static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
-{
-  GCstr *str = lj_lib_checkstr(L, arg);
-  int32_t len = (int32_t)str->len;
-  const char *s = strdata(str);
-  luaL_addchar(b, '"');
-  while (len--) {
-    uint32_t c = uchar(*s);
-    if (c == '"' || c == '\\' || c == '\n') {
-      luaL_addchar(b, '\\');
-    } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
-      uint32_t d;
-      luaL_addchar(b, '\\');
-      if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
-	luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
-	goto tens;
-      } else if (c >= 10) {
-      tens:
-	d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
-      }
-      c += '0';
-    }
-    luaL_addchar(b, c);
-    s++;
-  }
-  luaL_addchar(b, '"');
-}
-
-static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
-{
-  const char *p = strfrmt;
-  while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++;  /* skip flags */
-  if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
-    lj_err_caller(L, LJ_ERR_STRFMTR);
-  if (lj_char_isdigit(uchar(*p))) p++;  /* skip width */
-  if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
-  if (*p == '.') {
-    p++;
-    if (lj_char_isdigit(uchar(*p))) p++;  /* skip precision */
-    if (lj_char_isdigit(uchar(*p))) p++;  /* (2 digits at most) */
-  }
-  if (lj_char_isdigit(uchar(*p)))
-    lj_err_caller(L, LJ_ERR_STRFMTW);
-  *(form++) = '%';
-  strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
-  form += p - strfrmt + 1;
-  *form = '\0';
-  return p;
-}
-
-static void addintlen(char *form)
-{
-  size_t l = strlen(form);
-  char spec = form[l - 1];
-  strcpy(form + l - 1, LUA_INTFRMLEN);
-  form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
-  form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
-}
-
-static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
-{
-  if (sizeof(LUA_INTFRM_T) == 4) {
-    return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
-  } else {
-    cTValue *o;
-    lj_lib_checknumber(L, arg);
-    o = L->base+arg-1;
-    if (tvisint(o))
-      return (LUA_INTFRM_T)intV(o);
-    else
-      return (LUA_INTFRM_T)numV(o);
-  }
-}
-
-static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
-{
-  if (sizeof(LUA_INTFRM_T) == 4) {
-    return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
-  } else {
-    cTValue *o;
-    lj_lib_checknumber(L, arg);
-    o = L->base+arg-1;
-    if (tvisint(o))
-      return (unsigned LUA_INTFRM_T)intV(o);
-    else if ((int32_t)o->u32.hi < 0)
-      return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
-    else
-      return (unsigned LUA_INTFRM_T)numV(o);
-  }
-}
-
-static GCstr *meta_tostring(lua_State *L, int arg)
+/* Emulate tostring() inline. Returns NULL after calling the MM_tostring metamethod. */
+static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
 {
   TValue *o = L->base+arg-1;
   cTValue *mo;
   lua_assert(o < L->top);  /* Caller already checks for existence. */
   if (LJ_LIKELY(tvisstr(o)))
     return strV(o);
-  if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+  if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {  /* retry == 2: never call the metamethod. */
     copyTV(L, L->top++, mo);
     copyTV(L, L->top++, o);
     lua_call(L, 1, 1);
-    L->top--;
-    if (tvisstr(L->top))
-      return strV(L->top);
-    o = L->base+arg-1;
-    copyTV(L, o, L->top);
-  }
-  if (tvisnumber(o)) {
-    return lj_str_fromnumber(L, o);
-  } else if (tvisnil(o)) {
-    return lj_str_newlit(L, "nil");
-  } else if (tvisfalse(o)) {
-    return lj_str_newlit(L, "false");
-  } else if (tvistrue(o)) {
-    return lj_str_newlit(L, "true");
-  } else {
-    if (tvisfunc(o) && isffunc(funcV(o)))
-      lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
-    else
-      lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
-    L->top--;
-    return strV(L->top);
+    copyTV(L, L->base+arg-1, --L->top);  /* Pop the call result into the argument's own slot. */
+    return NULL;  /* Buffer may be overwritten, retry. */
   }
-}
-
-LJLIB_CF(string_format)
-{
-  int arg = 1, top = (int)(L->top - L->base);
-  GCstr *fmt = lj_lib_checkstr(L, arg);
-  const char *strfrmt = strdata(fmt);
-  const char *strfrmt_end = strfrmt + fmt->len;
-  luaL_Buffer b;
-  luaL_buffinit(L, &b);
-  while (strfrmt < strfrmt_end) {
-    if (*strfrmt != L_ESC) {
-      luaL_addchar(&b, *strfrmt++);
-    } else if (*++strfrmt == L_ESC) {
-      luaL_addchar(&b, *strfrmt++);  /* %% */
-    } else { /* format item */
-      char form[MAX_FMTSPEC];  /* to store the format (`%...') */
-      char buff[MAX_FMTITEM];  /* to store the formatted item */
+  return lj_strfmt_obj(L, o);  /* NOTE(review): presumably stringifies any remaining non-string value -- confirm in lj_strfmt. */
+}
+
+LJLIB_CF(string_format)		LJLIB_REC(.)
+{
+  int arg, top = (int)(L->top - L->base);  /* top: number of arguments on entry. */
+  GCstr *fmt;
+  SBuf *sb;
+  FormatState fs;
+  SFormat sf;
+  int retry = 0;  /* 0: first pass, 1: a metamethod call happened, 2: second pass. */
+again:
+  arg = 1;
+  sb = lj_buf_tmp_(L);  /* NOTE(review): presumably hands back the shared temp buffer reset for this pass -- confirm. */
+  fmt = lj_lib_checkstr(L, arg);
+  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+    if (sf == STRFMT_LIT) {
+      lj_buf_putmem(sb, fs.str, fs.len);  /* Literal run: copied as-is. */
+    } else if (sf == STRFMT_ERR) {
+      lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));  /* Error text includes fs.str/fs.len. */
+    } else {
       if (++arg > top)
 	luaL_argerror(L, arg, lj_obj_typename[0]);
-      strfrmt = scanformat(L, strfrmt, form);
-      switch (*strfrmt++) {
-      case 'c':
-	sprintf(buff, form, lj_lib_checkint(L, arg));
+      switch (STRFMT_TYPE(sf)) {
+      case STRFMT_INT:
+	if (tvisint(L->base+arg-1)) {
+	  int32_t k = intV(L->base+arg-1);
+	  if (sf == STRFMT_INT)
+	    lj_strfmt_putint(sb, k);  /* Shortcut for plain %d. */
+	  else
+	    lj_strfmt_putfxint(sb, sf, k);
+	} else {
+	  lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));  /* Not an integer TValue: take the number path. */
+	}
 	break;
-      case 'd':  case 'i':
-	addintlen(form);
-	sprintf(buff, form, num2intfrm(L, arg));
+      case STRFMT_UINT:
+	if (tvisint(L->base+arg-1))
+	  lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
+	else
+	  lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
 	break;
-      case 'o':  case 'u':  case 'x':  case 'X':
-	addintlen(form);
-	sprintf(buff, form, num2uintfrm(L, arg));
+      case STRFMT_NUM:
+	lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
 	break;
-      case 'e':  case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
-	TValue tv;
-	tv.n = lj_lib_checknum(L, arg);
-	if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
-	  /* Canonicalize output of non-finite values. */
-	  char *p, nbuf[LJ_STR_NUMBUF];
-	  size_t len = lj_str_bufnum(nbuf, &tv);
-	  if (strfrmt[-1] < 'a') {
-	    nbuf[len-3] = nbuf[len-3] - 0x20;
-	    nbuf[len-2] = nbuf[len-2] - 0x20;
-	    nbuf[len-1] = nbuf[len-1] - 0x20;
-	  }
-	  nbuf[len] = '\0';
-	  for (p = form; *p < 'A' && *p != '.'; p++) ;
-	  *p++ = 's'; *p = '\0';
-	  sprintf(buff, form, nbuf);
-	  break;
-	}
-	sprintf(buff, form, (double)tv.n);
+      case STRFMT_STR: {
+	GCstr *str = string_fmt_tostring(L, arg, retry);
+	if (str == NULL)
+	  retry = 1;  /* A metamethod ran: finish this pass, then redo the whole format. */
+	else if ((sf & STRFMT_T_QUOTED))
+	  lj_strfmt_putquoted(sb, str);  /* No formatting. */
+	else
+	  lj_strfmt_putfstr(sb, sf, str);
 	break;
 	}
-      case 'q':
-	addquoted(L, &b, arg);
-	continue;
-      case 'p':
-	lj_str_pushf(L, "%p", lua_topointer(L, arg));
-	luaL_addvalue(&b);
-	continue;
-      case 's': {
-	GCstr *str = meta_tostring(L, arg);
-	if (!strchr(form, '.') && str->len >= 100) {
-	  /* no precision and string is too long to be formatted;
-	     keep original string */
-	  setstrV(L, L->top++, str);
-	  luaL_addvalue(&b);
-	  continue;
-	}
-	sprintf(buff, form, strdata(str));
+      case STRFMT_CHAR:
+	lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
+	break;
+      case STRFMT_PTR:  /* No formatting. */
+	lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1));
 	break;
-	}
       default:
-	lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1));
+	lua_assert(0);  /* NOTE(review): assumes lj_strfmt_parse() yields no other type -- confirm. */
 	break;
       }
-      luaL_addlstring(&b, buff, strlen(buff));
     }
   }
-  luaL_pushresult(&b);
+  if (retry++ == 1) goto again;  /* Second pass runs with retry == 2, so it cannot loop again. */
+  setstrV(L, L->top-1, lj_buf_str(L, sb));  /* Result replaces the top stack slot. */
+  lj_gc_check(L);
   return 1;
 }
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lib_table.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_table.c
similarity index 73%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lib_table.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_table.c
index fbfe863873b3febdb199393f15451d792358cdc7..56612aba983d85fa00e0dc7a8d9bf85ba41ac47a 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lib_table.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lib_table.c
@@ -16,57 +16,43 @@
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_tab.h"
+#include "lj_ff.h"
 #include "lj_lib.h"
 
 /* ------------------------------------------------------------------------ */
 
 #define LJLIB_MODULE_table
 
-LJLIB_CF(table_foreachi)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  GCfunc *func = lj_lib_checkfunc(L, 2);
-  MSize i, n = lj_tab_len(t);
-  for (i = 1; i <= n; i++) {
-    cTValue *val;
-    setfuncV(L, L->top, func);
-    setintV(L->top+1, i);
-    val = lj_tab_getint(t, (int32_t)i);
-    if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
-    L->top += 3;
-    lua_call(L, 2, 1);
-    if (!tvisnil(L->top-1))
-      return 1;
-    L->top--;
-  }
-  return 0;
-}
+LJLIB_LUA(table_foreachi) /*
+  function(t, f)
+    CHECK_tab(t)
+    CHECK_func(f)
+    for i=1,#t do
+      local r = f(i, t[i])
+      if r ~= nil then return r end
+    end
+  end
+*/
 
-LJLIB_CF(table_foreach)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  GCfunc *func = lj_lib_checkfunc(L, 2);
-  L->top = L->base+3;
-  setnilV(L->top-1);
-  while (lj_tab_next(L, t, L->top-1)) {
-    copyTV(L, L->top+2, L->top);
-    copyTV(L, L->top+1, L->top-1);
-    setfuncV(L, L->top, func);
-    L->top += 3;
-    lua_call(L, 2, 1);
-    if (!tvisnil(L->top-1))
-      return 1;
-    L->top--;
-  }
-  return 0;
-}
+LJLIB_LUA(table_foreach) /*
+  function(t, f)
+    CHECK_tab(t)
+    CHECK_func(f)
+    for k, v in PAIRS(t) do
+      local r = f(k, v)
+      if r ~= nil then return r end
+    end
+  end
+*/
 
-LJLIB_ASM(table_getn)		LJLIB_REC(.)
-{
-  lj_lib_checktab(L, 1);
-  return FFH_UNREACHABLE;
-}
+LJLIB_LUA(table_getn) /*
+  function(t)
+    CHECK_tab(t)
+    return #t
+  end
+*/
 
 LJLIB_CF(table_maxn)
 {
@@ -119,52 +105,47 @@ LJLIB_CF(table_insert)		LJLIB_REC(.)
   return 0;
 }
 
-LJLIB_CF(table_remove)		LJLIB_REC(.)
-{
-  GCtab *t = lj_lib_checktab(L, 1);
-  int32_t e = (int32_t)lj_tab_len(t);
-  int32_t pos = lj_lib_optint(L, 2, e);
-  if (!(1 <= pos && pos <= e))  /* Nothing to remove? */
-    return 0;
-  lua_rawgeti(L, 1, pos);  /* Get previous value. */
-  /* NOBARRIER: This just moves existing elements around. */
-  for (; pos < e; pos++) {
-    cTValue *src = lj_tab_getint(t, pos+1);
-    TValue *dst = lj_tab_setint(L, t, pos);
-    if (src) {
-      copyTV(L, dst, src);
-    } else {
-      setnilV(dst);
-    }
-  }
-  setnilV(lj_tab_setint(L, t, e));  /* Remove (last) value. */
-  return 1;  /* Return previous value. */
-}
+LJLIB_LUA(table_remove) /*
+  function(t, pos)
+    CHECK_tab(t)
+    local len = #t
+    if pos == nil then
+      if len ~= 0 then
+	local old = t[len]
+	t[len] = nil
+	return old
+      end
+    else
+      CHECK_int(pos)
+      if pos >= 1 and pos <= len then
+	local old = t[pos]
+	for i=pos+1,len do
+	  t[i-1] = t[i]
+	end
+	t[len] = nil
+	return old
+      end
+    end
+  end
+*/
 
-LJLIB_CF(table_concat)
+LJLIB_CF(table_concat)		LJLIB_REC(.)
 {
-  luaL_Buffer b;
   GCtab *t = lj_lib_checktab(L, 1);
   GCstr *sep = lj_lib_optstr(L, 2);
-  MSize seplen = sep ? sep->len : 0;
   int32_t i = lj_lib_optint(L, 3, 1);
   int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
 	      lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
-  luaL_buffinit(L, &b);
-  if (i <= e) {
-    for (;;) {
-      cTValue *o;
-      lua_rawgeti(L, 1, i);
-      o = L->top-1;
-      if (!(tvisstr(o) || tvisnumber(o)))
-	lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
-      luaL_addvalue(&b);
-      if (i++ == e) break;
-      if (seplen)
-	luaL_addlstring(&b, strdata(sep), seplen);
-    }
+  SBuf *sb = lj_buf_tmp_(L);
+  SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);  /* The whole join is delegated; NULL means failure (checked below). */
+  if (LJ_UNLIKELY(!sbx)) {  /* Error: bad element type. */
+    int32_t idx = (int32_t)(intptr_t)sbufP(sb);  /* NOTE(review): failing index appears to be passed back via the buffer pointer -- confirm in lj_buf_puttab. */
+    cTValue *o = lj_tab_getint(t, idx);  /* May be NULL; then the nil type name is reported. */
+    lj_err_callerv(L, LJ_ERR_TABCAT,
+		   lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
   }
-  luaL_pushresult(&b);
+  setstrV(L, L->top-1, lj_buf_str(L, sbx));  /* Result replaces the top stack slot. */
+  lj_gc_check(L);
   return 1;
 }
 
@@ -284,6 +265,30 @@ LJLIB_CF(table_pack)
 }
 #endif
 
+LJLIB_NOREG LJLIB_CF(table_new)		LJLIB_REC(.)
+{
+  int32_t a = lj_lib_checkint(L, 1);  /* Arg #1: forwarded as narray. */
+  int32_t h = lj_lib_checkint(L, 2);  /* Arg #2: forwarded as nrec. */
+  lua_createtable(L, a, h);  /* Pushes the new table. */
+  return 1;  /* One result: the table. */
+}
+
+LJLIB_NOREG LJLIB_CF(table_clear)	LJLIB_REC(.)
+{
+  lj_tab_clear(lj_lib_checktab(L, 1));  /* Arg #1 is checked as a table and cleared in place. */
+  return 0;  /* No results. */
+}
+
+static int luaopen_table_new(lua_State *L)  /* Opener for the "new" function (not registered via LJLIB_NOREG). */
+{
+  return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");  /* NOTE(review): presumably creates and returns the function on first load -- confirm in lj_lib. */
+}
+
+static int luaopen_table_clear(lua_State *L)  /* Opener for the "clear" function (not registered via LJLIB_NOREG). */
+{
+  return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");  /* NOTE(review): presumably creates and returns the function on first load -- confirm in lj_lib. */
+}
+
 /* ------------------------------------------------------------------------ */
 
 #include "lj_libdef.h"
@@ -295,6 +300,8 @@ LUALIB_API int luaopen_table(lua_State *L)
   lua_getglobal(L, "unpack");
   lua_setfield(L, -2, "unpack");
 #endif
+  lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
+  lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
   return 1;
 }
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj.supp b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj.supp
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj.supp
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj.supp
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_alloc.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_alloc.c
similarity index 99%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_alloc.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_alloc.c
index 7c7ec678726c11384afee9029010ab7a7f5207e3..0aad826d36175a6636468e3e93d85f52d4be69b6 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_alloc.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_alloc.c
@@ -77,7 +77,7 @@
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 
 /* Undocumented, but hey, that's what we all love so much about Windows. */
 typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
@@ -174,8 +174,10 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
 #endif
 #define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)
 
-#if LJ_64
-/* 64 bit mode needs special support for allocating memory in the lower 2GB. */
+#if LJ_64 && !LJ_GC64
+/* 64 bit mode with 32 bit pointers needs special support for allocating
+** memory in the lower 2GB.
+*/
 
 #if defined(MAP_32BIT)
 
@@ -258,7 +260,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
 
 #else
 
-/* 32 bit mode is easy. */
+/* 32 bit mode and GC64 mode are easy. */
 static LJ_AINLINE void *CALL_MMAP(size_t size)
 {
   int olderr = errno;
@@ -294,7 +296,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
 #define CALL_MREMAP_NOMOVE	0
 #define CALL_MREMAP_MAYMOVE	1
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 #define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
 #else
 #define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_alloc.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_alloc.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_alloc.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_alloc.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_api.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_api.c
similarity index 92%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_api.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_api.c
index 3bedb39fd70a708d701d3d7e1e1105c5f68fb746..1f09284f991471f6e00b7ddbc7572f8e86a8f02c 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_api.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_api.c
@@ -24,6 +24,7 @@
 #include "lj_trace.h"
 #include "lj_vm.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 
 /* -- Common helper functions --------------------------------------------- */
 
@@ -188,7 +189,7 @@ LUA_API int lua_type(lua_State *L, int idx)
   cTValue *o = index2adr(L, idx);
   if (tvisnumber(o)) {
     return LUA_TNUMBER;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (tvislightud(o)) {
     return LUA_TLIGHTUSERDATA;
 #endif
@@ -268,7 +269,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
     return 0;
   } else if (tvispri(o1)) {
     return o1 != niltv(L) && o2 != niltv(L);
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (tvislightud(o1)) {
     return o1->u64 == o2->u64;
 #endif
@@ -283,8 +284,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
     } else {
       L->top = base+2;
       lj_vm_call(L, base, 1+1);
-      L->top -= 2;
-      return tvistruecond(L->top+1);
+      L->top -= 2+LJ_FR2;
+      return tvistruecond(L->top+1+LJ_FR2);
     }
   }
 }
@@ -306,8 +307,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
     } else {
       L->top = base+2;
       lj_vm_call(L, base, 1+1);
-      L->top -= 2;
-      return tvistruecond(L->top+1);
+      L->top -= 2+LJ_FR2;
+      return tvistruecond(L->top+1+LJ_FR2);
     }
   }
 }
@@ -434,7 +435,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
   } else {
     if (len != NULL) *len = 0;
@@ -453,7 +454,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
   } else {
     lj_err_argt(L, idx, LUA_TSTRING);
@@ -475,7 +476,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
   } else if (tvisnumber(o)) {
     lj_gc_check(L);
     o = index2adr(L, idx);  /* GC may move the stack. */
-    s = lj_str_fromnumber(L, o);
+    s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
   } else {
     lj_err_argt(L, idx, LUA_TSTRING);
@@ -507,7 +508,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
   } else if (tvisudata(o)) {
     return udataV(o)->len;
   } else if (tvisnumber(o)) {
-    GCstr *s = lj_str_fromnumber(L, o);
+    GCstr *s = lj_strfmt_number(L, o);
     setstrV(L, o, s);
     return s->len;
   } else {
@@ -545,17 +546,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
 
 LUA_API const void *lua_topointer(lua_State *L, int idx)
 {
-  cTValue *o = index2adr(L, idx);
-  if (tvisudata(o))
-    return uddata(udataV(o));
-  else if (tvislightud(o))
-    return lightudV(o);
-  else if (tviscdata(o))
-    return cdataptr(cdataV(o));
-  else if (tvisgcv(o))
-    return gcV(o);
-  else
-    return NULL;
+  return lj_obj_ptr(index2adr(L, idx));  /* The per-type pointer extraction is delegated to lj_obj_ptr(). */
 }
 
 /* -- Stack setters (object creation) ------------------------------------- */
@@ -606,7 +597,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
 				     va_list argp)
 {
   lj_gc_check(L);
-  return lj_str_pushvf(L, fmt, argp);
+  return lj_strfmt_pushvf(L, fmt, argp);
 }
 
 LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -615,7 +606,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
   va_list argp;
   lj_gc_check(L);
   va_start(argp, fmt);
-  ret = lj_str_pushvf(L, fmt, argp);
+  ret = lj_strfmt_pushvf(L, fmt, argp);
   va_end(argp);
   return ret;
 }
@@ -649,10 +640,8 @@ LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
 
 LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
 {
-  GCtab *t;
   lj_gc_check(L);
-  t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec));
-  settabV(L, L->top, t);
+  settabV(L, L->top, lj_tab_new_ah(L, narray, nrec));  /* NOTE(review): lj_tab_new_ah() presumably does the size conversion previously done inline -- confirm. */
   incr_top(L);
 }
 
@@ -715,8 +704,8 @@ LUA_API void lua_concat(lua_State *L, int n)
       n -= (int)(L->top - top);
       L->top = top+2;
       lj_vm_call(L, top, 1+1);
-      L->top--;
-      copyTV(L, L->top-1, L->top);
+      L->top -= 1+LJ_FR2;
+      copyTV(L, L->top-1, L->top+LJ_FR2);
     } while (--n > 0);
   } else if (n == 0) {  /* Push empty string. */
     setstrV(L, L->top, &G(L)->strempty);
@@ -735,8 +724,8 @@ LUA_API void lua_gettable(lua_State *L, int idx)
   if (v == NULL) {
     L->top += 2;
     lj_vm_call(L, L->top-2, 1+1);
-    L->top -= 2;
-    v = L->top+1;
+    L->top -= 2+LJ_FR2;
+    v = L->top+1+LJ_FR2;
   }
   copyTV(L, L->top-1, v);
 }
@@ -751,8 +740,8 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
   if (v == NULL) {
     L->top += 2;
     lj_vm_call(L, L->top-2, 1+1);
-    L->top -= 2;
-    v = L->top+1;
+    L->top -= 2+LJ_FR2;
+    v = L->top+1+LJ_FR2;
   }
   copyTV(L, L->top, v);
   incr_top(L);
@@ -893,13 +882,14 @@ LUA_API void lua_settable(lua_State *L, int idx)
   o = lj_meta_tset(L, t, L->top-2);
   if (o) {
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
-    copyTV(L, o, L->top-1);
     L->top -= 2;
+    copyTV(L, o, L->top+1);
   } else {
-    L->top += 3;
-    copyTV(L, L->top-1, L->top-6);
-    lj_vm_call(L, L->top-3, 0+1);
-    L->top -= 3;
+    TValue *base = L->top;
+    copyTV(L, base+2, base-3-2*LJ_FR2);
+    L->top = base+3;
+    lj_vm_call(L, base, 0+1);
+    L->top -= 3+LJ_FR2;
   }
 }
 
@@ -913,14 +903,14 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
   setstrV(L, &key, lj_str_newz(L, k));
   o = lj_meta_tset(L, t, &key);
   if (o) {
-    L->top--;
     /* NOBARRIER: lj_meta_tset ensures the table is not black. */
-    copyTV(L, o, L->top);
+    copyTV(L, o, --L->top);
   } else {
-    L->top += 3;
-    copyTV(L, L->top-1, L->top-6);
-    lj_vm_call(L, L->top-3, 0+1);
-    L->top -= 2;
+    TValue *base = L->top;
+    copyTV(L, base+2, base-3-2*LJ_FR2);
+    L->top = base+3;
+    lj_vm_call(L, base, 0+1);
+    L->top -= 2+LJ_FR2;
   }
 }
 
@@ -1027,11 +1017,24 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
 
 /* -- Calls --------------------------------------------------------------- */
 
+#if LJ_FR2
+static TValue *api_call_base(lua_State *L, int nargs)  /* Opens a one-slot gap below the top nargs values. */
+{
+  TValue *o = L->top, *base = o - nargs;  /* base: current slot of the first argument. */
+  L->top = o+1;  /* Stack grows by one slot. */
+  for (; o > base; o--) copyTV(L, o, o-1);  /* Shift the nargs arguments up by one. */
+  setnilV(o);  /* o == base here: the vacated slot becomes nil. */
+  return o+1;  /* New slot of the first argument. */
+}
+#else
+#define api_call_base(L, nargs)	(L->top - (nargs))  /* Without LJ_FR2: no shifting, just the first-argument slot. */
+#endif
+
 LUA_API void lua_call(lua_State *L, int nargs, int nresults)
 {
   api_check(L, L->status == 0 || L->status == LUA_ERRERR);
   api_checknelems(L, nargs+1);
-  lj_vm_call(L, L->top - nargs, nresults+1);
+  lj_vm_call(L, api_call_base(L, nargs), nresults+1);  /* api_call_base() yields the first-argument slot (shifting args under LJ_FR2). */
 }
 
 LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
@@ -1049,7 +1052,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
     api_checkvalidindex(L, o);
     ef = savestack(L, o);
   }
-  status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef);
+  status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
   if (status) hook_restore(g, oldh);
   return status;
 }
@@ -1057,12 +1060,14 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
 static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
 {
   GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
+  TValue *top = L->top;  /* Local cursor; L->top is updated once at the end. */
   fn->c.f = func;
-  setfuncV(L, L->top, fn);
-  setlightudV(L->top+1, checklightudptr(L, ud));
+  setfuncV(L, top++, fn);  /* First slot: the new C function. */
+  if (LJ_FR2) setnilV(top++);  /* LJ_FR2: extra nil slot between function and argument. */
+  setlightudV(top++, checklightudptr(L, ud));  /* Single argument: ud as light userdata. */
   cframe_nres(L->cframe) = 1+0;  /* Zero results. */
-  L->top += 2;
-  return L->top-1;  /* Now call the newly allocated C function. */
+  L->top = top;
+  return top-1;  /* Now call the newly allocated C function. */
 }
 
 LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
@@ -1079,10 +1084,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
 LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
 {
   if (luaL_getmetafield(L, idx, field)) {
-    TValue *base = L->top--;
-    copyTV(L, base, index2adr(L, idx));
-    L->top = base+1;
-    lj_vm_call(L, base, 1+1);
+    TValue *top = L->top--;
+    if (LJ_FR2) setnilV(top++);
+    copyTV(L, top++, index2adr(L, idx));
+    L->top = top;
+    lj_vm_call(L, top-1, 1+1);
     return 1;
   }
   return 0;
@@ -1109,12 +1115,14 @@ LUA_API int lua_yield(lua_State *L, int nresults)
     } else {  /* Yield from hook: add a pseudo-frame. */
       TValue *top = L->top;
       hook_leave(g);
-      top->u64 = cframe_multres(cf);
-      setcont(top+1, lj_cont_hook);
-      setframe_pc(top+1, cframe_pc(cf)-1);
-      setframe_gc(top+2, obj2gco(L));
-      setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT);
-      L->top = L->base = top+3;
+      (top++)->u64 = cframe_multres(cf);
+      setcont(top, lj_cont_hook);
+      if (LJ_FR2) top++;
+      setframe_pc(top, cframe_pc(cf)-1);
+      if (LJ_FR2) top++;
+      setframe_gc(top, obj2gco(L), LJ_TTHREAD);
+      setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
+      L->top = L->base = top+1;
 #if LJ_TARGET_X64
       lj_err_throw(L, LUA_YIELD);
 #else
@@ -1131,7 +1139,9 @@ LUA_API int lua_yield(lua_State *L, int nresults)
 LUA_API int lua_resume(lua_State *L, int nargs)
 {
   if (L->cframe == NULL && L->status <= LUA_YIELD)
-    return lj_vm_resume(L, L->top - nargs, 0, 0);
+    return lj_vm_resume(L,
+      L->status == 0 ? api_call_base(L, nargs) : L->top - nargs,
+      0, 0);
   L->top = L->base;
   setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
   incr_top(L);
@@ -1161,7 +1171,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
     res = (int)(g->gc.total & 0x3ff);
     break;
   case LUA_GCSTEP: {
-    MSize a = (MSize)data << 10;
+    GCSize a = (GCSize)data << 10;
     g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
     while (g->gc.total >= g->gc.threshold)
       if (lj_gc_step(L) > 0) {
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_arch.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_arch.h
similarity index 79%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_arch.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_arch.h
index 20d1eb1183ed11fa96e011edf3b3b4054e3be0dc..6342fbbf83d4280b52072ab7f93ce6030f722508 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_arch.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_arch.h
@@ -19,10 +19,10 @@
 #define LUAJIT_ARCH_x64		2
 #define LUAJIT_ARCH_ARM		3
 #define LUAJIT_ARCH_arm		3
-#define LUAJIT_ARCH_PPC		4
-#define LUAJIT_ARCH_ppc		4
-#define LUAJIT_ARCH_PPCSPE	5
-#define LUAJIT_ARCH_ppcspe	5
+#define LUAJIT_ARCH_ARM64	4
+#define LUAJIT_ARCH_arm64	4
+#define LUAJIT_ARCH_PPC		5
+#define LUAJIT_ARCH_ppc		5
 #define LUAJIT_ARCH_MIPS	6
 #define LUAJIT_ARCH_mips	6
 
@@ -43,12 +43,10 @@
 #define LUAJIT_TARGET	LUAJIT_ARCH_X64
 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
 #define LUAJIT_TARGET	LUAJIT_ARCH_ARM
+#elif defined(__aarch64__)
+#define LUAJIT_TARGET	LUAJIT_ARCH_ARM64
 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
-#ifdef __NO_FPRS__
-#define LUAJIT_TARGET	LUAJIT_ARCH_PPCSPE
-#else
 #define LUAJIT_TARGET	LUAJIT_ARCH_PPC
-#endif
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
 #define LUAJIT_TARGET	LUAJIT_ARCH_MIPS
 #else
@@ -96,7 +94,7 @@
 #define LJ_TARGET_WINDOWS	(LUAJIT_OS == LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_LINUX		(LUAJIT_OS == LUAJIT_OS_LINUX)
 #define LJ_TARGET_OSX		(LUAJIT_OS == LUAJIT_OS_OSX)
-#define LJ_TARGET_IOS		(LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM)
+#define LJ_TARGET_IOS		(LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64))
 #define LJ_TARGET_POSIX		(LUAJIT_OS > LUAJIT_OS_WINDOWS)
 #define LJ_TARGET_DLOPEN	LJ_TARGET_POSIX
 
@@ -122,6 +120,12 @@
 #define LJ_TARGET_CONSOLE	1
 #endif
 
+#ifdef _DURANGO
+#define LJ_TARGET_XBOXONE	1
+#define LJ_TARGET_CONSOLE	1
+#define LJ_TARGET_GC64		1
+#endif
+
 #define LJ_NUMMODE_SINGLE	0	/* Single-number mode only. */
 #define LJ_NUMMODE_SINGLE_DUAL	1	/* Default to single-number mode. */
 #define LJ_NUMMODE_DUAL		2	/* Dual-number mode only. */
@@ -160,6 +164,9 @@
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNALIGNED	1
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
+#ifdef LUAJIT_ENABLE_GC64
+#define LJ_TARGET_GC64		1
+#endif
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
 
@@ -193,15 +200,45 @@
 #define LJ_ARCH_VERSION		50
 #endif
 
+#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
+
+#define LJ_ARCH_NAME		"arm64"
+#define LJ_ARCH_BITS		64
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#define LJ_TARGET_ARM64		1
+#define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_JUMPRANGE	27	/* +-2^27 = +-128MB */
+#define LJ_TARGET_MASKSHIFT	1
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
+#define LJ_TARGET_GC64		1
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
+#define LJ_ARCH_NOJIT		1	/* NYI */
+
+#define LJ_ARCH_VERSION		80
+
 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
 
-#define LJ_ARCH_NAME		"ppc"
+#ifndef LJ_ARCH_ENDIAN
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+#define LJ_ARCH_ENDIAN		LUAJIT_LE
+#else
+#define LJ_ARCH_ENDIAN		LUAJIT_BE
+#endif
+#endif
+
 #if _LP64
 #define LJ_ARCH_BITS		64
+#if LJ_ARCH_ENDIAN == LUAJIT_LE
+#define LJ_ARCH_NAME		"ppc64le"
+#else
+#define LJ_ARCH_NAME		"ppc64"
+#endif
 #else
 #define LJ_ARCH_BITS		32
+#define LJ_ARCH_NAME		"ppc"
 #endif
-#define LJ_ARCH_ENDIAN		LUAJIT_BE
+
 #define LJ_TARGET_PPC		1
 #define LJ_TARGET_EHRETREG	3
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
@@ -210,6 +247,15 @@
 #define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
 
+#if LJ_TARGET_CONSOLE
+#define LJ_ARCH_PPC32ON64	1
+#define LJ_ARCH_NOFFI		1
+#elif LJ_ARCH_BITS == 64
+#define LJ_ARCH_PPC64		1
+#define LJ_TARGET_GC64		1
+#define LJ_ARCH_NOJIT		1	/* NYI */
+#endif
+
 #if _ARCH_PWR7
 #define LJ_ARCH_VERSION		70
 #elif _ARCH_PWR6
@@ -223,10 +269,6 @@
 #else
 #define LJ_ARCH_VERSION		0
 #endif
-#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
-#define LJ_ARCH_PPC64		1
-#define LJ_ARCH_NOFFI		1
-#endif
 #if _ARCH_PPCSQ
 #define LJ_ARCH_SQRT		1
 #endif
@@ -240,25 +282,6 @@
 #define LJ_ARCH_XENON		1
 #endif
 
-#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
-
-#define LJ_ARCH_NAME		"ppcspe"
-#define LJ_ARCH_BITS		32
-#define LJ_ARCH_ENDIAN		LUAJIT_BE
-#ifndef LJ_ABI_SOFTFP
-#define LJ_ABI_SOFTFP		1
-#endif
-#define LJ_ABI_EABI		1
-#define LJ_TARGET_PPCSPE	1
-#define LJ_TARGET_EHRETREG	3
-#define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
-#define LJ_TARGET_MASKSHIFT	0
-#define LJ_TARGET_MASKROT	1
-#define LJ_TARGET_UNIFYROT	1	/* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE
-#define LJ_ARCH_NOFFI		1	/* NYI: comparisons, calls. */
-#define LJ_ARCH_NOJIT		1
-
 #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
 
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
@@ -305,6 +328,16 @@
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
 #error "Need at least GCC 4.2 or newer"
 #endif
+#elif LJ_TARGET_ARM64
+#if __clang__
+#if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
+#error "Need at least Clang 3.5 or newer"
+#endif
+#else
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
+#error "Need at least GCC 4.8 or newer"
+#endif
+#endif
 #elif !LJ_TARGET_PS3
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
 #error "Need at least GCC 4.3 or newer"
@@ -328,15 +361,25 @@
 #if !(__ARM_EABI__ || LJ_TARGET_IOS)
 #error "Only ARM EABI or iOS 3.0+ ABI is supported"
 #endif
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_ARM64
+#if defined(__AARCH64EB__)
+#error "No support for big-endian ARM64"
+#endif
+#if defined(_ILP32)
+#error "No support for ILP32 model on ARM64"
+#endif
+#elif LJ_TARGET_PPC
 #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
 #error "No support for PowerPC CPUs without double-precision FPU"
 #endif
-#if defined(_LITTLE_ENDIAN)
-#error "No support for little-endian PowerPC"
+#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
+#error "No support for little-endian PPC32"
 #endif
-#if defined(_LP64)
-#error "No support for PowerPC 64 bit mode"
+#if LJ_ARCH_PPC64
+#error "No support for PowerPC 64 bit mode (yet)"
+#endif
+#ifdef __NO_FPRS__
+#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
 #endif
 #elif LJ_TARGET_MIPS
 #if defined(__mips_soft_float)
@@ -369,8 +412,22 @@
 #endif
 #endif
 
+/* 64 bit GC references. */
+#if LJ_TARGET_GC64
+#define LJ_GC64			1
+#else
+#define LJ_GC64			0
+#endif
+
+/* 2-slot frame info. */
+#if LJ_GC64
+#define LJ_FR2			1
+#else
+#define LJ_FR2			0
+#endif
+
 /* Disable or enable the JIT compiler. */
-#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
+#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64
 #define LJ_HASJIT		0
 #else
 #define LJ_HASJIT		1
@@ -383,6 +440,21 @@
 #define LJ_HASFFI		1
 #endif
 
+#if defined(LUAJIT_DISABLE_PROFILE)
+#define LJ_HASPROFILE		0
+#elif LJ_TARGET_POSIX
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_SIGPROF	1
+#elif LJ_TARGET_PS3
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_PTHREAD	1
+#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_WTHREAD	1
+#else
+#define LJ_HASPROFILE		0
+#endif
+
 #ifndef LJ_ARCH_HASFPU
 #define LJ_ARCH_HASFPU		1
 #endif
@@ -415,15 +487,15 @@
 #define LJ_TARGET_UNALIGNED	0
 #endif
 
-/* Various workarounds for embedded operating systems. */
-#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360
+/* Various workarounds for embedded operating systems or weak C runtimes. */
+#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_LOG2
 #endif
-#if defined(__symbian__)
+#if defined(__symbian__) || LJ_TARGET_WINDOWS
 #define LUAJIT_NO_EXP2
 #endif
 
-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3
+#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
 #define LJ_NO_UNWIND		1
 #endif
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_asm.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm.c
similarity index 83%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_asm.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm.c
index 804b4dc80b24f321a78f9549de01f663267137c9..9db950a2bd18278262266f4e3b817441f1b0ccb2 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_asm.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm.c
@@ -179,6 +179,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #endif
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
@@ -336,7 +342,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
@@ -694,7 +700,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
 	emit_loadu64(as, dest, ir_kint64(ir)->u64);
 	return;
 #endif
-      } else {
+      } else if (ir->o != IR_KPRI) {
 	lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
 		   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
 	emit_loadi(as, dest, ir->i);
@@ -944,44 +950,6 @@ static void asm_snap_prep(ASMState *as)
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-			    const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
@@ -1066,6 +1034,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)  /* Assemble IR_BUFHDR. */
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);  /* GPR for the result of this ins. */
+  if ((ir->op2 & IRBUFHDR_APPEND)) {  /* Append flag is set in op2. */
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+	  (irp == ir-2 && !ra_used(ir-1)))) {  /* op1 has no reg, not adjacent. */
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))  /* Find the non-append BUFHDR. */
+	irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {  /* That header's op1 is a constant ref. */
+	ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+	ir = irp;  /* ra_left/ra_leftov below then uses the constant op1. */
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));  /* Scratch GPR other than sb. */
+    /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));  /* Emit store: tmp to sb->p. */
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));  /* Emit load: tmp from sb->b. */
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)  /* Assemble IR_BUFPUT as a C call. */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];  /* Default callee. */
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -1;  /* Constant char to put, or -1 if none. */
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lua_assert(irt_isstr(irs->t));
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = strdata(s)[0];
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {  /* op2 is fusable and has no reg. */
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+	args[1] = ASMREF_TMP1;  /* TValue * */
+	ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+	lua_assert(irt_isinteger(IR(irs->op1)->t));
+	args[1] = irs->op1;  /* int */
+	if (irs->op2 == IRTOSTR_INT)
+	  ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+	else  /* Remaining TOSTR mode is put as a char. */
+	  ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {  /* The temp argument still needs its value. */
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -1)  /* Number case: pass a TValue pointer for irs->op1. */
+      asm_tvptr(as, tmp, irs->op1);
+    else  /* Constant char case: load kchar into tmp. */
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)  /* Assemble IR_BUFSTR as a call to lj_buf_tostr. */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;  /* NOTE(review): presumably because the call allocates a string -- confirm. */
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)  /* Assemble IR_TOSTR as a call to an lj_strfmt_* helper. */
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;  /* First argument is L. */
+  as->gcsteps++;  /* NOTE(review): presumably because the call allocates a string -- confirm. */
+  if (ir->op2 == IRTOSTR_NUM) {  /* op2 selects the conversion mode. */
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else  /* Neither NUM nor INT: format as a char. */
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)  /* Fill the temp with a TValue pointer for op1. */
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)  /* Assemble a CONV+HIOP pair as a call to an fp64_* helper. */
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);  /* Source type of the CONV. */
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);  /* Destination type. */
+  IRCallID id;
+  IRRef args[2];
+  lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);  /* ir is the HIOP after the CONV. */
+  args[LJ_BE] = (ir-1)->op1;  /* Slot order of the two operands depends on endianness. */
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP source: pick a d2l-family helper. */
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;  /* Result setup below then uses the CONV ins. */
+  } else {  /* Otherwise: pick an l2d-family helper. */
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;  /* Local copy so flags can be changed. */
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)  /* Assemble IR_NEWREF as a call to lj_tab_newkey. */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)  /* Marked RID_SINK: nothing is emitted. */
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t     */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);  /* Temp gets a TValue pointer for the key (op2). */
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)  /* Assemble IR_LREF: result is the ASMREF_L value. */
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);  /* GPR for the result of this ins. */
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+			    const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);  /* Number of argument slots to fill. */
+  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }  /* CCI_L: first slot is L. */
+  while (n-- > 1) {  /* Fill args from the back along the op1 chain. */
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;  /* REF_NIL is stored as 0. */
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;  /* End of chain holds the first arg. */
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;  /* Arg count; flag bits are OR'ed in below. */
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }  /* One more per chained CARG. */
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;  /* C type id from the CARG's op2 constant. */
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));  /* Also encode the IR type at CCI_OTSHIFT. */
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)  /* Assemble ir as a call to lj_ir_callinfo[id]. */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;  /* Operands are passed in order as the two args. */
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)  /* Assemble a CALL* ins (see asm_ir). */
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];  /* op2 indexes the call info table. */
+  asm_collectargs(as, ir, ci, args);  /* Gather args from the CARG chain. */
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+#if !LJ_SOFTFP
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)  /* Emit call IRCALL_pow(lref, rref) as result of ir. */
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+  IRRef args[2];
+  args[0] = lref;
+  args[1] = rref;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)  /* Returns 1 if ir was assembled via asm_fppow, else 0. */
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {  /* op1 is the preceding MUL, not ra_used. */
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {  /* MUL's op1 is a LOG2 just before it. */
+      asm_fppow(as, ir, irpp->op1, irp->op2);  /* pow(LOG2 operand, MUL's op2). */
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1339,6 +1560,129 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {  /* Dispatch on the IR opcode. */
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;  /* Nothing emitted. */
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;  /* Reg class by type. */
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {  /* Previous ins is an HREF, op1 is curins-1. */
+      as->curins--;  /* Step curins back over the HREF as well. */
+      asm_href(as, ir-1, (IROp)ir->o);  /* Hand the compare opcode to asm_href. */
+    } else {
+      asm_equal(as, ir);
+    }
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_ATAN2: asm_atan2(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;  /* Stand-alone HREF: no merged compare op. */
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_TOBIT: asm_tobit(as, ir); break;
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;  /* Only CALLA bumps gcsteps before the shared call path. */
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;  /* No code here; CARGs are read by asm_collectargs. */
+
+  default:  /* Unhandled opcode: store it in errinfo and raise LJ_TRERR_NYIIR. */
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
@@ -1569,7 +1913,7 @@ static void asm_tail_link(ASMState *as)
     mres = (int32_t)(snap->nslots - baseslot);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1677,7 +2021,7 @@ static void asm_setup_regsp(ASMState *as)
 	as->modset |= RSET_SCRATCH;
       continue;
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1722,10 +2066,20 @@ static void asm_setup_regsp(ASMState *as)
       /* fallthrough */
 #endif
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
 	as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+	if (ir->op2 != REF_NIL && as->evenspill < 4)
+	  as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
+#else
+    case IR_CNEW:
+#endif
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
 	as->modset = RSET_SCRATCH;
@@ -1734,21 +2088,26 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
 	as->modset = RSET_SCRATCH;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+    case IR_ATAN2:
+#if LJ_TARGET_X86
+      if (as->evenspill < 4)  /* Leave room to call atan2(). */
+	as->evenspill = 4;
+#endif
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-	ir->prev = REGSP_HINT(RID_XMM0);
 	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+	  as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+	break;
 #else
 	ir->prev = REGSP_HINT(RID_FPRET);
-	if (inloop)
-	  as->modset |= RSET_SCRATCH;
-#endif
 	continue;
+#endif
       }
       /* fallthrough for integer POW */
     case IR_DIV: case IR_MOD:
@@ -1761,26 +2120,25 @@ static void asm_setup_regsp(ASMState *as)
       break;
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
-      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
-	ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
-	if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
+      if (ir->op2 <= IRFPM_TRUNC) {
+	if (!(as->flags & JIT_F_SSE4_1)) {
+	  ir->prev = REGSP_HINT(RID_XMM0);
+	  if (inloop)
+	    as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+	  continue;
+	}
+	break;
+      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+	if (as->evenspill < 4)  /* Leave room to call pow(). */
 	  as->evenspill = 4;
-#endif
-	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-	continue;
-      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-	ir->prev = REGSP_HINT(RID_XMM0);
-	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
-	continue;
       }
+#endif
+      if (inloop)
+	as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
       break;
 #else
       ir->prev = REGSP_HINT(RID_FPRET);
-      if (inloop)
-	as->modset |= RSET_SCRATCH;
       continue;
 #endif
 #if LJ_TARGET_X86ORX64
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_asm.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_asm.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_arm.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_arm.h
similarity index 89%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_arm.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_arm.h
index 9e4cf4367f07640f6113c3e752fcda46da14e20e..81843caf96ad0a1655e4d77780c446b0f8c729cb 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_arm.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_arm.h
@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 0;
 #if LJ_SOFTFP
   Reg gpr = REGARG_FIRSTGPR;
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   UNUSED(ci);
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
   IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +481,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -529,6 +520,8 @@ static void asm_tobit(ASMState *as, IRIns *ir)
   emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
   emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
 }
+#else
+#define asm_tobit(as, ir)	lua_assert(0)
 #endif
 
 static void asm_conv(ASMState *as, IRIns *ir)
@@ -601,31 +594,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
-#if !LJ_SOFTFP && LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  CCallInfo ci;
-  IRRef args[2];
-  args[0] = (ir-1)->op1;
-  args[1] = ir->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = lj_ir_callinfo[id];
-#if !LJ_ABI_SOFTFP
-  ci.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
-#endif
-  asm_setupresult(as, ir, &ci);
-  asm_gencall(as, &ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,6 +657,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
@@ -714,7 +684,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
       Reg src = ra_alloc1(as, ref, allow);
       emit_lso(as, ARMI_STR, src, RID_SP, 0);
     }
-    if ((ir+1)->o == IR_HIOP)
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
       type = ra_alloc1(as, ref+1, allow);
     else
       type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -722,27 +692,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,20 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1106,7 +1041,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1(as, ir->op2,
@@ -1116,6 +1051,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
 }
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1273,19 +1210,16 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
+  IRRef args[4];
   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
@@ -1307,16 +1241,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir--;
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   {
-    uint32_t k = emit_isk12(ARMI_MOV, ctypeid);
-    Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow);
+    uint32_t k = emit_isk12(ARMI_MOV, id);
+    Reg r = k ? RID_R1 : ra_allock(as, id, allow);
     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
   }
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1393,24 +1339,41 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
   emit_dm(as, ai, (dest & 15), (left & 15));
 }
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_callround(ASMState *as, IRIns *ir, int id)
+{
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
+		RID2RSET(RID_R3)|RID2RSET(RID_R12);
+  RegSet of;
+  Reg dest, src;
+  ra_evictset(as, drop);
+  dest = ra_dest(as, ir, RSET_FPR);
+  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+		id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+				   (void *)lj_vm_trunc_sf);
+  /* Workaround to protect argument GPRs from being used for remat. */
+  of = as->freeset;
+  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
+  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
+  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
+  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 <= IRFPM_TRUNC)
+    asm_callround(as, ir, ir->op2);
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, ARMI_VSQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
+#else
+#define asm_fpmath(as, ir)	lua_assert(0)
 #endif
 
 static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
@@ -1460,32 +1423,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
   asm_intop(as, ir, ai);
 }
 
-static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
-{
-  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
-    uint32_t cc = (as->mcp[1] >> 28);
-    as->flagmcp = NULL;
-    if (cc <= CC_NE) {
-      as->mcp++;
-      ai |= ARMI_S;
-    } else if (cc == CC_GE) {
-      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-      ai |= ARMI_S;
-    } else if (cc == CC_LT) {
-      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-      ai |= ARMI_S;
-    }  /* else: other conds don't work with bit ops. */
-  }
-  if (ir->op2 == 0) {
-    Reg dest = ra_dest(as, ir, RSET_GPR);
-    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
-    emit_d(as, ai^m, dest);
-  } else {
-    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
-    asm_intop(as, ir, ai);
-  }
-}
-
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1551,6 +1488,26 @@ static void asm_mul(ASMState *as, IRIns *ir)
   asm_intmul(as, ir);
 }
 
+#define asm_addov(as, ir)	asm_add(as, ir)
+#define asm_subov(as, ir)	asm_sub(as, ir)
+#define asm_mulov(as, ir)	asm_mul(as, ir)
+
+#if LJ_SOFTFP
+#define asm_div(as, ir)		lua_assert(0)
+#define asm_pow(as, ir)		lua_assert(0)
+#define asm_abs(as, ir)		lua_assert(0)
+#define asm_atan2(as, ir)	lua_assert(0)
+#define asm_ldexp(as, ir)	lua_assert(0)
+#else
+#define asm_div(as, ir)		asm_fparith(as, ir, ARMI_VDIV_D)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+#define asm_abs(as, ir)		asm_fpunary(as, ir, ARMI_VABS_D)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+#endif
+
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 #if !LJ_SOFTFP
@@ -1562,41 +1519,35 @@ static void asm_neg(ASMState *as, IRIns *ir)
   asm_intneg(as, ir, ARMI_RSB);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
+    uint32_t cc = (as->mcp[1] >> 28);
+    as->flagmcp = NULL;
+    if (cc <= CC_NE) {
+      as->mcp++;
+      ai |= ARMI_S;
+    } else if (cc == CC_GE) {
+      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
+      ai |= ARMI_S;
+    } else if (cc == CC_LT) {
+      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
+      ai |= ARMI_S;
+    }  /* else: other conds don't work with bit ops. */
+  }
+  if (ir->op2 == 0) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+    emit_d(as, ai^m, dest);
+  } else {
+    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
+    asm_intop(as, ir, ai);
+  }
 }
 
-#if !LJ_SOFTFP
-static void asm_callround(ASMState *as, IRIns *ir, int id)
-{
-  /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
-		RID2RSET(RID_R3)|RID2RSET(RID_R12);
-  RegSet of;
-  Reg dest, src;
-  ra_evictset(as, drop);
-  dest = ra_dest(as, ir, RSET_FPR);
-  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
-  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
-		id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
-				   (void *)lj_vm_trunc_sf);
-  /* Workaround to protect argument GPRs from being used for remat. */
-  of = as->freeset;
-  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
-  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
-  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
-  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
-  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
-}
-#endif
+#define asm_bnot(as, ir)	asm_bitop(as, ir, ARMI_MVN)
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1613,6 +1564,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_band(as, ir)	asm_bitop(as, ir, ARMI_AND)
+#define asm_bor(as, ir)		asm_bitop(as, ir, ARMI_ORR)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, ARMI_EOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
 {
   if (irref_isk(ir->op2)) {  /* Constant shifts. */
@@ -1630,6 +1585,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
   }
 }
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, ARMSH_LSL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, ARMSH_LSR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, ARMSH_ASR)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, ARMSH_ROR)
+#define asm_brol(as, ir)	lua_assert(0)
+
 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   uint32_t kcmp = 0, kmov = 0;
@@ -1703,6 +1664,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
     asm_intmin_max(as, ir, cc);
 }
 
+#define asm_min(as, ir)		asm_min_max(as, ir, CC_GT, CC_HI)
+#define asm_max(as, ir)		asm_min_max(as, ir, CC_LT, CC_LO)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Map of comparisons to flags. ORDER IR. */
@@ -1818,6 +1782,18 @@ notst:
     as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
 }
 
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t))
+    asm_fpcomp(as, ir);
+  else
+#endif
+    asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1892,7 +1868,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
   } else if ((ir-1)->o == IR_XSTORE) {
     if ((ir-1)->r != RID_SINK)
-      asm_xstore(as, ir, 4);
+      asm_xstore_(as, ir, 4);
     return;
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
@@ -1940,6 +1916,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
+  emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1969,7 +1955,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
 	   (int32_t)offsetof(lua_State, maxstack));
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
-    int32_t i = i32ptr(&J2G(as->J)->jit_L);
+    int32_t i = i32ptr(&J2G(as->J)->cur_L);
     if (ra_hasspill(irp->s))
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1977,7 +1963,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
     emit_loadi(as, RID_TMP, (i & ~4095));
   } else {
-    emit_getgl(as, RID_TMP, jit_L);
+    emit_getgl(as, RID_TMP, cur_L);
   }
 }
 
@@ -2086,13 +2072,13 @@ static void asm_loop_fixup(ASMState *as)
 
 /* -- Head of trace ------------------------------------------------------- */
 
-/* Reload L register from g->jit_L. */
+/* Reload L register from g->cur_L. */
 static void asm_head_lreg(ASMState *as)
 {
   IRIns *ir = IR(ASMREF_L);
   if (ra_used(ir)) {
     Reg r = ra_dest(as, ir, RSET_GPR);
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
     ra_evictk(as);
   }
 }
@@ -2163,143 +2149,13 @@ static void asm_tail_prep(ASMState *as)
   *p = 0;  /* Prevent load/store merging. */
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-#if !LJ_SOFTFP
-    if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
-#endif
-    asm_intcomp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
-  case IR_BOR:  asm_bitop(as, ir, ARMI_ORR); break;
-  case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
-  case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
-  case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
-  case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
-  case IR_BROL: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
-  case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
-  case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-#if LJ_SOFTFP
-  case IR_DIV: case IR_POW: case IR_ABS:
-  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
-    lua_assert(0);  /* Unused for LJ_SOFTFP. */
-    break;
-#else
-  case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 <= IRFPM_TRUNC)
-      asm_callround(as, ir, ir->op2);
-    else if (ir->op2 == IRFPM_SQRT)
-      asm_fpunary(as, ir, ARMI_VSQRT_D);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-#endif
-
-  case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
-  case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   IRRef args[CCI_NARGS_MAX*2];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++) {
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_mips.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_mips.h
similarity index 87%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_mips.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_mips.h
index 78bd26d5db578a6e0e6645d90528d194ab3421d7..adea0e32367bd8ccca5eb2f143899619634ff9e7 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_mips.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_mips.h
@@ -226,7 +226,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 16;
   Reg gpr, fpr = REGARG_FIRSTFPR;
   if ((void *)ci->func)
@@ -326,15 +326,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
   IRRef args[CCI_NARGS_MAX*2];
@@ -362,16 +353,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
   asm_gencall(as, &ci, args);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
 {
   /* The modified regs must match with the *.dasc implementation. */
@@ -391,7 +372,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -520,28 +501,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  const CCallInfo *ci;
-  IRRef args[2];
-  args[LJ_BE?0:1] = ir->op1;
-  args[LJ_BE?1:0] = (ir-1)->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = &lj_ir_callinfo[id];
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -558,6 +517,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
 	   RID_SP, sps_scale(ir->s));
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
@@ -581,27 +542,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -631,7 +571,7 @@ static void asm_aref(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
   RegSet allow = RSET_GPR;
   int destused = ra_used(ir);
@@ -657,37 +597,42 @@ static void asm_href(ASMState *as, IRIns *ir)
   tmp2 = ra_scratch(as, allow);
   rset_clear(allow, tmp2);
 
-  /* Key not found in chain: load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
-  if (destused)
+  as->invmcp = NULL;
+  if (merge == IR_NE)
+    asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  else
-    *--as->mcp = MIPSI_NOP;
   /* Follow hash chain until the end. */
-  emit_move(as, dest, tmp1);
+  emit_move(as, dest, tmp2);
   l_loop = --as->mcp;
-  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next));
+  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next));
   l_next = emit_label(as);
 
   /* Type and value comparison. */
+  if (merge == IR_EQ) {  /* Must match asm_guard(). */
+    emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
+    l_end = asm_exitstub_addr(as);
+  }
   if (irt_isnum(kt)) {
     emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
     emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
-	emit_tg(as, MIPSI_MFC1, tmp1, key+1);
-    emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
-    emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
+    *--as->mcp = MIPSI_NOP;  /* Avoid NaN comparison overhead. */
+    emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next);
+    emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM);
     emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
   } else {
     if (irt_ispri(kt)) {
-      emit_branch(as, MIPSI_BEQ, tmp1, type, l_end);
+      emit_branch(as, MIPSI_BEQ, tmp2, type, l_end);
     } else {
-      emit_branch(as, MIPSI_BEQ, tmp2, key, l_end);
-      emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
-      emit_branch(as, MIPSI_BNE, tmp1, type, l_next);
+      emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
+      emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr));
+      emit_branch(as, MIPSI_BNE, tmp2, type, l_next);
     }
   }
-  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it));
-  *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
+  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it));
+  *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu);
 
   /* Load main position relative to tab->node into dest. */
   khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
@@ -777,20 +722,6 @@ nolo:
     emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  if (ir->r != RID_SINK) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-    IRRef args[3];
-    args[0] = ASMREF_L;     /* lua_State *L */
-    args[1] = ir->op1;      /* GCtab *t     */
-    args[2] = ASMREF_TMP1;  /* cTValue *key */
-    asm_setupresult(as, ir, ci);  /* TValue * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -919,7 +850,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
@@ -928,6 +859,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
 }
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
   IRType1 t = ir->t;
@@ -1003,7 +936,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
       if (irt_isint(t)) {
 	Reg tmp = ra_scratch(as, RSET_FPR);
 	emit_tg(as, MIPSI_MFC1, dest, tmp);
-	emit_fg(as, MIPSI_CVT_W_D, tmp, tmp);
+	emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
 	dest = tmp;
 	t.irt = IRT_NUM;  /* Check for original type. */
       } else {
@@ -1043,19 +976,15 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  IRRef args[4];
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
@@ -1064,6 +993,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
     int32_t ofs = sizeof(GCcdata);
     lua_assert(sz == 4 || sz == 8);
     if (sz == 8) {
@@ -1078,12 +1008,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; if (LJ_BE) ir++; else ir--;
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
-  emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */
+  emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1153,23 +1095,16 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
   emit_fg(as, mi, dest, left);
 }
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 <= IRFPM_TRUNC)
+    asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, MIPSI_SQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 
 static void asm_add(ASMState *as, IRIns *ir)
@@ -1215,6 +1150,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_div(as, ir)		asm_fparith(as, ir, MIPSI_DIV_D)
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 {
   if (irt_isnum(ir->t)) {
@@ -1226,6 +1165,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_abs(as, ir)		asm_fpunary(as, ir, MIPSI_ABS_D)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+
 static void asm_arithov(ASMState *as, IRIns *ir)
 {
   Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
@@ -1259,13 +1202,21 @@ static void asm_arithov(ASMState *as, IRIns *ir)
     emit_move(as, RID_TMP, dest == left ? left : right);
 }
 
+#define asm_addov(as, ir)	asm_arithov(as, ir)
+#define asm_subov(as, ir)	asm_arithov(as, ir)
+
 static void asm_mulov(ASMState *as, IRIns *ir)
 {
-#if LJ_DUALNUM
-#error "NYI: MULOV"
-#else
-  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused in single-number mode. */
-#endif
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;
+  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
+						 right), dest));
+  asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
+  emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
+  emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
+  emit_dst(as, MIPSI_MFLO, dest, 0, 0);
+  emit_dst(as, MIPSI_MULT, 0, left, right);
 }
 
 #if LJ_HASFFI
@@ -1352,7 +1303,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
 }
 #endif
 
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
 {
   Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
   IRIns *irl = IR(ir->op1);
@@ -1366,7 +1317,7 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
   emit_dst(as, MIPSI_NOR, dest, left, right);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1402,6 +1353,10 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
   emit_dst(as, mi, dest, left, right);
 }
 
+#define asm_band(as, ir)	asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)  /* Same call as the removed `case IR_BAND`. */
+#define asm_bor(as, ir)		asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)  /* Same call as the removed `case IR_BOR`. */
+#define asm_bxor(as, ir)	asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)  /* Same call as the removed `case IR_BXOR`. */
+
 static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1415,7 +1370,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
   }
 }
 
-static void asm_bitror(ASMState *as, IRIns *ir)
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)  /* Same call as the removed `case IR_BSHL`. */
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)  /* Same call as the removed `case IR_BSHR`. */
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)  /* Same call as the removed `case IR_BSAR`. */
+#define asm_brol(as, ir)	lua_assert(0)  /* Asserts, exactly like the removed `case IR_BROL`. */
+
+static void asm_bror(ASMState *as, IRIns *ir)
 {
   if ((as->flags & JIT_F_MIPS32R2)) {
     asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
@@ -1464,6 +1424,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
   }
 }
 
+#define asm_min(as, ir)		asm_min_max(as, ir, 0)  /* ismax = 0, as in the removed `case IR_MIN`. */
+#define asm_max(as, ir)		asm_min_max(as, ir, 1)  /* ismax = 1, as in the removed `case IR_MAX`. */
+
 /* -- Comparisons --------------------------------------------------------- */
 
 static void asm_comp(ASMState *as, IRIns *ir)
@@ -1501,7 +1464,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_compeq(ASMState *as, IRIns *ir)
+static void asm_equal(ASMState *as, IRIns *ir)
 {
   Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
   right = (left >> 8); left &= 255;
@@ -1575,8 +1538,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--;  /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
-      asm_xstore(as, ir, LJ_LE ? 4 : 0);
-      asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
+      asm_xstore_(as, ir, LJ_LE ? 4 : 0);
+      asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
     }
     return;
   }
@@ -1600,6 +1563,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)  /* Emits a guarded test of HOOK_PROFILE in global_State.hookmask. */
+{
+  UNUSED(ir);  /* ir is not otherwise used. */
+  asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);  /* Guard: BNE on RID_TMP vs. RID_ZERO. NOTE(review): emission looks reversed, so this would run after the load/mask below -- confirm. */
+  emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);  /* ANDI RID_TMP with HOOK_PROFILE. */
+  emit_lsglptr(as, MIPSI_LBU, RID_TMP,
+	       (int32_t)offsetof(global_State, hookmask));  /* LBU into RID_TMP at the offset of global_State.hookmask. */
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1624,7 +1598,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
   if (pbase == RID_TMP)
     emit_getgl(as, RID_TMP, jit_base);
-  emit_getgl(as, tmp, jit_L);
+  emit_getgl(as, tmp, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
 }
@@ -1772,131 +1746,13 @@ static void asm_tail_prep(ASMState *as)
   as->invmcp = as->loopref ? as->mcp : NULL;
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-    asm_comp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitnot(as, ir); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
-  case IR_BOR:  asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
-  case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
-  case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
-  case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
-  case IR_BROL: lua_assert(0); break;
-  case IR_BROR: asm_bitror(as, ir); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB: asm_sub(as, ir); break;
-  case IR_MUL: asm_mul(as, ir); break;
-  case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-  case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_MIN: asm_min_max(as, ir, 0); break;
-  case IR_MAX: asm_min_max(as, ir, 1); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 <= IRFPM_TRUNC)
-      asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    else if (ir->op2 == IRFPM_SQRT)
-      asm_fpunary(as, ir, MIPSI_SQRT_D);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-
-  /* Overflow-checking arithmetic ops. */
-  case IR_ADDOV: asm_arithov(as, ir); break;
-  case IR_SUBOV: asm_arithov(as, ir); break;
-  case IR_MULOV: asm_mulov(as, ir); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   IRRef args[CCI_NARGS_MAX*2];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++) {
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_ppc.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_ppc.h
similarity index 89%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_ppc.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_ppc.h
index 2c5d74a8d949461ffde052f98708d032d5b2ca1a..7deeb66e51f2bd40b6a3e9f1ebc9ca64ac859903 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_ppc.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_ppc.h
@@ -251,7 +251,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 8;
   Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
   if ((void *)ci->func)
@@ -323,23 +323,16 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       } else {
 	ra_destreg(as, ir, RID_FPRET);
       }
+#if LJ_32
     } else if (hiop) {
       ra_destpair(as, ir);
+#endif
     } else {
       ra_destreg(as, ir, RID_RET);
     }
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
   IRRef args[CCI_NARGS_MAX*2];
@@ -352,7 +345,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
   func = ir->op2; irf = IR(func);
   if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
   if (irref_isk(func)) {  /* Call to constant address. */
-    ci.func = (ASMFunction)(void *)(irf->i);
+    ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
   } else {  /* Need a non-argument register for indirect calls. */
     RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
     Reg freg = ra_alloc1(as, func, allow);
@@ -363,16 +356,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
   asm_gencall(as, &ci, args);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* -- Returns ------------------------------------------------------------- */
 
 /* Return to lower frame. Guard that it goes to the right spot. */
@@ -380,7 +363,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -513,28 +496,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  const CCallInfo *ci;
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = (ir-1)->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = &lj_ir_callinfo[id];
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -553,6 +514,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
   emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
@@ -566,7 +529,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
     /* Otherwise use g->tmptv to hold the TValue. */
     RegSet allow = rset_exclude(RSET_GPR, dest);
     Reg type;
-    emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
+    emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
     if (!irt_ispri(ir->t)) {
       Reg src = ra_alloc1(as, ref, allow);
       emit_setgl(as, src, tmptv.gcr);
@@ -576,27 +539,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -773,20 +715,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -917,7 +845,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
   IRIns *irb;
   if (ir->r == RID_SINK)
@@ -934,6 +862,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
 }
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)  /* Offset-0 store; asm_hiop() calls asm_xstore_() directly with offsets 0 and 4. */
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
   IRType1 t = ir->t;
@@ -1084,19 +1014,15 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  IRRef args[4];
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
@@ -1105,6 +1031,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
     int32_t ofs = sizeof(GCcdata);
     lua_assert(sz == 4 || sz == 8);
     if (sz == 8) {
@@ -1118,12 +1045,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir++;
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
-  emit_ti(as, PPCI_LI, RID_TMP, ctypeid);  /* Lower 16 bit used. Sign-ext ok. */
+  emit_ti(as, PPCI_LI, RID_TMP, id);  /* Lower 16 bit used. Sign-ext ok. */
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
 	       ra_releasetmp(as, ASMREF_TMP1));
@@ -1197,23 +1136,14 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
   emit_fb(as, pi, dest, left);
 }
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_fpmath(ASMState *as, IRIns *ir)  /* Assemble an FPMATH instruction; same logic as the removed `case IR_FPMATH`. */
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;  /* asm_fpjoin_pow() returned non-zero: nothing more to emit here. */
+  if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
+    asm_fpunary(as, ir, PPCI_FSQRT);  /* PPCI_FSQRT is used only when the JIT_F_SQRT flag is set. */
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);  /* Otherwise call ID = IRCALL_lj_vm_floor + op2. */
 }
 
 static void asm_add(ASMState *as, IRIns *ir)
@@ -1313,6 +1243,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_div(as, ir)		asm_fparith(as, ir, PPCI_FDIV)  /* Same call as the removed `case IR_DIV`. */
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)  /* Same call as the removed `case IR_MOD`. */
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)  /* Same call as the removed `case IR_POW`. */
+
 static void asm_neg(ASMState *as, IRIns *ir)
 {
   if (irt_isnum(ir->t)) {
@@ -1331,6 +1265,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_abs(as, ir)		asm_fpunary(as, ir, PPCI_FABS)  /* Same call as the removed `case IR_ABS`. */
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)  /* Same call as the removed `case IR_ATAN2`. */
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)  /* Same call as the removed `case IR_LDEXP`. */
+
 static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
 {
   Reg dest, left, right;
@@ -1346,6 +1284,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
   emit_tab(as, pi|PPCF_DOT, dest, left, right);
 }
 
+#define asm_addov(as, ir)	asm_arithov(as, ir, PPCI_ADDO)  /* Same call as the removed `case IR_ADDOV`. */
+#define asm_subov(as, ir)	asm_arithov(as, ir, PPCI_SUBFO)  /* Same call as the removed `case IR_SUBOV`. */
+#define asm_mulov(as, ir)	asm_arithov(as, ir, PPCI_MULLWO)  /* Same call as the removed `case IR_MULOV`. */
+
 #if LJ_HASFFI
 static void asm_add64(ASMState *as, IRIns *ir)
 {
@@ -1425,7 +1367,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
 }
 #endif
 
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
 {
   Reg dest, left, right;
   PPCIns pi = PPCI_NOR;
@@ -1452,7 +1394,7 @@ nofuse:
   emit_asb(as, pi, dest, left, right);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   IRIns *irx;
@@ -1473,32 +1415,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
-{
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
-  if (irref_isk(ir->op2)) {
-    int32_t k = IR(ir->op2)->i;
-    Reg tmp = left;
-    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
-      if (!checku16(k)) {
-	emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
-	if ((k & 0xffff) == 0) return;
-      }
-      emit_asi(as, pik, dest, left, k);
-      return;
-    }
-  }
-  /* May fail due to spills/restores above, but simplifies the logic. */
-  if (as->flagmcp == as->mcp) {
-    as->flagmcp = NULL;
-    as->mcp++;
-    pi |= PPCF_DOT;
-  }
-  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
-  emit_asb(as, pi, dest, left, right);
-}
-
 /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
 static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
 {
@@ -1529,7 +1445,7 @@ nofuse:
   *--as->mcp = pi | PPCF_T(left);
 }
 
-static void asm_bitand(ASMState *as, IRIns *ir)
+static void asm_band(ASMState *as, IRIns *ir)
 {
   Reg dest, left, right;
   IRRef lref = ir->op1;
@@ -1584,6 +1500,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
   emit_asb(as, PPCI_AND ^ dot, dest, left, right);
 }
 
+static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)  /* Two-operand bit op: pi is emitted with a register operand, pik with an immediate. */
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);  /* Left operand, allocated with dest passed as the hint. */
+  if (irref_isk(ir->op2)) {  /* Constant right operand: try the immediate forms first. */
+    int32_t k = IR(ir->op2)->i;
+    Reg tmp = left;
+    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {  /* Third alternative sets tmp = dest and requires sectref to be unset. */
+      if (!checku16(k)) {
+	emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));  /* Upper 16 bits of k; the XOR presumably maps pik to its shifted-immediate variant -- confirm. */
+	if ((k & 0xffff) == 0) return;  /* Low 16 bits are zero: no second immediate op is emitted. */
+      }
+      emit_asi(as, pik, dest, left, k);  /* Immediate form with k, reading left. */
+      return;
+    }
+  }
+  /* May fail due to spills/restores above, but simplifies the logic. */
+  if (as->flagmcp == as->mcp) {  /* flagmcp equals the current mcp. */
+    as->flagmcp = NULL;
+    as->mcp++;
+    pi |= PPCF_DOT;  /* Use the PPCF_DOT form of pi. */
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));  /* Right operand in a GPR other than left. */
+  emit_asb(as, pi, dest, left, right);
+}
+
+#define asm_bor(as, ir)		asm_bitop(as, ir, PPCI_OR, PPCI_ORI)  /* Same call as the removed `case IR_BOR`. */
+#define asm_bxor(as, ir)	asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)  /* Same call as the removed `case IR_BXOR`. */
+
 static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
 {
   Reg dest, left;
@@ -1609,6 +1554,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
   }
 }
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, PPCI_SLW, 0)  /* Same call as the removed `case IR_BSHL`. */
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, PPCI_SRW, 1)  /* Same call as the removed `case IR_BSHR`. */
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)  /* Same call as the removed `case IR_BSAR`. */
+#define asm_brol(as, ir) \
+  asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
+		       PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))  /* Same call as the removed `case IR_BROL` (MB = 0, ME = 31). */
+#define asm_bror(as, ir)	lua_assert(0)  /* Asserts, exactly like the removed `case IR_BROR`. */
+
 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
 {
   if (irt_isnum(ir->t)) {
@@ -1639,6 +1592,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
   }
 }
 
+#define asm_min(as, ir)		asm_min_max(as, ir, 0)  /* ismax = 0, as in the removed `case IR_MIN`. */
+#define asm_max(as, ir)		asm_min_max(as, ir, 1)  /* ismax = 1, as in the removed `case IR_MAX`. */
+
 /* -- Comparisons --------------------------------------------------------- */
 
 #define CC_UNSIGNED	0x08	/* Unsigned integer comparison. */
@@ -1715,6 +1671,8 @@ static void asm_comp(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_equal(as, ir)	asm_comp(as, ir)  /* Forwards to asm_comp(); the removed `case IR_EQ/IR_NE` also fell through to it. */
+
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1760,8 +1718,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--;  /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
-      asm_xstore(as, ir, 0);
-      asm_xstore(as, ir-1, 4);
+      asm_xstore_(as, ir, 0);
+      asm_xstore_(as, ir-1, 4);
     }
     return;
   }
@@ -1785,6 +1743,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)  /* Emits a guarded test of HOOK_PROFILE in global_State.hookmask. */
+{
+  UNUSED(ir);  /* ir is not otherwise used. */
+  asm_guardcc(as, CC_NE);  /* Guard on CC_NE. NOTE(review): emission looks reversed (cf. `*--as->mcp` elsewhere in this patch), so this would run after the load/mask below -- confirm. */
+  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);  /* ANDIDOT of RID_TMP with HOOK_PROFILE. */
+  emit_lsglptr(as, PPCI_LBZ, RID_TMP,
+	       (int32_t)offsetof(global_State, hookmask));  /* LBZ into RID_TMP at the offset of global_State.hookmask. */
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1806,7 +1775,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
   if (pbase == RID_TMP)
     emit_getgl(as, RID_TMP, jit_base);
-  emit_getgl(as, tmp, jit_L);
+  emit_getgl(as, tmp, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
 }
@@ -1967,136 +1936,13 @@ static void asm_tail_prep(ASMState *as)
   }
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-    asm_comp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitnot(as, ir); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitand(as, ir); break;
-  case IR_BOR:  asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
-  case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
-  case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
-  case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
-  case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
-			     PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
-  case IR_BROR: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB: asm_sub(as, ir); break;
-  case IR_MUL: asm_mul(as, ir); break;
-  case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-  case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_MIN: asm_min_max(as, ir, 0); break;
-  case IR_MAX: asm_min_max(as, ir, 1); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
-      asm_fpunary(as, ir, PPCI_FSQRT);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-
-  /* Overflow-checking arithmetic ops. */
-  case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
-  case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
-  case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   IRRef args[CCI_NARGS_MAX*2];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++)
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_x86.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_x86.h
similarity index 86%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_x86.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_x86.h
index 0b6b2d4a5a70780ff393195e0626f8f52e9c8644..941d09192c2034321fe158cae7b8da2177c77ea1 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_asm_x86.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_asm_x86.h
@@ -392,7 +392,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
 /* Count the required number of stack slots for a call. */
 static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t i, nargs = CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0;
 #if LJ_64
   if (LJ_ABI_WIN) {
@@ -425,7 +425,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = STACKARG_OFS;
 #if LJ_64
   uint32_t gprs = REGARG_GPRS;
@@ -560,7 +560,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       if (ra_hasreg(dest)) {
 	ra_free(as, dest);
 	ra_modified(as, dest);
-	emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+	emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
 		  dest, RID_ESP, ofs);
       }
       if ((ci->flags & CCI_CASTU64)) {
@@ -584,15 +584,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* Return a constant function pointer or NULL for indirect calls. */
 static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 {
@@ -652,7 +643,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -672,8 +663,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   asm_guardcc(as, CC_NE);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
-  if (!(as->flags & JIT_F_SPLIT_XMM))
-    emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
 }
@@ -729,8 +719,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
 	       dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
     }
-    if (!(as->flags & JIT_F_SPLIT_XMM))
-      emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
+    emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
       /* Checked conversions are only supported from number to int. */
@@ -738,9 +727,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      x86Op op = st == IRT_NUM ?
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+      x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
 	/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
 	/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -834,8 +821,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
-	      dest, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
   }
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
 	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -863,7 +849,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
   Reg lo, hi;
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
-  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
   hi = ra_dest(as, ir, RSET_GPR);
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -906,6 +891,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
 	   st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
 	   asm_fuseload(as, ir->op1, RSET_EMPTY));
 }
+
+static void asm_conv64(ASMState *as, IRIns *ir)  /* Dispatch a 64 bit CONV. */
+{
+  if (irt_isfp(ir->t))  /* FP result type: 64 bit integer to FP. */
+    asm_conv_fp_int64(as, ir);
+  else  /* Otherwise: FP to 64 bit integer. */
+    asm_conv_int64_fp(as, ir);
+}
 #endif
 
 static void asm_strto(ASMState *as, IRIns *ir)
@@ -927,29 +920,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
 	    RID_ESP, sps_scale(ir->s));
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
-  IRIns *irl = IR(ir->op1);
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(irl->t)) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
-	      RID_ESP, ra_spill(as, irl));
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    /* For numbers use the constant itself or a spill slot as a TValue. */
+    if (irref_isk(ref))
+      emit_loada(as, dest, ir_knum(ir));
+    else
+      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
   } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
+    /* Otherwise use g->tmptv to hold the TValue. */
+    if (!irref_isk(ref)) {
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    } else if (!irt_ispri(ir->t)) {
+      emit_movmroi(as, dest, 0, ir->i);
+    }
+    if (!(LJ_64 && irt_islightud(ir->t)))
+      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+    emit_loada(as, dest, &J2G(as->J)->tmptv);
   }
 }
 
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,23 +956,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
     emit_rr(as, XO_MOV, dest, as->mrm.base);
 }
 
-/* Merge NE(HREF, niltv) check. */
-static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
-{
-  /* Assumes nothing else generates NE of HREF. */
-  if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
-      ra_hasreg(ir->r)) {
-    MCode *p = as->mcp;
-    p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
-    /* Ensure no loop branch inversion happened. */
-    if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
-      as->mcp = p;  /* Kill cmp reg, imm32 + jz exit. */
-      return p + *(int32_t *)(p-4);  /* Return exit address. */
-    }
-  }
-  return NULL;
-}
-
 /* Inlined hash lookup. Specialized for key type and for const keys.
 ** The equivalent C code is:
 **   Node *n = hashkey(t, key);
@@ -985,10 +964,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
-  MCode *nilexit = merge_href_niltv(as, ir);  /* Do this before any restores. */
   RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
   Reg key = RID_NONE, tmp = RID_NONE;
@@ -1005,14 +984,12 @@ static void asm_href(ASMState *as, IRIns *ir)
       tmp = ra_scratch(as, rset_exclude(allow, key));
   }
 
-  /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
-  if (nilexit && ir[1].o == IR_NE) {
-    emit_jcc(as, CC_E, nilexit);  /* XI_JMP is not found by lj_asm_patchexit. */
-    nilexit = NULL;
-  } else {
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_E);  /* XI_JMP is not found by lj_asm_patchexit. */
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  }
 
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1021,8 +998,8 @@ static void asm_href(ASMState *as, IRIns *ir)
   l_next = emit_label(as);
 
   /* Type and value comparison. */
-  if (nilexit)
-    emit_jcc(as, CC_E, nilexit);
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_E);
   else
     emit_sjcc(as, CC_E, l_end);
   if (irt_isnum(kt)) {
@@ -1178,41 +1155,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 #endif
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  IRIns *irkey;
-  Reg tmp;
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  tmp = ra_releasetmp(as, ASMREF_TMP1);
-  irkey = IR(ir->op2);
-  if (irt_isnum(irkey->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ir->op2))
-      emit_loada(as, tmp, ir_knum(irkey));
-    else
-      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    if (!irref_isk(ir->op2)) {
-      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
-    } else if (!irt_ispri(irkey->t)) {
-      emit_movmroi(as, tmp, 0, irkey->i);
-    }
-    if (!(LJ_64 && irt_islightud(irkey->t)))
-      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
-    emit_loada(as, tmp, &J2G(as->J)->tmptv);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1272,7 +1214,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
-  case IRT_NUM: xo = XMM_MOVRM(as); break;
+  case IRT_NUM: xo = XO_MOVSD; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   default:
     if (LJ_64 && irt_is64(ir->t))
@@ -1285,6 +1227,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   emit_mrm(as, xo, dest, RID_MRM);
 }
 
+#define asm_fload(as, ir)	asm_fxload(as, ir)
+#define asm_xload(as, ir)	asm_fxload(as, ir)
+
 static void asm_fxstore(ASMState *as, IRIns *ir)
 {
   RegSet allow = RSET_GPR;
@@ -1348,6 +1293,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_fstore(as, ir)	asm_fxstore(as, ir)
+#define asm_xstore(as, ir)	asm_fxstore(as, ir)
+
 #if LJ_64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
@@ -1386,7 +1334,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
-    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
     asm_fuseahuref(as, ir->op1, RSET_GPR);
   }
@@ -1452,7 +1400,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     Reg left = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
-    emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+    emit_rmro(as, XO_MOVSD, left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
 #if LJ_64
   } else if (irt_islightud(t)) {
@@ -1470,11 +1418,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
-      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
-    } else if (irt_isnum(t)) {
-      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
+      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
-      emit_rmro(as, XO_MOV, dest, base, ofs);
+      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1501,15 +1447,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  lua_assert(sz != CTSIZE_INVALID);
+  IRRef args[4];
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   asm_setupresult(as, ir, ci);  /* GCcdata * */
 
@@ -1552,15 +1496,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
     } while (1);
 #endif
     lua_assert(sz == 4 || sz == 8);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
 
   /* Combine initialization of marked, gct and ctypeid. */
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
-	   (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16)));
+	   (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
   emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
   emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
 
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
 }
@@ -1638,36 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
   }
 }
 
-/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
-static int fpmjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      /* The modified regs must match with the *.dasc implementation. */
-      RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-      IRIns *irx;
-      if (ra_hasreg(ir->r))
-	rset_clear(drop, ir->r);  /* Dest reg handled below. */
-      ra_evictset(as, drop);
-      ra_destreg(as, ir, RID_XMM0);
-      emit_call(as, lj_vm_pow_sse);
-      irx = IR(irpp->op1);
-      if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
-	irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
-      ra_left(as, RID_XMM0, irpp->op1);
-      ra_left(as, RID_XMM1, irp->op2);
-      return 1;
-    }
-  }
-  return 0;
-}
-
 static void asm_fpmath(ASMState *as, IRIns *ir)
 {
-  IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
   if (fpm == IRFPM_SQRT) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1698,51 +1626,29 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
 		    fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
       ra_left(as, RID_XMM0, ir->op1);
     }
-  } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
+  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
-  } else {  /* Handle x87 ops. */
-    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
-    Reg dest = ir->r;
-    if (ra_hasreg(dest)) {
-      ra_free(as, dest);
-      ra_modified(as, dest);
-      emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
-    }
-    emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
-    switch (fpm) {  /* st0 = lj_vm_*(st0) */
-    case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
-    case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
-    case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
-    case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
-    case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
-    case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
-      /* Note: the use of fyl2xp1 would be pointless here. When computing
-      ** log(1.0+eps) the precision is already lost after 1.0 is added.
-      ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
-      */
-      emit_x87op(as, XI_FYL2X); break;
-    case IRFPM_OTHER:
-      switch (ir->o) {
-      case IR_ATAN2:
-	emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
-      case IR_LDEXP:
-	emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
-      default: lua_assert(0); break;
-      }
-      break;
-    default: lua_assert(0); break;
-    }
-    asm_x87load(as, ir->op1);
-    switch (fpm) {
-    case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
-    case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
-    case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
-    case IRFPM_OTHER:
-      if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
-      break;
-    default: break;
-    }
+  } else {
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
+  }
+}
+
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+
+static void asm_ldexp(ASMState *as, IRIns *ir)  /* LDEXP via x87 FSCALE. */
+{
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {  /* Result has a register: load it from the slot. */
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
   }
+  emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);  /* x87 result to slot. */
+  emit_x87op(as, XI_FPOP1);
+  emit_x87op(as, XI_FSCALE);  /* The actual scaling operation. */
+  asm_x87load(as, ir->op1);  /* Both IR operands go onto the x87 stack. */
+  asm_x87load(as, ir->op2);
 }
 
 static void asm_fppowi(ASMState *as, IRIns *ir)
@@ -1758,26 +1664,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
 }
 
-#if LJ_64 && LJ_HASFFI
-static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_pow(ASMState *as, IRIns *ir)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					  IRCALL_lj_carith_powu64);
+  else
 #endif
-
-static void asm_intmod(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+    asm_fppowi(as, ir);
 }
 
 static int asm_swapops(ASMState *as, IRIns *ir)
@@ -1960,6 +1855,44 @@ static void asm_add(ASMState *as, IRIns *ir)
     asm_intarith(as, ir, XOg_ADD);
 }
 
+static void asm_sub(ASMState *as, IRIns *ir)  /* Subtraction, by result type. */
+{
+  if (irt_isnum(ir->t))  /* FP number: SUBSD via the FP arithmetic helper. */
+    asm_fparith(as, ir, XO_SUBSD);
+  else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+    asm_intarith(as, ir, XOg_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)  /* Multiplication, by result type. */
+{
+  if (irt_isnum(ir->t))  /* FP number: MULSD via the FP arithmetic helper. */
+    asm_fparith(as, ir, XO_MULSD);
+  else  /* Any other type: integer IMUL. */
+    asm_intarith(as, ir, XOg_X_IMUL);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)  /* Division, by result type. */
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))  /* Non-number: call the i64 or u64 C helper. */
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					  IRCALL_lj_carith_divu64);
+  else  /* FP number: falls through to the DIVSD below. */
+#endif
+    asm_fparith(as, ir, XO_DIVSD);  /* The only path without LJ_64 && FFI. */
+}
+
+static void asm_mod(ASMState *as, IRIns *ir)  /* Modulo is always a call. */
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))  /* Not a plain int: call the i64 or u64 C helper. */
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					  IRCALL_lj_carith_modu64);
+  else  /* Plain int: falls through to lj_vm_modi below. */
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);  /* The only path without LJ_64 && FFI. */
+}
+
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1967,7 +1900,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
   ra_left(as, dest, ir->op1);
 }
 
-static void asm_min_max(ASMState *as, IRIns *ir, int cc)
+static void asm_neg(ASMState *as, IRIns *ir)  /* Negation, by result type. */
+{
+  if (irt_isnum(ir->t))  /* FP number: XORPS; presumably op2 is a sign mask. */
+    asm_fparith(as, ir, XO_XORPS);
+  else  /* Any other type: integer NEG through the shared NEG/NOT helper. */
+    asm_neg_not(as, ir, XOg_NEG);
+}
+
+#define asm_abs(as, ir)		asm_fparith(as, ir, XO_ANDPS)
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   Reg right, dest = ra_dest(as, ir, RSET_GPR);
   IRRef lref = ir->op1, rref = ir->op2;
@@ -1978,7 +1921,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
   ra_left(as, dest, lref);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_min(ASMState *as, IRIns *ir)  /* Minimum, by result type. */
+{
+  if (irt_isnum(ir->t))  /* FP number: MINSD via the FP arithmetic helper. */
+    asm_fparith(as, ir, XO_MINSD);
+  else  /* Any other type: integer helper with the signed 'greater' cc. */
+    asm_intmin_max(as, ir, CC_G);
+}
+
+static void asm_max(ASMState *as, IRIns *ir)  /* Maximum, by result type. */
+{
+  if (irt_isnum(ir->t))  /* FP number: MAXSD via the FP arithmetic helper. */
+    asm_fparith(as, ir, XO_MAXSD);
+  else  /* Any other type: integer helper with the signed 'less' cc. */
+    asm_intmin_max(as, ir, CC_L);
+}
+
+/* Note: don't use LEA for overflow-checking arithmetic! */
+#define asm_addov(as, ir)	asm_intarith(as, ir, XOg_ADD)
+#define asm_subov(as, ir)	asm_intarith(as, ir, XOg_SUB)
+#define asm_mulov(as, ir)	asm_intarith(as, ir, XOg_X_IMUL)
+
+#define asm_bnot(as, ir)	asm_neg_not(as, ir, XOg_NOT)
+
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1986,6 +1952,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   ra_left(as, dest, ir->op1);
 }
 
+#define asm_band(as, ir)	asm_intarith(as, ir, XOg_AND)
+#define asm_bor(as, ir)		asm_intarith(as, ir, XOg_OR)
+#define asm_bxor(as, ir)	asm_intarith(as, ir, XOg_XOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 {
   IRRef rref = ir->op2;
@@ -2025,6 +1995,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
   */
 }
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, XOg_SHL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, XOg_SHR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, XOg_SAR)
+#define asm_brol(as, ir)	asm_bitshift(as, ir, XOg_ROL)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, XOg_ROR)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Virtual flags for unordered FP comparisons. */
@@ -2051,8 +2027,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
 };
 
 /* FP and integer comparisons. */
-static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
+static void asm_comp(ASMState *as, IRIns *ir)
 {
+  uint32_t cc = asm_compmap[ir->o];
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
     IRRef rref = ir->op2;
@@ -2207,6 +2184,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
   }
 }
 
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_32 && LJ_HASFFI
 /* 64 bit integer comparisons in 32 bit mode. */
 static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2289,13 +2268,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
-    if (usehi || uselo) {
-      if (irt_isfp(ir->t))
-	asm_conv_fp_int64(as, ir);
-      else
-	asm_conv_int64_fp(as, ir);
-    }
     as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
     return;
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     asm_comp_int64(as, ir);
@@ -2344,6 +2319,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)  /* Guarded profile hook check. */
+{
+  UNUSED(ir);  /* The IR instruction's operands are not consulted. */
+  asm_guardcc(as, CC_NE);  /* Guard on NE, i.e. a non-zero TEST result. */
+  emit_i8(as, HOOK_PROFILE);  /* 8 bit immediate: the mask to test. */
+  emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);  /* vs. g->hookmask. */
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -2365,7 +2350,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
     emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
 	      ptr2addr(&J2G(as->J)->jit_base));
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
-  emit_getgl(as, r, jit_L);
+  emit_getgl(as, r, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }
@@ -2593,163 +2578,6 @@ static void asm_tail_prep(ASMState *as)
   }
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_EQ: case IR_NE: case IR_ABC:
-    asm_comp(as, ir, asm_compmap[ir->o]);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
-  case IR_BOR:  asm_intarith(as, ir, XOg_OR); break;
-  case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
-  case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
-  case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
-  case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
-  case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_SUBSD);
-    else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
-      asm_intarith(as, ir, XOg_SUB);
-    break;
-  case IR_MUL:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MULSD);
-    else
-      asm_intarith(as, ir, XOg_X_IMUL);
-    break;
-  case IR_DIV:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-					     IRCALL_lj_carith_divu64);
-    else
-#endif
-      asm_fparith(as, ir, XO_DIVSD);
-    break;
-  case IR_MOD:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isint(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-					     IRCALL_lj_carith_modu64);
-    else
-#endif
-      asm_intmod(as, ir);
-    break;
-
-  case IR_NEG:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_XORPS);
-    else
-      asm_neg_not(as, ir, XOg_NEG);
-    break;
-  case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
-
-  case IR_MIN:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MINSD);
-    else
-      asm_min_max(as, ir, CC_G);
-    break;
-  case IR_MAX:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MAXSD);
-    else
-      asm_min_max(as, ir, CC_L);
-    break;
-
-  case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
-    asm_fpmath(as, ir);
-    break;
-  case IR_POW:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-					     IRCALL_lj_carith_powu64);
-    else
-#endif
-      asm_fppowi(as, ir);
-    break;
-
-  /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
-  case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
-  case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
-  case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_bc.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bc.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_bc.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bc.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_bc.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bc.h
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_bc.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bc.h
index 7436fabfe173be867d4ef6b338966815d5576995..64c1bcda35a27b77abb6d5e4e8396c7a8bf89eaa 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_bc.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bc.h
@@ -89,6 +89,8 @@
   _(ISFC,	dst,	___,	var,	___) \
   _(IST,	___,	___,	var,	___) \
   _(ISF,	___,	___,	var,	___) \
+  _(ISTYPE,	var,	___,	lit,	___) \
+  _(ISNUM,	var,	___,	lit,	___) \
   \
   /* Unary ops. */ \
   _(MOV,	dst,	___,	var,	___) \
@@ -143,10 +145,12 @@
   _(TGETV,	dst,	var,	var,	index) \
   _(TGETS,	dst,	var,	str,	index) \
   _(TGETB,	dst,	var,	lit,	index) \
+  _(TGETR,	dst,	var,	var,	index) \
   _(TSETV,	var,	var,	var,	newindex) \
   _(TSETS,	var,	var,	str,	newindex) \
   _(TSETB,	var,	var,	lit,	newindex) \
   _(TSETM,	base,	___,	num,	newindex) \
+  _(TSETR,	var,	var,	var,	newindex) \
   \
   /* Calls and vararg handling. T = tail call. */ \
   _(CALLM,	base,	lit,	lit,	call) \
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_bcdump.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bcdump.h
similarity index 92%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_bcdump.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bcdump.h
index 812d0e151694879e8269915f97e44dcce8b4216a..c38983146bbadec5173fa9df552b5aaa31c54ce1 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_bcdump.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bcdump.h
@@ -36,14 +36,15 @@
 /* If you perform *any* kind of private modifications to the bytecode itself
 ** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
 */
-#define BCDUMP_VERSION		1
+#define BCDUMP_VERSION		2
 
 /* Compatibility flags. */
 #define BCDUMP_F_BE		0x01
 #define BCDUMP_F_STRIP		0x02
 #define BCDUMP_F_FFI		0x04
+#define BCDUMP_F_FR2		0x08
 
-#define BCDUMP_F_KNOWN		(BCDUMP_F_FFI*2-1)
+#define BCDUMP_F_KNOWN		(BCDUMP_F_FR2*2-1)
 
 /* Type codes for the GC constants of a prototype. Plus length for strings. */
 enum {
@@ -61,6 +62,7 @@ enum {
 
 LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
 		       void *data, int strip);
+LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
 LJ_FUNC GCproto *lj_bcread(LexState *ls);
 
 #endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_bcread.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bcread.c
similarity index 81%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_bcread.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bcread.c
index 25859d2fa6206b449e4360cbd0fc6d6af03ea802..5e5021773d763319b8833bab1fdde6a5c4927680 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_bcread.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bcread.c
@@ -9,6 +9,7 @@
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_bc.h"
@@ -20,6 +21,7 @@
 #include "lj_lex.h"
 #include "lj_bcdump.h"
 #include "lj_state.h"
+#include "lj_strfmt.h"
 
 /* Reuse some lexer fields for our own purposes. */
 #define bcread_flags(ls)	ls->level
@@ -38,84 +40,73 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
   const char *name = ls->chunkarg;
   if (*name == BCDUMP_HEAD1) name = "(binary)";
   else if (*name == '@' || *name == '=') name++;
-  lj_str_pushf(L, "%s: %s", name, err2msg(em));
+  lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
   lj_err_throw(L, LUA_ERRSYNTAX);
 }
 
-/* Resize input buffer. */
-static void bcread_resize(LexState *ls, MSize len)
-{
-  if (ls->sb.sz < len) {
-    MSize sz = ls->sb.sz * 2;
-    while (len > sz) sz = sz * 2;
-    lj_str_resizebuf(ls->L, &ls->sb, sz);
-    /* Caveat: this may change ls->sb.buf which may affect ls->p. */
-  }
-}
-
-/* Refill buffer if needed. */
+/* Refill buffer. */
 static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
 {
   lua_assert(len != 0);
-  if (len > LJ_MAX_MEM || ls->current < 0)
+  if (len > LJ_MAX_BUF || ls->c < 0)
     bcread_error(ls, LJ_ERR_BCBAD);
   do {
     const char *buf;
-    size_t size;
-    if (ls->n) {  /* Copy remainder to buffer. */
-      if (ls->sb.n) {  /* Move down in buffer. */
-	lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n);
-	if (ls->n != ls->sb.n)
-	  memmove(ls->sb.buf, ls->p, ls->n);
+    size_t sz;
+    char *p = sbufB(&ls->sb);
+    MSize n = (MSize)(ls->pe - ls->p);
+    if (n) {  /* Copy remainder to buffer. */
+      if (sbuflen(&ls->sb)) {  /* Move down in buffer. */
+	lua_assert(ls->pe == sbufP(&ls->sb));
+	if (ls->p != p) memmove(p, ls->p, n);
       } else {  /* Copy from buffer provided by reader. */
-	bcread_resize(ls, len);
-	memcpy(ls->sb.buf, ls->p, ls->n);
+	p = lj_buf_need(&ls->sb, len);
+	memcpy(p, ls->p, n);
       }
-      ls->p = ls->sb.buf;
+      ls->p = p;
+      ls->pe = p + n;
     }
-    ls->sb.n = ls->n;
-    buf = ls->rfunc(ls->L, ls->rdata, &size);  /* Get more data from reader. */
-    if (buf == NULL || size == 0) {  /* EOF? */
+    setsbufP(&ls->sb, p + n);
+    buf = ls->rfunc(ls->L, ls->rdata, &sz);  /* Get more data from reader. */
+    if (buf == NULL || sz == 0) {  /* EOF? */
       if (need) bcread_error(ls, LJ_ERR_BCBAD);
-      ls->current = -1;  /* Only bad if we get called again. */
+      ls->c = -1;  /* Only bad if we get called again. */
       break;
     }
-    if (ls->sb.n) {  /* Append to buffer. */
-      MSize n = ls->sb.n + (MSize)size;
-      bcread_resize(ls, n < len ? len : n);
-      memcpy(ls->sb.buf + ls->sb.n, buf, size);
-      ls->n = ls->sb.n = n;
-      ls->p = ls->sb.buf;
+    if (n) {  /* Append to buffer. */
+      n += (MSize)sz;
+      p = lj_buf_need(&ls->sb, n < len ? len : n);
+      memcpy(sbufP(&ls->sb), buf, sz);
+      setsbufP(&ls->sb, p + n);
+      ls->p = p;
+      ls->pe = p + n;
     } else {  /* Return buffer provided by reader. */
-      ls->n = (MSize)size;
       ls->p = buf;
+      ls->pe = buf + sz;
     }
-  } while (ls->n < len);
+  } while (ls->p + len > ls->pe);
 }
 
 /* Need a certain number of bytes. */
 static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
 {
-  if (LJ_UNLIKELY(ls->n < len))
+  if (LJ_UNLIKELY(ls->p + len > ls->pe))
     bcread_fill(ls, len, 1);
 }
 
 /* Want to read up to a certain number of bytes, but may need less. */
 static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
 {
-  if (LJ_UNLIKELY(ls->n < len))
+  if (LJ_UNLIKELY(ls->p + len > ls->pe))
     bcread_fill(ls, len, 0);
 }
 
-#define bcread_dec(ls)		check_exp(ls->n > 0, ls->n--)
-#define bcread_consume(ls, len)	check_exp(ls->n >= (len), ls->n -= (len))
-
 /* Return memory block from buffer. */
-static uint8_t *bcread_mem(LexState *ls, MSize len)
+static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
 {
   uint8_t *p = (uint8_t *)ls->p;
-  bcread_consume(ls, len);
-  ls->p = (char *)p + len;
+  ls->p += len;
+  lua_assert(ls->p <= ls->pe);
   return p;
 }
 
@@ -128,25 +119,15 @@ static void bcread_block(LexState *ls, void *q, MSize len)
 /* Read byte from buffer. */
 static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
 {
-  bcread_dec(ls);
+  lua_assert(ls->p < ls->pe);
   return (uint32_t)(uint8_t)*ls->p++;
 }
 
 /* Read ULEB128 value from buffer. */
-static uint32_t bcread_uleb128(LexState *ls)
+static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
 {
-  const uint8_t *p = (const uint8_t *)ls->p;
-  uint32_t v = *p++;
-  if (LJ_UNLIKELY(v >= 0x80)) {
-    int sh = 0;
-    v &= 0x7f;
-    do {
-     v |= ((*p & 0x7f) << (sh += 7));
-     bcread_dec(ls);
-   } while (*p++ >= 0x80);
-  }
-  bcread_dec(ls);
-  ls->p = (char *)p;
+  uint32_t v = lj_buf_ruleb128(&ls->p);
+  lua_assert(ls->p <= ls->pe);
   return v;
 }
 
@@ -160,11 +141,10 @@ static uint32_t bcread_uleb128_33(LexState *ls)
     v &= 0x3f;
     do {
      v |= ((*p & 0x7f) << (sh += 7));
-     bcread_dec(ls);
    } while (*p++ >= 0x80);
   }
-  bcread_dec(ls);
   ls->p = (char *)p;
+  lua_assert(ls->p <= ls->pe);
   return v;
 }
 
@@ -212,7 +192,7 @@ static void bcread_ktabk(LexState *ls, TValue *o)
     o->u32.hi = bcread_uleb128(ls);
   } else {
     lua_assert(tp <= BCDUMP_KTAB_TRUE);
-    setitype(o, ~tp);
+    setpriV(o, ~tp);
   }
 }
 
@@ -326,25 +306,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
 }
 
 /* Read a prototype. */
-static GCproto *bcread_proto(LexState *ls)
+GCproto *lj_bcread_proto(LexState *ls)
 {
   GCproto *pt;
   MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
   MSize ofsk, ofsuv, ofsdbg;
   MSize sizedbg = 0;
   BCLine firstline = 0, numline = 0;
-  MSize len, startn;
-
-  /* Read length. */
-  if (ls->n > 0 && ls->p[0] == 0) {  /* Shortcut EOF. */
-    ls->n--; ls->p++;
-    return NULL;
-  }
-  bcread_want(ls, 5);
-  len = bcread_uleb128(ls);
-  if (!len) return NULL;  /* EOF */
-  bcread_need(ls, len);
-  startn = ls->n;
 
   /* Read prototype header. */
   flags = bcread_byte(ls);
@@ -413,9 +381,6 @@ static GCproto *bcread_proto(LexState *ls)
     setmref(pt->uvinfo, NULL);
     setmref(pt->varinfo, NULL);
   }
-
-  if (len != startn - ls->n)
-    bcread_error(ls, LJ_ERR_BCBAD);
   return pt;
 }
 
@@ -429,6 +394,7 @@ static int bcread_header(LexState *ls)
       bcread_byte(ls) != BCDUMP_VERSION) return 0;
   bcread_flags(ls) = flags = bcread_uleb128(ls);
   if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
+  if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
   if ((flags & BCDUMP_F_FFI)) {
 #if LJ_HASFFI
     lua_State *L = ls->L;
@@ -455,19 +421,34 @@ static int bcread_header(LexState *ls)
 GCproto *lj_bcread(LexState *ls)
 {
   lua_State *L = ls->L;
-  lua_assert(ls->current == BCDUMP_HEAD1);
+  lua_assert(ls->c == BCDUMP_HEAD1);
   bcread_savetop(L, ls, L->top);
-  lj_str_resetbuf(&ls->sb);
+  lj_buf_reset(&ls->sb);
   /* Check for a valid bytecode dump header. */
   if (!bcread_header(ls))
     bcread_error(ls, LJ_ERR_BCFMT);
   for (;;) {  /* Process all prototypes in the bytecode dump. */
-    GCproto *pt = bcread_proto(ls);
-    if (!pt) break;
+    GCproto *pt;
+    MSize len;
+    const char *startp;
+    /* Read length. */
+    if (ls->p < ls->pe && ls->p[0] == 0) {  /* Shortcut EOF. */
+      ls->p++;
+      break;
+    }
+    bcread_want(ls, 5);
+    len = bcread_uleb128(ls);
+    if (!len) break;  /* EOF */
+    bcread_need(ls, len);
+    startp = ls->p;
+    pt = lj_bcread_proto(ls);
+    if (ls->p != startp + len)
+      bcread_error(ls, LJ_ERR_BCBAD);
     setprotoV(L, L->top, pt);
     incr_top(L);
   }
-  if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls))
+  if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 ||
+      L->top-1 != bcread_oldtop(L, ls))
     bcread_error(ls, LJ_ERR_BCBAD);
   /* Pop off last prototype. */
   L->top--;
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_bcwrite.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bcwrite.c
similarity index 57%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_bcwrite.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bcwrite.c
index ff97450b8fe55fe393a1550a2ee47101a6b24f27..b2c097383870f8214280475140beb68640e682d8 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_bcwrite.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_bcwrite.c
@@ -8,7 +8,7 @@
 
 #include "lj_obj.h"
 #include "lj_gc.h"
-#include "lj_str.h"
+#include "lj_buf.h"
 #include "lj_bc.h"
 #if LJ_HASFFI
 #include "lj_ctype.h"
@@ -17,13 +17,13 @@
 #include "lj_dispatch.h"
 #include "lj_jit.h"
 #endif
+#include "lj_strfmt.h"
 #include "lj_bcdump.h"
 #include "lj_vm.h"
 
 /* Context for bytecode writer. */
 typedef struct BCWriteCtx {
   SBuf sb;			/* Output buffer. */
-  lua_State *L;			/* Lua state. */
   GCproto *pt;			/* Root prototype. */
   lua_Writer wfunc;		/* Writer callback. */
   void *wdata;			/* Writer callback data. */
@@ -31,85 +31,44 @@ typedef struct BCWriteCtx {
   int status;			/* Status from writer callback. */
 } BCWriteCtx;
 
-/* -- Output buffer handling ---------------------------------------------- */
-
-/* Resize buffer if needed. */
-static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len)
-{
-  MSize sz = ctx->sb.sz * 2;
-  while (ctx->sb.n + len > sz) sz = sz * 2;
-  lj_str_resizebuf(ctx->L, &ctx->sb, sz);
-}
-
-/* Need a certain amount of buffer space. */
-static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
-{
-  if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz))
-    bcwrite_resize(ctx, len);
-}
-
-/* Add memory block to buffer. */
-static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
-{
-  uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
-  MSize i;
-  ctx->sb.n += len;
-  for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i];
-}
-
-/* Add byte to buffer. */
-static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
-{
-  ctx->sb.buf[ctx->sb.n++] = b;
-}
-
-/* Add ULEB128 value to buffer. */
-static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
-{
-  MSize n = ctx->sb.n;
-  uint8_t *p = (uint8_t *)ctx->sb.buf;
-  for (; v >= 0x80; v >>= 7)
-    p[n++] = (uint8_t)((v & 0x7f) | 0x80);
-  p[n++] = (uint8_t)v;
-  ctx->sb.n = n;
-}
-
 /* -- Bytecode writer ----------------------------------------------------- */
 
 /* Write a single constant key/value of a template table. */
 static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
 {
-  bcwrite_need(ctx, 1+10);
+  char *p = lj_buf_more(&ctx->sb, 1+10);
   if (tvisstr(o)) {
     const GCstr *str = strV(o);
     MSize len = str->len;
-    bcwrite_need(ctx, 5+len);
-    bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len);
-    bcwrite_block(ctx, strdata(str), len);
+    p = lj_buf_more(&ctx->sb, 5+len);
+    p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len);
+    p = lj_buf_wmem(p, strdata(str), len);
   } else if (tvisint(o)) {
-    bcwrite_byte(ctx, BCDUMP_KTAB_INT);
-    bcwrite_uleb128(ctx, intV(o));
+    *p++ = BCDUMP_KTAB_INT;
+    p = lj_strfmt_wuleb128(p, intV(o));
   } else if (tvisnum(o)) {
     if (!LJ_DUALNUM && narrow) {  /* Narrow number constants to integers. */
       lua_Number num = numV(o);
       int32_t k = lj_num2int(num);
       if (num == (lua_Number)k) {  /* -0 is never a constant. */
-	bcwrite_byte(ctx, BCDUMP_KTAB_INT);
-	bcwrite_uleb128(ctx, k);
+	*p++ = BCDUMP_KTAB_INT;
+	p = lj_strfmt_wuleb128(p, k);
+	setsbufP(&ctx->sb, p);
 	return;
       }
     }
-    bcwrite_byte(ctx, BCDUMP_KTAB_NUM);
-    bcwrite_uleb128(ctx, o->u32.lo);
-    bcwrite_uleb128(ctx, o->u32.hi);
+    *p++ = BCDUMP_KTAB_NUM;
+    p = lj_strfmt_wuleb128(p, o->u32.lo);
+    p = lj_strfmt_wuleb128(p, o->u32.hi);
   } else {
     lua_assert(tvispri(o));
-    bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o));
+    *p++ = BCDUMP_KTAB_NIL+~itype(o);
   }
+  setsbufP(&ctx->sb, p);
 }
 
 /* Write a template table. */
-static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
+static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
 {
   MSize narray = 0, nhash = 0;
   if (t->asize > 0) {  /* Determine max. length of array part. */
@@ -127,8 +86,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
       nhash += !tvisnil(&node[i].val);
   }
   /* Write number of array slots and hash slots. */
-  bcwrite_uleb128(ctx, narray);
-  bcwrite_uleb128(ctx, nhash);
+  p = lj_strfmt_wuleb128(p, narray);
+  p = lj_strfmt_wuleb128(p, nhash);
+  setsbufP(&ctx->sb, p);
   if (narray) {  /* Write array entries (may contain nil). */
     MSize i;
     TValue *o = tvref(t->array);
@@ -155,6 +115,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
   for (i = 0; i < sizekgc; i++, kr++) {
     GCobj *o = gcref(*kr);
     MSize tp, need = 1;
+    char *p;
     /* Determine constant type and needed size. */
     if (o->gch.gct == ~LJ_TSTR) {
       tp = BCDUMP_KGC_STR + gco2str(o)->len;
@@ -181,24 +142,26 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
       need = 1+2*5;
     }
     /* Write constant type. */
-    bcwrite_need(ctx, need);
-    bcwrite_uleb128(ctx, tp);
+    p = lj_buf_more(&ctx->sb, need);
+    p = lj_strfmt_wuleb128(p, tp);
     /* Write constant data (if any). */
     if (tp >= BCDUMP_KGC_STR) {
-      bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len);
+      p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len);
     } else if (tp == BCDUMP_KGC_TAB) {
-      bcwrite_ktab(ctx, gco2tab(o));
+      bcwrite_ktab(ctx, p, gco2tab(o));
+      continue;
 #if LJ_HASFFI
     } else if (tp != BCDUMP_KGC_CHILD) {
-      cTValue *p = (TValue *)cdataptr(gco2cd(o));
-      bcwrite_uleb128(ctx, p[0].u32.lo);
-      bcwrite_uleb128(ctx, p[0].u32.hi);
+      cTValue *q = (TValue *)cdataptr(gco2cd(o));
+      p = lj_strfmt_wuleb128(p, q[0].u32.lo);
+      p = lj_strfmt_wuleb128(p, q[0].u32.hi);
       if (tp == BCDUMP_KGC_COMPLEX) {
-	bcwrite_uleb128(ctx, p[1].u32.lo);
-	bcwrite_uleb128(ctx, p[1].u32.hi);
+	p = lj_strfmt_wuleb128(p, q[1].u32.lo);
+	p = lj_strfmt_wuleb128(p, q[1].u32.hi);
       }
 #endif
     }
+    setsbufP(&ctx->sb, p);
   }
 }
 
@@ -207,7 +170,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
 {
   MSize i, sizekn = pt->sizekn;
   cTValue *o = mref(pt->k, TValue);
-  bcwrite_need(ctx, 10*sizekn);
+  char *p = lj_buf_more(&ctx->sb, 10*sizekn);
   for (i = 0; i < sizekn; i++, o++) {
     int32_t k;
     if (tvisint(o)) {
@@ -220,58 +183,58 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
 	k = lj_num2int(num);
 	if (num == (lua_Number)k) {  /* -0 is never a constant. */
 	save_int:
-	  bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u));
-	  if (k < 0) {
-	    char *p = &ctx->sb.buf[ctx->sb.n-1];
-	    *p = (*p & 7) | ((k>>27) & 0x18);
-	  }
+	  p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
+	  if (k < 0)
+	    p[-1] = (p[-1] & 7) | ((k>>27) & 0x18);
 	  continue;
 	}
       }
-      bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
-      if (o->u32.lo >= 0x80000000u) {
-	char *p = &ctx->sb.buf[ctx->sb.n-1];
-	*p = (*p & 7) | ((o->u32.lo>>27) & 0x18);
-      }
-      bcwrite_uleb128(ctx, o->u32.hi);
+      p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
+      if (o->u32.lo >= 0x80000000u)
+	p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18);
+      p = lj_strfmt_wuleb128(p, o->u32.hi);
     }
   }
+  setsbufP(&ctx->sb, p);
 }
 
 /* Write bytecode instructions. */
-static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt)
+static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
 {
   MSize nbc = pt->sizebc-1;  /* Omit the [JI]FUNC* header. */
 #if LJ_HASJIT
-  uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n];
+  uint8_t *q = (uint8_t *)p;
 #endif
-  bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
+  p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
+  UNUSED(ctx);
 #if LJ_HASJIT
   /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
   if ((pt->flags & PROTO_ILOOP) || pt->trace) {
-    jit_State *J = L2J(ctx->L);
+    jit_State *J = L2J(sbufL(&ctx->sb));
     MSize i;
-    for (i = 0; i < nbc; i++, p += sizeof(BCIns)) {
-      BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)];
+    for (i = 0; i < nbc; i++, q += sizeof(BCIns)) {
+      BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)];
       if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
 	  op == BC_JFORI) {
-	p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
+	q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
       } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
-	BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8);
+	BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
 	BCIns ins = traceref(J, rd)->startins;
-	p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
-	p[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);
-	p[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);
+	q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
+	q[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);
+	q[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);
       }
     }
   }
 #endif
+  return p;
 }
 
 /* Write prototype. */
 static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
 {
   MSize sizedbg = 0;
+  char *p;
 
   /* Recursively write children of prototype. */
   if ((pt->flags & PROTO_CHILD)) {
@@ -285,31 +248,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
   }
 
   /* Start writing the prototype info to a buffer. */
-  lj_str_resetbuf(&ctx->sb);
-  ctx->sb.n = 5;  /* Leave room for final size. */
-  bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
+  p = lj_buf_need(&ctx->sb,
+		  5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
+  p += 5;  /* Leave room for final size. */
 
   /* Write prototype header. */
-  bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI)));
-  bcwrite_byte(ctx, pt->numparams);
-  bcwrite_byte(ctx, pt->framesize);
-  bcwrite_byte(ctx, pt->sizeuv);
-  bcwrite_uleb128(ctx, pt->sizekgc);
-  bcwrite_uleb128(ctx, pt->sizekn);
-  bcwrite_uleb128(ctx, pt->sizebc-1);
+  *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI));
+  *p++ = pt->numparams;
+  *p++ = pt->framesize;
+  *p++ = pt->sizeuv;
+  p = lj_strfmt_wuleb128(p, pt->sizekgc);
+  p = lj_strfmt_wuleb128(p, pt->sizekn);
+  p = lj_strfmt_wuleb128(p, pt->sizebc-1);
   if (!ctx->strip) {
     if (proto_lineinfo(pt))
       sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
-    bcwrite_uleb128(ctx, sizedbg);
+    p = lj_strfmt_wuleb128(p, sizedbg);
     if (sizedbg) {
-      bcwrite_uleb128(ctx, pt->firstline);
-      bcwrite_uleb128(ctx, pt->numline);
+      p = lj_strfmt_wuleb128(p, pt->firstline);
+      p = lj_strfmt_wuleb128(p, pt->numline);
     }
   }
 
   /* Write bytecode instructions and upvalue refs. */
-  bcwrite_bytecode(ctx, pt);
-  bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2);
+  p = bcwrite_bytecode(ctx, p, pt);
+  p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
+  setsbufP(&ctx->sb, p);
 
   /* Write constants. */
   bcwrite_kgc(ctx, pt);
@@ -317,18 +281,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
 
   /* Write debug info, if not stripped. */
   if (sizedbg) {
-    bcwrite_need(ctx, sizedbg);
-    bcwrite_block(ctx, proto_lineinfo(pt), sizedbg);
+    p = lj_buf_more(&ctx->sb, sizedbg);
+    p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
+    setsbufP(&ctx->sb, p);
   }
 
   /* Pass buffer to writer function. */
   if (ctx->status == 0) {
-    MSize n = ctx->sb.n - 5;
+    MSize n = sbuflen(&ctx->sb) - 5;
     MSize nn = (lj_fls(n)+8)*9 >> 6;
-    ctx->sb.n = 5 - nn;
-    bcwrite_uleb128(ctx, n);  /* Fill in final size. */
-    lua_assert(ctx->sb.n == 5);
-    ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata);
+    char *q = sbufB(&ctx->sb) + (5 - nn);
+    p = lj_strfmt_wuleb128(q, n);  /* Fill in final size. */
+    lua_assert(p == sbufB(&ctx->sb) + 5);
+    ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
   }
 }
 
@@ -338,20 +303,21 @@ static void bcwrite_header(BCWriteCtx *ctx)
   GCstr *chunkname = proto_chunkname(ctx->pt);
   const char *name = strdata(chunkname);
   MSize len = chunkname->len;
-  lj_str_resetbuf(&ctx->sb);
-  bcwrite_need(ctx, 5+5+len);
-  bcwrite_byte(ctx, BCDUMP_HEAD1);
-  bcwrite_byte(ctx, BCDUMP_HEAD2);
-  bcwrite_byte(ctx, BCDUMP_HEAD3);
-  bcwrite_byte(ctx, BCDUMP_VERSION);
-  bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) +
-		   (LJ_BE ? BCDUMP_F_BE : 0) +
-		   ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0));
+  char *p = lj_buf_need(&ctx->sb, 5+5+len);
+  *p++ = BCDUMP_HEAD1;
+  *p++ = BCDUMP_HEAD2;
+  *p++ = BCDUMP_HEAD3;
+  *p++ = BCDUMP_VERSION;
+  *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
+	 LJ_BE*BCDUMP_F_BE +
+	 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
+	 LJ_FR2*BCDUMP_F_FR2;
   if (!ctx->strip) {
-    bcwrite_uleb128(ctx, len);
-    bcwrite_block(ctx, name, len);
+    p = lj_strfmt_wuleb128(p, len);
+    p = lj_buf_wmem(p, name, len);
   }
-  ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata);
+  ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb),
+			   (MSize)(p - sbufB(&ctx->sb)), ctx->wdata);
 }
 
 /* Write footer of bytecode dump. */
@@ -359,7 +325,7 @@ static void bcwrite_footer(BCWriteCtx *ctx)
 {
   if (ctx->status == 0) {
     uint8_t zero = 0;
-    ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata);
+    ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata);
   }
 }
 
@@ -367,8 +333,8 @@ static void bcwrite_footer(BCWriteCtx *ctx)
 static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
 {
   BCWriteCtx *ctx = (BCWriteCtx *)ud;
-  UNUSED(dummy);
-  lj_str_resizebuf(L, &ctx->sb, 1024);  /* Avoids resize for most prototypes. */
+  UNUSED(L); UNUSED(dummy);
+  lj_buf_need(&ctx->sb, 1024);  /* Avoids resize for most prototypes. */
   bcwrite_header(ctx);
   bcwrite_proto(ctx, ctx->pt);
   bcwrite_footer(ctx);
@@ -381,16 +347,15 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
 {
   BCWriteCtx ctx;
   int status;
-  ctx.L = L;
   ctx.pt = pt;
   ctx.wfunc = writer;
   ctx.wdata = data;
   ctx.strip = strip;
   ctx.status = 0;
-  lj_str_initbuf(&ctx.sb);
+  lj_buf_init(L, &ctx.sb);
   status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
   if (status == 0) status = ctx.status;
-  lj_str_freebuf(G(ctx.L), &ctx.sb);
+  lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
   return status;
 }
 
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_buf.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_buf.c
new file mode 100644
index 0000000000000000000000000000000000000000..023bb9aac8fc6a6bd083808b633cd759b46c1ed7
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_buf.c
@@ -0,0 +1,234 @@
+/*
+** Buffer handling.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_buf_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_strfmt.h"
+
+/* -- Buffer management --------------------------------------------------- */
+
+static void buf_grow(SBuf *sb, MSize sz)  /* Reallocate sb to hold at least sz bytes; the write offset is kept. */
+{
+  MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
+  char *b;
+  if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;  /* Never go below the minimum size. */
+  while (nsz < sz) nsz += nsz;  /* Double until the request fits. */
+  b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz);
+  setmref(sb->b, b);
+  setmref(sb->p, b + len);  /* Re-base the write pointer onto the new block. */
+  setmref(sb->e, b + nsz);
+}
+
+LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)  /* Grow path used by lj_buf_need(): make total capacity >= sz. */
+{
+  lua_assert(sz > sbufsz(sb));  /* Callers only get here when the buffer is too small. */
+  if (LJ_UNLIKELY(sz > LJ_MAX_BUF))  /* Request exceeds the buffer size limit. */
+    lj_err_mem(sbufL(sb));
+  buf_grow(sb, sz);
+  return sbufB(sb);  /* Base of the buffer after growing. */
+}
+
+LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)  /* Grow path used by lj_buf_more(): make room for sz more bytes. */
+{
+  MSize len = sbuflen(sb);
+  lua_assert(sz > sbufleft(sb));  /* Callers only get here when free space is short. */
+  if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))  /* Testing sz alone first presumably keeps len + sz from wrapping. */
+    lj_err_mem(sbufL(sb));
+  buf_grow(sb, len + sz);
+  return sbufP(sb);  /* Write position inside the grown buffer. */
+}
+
+void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)  /* Halve the allocation of sb when it is larger than 2*LJ_MIN_SBUF. */
+{
+  char *b = sbufB(sb);
+  MSize osz = (MSize)(sbufE(sb) - b);
+  if (osz > 2*LJ_MIN_SBUF) {
+    MSize n = (MSize)(sbufP(sb) - b);  /* Write offset, restored below. NOTE(review): not checked against the halved size -- confirm callers. */
+    b = lj_mem_realloc(L, b, osz, (osz >> 1));
+    setmref(sb->b, b);
+    setmref(sb->p, b + n);
+    setmref(sb->e, b + (osz >> 1));
+  }
+}
+
+char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)  /* Return the base of the global temp buffer with capacity >= sz. */
+{
+  SBuf *sb = &G(L)->tmpbuf;
+  setsbufL(sb, L);  /* Bind the buffer to the calling state; the write pointer is not reset here. */
+  return lj_buf_need(sb, sz);
+}
+
+/* -- Low-level buffer put operations ------------------------------------- */
+
+SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)  /* Append len bytes from q to sb; returns sb. */
+{
+  char *p = lj_buf_more(sb, len);  /* Ensure space and fetch the write position. */
+  p = lj_buf_wmem(p, q, len);
+  setsbufP(sb, p);  /* Commit the new end of data. */
+  return sb;
+}
+
+#if LJ_HASJIT  /* Only built when LJ_HASJIT is set. */
+SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)  /* Append c, converted to char, to sb; returns sb. */
+{
+  char *p = lj_buf_more(sb, 1);
+  *p++ = (char)c;
+  setsbufP(sb, p);
+  return sb;
+}
+#endif
+
+SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)  /* Append the bytes of string s to sb; returns sb. */
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len);  /* Ensure space and fetch the write position. */
+  p = lj_buf_wmem(p, strdata(s), len);
+  setsbufP(sb, p);  /* Commit the new end of data. */
+  return sb;
+}
+
+/* -- High-level buffer put operations ------------------------------------ */
+
+SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)  /* Append the bytes of s in reverse order; returns sb. */
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len), *e = p+len;
+  const char *q = strdata(s)+len-1;  /* Start at the last byte of s. */
+  while (p < e)
+    *p++ = *q--;
+  setsbufP(sb, p);
+  return sb;
+}
+
+SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)  /* Append s with bytes 'A'..'Z' mapped to lower case; returns sb. */
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len), *e = p+len;
+  const char *q = strdata(s);
+  for (; p < e; p++, q++) {
+    uint32_t c = *(unsigned char *)q;
+#if LJ_TARGET_PPC
+    *p = c + ((c >= 'A' && c <= 'Z') << 5);  /* Same result without an if: adds 0x20 when in range. */
+#else
+    if (c >= 'A' && c <= 'Z') c += 0x20;
+    *p = c;
+#endif
+  }
+  setsbufP(sb, p);
+  return sb;
+}
+
+SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)  /* Append s with bytes 'a'..'z' mapped to upper case; returns sb. */
+{
+  MSize len = s->len;
+  char *p = lj_buf_more(sb, len), *e = p+len;
+  const char *q = strdata(s);
+  for (; p < e; p++, q++) {
+    uint32_t c = *(unsigned char *)q;
+#if LJ_TARGET_PPC
+    *p = c - ((c >= 'a' && c <= 'z') << 5);  /* Same result without an if: subtracts 0x20 when in range. */
+#else
+    if (c >= 'a' && c <= 'z') c -= 0x20;
+    *p = c;
+#endif
+  }
+  setsbufP(sb, p);
+  return sb;
+}
+
+SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)  /* Append rep copies of s; nothing is written if rep <= 0 or s is empty. Returns sb. */
+{
+  MSize len = s->len;
+  if (rep > 0 && len) {
+    uint64_t tlen = (uint64_t)rep * len;  /* Total length, computed in 64 bits before the limit check. */
+    char *p;
+    if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
+      lj_err_mem(sbufL(sb));
+    p = lj_buf_more(sb, (MSize)tlen);
+    if (len == 1) {  /* Optimize a common case. */
+      uint32_t c = strdata(s)[0];
+      do { *p++ = c; } while (--rep > 0);
+    } else {
+      const char *e = strdata(s) + len;
+      do {
+	const char *q = strdata(s);
+	do { *p++ = *q++; } while (q < e);
+      } while (--rep > 0);
+    }
+    setsbufP(sb, p);
+  }
+  return sb;
+}
+
+SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)  /* Append t[i]..t[e] (strings and numbers) joined by sep; returns sb, or NULL on a missing or bad element. */
+{
+  MSize seplen = sep ? sep->len : 0;  /* sep may be NULL: no separator. */
+  if (i <= e) {
+    for (;;) {
+      cTValue *o = lj_tab_getint(t, i);
+      char *p;
+      if (!o) {
+      badtype:  /* Error: bad element type. */
+	setsbufP(sb, (void *)(intptr_t)i);  /* Store failing index. */
+	return NULL;
+      } else if (tvisstr(o)) {
+	MSize len = strV(o)->len;
+	p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
+      } else if (tvisint(o)) {
+	p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
+      } else if (tvisnum(o)) {
+	p = lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM+seplen), o);
+      } else {
+	goto badtype;
+      }
+      if (i++ == e) {  /* Last element: commit without a trailing separator. */
+	setsbufP(sb, p);
+	break;
+      }
+      if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen);  /* Space for sep was reserved together with the element. */
+      setsbufP(sb, p);
+    }
+  }
+  return sb;
+}
+
+/* -- Miscellaneous buffer operations ------------------------------------- */
+
+GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)  /* Create a string from the bytes between base and write pointer, using the state stored in sb. */
+{
+  return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb));
+}
+
+/* Concatenate two strings: s1 and s2 are copied into the global temp buffer and a new string is created from it. */
+GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
+{
+  MSize len1 = s1->len, len2 = s2->len;
+  char *buf = lj_buf_tmp(L, len1 + len2);
+  memcpy(buf, strdata(s1), len1);
+  memcpy(buf+len1, strdata(s2), len2);
+  return lj_str_new(L, buf, len1 + len2);
+}
+
+/* Read ULEB128 from buffer. Advances *pp past the value; no end-of-buffer check is made here. */
+uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
+{
+  const uint8_t *p = (const uint8_t *)*pp;
+  uint32_t v = *p++;
+  if (LJ_UNLIKELY(v >= 0x80)) {  /* High bit set: more bytes follow. */
+    int sh = 0;
+    v &= 0x7f;
+    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);  /* Each further byte adds 7 payload bits. */
+  }
+  *pp = (const char *)p;
+  return v;
+}
+
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_buf.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_buf.h
new file mode 100644
index 0000000000000000000000000000000000000000..1cf1780b02087b40ff71f5682a0866858c81ada3
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_buf.h
@@ -0,0 +1,105 @@
+/*
+** Buffer handling.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_BUF_H
+#define _LJ_BUF_H
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_str.h"
+
+/* Resizable string buffers. Struct definition in lj_obj.h. b = base, p = write position, e = end of allocation. */
+#define sbufB(sb)	(mref((sb)->b, char))
+#define sbufP(sb)	(mref((sb)->p, char))
+#define sbufE(sb)	(mref((sb)->e, char))
+#define sbufL(sb)	(mref((sb)->L, lua_State))
+#define sbufsz(sb)	((MSize)(sbufE((sb)) - sbufB((sb))))  /* Allocated size. */
+#define sbuflen(sb)	((MSize)(sbufP((sb)) - sbufB((sb))))  /* Bytes written so far. */
+#define sbufleft(sb)	((MSize)(sbufE((sb)) - sbufP((sb))))  /* Free bytes after the write position. */
+#define setsbufP(sb, q)	(setmref((sb)->p, (q)))
+#define setsbufL(sb, l)	(setmref((sb)->L, (l)))
+
+/* Buffer management */
+LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
+LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz);
+LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
+LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
+
+static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)  /* Bind sb to L and set all three buffer pointers to NULL (nothing allocated). */
+{
+  setsbufL(sb, L);
+  setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL);
+}
+
+static LJ_AINLINE void lj_buf_reset(SBuf *sb)  /* Empty the buffer: write pointer goes back to the base; the allocation is kept. */
+{
+  setmrefr(sb->p, sb->b);
+}
+
+static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)  /* Return the global temp buffer, bound to L and reset to empty. */
+{
+  SBuf *sb = &G(L)->tmpbuf;
+  setsbufL(sb, L);
+  lj_buf_reset(sb);
+  return sb;
+}
+
+static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)  /* Free the buffer's memory block; the pointers in sb are left as they are. */
+{
+  lj_mem_free(g, sbufB(sb), sbufsz(sb));
+}
+
+static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)  /* Ensure a total capacity of at least sz bytes; returns the buffer base. */
+{
+  if (LJ_UNLIKELY(sz > sbufsz(sb)))
+    return lj_buf_need2(sb, sz);  /* Too small: take the out-of-line grow path. */
+  return sbufB(sb);
+}
+
+static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)  /* Ensure at least sz free bytes after the write pointer; returns the write position. */
+{
+  if (LJ_UNLIKELY(sz > sbufleft(sb)))
+    return lj_buf_more2(sb, sz);  /* Not enough room: take the out-of-line grow path. */
+  return sbufP(sb);
+}
+
+/* Low-level buffer put operations */
+LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
+#if LJ_HASJIT
+LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
+#endif
+LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
+
+static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)  /* Copy len bytes from q to p and return p+len. No space check: the caller reserves room first. */
+{
+  return (char *)memcpy(p, q, len) + len;
+}
+
+static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)  /* Append c, converted to char, to sb; grows the buffer if needed. */
+{
+  char *p = lj_buf_more(sb, 1);
+  *p++ = (char)c;
+  setsbufP(sb, p);
+}
+
+/* High-level buffer put operations */
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s);
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s);
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s);
+LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep);
+LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep,
+			    int32_t i, int32_t e);
+
+/* Miscellaneous buffer operations */
+LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb);
+LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2);
+LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
+
+static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)  /* Create a string from the buffer contents, using the given L rather than the one stored in sb. */
+{
+  return lj_str_new(L, sbufB(sb), sbuflen(sb));
+}
+
+#endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_carith.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_carith.c
similarity index 81%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_carith.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_carith.c
index 2a358a9bb0dc74255d606064a3b1e0b5194e769b..9032ea32410974290aedab46b807e8925e6e78e1 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_carith.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_carith.c
@@ -11,10 +11,12 @@
 #include "lj_err.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
+#include "lj_ir.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
 #include "lj_cdata.h"
 #include "lj_carith.h"
+#include "lj_strscan.h"
 
 /* -- C data arithmetic --------------------------------------------------- */
 
@@ -272,6 +274,80 @@ int lj_carith_op(lua_State *L, MMS mm)
   return lj_carith_meta(L, cts, &ca, mm);
 }
 
+/* -- 64 bit bit operations helpers --------------------------------------- */
+
+#if LJ_64  /* B64DEF(name) opens the definition of lj_carith_<name>(x, sh). */
+#define B64DEF(name) \
+  static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
+#else
+/* Not inlined on 32 bit archs, since some of these are quite lengthy. */
+#define B64DEF(name) \
+  uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
+#endif
+
+B64DEF(shl64) { return x << (sh&63); }  /* Shift left; count is masked to 0..63. */
+B64DEF(shr64) { return x >> (sh&63); }  /* Unsigned (logical) shift right. */
+B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }  /* Shift as signed. */
+B64DEF(rol64) { return lj_rol(x, (sh&63)); }  /* Rotate left via lj_rol(). */
+B64DEF(ror64) { return lj_ror(x, (sh&63)); }  /* Rotate right via lj_ror(). */
+
+#undef B64DEF  /* Only needed for the five definitions above. */
+
+uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
+{  /* Apply the 64 bit shift/rotate selected by op to x and return the result. */
+  switch (op) {  /* op is an IR opcode given relative to IR_BSHL. */
+  case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break;
+  case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break;
+  case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break;
+  case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break;
+  case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break;
+  default: lua_assert(0); break;  /* Unknown op: assert, x stays unchanged. */
+  }
+  return x;
+}
+
+/* Equivalent to lj_lib_checkbit(), but handles cdata. May update *id. */
+uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
+{
+  TValue *o = L->base + narg-1;  /* Stack slot of argument narg (1-based). */
+  if (o >= L->top) {  /* Slot is at or above L->top: argument is missing. */
+  err:
+    lj_err_argt(L, narg, LUA_TNUMBER);
+  } else if (LJ_LIKELY(tvisnumber(o))) {
+    /* Handled below. */
+  } else if (tviscdata(o)) {
+    CTState *cts = ctype_cts(L);
+    uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
+    CTypeID sid = cdataV(o)->ctypeid;
+    CType *s = ctype_get(cts, sid);
+    uint64_t x;
+    if (ctype_isref(s->info)) {  /* Reference: load the pointer, use child id. */
+      sp = *(void **)sp;
+      sid = ctype_cid(s->info);
+    }
+    s = ctype_raw(cts, sid);
+    if (ctype_isenum(s->info)) s = ctype_child(cts, s);  /* Enum: use its child. */
+    if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
+	CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
+      *id = CTID_UINT64;  /* Use uint64_t, since it has the highest rank. */
+    else if (!*id)
+      *id = CTID_INT64;  /* Use int64_t, unless already set. */
+    lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
+		   (uint8_t *)&x, sp, CCF_ARG(narg));  /* Convert cdata into x. */
+    return x;
+  } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {
+    goto err;  /* Not a number, not cdata, not a string scanning as number. */
+  }
+  if (LJ_LIKELY(tvisint(o))) {
+    return (uint32_t)intV(o);  /* Widened from uint32_t; *id is not touched. */
+  } else {
+    int32_t i = lj_num2bit(numV(o));
+    if (LJ_DUALNUM) setintV(o, i);  /* Write the integer back into the slot. */
+    return (uint32_t)i;
+  }
+}
+
+
 /* -- 64 bit integer arithmetic helpers ----------------------------------- */
 
 #if LJ_32 && LJ_HASJIT
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_carith.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_carith.h
similarity index 60%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_carith.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_carith.h
index 8c4bdbbe2a612cc4de2808683487e739d7214ec0..da8320f306675de4e7acfa0cf1b25e0ce33ae5e0 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_carith.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_carith.h
@@ -12,6 +12,16 @@
 
 LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
 
+#if LJ_32
+LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
+#endif
+LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
+LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
+
 #if LJ_32 && LJ_HASJIT
 LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
 #endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ccall.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ccall.c
similarity index 87%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ccall.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ccall.c
index 998417c1cf886ccf5456d6b3bd934ddb9003078a..5ab5b60daab7974b3e62f58dcca9732c89a1b65a 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ccall.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ccall.c
@@ -9,7 +9,6 @@
 
 #include "lj_gc.h"
 #include "lj_err.h"
-#include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
@@ -291,6 +290,75 @@
 #define CCALL_HANDLE_RET \
   if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
 
+#elif LJ_TARGET_ARM64
+/* -- ARM64 calling conventions ------------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+  cc->retref = !ccall_classify_struct(cts, ctr); \
+  if (cc->retref) cc->retp = dp;
+
+#define CCALL_HANDLE_STRUCTRET2 \
+  unsigned int cl = ccall_classify_struct(cts, ctr); \
+  if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
+    CTSize i = (cl >> 8) - 1; \
+    do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \
+  } else { \
+    if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
+    memcpy(dp, sp, ctr->size); \
+  }
+
+#define CCALL_HANDLE_COMPLEXRET \
+  /* Complex values are returned in one or two FPRs. */ \
+  cc->retref = 0;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
+    ((float *)dp)[0] = cc->fpr[0].f; \
+    ((float *)dp)[1] = cc->fpr[1].f; \
+  } else {  /* Copy complex double from FPRs. */ \
+    ((double *)dp)[0] = cc->fpr[0].d; \
+    ((double *)dp)[1] = cc->fpr[1].d; \
+  }
+
+#define CCALL_HANDLE_STRUCTARG \
+  unsigned int cl = ccall_classify_struct(cts, d); \
+  if (cl == 0) {  /* Pass struct by reference. */ \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; \
+  } else if (cl > 1) {  /* Pass struct in FPRs or on stack. */ \
+    isfp = (cl & 4) ? 2 : 1; \
+  }  /* else: Pass struct in GPRs or on stack. */
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass complex by value in separate (!) FPRs or on stack. sz: arg size. */ \
+  isfp = sz == 2*sizeof(float) ? 2 : 1;
+
+#define CCALL_HANDLE_REGARG \
+  if (LJ_TARGET_IOS && isva) { \
+    /* IOS: All variadic arguments are on the stack. */ \
+  } else if (isfp) {  /* Try to pass argument in FPRs. */ \
+    int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \
+    if (nfpr + n2 <= CCALL_NARG_FPR) { \
+      dp = &cc->fpr[nfpr]; \
+      nfpr += n2; \
+      goto done; \
+    } else { \
+      nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
+      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
+    } \
+  } else {  /* Try to pass argument in GPRs. */ \
+    if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
+      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
+    if (ngpr + n <= maxgpr) { \
+      dp = &cc->gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } else { \
+      ngpr = maxgpr;  /* Prevent reordering. */ \
+      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
+    } \
+  }
+
 #elif LJ_TARGET_PPC
 /* -- PPC calling conventions --------------------------------------------- */
 
@@ -339,42 +407,6 @@
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
     ctr = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */
 
-#elif LJ_TARGET_PPCSPE
-/* -- PPC/SPE calling conventions ----------------------------------------- */
-
-#define CCALL_HANDLE_STRUCTRET \
-  cc->retref = 1;  /* Return all structs by reference. */ \
-  cc->gpr[ngpr++] = (GPRArg)dp;
-
-#define CCALL_HANDLE_COMPLEXRET \
-  /* Complex values are returned in 2 or 4 GPRs. */ \
-  cc->retref = 0;
-
-#define CCALL_HANDLE_COMPLEXRET2 \
-  memcpy(dp, sp, ctr->size);  /* Copy complex from GPRs. */
-
-#define CCALL_HANDLE_STRUCTARG \
-  rp = cdataptr(lj_cdata_new(cts, did, sz)); \
-  sz = CTSIZE_PTR;  /* Pass all structs by reference. */
-
-#define CCALL_HANDLE_COMPLEXARG \
-  /* Pass complex by value in 2 or 4 GPRs. */
-
-/* PPC/SPE has a softfp ABI. */
-#define CCALL_HANDLE_REGARG \
-  if (n > 1) {  /* Doesn't fit in a single GPR? */ \
-    lua_assert(n == 2 || n == 4);  /* int64_t, double or complex (float). */ \
-    if (n == 2) \
-      ngpr = (ngpr + 1u) & ~1u;  /* Only align 64 bit value to regpair. */ \
-    else if (ngpr + n > maxgpr) \
-      ngpr = maxgpr;  /* Prevent reordering. */ \
-  } \
-  if (ngpr + n <= maxgpr) { \
-    dp = &cc->gpr[ngpr]; \
-    ngpr += n; \
-    goto done; \
-  }
-
 #elif LJ_TARGET_MIPS
 /* -- MIPS calling conventions -------------------------------------------- */
 
@@ -621,6 +653,52 @@ noth:  /* Not a homogeneous float/double aggregate. */
 
 #endif
 
+/* -- ARM64 ABI struct classification ------------------------------------- */
+
+#if LJ_TARGET_ARM64
+
+/* Classify a struct based on its fields. 0/1: not an HFA, >1: float/double HFA. */
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
+{
+  CTSize sz = ct->size;
+  unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);  /* r: sizes, n: count. */
+  while (ct->sib) {  /* Walk the sibling chain of the struct. */
+    CType *sct;
+    ct = ctype_get(cts, ct->sib);
+    if (ctype_isfield(ct->info)) {
+      sct = ctype_rawchild(cts, ct);
+      if (ctype_isfp(sct->info)) {
+	r |= sct->size;  /* Accumulate FP element sizes by OR-ing them. */
+	if (!isu) n++; else if (n == 0) n = 1;  /* Union: max, struct: sum. */
+      } else if (ctype_iscomplex(sct->info)) {
+	r |= (sct->size >> 1);  /* Element size is half the complex size. */
+	if (!isu) n += 2; else if (n < 2) n = 2;  /* Counts as two elements. */
+      } else if (ctype_isstruct(sct->info)) {
+	goto substruct;
+      } else {
+	goto noth;  /* Any other field type rules out an HFA. */
+      }
+    } else if (ctype_isbitfield(ct->info)) {
+      goto noth;
+    } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+      sct = ctype_rawchild(cts, ct);
+    substruct:
+      if (sct->size > 0) {
+	unsigned int s = ccall_classify_struct(cts, sct);  /* Recurse. */
+	if (s <= 1) goto noth;  /* Nested aggregate is not an HFA itself. */
+	r |= (s & 255);  /* Low byte of s: nested element size. */
+	if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
+      }
+    }
+  }
+  if ((r == 4 || r == 8) && n <= 4)  /* One element size only, at most 4 elems. */
+    return r + (n << 8);  /* Low byte: element size, bits 8 and up: count. */
+noth:  /* Not a homogeneous float/double aggregate. */
+  return (sz <= 16);  /* Return structs of size <= 16 in GPRs. */
+}
+
+#endif
+
 /* -- Common C call handling ---------------------------------------------- */
 
 /* Infer the destination CTypeID for a vararg argument. */
@@ -803,6 +881,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
       cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1];  /* Split complex double. */
       cc->fpr[nfpr-2].d[1] = 0;
     }
+#elif LJ_TARGET_ARM64
+    if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
+      /* Split float HFA or complex float into separate registers. */
+      CTSize i = (sz >> 2) - 1;
+      do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
+    }
 #else
     UNUSED(isfp);
 #endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ccall.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ccall.h
similarity index 93%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ccall.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ccall.h
index f55301093139b186ee6b06333efeb06677edd95a..91983feebd7ec5b4d8babf7ad0176479a9b1d423 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ccall.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ccall.h
@@ -68,27 +68,32 @@ typedef union FPRArg {
   float f[2];
 } FPRArg;
 
-#elif LJ_TARGET_PPC
+#elif LJ_TARGET_ARM64
 
 #define CCALL_NARG_GPR		8
+#define CCALL_NRET_GPR		2
 #define CCALL_NARG_FPR		8
-#define CCALL_NRET_GPR		4	/* For complex double. */
-#define CCALL_NRET_FPR		1
-#define CCALL_SPS_EXTRA		4
+#define CCALL_NRET_FPR		4
 #define CCALL_SPS_FREE		0
 
 typedef intptr_t GPRArg;
-typedef double FPRArg;
+typedef union FPRArg {
+  double d;
+  float f;
+  uint32_t u32;
+} FPRArg;
 
-#elif LJ_TARGET_PPCSPE
+#elif LJ_TARGET_PPC
 
 #define CCALL_NARG_GPR		8
-#define CCALL_NARG_FPR		0
-#define CCALL_NRET_GPR		4	/* For softfp complex double. */
-#define CCALL_NRET_FPR		0
-#define CCALL_SPS_FREE		0	/* NYI */
+#define CCALL_NARG_FPR		8
+#define CCALL_NRET_GPR		4	/* For complex double. */
+#define CCALL_NRET_FPR		1
+#define CCALL_SPS_EXTRA		4
+#define CCALL_SPS_FREE		0
 
 typedef intptr_t GPRArg;
+typedef double FPRArg;
 
 #elif LJ_TARGET_MIPS
 
@@ -145,6 +150,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
   uint8_t nfpr;			/* Number of arguments in FPRs. */
 #elif LJ_TARGET_X86
   uint8_t resx87;		/* Result on x87 stack: 1:float, 2:double. */
+#elif LJ_TARGET_ARM64
+  void *retp;			/* Aggregate return pointer in x8. */
 #elif LJ_TARGET_PPC
   uint8_t nfpr;			/* Number of arguments in FPRs. */
 #endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ccallback.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ccallback.c
similarity index 89%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ccallback.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ccallback.c
index b210641f9148cbebc04388edb58cdaba644db97e..065c329fa7e38fcd228d2fb904e048b8b0d4a747 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ccallback.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ccallback.c
@@ -27,7 +27,7 @@
 
 #if LJ_OS_NOJIT
 
-/* Disabled callback support. */
+/* Callbacks disabled. */
 #define CALLBACK_SLOT2OFS(slot)	(0*(slot))
 #define CALLBACK_OFS2SLOT(ofs)	(0*(ofs))
 #define CALLBACK_MAX_SLOT	0
@@ -35,7 +35,7 @@
 #elif LJ_TARGET_X86ORX64
 
 #define CALLBACK_MCODE_HEAD	(LJ_64 ? 8 : 0)
-#define CALLBACK_MCODE_GROUP	(-2+1+2+5+(LJ_64 ? 6 : 5))
+#define CALLBACK_MCODE_GROUP	(-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))
 
 #define CALLBACK_SLOT2OFS(slot) \
   (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
@@ -54,23 +54,18 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
 #elif LJ_TARGET_ARM
 
 #define CALLBACK_MCODE_HEAD		32
-#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+
+#elif LJ_TARGET_ARM64
+
+#define CALLBACK_MCODE_HEAD		32
 
 #elif LJ_TARGET_PPC
 
 #define CALLBACK_MCODE_HEAD		24
-#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
 
 #elif LJ_TARGET_MIPS
 
 #define CALLBACK_MCODE_HEAD		24
-#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
 
 #else
 
@@ -81,6 +76,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
 
 #endif
 
+#ifndef CALLBACK_SLOT2OFS
+#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
+#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
+#define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+#endif
+
 /* Convert callback slot number to callback function pointer. */
 static void *callback_slot2ptr(CTState *cts, MSize slot)
 {
@@ -119,8 +120,13 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
       /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
       *p++ = XI_PUSH + RID_EBP;
       *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
+#if LJ_GC64
+      *p++ = 0x48; *p++ = XI_MOVri | RID_EBP;
+      *(uint64_t *)p = (uint64_t)(g); p += 8;
+#else
       *p++ = XI_MOVri | RID_EBP;
       *(int32_t *)p = i32ptr(g); p += 4;
+#endif
 #if LJ_64
       /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
       *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
@@ -157,6 +163,26 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
   }
   lua_assert(p - page <= CALLBACK_MCODE_SIZE);
 }
+#elif LJ_TARGET_ARM64
+static void callback_mcode_init(global_State *g, uint32_t *page)
+{  /* Fill page with a shared head followed by one two-word stub per slot. */
+  uint32_t *p = page;
+  void *target = (void *)lj_vm_ffi_callback;
+  MSize slot;
+  *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4);  /* x11 = literal at +4 words. */
+  *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5);  /* x10 = literal at +5 words. */
+  *p++ = A64I_BR | A64F_N(RID_X11);  /* Branch to the address in x11. */
+  *p++ = A64I_NOP;  /* NOTE(review): presumably pads the literals to 8 bytes. */
+  ((void **)p)[0] = target;  /* Literal read into x11 above. */
+  ((void **)p)[1] = g;  /* Literal read into x10 above. */
+  p += 4;  /* Skip both literals (4 words, assuming 8 byte pointers). */
+  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+    *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot);  /* Slot number in x9/w9. */
+    *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu);  /* Branch back to page. */
+    p++;
+  }
+  lua_assert(p - page <= CALLBACK_MCODE_SIZE);
+}
 #elif LJ_TARGET_PPC
 static void callback_mcode_init(global_State *g, uint32_t *page)
 {
@@ -351,6 +377,29 @@ void lj_ccallback_mcode_free(CTState *cts)
     goto done; \
   } CALLBACK_HANDLE_REGARG_FP2
 
+#elif LJ_TARGET_ARM64
+
+#define CALLBACK_HANDLE_REGARG \
+  if (isfp) {  /* Try to fetch the argument from the saved FPRs. */ \
+    if (nfpr + n <= CCALL_NARG_FPR) { \
+      sp = &cts->cb.fpr[nfpr]; \
+      nfpr += n; \
+      goto done; \
+    } else { \
+      nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
+    } \
+  } else {  /* Try to fetch the argument from the saved GPRs. */ \
+    if (!LJ_TARGET_IOS && n > 1) \
+      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
+    if (ngpr + n <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } else { \
+      ngpr = CCALL_NARG_GPR;  /* Prevent reordering. */ \
+    } \
+  }
+
 #elif LJ_TARGET_PPC
 
 #define CALLBACK_HANDLE_REGARG \
@@ -411,6 +460,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
   int gcsteps = 0;
   CType *ct;
   GCfunc *fn;
+  int fntp;
   MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
 #if CCALL_NARG_FPR
   MSize nfpr = 0;
@@ -421,18 +471,27 @@ static void callback_conv_args(CTState *cts, lua_State *L)
 
   if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
     ct = ctype_get(cts, id);
-    rid = ctype_cid(ct->info);
+    rid = ctype_cid(ct->info);  /* Return type. x86: +(spadj<<16). */
     fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
+    fntp = LJ_TFUNC;
   } else {  /* Must set up frame first, before throwing the error. */
     ct = NULL;
     rid = 0;
     fn = (GCfunc *)L;
+    fntp = LJ_TTHREAD;
+  }
+  /* Continuation returns from callback. */
+  if (LJ_FR2) {
+    (o++)->u64 = LJ_CONT_FFI_CALLBACK;
+    (o++)->u64 = rid;
+    o++;
+  } else {
+    o->u32.lo = LJ_CONT_FFI_CALLBACK;
+    o->u32.hi = rid;
+    o++;
   }
-  o->u32.lo = LJ_CONT_FFI_CALLBACK;  /* Continuation returns from callback. */
-  o->u32.hi = rid;  /* Return type. x86: +(spadj<<16). */
-  o++;
-  setframe_gc(o, obj2gco(fn));
-  setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT);
+  setframe_gc(o, obj2gco(fn), fntp);
+  setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT);
   L->top = L->base = ++o;
   if (!ct)
     lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
@@ -483,8 +542,13 @@ static void callback_conv_args(CTState *cts, lua_State *L)
   L->top = o;
 #if LJ_TARGET_X86
   /* Store stack adjustment for returns from non-cdecl callbacks. */
-  if (ctype_cconv(ct->info) != CTCC_CDECL)
+  if (ctype_cconv(ct->info) != CTCC_CDECL) {
+#if LJ_FR2
+    (L->base-3)->u64 |= (nsp << (16+2));
+#else
     (L->base-2)->u32.hi |= (nsp << (16+2));
+#endif
+  }
 #endif
   while (gcsteps-- > 0)
     lj_gc_check(L);
@@ -493,7 +557,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
 /* Convert Lua object to callback result. */
 static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
 {
+#if LJ_FR2
+  CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64);
+#else
   CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
+#endif
 #if LJ_TARGET_X86
   cts->cb.gpr[2] = 0;
 #endif
@@ -529,7 +597,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
   lua_State *L = cts->L;
   global_State *g = cts->g;
   lua_assert(L != NULL);
-  if (gcref(g->jit_L)) {
+  if (tvref(g->jit_base)) {
     setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
     if (g->panic) g->panic(L);
     exit(EXIT_FAILURE);
@@ -562,9 +630,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
   }
   callback_conv_result(cts, L, o);
   /* Finally drop C frame and continuation frame. */
-  L->cframe = cframe_prev(L->cframe);
-  L->top -= 2;
+  L->top -= 2+2*LJ_FR2;
   L->base = obase;
+  L->cframe = cframe_prev(L->cframe);
   cts->cb.slot = 0;  /* Blacklist C function that called the callback. */
 }
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ccallback.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ccallback.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ccallback.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ccallback.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_cconv.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cconv.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_cconv.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cconv.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_cconv.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cconv.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_cconv.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cconv.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_cdata.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cdata.c
similarity index 93%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_cdata.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cdata.c
index 39fc13a9ff41b36c81fe904098a6bb738efa08ab..5cd2c1140efd61fc0a749a25e308d7aaea27b91d 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_cdata.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cdata.c
@@ -9,7 +9,6 @@
 
 #include "lj_gc.h"
 #include "lj_err.h"
-#include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_ctype.h"
 #include "lj_cconv.h"
@@ -27,12 +26,12 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id)
 }
 
 /* Allocate variable-sized or specially aligned C data object. */
-GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
+GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
 {
   global_State *g;
   MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
 		(align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
-  char *p = lj_mem_newt(cts->L, extra + sz, char);
+  char *p = lj_mem_newt(L, extra + sz, char);
   uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
   uintptr_t almask = (1u << align) - 1u;
   GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
@@ -40,7 +39,7 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
   cdatav(cd)->offset = (uint16_t)((char *)cd - p);
   cdatav(cd)->extra = extra;
   cdatav(cd)->len = sz;
-  g = cts->g;
+  g = G(L);
   setgcrefr(cd->nextgc, g->gc.root);
   setgcref(g->gc.root, obj2gco(cd));
   newwhite(g, obj2gco(cd));
@@ -76,21 +75,20 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
   }
 }
 
-TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd)
+void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
 {
-  global_State *g = G(L);
-  GCtab *t = ctype_ctsG(g)->finalizer;
+  GCtab *t = ctype_ctsG(G(L))->finalizer;
   if (gcref(t->metatable)) {
     /* Add cdata to finalizer table, if still enabled. */
     TValue *tv, tmp;
     setcdataV(L, &tmp, cd);
     lj_gc_anybarriert(L, t);
     tv = lj_tab_set(L, t, &tmp);
-    cd->marked |= LJ_GC_CDATA_FIN;
-    return tv;
-  } else {
-    /* Otherwise return dummy TValue. */
-    return &g->tmptv;
+    setgcV(L, tv, obj, it);
+    if (!tvisnil(tv))
+      cd->marked |= LJ_GC_CDATA_FIN;
+    else
+      cd->marked &= ~LJ_GC_CDATA_FIN;
   }
 }
 
@@ -123,7 +121,12 @@ collect_attrib:
     idx = (ptrdiff_t)intV(key);
     goto integer_key;
   } else if (tvisnum(key)) {  /* Numeric key. */
-    idx = LJ_64 ? (ptrdiff_t)numV(key) : (ptrdiff_t)lj_num2int(numV(key));
+#ifdef _MSC_VER
+    /* Workaround for MSVC bug. */
+    volatile
+#endif
+    lua_Number n = numV(key);
+    idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
   integer_key:
     if (ctype_ispointer(ct->info)) {
       CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info));  /* Element size. */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_cdata.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cdata.h
similarity index 92%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_cdata.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cdata.h
index 3a1275e6c1687fb86387c69dcf1fa8e6b33b6167..c8975be1c9f8b530cfab86985936a50be37efe06 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_cdata.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cdata.h
@@ -58,11 +58,12 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz)
 }
 
 LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id);
-LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz,
+LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz,
 			       CTSize align);
 
 LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd);
-LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd);
+LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj,
+			     uint32_t it);
 
 LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key,
 			      uint8_t **pp, CTInfo *qual);
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_char.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_char.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_char.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_char.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_char.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_char.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_char.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_char.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_clib.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_clib.c
similarity index 91%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_clib.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_clib.c
index d352609097525453ed069d336e2d2912dfd3ca8b..1e927ebe99eb0179236146cfadede167f87acad7 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_clib.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_clib.c
@@ -16,6 +16,7 @@
 #include "lj_cconv.h"
 #include "lj_cdata.h"
 #include "lj_clib.h"
+#include "lj_strfmt.h"
 
 /* -- OS-specific functions ----------------------------------------------- */
 
@@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name)
 #endif
      ) {
     if (!strchr(name, '.')) {
-      name = lj_str_pushf(L, CLIB_SOEXT, name);
+      name = lj_strfmt_pushf(L, CLIB_SOEXT, name);
       L->top--;
 #ifdef __CYGWIN__
     } else {
@@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name)
     }
     if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] &&
 	  name[2] == CLIB_SOPREFIX[2])) {
-      name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name);
+      name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name);
       L->top--;
     }
   }
@@ -171,11 +172,19 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
 					    const char *name)
 {
   DWORD err = GetLastError();
+#if LJ_TARGET_XBOXONE
+  wchar_t wbuf[128];
+  char buf[128*2];
+  if (!FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
+		      NULL, err, 0, wbuf, sizeof(wbuf)/sizeof(wchar_t), NULL) ||
+      !WideCharToMultiByte(CP_ACP, 0, wbuf, 128, buf, 128*2, NULL, NULL))
+#else
   char buf[128];
   if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
 		      NULL, err, 0, buf, sizeof(buf), NULL))
+#endif
     buf[0] = '\0';
-  lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf));
+  lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf));
 }
 
 static int clib_needext(const char *s)
@@ -190,7 +199,7 @@ static int clib_needext(const char *s)
 static const char *clib_extname(lua_State *L, const char *name)
 {
   if (clib_needext(name)) {
-    name = lj_str_pushf(L, "%s.dll", name);
+    name = lj_strfmt_pushf(L, "%s.dll", name);
     L->top--;
   }
   return name;
@@ -199,7 +208,7 @@ static const char *clib_extname(lua_State *L, const char *name)
 static void *clib_loadlib(lua_State *L, const char *name, int global)
 {
   DWORD oldwerr = GetLastError();
-  void *h = (void *)LoadLibraryA(clib_extname(L, name));
+  void *h = (void *)LoadLibraryExA(clib_extname(L, name), NULL, 0);
   if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name);
   SetLastError(oldwerr);
   UNUSED(global);
@@ -240,9 +249,9 @@ static void *clib_getsym(CLibrary *cl, const char *name)
 	  GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
 			     (const char *)&_fmode, &h);
 	  break;
-	case CLIB_HANDLE_KERNEL32: h = LoadLibraryA("kernel32.dll"); break;
-	case CLIB_HANDLE_USER32: h = LoadLibraryA("user32.dll"); break;
-	case CLIB_HANDLE_GDI32: h = LoadLibraryA("gdi32.dll"); break;
+	case CLIB_HANDLE_KERNEL32: h = LoadLibraryExA("kernel32.dll", NULL, 0); break;
+	case CLIB_HANDLE_USER32: h = LoadLibraryExA("user32.dll", NULL, 0); break;
+	case CLIB_HANDLE_GDI32: h = LoadLibraryExA("gdi32.dll", NULL, 0); break;
 	}
 	if (!h) continue;
 	clib_def_handle[i] = (void *)h;
@@ -263,7 +272,7 @@ static void *clib_getsym(CLibrary *cl, const char *name)
 LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
 					    const char *name)
 {
-  lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS"));
+  lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS"));
 }
 
 static void *clib_loadlib(lua_State *L, const char *name, int global)
@@ -347,7 +356,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
 	CTInfo cconv = ctype_cconv(ct->info);
 	if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) {
 	  CTSize sz = clib_func_argsize(cts, ct);
-	  const char *symd = lj_str_pushf(L,
+	  const char *symd = lj_strfmt_pushf(L,
 			       cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d",
 			       sym, sz);
 	  L->top--;
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_clib.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_clib.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_clib.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_clib.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_cparse.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cparse.c
similarity index 97%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_cparse.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cparse.c
index b9df88d7702143260659d1677b2050afe7e3f10f..f212bd3682075f76836cb848ac878cb2ac6ff192 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_cparse.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cparse.c
@@ -9,13 +9,14 @@
 
 #include "lj_gc.h"
 #include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
 #include "lj_ctype.h"
 #include "lj_cparse.h"
 #include "lj_frame.h"
 #include "lj_vm.h"
 #include "lj_char.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 
 /*
 ** Important note: this is NOT a validating C parser! This is a minimal
@@ -46,9 +47,9 @@ static const char *cp_tok2str(CPState *cp, CPToken tok)
   if (tok > CTOK_OFS)
     return ctoknames[tok-CTOK_OFS-1];
   else if (!lj_char_iscntrl(tok))
-    return lj_str_pushf(cp->L, "%c", tok);
+    return lj_strfmt_pushf(cp->L, "%c", tok);
   else
-    return lj_str_pushf(cp->L, "char(%d)", tok);
+    return lj_strfmt_pushf(cp->L, "char(%d)", tok);
 }
 
 /* End-of-line? */
@@ -85,24 +86,10 @@ static LJ_NOINLINE CPChar cp_get_bs(CPState *cp)
   return cp_get(cp);
 }
 
-/* Grow save buffer. */
-static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c)
-{
-  MSize newsize;
-  if (cp->sb.sz >= CPARSE_MAX_BUF/2)
-    cp_err(cp, LJ_ERR_XELEM);
-  newsize = cp->sb.sz * 2;
-  lj_str_resizebuf(cp->L, &cp->sb, newsize);
-  cp->sb.buf[cp->sb.n++] = (char)c;
-}
-
 /* Save character in buffer. */
 static LJ_AINLINE void cp_save(CPState *cp, CPChar c)
 {
-  if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz))
-    cp_save_grow(cp, c);
-  else
-    cp->sb.buf[cp->sb.n++] = (char)c;
+  lj_buf_putb(&cp->sb, c);
 }
 
 /* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
@@ -122,20 +109,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
     tokstr = NULL;
   } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
 	     tok >= CTOK_FIRSTDECL) {
-    if (cp->sb.n == 0) cp_save(cp, '$');
+    if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$');
     cp_save(cp, '\0');
-    tokstr = cp->sb.buf;
+    tokstr = sbufB(&cp->sb);
   } else {
     tokstr = cp_tok2str(cp, tok);
   }
   L = cp->L;
   va_start(argp, em);
-  msg = lj_str_pushvf(L, err2msg(em), argp);
+  msg = lj_strfmt_pushvf(L, err2msg(em), argp);
   va_end(argp);
   if (tokstr)
-    msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
+    msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
   if (cp->linenumber > 1)
-    msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber);
+    msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber);
   lj_err_callermsg(L, msg);
 }
 
@@ -164,7 +151,7 @@ static CPToken cp_number(CPState *cp)
   TValue o;
   do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
   cp_save(cp, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C);
+  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
   if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
   else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
   else if (!(cp->mode & CPARSE_MODE_SKIP))
@@ -177,7 +164,7 @@ static CPToken cp_number(CPState *cp)
 static CPToken cp_ident(CPState *cp)
 {
   do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
-  cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n);
+  cp->str = lj_buf_str(cp->L, &cp->sb);
   cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask);
   if (ctype_type(cp->ct->info) == CT_KW)
     return ctype_cid(cp->ct->info);
@@ -263,11 +250,11 @@ static CPToken cp_string(CPState *cp)
   }
   cp_get(cp);
   if (delim == '"') {
-    cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n);
+    cp->str = lj_buf_str(cp->L, &cp->sb);
     return CTOK_STRING;
   } else {
-    if (cp->sb.n != 1) cp_err_token(cp, '\'');
-    cp->val.i32 = (int32_t)(char)cp->sb.buf[0];
+    if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
+    cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb);
     cp->val.id = CTID_INT32;
     return CTOK_INTEGER;
   }
@@ -296,7 +283,7 @@ static void cp_comment_cpp(CPState *cp)
 /* Lexical scanner for C. Only a minimal subset is implemented. */
 static CPToken cp_next_(CPState *cp)
 {
-  lj_str_resetbuf(&cp->sb);
+  lj_buf_reset(&cp->sb);
   for (;;) {
     if (lj_char_isident(cp->c))
       return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
@@ -380,8 +367,7 @@ static void cp_init(CPState *cp)
   cp->depth = 0;
   cp->curpack = 0;
   cp->packstack[0] = 255;
-  lj_str_initbuf(&cp->sb);
-  lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF);
+  lj_buf_init(cp->L, &cp->sb);
   lua_assert(cp->p != NULL);
   cp_get(cp);  /* Read-ahead first char. */
   cp->tok = 0;
@@ -393,7 +379,7 @@ static void cp_init(CPState *cp)
 static void cp_cleanup(CPState *cp)
 {
   global_State *g = G(cp->L);
-  lj_str_freebuf(g, &cp->sb);
+  lj_buf_free(g, &cp->sb);
 }
 
 /* Check and consume optional token. */
@@ -798,6 +784,10 @@ static void cp_push_type(CPDecl *decl, CTypeID id)
     cp_push(decl, info & ~CTMASK_CID, size);  /* Copy type. */
     break;
   case CT_ARRAY:
+    if ((ct->info & (CTF_VECTOR|CTF_COMPLEX))) {
+      info |= (decl->attr & CTF_QUAL);
+      decl->attr &= ~CTF_QUAL;
+    }
     cp_push_type(decl, ctype_cid(info));  /* Unroll. */
     cp_push(decl, info & ~CTMASK_CID, size);  /* Copy type. */
     decl->stack[decl->pos].sib = 1;  /* Mark as already checked and sized. */
@@ -1012,7 +1002,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl)
   if (cp->tok == CTOK_STRING) {
     GCstr *str = cp->str;
     while (cp_next(cp) == CTOK_STRING) {
-      lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
+      lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
       cp->L->top--;
       str = strV(cp->L->top);
     }
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_cparse.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cparse.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_cparse.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_cparse.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_crecord.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_crecord.c
similarity index 87%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_crecord.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_crecord.c
index a46665e9af0cd07557109e325ff5fe3ccf1c08b2..e200cc99423e2fa768bb6ec484cad26b2064f4e6 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_crecord.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_crecord.c
@@ -11,13 +11,13 @@
 #if LJ_HASJIT && LJ_HASFFI
 
 #include "lj_err.h"
-#include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_frame.h"
 #include "lj_ctype.h"
 #include "lj_cdata.h"
 #include "lj_cparse.h"
 #include "lj_cconv.h"
+#include "lj_carith.h"
 #include "lj_clib.h"
 #include "lj_ccall.h"
 #include "lj_ff.h"
@@ -31,6 +31,7 @@
 #include "lj_snap.h"
 #include "lj_crecord.h"
 #include "lj_dispatch.h"
+#include "lj_strfmt.h"
 
 /* Some local macros to save typing. Undef'd at the end. */
 #define IR(ref)			(&J->cur.ir[(ref)])
@@ -441,7 +442,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
     /* fallthrough */
   case CCX(I, F):
     if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
-    sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY);
+    sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
     goto xstore;
   case CCX(I, P):
   case CCX(I, A):
@@ -521,7 +522,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
     if (st == IRT_CDATA) goto err_nyi;
     /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
     sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
-		  st, IRCONV_TRUNC|IRCONV_ANY);
+		  st, IRCONV_ANY);
     goto xstore;
 
   /* Destination is an array. */
@@ -640,12 +641,23 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
       sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr)));
       sid = CTID_A_CCHAR;
     }
-  } else {  /* NYI: tref_istab(sp), tref_islightud(sp). */
+  } else if (tref_islightud(sp)) {
+#if LJ_64
+    sp = emitir(IRT(IR_BAND, IRT_P64), sp,
+		lj_ir_kint64(J, U64x(00007fff,ffffffff)));
+#endif
+  } else {  /* NYI: tref_istab(sp). */
     IRType t;
     sid = argv2cdata(J, sp, sval)->ctypeid;
     s = ctype_raw(cts, sid);
     svisnz = cdataptr(cdataV(sval));
-    t = crec_ct2irt(cts, s);
+    if (ctype_isfunc(s->info)) {
+      sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR);
+      s = ctype_get(cts, sid);
+      t = IRT_PTR;
+    } else {
+      t = crec_ct2irt(cts, s);
+    }
     if (ctype_isptr(s->info)) {
       sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR);
       if (ctype_isref(s->info)) {
@@ -867,21 +879,17 @@ again:
 }
 
 /* Record setting a finalizer. */
-static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin)
+static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin)
 {
-  TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd);
-  TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4));
-  if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; }
-  if (tvisfunc(fin)) {
-    emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin)));
-    emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC));
-  } else if (tviscdata(fin)) {
-    emitir(IRT(IR_XSTORE, IRT_P32), trlo,
-	   lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA));
-    emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA));
+  if (tvisgcv(fin)) {
+    if (!trfin) trfin = lj_ir_kptr(J, gcval(fin));
+  } else if (tvisnil(fin)) {
+    trfin = lj_ir_kptr(J, NULL);
   } else {
     lj_trace_err(J, LJ_TRERR_BADTYPE);
   }
+  lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd,
+	     trfin, lj_ir_kint(J, (int32_t)itype(fin)));
   J->needsnap = 1;
 }
 
@@ -892,10 +900,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
   CTSize sz;
   CTInfo info = lj_ctype_info(cts, id, &sz);
   CType *d = ctype_raw(cts, id);
-  TRef trid;
-  if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN)
-    lj_trace_err(J, LJ_TRERR_NYICONV);  /* NYI: large/special allocations. */
-  trid = lj_ir_kint(J, id);
+  TRef trcd, trid = lj_ir_kint(J, id);
+  cTValue *fin;
   /* Use special instruction to box pointer or 32/64 bit integer. */
   if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) {
     TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
@@ -903,11 +909,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
 	      sz == 4 ? lj_ir_kint(J, 0) :
 	      (lj_needsplit(J), lj_ir_kint64(J, 0));
     J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
+    return;
   } else {
-    TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL);
-    cTValue *fin;
-    J->base[0] = trcd;
-    if (J->base[1] && !J->base[2] &&
+    TRef trsz = TREF_NIL;
+    if ((info & CTF_VLA)) {  /* Calculate VLA/VLS size at runtime. */
+      CTSize sz0, sz1;
+      if (!J->base[1] || J->base[2])
+	lj_trace_err(J, LJ_TRERR_NYICONV);  /* NYI: init VLA/VLS. */
+      trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0,
+			J->base[1], &rd->argv[1]);
+      sz0 = lj_ctype_vlsize(cts, d, 0);
+      sz1 = lj_ctype_vlsize(cts, d, 1);
+      trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0)));
+      trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0));
+      J->base[1] = 0;  /* Simplify logic below. */
+    } else if (ctype_align(info) > CT_MEMALIGN) {
+      trsz = lj_ir_kint(J, sz);
+    }
+    trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz);
+    if (sz > 128 || (info & CTF_VLA)) {
+      TRef dp;
+      CTSize align;
+    special:  /* Only handle bulk zero-fill for large/VLA/VLS types. */
+      if (J->base[1])
+	lj_trace_err(J, LJ_TRERR_NYICONV);  /* NYI: init large/VLA/VLS types. */
+      dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata)));
+      if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz);
+      align = ctype_align(info);
+      if (align < CT_MEMALIGN) align = CT_MEMALIGN;
+      crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align));
+    } else if (J->base[1] && !J->base[2] &&
 	!lj_cconv_multi_init(cts, d, &rd->argv[1])) {
       goto single_init;
     } else if (ctype_isarray(d->info)) {
@@ -918,8 +949,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
       TValue *sval = &tv;
       MSize i;
       tv.u64 = 0;
-      if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)))
-	lj_trace_err(J, LJ_TRERR_NYICONV);  /* NYI: init array of aggregates. */
+      if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) ||
+	  esize * CREC_FILL_MAXUNROLL < sz)
+	goto special;
       for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
 	TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,
 			 lj_ir_kintp(J, ofs + sizeof(GCcdata)));
@@ -976,11 +1008,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
 	crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
       }
     }
-    /* Handle __gc metamethod. */
-    fin = lj_ctype_meta(cts, id, MM_gc);
-    if (fin)
-      crec_finalizer(J, trcd, fin);
   }
+  J->base[0] = trcd;
+  /* Handle __gc metamethod. */
+  fin = lj_ctype_meta(cts, id, MM_gc);
+  if (fin)
+    crec_finalizer(J, trcd, 0, fin);
 }
 
 /* Record argument conversions. */
@@ -1090,7 +1123,7 @@ static void crec_snap_caller(jit_State *J)
   ptrdiff_t delta;
   if (!frame_islua(base-1) || J->framedepth <= 0)
     lj_trace_err(J, LJ_TRERR_NYICALL);
-  J->pc = frame_pc(base-1); delta = 1+bc_a(J->pc[-1]);
+  J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
   L->top = base; L->base = base - delta;
   J->base[-1] = TREF_FALSE;
   J->base -= delta; J->baseslot -= (BCReg)delta;
@@ -1233,7 +1266,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
     for (i = 0; i < 2; i++) {
       IRType st = tref_type(sp[i]);
       if (st == IRT_NUM || st == IRT_FLOAT)
-	sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY);
+	sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
       else if (!(st == IRT_I64 || st == IRT_U64))
 	sp[i] = emitconv(sp[i], dt, IRT_INT,
 			 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1301,15 +1334,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
     CTypeID id;
 #if LJ_64
     if (t == IRT_NUM || t == IRT_FLOAT)
-      tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY);
+      tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
     else if (!(t == IRT_I64 || t == IRT_U64))
       tr = emitconv(tr, IRT_INTP, IRT_INT,
 		    ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
 #else
     if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
       tr = emitconv(tr, IRT_INTP, t,
-		    (t == IRT_NUM || t == IRT_FLOAT) ?
-		    IRCONV_TRUNC|IRCONV_ANY : 0);
+		    (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
     }
 #endif
     tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
@@ -1452,8 +1484,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
 	!irt_isguard(J->guardemit)) {
       const BCIns *pc = frame_contpc(J->L->base-1) - 1;
       if (bc_op(*pc) <= BC_ISNEP) {
-	setframe_pc(&J2G(J)->tmptv, pc);
-	J2G(J)->tmptv.u32.lo = ((tref_istrue(tr) ^ bc_op(*pc)) & 1);
+	J2G(J)->tmptv.u64 = (uint64_t)(uintptr_t)pc;
 	J->postproc = LJ_POST_FIXCOMP;
       }
     }
@@ -1642,7 +1673,139 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd)
 void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
 {
   argv2cdata(J, J->base[0], &rd->argv[0]);
-  crec_finalizer(J, J->base[0], &rd->argv[1]);
+  if (!J->base[1])
+    lj_trace_err(J, LJ_TRERR_BADTYPE);
+  crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]);
+}
+
+/* -- 64 bit bit.* library functions -------------------------------------- */
+
+/* Determine bit operation type from argument type. */
+static CTypeID crec_bit64_type(CTState *cts, cTValue *tv)
+{
+  if (tviscdata(tv)) {
+    CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid);
+    if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
+    if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
+	CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8)
+      return CTID_UINT64;  /* Use uint64_t, since it has the highest rank. */
+    return CTID_INT64;  /* Otherwise use int64_t. */
+  }
+  return 0;  /* Use regular 32 bit ops. */
+}
+
+void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
+		       J->base[0], &rd->argv[0]);
+  if (!tref_isinteger(tr))
+    tr = emitconv(tr, IRT_INT, tref_type(tr), 0);
+  J->base[0] = tr;
+}
+
+int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
+  if (id) {
+    TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
+    tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0);
+    J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
+    return 1;
+  }
+  return 0;
+}
+
+int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CTypeID id = 0;
+  MSize i;
+  for (i = 0; J->base[i] != 0; i++) {
+    CTypeID aid = crec_bit64_type(cts, &rd->argv[i]);
+    if (id < aid) id = aid;  /* Determine highest type rank of all arguments. */
+  }
+  if (id) {
+    CType *ct = ctype_get(cts, id);
+    uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64);
+    TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]);
+    for (i = 1; J->base[i] != 0; i++) {
+      TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]);
+      tr = emitir(ot, tr, tr2);
+    }
+    J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
+    return 1;
+  }
+  return 0;
+}
+
+int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CTypeID id;
+  TRef tsh = 0;
+  if (J->base[0] && tref_iscdata(J->base[1])) {
+    tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
+		     J->base[1], &rd->argv[1]);
+    if (!tref_isinteger(tsh))
+      tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
+    J->base[1] = tsh;
+  }
+  id = crec_bit64_type(cts, &rd->argv[0]);
+  if (id) {
+    TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
+    uint32_t op = rd->data;
+    if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
+    if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
+	!tref_isk(tsh))
+      tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));
+#ifdef LJ_TARGET_UNIFYROT
+      if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
+	op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
+	tsh = emitir(IRTI(IR_NEG), tsh, tsh);
+      }
+#endif
+    tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
+    J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
+    return 1;
+  }
+  return 0;
+}
+
+TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr)
+{
+  CTState *cts = ctype_ctsG(J2G(J));
+  CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
+  TRef tr, trsf = J->base[1];
+  SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
+  int32_t n;
+  if (trsf) {
+    CTypeID id2 = 0;
+    n = (int32_t)lj_carith_check64(J->L, 2, &id2);
+    if (id2)
+      trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]);
+    else
+      trsf = lj_opt_narrow_tobit(J, trsf);
+    emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n));  /* Specialize to n. */
+  } else {
+    n = id ? 16 : 8;
+  }
+  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
+  if (id) {
+    tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
+    if (n < 16)
+      tr = emitir(IRT(IR_BAND, IRT_U64), tr,
+		  lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1));
+  } else {
+    tr = lj_opt_narrow_tobit(J, J->base[0]);
+    if (n < 8)
+      tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1)));
+    tr = emitconv(tr, IRT_U64, IRT_INT, 0);  /* No sign-extension. */
+    lj_needsplit(J);
+  }
+  return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr);
 }
 
 /* -- Miscellaneous library functions ------------------------------------- */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_crecord.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_crecord.h
similarity index 78%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_crecord.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_crecord.h
index a4628cacad58cb1fa47aeff028ed0e50e761f7bb..59f342a1d9a8064c993560e51ba350e341c82192 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_crecord.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_crecord.h
@@ -25,6 +25,13 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
 LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
+
+LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
+LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
+
 LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
 #endif
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ctype.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ctype.c
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ctype.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ctype.c
index ac3017494254c0e4c058c397620c57617ec3f2e0..2e23c994bb0587e9d4b5c5bc546c73115b72a8f2 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ctype.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ctype.c
@@ -11,6 +11,7 @@
 #include "lj_err.h"
 #include "lj_str.h"
 #include "lj_tab.h"
+#include "lj_strfmt.h"
 #include "lj_ctype.h"
 #include "lj_ccallback.h"
 
@@ -568,19 +569,19 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned)
 /* Convert complex to string with 'i' or 'I' suffix. */
 GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
 {
-  char buf[2*LJ_STR_NUMBUF+2+1];
+  char buf[2*STRFMT_MAXBUF_NUM+2+1], *p = buf;
   TValue re, im;
-  size_t len;
   if (size == 2*sizeof(double)) {
     re.n = *(double *)sp; im.n = ((double *)sp)[1];
   } else {
     re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1];
   }
-  len = lj_str_bufnum(buf, &re);
-  if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+';
-  len += lj_str_bufnum(buf+len, &im);
-  buf[len] = buf[len-1] >= 'a' ? 'I' : 'i';
-  return lj_str_new(L, buf, len+1);
+  p = lj_strfmt_wnum(p, &re);
+  if (!(im.u32.hi & 0x80000000u) || im.n != im.n) *p++ = '+';
+  p = lj_strfmt_wnum(p, &im);
+  *p = *(p-1) >= 'a' ? 'I' : 'i';
+  p++;
+  return lj_str_new(L, buf, p-buf);
 }
 
 /* -- C type state -------------------------------------------------------- */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ctype.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ctype.h
similarity index 99%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ctype.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ctype.h
index 3df26f095c2c47751b33a128bd989cdb23bdb2d3..6639547a43180e01c683a900055dd1a4613cdaab 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ctype.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ctype.h
@@ -263,7 +263,7 @@ typedef struct CTState {
 /* -- Predefined types ---------------------------------------------------- */
 
 /* Target-dependent types. */
-#if LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#if LJ_TARGET_PPC
 #define CTTYDEFP(_) \
   _(LINT32,		4,	CT_NUM, CTF_LONG|CTALIGN(2))
 #else
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_debug.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_debug.c
similarity index 77%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_debug.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_debug.c
index bd2fa1f5646dfc67d92f4a5b81132167dbee62d7..3226d03b9e187251d75ffd85de9d972c446a3135 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_debug.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_debug.c
@@ -9,12 +9,12 @@
 #include "lj_obj.h"
 #include "lj_err.h"
 #include "lj_debug.h"
-#include "lj_str.h"
+#include "lj_buf.h"
 #include "lj_tab.h"
 #include "lj_state.h"
 #include "lj_frame.h"
 #include "lj_bc.h"
-#include "lj_vm.h"
+#include "lj_strfmt.h"
 #if LJ_HASJIT
 #include "lj_jit.h"
 #endif
@@ -24,11 +24,11 @@
 /* Get frame corresponding to a level. */
 cTValue *lj_debug_frame(lua_State *L, int level, int *size)
 {
-  cTValue *frame, *nextframe, *bot = tvref(L->stack);
+  cTValue *frame, *nextframe, *bot = tvref(L->stack)+LJ_FR2;
   /* Traverse frames backwards. */
   for (nextframe = frame = L->base-1; frame > bot; ) {
     if (frame_gc(frame) == obj2gco(L))
-      level++;  /* Skip dummy frames. See lj_meta_call(). */
+      level++;  /* Skip dummy frames. See lj_err_optype_call(). */
     if (level-- == 0) {
       *size = (int)(nextframe - frame);
       return frame;  /* Level found. */
@@ -87,8 +87,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
 	if (frame_islua(f)) {
 	  f = frame_prevl(f);
 	} else {
-	  if (frame_isc(f) || (LJ_HASFFI && frame_iscont(f) &&
-			       (f-1)->u32.lo == LJ_CONT_FFI_CALLBACK))
+	  if (frame_isc(f) || (frame_iscont(f) && frame_iscont_fficb(f)))
 	    cf = cframe_raw(cframe_prev(cf));
 	  f = frame_prevd(f);
 	}
@@ -142,38 +141,25 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
 
 /* -- Variable names ------------------------------------------------------ */
 
-/* Read ULEB128 value. */
-static uint32_t debug_read_uleb128(const uint8_t **pp)
-{
-  const uint8_t *p = *pp;
-  uint32_t v = *p++;
-  if (LJ_UNLIKELY(v >= 0x80)) {
-    int sh = 0;
-    v &= 0x7f;
-    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
-  }
-  *pp = p;
-  return v;
-}
-
 /* Get name of a local variable from slot number and PC. */
 static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
 {
-  const uint8_t *p = proto_varinfo(pt);
+  const char *p = (const char *)proto_varinfo(pt);
   if (p) {
     BCPos lastpc = 0;
     for (;;) {
-      const char *name = (const char *)p;
-      uint32_t vn = *p++;
+      const char *name = p;
+      uint32_t vn = *(const uint8_t *)p;
       BCPos startpc, endpc;
       if (vn < VARNAME__MAX) {
 	if (vn == VARNAME_END) break;  /* End of varinfo. */
       } else {
-	while (*p++) ;  /* Skip over variable name string. */
+	do { p++; } while (*(const uint8_t *)p);  /* Skip over variable name. */
       }
-      lastpc = startpc = lastpc + debug_read_uleb128(&p);
+      p++;
+      lastpc = startpc = lastpc + lj_buf_ruleb128(&p);
       if (startpc > pc) break;
-      endpc = startpc + debug_read_uleb128(&p);
+      endpc = startpc + lj_buf_ruleb128(&p);
       if (pc < endpc && slot-- == 0) {
 	if (vn < VARNAME__MAX) {
 #define VARNAMESTR(name, str)	str "\0"
@@ -198,7 +184,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
   TValue *nextframe = size ? frame + size : NULL;
   GCfunc *fn = frame_func(frame);
   BCPos pc = debug_framepc(L, fn, nextframe);
-  if (!nextframe) nextframe = L->top;
+  if (!nextframe) nextframe = L->top+LJ_FR2;
   if ((int)slot1 < 0) {  /* Negative slot number is for varargs. */
     if (pc != NO_BCPOS) {
       GCproto *pt = funcproto(fn);
@@ -208,7 +194,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
 	  nextframe = frame;
 	  frame = frame_prevd(frame);
 	}
-	if (frame + slot1 < nextframe) {
+	if (frame + slot1+LJ_FR2 < nextframe) {
 	  *name = "(*vararg)";
 	  return frame+slot1;
 	}
@@ -219,7 +205,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
   if (pc != NO_BCPOS &&
       (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL)
     ;
-  else if (slot1 > 0 && frame + slot1 < nextframe)
+  else if (slot1 > 0 && frame + slot1+LJ_FR2 < nextframe)
     *name = "(*temporary)";
   return frame+slot1;
 }
@@ -282,7 +268,7 @@ restart:
 	*name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins))));
 	if (ip > proto_bc(pt)) {
 	  BCIns insp = ip[-1];
-	  if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 &&
+	  if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1+LJ_FR2 &&
 	      bc_d(insp) == bc_b(ins))
 	    return "method";
 	}
@@ -299,12 +285,12 @@ restart:
 }
 
 /* Deduce function name from caller of a frame. */
-const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
+const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name)
 {
-  TValue *pframe;
+  cTValue *pframe;
   GCfunc *fn;
   BCPos pc;
-  if (frame <= tvref(L->stack))
+  if (frame <= tvref(L->stack)+LJ_FR2)
     return NULL;
   if (frame_isvarg(frame))
     frame = frame_prevd(frame);
@@ -330,7 +316,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
 /* -- Source code locations ----------------------------------------------- */
 
 /* Generate shortened source name. */
-void lj_debug_shortname(char *out, GCstr *str)
+void lj_debug_shortname(char *out, GCstr *str, BCLine line)
 {
   const char *src = strdata(str);
   if (*src == '=') {
@@ -344,11 +330,11 @@ void lj_debug_shortname(char *out, GCstr *str)
       *out++ = '.'; *out++ = '.'; *out++ = '.';
     }
     strcpy(out, src);
-  } else {  /* Output [string "string"]. */
+  } else {  /* Output [string "string"] or [builtin:name]. */
     size_t len;  /* Length, up to first control char. */
     for (len = 0; len < LUA_IDSIZE-12; len++)
       if (((const unsigned char *)src)[len] < ' ') break;
-    strcpy(out, "[string \""); out += 9;
+    strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9;
     if (src[len] != '\0') {  /* Must truncate? */
       if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
       strncpy(out, src, len); out += len;
@@ -356,7 +342,7 @@ void lj_debug_shortname(char *out, GCstr *str)
     } else {
       strcpy(out, src); out += len;
     }
-    strcpy(out, "\"]");
+    strcpy(out, line == ~(BCLine)0 ? "]" : "\"]");
   }
 }
 
@@ -369,14 +355,15 @@ void lj_debug_addloc(lua_State *L, const char *msg,
     if (isluafunc(fn)) {
       BCLine line = debug_frameline(L, fn, nextframe);
       if (line >= 0) {
+	GCproto *pt = funcproto(fn);
 	char buf[LUA_IDSIZE];
-	lj_debug_shortname(buf, proto_chunkname(funcproto(fn)));
-	lj_str_pushf(L, "%s:%d: %s", buf, line, msg);
+	lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline);
+	lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg);
 	return;
       }
     }
   }
-  lj_str_pushf(L, "%s", msg);
+  lj_strfmt_pushf(L, "%s", msg);
 }
 
 /* Push location string for a bytecode position to Lua stack. */
@@ -386,20 +373,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc)
   const char *s = strdata(name);
   MSize i, len = name->len;
   BCLine line = lj_debug_line(pt, pc);
-  if (*s == '@') {
+  if (pt->firstline == ~(BCLine)0) {
+    lj_strfmt_pushf(L, "builtin:%s", s);
+  } else if (*s == '@') {
     s++; len--;
     for (i = len; i > 0; i--)
       if (s[i] == '/' || s[i] == '\\') {
 	s += i+1;
 	break;
       }
-    lj_str_pushf(L, "%s:%d", s, line);
+    lj_strfmt_pushf(L, "%s:%d", s, line);
   } else if (len > 40) {
-    lj_str_pushf(L, "%p:%d", pt, line);
+    lj_strfmt_pushf(L, "%p:%d", pt, line);
   } else if (*s == '=') {
-    lj_str_pushf(L, "%s:%d", s+1, line);
+    lj_strfmt_pushf(L, "%s:%d", s+1, line);
   } else {
-    lj_str_pushf(L, "\"%s\":%d", s, line);
+    lj_strfmt_pushf(L, "\"%s\":%d", s, line);
   }
 }
 
@@ -462,7 +451,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
 	BCLine firstline = pt->firstline;
 	GCstr *name = proto_chunkname(pt);
 	ar->source = strdata(name);
-	lj_debug_shortname(ar->short_src, name);
+	lj_debug_shortname(ar->short_src, name, pt->firstline);
 	ar->linedefined = (int)firstline;
 	ar->lastlinedefined = (int)(firstline + pt->numline);
 	ar->what = (firstline || !pt->numline) ? "Lua" : "main";
@@ -552,6 +541,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
   }
 }
 
+#if LJ_HASPROFILE
+/* Put the chunkname into a buffer. */
+static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip)
+{
+  GCstr *name = proto_chunkname(pt);
+  const char *p = strdata(name);
+  if (pt->firstline == ~(BCLine)0) {
+    lj_buf_putmem(sb, "[builtin:", 9);
+    lj_buf_putstr(sb, name);
+    lj_buf_putb(sb, ']');
+    return 0;
+  }
+  if (*p == '=' || *p == '@') {
+    MSize len = name->len-1;
+    p++;
+    if (pathstrip) {
+      int i;
+      for (i = len-1; i >= 0; i--)
+	if (p[i] == '/' || p[i] == '\\') {
+	  len -= i+1;
+	  p = p+i+1;
+	  break;
+	}
+    }
+    lj_buf_putmem(sb, p, len);
+  } else {
+    lj_buf_putmem(sb, "[string]", 8);
+  }
+  return 1;
+}
+
+/* Put a compact stack dump into a buffer. */
+void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
+{
+  int level = 0, dir = 1, pathstrip = 1;
+  MSize lastlen = 0;
+  if (depth < 0) { level = ~depth; depth = dir = -1; }  /* Reverse frames. */
+  while (level != depth) {  /* Loop through all frame. */
+    int size;
+    cTValue *frame = lj_debug_frame(L, level, &size);
+    if (frame) {
+      cTValue *nextframe = size ? frame+size : NULL;
+      GCfunc *fn = frame_func(frame);
+      const uint8_t *p = (const uint8_t *)fmt;
+      int c;
+      while ((c = *p++)) {
+	switch (c) {
+	case 'p':  /* Preserve full path. */
+	  pathstrip = 0;
+	  break;
+	case 'F': case 'f': {  /* Dump function name. */
+	  const char *name;
+	  const char *what = lj_debug_funcname(L, frame, &name);
+	  if (what) {
+	    if (c == 'F' && isluafunc(fn)) {  /* Dump module:name for 'F'. */
+	      GCproto *pt = funcproto(fn);
+	      if (pt->firstline != ~(BCLine)0) {  /* Not a bytecode builtin. */
+		debug_putchunkname(sb, pt, pathstrip);
+		lj_buf_putb(sb, ':');
+	      }
+	    }
+	    lj_buf_putmem(sb, name, (MSize)strlen(name));
+	    break;
+	  }  /* else: can't derive a name, dump module:line. */
+	  }
+	  /* fallthrough */
+	case 'l':  /* Dump module:line. */
+	  if (isluafunc(fn)) {
+	    GCproto *pt = funcproto(fn);
+	    if (debug_putchunkname(sb, pt, pathstrip)) {
+	      /* Regular Lua function. */
+	      BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
+				       pt->firstline;
+	      lj_buf_putb(sb, ':');
+	      lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
+	    }
+	  } else if (isffunc(fn)) {  /* Dump numbered builtins. */
+	    lj_buf_putmem(sb, "[builtin#", 9);
+	    lj_strfmt_putint(sb, fn->c.ffid);
+	    lj_buf_putb(sb, ']');
+	  } else {  /* Dump C function address. */
+	    lj_buf_putb(sb, '@');
+	    lj_strfmt_putptr(sb, fn->c.f);
+	  }
+	  break;
+	case 'Z':  /* Zap trailing separator. */
+	  lastlen = sbuflen(sb);
+	  break;
+	default:
+	  lj_buf_putb(sb, c);
+	  break;
+	}
+      }
+    } else if (dir == 1) {
+      break;
+    } else {
+      level -= size;  /* Reverse frame order: quickly skip missing level. */
+    }
+    level += dir;
+  }
+  if (lastlen)
+    setsbufP(sb, sbufB(sb) + lastlen);  /* Zap trailing separator. */
+}
+#endif
+
 /* Number of frames for the leading and trailing part of a traceback. */
 #define TRACEBACK_LEVELS1	12
 #define TRACEBACK_LEVELS2	10
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_debug.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_debug.h
similarity index 86%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_debug.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_debug.h
index fa8988c3e79b70a669b2266ca6f762b3077c6a1e..11d308a41b3e3552e8b5cfb31a70e97d520c4f40 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_debug.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_debug.h
@@ -32,14 +32,18 @@ LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx);
 LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp);
 LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
 				      BCReg slot, const char **name);
-LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame,
+LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame,
 				      const char **name);
-LJ_FUNC void lj_debug_shortname(char *out, GCstr *str);
+LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
 LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
 			     cTValue *frame, cTValue *nextframe);
 LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
 LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
 			     int ext);
+#if LJ_HASPROFILE
+LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt,
+				int depth);
+#endif
 
 /* Fixed internal variable names. */
 #define VARNAMEDEF(_) \
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_def.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_def.h
similarity index 93%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_def.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_def.h
index 213a4e70e03566f12671aa9f38558c4967d82fb2..04bf00c89be451a326a876563c4fbf6d69a24730 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_def.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_def.h
@@ -46,10 +46,14 @@ typedef unsigned int uintptr_t;
 #include <stdlib.h>
 
 /* Various VM limits. */
-#define LJ_MAX_MEM	0x7fffff00	/* Max. total memory allocation. */
+#define LJ_MAX_MEM32	0x7fffff00	/* Max. 32 bit memory allocation. */
+#define LJ_MAX_MEM64	((uint64_t)1<<47)  /* Max. 64 bit memory allocation. */
+/* Max. total memory allocation. */
+#define LJ_MAX_MEM	(LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32)
 #define LJ_MAX_ALLOC	LJ_MAX_MEM	/* Max. individual allocation length. */
-#define LJ_MAX_STR	LJ_MAX_MEM	/* Max. string length. */
-#define LJ_MAX_UDATA	LJ_MAX_MEM	/* Max. userdata length. */
+#define LJ_MAX_STR	LJ_MAX_MEM32	/* Max. string length. */
+#define LJ_MAX_BUF	LJ_MAX_MEM32	/* Max. buffer length. */
+#define LJ_MAX_UDATA	LJ_MAX_MEM32	/* Max. userdata length. */
 
 #define LJ_MAX_STRTAB	(1<<26)		/* Max. string table size. */
 #define LJ_MAX_HBITS	26		/* Max. hash bits. */
@@ -57,7 +61,7 @@ typedef unsigned int uintptr_t;
 #define LJ_MAX_ASIZE	((1<<(LJ_MAX_ABITS-1))+1)  /* Max. array part size. */
 #define LJ_MAX_COLOSIZE	16		/* Max. elems for colocated array. */
 
-#define LJ_MAX_LINE	LJ_MAX_MEM	/* Max. source code line number. */
+#define LJ_MAX_LINE	LJ_MAX_MEM32	/* Max. source code line number. */
 #define LJ_MAX_XLEVEL	200		/* Max. syntactic nesting level. */
 #define LJ_MAX_BCINS	(1<<26)		/* Max. # of bytecode instructions. */
 #define LJ_MAX_SLOTS	250		/* Max. # of slots in a Lua func. */
@@ -65,7 +69,7 @@ typedef unsigned int uintptr_t;
 #define LJ_MAX_UPVAL	249		/* Max. # of upvalues. */
 
 #define LJ_MAX_IDXCHAIN	100		/* __index/__newindex chain limit. */
-#define LJ_STACK_EXTRA	5		/* Extra stack space (metamethods). */
+#define LJ_STACK_EXTRA	(5+2*LJ_FR2)	/* Extra stack space (metamethods). */
 
 #define LJ_NUM_CBPAGE	1		/* Number of FFI callback pages. */
 
@@ -99,6 +103,14 @@ typedef unsigned int uintptr_t;
 #define checki32(x)	((x) == (int32_t)(x))
 #define checku32(x)	((x) == (uint32_t)(x))
 #define checkptr32(x)	((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
+#define checkptr47(x)	(((uint64_t)(x) >> 47) == 0)
+#if LJ_GC64
+#define checkptrGC(x)	(checkptr47((x)))
+#elif LJ_64
+#define checkptrGC(x)	(checkptr32((x)))
+#else
+#define checkptrGC(x)	1
+#endif
 
 /* Every half-decent C compiler transforms this into a rotate instruction. */
 #define lj_rol(x, n)	(((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_dispatch.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_dispatch.c
similarity index 87%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_dispatch.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_dispatch.c
index 37256576ec2ee8a92e900dd4e01fee46c00e6d4c..1a07371cacfb4661e676a2c6465ea4f2a2e0a889 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_dispatch.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_dispatch.c
@@ -8,6 +8,7 @@
 
 #include "lj_obj.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_func.h"
 #include "lj_str.h"
 #include "lj_tab.h"
@@ -17,6 +18,7 @@
 #include "lj_frame.h"
 #include "lj_bc.h"
 #include "lj_ff.h"
+#include "lj_strfmt.h"
 #if LJ_HASJIT
 #include "lj_jit.h"
 #endif
@@ -25,6 +27,9 @@
 #endif
 #include "lj_trace.h"
 #include "lj_dispatch.h"
+#if LJ_HASPROFILE
+#include "lj_profile.h"
+#endif
 #include "lj_vm.h"
 #include "luajit.h"
 
@@ -37,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC);
 #include <math.h>
 LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
 							  lua_State *co);
+#if !LJ_HASJIT
+#define lj_dispatch_stitch	lj_dispatch_ins
+#endif
+#if !LJ_HASPROFILE
+#define lj_dispatch_profile	lj_dispatch_ins
+#endif
 
 #define GOTFUNC(name)	(ASMFunction)name,
 static const ASMFunction dispatch_got[] = {
@@ -82,11 +93,12 @@ void lj_dispatch_init_hotcount(global_State *g)
 #endif
 
 /* Internal dispatch mode bits. */
-#define DISPMODE_JIT	0x01	/* JIT compiler on. */
-#define DISPMODE_REC	0x02	/* Recording active. */
+#define DISPMODE_CALL	0x01	/* Override call dispatch. */
+#define DISPMODE_RET	0x02	/* Override return dispatch. */
 #define DISPMODE_INS	0x04	/* Override instruction dispatch. */
-#define DISPMODE_CALL	0x08	/* Override call dispatch. */
-#define DISPMODE_RET	0x10	/* Override return dispatch. */
+#define DISPMODE_JIT	0x10	/* JIT compiler on. */
+#define DISPMODE_REC	0x20	/* Recording active. */
+#define DISPMODE_PROF	0x40	/* Profiling active. */
 
 /* Update dispatch table depending on various flags. */
 void lj_dispatch_update(global_State *g)
@@ -97,6 +109,9 @@ void lj_dispatch_update(global_State *g)
   mode |= (G2J(g)->flags & JIT_F_ON) ? DISPMODE_JIT : 0;
   mode |= G2J(g)->state != LJ_TRACE_IDLE ?
 	    (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0;
+#endif
+#if LJ_HASPROFILE
+  mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0;
 #endif
   mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0;
   mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0;
@@ -126,9 +141,9 @@ void lj_dispatch_update(global_State *g)
     disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
 
     /* Set dynamic instruction dispatch. */
-    if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) {
+    if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) {
       /* Need to update the whole table. */
-      if (!(mode & (DISPMODE_REC|DISPMODE_INS))) {  /* No ins dispatch? */
+      if (!(mode & DISPMODE_INS)) {  /* No ins dispatch? */
 	/* Copy static dispatch table to dynamic dispatch table. */
 	memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction));
 	/* Overwrite with dynamic return dispatch. */
@@ -140,12 +155,13 @@ void lj_dispatch_update(global_State *g)
 	}
       } else {
 	/* The recording dispatch also checks for hooks. */
-	ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
+	ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook :
+			(mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
 	uint32_t i;
 	for (i = 0; i < GG_LEN_SDISP; i++)
 	  disp[i] = f;
       }
-    } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) {
+    } else if (!(mode & DISPMODE_INS)) {
       /* Otherwise set dynamic counting ins. */
       disp[BC_FORL] = f_forl;
       disp[BC_ITERL] = f_iterl;
@@ -352,10 +368,19 @@ static void callhook(lua_State *L, int event, BCLine line)
     /* Top frame, nextframe = NULL. */
     ar.i_ci = (int)((L->base-1) - tvref(L->stack));
     lj_state_checkstack(L, 1+LUA_MINSTACK);
+#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
+    lj_profile_hook_enter(g);
+#else
     hook_enter(g);
+#endif
     hookf(L, &ar);
     lua_assert(hook_active(g));
+    setgcref(g->cur_L, obj2gco(L));
+#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
+    lj_profile_hook_leave(g);
+#else
     hook_leave(g);
+#endif
   }
 }
 
@@ -368,7 +393,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres)
   if (bc_op(ins) == BC_UCLO)
     ins = pc[bc_j(ins)];
   switch (bc_op(ins)) {
-  case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1;
+  case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1+LJ_FR2;
   case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1;
   case BC_TSETM: return bc_a(ins) + nres-1;
   default: return pt->framesize;
@@ -492,3 +517,41 @@ out:
   return makeasmfunc(lj_bc_ofs[op]);  /* Return static dispatch target. */
 }
 
+#if LJ_HASJIT
+/* Stitch a new trace. */
+void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc)
+{
+  ERRNO_SAVE
+  lua_State *L = J->L;
+  void *cf = cframe_raw(L->cframe);
+  const BCIns *oldpc = cframe_pc(cf);
+  setcframe_pc(cf, pc);
+  /* Before dispatch, have to bias PC by 1. */
+  L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf));
+  lj_trace_stitch(J, pc-1);  /* Point to the CALL instruction. */
+  setcframe_pc(cf, oldpc);
+  ERRNO_RESTORE
+}
+#endif
+
+#if LJ_HASPROFILE
+/* Profile dispatch. */
+void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)
+{
+  ERRNO_SAVE
+  GCfunc *fn = curr_func(L);
+  GCproto *pt = funcproto(fn);
+  void *cf = cframe_raw(L->cframe);
+  const BCIns *oldpc = cframe_pc(cf);
+  global_State *g;
+  setcframe_pc(cf, pc);
+  L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf));
+  lj_profile_interpreter(L);
+  setcframe_pc(cf, oldpc);
+  g = G(L);
+  setgcref(g->cur_L, obj2gco(L));
+  setvmstate(g, INTERP);
+  ERRNO_RESTORE
+}
+#endif
+
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_dispatch.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_dispatch.h
similarity index 84%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_dispatch.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_dispatch.h
index 778affc8cdc44ca86d4e1b3b39e86825e4c1cf11..1e247e382861a202f93caa97f3db8cd8edc8367d 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_dispatch.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_dispatch.h
@@ -29,15 +29,17 @@
   _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
   _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
   _(pow) _(fmod) _(ldexp) \
-  _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) \
+  _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
+  _(lj_dispatch_profile) _(lj_err_throw) \
   _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
   _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
   _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
-  _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \
-  _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \
-  _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \
-  _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
-  JITGOTDEF(_) FFIGOTDEF(_)
+  _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
+  _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_num) \
+  _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
+  _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
+  _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
+  _(lj_buf_putstr_upper) _(lj_buf_tostr) JITGOTDEF(_) FFIGOTDEF(_)
 
 enum {
 #define GOTENUM(name) LJ_GOT_##name,
@@ -60,7 +62,7 @@ typedef uint16_t HotCount;
 #define HOTCOUNT_CALL		1
 
 /* This solves a circular dependency problem -- bump as needed. Sigh. */
-#define GG_NUM_ASMFF	62
+#define GG_NUM_ASMFF	57
 
 #define GG_LEN_DDISP	(BC__MAX + GG_NUM_ASMFF)
 #define GG_LEN_SDISP	BC_FUNCF
@@ -109,7 +111,12 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
 /* Instruction dispatch callback for hooks or when recording. */
 LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
 LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
-LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc);
+#if LJ_HASJIT
+LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc);
+#endif
+#if LJ_HASPROFILE
+LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
+#endif
 
 #if LJ_HASFFI && !defined(_BUILDVM_H)
 /* Save/restore errno and GetLastError() around hooks, exits and recording. */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_arm.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_arm.h
similarity index 96%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_arm.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_arm.h
index 8c5e5379c75980c6c406a8094c86d3dd13365796..45ce519ef6bacdab5e1e738bc3f115ab8a7aa5d4 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_arm.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_arm.h
@@ -308,30 +308,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
   emit_dm(as, ARMI_MOV, dst, src);
 }
 
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
 #if LJ_SOFTFP
   lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
 #else
   if (r >= RID_MAX_GPR)
-    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs);
+    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
   else
 #endif
-    emit_lso(as, ARMI_LDR, r, RID_SP, ofs);
+    emit_lso(as, ARMI_LDR, r, base, ofs);
 }
 
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
 #if LJ_SOFTFP
   lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
 #else
   if (r >= RID_MAX_GPR)
-    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs);
+    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
   else
 #endif
-    emit_lso(as, ARMI_STR, r, RID_SP, ofs);
+    emit_lso(as, ARMI_STR, r, base, ofs);
 }
 
 /* Emit an arithmetic/logic operation with a constant operand. */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_mips.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_mips.h
similarity index 92%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_mips.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_mips.h
index 0fc07d9101db37f963861affcb06b0b44803ed82..8e7ee66fb1a2d27a07fd4be787fab7a8a87e2905 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_mips.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_mips.h
@@ -178,24 +178,24 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
     emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src);
 }
 
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
   if (r < RID_MAX_GPR)
-    emit_tsi(as, MIPSI_LW, r, RID_SP, ofs);
+    emit_tsi(as, MIPSI_LW, r, base, ofs);
   else
     emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
-	     (r & 31), RID_SP, ofs);
+	     (r & 31), base, ofs);
 }
 
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
   if (r < RID_MAX_GPR)
-    emit_tsi(as, MIPSI_SW, r, RID_SP, ofs);
+    emit_tsi(as, MIPSI_SW, r, base, ofs);
   else
     emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
-	     (r&31), RID_SP, ofs);
+	     (r&31), base, ofs);
 }
 
 /* Add offset to pointer. */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_ppc.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_ppc.h
similarity index 92%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_ppc.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_ppc.h
index 14edf00fd648b71cd5b6eae07b8bf13744ee1686..087860ed382c65472642c1de1234ee659bb9772b 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_ppc.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_ppc.h
@@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
     emit_fb(as, PPCI_FMR, dst, src);
 }
 
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
   if (r < RID_MAX_GPR)
-    emit_tai(as, PPCI_LWZ, r, RID_SP, ofs);
+    emit_tai(as, PPCI_LWZ, r, base, ofs);
   else
-    emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs);
+    emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs);
 }
 
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
   if (r < RID_MAX_GPR)
-    emit_tai(as, PPCI_STW, r, RID_SP, ofs);
+    emit_tai(as, PPCI_STW, r, base, ofs);
   else
-    emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs);
+    emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs);
 }
 
 /* Emit a compare (for equality) with a constant operand. */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_x86.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_x86.h
similarity index 93%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_x86.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_x86.h
index 3a2f6510b9f51b8b1957c66ef41d0908b6c289e3..ac42db3e20f6e3573464b99a20bfe3ac0c512f88 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_emit_x86.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_emit_x86.h
@@ -241,10 +241,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
 
 /* -- Emit loads/stores --------------------------------------------------- */
 
-/* Instruction selection for XMM moves. */
-#define XMM_MOVRR(as)	((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
-#define XMM_MOVRM(as)	((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
-
 /* mov [base+ofs], i */
 static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 {
@@ -314,7 +310,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
   if (tvispzero(tv))  /* Use xor only for +0. */
     emit_rr(as, XO_XORPS, r, r);
   else
-    emit_rma(as, XMM_MOVRM(as), r, &tv->n);
+    emit_rma(as, XO_MOVSD, r, &tv->n);
 }
 
 /* -- Emit control-flow instructions -------------------------------------- */
@@ -427,25 +423,25 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
   if (dst < RID_MAX_GPR)
     emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
   else
-    emit_rr(as, XMM_MOVRR(as), dst, src);
+    emit_rr(as, XO_MOVAPS, dst, src);
 }
 
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
   if (r < RID_MAX_GPR)
-    emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
+    emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
   else
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
 }
 
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
   if (r < RID_MAX_GPR)
-    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs);
+    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
   else
-    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
 }
 
 /* Add offset to pointer. */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_err.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_err.c
similarity index 94%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_err.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_err.c
index 081bfde4af5e5f6e347981af9dfae192c1f57876..2e20c2c0f80249aaef3aff3adede239b41098390 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_err.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_err.c
@@ -16,6 +16,7 @@
 #include "lj_ff.h"
 #include "lj_trace.h"
 #include "lj_vm.h"
+#include "lj_strfmt.h"
 
 /*
 ** LuaJIT can either use internal or external frame unwinding:
@@ -57,10 +58,10 @@
 ** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH.
 ** EXT is mandatory on WIN64 since the calling convention has an abundance
 ** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
-** EXT is mandatory on POSIX/x64 since the interpreter doesn't save r12/r13.
+** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
 */
 
-#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL))
+#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
 #define LJ_UNWIND_EXT	1
 #elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS
 #define LJ_UNWIND_EXT	1
@@ -98,14 +99,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
       TValue *top = restorestack(L, -nres);
       if (frame < top) {  /* Frame reached? */
 	if (errcode) {
-	  L->cframe = cframe_prev(cf);
 	  L->base = frame+1;
+	  L->cframe = cframe_prev(cf);
 	  unwindstack(L, top);
 	}
 	return cf;
       }
     }
-    if (frame <= tvref(L->stack))
+    if (frame <= tvref(L->stack)+LJ_FR2)
       break;
     switch (frame_typep(frame)) {
     case FRAME_LUA:  /* Lua frame. */
@@ -113,14 +114,12 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
       frame = frame_prevl(frame);
       break;
     case FRAME_C:  /* C frame. */
-#if LJ_HASFFI
     unwind_c:
-#endif
 #if LJ_UNWIND_EXT
       if (errcode) {
-	L->cframe = cframe_prev(cf);
 	L->base = frame_prevd(frame) + 1;
-	unwindstack(L, frame);
+	L->cframe = cframe_prev(cf);
+	unwindstack(L, frame - LJ_FR2);
       } else if (cf != stopcf) {
 	cf = cframe_prev(cf);
 	frame = frame_prevd(frame);
@@ -143,16 +142,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 	return cf;
       }
       if (errcode) {
-	L->cframe = cframe_prev(cf);
 	L->base = frame_prevd(frame) + 1;
-	unwindstack(L, frame);
+	L->cframe = cframe_prev(cf);
+	unwindstack(L, frame - LJ_FR2);
       }
       return cf;
     case FRAME_CONT:  /* Continuation frame. */
-#if LJ_HASFFI
-      if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
+      if (frame_iscont_fficb(frame))
 	goto unwind_c;
-#endif
     case FRAME_VARG:  /* Vararg frame. */
       frame = frame_prevd(frame);
       break;
@@ -165,8 +162,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 	}
 	if (frame_typep(frame) == FRAME_PCALL)
 	  hook_leave(G(L));
-	L->cframe = cf;
 	L->base = frame_prevd(frame) + 1;
+	L->cframe = cf;
 	unwindstack(L, L->base);
       }
       return (void *)((intptr_t)cf | CFRAME_UNWIND_FF);
@@ -174,8 +171,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
   }
   /* No C frame. */
   if (errcode) {
+    L->base = tvref(L->stack)+1+LJ_FR2;
     L->cframe = NULL;
-    L->base = tvref(L->stack)+1;
     unwindstack(L, L->base);
     if (G(L)->panic)
       G(L)->panic(L);
@@ -452,7 +449,7 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
 {
   global_State *g = G(L);
   lj_trace_abort(g);
-  setgcrefnull(g->jit_L);
+  setmref(g->jit_base, NULL);
   L->status = 0;
 #if LJ_UNWIND_EXT
   err_raise_ext(errcode);
@@ -497,7 +494,7 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
 /* Find error function for runtime errors. Requires an extra stack traversal. */
 static ptrdiff_t finderrfunc(lua_State *L)
 {
-  cTValue *frame = L->base-1, *bot = tvref(L->stack);
+  cTValue *frame = L->base-1, *bot = tvref(L->stack)+LJ_FR2;
   void *cf = L->cframe;
   while (frame > bot && cf) {
     while (cframe_nres(cframe_raw(cf)) < 0) {  /* cframe without frame? */
@@ -521,10 +518,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
       frame = frame_prevd(frame);
       break;
     case FRAME_CONT:
-#if LJ_HASFFI
-      if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
+      if (frame_iscont_fficb(frame))
 	cf = cframe_prev(cf);
-#endif
       frame = frame_prevd(frame);
       break;
     case FRAME_CP:
@@ -535,8 +530,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
       break;
     case FRAME_PCALL:
     case FRAME_PCALLH:
-      if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue)))  /* xpcall? */
-	return savestack(L, frame-1);  /* Point to xpcall's errorfunc. */
+      if (frame_func(frame_prevd(frame))->c.ffid == FF_xpcall)
+	return savestack(L, frame_prevd(frame)+1);  /* xpcall's errorfunc. */
       return 0;
     default:
       lua_assert(0);
@@ -559,8 +554,9 @@ LJ_NOINLINE void lj_err_run(lua_State *L)
       lj_err_throw(L, LUA_ERRERR);
     }
     L->status = LUA_ERRERR;
-    copyTV(L, top, top-1);
+    copyTV(L, top+LJ_FR2, top-1);
     copyTV(L, top-1, errfunc);
+    if (LJ_FR2) setnilV(top++);
     L->top = top+1;
     lj_vm_call(L, top, 1+1);  /* Stack: |errfunc|msg| -> |msg| */
   }
@@ -574,7 +570,7 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
   va_list argp;
   va_start(argp, em);
   if (curr_funcisL(L)) L->top = curr_topL(L);
-  msg = lj_str_pushvf(L, err2msg(em), argp);
+  msg = lj_strfmt_pushvf(L, err2msg(em), argp);
   va_end(argp);
   lj_debug_addloc(L, msg, L->base-1, NULL);
   lj_err_run(L);
@@ -592,11 +588,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
 {
   char buff[LUA_IDSIZE];
   const char *msg;
-  lj_debug_shortname(buff, src);
-  msg = lj_str_pushvf(L, err2msg(em), argp);
-  msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg);
+  lj_debug_shortname(buff, src, line);
+  msg = lj_strfmt_pushvf(L, err2msg(em), argp);
+  msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg);
   if (tok)
-    lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
+    lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
   lj_err_throw(L, LUA_ERRSYNTAX);
 }
 
@@ -635,8 +631,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
   const BCIns *pc = cframe_Lpc(L);
   if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
     const char *tname = lj_typename(o);
+    if (LJ_FR2) o++;
     setframe_pc(o, pc);
-    setframe_gc(o, obj2gco(L));
+    setframe_gc(o, obj2gco(L), LJ_TTHREAD);
     L->top = L->base = o+1;
     err_msgv(L, LJ_ERR_BADCALL, tname);
   }
@@ -651,13 +648,10 @@ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
   if (frame_islua(frame)) {
     pframe = frame_prevl(frame);
   } else if (frame_iscont(frame)) {
-#if LJ_HASFFI
-    if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) {
+    if (frame_iscont_fficb(frame)) {
       pframe = frame;
       frame = NULL;
-    } else
-#endif
-    {
+    } else {
       pframe = frame_prevd(frame);
 #if LJ_HASFFI
       /* Remove frame for FFI metamethods. */
@@ -680,7 +674,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
   const char *msg;
   va_list argp;
   va_start(argp, em);
-  msg = lj_str_pushvf(L, err2msg(em), argp);
+  msg = lj_strfmt_pushvf(L, err2msg(em), argp);
   va_end(argp);
   lj_err_callermsg(L, msg);
 }
@@ -700,9 +694,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg,
   if (narg < 0 && narg > LUA_REGISTRYINDEX)
     narg = (int)(L->top - L->base) + narg + 1;
   if (ftype && ftype[3] == 'h' && --narg == 0)  /* Check for "method". */
-    msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
+    msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
   else
-    msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
+    msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
   lj_err_callermsg(L, msg);
 }
 
@@ -712,7 +706,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
   const char *msg;
   va_list argp;
   va_start(argp, em);
-  msg = lj_str_pushvf(L, err2msg(em), argp);
+  msg = lj_strfmt_pushvf(L, err2msg(em), argp);
   va_end(argp);
   err_argmsg(L, narg, msg);
 }
@@ -742,7 +736,7 @@ LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
     TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
     tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
   }
-  msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
+  msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
   err_argmsg(L, narg, msg);
 }
 
@@ -792,7 +786,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
   const char *msg;
   va_list argp;
   va_start(argp, fmt);
-  msg = lj_str_pushvf(L, fmt, argp);
+  msg = lj_strfmt_pushvf(L, fmt, argp);
   va_end(argp);
   lj_err_callermsg(L, msg);
   return 0;  /* unreachable */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_err.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_err.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_err.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_err.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_errmsg.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_errmsg.h
similarity index 97%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_errmsg.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_errmsg.h
index 1c9487607b63b81d8a16e504510c45697c27ef86..7717665b0383b685aa0263439190ef8c161d81ac 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_errmsg.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_errmsg.h
@@ -96,9 +96,7 @@ ERRDEF(STRPATX,	"pattern too complex")
 ERRDEF(STRCAPI,	"invalid capture index")
 ERRDEF(STRCAPN,	"too many captures")
 ERRDEF(STRCAPU,	"unfinished capture")
-ERRDEF(STRFMTO,	"invalid option " LUA_QL("%%%c") " to " LUA_QL("format"))
-ERRDEF(STRFMTR,	"invalid format (repeated flags)")
-ERRDEF(STRFMTW,	"invalid format (width or precision too long)")
+ERRDEF(STRFMT,	"invalid option " LUA_QS " to " LUA_QL("format"))
 ERRDEF(STRGSRV,	"invalid replacement value (a %s)")
 ERRDEF(BADMODN,	"name conflict for module " LUA_QS)
 #if LJ_HASJIT
@@ -118,7 +116,6 @@ ERRDEF(JITOPT,	"unknown or malformed optimization flag " LUA_QS)
 /* Lexer/parser errors. */
 ERRDEF(XMODE,	"attempt to load chunk with wrong mode")
 ERRDEF(XNEAR,	"%s near " LUA_QS)
-ERRDEF(XELEM,	"lexical element too long")
 ERRDEF(XLINES,	"chunk has too many lines")
 ERRDEF(XLEVELS,	"chunk has too many syntax levels")
 ERRDEF(XNUMBER,	"malformed number")
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ff.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ff.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ff.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ff.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ffrecord.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ffrecord.c
similarity index 61%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ffrecord.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ffrecord.c
index 69f71ab2efb686749bef8ac00c60d02661578563..a08113ca386003fe6e10f9b12ed5f9596889ac0c 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ffrecord.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ffrecord.c
@@ -27,6 +27,7 @@
 #include "lj_dispatch.h"
 #include "lj_vm.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 
 /* Some local macros to save typing. Undef'd at the end. */
 #define IR(ref)			(&J->cur.ir[(ref)])
@@ -79,10 +80,7 @@ static GCstr *argv2str(jit_State *J, TValue *o)
     GCstr *s;
     if (!tvisnumber(o))
       lj_trace_err(J, LJ_TRERR_BADTYPE);
-    if (tvisint(o))
-      s = lj_str_fromint(J->L, intV(o));
-    else
-      s = lj_str_fromnum(J->L, &o->n);
+    s = lj_strfmt_number(J->L, o);
     setstrV(J->L, o, s);
     return s;
   }
@@ -98,14 +96,93 @@ static ptrdiff_t results_wanted(jit_State *J)
     return -1;
 }
 
-/* Throw error for unsupported variant of fast function. */
-LJ_NORET static void recff_nyiu(jit_State *J)
+#ifdef LUAJIT_TRACE_STITCHING
+/* This feature is disabled for now due to a design mistake. Sorry.
+**
+** It causes unpredictable behavior and crashes when a full trace flush
+** happens with a stitching continuation still in the stack somewhere.
+*/
+
+/* Trace stitching: add continuation below frame to start a new trace. */
+static void recff_stitch(jit_State *J)
 {
-  setfuncV(J->L, &J->errinfo, J->fn);
-  lj_trace_err_info(J, LJ_TRERR_NYIFFU);
+  ASMFunction cont = lj_cont_stitch;
+  TraceNo traceno = J->cur.traceno;
+  lua_State *L = J->L;
+  TValue *base = L->base;
+  const BCIns *pc = frame_pc(base-1);
+  TValue *pframe = frame_prevl(base-1);
+  TRef trcont;
+
+  lua_assert(!LJ_FR2);  /* TODO_FR2: handle frame shift. */
+  /* Move func + args up in Lua stack and insert continuation. */
+  memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1));
+  setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT);
+  setcont(base, cont);
+  setframe_pc(base, pc);
+  if (LJ_DUALNUM) setintV(base-1, traceno); else base[-1].u64 = traceno;
+  L->base += 2;
+  L->top += 2;
+
+  /* Ditto for the IR. */
+  memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1));
+#if LJ_64
+  trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
+#else
+  trcont = lj_ir_kptr(J, (void *)cont);
+#endif
+  J->base[0] = trcont | TREF_CONT;
+  J->base[-1] = LJ_DUALNUM ? lj_ir_kint(J,traceno) : lj_ir_knum_u64(J,traceno);
+  J->base += 2;
+  J->baseslot += 2;
+  J->framedepth++;
+
+  lj_record_stop(J, LJ_TRLINK_STITCH, 0);
+
+  /* Undo Lua stack changes. */
+  memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1));
+  setframe_pc(base-1, pc);
+  L->base -= 2;
+  L->top -= 2;
+}
+
+/* Fallback handler for fast functions that are not recorded (yet). */
+static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
+{
+  if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) {
+    lj_trace_err_info(J, LJ_TRERR_TRACEUV);
+  } else {
+    /* Can only stitch from Lua call. */
+    if (J->framedepth && frame_islua(J->L->base-1)) {
+      BCOp op = bc_op(*frame_pc(J->L->base-1));
+      /* Stitched trace cannot start with *M op with variable # of args. */
+      if (!(op == BC_CALLM || op == BC_CALLMT ||
+	    op == BC_RETM || op == BC_TSETM)) {
+	switch (J->fn->c.ffid) {
+	case FF_error:
+	case FF_debug_sethook:
+	case FF_jit_flush:
+	  break;  /* Don't stitch across special builtins. */
+	default:
+	  recff_stitch(J);  /* Use trace stitching. */
+	  rd->nres = -1;
+	  return;
+	}
+      }
+    }
+    /* Otherwise stop trace and return to interpreter. */
+    lj_record_stop(J, LJ_TRLINK_RETURN, 0);
+    rd->nres = -1;
+  }
 }
 
-/* Fallback handler for all fast functions that are not recorded (yet). */
+/* Fallback handler for unsupported variants of fast functions. */
+#define recff_nyiu	recff_nyi
+
+/* Must stop the trace for classic C functions with arbitrary side-effects. */
+#define recff_c		recff_nyi
+#else
+/* Fallback handler for fast functions that are not recorded (yet). */
 static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
 {
   setfuncV(J->L, &J->errinfo, J->fn);
@@ -113,13 +190,29 @@ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
   UNUSED(rd);
 }
 
-/* C functions can have arbitrary side-effects and are not recorded (yet). */
+/* Throw error for unsupported variant of fast function. */
+LJ_NORET static void recff_nyiu(jit_State *J, RecordFFData *rd)
+{
+  setfuncV(J->L, &J->errinfo, J->fn);
+  lj_trace_err_info(J, LJ_TRERR_NYIFFU);
+  UNUSED(rd);
+}
+
+/* Must abort the trace for classic C functions with arbitrary side-effects. */
 static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd)
 {
   setfuncV(J->L, &J->errinfo, J->fn);
   lj_trace_err_info(J, LJ_TRERR_NYICF);
   UNUSED(rd);
 }
+#endif
+
+/* Emit BUFHDR for the global temporary buffer. */
+static TRef recff_bufhdr(jit_State *J)
+{
+  return emitir(IRT(IR_BUFHDR, IRT_P32),
+		lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
+}
 
 /* -- Base library fast functions ----------------------------------------- */
 
@@ -135,7 +228,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd)
   uint32_t t;
   if (tvisnumber(&rd->argv[0]))
     t = ~LJ_TNUMX;
-  else if (LJ_64 && tvislightud(&rd->argv[0]))
+  else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0]))
     t = ~LJ_TLIGHTUD;
   else
     t = ~itype(&rd->argv[0]);
@@ -263,7 +356,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
 	  J->base[i] = J->base[start+i];
       }  /* else: Interpreter will throw. */
     } else {
-      recff_nyiu(J);
+      recff_nyiu(J, rd);
+      return;
     }
   }  /* else: Interpreter will throw. */
 }
@@ -274,14 +368,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
   TRef base = J->base[1];
   if (tr && !tref_isnil(base)) {
     base = lj_opt_narrow_toint(J, base);
-    if (!tref_isk(base) || IR(tref_ref(base))->i != 10)
-      recff_nyiu(J);
+    if (!tref_isk(base) || IR(tref_ref(base))->i != 10) {
+      recff_nyiu(J, rd);
+      return;
+    }
   }
   if (tref_isnumber_str(tr)) {
     if (tref_isstr(tr)) {
       TValue tmp;
-      if (!lj_strscan_num(strV(&rd->argv[0]), &tmp))
-	recff_nyiu(J);  /* Would need an inverted STRTO for this case. */
+      if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) {
+	recff_nyiu(J, rd);  /* Would need an inverted STRTO for this case. */
+	return;
+      }
       tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
     }
 #if LJ_HASFFI
@@ -336,13 +434,15 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd)
   if (tref_isstr(tr)) {
     /* Ignore __tostring in the string base metatable. */
     /* Pass on result in J->base[0]. */
-  } else if (!recff_metacall(J, rd, MM_tostring)) {
+  } else if (tr && !recff_metacall(J, rd, MM_tostring)) {
     if (tref_isnumber(tr)) {
-      J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
+      J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr,
+			  tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
     } else if (tref_ispri(tr)) {
-      J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)]));
+      J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0]));
     } else {
-      recff_nyiu(J);
+      recff_nyiu(J, rd);
+      return;
     }
   }
 }
@@ -364,14 +464,14 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
   }  /* else: Interpreter will throw. */
 }
 
-static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd)
+static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
 {
   if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) {
     TRef tab = J->base[0];
     if (tref_istab(tab)) {
       J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
       J->base[1] = tab;
-      J->base[2] = lj_ir_kint(J, 0);
+      J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL;
       rd->nres = 3;
     }  /* else: Interpreter will throw. */
   }
@@ -399,6 +499,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
     TValue argv0, argv1;
     TRef tmp;
     int errcode;
+    lua_assert(!LJ_FR2);  /* TODO_FR2: handle different frame setup. */
     /* Swap function and traceback. */
     tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp;
     copyTV(J->L, &argv0, &rd->argv[0]);
@@ -416,6 +517,18 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
   }  /* else: Interpreter will throw. */
 }
 
+static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
+{
+  TRef tr = J->base[0];
+  /* Only support getfenv(0) for now. */
+  if (tref_isint(tr) && tref_isk(tr) && IR(tref_ref(tr))->i == 0) {
+    TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
+    J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV);
+    return;
+  }
+  recff_nyiu(J, rd);
+}
+
 /* -- Math library fast functions ----------------------------------------- */
 
 static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
@@ -528,14 +641,6 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
   rd->nres = 2;
 }
 
-static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd)
-{
-  TRef tr = lj_ir_tonum(J, J->base[0]);
-  TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0]));
-  J->base[0] = emitir(IRTN(IR_MUL), tr, trm);
-  UNUSED(rd);
-}
-
 static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
 {
   TRef tr = lj_ir_tonum(J, J->base[0]);
@@ -592,48 +697,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
 
 /* -- Bit library fast functions ------------------------------------------ */
 
-/* Record unary bit.tobit, bit.bnot, bit.bswap. */
+/* Record bit.tobit. */
+static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd)
+{
+  TRef tr = J->base[0];
+#if LJ_HASFFI
+  if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; }
+#endif
+  J->base[0] = lj_opt_narrow_tobit(J, tr);
+  UNUSED(rd);
+}
+
+/* Record unary bit.bnot, bit.bswap. */
 static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
 {
-  TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
-  J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0);
+#if LJ_HASFFI
+  if (recff_bit64_unary(J, rd))
+    return;
+#endif
+  J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0);
 }
 
 /* Record N-ary bit.band, bit.bor, bit.bxor. */
 static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
 {
-  TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
-  uint32_t op = rd->data;
-  BCReg i;
-  for (i = 1; J->base[i] != 0; i++)
-    tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i]));
-  J->base[0] = tr;
+#if LJ_HASFFI
+  if (recff_bit64_nary(J, rd))
+    return;
+#endif
+  {
+    TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
+    uint32_t ot = IRTI(rd->data);
+    BCReg i;
+    for (i = 1; J->base[i] != 0; i++)
+      tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i]));
+    J->base[0] = tr;
+  }
 }
 
 /* Record bit shifts. */
 static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
 {
-  TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
-  TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
-  IROp op = (IROp)rd->data;
-  if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
-      !tref_isk(tsh))
-    tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
+#if LJ_HASFFI
+  if (recff_bit64_shift(J, rd))
+    return;
+#endif
+  {
+    TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
+    TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
+    IROp op = (IROp)rd->data;
+    if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
+	!tref_isk(tsh))
+      tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
 #ifdef LJ_TARGET_UNIFYROT
-  if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
-    op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
-    tsh = emitir(IRTI(IR_NEG), tsh, tsh);
+    if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
+      op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
+      tsh = emitir(IRTI(IR_NEG), tsh, tsh);
+    }
+#endif
+    J->base[0] = emitir(IRTI(op), tr, tsh);
   }
+}
+
+static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
+{
+#if LJ_HASFFI
+  TRef hdr = recff_bufhdr(J);
+  TRef tr = recff_bit64_tohex(J, rd, hdr);
+  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+#else
+  recff_nyiu(J, rd);  /* Don't bother working around this NYI. */
 #endif
-  J->base[0] = emitir(IRTI(op), tr, tsh);
 }
 
 /* -- String library fast functions --------------------------------------- */
 
-static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd)
+/* Specialize to relative starting position for string. */
+static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr,
+			       TRef trlen, TRef tr0)
 {
-  J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN);
-  UNUSED(rd);
+  int32_t start = *st;
+  if (start < 0) {
+    emitir(IRTGI(IR_LT), tr, tr0);
+    tr = emitir(IRTI(IR_ADD), trlen, tr);
+    start = start + (int32_t)s->len;
+    emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0);
+    if (start < 0) {
+      tr = tr0;
+      start = 0;
+    }
+  } else if (start == 0) {
+    emitir(IRTGI(IR_EQ), tr, tr0);
+    tr = tr0;
+  } else {
+    tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1));
+    emitir(IRTGI(IR_GE), tr, tr0);
+    start--;
+  }
+  *st = start;
+  return tr;
 }
 
 /* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
@@ -680,29 +842,11 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
   } else if ((MSize)end <= str->len) {
     emitir(IRTGI(IR_ULE), trend, trlen);
   } else {
-    emitir(IRTGI(IR_GT), trend, trlen);
+    emitir(IRTGI(IR_UGT), trend, trlen);
     end = (int32_t)str->len;
     trend = trlen;
   }
-  if (start < 0) {
-    emitir(IRTGI(IR_LT), trstart, tr0);
-    trstart = emitir(IRTI(IR_ADD), trlen, trstart);
-    start = start+(int32_t)str->len;
-    emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0);
-    if (start < 0) {
-      trstart = tr0;
-      start = 0;
-    }
-  } else {
-    if (start == 0) {
-      emitir(IRTGI(IR_EQ), trstart, tr0);
-      trstart = tr0;
-    } else {
-      trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1));
-      emitir(IRTGI(IR_GE), trstart, tr0);
-      start--;
-    }
-  }
+  trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
   if (rd->data) {  /* Return string.sub result. */
     if (end - start >= 0) {
       /* Also handle empty range here, to avoid extra traces. */
@@ -712,7 +856,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
       J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
     } else {  /* Range underflow: return empty string. */
       emitir(IRTGI(IR_LT), trend, trstart);
-      J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0));
+      J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
     }
   } else {  /* Return string.byte result(s). */
     ptrdiff_t i, len = end - start;
@@ -734,48 +878,200 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
   }
 }
 
-/* -- Table library fast functions ---------------------------------------- */
-
-static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd)
+static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
 {
-  if (tref_istab(J->base[0]))
-    J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]);
-  /* else: Interpreter will throw. */
+  TRef k255 = lj_ir_kint(J, 255);
+  BCReg i;
+  for (i = 0; J->base[i] != 0; i++) {  /* Convert char values to strings. */
+    TRef tr = lj_opt_narrow_toint(J, J->base[i]);
+    emitir(IRTGI(IR_ULE), tr, k255);
+    J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR);
+  }
+  if (i > 1) {  /* Concatenate the strings, if there's more than one. */
+    TRef hdr = recff_bufhdr(J), tr = hdr;
+    for (i = 0; J->base[i] != 0; i++)
+      tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]);
+    J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  }
   UNUSED(rd);
 }
 
-static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd)
+static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
 {
-  TRef tab = J->base[0];
-  rd->nres = 0;
-  if (tref_istab(tab)) {
-    if (tref_isnil(J->base[1])) {  /* Simple pop: t[#t] = nil */
-      TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab);
-      GCtab *t = tabV(&rd->argv[0]);
-      MSize len = lj_tab_len(t);
-      emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0));
-      if (len) {
-	RecordIndex ix;
-	ix.tab = tab;
-	ix.key = trlen;
-	settabV(J->L, &ix.tabv, t);
-	setintV(&ix.keyv, len);
-	ix.idxchain = 0;
-	if (results_wanted(J) != 0) {  /* Specialize load only if needed. */
-	  ix.val = 0;
-	  J->base[0] = lj_record_idx(J, &ix);  /* Load previous value. */
-	  rd->nres = 1;
-	  /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */
-	}
-	ix.val = TREF_NIL;
-	lj_record_idx(J, &ix);  /* Remove value. */
+  TRef str = lj_ir_tostr(J, J->base[0]);
+  TRef rep = lj_opt_narrow_toint(J, J->base[1]);
+  TRef hdr, tr, str2 = 0;
+  if (!tref_isnil(J->base[2])) {
+    TRef sep = lj_ir_tostr(J, J->base[2]);
+    int32_t vrep = argv2int(J, &rd->argv[1]);
+    emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
+    if (vrep > 1) {
+      TRef hdr2 = recff_bufhdr(J);
+      TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), hdr2, sep);
+      tr2 = emitir(IRT(IR_BUFPUT, IRT_P32), tr2, str);
+      str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
+    }
+  }
+  tr = hdr = recff_bufhdr(J);
+  if (str2) {
+    tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, str);
+    str = str2;
+    rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
+  }
+  tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
+  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+}
+
+static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
+{
+  TRef str = lj_ir_tostr(J, J->base[0]);
+  TRef hdr = recff_bufhdr(J);
+  TRef tr = lj_ir_call(J, rd->data, hdr, str);
+  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+}
+
+static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
+{
+  TRef trstr = lj_ir_tostr(J, J->base[0]);
+  TRef trpat = lj_ir_tostr(J, J->base[1]);
+  TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN);
+  TRef tr0 = lj_ir_kint(J, 0);
+  TRef trstart;
+  GCstr *str = argv2str(J, &rd->argv[0]);
+  GCstr *pat = argv2str(J, &rd->argv[1]);
+  int32_t start;
+  J->needsnap = 1;
+  if (tref_isnil(J->base[2])) {
+    trstart = lj_ir_kint(J, 1);
+    start = 1;
+  } else {
+    trstart = lj_opt_narrow_toint(J, J->base[2]);
+    start = argv2int(J, &rd->argv[2]);
+  }
+  trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
+  if ((MSize)start <= str->len) {
+    emitir(IRTGI(IR_ULE), trstart, trlen);
+  } else {
+    emitir(IRTGI(IR_UGT), trstart, trlen);
+#if LJ_52
+    J->base[0] = TREF_NIL;
+    return;
+#else
+    trstart = trlen;
+    start = str->len;
+#endif
+  }
+  /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */
+  if ((J->base[2] && tref_istruecond(J->base[3])) ||
+      (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
+       !lj_str_haspattern(pat))) {  /* Search for fixed string. */
+    TRef trsptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart);
+    TRef trpptr = emitir(IRT(IR_STRREF, IRT_P32), trpat, tr0);
+    TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
+    TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
+    TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
+    TRef trp0 = lj_ir_kkptr(J, NULL);
+    if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
+		    str->len-(MSize)start, pat->len)) {
+      TRef pos;
+      emitir(IRTG(IR_NE, IRT_P32), tr, trp0);
+      pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_P32), trstr, tr0));
+      J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
+      J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
+      rd->nres = 2;
+    } else {
+      emitir(IRTG(IR_EQ, IRT_P32), tr, trp0);
+      J->base[0] = TREF_NIL;
+    }
+  } else {  /* Search for pattern. */
+    recff_nyiu(J, rd);
+    return;
+  }
+}
+
+static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+{
+  TRef trfmt = lj_ir_tostr(J, J->base[0]);
+  GCstr *fmt = argv2str(J, &rd->argv[0]);
+  int arg = 1;
+  TRef hdr, tr;
+  FormatState fs;
+  SFormat sf;
+  /* Specialize to the format string. */
+  emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
+  tr = hdr = recff_bufhdr(J);
+  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {  /* Parse format. */
+    TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++];
+    TRef trsf = lj_ir_kint(J, (int32_t)sf);
+    IRCallID id;
+    switch (STRFMT_TYPE(sf)) {
+    case STRFMT_LIT:
+      tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
+		  lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
+      break;
+    case STRFMT_INT:
+      id = IRCALL_lj_strfmt_putfnum_int;
+    handle_int:
+      if (!tref_isinteger(tra))
+	goto handle_num;
+      if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
+	tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
+		    emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
+      } else {
+#if LJ_HASFFI
+	tra = emitir(IRT(IR_CONV, IRT_U64), tra,
+		     (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT));
+	tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
+	lj_needsplit(J);
+#else
+	recff_nyiu(J, rd);  /* Don't bother working around this NYI. */
+	return;
+#endif
+      }
+      break;
+    case STRFMT_UINT:
+      id = IRCALL_lj_strfmt_putfnum_uint;
+      goto handle_int;
+    case STRFMT_NUM:
+      id = IRCALL_lj_strfmt_putfnum;
+    handle_num:
+      tra = lj_ir_tonum(J, tra);
+      tr = lj_ir_call(J, id, tr, trsf, tra);
+      if (LJ_SOFTFP) lj_needsplit(J);
+      break;
+    case STRFMT_STR:
+      if (!tref_isstr(tra)) {
+	recff_nyiu(J, rd);  /* NYI: __tostring and non-string types for %s. */
+	return;
       }
-    } else {  /* Complex case: remove in the middle. */
-      recff_nyiu(J);
+      if (sf == STRFMT_STR)  /* Shortcut for plain %s. */
+	tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra);
+      else if ((sf & STRFMT_T_QUOTED))
+	tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
+      else
+	tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra);
+      break;
+    case STRFMT_CHAR:
+      tra = lj_opt_narrow_toint(J, tra);
+      if (sf == STRFMT_CHAR)  /* Shortcut for plain %c. */
+	tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr,
+		    emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
+      else
+	tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
+      break;
+    case STRFMT_PTR:  /* NYI */
+    case STRFMT_ERR:
+    default:
+      recff_nyiu(J, rd);
+      return;
     }
-  }  /* else: Interpreter will throw. */
+  }
+  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
 }
 
+/* -- Table library fast functions ---------------------------------------- */
+
 static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
 {
   RecordIndex ix;
@@ -792,11 +1088,49 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
       ix.idxchain = 0;
       lj_record_idx(J, &ix);  /* Set new value. */
     } else {  /* Complex case: insert in the middle. */
-      recff_nyiu(J);
+      recff_nyiu(J, rd);
+      return;
     }
   }  /* else: Interpreter will throw. */
 }
 
+static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
+{
+  TRef tab = J->base[0];
+  if (tref_istab(tab)) {
+    TRef sep = !tref_isnil(J->base[1]) ?
+	       lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR);
+    TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ?
+	       lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
+    TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
+	       lj_opt_narrow_toint(J, J->base[3]) :
+	       lj_ir_call(J, IRCALL_lj_tab_len, tab);
+    TRef hdr = recff_bufhdr(J);
+    TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
+    emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
+    J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  }  /* else: Interpreter will throw. */
+  UNUSED(rd);
+}
+
+static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd)
+{
+  TRef tra = lj_opt_narrow_toint(J, J->base[0]);
+  TRef trh = lj_opt_narrow_toint(J, J->base[1]);
+  J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh);
+  UNUSED(rd);
+}
+
+static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd)
+{
+  TRef tr = J->base[0];
+  if (tref_istab(tr)) {
+    rd->nres = 0;
+    lj_ir_call(J, IRCALL_lj_tab_clear, tr);
+    J->needsnap = 1;
+  }  /* else: Interpreter will throw. */
+}
+
 /* -- I/O library fast functions ------------------------------------------ */
 
 /* Get FILE* for I/O function. Any I/O error aborts recording, so there's
@@ -832,7 +1166,10 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
     TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero);
     TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
     if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
-      TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
+      IRIns *irs = IR(tref_ref(str));
+      TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ?
+		irs->op1 :
+		emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
       tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
       if (results_wanted(J) != 0)  /* Check result only if not ignored. */
 	emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
@@ -854,6 +1191,28 @@ static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd)
   J->base[0] = TREF_TRUE;
 }
 
+/* -- Debug library fast functions ---------------------------------------- */
+
+static void LJ_FASTCALL recff_debug_getmetatable(jit_State *J, RecordFFData *rd)
+{
+  GCtab *mt;
+  TRef mtref;
+  TRef tr = J->base[0];
+  if (tref_istab(tr)) {
+    mt = tabref(tabV(&rd->argv[0])->metatable);
+    mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_TAB_META);
+  } else if (tref_isudata(tr)) {
+    mt = tabref(udataV(&rd->argv[0])->metatable);
+    mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_UDATA_META);
+  } else {
+    mt = tabref(basemt_obj(J2G(J), &rd->argv[0]));
+    J->base[0] = mt ? lj_ir_ktab(J, mt) : TREF_NIL;
+    return;
+  }
+  emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB));
+  J->base[0] = mt ? mtref : TREF_NIL;
+}
+
 /* -- Record calls to fast functions -------------------------------------- */
 
 #include "lj_recdef.h"
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ffrecord.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ffrecord.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ffrecord.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ffrecord.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_frame.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_frame.h
similarity index 64%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_frame.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_frame.h
index cd57be223fc25f9cdc05debf36c4fdd63e8be3a6..a86c36be7e5901f53517d3a1acd4babd6f32419a 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_frame.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_frame.h
@@ -11,7 +11,16 @@
 
 /* -- Lua stack frame ----------------------------------------------------- */
 
-/* Frame type markers in callee function slot (callee base-1). */
+/* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned:
+**
+**    PC  00  Lua frame
+** delta 001  C frame
+** delta 010  Continuation frame
+** delta 011  Lua vararg frame
+** delta 101  cpcall() frame
+** delta 110  ff pcall() frame
+** delta 111  ff pcall() frame with active hook
+*/
 enum {
   FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
   FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH
@@ -21,9 +30,47 @@ enum {
 #define FRAME_TYPEP		(FRAME_TYPE|FRAME_P)
 
 /* Macros to access and modify Lua frames. */
+#if LJ_FR2
+/* Two-slot frame info, required for 64 bit PC/GCRef:
+**
+**                   base-2  base-1      |  base  base+1 ...
+**                  [func   PC/delta/ft] | [slots ...]
+**                  ^-- frame            | ^-- base   ^-- top
+**
+** Continuation frames:
+**
+**   base-4  base-3  base-2  base-1      |  base  base+1 ...
+**  [cont      PC ] [func   PC/delta/ft] | [slots ...]
+**                  ^-- frame            | ^-- base   ^-- top
+*/
+#define frame_gc(f)		(gcval((f)-1))
+#define frame_ftsz(f)		((ptrdiff_t)(f)->ftsz)
+#define frame_pc(f)		((const BCIns *)frame_ftsz(f))
+#define setframe_gc(f, p, tp)	(setgcVraw((f)-1, (p), (tp)))
+#define setframe_ftsz(f, sz)	((f)->ftsz = (sz))
+#define setframe_pc(f, pc)	((f)->ftsz = (int64_t)(intptr_t)(pc))
+#else
+/* One-slot frame info, sufficient for 32 bit PC/GCRef:
+**
+**              base-1              |  base  base+1 ...
+**              lo     hi           |
+**             [func | PC/delta/ft] | [slots ...]
+**             ^-- frame            | ^-- base   ^-- top
+**
+** Continuation frames:
+**
+**  base-2      base-1              |  base  base+1 ...
+**  lo     hi   lo     hi           |
+** [cont | PC] [func | PC/delta/ft] | [slots ...]
+**             ^-- frame            | ^-- base   ^-- top
+*/
 #define frame_gc(f)		(gcref((f)->fr.func))
-#define frame_func(f)		(&frame_gc(f)->fn)
-#define frame_ftsz(f)		((f)->fr.tp.ftsz)
+#define frame_ftsz(f)		((ptrdiff_t)(f)->fr.tp.ftsz)
+#define frame_pc(f)		(mref((f)->fr.tp.pcr, const BCIns))
+#define setframe_gc(f, p, tp)	(setgcref((f)->fr.func, (p)), UNUSED(tp))
+#define setframe_ftsz(f, sz)	((f)->fr.tp.ftsz = (int32_t)(sz))
+#define setframe_pc(f, pc)	(setmref((f)->fr.tp.pcr, (pc)))
+#endif
 
 #define frame_type(f)		(frame_ftsz(f) & FRAME_TYPE)
 #define frame_typep(f)		(frame_ftsz(f) & FRAME_TYPEP)
@@ -33,27 +80,36 @@ enum {
 #define frame_isvarg(f)		(frame_typep(f) == FRAME_VARG)
 #define frame_ispcall(f)	((frame_ftsz(f) & 6) == FRAME_PCALL)
 
-#define frame_pc(f)		(mref((f)->fr.tp.pcr, const BCIns))
+#define frame_func(f)		(&frame_gc(f)->fn)
+#define frame_delta(f)		(frame_ftsz(f) >> 3)
+#define frame_sized(f)		(frame_ftsz(f) & ~FRAME_TYPEP)
+
+enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
+
+#if LJ_FR2
+#define frame_contpc(f)		(frame_pc((f)-2))
+#define frame_contv(f)		(((f)-3)->u64)
+#else
 #define frame_contpc(f)		(frame_pc((f)-1))
-#if LJ_64
+#define frame_contv(f)		(((f)-1)->u32.lo)
+#endif
+#if LJ_FR2
+#define frame_contf(f)		((ASMFunction)(uintptr_t)((f)-3)->u64)
+#elif LJ_64
 #define frame_contf(f) \
   ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \
 			 (intptr_t)(int32_t)((f)-1)->u32.lo))
 #else
 #define frame_contf(f)		((ASMFunction)gcrefp(((f)-1)->gcr, void))
 #endif
-#define frame_delta(f)		(frame_ftsz(f) >> 3)
-#define frame_sized(f)		(frame_ftsz(f) & ~FRAME_TYPEP)
+#define frame_iscont_fficb(f) \
+  (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK)
 
-#define frame_prevl(f)		((f) - (1+bc_a(frame_pc(f)[-1])))
+#define frame_prevl(f)		((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1])))
 #define frame_prevd(f)		((TValue *)((char *)(f) - frame_sized(f)))
 #define frame_prev(f)		(frame_islua(f)?frame_prevl(f):frame_prevd(f))
 /* Note: this macro does not skip over FRAME_VARG. */
 
-#define setframe_pc(f, pc)	(setmref((f)->fr.tp.pcr, (pc)))
-#define setframe_ftsz(f, sz)	((f)->fr.tp.ftsz = (sz))
-#define setframe_gc(f, p)	(setgcref((f)->fr.func, (p)))
-
 /* -- C stack frame ------------------------------------------------------- */
 
 /* Macros to access and modify the C stack frame chain. */
@@ -71,22 +127,42 @@ enum {
 #elif LJ_TARGET_X64
 #if LJ_ABI_WIN
 #define CFRAME_OFS_PREV		(13*8)
+#if LJ_GC64
+#define CFRAME_OFS_PC		(12*8)
+#define CFRAME_OFS_L		(11*8)
+#define CFRAME_OFS_ERRF		(21*4)
+#define CFRAME_OFS_NRES		(20*4)
+#define CFRAME_OFS_MULTRES	(8*4)
+#else
 #define CFRAME_OFS_PC		(25*4)
 #define CFRAME_OFS_L		(24*4)
 #define CFRAME_OFS_ERRF		(23*4)
 #define CFRAME_OFS_NRES		(22*4)
 #define CFRAME_OFS_MULTRES	(21*4)
+#endif
 #define CFRAME_SIZE		(10*8)
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 9*16 + 4*8)
 #define CFRAME_SHIFT_MULTRES	0
 #else
 #define CFRAME_OFS_PREV		(4*8)
+#if LJ_GC64
+#define CFRAME_OFS_PC		(3*8)
+#define CFRAME_OFS_L		(2*8)
+#define CFRAME_OFS_ERRF		(3*4)
+#define CFRAME_OFS_NRES		(2*4)
+#define CFRAME_OFS_MULTRES	(0*4)
+#else
 #define CFRAME_OFS_PC		(7*4)
 #define CFRAME_OFS_L		(6*4)
 #define CFRAME_OFS_ERRF		(5*4)
 #define CFRAME_OFS_NRES		(4*4)
 #define CFRAME_OFS_MULTRES	(1*4)
+#endif
+#if LJ_NO_UNWIND
+#define CFRAME_SIZE		(12*8)
+#else
 #define CFRAME_SIZE		(10*8)
+#endif
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 16)
 #define CFRAME_SHIFT_MULTRES	0
 #endif
@@ -103,6 +179,15 @@ enum {
 #define CFRAME_SIZE		64
 #endif
 #define CFRAME_SHIFT_MULTRES	3
+#elif LJ_TARGET_ARM64
+#define CFRAME_OFS_ERRF		196
+#define CFRAME_OFS_NRES		200
+#define CFRAME_OFS_PREV		160
+#define CFRAME_OFS_L		176
+#define CFRAME_OFS_PC		168
+#define CFRAME_OFS_MULTRES	192
+#define CFRAME_SIZE		208
+#define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_PPC
 #if LJ_TARGET_XBOX360
 #define CFRAME_OFS_ERRF		424
@@ -113,7 +198,7 @@ enum {
 #define CFRAME_OFS_MULTRES	408
 #define CFRAME_SIZE		384
 #define CFRAME_SHIFT_MULTRES	3
-#elif LJ_ARCH_PPC64
+#elif LJ_ARCH_PPC32ON64
 #define CFRAME_OFS_ERRF		472
 #define CFRAME_OFS_NRES		468
 #define CFRAME_OFS_PREV		448
@@ -132,15 +217,6 @@ enum {
 #define CFRAME_SIZE		272
 #define CFRAME_SHIFT_MULTRES	3
 #endif
-#elif LJ_TARGET_PPCSPE
-#define CFRAME_OFS_ERRF		28
-#define CFRAME_OFS_NRES		24
-#define CFRAME_OFS_PREV		20
-#define CFRAME_OFS_L		16
-#define CFRAME_OFS_PC		12
-#define CFRAME_OFS_MULTRES	8
-#define CFRAME_SIZE		184
-#define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_MIPS
 #define CFRAME_OFS_ERRF		124
 #define CFRAME_OFS_NRES		120
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_func.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_func.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_func.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_func.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_func.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_func.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_func.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_func.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_gc.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_gc.c
similarity index 94%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_gc.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_gc.c
index b498abaa84d94d8d572ff83eec961d1f888033f6..99d664aa2a17357c392d60f7094dacce7d847a1c 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_gc.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_gc.c
@@ -12,6 +12,7 @@
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_func.h"
@@ -267,12 +268,12 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
 {
   TValue *frame, *top = th->top-1, *bot = tvref(th->stack);
   /* Note: extra vararg frame not skipped, marks function twice (harmless). */
-  for (frame = th->base-1; frame > bot; frame = frame_prev(frame)) {
+  for (frame = th->base-1; frame > bot+LJ_FR2; frame = frame_prev(frame)) {
     GCfunc *fn = frame_func(frame);
     TValue *ftop = frame;
     if (isluafunc(fn)) ftop += funcproto(fn)->framesize;
     if (ftop > top) top = ftop;
-    gc_markobj(g, fn);  /* Need to mark hidden function (or L). */
+    if (!LJ_FR2) gc_markobj(g, fn);  /* Need to mark hidden function (or L). */
   }
   top++;  /* Correct bias of -1 (frame == base-1). */
   if (top > tvref(th->maxstack)) top = tvref(th->maxstack);
@@ -283,7 +284,7 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
 static void gc_traverse_thread(global_State *g, lua_State *th)
 {
   TValue *o, *top = th->top;
-  for (o = tvref(th->stack)+1; o < top; o++)
+  for (o = tvref(th->stack)+1+LJ_FR2; o < top; o++)
     gc_marktv(g, o);
   if (g->gc.state == GCSatomic) {
     top = tvref(th->stack) + th->stacksize;
@@ -348,15 +349,6 @@ static size_t gc_propagate_gray(global_State *g)
 
 /* -- Sweep phase --------------------------------------------------------- */
 
-/* Try to shrink some common data structures. */
-static void gc_shrink(global_State *g, lua_State *L)
-{
-  if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
-    lj_str_resize(L, g->strmask >> 1);  /* Shrink string table. */
-  if (g->tmpbuf.sz > LJ_MIN_SBUF*2)
-    lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1);  /* Shrink temp buf. */
-}
-
 /* Type of GC free functions. */
 typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o);
 
@@ -382,7 +374,7 @@ static const GCFreeFunc gc_freefunc[] = {
 };
 
 /* Full sweep of a GC list. */
-#define gc_fullsweep(g, p)	gc_sweep(g, (p), LJ_MAX_MEM)
+#define gc_fullsweep(g, p)	gc_sweep(g, (p), ~(uint32_t)0)
 
 /* Partial sweep of a GC list. */
 static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
@@ -460,17 +452,18 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
 {
   /* Save and restore lots of state around the __gc callback. */
   uint8_t oldh = hook_save(g);
-  MSize oldt = g->gc.threshold;
+  GCSize oldt = g->gc.threshold;
   int errcode;
   TValue *top;
   lj_trace_abort(g);
-  top = L->top;
-  L->top = top+2;
   hook_entergc(g);  /* Disable hooks and new traces during __gc. */
   g->gc.threshold = LJ_MAX_MEM;  /* Prevent GC steps. */
-  copyTV(L, top, mo);
-  setgcV(L, top+1, o, ~o->gch.gct);
-  errcode = lj_vm_pcall(L, top+1, 1+0, -1);  /* Stack: |mo|o| -> | */
+  top = L->top;
+  copyTV(L, top++, mo);
+  if (LJ_FR2) setnilV(top++);
+  setgcV(L, top, o, ~o->gch.gct);
+  L->top = top+1;
+  errcode = lj_vm_pcall(L, top, 1+0, -1);  /* Stack: |mo|o| -> | */
   hook_restore(g, oldh);
   g->gc.threshold = oldt;  /* Restore GC threshold. */
   if (errcode)
@@ -483,7 +476,7 @@ static void gc_finalize(lua_State *L)
   global_State *g = G(L);
   GCobj *o = gcnext(gcref(g->gc.mmudata));
   cTValue *mo;
-  lua_assert(gcref(g->jit_L) == NULL);  /* Must not be called on trace. */
+  lua_assert(tvref(g->jit_base) == NULL);  /* Must not be called on trace. */
   /* Unchain from list of userdata to be finalized. */
   if (o == gcref(g->gc.mmudata))
     setgcrefnull(g->gc.mmudata);
@@ -592,11 +585,13 @@ static void atomic(global_State *g, lua_State *L)
   /* All marking done, clear weak tables. */
   gc_clearweak(gcref(g->gc.weak));
 
+  lj_buf_shrink(L, &g->tmpbuf);  /* Shrink temp buffer. */
+
   /* Prepare for sweep phase. */
   g->gc.currentwhite = (uint8_t)otherwhite(g);  /* Flip current white. */
   g->strempty.marked = g->gc.currentwhite;
   setmref(g->gc.sweep, &g->gc.root);
-  g->gc.estimate = g->gc.total - (MSize)udsize;  /* Initial estimate. */
+  g->gc.estimate = g->gc.total - (GCSize)udsize;  /* Initial estimate. */
 }
 
 /* GC state machine. Returns a cost estimate for each step performed. */
@@ -613,14 +608,14 @@ static size_t gc_onestep(lua_State *L)
     g->gc.state = GCSatomic;  /* End of mark phase. */
     return 0;
   case GCSatomic:
-    if (gcref(g->jit_L))  /* Don't run atomic phase on trace. */
+    if (tvref(g->jit_base))  /* Don't run atomic phase on trace. */
       return LJ_MAX_MEM;
     atomic(g, L);
     g->gc.state = GCSsweepstring;  /* Start of sweep phase. */
     g->gc.sweepstr = 0;
     return 0;
   case GCSsweepstring: {
-    MSize old = g->gc.total;
+    GCSize old = g->gc.total;
     gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]);  /* Sweep one chain. */
     if (g->gc.sweepstr > g->strmask)
       g->gc.state = GCSsweep;  /* All string hash chains sweeped. */
@@ -629,12 +624,13 @@ static size_t gc_onestep(lua_State *L)
     return GCSWEEPCOST;
     }
   case GCSsweep: {
-    MSize old = g->gc.total;
+    GCSize old = g->gc.total;
     setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX));
     lua_assert(old >= g->gc.total);
     g->gc.estimate -= old - g->gc.total;
     if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) {
-      gc_shrink(g, L);
+      if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
+	lj_str_resize(L, g->strmask >> 1);  /* Shrink string table. */
       if (gcref(g->gc.mmudata)) {  /* Need any finalizations? */
 	g->gc.state = GCSfinalize;
 #if LJ_HASFFI
@@ -649,7 +645,7 @@ static size_t gc_onestep(lua_State *L)
     }
   case GCSfinalize:
     if (gcref(g->gc.mmudata) != NULL) {
-      if (gcref(g->jit_L))  /* Don't call finalizers on trace. */
+      if (tvref(g->jit_base))  /* Don't call finalizers on trace. */
 	return LJ_MAX_MEM;
       gc_finalize(L);  /* Finalize one userdata object. */
       if (g->gc.estimate > GCFINALIZECOST)
@@ -672,7 +668,7 @@ static size_t gc_onestep(lua_State *L)
 int LJ_FASTCALL lj_gc_step(lua_State *L)
 {
   global_State *g = G(L);
-  MSize lim;
+  GCSize lim;
   int32_t ostate = g->vmstate;
   setvmstate(g, GC);
   lim = (GCSTEPSIZE/100) * g->gc.stepmul;
@@ -681,13 +677,13 @@ int LJ_FASTCALL lj_gc_step(lua_State *L)
   if (g->gc.total > g->gc.threshold)
     g->gc.debt += g->gc.total - g->gc.threshold;
   do {
-    lim -= (MSize)gc_onestep(L);
+    lim -= (GCSize)gc_onestep(L);
     if (g->gc.state == GCSpause) {
       g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
       g->vmstate = ostate;
       return 1;  /* Finished a GC cycle. */
     }
-  } while ((int32_t)lim > 0);
+  } while (sizeof(lim) == 8 ? ((int64_t)lim > 0) : ((int32_t)lim > 0));
   if (g->gc.debt < GCSTEPSIZE) {
     g->gc.threshold = g->gc.total + GCSTEPSIZE;
     g->vmstate = ostate;
@@ -711,8 +707,8 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
 /* Perform multiple GC steps. Called from JIT-compiled code. */
 int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps)
 {
-  lua_State *L = gco2th(gcref(g->jit_L));
-  L->base = mref(G(L)->jit_base, TValue);
+  lua_State *L = gco2th(gcref(g->cur_L));
+  L->base = tvref(G(L)->jit_base);
   L->top = curr_topL(L);
   while (steps-- > 0 && lj_gc_step(L) == 0)
     ;
@@ -806,7 +802,7 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno)
 /* -- Allocator ----------------------------------------------------------- */
 
 /* Call pluggable memory allocator to allocate or resize a fragment. */
-void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz)
+void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz)
 {
   global_State *g = G(L);
   lua_assert((osz == 0) == (p == NULL));
@@ -814,19 +810,19 @@ void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz)
   if (p == NULL && nsz > 0)
     lj_err_mem(L);
   lua_assert((nsz == 0) == (p == NULL));
-  lua_assert(checkptr32(p));
+  lua_assert(checkptrGC(p));
   g->gc.total = (g->gc.total - osz) + nsz;
   return p;
 }
 
 /* Allocate new GC object and link it to the root set. */
-void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size)
+void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size)
 {
   global_State *g = G(L);
   GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
   if (o == NULL)
     lj_err_mem(L);
-  lua_assert(checkptr32(o));
+  lua_assert(checkptrGC(o));
   g->gc.total += size;
   setgcrefr(o->gch.nextgc, g->gc.root);
   setgcref(g->gc.root, o);
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_gc.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_gc.h
similarity index 93%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_gc.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_gc.h
index ba061bc5d9b8d27d2575bead277ebc8823582555..847eb7835d96912cb24643c5d2f56f065819ce23 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_gc.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_gc.h
@@ -107,8 +107,8 @@ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t)
       lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); }
 
 /* Allocator. */
-LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz);
-LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size);
+LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz);
+LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size);
 LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
 			  MSize *szp, MSize lim, MSize esz);
 
@@ -116,13 +116,13 @@ LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
 
 static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize)
 {
-  g->gc.total -= (MSize)osize;
+  g->gc.total -= (GCSize)osize;
   g->allocf(g->allocd, p, osize, 0);
 }
 
-#define lj_mem_newvec(L, n, t)	((t *)lj_mem_new(L, (MSize)((n)*sizeof(t))))
+#define lj_mem_newvec(L, n, t)	((t *)lj_mem_new(L, (GCSize)((n)*sizeof(t))))
 #define lj_mem_reallocvec(L, p, on, n, t) \
-  ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t))))
+  ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (GCSize)((n)*sizeof(t))))
 #define lj_mem_growvec(L, p, n, m, t) \
   ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t)))
 #define lj_mem_freevec(g, p, n, t)	lj_mem_free(g, (p), (n)*sizeof(t))
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_gdbjit.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_gdbjit.c
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_gdbjit.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_gdbjit.c
index d3f3e6119b3217917a57fe8906bdd9cc2ec05c45..9b95e525840e743da3ddfbb14dd0f4c4032e2832 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_gdbjit.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_gdbjit.c
@@ -14,6 +14,8 @@
 #include "lj_err.h"
 #include "lj_debug.h"
 #include "lj_frame.h"
+#include "lj_buf.h"
+#include "lj_strfmt.h"
 #include "lj_jit.h"
 #include "lj_dispatch.h"
 
@@ -428,16 +430,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n)
   *ctx->p++ = '0' + n;
 }
 
-/* Add a ULEB128 value. */
-static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v)
-{
-  uint8_t *p = ctx->p;
-  for (; v >= 0x80; v >>= 7)
-    *p++ = (uint8_t)((v & 0x7f) | 0x80);
-  *p++ = (uint8_t)v;
-  ctx->p = p;
-}
-
 /* Add a SLEB128 value. */
 static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
 {
@@ -454,7 +446,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
 #define DU16(x)		(*(uint16_t *)p = (x), p += 2)
 #define DU32(x)		(*(uint32_t *)p = (x), p += 4)
 #define DADDR(x)	(*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
-#define DUV(x)		(ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p)
+#define DUV(x)		(p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x)))
 #define DSV(x)		(ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
 #define DSTR(str)	(ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
 #define DALIGNNOP(s)	while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
@@ -564,8 +556,8 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
     DB(DW_CFA_offset|DW_REG_15); DUV(4);
     DB(DW_CFA_offset|DW_REG_14); DUV(5);
     /* Extra registers saved for JIT-compiled code. */
-    DB(DW_CFA_offset|DW_REG_13); DUV(9);
-    DB(DW_CFA_offset|DW_REG_12); DUV(10);
+    DB(DW_CFA_offset|DW_REG_13); DUV(LJ_GC64 ? 10 : 9);
+    DB(DW_CFA_offset|DW_REG_12); DUV(LJ_GC64 ? 11 : 10);
 #elif LJ_TARGET_ARM
     {
       int i;
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_gdbjit.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_gdbjit.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_gdbjit.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_gdbjit.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ir.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ir.c
similarity index 97%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ir.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ir.c
index 439f3fc3456ea504d1de815cf466a6386f919825..9682e05e577116d59dd8548607c1bdb9f4330432 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ir.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ir.c
@@ -15,6 +15,7 @@
 #if LJ_HASJIT
 
 #include "lj_gc.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_ir.h"
@@ -29,6 +30,7 @@
 #endif
 #include "lj_vm.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 #include "lj_lib.h"
 
 /* Some local macros to save typing. Undef'd at the end. */
@@ -251,7 +253,7 @@ TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
       goto found;
   ref = ir_nextk(J);
   ir = IR(ref);
-  lua_assert(checkptr32(tv));
+  lua_assert(checkptrGC(tv));
   setmref(ir->ptr, tv);
   ir->t.irt = t;
   ir->o = op;
@@ -305,6 +307,7 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
 {
   IRIns *ir, *cir = J->cur.ir;
   IRRef ref;
+  lua_assert(!LJ_GC64);  /* TODO_GC64: major changes required. */
   lua_assert(!isdead(J2G(J), o));
   for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
     if (ir_kgc(&cir[ref]) == o)
@@ -390,7 +393,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
   UNUSED(L);
   lua_assert(ir->o != IR_KSLOT);  /* Common mistake. */
   switch (ir->o) {
-  case IR_KPRI: setitype(tv, irt_toitype(ir->t)); break;
+  case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
   case IR_KINT: setintV(tv, ir->i); break;
   case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
   case IR_KPTR: case IR_KKPTR: case IR_KNULL:
@@ -443,7 +446,8 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
   if (!tref_isstr(tr)) {
     if (!tref_isnumber(tr))
       lj_trace_err(J, LJ_TRERR_BADTYPE);
-    tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
+    tr = emitir(IRT(IR_TOSTR, IRT_STR), tr,
+		tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
   }
   return tr;
 }
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ir.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ir.h
similarity index 94%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ir.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ir.h
index 8126482e84904e57d18068c4e21c12ba800f596d..56e19774c9773077d03bfd6b1a37956030101583 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ir.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ir.h
@@ -40,6 +40,7 @@
   _(USE,	S , ref, ___) \
   _(PHI,	S , ref, ref) \
   _(RENAME,	S , ref, lit) \
+  _(PROF,	S , ___, ___) \
   \
   /* Constants. */ \
   _(KPRI,	N , ___, ___) \
@@ -96,6 +97,7 @@
   _(UREFC,	LW, ref, lit) \
   _(FREF,	R , ref, lit) \
   _(STRREF,	N , ref, ref) \
+  _(LREF,	L , ___, ___) \
   \
   /* Loads and Stores. These must be in the same order. */ \
   _(ALOAD,	L , ref, ___) \
@@ -120,6 +122,11 @@
   _(CNEW,	AW, ref, ref) \
   _(CNEWI,	NW, ref, ref)  /* CSE is ok, not marked as A. */ \
   \
+  /* Buffer operations. */ \
+  _(BUFHDR,	L , ref, lit) \
+  _(BUFPUT,	L , ref, ref) \
+  _(BUFSTR,	A , ref, ref) \
+  \
   /* Barriers. */ \
   _(TBAR,	S , ref, ___) \
   _(OBAR,	S , ref, ref) \
@@ -128,11 +135,12 @@
   /* Type conversions. */ \
   _(CONV,	NW, ref, lit) \
   _(TOBIT,	N , ref, ref) \
-  _(TOSTR,	N , ref, ___) \
+  _(TOSTR,	N , ref, lit) \
   _(STRTO,	N , ref, ___) \
   \
   /* Calls. */ \
   _(CALLN,	N , ref, lit) \
+  _(CALLA,	A , ref, lit) \
   _(CALLL,	L , ref, lit) \
   _(CALLS,	S , ref, lit) \
   _(CALLXS,	S , ref, ref) \
@@ -186,6 +194,8 @@ IRFPMDEF(FPMENUM)
   _(STR_LEN,	offsetof(GCstr, len)) \
   _(FUNC_ENV,	offsetof(GCfunc, l.env)) \
   _(FUNC_PC,	offsetof(GCfunc, l.pc)) \
+  _(FUNC_FFID,	offsetof(GCfunc, l.ffid)) \
+  _(THREAD_ENV,	offsetof(lua_State, env)) \
   _(TAB_META,	offsetof(GCtab, metatable)) \
   _(TAB_ARRAY,	offsetof(GCtab, array)) \
   _(TAB_NODE,	offsetof(GCtab, node)) \
@@ -221,13 +231,16 @@ IRFLDEF(FLENUM)
 #define IRXLOAD_VOLATILE	2	/* Load from volatile data. */
 #define IRXLOAD_UNALIGNED	4	/* Unaligned load. */
 
+/* BUFHDR mode, stored in op2. */
+#define IRBUFHDR_RESET		0	/* Reset buffer. */
+#define IRBUFHDR_APPEND		1	/* Append to buffer. */
+
 /* CONV mode, stored in op2. */
 #define IRCONV_SRCMASK		0x001f	/* Source IRType. */
 #define IRCONV_DSTMASK		0x03e0	/* Dest. IRType (also in ir->t). */
 #define IRCONV_DSH		5
 #define IRCONV_NUM_INT		((IRT_NUM<<IRCONV_DSH)|IRT_INT)
 #define IRCONV_INT_NUM		((IRT_INT<<IRCONV_DSH)|IRT_NUM)
-#define IRCONV_TRUNC		0x0400	/* Truncate number to integer. */
 #define IRCONV_SEXT		0x0800	/* Sign-extend integer to integer. */
 #define IRCONV_MODEMASK		0x0fff
 #define IRCONV_CONVMASK		0xf000
@@ -238,6 +251,11 @@ IRFLDEF(FLENUM)
 #define IRCONV_INDEX  (2<<IRCONV_CSH)	/* Check + special backprop rules. */
 #define IRCONV_CHECK  (3<<IRCONV_CSH)	/* Number checked for integerness. */
 
+/* TOSTR mode, stored in op2. */
+#define IRTOSTR_INT		0	/* Convert integer to string. */
+#define IRTOSTR_NUM		1	/* Convert number to string. */
+#define IRTOSTR_CHAR		2	/* Convert char value to string. */
+
 /* -- IR operands --------------------------------------------------------- */
 
 /* IR operand mode (2 bit). */
@@ -302,6 +320,7 @@ IRTDEF(IRTENUM)
   IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32,
   IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT,
   IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32,
+  /* TODO_GC64: major changes required for all uses of IRT_P32. */
 
   /* Additional flags. */
   IRT_MARK = 0x20,	/* Marker for misc. purposes. */
@@ -353,7 +372,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
 #define irt_isaddr(t)		(irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
 #define irt_isint64(t)		(irt_typerange((t), IRT_I64, IRT_U64))
 
-#if LJ_64
+#if LJ_GC64
+#define IRT_IS64 \
+  ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\
+   (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\
+   (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA))
+#elif LJ_64
 #define IRT_IS64 \
   ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
 #else
@@ -374,7 +398,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
     return IRT_INT;
   else if (tvisnum(tv))
     return IRT_NUM;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   else if (tvislightud(tv))
     return IRT_LIGHTUD;
 #endif
@@ -464,6 +488,7 @@ typedef uint32_t TRef;
 #define tref_isnil(tr)		(tref_istype((tr), IRT_NIL))
 #define tref_isfalse(tr)	(tref_istype((tr), IRT_FALSE))
 #define tref_istrue(tr)		(tref_istype((tr), IRT_TRUE))
+#define tref_islightud(tr)	(tref_istype((tr), IRT_LIGHTUD))
 #define tref_isstr(tr)		(tref_istype((tr), IRT_STR))
 #define tref_isfunc(tr)		(tref_istype((tr), IRT_FUNC))
 #define tref_iscdata(tr)	(tref_istype((tr), IRT_CDATA))
@@ -528,6 +553,7 @@ typedef union IRIns {
   MRef ptr;		/* Pointer constant (overlaps op12). */
 } IRIns;
 
+/* TODO_GC64: major changes required. */
 #define ir_kgc(ir)	check_exp((ir)->o == IR_KGC, gcref((ir)->gcr))
 #define ir_kstr(ir)	(gco2str(ir_kgc((ir))))
 #define ir_ktab(ir)	(gco2tab(ir_kgc((ir))))
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ircall.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ircall.h
similarity index 62%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_ircall.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ircall.h
index 893dac2f81c131d6532ac1a7eed8e40c2b94b56b..84e41ecfccbd8cc090ac6519467131461d157db8 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_ircall.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_ircall.h
@@ -16,7 +16,7 @@ typedef struct CCallInfo {
   uint32_t flags;		/* Number of arguments and flags. */
 } CCallInfo;
 
-#define CCI_NARGS(ci)		((ci)->flags & 0xff)	/* Extract # of args. */
+#define CCI_NARGS(ci)		((ci)->flags & 0xff)	/* # of args. */
 #define CCI_NARGS_MAX		32			/* Max. # of args. */
 
 #define CCI_OTSHIFT		16
@@ -25,6 +25,7 @@ typedef struct CCallInfo {
 #define CCI_OP(ci)		((ci)->flags >> CCI_OPSHIFT)  /* Get op. */
 
 #define CCI_CALL_N		(IR_CALLN << CCI_OPSHIFT)
+#define CCI_CALL_A		(IR_CALLA << CCI_OPSHIFT)
 #define CCI_CALL_L		(IR_CALLL << CCI_OPSHIFT)
 #define CCI_CALL_S		(IR_CALLS << CCI_OPSHIFT)
 #define CCI_CALL_FN		(CCI_CALL_N|CCI_CC_FASTCALL)
@@ -45,6 +46,17 @@ typedef struct CCallInfo {
 #define CCI_CC_FASTCALL		0x2000	/* Fastcall calling convention. */
 #define CCI_CC_STDCALL		0x3000	/* Stdcall calling convention. */
 
+/* Extra args for SOFTFP, SPLIT 64 bit. */
+#define CCI_XARGS_SHIFT		14
+#define CCI_XARGS(ci)		(((ci)->flags >> CCI_XARGS_SHIFT) & 3)
+#define CCI_XA			(1u << CCI_XARGS_SHIFT)
+
+#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
+#define CCI_XNARGS(ci)		(CCI_NARGS((ci)) + CCI_XARGS((ci)))
+#else
+#define CCI_XNARGS(ci)		CCI_NARGS((ci))
+#endif
+
 /* Helpers for conditional function definitions. */
 #define IRCALLCOND_ANY(x)		x
 
@@ -93,26 +105,52 @@ typedef struct CCallInfo {
 #endif
 
 #if LJ_SOFTFP
-#define ARG1_FP		2	/* Treat as 2 32 bit arguments. */
+#define XA_FP		CCI_XA
+#define XA2_FP		(CCI_XA+CCI_XA)
 #else
-#define ARG1_FP		1
+#define XA_FP		0
+#define XA2_FP		0
 #endif
 
 #if LJ_32
-#define ARG2_64		4	/* Treat as 4 32 bit arguments. */
+#define XA_64		CCI_XA
+#define XA2_64		(CCI_XA+CCI_XA)
 #else
-#define ARG2_64		2
+#define XA_64		0
+#define XA2_64		0
 #endif
 
 /* Function definitions for CALL* instructions. */
 #define IRCALLDEF(_) \
   _(ANY,	lj_str_cmp,		2,  FN, INT, CCI_NOFPRCLOBBER) \
+  _(ANY,	lj_str_find,		4,   N, P32, 0) \
   _(ANY,	lj_str_new,		3,   S, STR, CCI_L) \
   _(ANY,	lj_strscan_num,		2,  FN, INT, 0) \
-  _(ANY,	lj_str_fromint,		2,  FN, STR, CCI_L) \
-  _(ANY,	lj_str_fromnum,		2,  FN, STR, CCI_L) \
+  _(ANY,	lj_strfmt_int,		2,  FN, STR, CCI_L) \
+  _(ANY,	lj_strfmt_num,		2,  FN, STR, CCI_L) \
+  _(ANY,	lj_strfmt_char,		2,  FN, STR, CCI_L) \
+  _(ANY,	lj_strfmt_putint,	2,  FL, P32, 0) \
+  _(ANY,	lj_strfmt_putnum,	2,  FL, P32, 0) \
+  _(ANY,	lj_strfmt_putquoted,	2,  FL, P32, 0) \
+  _(ANY,	lj_strfmt_putfxint,	3,   L, P32, XA_64) \
+  _(ANY,	lj_strfmt_putfnum_int,	3,   L, P32, XA_FP) \
+  _(ANY,	lj_strfmt_putfnum_uint,	3,   L, P32, XA_FP) \
+  _(ANY,	lj_strfmt_putfnum,	3,   L, P32, XA_FP) \
+  _(ANY,	lj_strfmt_putfstr,	3,   L, P32, 0) \
+  _(ANY,	lj_strfmt_putfchar,	3,   L, P32, 0) \
+  _(ANY,	lj_buf_putmem,		3,   S, P32, 0) \
+  _(ANY,	lj_buf_putstr,		2,  FL, P32, 0) \
+  _(ANY,	lj_buf_putchar,		2,  FL, P32, 0) \
+  _(ANY,	lj_buf_putstr_reverse,	2,  FL, P32, 0) \
+  _(ANY,	lj_buf_putstr_lower,	2,  FL, P32, 0) \
+  _(ANY,	lj_buf_putstr_upper,	2,  FL, P32, 0) \
+  _(ANY,	lj_buf_putstr_rep,	3,   L, P32, 0) \
+  _(ANY,	lj_buf_puttab,		5,   L, P32, 0) \
+  _(ANY,	lj_buf_tostr,		1,  FL, STR, 0) \
+  _(ANY,	lj_tab_new_ah,		3,   A, TAB, CCI_L) \
   _(ANY,	lj_tab_new1,		2,  FS, TAB, CCI_L) \
   _(ANY,	lj_tab_dup,		2,  FS, TAB, CCI_L) \
+  _(ANY,	lj_tab_clear,		1,  FS, NIL, 0) \
   _(ANY,	lj_tab_newkey,		3,   S, P32, CCI_L) \
   _(ANY,	lj_tab_len,		1,  FL, INT, 0) \
   _(ANY,	lj_gc_step_jit,		2,  FS, NIL, CCI_L) \
@@ -120,29 +158,29 @@ typedef struct CCallInfo {
   _(ANY,	lj_mem_newgco,		2,  FS, P32, CCI_L) \
   _(ANY,	lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_RANDFPR)\
   _(ANY,	lj_vm_modi,		2,  FN, INT, 0) \
-  _(ANY,	sinh,			ARG1_FP,  N, NUM, 0) \
-  _(ANY,	cosh,			ARG1_FP,  N, NUM, 0) \
-  _(ANY,	tanh,			ARG1_FP,  N, NUM, 0) \
-  _(ANY,	fputc,			2,  S, INT, 0) \
-  _(ANY,	fwrite,			4,  S, INT, 0) \
-  _(ANY,	fflush,			1,  S, INT, 0) \
+  _(ANY,	sinh,			1,   N, NUM, XA_FP) \
+  _(ANY,	cosh,			1,   N, NUM, XA_FP) \
+  _(ANY,	tanh,			1,   N, NUM, XA_FP) \
+  _(ANY,	fputc,			2,   S, INT, 0) \
+  _(ANY,	fwrite,			4,   S, INT, 0) \
+  _(ANY,	fflush,			1,   S, INT, 0) \
   /* ORDER FPM */ \
-  _(FPMATH,	lj_vm_floor,		ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	lj_vm_ceil,		ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	lj_vm_trunc,		ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	sqrt,			ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	exp,			ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	lj_vm_exp2,		ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	log,			ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	lj_vm_log2,		ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	log10,			ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	sin,			ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	cos,			ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	tan,			ARG1_FP,   N, NUM, 0) \
-  _(FPMATH,	lj_vm_powi,		ARG1_FP+1, N, NUM, 0) \
-  _(FPMATH,	pow,			ARG1_FP*2, N, NUM, 0) \
-  _(FPMATH,	atan2,			ARG1_FP*2, N, NUM, 0) \
-  _(FPMATH,	ldexp,			ARG1_FP+1, N, NUM, 0) \
+  _(FPMATH,	lj_vm_floor,		1,   N, NUM, XA_FP) \
+  _(FPMATH,	lj_vm_ceil,		1,   N, NUM, XA_FP) \
+  _(FPMATH,	lj_vm_trunc,		1,   N, NUM, XA_FP) \
+  _(FPMATH,	sqrt,			1,   N, NUM, XA_FP) \
+  _(ANY,	exp,			1,   N, NUM, XA_FP) \
+  _(ANY,	lj_vm_exp2,		1,   N, NUM, XA_FP) \
+  _(ANY,	log,			1,   N, NUM, XA_FP) \
+  _(ANY,	lj_vm_log2,		1,   N, NUM, XA_FP) \
+  _(ANY,	log10,			1,   N, NUM, XA_FP) \
+  _(ANY,	sin,			1,   N, NUM, XA_FP) \
+  _(ANY,	cos,			1,   N, NUM, XA_FP) \
+  _(ANY,	tan,			1,   N, NUM, XA_FP) \
+  _(ANY,	lj_vm_powi,		2,   N, NUM, XA_FP) \
+  _(ANY,	pow,			2,   N, NUM, XA2_FP) \
+  _(ANY,	atan2,			2,   N, NUM, XA2_FP) \
+  _(ANY,	ldexp,			2,   N, NUM, XA_FP) \
   _(SOFTFP,	lj_vm_tobit,		2,   N, INT, 0) \
   _(SOFTFP,	softfp_add,		4,   N, NUM, 0) \
   _(SOFTFP,	softfp_sub,		4,   N, NUM, 0) \
@@ -159,26 +197,32 @@ typedef struct CCallInfo {
   _(SOFTFP_FFI,	softfp_ui2f,		1,   N, FLOAT, 0) \
   _(SOFTFP_FFI,	softfp_f2i,		1,   N, INT, 0) \
   _(SOFTFP_FFI,	softfp_f2ui,		1,   N, INT, 0) \
-  _(FP64_FFI,	fp64_l2d,		2,   N, NUM, 0) \
-  _(FP64_FFI,	fp64_ul2d,		2,   N, NUM, 0) \
-  _(FP64_FFI,	fp64_l2f,		2,   N, FLOAT, 0) \
-  _(FP64_FFI,	fp64_ul2f,		2,   N, FLOAT, 0) \
-  _(FP64_FFI,	fp64_d2l,		ARG1_FP,   N, I64, 0) \
-  _(FP64_FFI,	fp64_d2ul,		ARG1_FP,   N, U64, 0) \
+  _(FP64_FFI,	fp64_l2d,		1,   N, NUM, XA_64) \
+  _(FP64_FFI,	fp64_ul2d,		1,   N, NUM, XA_64) \
+  _(FP64_FFI,	fp64_l2f,		1,   N, FLOAT, XA_64) \
+  _(FP64_FFI,	fp64_ul2f,		1,   N, FLOAT, XA_64) \
+  _(FP64_FFI,	fp64_d2l,		1,   N, I64, XA_FP) \
+  _(FP64_FFI,	fp64_d2ul,		1,   N, U64, XA_FP) \
   _(FP64_FFI,	fp64_f2l,		1,   N, I64, 0) \
   _(FP64_FFI,	fp64_f2ul,		1,   N, U64, 0) \
-  _(FFI,	lj_carith_divi64,	ARG2_64,   N, I64, CCI_NOFPRCLOBBER) \
-  _(FFI,	lj_carith_divu64,	ARG2_64,   N, U64, CCI_NOFPRCLOBBER) \
-  _(FFI,	lj_carith_modi64,	ARG2_64,   N, I64, CCI_NOFPRCLOBBER) \
-  _(FFI,	lj_carith_modu64,	ARG2_64,   N, U64, CCI_NOFPRCLOBBER) \
-  _(FFI,	lj_carith_powi64,	ARG2_64,   N, I64, CCI_NOFPRCLOBBER) \
-  _(FFI,	lj_carith_powu64,	ARG2_64,   N, U64, CCI_NOFPRCLOBBER) \
-  _(FFI,	lj_cdata_setfin,	2,        FN, P32, CCI_L) \
-  _(FFI,	strlen,			1,         L, INTP, 0) \
-  _(FFI,	memcpy,			3,         S, PTR, 0) \
-  _(FFI,	memset,			3,         S, PTR, 0) \
-  _(FFI,	lj_vm_errno,		0,         S, INT, CCI_NOFPRCLOBBER) \
-  _(FFI32,	lj_carith_mul64,	ARG2_64,   N, I64, CCI_NOFPRCLOBBER)
+  _(FFI,	lj_carith_divi64,	2,   N, I64, XA2_64|CCI_NOFPRCLOBBER) \
+  _(FFI,	lj_carith_divu64,	2,   N, U64, XA2_64|CCI_NOFPRCLOBBER) \
+  _(FFI,	lj_carith_modi64,	2,   N, I64, XA2_64|CCI_NOFPRCLOBBER) \
+  _(FFI,	lj_carith_modu64,	2,   N, U64, XA2_64|CCI_NOFPRCLOBBER) \
+  _(FFI,	lj_carith_powi64,	2,   N, I64, XA2_64|CCI_NOFPRCLOBBER) \
+  _(FFI,	lj_carith_powu64,	2,   N, U64, XA2_64|CCI_NOFPRCLOBBER) \
+  _(FFI,	lj_cdata_newv,		4,   S, CDATA, CCI_L) \
+  _(FFI,	lj_cdata_setfin,	4,   S, NIL, CCI_L) \
+  _(FFI,	strlen,			1,   L, INTP, 0) \
+  _(FFI,	memcpy,			3,   S, PTR, 0) \
+  _(FFI,	memset,			3,   S, PTR, 0) \
+  _(FFI,	lj_vm_errno,		0,   S, INT, CCI_NOFPRCLOBBER) \
+  _(FFI32,	lj_carith_mul64,	2,   N, I64, XA2_64|CCI_NOFPRCLOBBER) \
+  _(FFI32,	lj_carith_shl64,	2,   N, U64, XA_64|CCI_NOFPRCLOBBER) \
+  _(FFI32,	lj_carith_shr64,	2,   N, U64, XA_64|CCI_NOFPRCLOBBER) \
+  _(FFI32,	lj_carith_sar64,	2,   N, U64, XA_64|CCI_NOFPRCLOBBER) \
+  _(FFI32,	lj_carith_rol64,	2,   N, U64, XA_64|CCI_NOFPRCLOBBER) \
+  _(FFI32,	lj_carith_ror64,	2,   N, U64, XA_64|CCI_NOFPRCLOBBER) \
   \
   /* End of list. */
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_iropt.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_iropt.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_iropt.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_iropt.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_jit.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_jit.h
similarity index 92%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_jit.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_jit.h
index eb7654770bc1e71e463661e8e79f88e44ace0b96..1df56cae55ec6e5f700c2a1d341351b417f6ddb4 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_jit.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_jit.h
@@ -14,18 +14,15 @@
 
 /* CPU-specific JIT engine flags. */
 #if LJ_TARGET_X86ORX64
-#define JIT_F_CMOV		0x00000010
-#define JIT_F_SSE2		0x00000020
-#define JIT_F_SSE3		0x00000040
-#define JIT_F_SSE4_1		0x00000080
-#define JIT_F_P4		0x00000100
-#define JIT_F_PREFER_IMUL	0x00000200
-#define JIT_F_SPLIT_XMM		0x00000400
-#define JIT_F_LEA_AGU		0x00000800
+#define JIT_F_SSE2		0x00000010
+#define JIT_F_SSE3		0x00000020
+#define JIT_F_SSE4_1		0x00000040
+#define JIT_F_PREFER_IMUL	0x00000080
+#define JIT_F_LEA_AGU		0x00000100
 
 /* Names for the CPU-specific flags. Must match the order above. */
-#define JIT_F_CPU_FIRST		JIT_F_CMOV
-#define JIT_F_CPUSTRING		"\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM"
+#define JIT_F_CPU_FIRST		JIT_F_SSE2
+#define JIT_F_CPUSTRING		"\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM"
 #elif LJ_TARGET_ARM
 #define JIT_F_ARMV6_		0x00000010
 #define JIT_F_ARMV6T2_		0x00000020
@@ -100,6 +97,7 @@
   _(\012, maxirconst,	500)	/* Max. # of IR constants of a trace. */ \
   _(\007, maxside,	100)	/* Max. # of side traces of a root trace. */ \
   _(\007, maxsnap,	500)	/* Max. # of snapshots for a trace. */ \
+  _(\011, minstitch,	0)	/* Min. # of IR ins for a stitched trace. */ \
   \
   _(\007, hotloop,	56)	/* # of iter. to detect a hot loop/call. */ \
   _(\007, hotexit,	10)	/* # of taken exits to start a side trace. */ \
@@ -205,7 +203,8 @@ typedef enum {
   LJ_TRLINK_UPREC,		/* Up-recursion. */
   LJ_TRLINK_DOWNREC,		/* Down-recursion. */
   LJ_TRLINK_INTERP,		/* Fallback to interpreter. */
-  LJ_TRLINK_RETURN		/* Return to interpreter. */
+  LJ_TRLINK_RETURN,		/* Return to interpreter. */
+  LJ_TRLINK_STITCH		/* Trace stitching. */
 } TraceLink;
 
 /* Trace object. */
@@ -214,6 +213,9 @@ typedef struct GCtrace {
   uint8_t topslot;	/* Top stack slot already checked to be allocated. */
   uint8_t linktype;	/* Type of link. */
   IRRef nins;		/* Next IR instruction. Biased with REF_BIAS. */
+#if LJ_GC64
+  uint32_t unused_gc64;
+#endif
   GCRef gclist;
   IRIns *ir;		/* IR instructions/constants. Biased with REF_BIAS. */
   IRRef nk;		/* Lowest IR constant. Biased with REF_BIAS. */
@@ -288,6 +290,16 @@ typedef struct ScEvEntry {
   uint8_t dir;		/* Direction. 1: +, 0: -. */
 } ScEvEntry;
 
+/* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */
+typedef struct RBCHashEntry {
+  MRef pc;		/* Bytecode PC. */
+  GCRef pt;		/* Prototype. */
+  IRRef ref;		/* IR reference. */
+} RBCHashEntry;
+
+/* Number of slots in the reverse bytecode hash table. Must be a power of 2. */
+#define RBCHASH_SLOTS	8
+
 /* 128 bit SIMD constants. */
 enum {
   LJ_KSIMD_ABS,
@@ -362,8 +374,9 @@ typedef struct jit_State {
 
   PostProc postproc;	/* Required post-processing after execution. */
 #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
-  int needsplit;	/* Need SPLIT pass. */
+  uint8_t needsplit;	/* Need SPLIT pass. */
 #endif
+  uint8_t retryrec;	/* Retry recording. */
 
   GCRef *trace;		/* Array of traces. */
   TraceNo freetrace;	/* Start of scan for next free trace. */
@@ -380,6 +393,10 @@ typedef struct jit_State {
   uint32_t penaltyslot;	/* Round-robin index into penalty slots. */
   uint32_t prngstate;	/* PRNG state. */
 
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+  RBCHashEntry rbchash[RBCHASH_SLOTS];  /* Reverse bytecode map. */
+#endif
+
   BPropEntry bpropcache[BPROP_SLOTS];  /* Backpropagation cache slots. */
   uint32_t bpropslot;	/* Round-robin index into bpropcache slots. */
 
@@ -400,6 +417,12 @@ typedef struct jit_State {
   size_t szallmcarea;	/* Total size of all allocated mcode areas. */
 
   TValue errinfo;	/* Additional info element for trace errors. */
+
+#if LJ_HASPROFILE
+  GCproto *prev_pt;	/* Previous prototype. */
+  BCLine prev_line;	/* Previous line. */
+  int prof_mode;	/* Profiling mode: 0, 'f', 'l'. */
+#endif
 }
 #if LJ_TARGET_ARM
 LJ_ALIGN(16)		/* For DISPATCH-relative addresses in assembler part. */
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lex.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lex.c
new file mode 100644
index 0000000000000000000000000000000000000000..8409cd78a7136224947e72e60c8f76e0011bcb0f
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lex.c
@@ -0,0 +1,482 @@
+/*
+** Lexical analyzer.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lj_lex_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#if LJ_HASFFI
+#include "lj_tab.h"
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lualib.h"
+#endif
+#include "lj_state.h"
+#include "lj_lex.h"
+#include "lj_parse.h"
+#include "lj_char.h"
+#include "lj_strscan.h"
+#include "lj_strfmt.h"
+
+/* Lua lexer token names: one string per TKDEF entry, terminated by NULL. */
+static const char *const tokennames[] = {
+#define TKSTR1(name)		#name,
+#define TKSTR2(name, sym)	#sym,
+TKDEF(TKSTR1, TKSTR2)
+#undef TKSTR1
+#undef TKSTR2
+  NULL
+};
+
+/* -- Buffer handling ----------------------------------------------------- */
+
+#define LEX_EOF			(-1)
+#define lex_iseol(ls)		(ls->c == '\n' || ls->c == '\r')
+
+/* Refill input from the reader callback. Returns first new char or LEX_EOF. */
+static LJ_NOINLINE LexChar lex_more(LexState *ls)
+{
+  size_t sz;
+  const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
+  if (p == NULL || sz == 0) return LEX_EOF;  /* Reader has no more data. */
+  ls->pe = p + sz;  /* One past the last byte of the new chunk. */
+  ls->p = p + 1;  /* p[0] is handed out by the return below. */
+  return (LexChar)(uint8_t)p[0];  /* Widen as an unsigned byte. */
+}
+
+/* Get next char into ls->c and return it. Calls lex_more() if buffer is empty. */
+static LJ_AINLINE LexChar lex_next(LexState *ls)
+{
+  return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls));
+}
+
+/* Save character: append c to the token buffer ls->sb. */
+static LJ_AINLINE void lex_save(LexState *ls, LexChar c)
+{
+  lj_buf_putb(&ls->sb, c);
+}
+
+/* Save current character to the token buffer, then get and return the next. */
+static LJ_AINLINE LexChar lex_savenext(LexState *ls)
+{
+  lex_save(ls, ls->c);
+  return lex_next(ls);
+}
+
+/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
+static void lex_newline(LexState *ls)
+{
+  LexChar old = ls->c;  /* First EOL char; a different one right after pairs up. */
+  lua_assert(lex_iseol(ls));
+  lex_next(ls);  /* Skip "\n" or "\r". */
+  if (lex_iseol(ls) && ls->c != old) lex_next(ls);  /* Skip "\n\r" or "\r\n". */
+  if (++ls->linenumber >= LJ_MAX_LINE)  /* Count the line and enforce the limit. */
+    lj_lex_error(ls, ls->tok, LJ_ERR_XLINES);
+}
+
+/* -- Scanner for terminals ----------------------------------------------- */
+
+/* Parse a number literal into tv. Raises LJ_ERR_XNUMBER if it does not scan. */
+static void lex_number(LexState *ls, TValue *tv)
+{
+  StrScanFmt fmt;
+  LexChar c, xp = 'e';  /* xp: exponent marker, 'e' by default, 'p' after "0x". */
+  lua_assert(lj_char_isdigit(ls->c));
+  if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x')  /* 'x' or 'X'. */
+    xp = 'p';
+  while (lj_char_isident(ls->c) || ls->c == '.' ||
+	 ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) {  /* Exp. sign. */
+    c = ls->c;  /* Remember this char for the sign check on the next one. */
+    lex_savenext(ls);
+  }
+  lex_save(ls, '\0');  /* NUL-terminate the collected text for the scanner. */
+  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
+	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
+	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
+  if (LJ_DUALNUM && fmt == STRSCAN_INT) {
+    setitype(tv, LJ_TISNUM);
+  } else if (fmt == STRSCAN_NUM) {
+    /* Already in correct format. */
+#if LJ_HASFFI
+  } else if (fmt != STRSCAN_ERROR) {  /* 64 bit integer or imaginary literal. */
+    lua_State *L = ls->L;
+    GCcdata *cd;
+    lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG);
+    if (!ctype_ctsG(G(L))) {
+      ptrdiff_t oldtop = savestack(L, L->top);  /* Stack top is restored below. */
+      luaopen_ffi(L);  /* Load FFI library on-demand. */
+      L->top = restorestack(L, oldtop);
+    }
+    if (fmt == STRSCAN_IMAG) {
+      cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
+      ((double *)cdataptr(cd))[0] = 0;  /* First double is set to zero. */
+      ((double *)cdataptr(cd))[1] = numV(tv);  /* Second holds scanned value. */
+    } else {
+      cd = lj_cdata_new_(L, fmt==STRSCAN_I64 ? CTID_INT64 : CTID_UINT64, 8);
+      *(uint64_t *)cdataptr(cd) = tv->u64;  /* Copy the raw 64 bit value. */
+    }
+    lj_parse_keepcdata(ls, tv, cd);
+#endif
+  } else {
+    lua_assert(fmt == STRSCAN_ERROR);
+    lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
+  }
+}
+
+/* Skip '=' in "[=...=[" / "]=...=]". Returns count, or -count-1 on mismatch. */
+static int lex_skipeq(LexState *ls)
+{
+  int count = 0;
+  LexChar s = ls->c;  /* Bracket char that must reappear after the '=' run. */
+  lua_assert(s == '[' || s == ']');
+  while (lex_savenext(ls) == '=')  /* Saves the bracket, then each '='. */
+    count++;
+  return (ls->c == s) ? count : (-count) - 1;
+}
+
+/* Parse a long string or long comment (tv set to NULL). sep: '=' count. */
+static void lex_longstring(LexState *ls, TValue *tv, int sep)
+{
+  lex_savenext(ls);  /* Skip second '['. */
+  if (lex_iseol(ls))  /* Skip initial newline. */
+    lex_newline(ls);
+  for (;;) {
+    switch (ls->c) {
+    case LEX_EOF:
+      lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
+      break;
+    case ']':
+      if (lex_skipeq(ls) == sep) {  /* Closing delimiter with same '=' count? */
+	lex_savenext(ls);  /* Skip second ']'. */
+	goto endloop;
+      }
+      break;
+    case '\n':
+    case '\r':
+      lex_save(ls, '\n');  /* Any kind of line break is stored as "\n". */
+      lex_newline(ls);
+      if (!tv) lj_buf_reset(&ls->sb);  /* Don't waste space for comments. */
+      break;
+    default:
+      lex_savenext(ls);
+      break;
+    }
+  } endloop:
+  if (tv) {  /* Offsets below cut 2+sep delimiter chars off each end. */
+    GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep),
+				      sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
+    setstrV(ls->L, tv, str);
+  }
+}
+
+/* Parse a quoted string into tv, decoding backslash escapes. */
+static void lex_string(LexState *ls, TValue *tv)
+{
+  LexChar delim = ls->c;  /* Delimiter is '\'' or '"'. */
+  lex_savenext(ls);  /* Opening delimiter stays in the buffer until the end. */
+  while (ls->c != delim) {
+    switch (ls->c) {
+    case LEX_EOF:
+      lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
+      continue;
+    case '\n':
+    case '\r':
+      lj_lex_error(ls, TK_string, LJ_ERR_XSTR);  /* Raw line break in string. */
+      continue;
+    case '\\': {
+      LexChar c = lex_next(ls);  /* Skip the '\\'. */
+      switch (c) {
+      case 'a': c = '\a'; break;
+      case 'b': c = '\b'; break;
+      case 'f': c = '\f'; break;
+      case 'n': c = '\n'; break;
+      case 'r': c = '\r'; break;
+      case 't': c = '\t'; break;
+      case 'v': c = '\v'; break;
+      case 'x':  /* Hexadecimal escape '\xXX'. */
+	c = (lex_next(ls) & 15u) << 4;  /* High nibble from low 4 char bits. */
+	if (!lj_char_isdigit(ls->c)) {
+	  if (!lj_char_isxdigit(ls->c)) goto err_xesc;
+	  c += 9 << 4;  /* Letter digit: low 4 bits are 1..6, so add 9. */
+	}
+	c += (lex_next(ls) & 15u);  /* Low nibble, same scheme. */
+	if (!lj_char_isdigit(ls->c)) {
+	  if (!lj_char_isxdigit(ls->c)) goto err_xesc;
+	  c += 9;
+	}
+	break;
+      case 'z':  /* Skip whitespace. */
+	lex_next(ls);
+	while (lj_char_isspace(ls->c))
+	  if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls);
+	continue;
+      case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue;
+      case '\\': case '\"': case '\'': break;  /* Taken literally, saved below. */
+      case LEX_EOF: continue;  /* Outer switch reports the unfinished string. */
+      default:
+	if (!lj_char_isdigit(c))
+	  goto err_xesc;
+	c -= '0';  /* Decimal escape '\ddd'. */
+	if (lj_char_isdigit(lex_next(ls))) {
+	  c = c*10 + (ls->c - '0');
+	  if (lj_char_isdigit(lex_next(ls))) {
+	    c = c*10 + (ls->c - '0');
+	    if (c > 255) {
+	    err_xesc:
+	      lj_lex_error(ls, TK_string, LJ_ERR_XESC);
+	    }
+	    lex_next(ls);
+	  }
+	}
+	lex_save(ls, c);  /* Char after the escape has already been fetched. */
+	continue;
+      }
+      lex_save(ls, c);
+      lex_next(ls);  /* Step past the last char of the escape sequence. */
+      continue;
+      }
+    default:
+      lex_savenext(ls);
+      break;
+    }
+  }
+  lex_savenext(ls);  /* Skip trailing delimiter. */
+  setstrV(ls->L, tv,
+	  lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2));  /* No delims. */
+}
+
+/* -- Main lexical scanner ------------------------------------------------ */
+
+/* Get next lexical token. Returns the token; its value (if any) goes to tv. */
+static LexToken lex_scan(LexState *ls, TValue *tv)
+{
+  lj_buf_reset(&ls->sb);  /* Every token starts with an empty buffer. */
+  for (;;) {
+    if (lj_char_isident(ls->c)) {
+      GCstr *s;
+      if (lj_char_isdigit(ls->c)) {  /* Numeric literal. */
+	lex_number(ls, tv);
+	return TK_number;
+      }
+      /* Identifier or reserved word. */
+      do {
+	lex_savenext(ls);
+      } while (lj_char_isident(ls->c));
+      s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb));
+      setstrV(ls->L, tv, s);
+      if (s->reserved > 0)  /* Reserved word? */
+	return TK_OFS + s->reserved;
+      return TK_name;
+    }
+    switch (ls->c) {
+    case '\n':
+    case '\r':
+      lex_newline(ls);
+      continue;
+    case ' ':
+    case '\t':
+    case '\v':
+    case '\f':
+      lex_next(ls);  /* Skip whitespace. */
+      continue;
+    case '-':
+      lex_next(ls);
+      if (ls->c != '-') return '-';  /* Single minus, not a comment. */
+      lex_next(ls);
+      if (ls->c == '[') {  /* Long comment "--[=*[...]=*]". */
+	int sep = lex_skipeq(ls);
+	lj_buf_reset(&ls->sb);  /* `lex_skipeq' may dirty the buffer */
+	if (sep >= 0) {
+	  lex_longstring(ls, NULL, sep);
+	  lj_buf_reset(&ls->sb);
+	  continue;
+	}
+      }
+      /* Short comment "--.*\n". */
+      while (!lex_iseol(ls) && ls->c != LEX_EOF)
+	lex_next(ls);
+      continue;
+    case '[': {
+      int sep = lex_skipeq(ls);
+      if (sep >= 0) {
+	lex_longstring(ls, tv, sep);
+	return TK_string;
+      } else if (sep == -1) {  /* No '=' and no second '[': plain bracket. */
+	return '[';
+      } else {  /* "[=" run not followed by '['. */
+	lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
+	continue;
+      }
+      }
+    case '=':
+      lex_next(ls);
+      if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; }
+    case '<':
+      lex_next(ls);
+      if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; }
+    case '>':
+      lex_next(ls);
+      if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; }
+    case '~':
+      lex_next(ls);
+      if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; }
+    case ':':
+      lex_next(ls);
+      if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; }
+    case '"':
+    case '\'':
+      lex_string(ls, tv);
+      return TK_string;
+    case '.':
+      if (lex_savenext(ls) == '.') {  /* The '.' is saved in case of a number. */
+	lex_next(ls);
+	if (ls->c == '.') {
+	  lex_next(ls);
+	  return TK_dots;   /* ... */
+	}
+	return TK_concat;   /* .. */
+      } else if (!lj_char_isdigit(ls->c)) {
+	return '.';
+      } else {
+	lex_number(ls, tv);  /* Number starting with '.', already in buffer. */
+	return TK_number;
+      }
+    case LEX_EOF:
+      return TK_eof;
+    default: {
+      LexChar c = ls->c;
+      lex_next(ls);
+      return c;  /* Single-char tokens (+ - / ...). */
+    }
+    }
+  }
+}
+
+/* -- Lexer API ----------------------------------------------------------- */
+
+/* Setup lexer state. Returns 1 if input starts like a bytecode dump, else 0. */
+int lj_lex_setup(lua_State *L, LexState *ls)
+{
+  int header = 0;  /* Set to 1 once a BOM or #! line has been skipped. */
+  ls->L = L;
+  ls->fs = NULL;
+  ls->pe = ls->p = NULL;  /* Empty buffer: first lex_next() calls the reader. */
+  ls->vstack = NULL;
+  ls->sizevstack = 0;
+  ls->vtop = 0;
+  ls->bcstack = NULL;
+  ls->sizebcstack = 0;
+  ls->tok = 0;
+  ls->lookahead = TK_eof;  /* No look-ahead token. */
+  ls->linenumber = 1;
+  ls->lastline = 1;
+  lex_next(ls);  /* Read-ahead first char. */
+  if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
+      (uint8_t)ls->p[1] == 0xbf) {  /* Skip UTF-8 BOM (if buffered). */
+    ls->p += 2;
+    lex_next(ls);
+    header = 1;
+  }
+  if (ls->c == '#') {  /* Skip POSIX #! header line. */
+    do {
+      lex_next(ls);
+      if (ls->c == LEX_EOF) return 0;  /* Input ends inside the header line. */
+    } while (!lex_iseol(ls));
+    lex_newline(ls);
+    header = 1;
+  }
+  if (ls->c == LUA_SIGNATURE[0]) {  /* Bytecode dump. */
+    if (header) {
+      /*
+      ** Loading bytecode with an extra header is disabled for security
+      ** reasons. This may circumvent the usual check for bytecode vs.
+      ** Lua code by looking at the first char. Since this is a potential
+      ** security violation no attempt is made to echo the chunkname either.
+      */
+      setstrV(L, L->top++, lj_err_str(L, LJ_ERR_BCBAD));
+      lj_err_throw(L, LUA_ERRSYNTAX);
+    }
+    return 1;
+  }
+  return 0;
+}
+
+/* Cleanup lexer state: free bytecode stack, variable stack and token buffer. */
+void lj_lex_cleanup(lua_State *L, LexState *ls)
+{
+  global_State *g = G(L);
+  lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
+  lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
+  lj_buf_free(g, &ls->sb);
+}
+
+/* Advance to next token in ls->tok/ls->tokval. Uses lookahead token if set. */
+void lj_lex_next(LexState *ls)
+{
+  ls->lastline = ls->linenumber;  /* Remember the line before advancing. */
+  if (LJ_LIKELY(ls->lookahead == TK_eof)) {  /* No lookahead token? */
+    ls->tok = lex_scan(ls, &ls->tokval);  /* Get next token. */
+  } else {  /* Otherwise return lookahead token. */
+    ls->tok = ls->lookahead;
+    ls->lookahead = TK_eof;  /* Lookahead slot is empty again. */
+    ls->tokval = ls->lookaheadval;
+  }
+}
+
+/* Look ahead for the next token. Asserts that no lookahead is pending yet. */
+LexToken lj_lex_lookahead(LexState *ls)
+{
+  lua_assert(ls->lookahead == TK_eof);
+  ls->lookahead = lex_scan(ls, &ls->lookaheadval);
+  return ls->lookahead;
+}
+
+/* Convert token to string: a name from tokennames, the char, or "char(N)". */
+const char *lj_lex_token2str(LexState *ls, LexToken tok)
+{
+  if (tok > TK_OFS)  /* Named token: look it up in the table. */
+    return tokennames[tok-TK_OFS-1];
+  else if (!lj_char_iscntrl(tok))
+    return lj_strfmt_pushf(ls->L, "%c", tok);
+  else  /* Control char: show its numeric code instead. */
+    return lj_strfmt_pushf(ls->L, "char(%d)", tok);
+}
+
+/* Lexer error: report em near token tok (0: no token text) at current line. */
+void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
+{
+  const char *tokstr;
+  va_list argp;
+  if (tok == 0) {
+    tokstr = NULL;
+  } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
+    lex_save(ls, '\0');  /* NUL-terminate the buffered token text. */
+    tokstr = sbufB(&ls->sb);  /* Show the text collected so far. */
+  } else {
+    tokstr = lj_lex_token2str(ls, tok);
+  }
+  va_start(argp, em);
+  lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp);
+  va_end(argp);
+}
+
+/* Initialize strings for reserved words (first TK_RESERVED token names). */
+void lj_lex_init(lua_State *L)
+{
+  uint32_t i;
+  for (i = 0; i < TK_RESERVED; i++) {
+    GCstr *s = lj_str_newz(L, tokennames[i]);
+    fixstring(s);  /* Reserved words are never collected. */
+    s->reserved = (uint8_t)(i+1);  /* 1-based index; lexer tests reserved > 0. */
+  }
+}
+
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_lex.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lex.h
similarity index 86%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_lex.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lex.h
index fe01768645622ccbc97840d3005ca4af36ed8b72..acd2285d789ccac165a87a251f9a2d36391d0a7e 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_lex.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lex.h
@@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2)
   TK_RESERVED = TK_while - TK_OFS
 };
 
-typedef int LexToken;
+typedef int LexChar;	/* Lexical character. Unsigned ext. from char. */
+typedef int LexToken;	/* Lexical token. */
 
 /* Combined bytecode ins/line. Only used during bytecode generation. */
 typedef struct BCInsLine {
@@ -51,13 +52,13 @@ typedef struct VarInfo {
 typedef struct LexState {
   struct FuncState *fs;	/* Current FuncState. Defined in lj_parse.c. */
   struct lua_State *L;	/* Lua state. */
-  TValue tokenval;	/* Current token value. */
+  TValue tokval;	/* Current token value. */
   TValue lookaheadval;	/* Lookahead token value. */
-  int current;		/* Current character (charint). */
-  LexToken token;	/* Current token. */
-  LexToken lookahead;	/* Lookahead token. */
-  MSize n;		/* Bytes left in input buffer. */
   const char *p;	/* Current position in input buffer. */
+  const char *pe;	/* End of input buffer. */
+  LexChar c;		/* Current character. */
+  LexToken tok;		/* Current token. */
+  LexToken lookahead;	/* Lookahead token. */
   SBuf sb;		/* String buffer for tokens. */
   lua_Reader rfunc;	/* Reader callback. */
   void *rdata;		/* Reader callback data. */
@@ -78,8 +79,8 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
 LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls);
 LJ_FUNC void lj_lex_next(LexState *ls);
 LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
-LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token);
-LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...);
+LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
+LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
 LJ_FUNC void lj_lex_init(lua_State *L);
 
 #endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_lib.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lib.c
similarity index 77%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_lib.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lib.c
index 856685ee3679b1054fc6a71e8846d75a3375b5e6..b16d0564fa7e3353c900dfcc55e7fc2c1540c109 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_lib.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lib.c
@@ -18,6 +18,9 @@
 #include "lj_dispatch.h"
 #include "lj_vm.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
+#include "lj_lex.h"
+#include "lj_bcdump.h"
 #include "lj_lib.h"
 
 /* -- Library initialization ---------------------------------------------- */
@@ -43,6 +46,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
   return tabV(L->top-1);
 }
 
+static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
+{
+  int len = *p++;  /* Length byte precedes the function name. */
+  GCstr *name = lj_str_new(L, (const char *)p, len);
+  LexState ls;  /* Minimal lexer state handed to lj_bcread_proto(). */
+  GCproto *pt;
+  GCfunc *fn;
+  memset(&ls, 0, sizeof(ls));
+  ls.L = L;
+  ls.p = (const char *)(p+len);  /* Input starts right after the name. */
+  ls.pe = (const char *)~(uintptr_t)0;  /* All-ones pointer: no real end bound. */
+  ls.c = -1;
+  ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));  /* Presumably dump flags. */
+  ls.chunkname = name;
+  pt = lj_bcread_proto(&ls);
+  pt->firstline = ~(BCLine)0;
+  fn = lj_func_newL_empty(L, pt, tabref(L->env));
+  /* NOBARRIER: See below for common barrier. */
+  setfuncV(L, lj_tab_setstr(L, tab, name), fn);  /* tab[name] = fn */
+  return (const uint8_t *)ls.p;  /* Presumably advanced past the prototype. */
+}
+
 void lj_lib_register(lua_State *L, const char *libname,
 		     const uint8_t *p, const lua_CFunction *cf)
 {
@@ -87,6 +112,9 @@ void lj_lib_register(lua_State *L, const char *libname,
       ofn = fn;
     } else {
       switch (tag | len) {
+      case LIBINIT_LUA:
+	p = lib_read_lfunc(L, p, tab);
+	break;
       case LIBINIT_SET:
 	L->top -= 2;
 	if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
@@ -120,6 +148,37 @@ void lj_lib_register(lua_State *L, const char *libname,
   }
 }
 
+/* Push internal C closure f with n upvalues, set its ffid to id, return it. */
+GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n)
+{
+  GCfunc *fn;
+  lua_pushcclosure(L, f, n);
+  fn = funcV(L->top-1);  /* The closure just pushed. */
+  fn->c.ffid = (uint8_t)id;
+  setmref(fn->c.pc, &G(L)->bc_cfunc_int);
+  return fn;
+}
+
+void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, GCtab *env)
+{
+  luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD", 4);  /* Table on stack top. */
+  lua_pushcfunction(L, f);
+  /* NOBARRIER: The function is new (marked white). */
+  setgcref(funcV(L->top-1)->c.env, obj2gco(env));  /* Function env = env. */
+  lua_setfield(L, -2, name);  /* _PRELOAD[name] = function; pops it. */
+  L->top--;  /* Drop the _PRELOAD table. */
+}
+
+int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, const char *name)
+{
+  GCfunc *fn = lj_lib_pushcf(L, cf, id);  /* New internal function, ffid = id. */
+  GCtab *t = tabref(curr_func(L)->c.env);  /* Reference to parent table. */
+  setfuncV(L, lj_tab_setstr(L, t, lj_str_newz(L, name)), fn);  /* t[name] = fn */
+  lj_gc_anybarriert(L, t);
+  setfuncV(L, L->top++, fn);  /* Push fn on the stack. */
+  return 1;
+}
+
 /* -- Type checks --------------------------------------------------------- */
 
 TValue *lj_lib_checkany(lua_State *L, int narg)
@@ -137,7 +196,7 @@ GCstr *lj_lib_checkstr(lua_State *L, int narg)
     if (LJ_LIKELY(tvisstr(o))) {
       return strV(o);
     } else if (tvisnumber(o)) {
-      GCstr *s = lj_str_fromnumber(L, o);
+      GCstr *s = lj_strfmt_number(L, o);
       setstrV(L, o, s);
       return s;
     }
@@ -196,20 +255,6 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def)
   return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def;
 }
 
-int32_t lj_lib_checkbit(lua_State *L, int narg)
-{
-  TValue *o = L->base + narg-1;
-  if (!(o < L->top && lj_strscan_numberobj(o)))
-    lj_err_argt(L, narg, LUA_TNUMBER);
-  if (LJ_LIKELY(tvisint(o))) {
-    return intV(o);
-  } else {
-    int32_t i = lj_num2bit(numV(o));
-    if (LJ_DUALNUM) setintV(o, i);
-    return i;
-  }
-}
-
 GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
 {
   TValue *o = L->base + narg-1;
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_lib.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lib.h
similarity index 85%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_lib.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lib.h
index 9320f34fdac309d6deb7594fca27d88afe429591..3fa7aa17eff3848db3ce8aaf89fa7decde200abe 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_lib.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_lib.h
@@ -41,15 +41,22 @@ LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg);
 LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg);
 LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg);
 LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def);
-LJ_FUNC int32_t lj_lib_checkbit(lua_State *L, int narg);
 LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
 LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
 LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
 LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
 
 /* Avoid including lj_frame.h. */
+#if LJ_GC64
+#define lj_lib_upvalue(L, n) \
+  (&gcval(L->base-2)->fn.c.upvalue[(n)-1])
+#elif LJ_FR2
+#define lj_lib_upvalue(L, n) \
+  (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1])
+#else
 #define lj_lib_upvalue(L, n) \
   (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1])
+#endif
 
 #if LJ_TARGET_WINDOWS
 #define lj_lib_checkfpu(L) \
@@ -60,23 +67,14 @@ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
 #define lj_lib_checkfpu(L)	UNUSED(L)
 #endif
 
-/* Push internal function on the stack. */
-static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
-				     int id, int n)
-{
-  GCfunc *fn;
-  lua_pushcclosure(L, f, n);
-  fn = funcV(L->top-1);
-  fn->c.ffid = (uint8_t)id;
-  setmref(fn->c.pc, &G(L)->bc_cfunc_int);
-}
-
+LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n);
 #define lj_lib_pushcf(L, fn, id)	(lj_lib_pushcc(L, (fn), (id), 0))
 
 /* Library function declarations. Scanned by buildvm. */
 #define LJLIB_CF(name)		static int lj_cf_##name(lua_State *L)
 #define LJLIB_ASM(name)		static int lj_ffh_##name(lua_State *L)
 #define LJLIB_ASM_(name)
+#define LJLIB_LUA(name)
 #define LJLIB_SET(name)
 #define LJLIB_PUSH(arg)
 #define LJLIB_REC(handler)
@@ -88,6 +86,10 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
 
 LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
 			     const uint8_t *init, const lua_CFunction *cf);
+LJ_FUNC void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f,
+			   GCtab *env);
+LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id,
+			   const char *name);
 
 /* Library init data tags. */
 #define LIBINIT_LENMASK	0x3f
@@ -96,7 +98,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
 #define LIBINIT_ASM	0x40
 #define LIBINIT_ASM_	0x80
 #define LIBINIT_STRING	0xc0
-#define LIBINIT_MAXSTR	0x39
+#define LIBINIT_MAXSTR	0x38
+#define LIBINIT_LUA	0xf9
 #define LIBINIT_SET	0xfa
 #define LIBINIT_NUMBER	0xfb
 #define LIBINIT_COPY	0xfc
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_load.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_load.c
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_load.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_load.c
index ff7b85117ff74ea1e0b6804ed041366f83a1002f..95a6ab0d4de7844cf5509692674ac3e7d7d8d4de 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_load.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_load.c
@@ -15,7 +15,7 @@
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
 #include "lj_func.h"
 #include "lj_frame.h"
 #include "lj_vm.h"
@@ -54,7 +54,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data,
   ls.rdata = data;
   ls.chunkarg = chunkname ? chunkname : "?";
   ls.mode = mode;
-  lj_str_initbuf(&ls.sb);
+  lj_buf_init(L, &ls.sb);
   status = lj_vm_cpcall(L, NULL, &ls, cpparser);
   lj_lex_cleanup(L, &ls);
   lj_gc_check(L);
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_mcode.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_mcode.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_mcode.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_mcode.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_mcode.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_mcode.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_mcode.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_mcode.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_meta.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_meta.c
similarity index 83%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_meta.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_meta.c
index faaaf70264a595f6d8b98809628bd9ced503fae1..104ecf07c3d2076e1a98c959b5ddd3ff5acc681f 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_meta.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_meta.c
@@ -12,6 +12,7 @@
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
@@ -19,6 +20,8 @@
 #include "lj_bc.h"
 #include "lj_vm.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
+#include "lj_lib.h"
 
 /* -- Metamethod handling ------------------------------------------------- */
 
@@ -77,12 +80,16 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv)
   TValue *base = L->base;
   TValue *top = L->top;
   const BCIns *pc = frame_pc(base-1);  /* Preserve old PC from frame. */
-  copyTV(L, base-1, tv);  /* Replace frame with new object. */
-  top->u32.lo = LJ_CONT_TAILCALL;
-  setframe_pc(top, pc);
-  setframe_gc(top+1, obj2gco(L));  /* Dummy frame object. */
-  setframe_ftsz(top+1, (int)((char *)(top+2) - (char *)base) + FRAME_CONT);
-  L->base = L->top = top+2;
+  copyTV(L, base-1-LJ_FR2, tv);  /* Replace frame with new object. */
+  if (LJ_FR2)
+    (top++)->u64 = LJ_CONT_TAILCALL;
+  else
+    top->u32.lo = LJ_CONT_TAILCALL;
+  setframe_pc(top++, pc);
+  if (LJ_FR2) top++;
+  setframe_gc(top, obj2gco(L), LJ_TTHREAD);  /* Dummy frame object. */
+  setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT);
+  L->base = L->top = top+1;
   /*
   ** before:   [old_mo|PC]    [... ...]
   **                         ^base     ^top
@@ -113,11 +120,13 @@ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo,
   */
   TValue *top = L->top;
   if (curr_funcisL(L)) top = curr_topL(L);
-  setcont(top, cont);  /* Assembler VM stores PC in upper word. */
-  copyTV(L, top+1, mo);  /* Store metamethod and two arguments. */
-  copyTV(L, top+2, a);
-  copyTV(L, top+3, b);
-  return top+2;  /* Return new base. */
+  setcont(top++, cont);  /* Assembler VM stores PC in upper word or FR2. */
+  if (LJ_FR2) setnilV(top++);
+  copyTV(L, top++, mo);  /* Store metamethod and two arguments. */
+  if (LJ_FR2) setnilV(top++);
+  copyTV(L, top, a);
+  copyTV(L, top+1, b);
+  return top;  /* Return new base. */
 }
 
 /* -- C helpers for some instructions, called from assembler VM ----------- */
@@ -225,27 +234,14 @@ TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc,
   }
 }
 
-/* In-place coercion of a number to a string. */
-static LJ_AINLINE int tostring(lua_State *L, TValue *o)
-{
-  if (tvisstr(o)) {
-    return 1;
-  } else if (tvisnumber(o)) {
-    setstrV(L, o, lj_str_fromnumber(L, o));
-    return 1;
-  } else {
-    return 0;
-  }
-}
-
 /* Helper for CAT. Coercion, iterative concat, __concat metamethod. */
 TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
 {
   int fromc = 0;
   if (left < 0) { left = -left; fromc = 1; }
   do {
-    int n = 1;
-    if (!(tvisstr(top-1) || tvisnumber(top-1)) || !tostring(L, top)) {
+    if (!(tvisstr(top) || tvisnumber(top)) ||
+	!(tvisstr(top-1) || tvisnumber(top-1))) {
       cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
       if (tvisnil(mo)) {
 	mo = lj_meta_lookup(L, top, MM_concat);
@@ -266,13 +262,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
       ** after mm:  [...][CAT stack ...] <--push-- [result]
       ** next step: [...][CAT stack .............]
       */
-      copyTV(L, top+2, top);  /* Careful with the order of stack copies! */
-      copyTV(L, top+1, top-1);
-      copyTV(L, top, mo);
+      copyTV(L, top+2*LJ_FR2+2, top);  /* Carefully ordered stack copies! */
+      copyTV(L, top+2*LJ_FR2+1, top-1);
+      copyTV(L, top+LJ_FR2, mo);
       setcont(top-1, lj_cont_cat);
+      if (LJ_FR2) { setnilV(top); setnilV(top+2); top += 2; }
       return top+1;  /* Trigger metamethod call. */
-    } else if (strV(top)->len == 0) {  /* Shortcut. */
-      (void)tostring(L, top-1);
     } else {
       /* Pick as many strings as possible from the top and concatenate them:
       **
@@ -281,27 +276,28 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
       ** concat:    [...][CAT stack ...] [result]
       ** next step: [...][CAT stack ............]
       */
-      MSize tlen = strV(top)->len;
-      char *buffer;
-      int i;
-      for (n = 1; n <= left && tostring(L, top-n); n++) {
-	MSize len = strV(top-n)->len;
-	if (len >= LJ_MAX_STR - tlen)
-	  lj_err_msg(L, LJ_ERR_STROV);
-	tlen += len;
-      }
-      buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen);
-      n--;
-      tlen = 0;
-      for (i = n; i >= 0; i--) {
-	MSize len = strV(top-i)->len;
-	memcpy(buffer + tlen, strVdata(top-i), len);
-	tlen += len;
+      TValue *e, *o = top;
+      uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
+      char *p, *buf;
+      do {
+	o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
+      } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1)));
+      if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV);
+      p = buf = lj_buf_tmp(L, (MSize)tlen);
+      for (e = top, top = o; o <= e; o++) {
+	if (tvisstr(o)) {
+	  GCstr *s = strV(o);
+	  MSize len = s->len;
+	  p = lj_buf_wmem(p, strdata(s), len);
+	} else if (tvisint(o)) {
+	  p = lj_strfmt_wint(p, intV(o));
+	} else {
+	  lua_assert(tvisnum(o));
+	  p = lj_strfmt_wnum(p, o);
+	}
       }
-      setstrV(L, top-n, lj_str_new(L, buffer, tlen));
+      setstrV(L, top, lj_str_new(L, buf, (size_t)(p-buf)));
     }
-    left -= n;
-    top -= n;
   } while (left >= 1);
   if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) {
     if (!fromc) L->top = curr_topL(L);
@@ -338,12 +334,14 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
 	return (TValue *)(intptr_t)ne;
     }
     top = curr_top(L);
-    setcont(top, ne ? lj_cont_condf : lj_cont_condt);
-    copyTV(L, top+1, mo);
+    setcont(top++, ne ? lj_cont_condf : lj_cont_condt);
+    if (LJ_FR2) setnilV(top++);
+    copyTV(L, top++, mo);
+    if (LJ_FR2) setnilV(top++);
     it = ~(uint32_t)o1->gch.gct;
-    setgcV(L, top+2, o1, it);
-    setgcV(L, top+3, o2, it);
-    return top+2;  /* Trigger metamethod call. */
+    setgcV(L, top, o1, it);
+    setgcV(L, top+1, o2, it);
+    return top;  /* Trigger metamethod call. */
   }
   return (TValue *)(intptr_t)ne;
 }
@@ -366,7 +364,7 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins)
     o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)];
   } else {
     lua_assert(op == BC_ISEQP);
-    setitype(&tv, ~bc_d(ins));
+    setpriV(&tv, ~bc_d(ins));
     o2 = &tv;
   }
   mo = lj_meta_lookup(L, o1mm, MM_eq);
@@ -423,6 +421,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
   }
 }
 
+/* Helper for ISTYPE and ISNUM. Implicit coercion or error. */
+void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp)
+{
+  L->top = curr_topL(L);
+  ra++; tp--;
+  lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX);  /* ISTYPE -> ISNUM broken. */
+  if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra);
+  else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra);
+  else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra);
+  else lj_err_argtype(L, ra, lj_obj_itypename[tp]);
+}
+
 /* Helper for calls. __call metamethod. */
 void lj_meta_call(lua_State *L, TValue *func, TValue *top)
 {
@@ -430,7 +440,8 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top)
   TValue *p;
   if (!tvisfunc(mo))
     lj_err_optype_call(L, func);
-  for (p = top; p > func; p--) copyTV(L, p, p-1);
+  for (p = top; p > func+2*LJ_FR2; p--) copyTV(L, p, p-1);
+  if (LJ_FR2) copyTV(L, func+2, func);
   copyTV(L, func, mo);
 }
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_meta.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_meta.h
similarity index 95%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_meta.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_meta.h
index 2c1ad0dd9b868ffa1911c50efef4d6687075d596..7f71633362b1e08682bc65dea1b117c43744c18e 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_meta.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_meta.h
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
 LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
 LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
 LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
+LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp);
 LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
 LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_obj.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_obj.c
similarity index 69%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_obj.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_obj.c
index 7fab714e5c9e244d6ef8704219c783b25af45be0..b78d2c8d835d4ad9343ef3784b5559db4e100cea 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_obj.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_obj.c
@@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = {  /* ORDER LJ_T */
 };
 
 /* Compare two objects without calling metamethods. */
-int lj_obj_equal(cTValue *o1, cTValue *o2)
+int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2)
 {
   if (itype(o1) == itype(o2)) {
     if (tvispri(o1))
@@ -33,3 +33,18 @@ int lj_obj_equal(cTValue *o1, cTValue *o2)
   return numberVnum(o1) == numberVnum(o2);
 }
 
+/* Return pointer to object or its object data. */
+const void * LJ_FASTCALL lj_obj_ptr(cTValue *o)
+{
+  if (tvisudata(o))
+    return uddata(udataV(o));
+  else if (tvislightud(o))
+    return lightudV(o);
+  else if (LJ_HASFFI && tviscdata(o))
+    return cdataptr(cdataV(o));
+  else if (tvisgcv(o))
+    return gcV(o);
+  else
+    return NULL;
+}
+
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_obj.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_obj.h
similarity index 87%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_obj.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_obj.h
index 6e8381cbadfd699d38808f05d04e316a9d3fe5f3..74ed59bc72658b18d30872712b41b30c3894f8f3 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_obj.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_obj.h
@@ -15,42 +15,75 @@
 
 /* -- Memory references (32 bit address space) ---------------------------- */
 
-/* Memory size. */
+/* Memory and GC object sizes. */
 typedef uint32_t MSize;
+#if LJ_GC64
+typedef uint64_t GCSize;
+#else
+typedef uint32_t GCSize;
+#endif
 
 /* Memory reference */
 typedef struct MRef {
+#if LJ_GC64
+  uint64_t ptr64;	/* True 64 bit pointer. */
+#else
   uint32_t ptr32;	/* Pseudo 32 bit pointer. */
+#endif
 } MRef;
 
+#if LJ_GC64
+#define mref(r, t)	((t *)(void *)(r).ptr64)
+
+#define setmref(r, p)	((r).ptr64 = (uint64_t)(void *)(p))
+#define setmrefr(r, v)	((r).ptr64 = (v).ptr64)
+#else
 #define mref(r, t)	((t *)(void *)(uintptr_t)(r).ptr32)
 
 #define setmref(r, p)	((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
 #define setmrefr(r, v)	((r).ptr32 = (v).ptr32)
+#endif
 
 /* -- GC object references (32 bit address space) ------------------------- */
 
 /* GCobj reference */
 typedef struct GCRef {
+#if LJ_GC64
+  uint64_t gcptr64;	/* True 64 bit pointer. */
+#else
   uint32_t gcptr32;	/* Pseudo 32 bit pointer. */
+#endif
 } GCRef;
 
 /* Common GC header for all collectable objects. */
 #define GCHeader	GCRef nextgc; uint8_t marked; uint8_t gct
 /* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */
 
+#if LJ_GC64
+#define gcref(r)	((GCobj *)(r).gcptr64)
+#define gcrefp(r, t)	((t *)(void *)(r).gcptr64)
+#define gcrefu(r)	((r).gcptr64)
+#define gcrefeq(r1, r2)	((r1).gcptr64 == (r2).gcptr64)
+
+#define setgcref(r, gc)	((r).gcptr64 = (uint64_t)&(gc)->gch)
+#define setgcreft(r, gc, it) \
+  (r).gcptr64 = (uint64_t)&(gc)->gch | (((uint64_t)(it)) << 47)
+#define setgcrefp(r, p)	((r).gcptr64 = (uint64_t)(p))
+#define setgcrefnull(r)	((r).gcptr64 = 0)
+#define setgcrefr(r, v)	((r).gcptr64 = (v).gcptr64)
+#else
 #define gcref(r)	((GCobj *)(uintptr_t)(r).gcptr32)
 #define gcrefp(r, t)	((t *)(void *)(uintptr_t)(r).gcptr32)
 #define gcrefu(r)	((r).gcptr32)
-#define gcrefi(r)	((int32_t)(r).gcptr32)
 #define gcrefeq(r1, r2)	((r1).gcptr32 == (r2).gcptr32)
-#define gcnext(gc)	(gcref((gc)->gch.nextgc))
 
 #define setgcref(r, gc)	((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch)
-#define setgcrefi(r, i)	((r).gcptr32 = (uint32_t)(i))
 #define setgcrefp(r, p)	((r).gcptr32 = (uint32_t)(uintptr_t)(p))
 #define setgcrefnull(r)	((r).gcptr32 = 0)
 #define setgcrefr(r, v)	((r).gcptr32 = (v).gcptr32)
+#endif
+
+#define gcnext(gc)	(gcref((gc)->gch.nextgc))
 
 /* IMPORTANT NOTE:
 **
@@ -119,11 +152,12 @@ typedef int32_t BCLine;  /* Bytecode line number. */
 /* Internal assembler functions. Never call these directly from C. */
 typedef void (*ASMFunction)(void);
 
-/* Resizable string buffer. Need this here, details in lj_str.h. */
+/* Resizable string buffer. Need this here, details in lj_buf.h. */
 typedef struct SBuf {
-  char *buf;		/* String buffer base. */
-  MSize n;		/* String buffer length. */
-  MSize sz;		/* String buffer size. */
+  MRef p;		/* String buffer pointer. */
+  MRef e;		/* String buffer end pointer. */
+  MRef b;		/* String buffer base. */
+  MRef L;		/* lua_State, used for buffer resizing. */
 } SBuf;
 
 /* -- Tags and values ----------------------------------------------------- */
@@ -131,13 +165,23 @@ typedef struct SBuf {
 /* Frame link. */
 typedef union {
   int32_t ftsz;		/* Frame type and size of previous frame. */
-  MRef pcr;		/* Overlaps PC for Lua frames. */
+  MRef pcr;		/* Or PC for Lua frames. */
 } FrameLink;
 
 /* Tagged value. */
 typedef LJ_ALIGN(8) union TValue {
   uint64_t u64;		/* 64 bit pattern overlaps number. */
   lua_Number n;		/* Number object overlaps split tag/value object. */
+#if LJ_GC64
+  GCRef gcr;		/* GCobj reference with tag. */
+  int64_t it64;
+  struct {
+    LJ_ENDIAN_LOHI(
+      int32_t i;	/* Integer value. */
+    , uint32_t it;	/* Internal object tag. Must overlap MSW of number. */
+    )
+  };
+#else
   struct {
     LJ_ENDIAN_LOHI(
       union {
@@ -147,12 +191,17 @@ typedef LJ_ALIGN(8) union TValue {
     , uint32_t it;	/* Internal object tag. Must overlap MSW of number. */
     )
   };
+#endif
+#if LJ_FR2
+  int64_t ftsz;		/* Frame type and size of previous frame, or PC. */
+#else
   struct {
     LJ_ENDIAN_LOHI(
       GCRef func;	/* Function for next frame (or dummy L). */
     , FrameLink tp;	/* Link to previous frame. */
     )
   } fr;
+#endif
   struct {
     LJ_ENDIAN_LOHI(
       uint32_t lo;	/* Lower 32 bits of number. */
@@ -172,6 +221,8 @@ typedef const TValue cTValue;
 
 /* Internal object tags.
 **
+** Format for 32 bit GC references (!LJ_GC64):
+**
 ** Internal tags overlap the MSW of a number object (must be a double).
 ** Interpreted as a double these are special NaNs. The FPU only generates
 ** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available
@@ -186,6 +237,18 @@ typedef const TValue cTValue;
 ** int (LJ_DUALNUM)|  itype  |   int   |
 ** number           -------double------
 **
+** Format for 64 bit GC references (LJ_GC64):
+**
+** The upper 13 bits must be 1 (0xfff8...) for a special NaN. The next
+** 4 bits hold the internal tag. The lowest 47 bits either hold a pointer,
+** a zero-extended 32 bit integer or all bits set to 1 for primitive types.
+**
+**                     ------MSW------.------LSW------
+** primitive types    |1..1|itype|1..................1|
+** GC objects/lightud |1..1|itype|-------GCRef--------|
+** int (LJ_DUALNUM)   |1..1|itype|0..0|-----int-------|
+** number              ------------double-------------
+**
 ** ORDER LJ_T
 ** Primitive types nil/false/true must be first, lightuserdata next.
 ** GC objects are at the end, table/userdata must be lowest.
@@ -208,7 +271,7 @@ typedef const TValue cTValue;
 #define LJ_TNUMX		(~13u)
 
 /* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 #define LJ_TISNUM		0xfffeffffu
 #else
 #define LJ_TISNUM		LJ_TNUMX
@@ -218,6 +281,10 @@ typedef const TValue cTValue;
 #define LJ_TISGCV		(LJ_TSTR+1)
 #define LJ_TISTABUD		LJ_TTAB
 
+#if LJ_GC64
+#define LJ_GCVMASK		(((uint64_t)1 << 47) - 1)
+#endif
+
 /* -- String object ------------------------------------------------------- */
 
 /* String object header. String payload follows. */
@@ -291,6 +358,9 @@ typedef struct GCproto {
   uint8_t numparams;	/* Number of parameters. */
   uint8_t framesize;	/* Fixed frame size. */
   MSize sizebc;		/* Number of bytecode instructions. */
+#if LJ_GC64
+  uint32_t unused_gc64;
+#endif
   GCRef gclist;
   MRef k;		/* Split constant array (points to the middle). */
   MRef uv;		/* Upvalue list. local slot|0x8000 or parent uv idx. */
@@ -402,7 +472,9 @@ typedef struct Node {
   TValue val;		/* Value object. Must be first field. */
   TValue key;		/* Key object. */
   MRef next;		/* Hash chain. */
+#if !LJ_GC64
   MRef freetop;		/* Top of free elements (stored in t->node[0]). */
+#endif
 } Node;
 
 LJ_STATIC_ASSERT(offsetof(Node, val) == 0);
@@ -417,12 +489,22 @@ typedef struct GCtab {
   MRef node;		/* Hash part. */
   uint32_t asize;	/* Size of array part (keys [0, asize-1]). */
   uint32_t hmask;	/* Hash part mask (size of hash part - 1). */
+#if LJ_GC64
+  MRef freetop;		/* Top of free elements. */
+#endif
 } GCtab;
 
 #define sizetabcolo(n)	((n)*sizeof(TValue) + sizeof(GCtab))
 #define tabref(r)	(&gcref((r))->tab)
 #define noderef(r)	(mref((r), Node))
 #define nextnode(n)	(mref((n)->next, Node))
+#if LJ_GC64
+#define getfreetop(t, n)	(noderef((t)->freetop))
+#define setfreetop(t, n, v)	(setmref((t)->freetop, (v)))
+#else
+#define getfreetop(t, n)	(noderef((n)->freetop))
+#define setfreetop(t, n, v)	(setmref((n)->freetop, (v)))
+#endif
 
 /* -- State objects ------------------------------------------------------- */
 
@@ -489,8 +571,8 @@ typedef enum {
 #define mmname_str(g, mm)	(strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
 
 typedef struct GCState {
-  MSize total;		/* Memory currently allocated. */
-  MSize threshold;	/* Memory threshold. */
+  GCSize total;		/* Memory currently allocated. */
+  GCSize threshold;	/* Memory threshold. */
   uint8_t currentwhite;	/* Current white color. */
   uint8_t state;	/* GC state. */
   uint8_t nocdatafin;	/* No cdata finalizer called. */
@@ -502,9 +584,9 @@ typedef struct GCState {
   GCRef grayagain;	/* List of objects for atomic traversal. */
   GCRef weak;		/* List of weak tables (to be cleared). */
   GCRef mmudata;	/* List of userdata (to be finalized). */
+  GCSize debt;		/* Debt (how much GC is behind schedule). */
+  GCSize estimate;	/* Estimate of memory actually in use. */
   MSize stepmul;	/* Incremental GC step granularity. */
-  MSize debt;		/* Debt (how much GC is behind schedule). */
-  MSize estimate;	/* Estimate of memory actually in use. */
   MSize pause;		/* Pause between successive GC cycles. */
 } GCState;
 
@@ -516,8 +598,8 @@ typedef struct global_State {
   lua_Alloc allocf;	/* Memory allocator. */
   void *allocd;		/* Memory allocator data. */
   GCState gc;		/* Garbage collector. */
-  SBuf tmpbuf;		/* Temporary buffer for string concatenation. */
-  Node nilnode;		/* Fallback 1-element hash part (nil key and value). */
+  volatile int32_t vmstate;  /* VM state or current JIT code trace number. */
+  SBuf tmpbuf;		/* Temporary string buffer. */
   GCstr strempty;	/* Empty string. */
   uint8_t stremptyz;	/* Zero terminator of empty string. */
   uint8_t hookmask;	/* Hook mask. */
@@ -526,17 +608,17 @@ typedef struct global_State {
   GCRef mainthref;	/* Link to main thread. */
   TValue registrytv;	/* Anchor for registry. */
   TValue tmptv, tmptv2;	/* Temporary TValues. */
+  Node nilnode;		/* Fallback 1-element hash part (nil key and value). */
   GCupval uvhead;	/* Head of double-linked list of all open upvalues. */
   int32_t hookcount;	/* Instruction hook countdown. */
   int32_t hookcstart;	/* Start count for instruction hook counter. */
   lua_Hook hookf;	/* Hook function. */
   lua_CFunction wrapf;	/* Wrapper for C function calls. */
   lua_CFunction panic;	/* Called as a last resort for errors. */
-  volatile int32_t vmstate;  /* VM state or current JIT code trace number. */
   BCIns bc_cfunc_int;	/* Bytecode for internal C function calls. */
   BCIns bc_cfunc_ext;	/* Bytecode for external C function calls. */
-  GCRef jit_L;		/* Current JIT code lua_State or NULL. */
-  MRef jit_base;	/* Current JIT code L->base. */
+  GCRef cur_L;		/* Currently executing lua_State. */
+  MRef jit_base;	/* Current JIT code L->base or NULL. */
   MRef ctype_state;	/* Pointer to C type state. */
   GCRef gcroot[GCROOT_MAX];  /* GC roots. */
 } global_State;
@@ -553,6 +635,7 @@ typedef struct global_State {
 #define HOOK_ACTIVE_SHIFT	4
 #define HOOK_VMEVENT		0x20
 #define HOOK_GC			0x40
+#define HOOK_PROFILE		0x80
 #define hook_active(g)		((g)->hookmask & HOOK_ACTIVE)
 #define hook_enter(g)		((g)->hookmask |= HOOK_ACTIVE)
 #define hook_entergc(g)		((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC))
@@ -583,7 +666,13 @@ struct lua_State {
 #define registry(L)		(&G(L)->registrytv)
 
 /* Macros to access the currently executing (Lua) function. */
+#if LJ_GC64
+#define curr_func(L)		(&gcval(L->base-2)->fn)
+#elif LJ_FR2
+#define curr_func(L)		(&gcref((L->base-2)->gcr)->fn)
+#else
 #define curr_func(L)		(&gcref((L->base-1)->fr.func)->fn)
+#endif
 #define curr_funcisL(L)		(isluafunc(curr_func(L)))
 #define curr_proto(L)		(funcproto(curr_func(L)))
 #define curr_topL(L)		(L->base + curr_proto(L)->framesize)
@@ -647,12 +736,17 @@ typedef union GCobj {
 #endif
 
 /* Macros to test types. */
+#if LJ_GC64
+#define itype(o)	((uint32_t)((o)->it64 >> 47))
+#define tvisnil(o)	((o)->it64 == -1)
+#else
 #define itype(o)	((o)->it)
 #define tvisnil(o)	(itype(o) == LJ_TNIL)
+#endif
 #define tvisfalse(o)	(itype(o) == LJ_TFALSE)
 #define tvistrue(o)	(itype(o) == LJ_TTRUE)
 #define tvisbool(o)	(tvisfalse(o) || tvistrue(o))
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 #define tvislightud(o)	(((int32_t)itype(o) >> 15) == -2)
 #else
 #define tvislightud(o)	(itype(o) == LJ_TLIGHTUD)
@@ -686,7 +780,7 @@ typedef union GCobj {
 #define rawnumequal(o1, o2)	((o1)->u64 == (o2)->u64)
 
 /* Macros to convert type ids. */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 #define itypemap(o) \
   (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o))
 #else
@@ -694,8 +788,12 @@ typedef union GCobj {
 #endif
 
 /* Macros to get tagged values. */
+#if LJ_GC64
+#define gcval(o)	((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK))
+#else
 #define gcval(o)	(gcref((o)->gcr))
-#define boolV(o)	check_exp(tvisbool(o), (LJ_TFALSE - (o)->it))
+#endif
+#define boolV(o)	check_exp(tvisbool(o), (LJ_TFALSE - itype(o)))
 #if LJ_64
 #define lightudV(o) \
   check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff)))
@@ -714,13 +812,23 @@ typedef union GCobj {
 #define intV(o)		check_exp(tvisint(o), (int32_t)(o)->i)
 
 /* Macros to set tagged values. */
+#if LJ_GC64
+#define setitype(o, i)		((o)->it = ((i) << 15))
+#define setnilV(o)		((o)->it64 = -1)
+#define setpriV(o, x)		((o)->it64 = (int64_t)~((uint64_t)~(x)<<47))
+#define setboolV(o, x)		((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47))
+#else
 #define setitype(o, i)		((o)->it = (i))
 #define setnilV(o)		((o)->it = LJ_TNIL)
 #define setboolV(o, x)		((o)->it = LJ_TFALSE-(uint32_t)(x))
+#define setpriV(o, i)		(setitype((o), (i)))
+#endif
 
 static LJ_AINLINE void setlightudV(TValue *o, void *p)
 {
-#if LJ_64
+#if LJ_GC64
+  o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47);
+#elif LJ_64
   o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48);
 #else
   setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD);
@@ -730,10 +838,16 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p)
 #if LJ_64
 #define checklightudptr(L, p) \
   (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p))
+#else
+#define checklightudptr(L, p)	(p)
+#endif
+
+#if LJ_FR2
+#define setcont(o, f)		((o)->u64 = (uint64_t)(uintptr_t)(void *)(f))
+#elif LJ_64
 #define setcont(o, f) \
   ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin)
 #else
-#define checklightudptr(L, p)	(p)
 #define setcont(o, f)		setlightudV((o), (void *)(f))
 #endif
 
@@ -741,9 +855,18 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p)
   UNUSED(L), lua_assert(!tvisgcv(o) || \
   ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o))))
 
-static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t itype)
+static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype)
 {
-  setgcref(o->gcr, v); setitype(o, itype); tvchecklive(L, o);
+#if LJ_GC64
+  setgcreft(o->gcr, v, itype);
+#else
+  setgcref(o->gcr, v); setitype(o, itype);
+#endif
+}
+
+static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it)
+{
+  setgcVraw(o, v, it); tvchecklive(L, o);
 }
 
 #define define_setV(name, type, tag) \
@@ -810,11 +933,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
 #endif
 }
 
-#if LJ_TARGET_X86 && !defined(__SSE2__)
-#define lj_num2int(n)   lj_num2bit((n))
-#else
 #define lj_num2int(n)   ((int32_t)(n))
-#endif
 
 static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
 {
@@ -851,6 +970,7 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
 #define lj_typename(o)	(lj_obj_itypename[itypemap(o)])
 
 /* Compare two objects without calling metamethods. */
-LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2);
+LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
+LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(cTValue *o);
 
 #endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_dce.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_dce.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_dce.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_dce.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_fold.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_fold.c
similarity index 88%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_fold.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_fold.c
index d00fdd56f5a8f445147eabc926836d03c9bc2a1e..f809a991a73729956e5cbaa6a29ac14b103225ea 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_fold.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
 
 #if LJ_HASJIT
 
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_ir.h"
 #include "lj_jit.h"
+#include "lj_ircall.h"
 #include "lj_iropt.h"
 #include "lj_trace.h"
 #if LJ_HASFFI
 #include "lj_ctype.h"
-#endif
 #include "lj_carith.h"
+#endif
 #include "lj_vm.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"
 
 /* Here's a short description how the FOLD engine processes instructions:
 **
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
 
 /* Barrier to prevent folding across a GC step.
 ** GC steps can only happen at the head of a trace and at LOOP.
-** And the GC is only driven forward if there is at least one allocation.
+** And the GC is only driven forward if there's at least one allocation.
 */
 #define gcstep_barrier(J, ref) \
   ((ref) < J->chain[IR_LOOP] && \
    (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
     J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
-    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
+    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
+    J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
 
 /* -- Constant folding for FP numbers ------------------------------------- */
 
@@ -336,11 +340,9 @@ LJFOLDF(kfold_intcomp0)
 static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
 {
   switch (op) {
-#if LJ_64 || LJ_HASFFI
+#if LJ_HASFFI
   case IR_ADD: k1 += k2; break;
   case IR_SUB: k1 -= k2; break;
-#endif
-#if LJ_HASFFI
   case IR_MUL: k1 *= k2; break;
   case IR_BAND: k1 &= k2; break;
   case IR_BOR: k1 |= k2; break;
@@ -392,20 +394,10 @@ LJFOLD(BROL KINT64 KINT)
 LJFOLD(BROR KINT64 KINT)
 LJFOLDF(kfold_int64shift)
 {
-#if LJ_HASFFI || LJ_64
+#if LJ_HASFFI
   uint64_t k = ir_k64(fleft)->u64;
   int32_t sh = (fright->i & 63);
-  switch ((IROp)fins->o) {
-  case IR_BSHL: k <<= sh; break;
-#if LJ_HASFFI
-  case IR_BSHR: k >>= sh; break;
-  case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
-  case IR_BROL: k = lj_rol(k, sh); break;
-  case IR_BROR: k = lj_ror(k, sh); break;
-#endif
-  default: lua_assert(0); break;
-  }
-  return INT64FOLD(k);
+  return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
 #else
   UNUSED(J); lua_assert(0); return FAILFOLD;
 #endif
@@ -528,6 +520,180 @@ LJFOLDF(kfold_strcmp)
   return NEXTFOLD;
 }
 
+/* -- Constant folding and forwarding for buffers ------------------------- */
+
+/*
+** Buffer ops perform stores, but their effect is limited to the buffer
+** itself. Also, buffer ops are chained: a use of an op implies a use of
+** all other ops up the chain. Conversely, if an op is unused, all ops
+** up the chain can go unused. This largely eliminates the need to treat
+** them as stores.
+**
+** Alas, treating them as normal (IRM_N) ops doesn't work, because they
+** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
+** or if FOLD is disabled.
+**
+** The compromise is to declare them as loads, emit them like stores and
+** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
+** fragments left over from CSE are eliminated by DCE.
+*/
+
+/* BUFHDR is emitted like a store, see below. */
+
+LJFOLD(BUFPUT BUFHDR BUFSTR)
+LJFOLDF(bufput_append)
+{
+  /* New buffer, no other buffer op in between and same buffer? */
+  if ((J->flags & JIT_F_OPT_FWD) &&
+      !(fleft->op2 & IRBUFHDR_APPEND) &&
+      fleft->prev == fright->op2 &&
+      fleft->op1 == IR(fright->op2)->op1) {
+    IRRef ref = fins->op1;
+    IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND);  /* Modify BUFHDR. */
+    IR(ref)->op1 = fright->op1;
+    return ref;
+  }
+  return EMITFOLD;  /* Always emit, CSE later. */
+}
+
+LJFOLD(BUFPUT any any)
+LJFOLDF(bufput_kgc)
+{
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
+    GCstr *s2 = ir_kstr(fright);
+    if (s2->len == 0) {  /* Empty string? */
+      return LEFTFOLD;
+    } else {
+      if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
+	  !irt_isphi(fleft->t)) {  /* Join two constant string puts in a row. */
+	GCstr *s1 = ir_kstr(IR(fleft->op2));
+	IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
+	/* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */
+	IR(fins->op1)->op2 = kref;  /* Modify previous BUFPUT. */
+	return fins->op1;
+      }
+    }
+  }
+  return EMITFOLD;  /* Always emit, CSE later. */
+}
+
+LJFOLD(BUFSTR any any)
+LJFOLDF(bufstr_kfold_cse)
+{
+  lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
+	     fleft->o == IR_CALLL);
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
+    if (fleft->o == IR_BUFHDR) {  /* No put operations? */
+      if (!(fleft->op2 & IRBUFHDR_APPEND))  /* Empty buffer? */
+	return lj_ir_kstr(J, &J2G(J)->strempty);
+      fins->op1 = fleft->op1;
+      fins->op2 = fleft->prev;  /* Relies on checks in bufput_append. */
+      return CSEFOLD;
+    } else if (fleft->o == IR_BUFPUT) {
+      IRIns *irb = IR(fleft->op1);
+      if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
+	return fleft->op2;  /* Shortcut for a single put operation. */
+    }
+  }
+  /* Try to CSE the whole chain. */
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
+    IRRef ref = J->chain[IR_BUFSTR];
+    while (ref) {
+      IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
+      while (ira->o == irb->o && ira->op2 == irb->op2) {
+	lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
+		   ira->o == IR_CALLL || ira->o == IR_CARG);
+	if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
+	  return ref;  /* CSE succeeded. */
+	if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
+	  break;
+	ira = IR(ira->op1);
+	irb = IR(irb->op1);
+      }
+      ref = irs->prev;
+    }
+  }
+  return EMITFOLD;  /* No CSE possible. */
+}
+
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
+LJFOLDF(bufput_kfold_op)
+{
+  if (irref_isk(fleft->op2)) {
+    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
+    SBuf *sb = lj_buf_tmp_(J->L);
+    sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
+						       ir_kstr(IR(fleft->op2)));
+    fins->o = IR_BUFPUT;
+    fins->op1 = fleft->op1;
+    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
+    return RETRYFOLD;
+  }
+  return EMITFOLD;  /* Always emit, CSE later. */
+}
+
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
+LJFOLDF(bufput_kfold_rep)
+{
+  if (irref_isk(fleft->op2)) {
+    IRIns *irc = IR(fleft->op1);
+    if (irref_isk(irc->op2)) {
+      SBuf *sb = lj_buf_tmp_(J->L);
+      sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
+      fins->o = IR_BUFPUT;
+      fins->op1 = irc->op1;
+      fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
+      return RETRYFOLD;
+    }
+  }
+  return EMITFOLD;  /* Always emit, CSE later. */
+}
+
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
+LJFOLDF(bufput_kfold_fmt)
+{
+  IRIns *irc = IR(fleft->op1);
+  lua_assert(irref_isk(irc->op2));  /* SFormat must be const. */
+  if (irref_isk(fleft->op2)) {
+    SFormat sf = (SFormat)IR(irc->op2)->i;
+    IRIns *ira = IR(fleft->op2);
+    SBuf *sb = lj_buf_tmp_(J->L);
+    switch (fins->op2) {
+    case IRCALL_lj_strfmt_putfxint:
+      sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
+      break;
+    case IRCALL_lj_strfmt_putfstr:
+      sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
+      break;
+    case IRCALL_lj_strfmt_putfchar:
+      sb = lj_strfmt_putfchar(sb, sf, ira->i);
+      break;
+    case IRCALL_lj_strfmt_putfnum_int:
+    case IRCALL_lj_strfmt_putfnum_uint:
+    case IRCALL_lj_strfmt_putfnum:
+    default: {
+      const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
+      sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
+							 ir_knum(ira)->n);
+      break;
+      }
+    }
+    fins->o = IR_BUFPUT;
+    fins->op1 = irc->op1;
+    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
+    return RETRYFOLD;
+  }
+  return EMITFOLD;  /* Always emit, CSE later. */
+}
+
 /* -- Constant folding of pointer arithmetic ------------------------------ */
 
 LJFOLD(ADD KGC KINT)
@@ -648,27 +814,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
 LJFOLDF(kfold_conv_knum_int_num)
 {
   lua_Number n = knumleft;
-  if (!(fins->op2 & IRCONV_TRUNC)) {
-    int32_t k = lj_num2int(n);
-    if (irt_isguard(fins->t) && n != (lua_Number)k) {
-      /* We're about to create a guard which always fails, like CONV +1.5.
-      ** Some pathological loops cause this during LICM, e.g.:
-      **   local x,k,t = 0,1.5,{1,[1.5]=2}
-      **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
-      **   assert(x == 300)
-      */
-      return FAILFOLD;
-    }
-    return INTFOLD(k);
-  } else {
-    return INTFOLD((int32_t)n);
+  int32_t k = lj_num2int(n);
+  if (irt_isguard(fins->t) && n != (lua_Number)k) {
+    /* We're about to create a guard which always fails, like CONV +1.5.
+    ** Some pathological loops cause this during LICM, e.g.:
+    **   local x,k,t = 0,1.5,{1,[1.5]=2}
+    **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
+    **   assert(x == 300)
+    */
+    return FAILFOLD;
   }
+  return INTFOLD(k);
 }
 
 LJFOLD(CONV KNUM IRCONV_U32_NUM)
 LJFOLDF(kfold_conv_knum_u32_num)
 {
-  lua_assert((fins->op2 & IRCONV_TRUNC));
 #ifdef _MSC_VER
   {  /* Workaround for MSVC bug. */
     volatile uint32_t u = (uint32_t)knumleft;
@@ -682,27 +843,27 @@ LJFOLDF(kfold_conv_knum_u32_num)
 LJFOLD(CONV KNUM IRCONV_I64_NUM)
 LJFOLDF(kfold_conv_knum_i64_num)
 {
-  lua_assert((fins->op2 & IRCONV_TRUNC));
   return INT64FOLD((uint64_t)(int64_t)knumleft);
 }
 
 LJFOLD(CONV KNUM IRCONV_U64_NUM)
 LJFOLDF(kfold_conv_knum_u64_num)
 {
-  lua_assert((fins->op2 & IRCONV_TRUNC));
   return INT64FOLD(lj_num2u64(knumleft));
 }
 
-LJFOLD(TOSTR KNUM)
+LJFOLD(TOSTR KNUM any)
 LJFOLDF(kfold_tostr_knum)
 {
-  return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
+  return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
 }
 
-LJFOLD(TOSTR KINT)
+LJFOLD(TOSTR KINT any)
 LJFOLDF(kfold_tostr_kint)
 {
-  return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
+  return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
+		       lj_strfmt_int(J->L, fleft->i) :
+		       lj_strfmt_char(J->L, fleft->i));
 }
 
 LJFOLD(STRTO KGC)
@@ -1205,7 +1366,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
   ** But this is mainly intended for simple address arithmetic.
   ** Also it's easier for the backend to optimize the original multiplies.
   */
-  if (k == 1) {  /* i * 1 ==> i */
+  if (k == 0) {  /* i * 0 ==> 0 */
+    return RIGHTFOLD;
+  } else if (k == 1) {  /* i * 1 ==> i */
     return LEFTFOLD;
   } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
     fins->o = IR_BSHL;
@@ -1218,9 +1381,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
 LJFOLD(MUL any KINT)
 LJFOLDF(simplify_intmul_k32)
 {
-  if (fright->i == 0)  /* i * 0 ==> 0 */
-    return INTFOLD(0);
-  else if (fright->i > 0)
+  if (fright->i >= 0)
     return simplify_intmul_k(J, fright->i);
   return NEXTFOLD;
 }
@@ -1228,14 +1389,13 @@ LJFOLDF(simplify_intmul_k32)
 LJFOLD(MUL any KINT64)
 LJFOLDF(simplify_intmul_k64)
 {
-  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
-    return INT64FOLD(0);
-#if LJ_64
-  /* NYI: SPLIT for BSHL and 32 bit backend support. */
-  else if (ir_kint64(fright)->u64 < 0x80000000u)
+#if LJ_HASFFI
+  if (ir_kint64(fright)->u64 < 0x80000000u)
     return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
-#endif
   return NEXTFOLD;
+#else
+  UNUSED(J); lua_assert(0); return FAILFOLD;
+#endif
 }
 
 LJFOLD(MOD any KINT)
@@ -1535,7 +1695,7 @@ LJFOLD(BOR BOR KINT64)
 LJFOLD(BXOR BXOR KINT64)
 LJFOLDF(reassoc_intarith_k64)
 {
-#if LJ_HASFFI || LJ_64
+#if LJ_HASFFI
   IRIns *irk = IR(fleft->op2);
   if (irk->o == IR_KINT64) {
     uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
@@ -1953,6 +2113,7 @@ LJFOLDF(fwd_href_tdup)
 ** an aliased table, as it may invalidate all of the pointers and fields.
 ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
 ** FLOADs. And NEWREF itself is treated like a store (see below).
+** LREF is constant (per trace) since coroutine switches are not inlined.
 */
 LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
 LJFOLDF(fload_tab_tnew_asize)
@@ -2016,6 +2177,14 @@ LJFOLDF(fload_str_len_snew)
   return NEXTFOLD;
 }
 
+LJFOLD(FLOAD TOSTR IRFL_STR_LEN)  /* String length load from a TOSTR result. */
+LJFOLDF(fload_str_len_tostr)
+{
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
+    return INTFOLD(1);  /* TOSTR in char mode: the length folds to 1. */
+  return NEXTFOLD;  /* Other TOSTR modes: try the next fold rule. */
+}
+
 /* The C type ID of cdata objects is immutable. */
 LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
 LJFOLDF(fload_cdata_typeid_kgc)
@@ -2062,6 +2231,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew)
 }
 
 LJFOLD(FLOAD any IRFL_STR_LEN)
+LJFOLD(FLOAD any IRFL_FUNC_ENV)
+LJFOLD(FLOAD any IRFL_THREAD_ENV)
 LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
 LJFOLD(FLOAD any IRFL_CDATA_PTR)
 LJFOLD(FLOAD any IRFL_CDATA_INT)
@@ -2127,6 +2298,17 @@ LJFOLDF(barrier_tnew_tdup)
   return DROPFOLD;
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+LJFOLD(PROF any any)  /* Avoid emitting two PROF instructions back to back. */
+LJFOLDF(prof)
+{
+  IRRef ref = J->chain[IR_PROF];  /* Head of the PROF chain. */
+  if (ref+1 == J->cur.nins)  /* Drop neighbouring IR_PROF. */
+    return ref;  /* Reuse the existing PROF instead of emitting a new one. */
+  return EMITFOLD;
+}
+
 /* -- Stores and allocations ---------------------------------------------- */
 
 /* Stores and allocations cannot be folded or passed on to CSE in general.
@@ -2149,8 +2331,9 @@ LJFOLD(XSTORE any any)
 LJFOLDX(lj_opt_dse_xstore)
 
 LJFOLD(NEWREF any any)  /* Treated like a store. */
-LJFOLD(CALLS any any)
+LJFOLD(CALLA any any)
 LJFOLD(CALLL any any)  /* Safeguard fallback. */
+LJFOLD(CALLS any any)
 LJFOLD(CALLXS any any)
 LJFOLD(XBAR)
 LJFOLD(RETF any any)  /* Modifies BASE. */
@@ -2158,6 +2341,7 @@ LJFOLD(TNEW any any)
 LJFOLD(TDUP any)
 LJFOLD(CNEW any any)
 LJFOLD(XSNEW any any)
+LJFOLD(BUFHDR any any)
 LJFOLDX(lj_ir_emit)
 
 /* ------------------------------------------------------------------------ */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_loop.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_loop.c
similarity index 96%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_loop.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_loop.c
index b7d1923edb7351bc613c327223e28caf3feaceeb..4b4ab7dc3a3c58d786ad80eff82c0e6faf4ce828 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_loop.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_loop.c
@@ -11,7 +11,7 @@
 #if LJ_HASJIT
 
 #include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_iropt.h"
@@ -254,9 +254,16 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
   J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap);
 }
 
+typedef struct LoopState {  /* Context passed through lj_vm_cpcall to loop_unroll. */
+  jit_State *J;  /* JIT compiler state. */
+  IRRef1 *subst;  /* Substitution table; allocated in loop_unroll, freed by lj_opt_loop. */
+  MSize sizesubst;  /* Number of IRRef1 entries in subst (needed to free it). */
+} LoopState;
+
 /* Unroll loop. */
-static void loop_unroll(jit_State *J)
+static void loop_unroll(LoopState *lps)
 {
+  jit_State *J = lps->J;
   IRRef1 phi[LJ_MAX_PHI];
   uint32_t nphi = 0;
   IRRef1 *subst;
@@ -265,13 +272,13 @@ static void loop_unroll(jit_State *J)
   SnapEntry *loopmap, *psentinel;
   IRRef ins, invar;
 
-  /* Use temp buffer for substitution table.
+  /* Allocate substitution table.
   ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
-  ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
   */
   invar = J->cur.nins;
-  subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf,
-				   (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS;
+  lps->sizesubst = invar - REF_BIAS;
+  lps->subst = lj_mem_newvec(J->L, lps->sizesubst, IRRef1);
+  subst = lps->subst - REF_BIAS;
   subst[REF_BASE] = REF_BASE;
 
   /* LOOP separates the pre-roll from the loop body. */
@@ -396,7 +403,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap)
 static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
 {
   UNUSED(L); UNUSED(dummy);
-  loop_unroll((jit_State *)ud);
+  loop_unroll((LoopState *)ud);
   return NULL;
 }
 
@@ -406,7 +413,13 @@ int lj_opt_loop(jit_State *J)
   IRRef nins = J->cur.nins;
   SnapNo nsnap = J->cur.nsnap;
   MSize nsnapmap = J->cur.nsnapmap;
-  int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt);
+  LoopState lps;
+  int errcode;
+  lps.J = J;
+  lps.subst = NULL;
+  lps.sizesubst = 0;
+  errcode = lj_vm_cpcall(J->L, NULL, &lps, cploop_opt);
+  lj_mem_freevec(J2G(J), lps.subst, lps.sizesubst, IRRef1);
   if (LJ_UNLIKELY(errcode)) {
     lua_State *L = J->L;
     if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) {  /* Trace error? */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_mem.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_mem.c
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_mem.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_mem.c
index a4d96fc0f9cb2fc6f4c6e9d996019550370fcc83..e04a6228f4bbeab039ed76a89353bfbea266cf6a 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_mem.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_mem.c
@@ -17,6 +17,7 @@
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_iropt.h"
+#include "lj_ircall.h"
 
 /* Some local macros to save typing. Undef'd at the end. */
 #define IR(ref)		(&J->cur.ir[(ref)])
@@ -308,7 +309,21 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J)
   return 1;  /* No conflict. Can fold to niltv. */
 }
 
-/* Check whether there's no aliasing NEWREF for the left operand. */
+/* Check whether there's no aliasing table.clear. */
+static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta)
+{
+  IRRef ref = J->chain[IR_CALLS];  /* Walk the CALLS chain down to lim. */
+  while (ref > lim) {
+    IRIns *calls = IR(ref);
+    if (calls->op2 == IRCALL_lj_tab_clear &&  /* Call id is lj_tab_clear ... */
+	(ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO))  /* ... on ta or a possible alias. */
+      return 0;  /* Conflict. */
+    ref = calls->prev;
+  }
+  return 1;  /* No conflict. Can safely FOLD/CSE. */
+}
+
+/* Check whether there's no aliasing NEWREF/table.clear for the left operand. */
 int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
 {
   IRRef ta = fins->op1;
@@ -319,7 +334,7 @@ int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
       return 0;  /* Conflict. */
     ref = newref->prev;
   }
-  return 1;  /* No conflict. Can safely FOLD/CSE. */
+  return fwd_aa_tab_clear(J, lim, ta);
 }
 
 /* ASTORE/HSTORE elimination. */
@@ -854,6 +869,10 @@ TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
     ref = store->prev;
   }
 
+  /* Search for aliasing table.clear. */
+  if (!fwd_aa_tab_clear(J, lim, tab))
+    return lj_ir_emit(J);
+
   /* Try to find a matching load. Below the conflicting store, if any. */
   return lj_opt_cselim(J, lim);
 }
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_narrow.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_narrow.c
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_narrow.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_narrow.c
index 58b3763dba5af3d7657e046b0cd73a6a44ae3514..d1993452a6afff14da2b9b57184507528381eaa1 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_narrow.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_narrow.c
@@ -205,7 +205,6 @@ typedef struct NarrowConv {
   jit_State *J;		/* JIT compiler state. */
   NarrowIns *sp;	/* Current stack pointer. */
   NarrowIns *maxsp;	/* Maximum stack pointer minus redzone. */
-  int lim;		/* Limit on the number of emitted conversions. */
   IRRef mode;		/* Conversion mode (IRCONV_*). */
   IRType t;		/* Destination type: IRT_INT or IRT_I64. */
   NarrowIns stack[NARROW_MAX_STACK];  /* Stack holding stack-machine code. */
@@ -342,7 +341,7 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
       NarrowIns *savesp = nc->sp;
       int count = narrow_conv_backprop(nc, ir->op1, depth);
       count += narrow_conv_backprop(nc, ir->op2, depth);
-      if (count <= nc->lim) {  /* Limit total number of conversions. */
+      if (count <= 1) {  /* Limit total number of conversions. */
 	*nc->sp++ = NARROWINS(IRT(ir->o, nc->t), ref);
 	return count;
       }
@@ -414,12 +413,10 @@ TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J)
     nc.t = irt_type(fins->t);
     if (fins->o == IR_TOBIT) {
       nc.mode = IRCONV_TOBIT;  /* Used only in the backpropagation cache. */
-      nc.lim = 2;  /* TOBIT can use a more optimistic rule. */
     } else {
       nc.mode = fins->op2;
-      nc.lim = 1;
     }
-    if (narrow_conv_backprop(&nc, fins->op1, 0) <= nc.lim)
+    if (narrow_conv_backprop(&nc, fins->op1, 0) <= 1)
       return narrow_conv_emit(J, &nc);
   }
   return NEXTFOLD;
@@ -504,8 +501,7 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
 {
   lua_assert(tref_isnumber(tr));
   if (tref_isnum(tr))
-    return emitir(IRT(IR_CONV, IRT_INTP), tr,
-		  (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
+    return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
   /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
   return narrow_stripov(J, tr, IR_MULOV,
 			LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_sink.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_sink.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_sink.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_sink.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_split.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_split.c
similarity index 87%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_split.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_split.c
index 1cee5093b6d35d2e8a50e4369c1fa38f08258687..81ded6c0a01c045ae0f68d81811d9e49a76f08d3 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_opt_split.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_opt_split.c
@@ -11,7 +11,7 @@
 #if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
 
 #include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_ircall.h"
@@ -139,6 +139,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
   ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
   return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
 }
+#endif
 
 /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
 static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -155,7 +156,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
   ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
   return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
 }
-#endif
 
 /* Emit a CALLN with two split 64 bit arguments. */
 static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -195,6 +195,118 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
   return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
 }
 
+#if LJ_HASFFI
+static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
+			    IRIns *oir, IRIns *nir, IRIns *ir)
+{  /* Split a 64 bit shift/rotate into 32 bit ops. Returns the hiword ref. */
+  IROp op = ir->o;
+  IRRef kref = nir->op2;  /* Shift count operand. */
+  if (irref_isk(kref)) {  /* Optimize constant shifts. */
+    int32_t k = (IR(kref)->i & 63);  /* Only the low 6 bits of the count are used. */
+    IRRef lo = nir->op1, hi = hisubst[ir->op1];  /* Both halves of the shifted value. */
+    if (op == IR_BROL || op == IR_BROR) {
+      if (op == IR_BROR) k = (-k & 63);  /* Treat ror k as rol (64-k). */
+      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }  /* Rotate by 32 == swap halves. */
+      if (k == 0) {
+      passthrough:
+	J->cur.nins--;  /* Discard the most recently emitted instruction. */
+	ir->prev = lo;  /* Operand halves are passed through unchanged. */
+	return hi;
+      } else {
+	TRef k1, k2;
+	IRRef t1, t2, t3, t4;
+	J->cur.nins--;  /* Discard the most recently emitted instruction. */
+	k1 = lj_ir_kint(J, k);
+	k2 = lj_ir_kint(J, (-k & 31));  /* 32-k, since 0 < k < 32 here. */
+	t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
+	t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
+	t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
+	t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
+	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);  /* (lo << k) | (hi >> (32-k)) */
+	return split_emit(J, IRTI(IR_BOR), t2, t3);  /* (hi << k) | (lo >> (32-k)) */
+      }
+    } else if (k == 0) {  /* Shift by zero. */
+      goto passthrough;
+    } else if (k < 32) {  /* Bits cross from one half into the other. */
+      if (op == IR_BSHL) {
+	IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
+	IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
+	return split_emit(J, IRTI(IR_BOR), t1, t2);  /* (hi << k) | (lo >> (32-k)) */
+      } else {
+	IRRef t1 = ir->prev, t2;
+	lua_assert(op == IR_BSHR || op == IR_BSAR);
+	nir->o = IR_BSHR;  /* Force a logical shift in nir, even for BSAR. */
+	t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
+	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);  /* t1 | (hi << (32-k)) */
+	return split_emit(J, IRTI(op), hi, kref);  /* Shift of hi with the original op. */
+      }
+    } else {  /* k >= 32. */
+      if (op == IR_BSHL) {
+	if (k == 32)
+	  J->cur.nins--;  /* Discard the most recently emitted instruction. */
+	else
+	  lo = ir->prev;
+	ir->prev = lj_ir_kint(J, 0);  /* Constant zero is stored in ir->prev. */
+	return lo;
+      } else {
+	lua_assert(op == IR_BSHR || op == IR_BSAR);
+	if (k == 32) {
+	  J->cur.nins--;  /* Discard the most recently emitted instruction. */
+	  ir->prev = hi;
+	} else {
+	  nir->op1 = hi;  /* Retarget nir to shift hi instead. */
+	}
+	if (op == IR_BSHR)
+	  return lj_ir_kint(J, 0);  /* Logical shift: constant zero. */
+	else
+	  return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));  /* Arithmetic: hi >> 31. */
+      }
+    }
+  }
+  return split_call_li(J, hisubst, oir, ir,  /* Non-constant count: emit a call. */
+		       op - IR_BSHL + IRCALL_lj_carith_shl64);  /* Call id offset by opcode. */
+}
+
+static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
+			 IRIns *nir, IRIns *ir)
+{  /* Split a 64 bit BAND/BOR/BXOR into 32 bit ops. Returns the hiword ref. */
+  IROp op = ir->o;
+  IRRef hi, kref = nir->op2;
+  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
+    int32_t k = IR(kref)->i;
+    if (k == 0 || k == -1) {  /* All-zero or all-one constant. */
+      if (op == IR_BAND) k = ~k;  /* Normalize: k == 0 now means identity. */
+      if (k == 0) {  /* x & -1, x | 0, x ^ 0 ==> x */
+	J->cur.nins--;  /* Discard the most recently emitted instruction. */
+	ir->prev = nir->op1;
+      } else if (op == IR_BXOR) {  /* x ^ -1 ==> ~x */
+	nir->o = IR_BNOT;
+	nir->op2 = 0;
+      } else {  /* x & 0 ==> 0, x | -1 ==> -1 */
+	J->cur.nins--;  /* Discard the most recently emitted instruction. */
+	ir->prev = kref;
+      }
+    }
+  }
+  hi = hisubst[ir->op1];
+  kref = hisubst[ir->op2];
+  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
+    int32_t k = IR(kref)->i;
+    if (k == 0 || k == -1) {  /* All-zero or all-one constant. */
+      if (op == IR_BAND) k = ~k;  /* Normalize: k == 0 now means identity. */
+      if (k == 0) {  /* x & -1, x | 0, x ^ 0 ==> x */
+	return hi;
+      } else if (op == IR_BXOR) {  /* x ^ -1 ==> ~x */
+	return split_emit(J, IRTI(IR_BNOT), hi, 0);
+      } else {  /* x & 0 ==> 0, x | -1 ==> -1 */
+	return kref;
+      }
+    }
+  }
+  return split_emit(J, IRTI(op), hi, kref);  /* General case: same op on the hi halves. */
+}
+#endif
+
 /* Substitute references of a snapshot. */
 static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
 {
@@ -214,7 +326,7 @@ static void split_ir(jit_State *J)
   IRRef nins = J->cur.nins, nk = J->cur.nk;
   MSize irlen = nins - nk;
   MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
-  IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need);
+  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
   IRRef1 *hisubst;
   IRRef ref, snref;
   SnapShot *snap;
@@ -438,6 +550,19 @@ static void split_ir(jit_State *J)
 			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
 					      IRCALL_lj_carith_powu64);
 	break;
+      case IR_BNOT:
+	hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
+	break;
+      case IR_BSWAP:
+	ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
+	hi = nref;
+	break;
+      case IR_BAND: case IR_BOR: case IR_BXOR:
+	hi = split_bitop(J, hisubst, nir, ir);
+	break;
+      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+	hi = split_bitshift(J, hisubst, oir, nir, ir);
+	break;
       case IR_FLOAD:
 	lua_assert(ir->op2 == IRFL_CDATA_INT64);
 	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_parse.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_parse.c
similarity index 94%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_parse.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_parse.c
index abfac3c0c813b52bd6557baaba34153c86c8b573..9891897ed6f409567e662d20d879872b8ad01610 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_parse.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_parse.c
@@ -13,6 +13,7 @@
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_debug.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_func.h"
@@ -21,6 +22,7 @@
 #if LJ_HASFFI
 #include "lj_ctype.h"
 #endif
+#include "lj_strfmt.h"
 #include "lj_lex.h"
 #include "lj_parse.h"
 #include "lj_vm.h"
@@ -165,12 +167,12 @@ LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
 
 LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
 {
-  lj_lex_error(ls, ls->token, em);
+  lj_lex_error(ls, ls->tok, em);
 }
 
-LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token)
+LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok)
 {
-  lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token));
+  lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok));
 }
 
 LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
@@ -660,16 +662,16 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key)
   BCReg idx, func, obj = expr_toanyreg(fs, e);
   expr_free(fs, e);
   func = fs->freereg;
-  bcemit_AD(fs, BC_MOV, func+1, obj);  /* Copy object to first argument. */
+  bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj);  /* Copy object to 1st argument. */
   lua_assert(expr_isstrk(key));
   idx = const_str(fs, key);
   if (idx <= BCMAX_C) {
-    bcreg_reserve(fs, 2);
+    bcreg_reserve(fs, 2+LJ_FR2);
     bcemit_ABC(fs, BC_TGETS, func, obj, idx);
   } else {
-    bcreg_reserve(fs, 3);
-    bcemit_AD(fs, BC_KSTR, func+2, idx);
-    bcemit_ABC(fs, BC_TGETV, func, obj, func+2);
+    bcreg_reserve(fs, 3+LJ_FR2);
+    bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx);
+    bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2);
     fs->freereg--;
   }
   e->u.s.info = func;
@@ -983,7 +985,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
 /* Check and consume optional token. */
 static int lex_opt(LexState *ls, LexToken tok)
 {
-  if (ls->token == tok) {
+  if (ls->tok == tok) {
     lj_lex_next(ls);
     return 1;
   }
@@ -993,7 +995,7 @@ static int lex_opt(LexState *ls, LexToken tok)
 /* Check and consume token. */
 static void lex_check(LexState *ls, LexToken tok)
 {
-  if (ls->token != tok)
+  if (ls->tok != tok)
     err_token(ls, tok);
   lj_lex_next(ls);
 }
@@ -1007,7 +1009,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
     } else {
       const char *swhat = lj_lex_token2str(ls, what);
       const char *swho = lj_lex_token2str(ls, who);
-      lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line);
+      lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line);
     }
   }
 }
@@ -1016,9 +1018,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
 static GCstr *lex_str(LexState *ls)
 {
   GCstr *s;
-  if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto))
+  if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto))
     err_token(ls, TK_name);
-  s = strV(&ls->tokenval);
+  s = strV(&ls->tokval);
   lj_lex_next(ls);
   return s;
 }
@@ -1431,78 +1433,46 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt,
   }
 }
 
-/* Resize buffer if needed. */
-static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len)
-{
-  MSize sz = ls->sb.sz * 2;
-  while (ls->sb.n + len > sz) sz = sz * 2;
-  lj_str_resizebuf(ls->L, &ls->sb, sz);
-}
-
-static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len)
-{
-  if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz))
-    fs_buf_resize(ls, len);
-}
-
-/* Add string to buffer. */
-static void fs_buf_str(LexState *ls, const char *str, MSize len)
-{
-  char *p = ls->sb.buf + ls->sb.n;
-  MSize i;
-  ls->sb.n += len;
-  for (i = 0; i < len; i++) p[i] = str[i];
-}
-
-/* Add ULEB128 value to buffer. */
-static void fs_buf_uleb128(LexState *ls, uint32_t v)
-{
-  MSize n = ls->sb.n;
-  uint8_t *p = (uint8_t *)ls->sb.buf;
-  for (; v >= 0x80; v >>= 7)
-    p[n++] = (uint8_t)((v & 0x7f) | 0x80);
-  p[n++] = (uint8_t)v;
-  ls->sb.n = n;
-}
-
 /* Prepare variable info for prototype. */
 static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
 {
   VarInfo *vs =ls->vstack, *ve;
   MSize i, n;
   BCPos lastpc;
-  lj_str_resetbuf(&ls->sb);  /* Copy to temp. string buffer. */
+  lj_buf_reset(&ls->sb);  /* Copy to temp. string buffer. */
   /* Store upvalue names. */
   for (i = 0, n = fs->nuv; i < n; i++) {
     GCstr *s = strref(vs[fs->uvmap[i]].name);
     MSize len = s->len+1;
-    fs_buf_need(ls, len);
-    fs_buf_str(ls, strdata(s), len);
+    char *p = lj_buf_more(&ls->sb, len);
+    p = lj_buf_wmem(p, strdata(s), len);
+    setsbufP(&ls->sb, p);
   }
-  *ofsvar = ls->sb.n;
+  *ofsvar = sbuflen(&ls->sb);
   lastpc = 0;
   /* Store local variable names and compressed ranges. */
   for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) {
     if (!gola_isgotolabel(vs)) {
       GCstr *s = strref(vs->name);
       BCPos startpc;
+      char *p;
       if ((uintptr_t)s < VARNAME__MAX) {
-	fs_buf_need(ls, 1 + 2*5);
-	ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s;
+	p = lj_buf_more(&ls->sb, 1 + 2*5);
+	*p++ = (char)(uintptr_t)s;
       } else {
 	MSize len = s->len+1;
-	fs_buf_need(ls, len + 2*5);
-	fs_buf_str(ls, strdata(s), len);
+	p = lj_buf_more(&ls->sb, len + 2*5);
+	p = lj_buf_wmem(p, strdata(s), len);
       }
       startpc = vs->startpc;
-      fs_buf_uleb128(ls, startpc-lastpc);
-      fs_buf_uleb128(ls, vs->endpc-startpc);
+      p = lj_strfmt_wuleb128(p, startpc-lastpc);
+      p = lj_strfmt_wuleb128(p, vs->endpc-startpc);
+      setsbufP(&ls->sb, p);
       lastpc = startpc;
     }
   }
-  fs_buf_need(ls, 1);
-  ls->sb.buf[ls->sb.n++] = '\0';  /* Terminator for varinfo. */
-  return ls->sb.n;
+  lj_buf_putb(&ls->sb, '\0');  /* Terminator for varinfo. */
+  return sbuflen(&ls->sb);
 }
 
 /* Fixup variable info for prototype. */
@@ -1510,7 +1480,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
 {
   setmref(pt->uvinfo, p);
   setmref(pt->varinfo, (char *)p + ofsvar);
-  memcpy(p, ls->sb.buf, ls->sb.n);  /* Copy from temp. string buffer. */
+  memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb));  /* Copy from temp. buffer. */
 }
 #else
 
@@ -1619,7 +1589,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
   L->top--;  /* Pop table of constants. */
   ls->vtop = fs->vbase;  /* Reset variable stack. */
   ls->fs = fs->prev;
-  lua_assert(ls->fs != NULL || ls->token == TK_eof);
+  lua_assert(ls->fs != NULL || ls->tok == TK_eof);
   return pt;
 }
 
@@ -1716,10 +1686,9 @@ static void expr_bracket(LexState *ls, ExpDesc *v)
 static void expr_kvalue(TValue *v, ExpDesc *e)
 {
   if (e->k <= VKTRUE) {
-    setitype(v, ~(uint32_t)e->k);
+    setpriV(v, ~(uint32_t)e->k);
   } else if (e->k == VKSTR) {
-    setgcref(v->gcr, obj2gco(e->u.sval));
-    setitype(v, LJ_TSTR);
+    setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR);
   } else {
     lua_assert(tvisnumber(expr_numtv(e)));
     *v = *expr_numtv(e);
@@ -1741,15 +1710,15 @@ static void expr_table(LexState *ls, ExpDesc *e)
   bcreg_reserve(fs, 1);
   freg++;
   lex_check(ls, '{');
-  while (ls->token != '}') {
+  while (ls->tok != '}') {
     ExpDesc key, val;
     vcall = 0;
-    if (ls->token == '[') {
+    if (ls->tok == '[') {
       expr_bracket(ls, &key);  /* Already calls expr_toval. */
       if (!expr_isk(&key)) expr_index(fs, e, &key);
       if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++;
       lex_check(ls, '=');
-    } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) &&
+    } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) &&
 	       lj_lex_lookahead(ls) == '=') {
       expr_str(ls, &key);
       lex_check(ls, '=');
@@ -1842,11 +1811,11 @@ static BCReg parse_params(LexState *ls, int needself)
   lex_check(ls, '(');
   if (needself)
     var_new_lit(ls, nparams++, "self");
-  if (ls->token != ')') {
+  if (ls->tok != ')') {
     do {
-      if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) {
+      if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
 	var_new(ls, nparams++, lex_str(ls));
-      } else if (ls->token == TK_dots) {
+      } else if (ls->tok == TK_dots) {
 	lj_lex_next(ls);
 	fs->flags |= PROTO_VARARG;
 	break;
@@ -1880,7 +1849,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
   fs.bclim = pfs->bclim - pfs->pc;
   bcemit_AD(&fs, BC_FUNCF, 0, 0);  /* Placeholder. */
   parse_chunk(ls);
-  if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line);
+  if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line);
   pt = fs_finish(ls, (ls->lastline = ls->linenumber));
   pfs->bcbase = ls->bcstack + oldbase;  /* May have been reallocated. */
   pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
@@ -1919,13 +1888,13 @@ static void parse_args(LexState *ls, ExpDesc *e)
   BCIns ins;
   BCReg base;
   BCLine line = ls->linenumber;
-  if (ls->token == '(') {
+  if (ls->tok == '(') {
 #if !LJ_52
     if (line != ls->lastline)
       err_syntax(ls, LJ_ERR_XAMBIG);
 #endif
     lj_lex_next(ls);
-    if (ls->token == ')') {  /* f(). */
+    if (ls->tok == ')') {  /* f(). */
       args.k = VVOID;
     } else {
       expr_list(ls, &args);
@@ -1933,11 +1902,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
 	setbc_b(bcptr(fs, &args), 0);  /* Pass on multiple results. */
     }
     lex_match(ls, ')', '(', line);
-  } else if (ls->token == '{') {
+  } else if (ls->tok == '{') {
     expr_table(ls, &args);
-  } else if (ls->token == TK_string) {
+  } else if (ls->tok == TK_string) {
     expr_init(&args, VKSTR, 0);
-    args.u.sval = strV(&ls->tokenval);
+    args.u.sval = strV(&ls->tokval);
     lj_lex_next(ls);
   } else {
     err_syntax(ls, LJ_ERR_XFUNARG);
@@ -1946,11 +1915,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
   lua_assert(e->k == VNONRELOC);
   base = e->u.s.info;  /* Base register for call. */
   if (args.k == VCALL) {
-    ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1);
+    ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2);
   } else {
     if (args.k != VVOID)
       expr_tonextreg(fs, &args);
-    ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base);
+    ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2);
   }
   expr_init(e, VCALL, bcemit_INS(fs, ins));
   e->u.s.aux = base;
@@ -1963,33 +1932,34 @@ static void expr_primary(LexState *ls, ExpDesc *v)
 {
   FuncState *fs = ls->fs;
   /* Parse prefix expression. */
-  if (ls->token == '(') {
+  if (ls->tok == '(') {
     BCLine line = ls->linenumber;
     lj_lex_next(ls);
     expr(ls, v);
     lex_match(ls, ')', '(', line);
     expr_discharge(ls->fs, v);
-  } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) {
+  } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
     var_lookup(ls, v);
   } else {
     err_syntax(ls, LJ_ERR_XSYMBOL);
   }
   for (;;) {  /* Parse multiple expression suffixes. */
-    if (ls->token == '.') {
+    if (ls->tok == '.') {
       expr_field(ls, v);
-    } else if (ls->token == '[') {
+    } else if (ls->tok == '[') {
       ExpDesc key;
       expr_toanyreg(fs, v);
       expr_bracket(ls, &key);
       expr_index(fs, v, &key);
-    } else if (ls->token == ':') {
+    } else if (ls->tok == ':') {
       ExpDesc key;
       lj_lex_next(ls);
       expr_str(ls, &key);
       bcemit_method(fs, v, &key);
       parse_args(ls, v);
-    } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') {
+    } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
       expr_tonextreg(fs, v);
+      if (LJ_FR2) bcreg_reserve(fs, 1);
       parse_args(ls, v);
     } else {
       break;
@@ -2000,14 +1970,14 @@ static void expr_primary(LexState *ls, ExpDesc *v)
 /* Parse simple expression. */
 static void expr_simple(LexState *ls, ExpDesc *v)
 {
-  switch (ls->token) {
+  switch (ls->tok) {
   case TK_number:
-    expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0);
-    copyTV(ls->L, &v->u.nval, &ls->tokenval);
+    expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0);
+    copyTV(ls->L, &v->u.nval, &ls->tokval);
     break;
   case TK_string:
     expr_init(v, VKSTR, 0);
-    v->u.sval = strV(&ls->tokenval);
+    v->u.sval = strV(&ls->tokval);
     break;
   case TK_nil:
     expr_init(v, VKNIL, 0);
@@ -2095,11 +2065,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit);
 static void expr_unop(LexState *ls, ExpDesc *v)
 {
   BCOp op;
-  if (ls->token == TK_not) {
+  if (ls->tok == TK_not) {
     op = BC_NOT;
-  } else if (ls->token == '-') {
+  } else if (ls->tok == '-') {
     op = BC_UNM;
-  } else if (ls->token == '#') {
+  } else if (ls->tok == '#') {
     op = BC_LEN;
   } else {
     expr_simple(ls, v);
@@ -2116,7 +2086,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit)
   BinOpr op;
   synlevel_begin(ls);
   expr_unop(ls, v);
-  op = token2binop(ls->token);
+  op = token2binop(ls->tok);
   while (op != OPR_NOBINOPR && priority[op].left > limit) {
     ExpDesc v2;
     BinOpr nextop;
@@ -2305,9 +2275,9 @@ static void parse_func(LexState *ls, BCLine line)
   lj_lex_next(ls);  /* Skip 'function'. */
   /* Parse function name. */
   var_lookup(ls, &v);
-  while (ls->token == '.')  /* Multiple dot-separated fields. */
+  while (ls->tok == '.')  /* Multiple dot-separated fields. */
     expr_field(ls, &v);
-  if (ls->token == ':') {  /* Optional colon to signify method call. */
+  if (ls->tok == ':') {  /* Optional colon to signify method call. */
     needself = 1;
     expr_field(ls, &v);
   }
@@ -2320,9 +2290,9 @@ static void parse_func(LexState *ls, BCLine line)
 /* -- Control transfer statements ----------------------------------------- */
 
 /* Check for end of block. */
-static int endofblock(LexToken token)
+static int parse_isend(LexToken tok)
 {
-  switch (token) {
+  switch (tok) {
   case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof:
     return 1;
   default:
@@ -2337,7 +2307,7 @@ static void parse_return(LexState *ls)
   FuncState *fs = ls->fs;
   lj_lex_next(ls);  /* Skip 'return'. */
   fs->flags |= PROTO_HAS_RETURN;
-  if (endofblock(ls->token) || ls->token == ';') {  /* Bare return. */
+  if (parse_isend(ls->tok) || ls->tok == ';') {  /* Bare return. */
     ins = BCINS_AD(BC_RET0, 0, 1);
   } else {  /* Return with one or more values. */
     ExpDesc e;  /* Receives the _last_ expression in the list. */
@@ -2403,18 +2373,18 @@ static void parse_label(LexState *ls)
   lex_check(ls, TK_label);
   /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */
   for (;;) {
-    if (ls->token == TK_label) {
+    if (ls->tok == TK_label) {
       synlevel_begin(ls);
       parse_label(ls);
       synlevel_end(ls);
-    } else if (LJ_52 && ls->token == ';') {
+    } else if (LJ_52 && ls->tok == ';') {
       lj_lex_next(ls);
     } else {
       break;
     }
   }
   /* Trailing label is considered to be outside of scope. */
-  if (endofblock(ls->token) && ls->token != TK_until)
+  if (parse_isend(ls->tok) && ls->tok != TK_until)
     ls->vstack[idx].slot = fs->bl->nactvar;
   gola_resolve(ls, fs->bl, idx);
 }
@@ -2570,7 +2540,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname)
   lex_check(ls, TK_in);
   line = ls->linenumber;
   assign_adjust(ls, 3, expr_list(ls, &e), &e);
-  bcreg_bump(fs, 3);  /* The iterator needs another 3 slots (func + 2 args). */
+  /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */
+  bcreg_bump(fs, 3+LJ_FR2);
   isnext = (nvars <= 5 && predict_next(ls, fs, exprpc));
   var_add(ls, 3);  /* Hidden control variables. */
   lex_check(ls, TK_do);
@@ -2598,9 +2569,9 @@ static void parse_for(LexState *ls, BCLine line)
   fscope_begin(fs, &bl, FSCOPE_LOOP);
   lj_lex_next(ls);  /* Skip 'for'. */
   varname = lex_str(ls);  /* Get first variable name. */
-  if (ls->token == '=')
+  if (ls->tok == '=')
     parse_for_num(ls, varname, line);
-  else if (ls->token == ',' || ls->token == TK_in)
+  else if (ls->tok == ',' || ls->tok == TK_in)
     parse_for_iter(ls, varname);
   else
     err_syntax(ls, LJ_ERR_XFOR);
@@ -2626,12 +2597,12 @@ static void parse_if(LexState *ls, BCLine line)
   BCPos flist;
   BCPos escapelist = NO_JMP;
   flist = parse_then(ls);
-  while (ls->token == TK_elseif) {  /* Parse multiple 'elseif' blocks. */
+  while (ls->tok == TK_elseif) {  /* Parse multiple 'elseif' blocks. */
     jmp_append(fs, &escapelist, bcemit_jmp(fs));
     jmp_tohere(fs, flist);
     flist = parse_then(ls);
   }
-  if (ls->token == TK_else) {  /* Parse optional 'else' block. */
+  if (ls->tok == TK_else) {  /* Parse optional 'else' block. */
     jmp_append(fs, &escapelist, bcemit_jmp(fs));
     jmp_tohere(fs, flist);
     lj_lex_next(ls);  /* Skip 'else'. */
@@ -2649,7 +2620,7 @@ static void parse_if(LexState *ls, BCLine line)
 static int parse_stmt(LexState *ls)
 {
   BCLine line = ls->linenumber;
-  switch (ls->token) {
+  switch (ls->tok) {
   case TK_if:
     parse_if(ls, line);
     break;
@@ -2707,7 +2678,7 @@ static void parse_chunk(LexState *ls)
 {
   int islast = 0;
   synlevel_begin(ls);
-  while (!islast && !endofblock(ls->token)) {
+  while (!islast && !parse_isend(ls->tok)) {
     islast = parse_stmt(ls);
     lex_opt(ls, ';');
     lua_assert(ls->fs->framesize >= ls->fs->freereg &&
@@ -2742,7 +2713,7 @@ GCproto *lj_parse(LexState *ls)
   bcemit_AD(&fs, BC_FUNCV, 0, 0);  /* Placeholder. */
   lj_lex_next(ls);  /* Read-ahead first token. */
   parse_chunk(ls);
-  if (ls->token != TK_eof)
+  if (ls->tok != TK_eof)
     err_token(ls, TK_eof);
   pt = fs_finish(ls, ls->linenumber);
   L->top--;  /* Drop chunkname. */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_parse.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_parse.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_parse.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_parse.h
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_profile.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_profile.c
new file mode 100644
index 0000000000000000000000000000000000000000..c7e53963b5b526a791b8bce617482b3db03e33b5
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_profile.c
@@ -0,0 +1,368 @@
+/*
+** Low-overhead profiling.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_profile_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASPROFILE
+
+#include "lj_buf.h"
+#include "lj_frame.h"
+#include "lj_debug.h"
+#include "lj_dispatch.h"
+#if LJ_HASJIT
+#include "lj_jit.h"
+#include "lj_trace.h"
+#endif
+#include "lj_profile.h"
+
+#include "luajit.h"
+
+#if LJ_PROFILE_SIGPROF
+
+#include <sys/time.h>
+#include <signal.h>
+#define profile_lock(ps)	UNUSED(ps)
+#define profile_unlock(ps)	UNUSED(ps)
+
+#elif LJ_PROFILE_PTHREAD
+
+#include <pthread.h>
+#include <time.h>
+#if LJ_TARGET_PS3
+#include <sys/timer.h>
+#endif
+#define profile_lock(ps)	pthread_mutex_lock(&ps->lock)
+#define profile_unlock(ps)	pthread_mutex_unlock(&ps->lock)
+
+#elif LJ_PROFILE_WTHREAD
+
+#define WIN32_LEAN_AND_MEAN
+#if LJ_TARGET_XBOX360
+#include <xtl.h>
+#include <xbox.h>
+#else
+#include <windows.h>
+#endif
+typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int);
+#define profile_lock(ps)	EnterCriticalSection(&ps->lock)
+#define profile_unlock(ps)	LeaveCriticalSection(&ps->lock)
+
+#endif
+
+/* Profiler state: user callback, sample bookkeeping and timer backend data. */
+typedef struct ProfileState {
+  global_State *g;		/* VM state that started the profiler (NULL = idle). */
+  luaJIT_profile_callback cb;	/* Profiler callback. */
+  void *data;			/* Profiler callback data. */
+  SBuf sb;			/* String buffer for stack dumps. */
+  int interval;			/* Sample interval in milliseconds. */
+  int samples;			/* Samples accumulated since the last callback. */
+  int vmstate;			/* VM state char ('N','I','C','G','J') at last trigger. */
+#if LJ_PROFILE_SIGPROF
+  struct sigaction oldsa;	/* Previous SIGPROF state, restored on stop. */
+#elif LJ_PROFILE_PTHREAD
+  pthread_mutex_t lock;		/* g->hookmask update lock. */
+  pthread_t thread;		/* Timer thread. */
+  int abort;			/* Set to 1 to ask the timer thread to exit. */
+#elif LJ_PROFILE_WTHREAD
+#if LJ_TARGET_WINDOWS
+  HINSTANCE wmm;		/* WinMM library handle (loaded on first start). */
+  WMM_TPFUNC wmm_tbp;		/* WinMM timeBeginPeriod function. */
+  WMM_TPFUNC wmm_tep;		/* WinMM timeEndPeriod function. */
+#endif
+  CRITICAL_SECTION lock;	/* g->hookmask update lock. */
+  HANDLE thread;		/* Timer thread. */
+  int abort;			/* Set to 1 to ask the timer thread to exit. */
+#endif
+} ProfileState;
+
+/* Sadly, we have to use a static profiler state.
+**
+** The SIGPROF variant needs a static pointer to the global state, anyway.
+** And it would be hard to extend for multiple threads. You can still use
+** multiple VMs in multiple threads, but only profile one at a time.
+*/
+static ProfileState profile_state;  /* profile_state.g != NULL while a profiler is running. */
+
+/* Default sample interval in milliseconds. */
+#define LJ_PROFILE_INTERVAL_DEFAULT	10
+
+/* -- Profiler/hook interaction ------------------------------------------- */
+
+#if !LJ_PROFILE_SIGPROF
+void LJ_FASTCALL lj_profile_hook_enter(global_State *g)  /* hook_enter(g), locked while profiling. */
+{
+  ProfileState *ps = &profile_state;
+  if (ps->g) {  /* Profiler running: serialize with profile_trigger() via the lock. */
+    profile_lock(ps);
+    hook_enter(g);
+    profile_unlock(ps);
+  } else {  /* No profiler: the lock is only initialized between timer start and stop. */
+    hook_enter(g);
+  }
+}
+
+void LJ_FASTCALL lj_profile_hook_leave(global_State *g)  /* hook_leave(g), locked while profiling. */
+{
+  ProfileState *ps = &profile_state;
+  if (ps->g) {  /* Profiler running: serialize with profile_trigger() via the lock. */
+    profile_lock(ps);
+    hook_leave(g);
+    profile_unlock(ps);
+  } else {  /* No profiler: the lock is only initialized between timer start and stop. */
+    hook_leave(g);
+  }
+}
+#endif
+
+/* -- Profile callbacks --------------------------------------------------- */
+
+/* Callback from profile hook (HOOK_PROFILE already cleared). */
+void LJ_FASTCALL lj_profile_interpreter(lua_State *L)
+{
+  ProfileState *ps = &profile_state;
+  global_State *g = G(L);
+  uint8_t mask;
+  profile_lock(ps);
+  mask = (g->hookmask & ~HOOK_PROFILE);  /* Hook mask to write back on exit. */
+  if (!(mask & HOOK_VMEVENT)) {  /* The user callback is skipped if HOOK_VMEVENT is set. */
+    int samples = ps->samples;  /* Take the pending sample count ... */
+    ps->samples = 0;  /* ... and reset it for the next callback. */
+    g->hookmask = HOOK_VMEVENT;  /* Only HOOK_VMEVENT is set while the callback runs. */
+    lj_dispatch_update(g);
+    profile_unlock(ps);  /* The lock is dropped around the user callback. */
+    ps->cb(ps->data, L, samples, ps->vmstate);  /* Invoke user callback. */
+    profile_lock(ps);
+    mask |= (g->hookmask & HOOK_PROFILE);  /* Preserve HOOK_PROFILE if it got set meanwhile. */
+  }
+  g->hookmask = mask;  /* Write back the saved mask and refresh the dispatch. */
+  lj_dispatch_update(g);
+  profile_unlock(ps);
+}
+
+/* Trigger profile hook. Asynchronous call from OS-specific profile timer. */
+static void profile_trigger(ProfileState *ps)
+{
+  global_State *g = ps->g;
+  uint8_t mask;
+  profile_lock(ps);
+  ps->samples++;  /* Always increment number of samples. */
+  mask = g->hookmask;
+  if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT))) {  /* Set profile hook. */
+    int st = g->vmstate;  /* VM state at the time of this trigger. */
+    ps->vmstate = st >= 0 ? 'N' :  /* Encode it as one char; it is later passed to ps->cb. */
+		  st == ~LJ_VMST_INTERP ? 'I' :
+		  st == ~LJ_VMST_C ? 'C' :
+		  st == ~LJ_VMST_GC ? 'G' : 'J';  /* Any other negative state maps to 'J'. */
+    g->hookmask = (mask | HOOK_PROFILE);  /* Request the profile hook. */
+    lj_dispatch_update(g);
+  }
+  profile_unlock(ps);
+}
+
+/* -- OS-specific profile timer handling ---------------------------------- */
+
+#if LJ_PROFILE_SIGPROF
+
+/* SIGPROF handler: forwards to profile_trigger() on the static profiler state. */
+static void profile_signal(int sig)
+{
+  UNUSED(sig);  /* The signal number is not needed. */
+  profile_trigger(&profile_state);
+}
+
+/* Start profiling timer: install the SIGPROF handler, then arm ITIMER_PROF. */
+static void profile_timer_start(ProfileState *ps)
+{
+  int interval = ps->interval;  /* Sample interval in milliseconds. */
+  struct itimerval tm;
+  struct sigaction sa;
+  sa.sa_flags = SA_RESTART;
+  sa.sa_handler = profile_signal;
+  sigemptyset(&sa.sa_mask);
+  sigaction(SIGPROF, &sa, &ps->oldsa);  /* Handler first: an unhandled SIGPROF is fatal. */
+  tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
+  tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
+  setitimer(ITIMER_PROF, &tm, NULL);  /* Periodic timer; first expiry after one interval. */
+}
+
+/* Stop profiling timer: disarm ITIMER_PROF, then restore the saved SIGPROF action. */
+static void profile_timer_stop(ProfileState *ps)
+{
+  struct itimerval tm;
+  tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;  /* All-zero timer value ... */
+  tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
+  setitimer(ITIMER_PROF, &tm, NULL);  /* ... disarms the timer. */
+  sigaction(SIGPROF, &ps->oldsa, NULL);  /* Put back what profile_timer_start() saved. */
+}
+
+#elif LJ_PROFILE_PTHREAD
+
+/* POSIX timer thread: sleep one interval, then trigger a sample, until aborted. */
+static void *profile_thread(ProfileState *ps)
+{
+  int interval = ps->interval;  /* Read once at thread start. */
+#if !LJ_TARGET_PS3
+  struct timespec ts;
+  ts.tv_sec = interval / 1000;  /* Split milliseconds into seconds ... */
+  ts.tv_nsec = (interval % 1000) * 1000000;  /* ... and nanoseconds. */
+#endif
+  while (1) {
+#if LJ_TARGET_PS3
+    sys_timer_usleep(interval * 1000);  /* Milliseconds to microseconds. */
+#else
+    nanosleep(&ts, NULL);  /* Remaining time after an interrupted sleep is not requested. */
+#endif
+    if (ps->abort) break;  /* Checked after each sleep, before triggering. */
+    profile_trigger(ps);
+  }
+  return NULL;
+}
+
+/* Start profiling timer thread: init the lock, clear abort, spawn profile_thread(). */
+static void profile_timer_start(ProfileState *ps)
+{
+  pthread_mutex_init(&ps->lock, 0);  /* Default mutex attributes. */
+  ps->abort = 0;
+  pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps);  /* NOTE(review): return value is not checked. */
+}
+
+/* Stop profiling timer thread: request abort, join the thread, destroy the lock. */
+static void profile_timer_stop(ProfileState *ps)
+{
+  ps->abort = 1;  /* The thread sees this after its current sleep ends. */
+  pthread_join(ps->thread, NULL);  /* Wait for the thread before tearing down the lock. */
+  pthread_mutex_destroy(&ps->lock);
+}
+
+#elif LJ_PROFILE_WTHREAD
+
+/* Windows timer thread: sleep one interval, then trigger a sample, until aborted. */
+static DWORD WINAPI profile_thread(void *psx)
+{
+  ProfileState *ps = (ProfileState *)psx;
+  int interval = ps->interval;  /* Read once at thread start. */
+#if LJ_TARGET_WINDOWS
+  ps->wmm_tbp(interval);  /* timeBeginPeriod(interval); called unconditionally. */
+#endif
+  while (1) {
+    Sleep(interval);
+    if (ps->abort) break;  /* Checked after each sleep, before triggering. */
+    profile_trigger(ps);
+  }
+#if LJ_TARGET_WINDOWS
+  ps->wmm_tep(interval);  /* timeEndPeriod(interval); pairs with the call above. */
+#endif
+  return 0;
+}
+
+/* Start profiling timer thread: resolve WinMM entry points, init the lock, spawn profile_thread(). */
+static void profile_timer_start(ProfileState *ps)
+{
+#if LJ_TARGET_WINDOWS
+  if (!ps->wmm) {  /* Load WinMM library on-demand. */
+    ps->wmm = LoadLibraryExA("winmm.dll", NULL, 0);
+    if (ps->wmm) {  /* NOTE(review): a failed load falls through to CreateThread with wmm_tbp unset -- confirm. */
+      ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod");
+      ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod");
+      if (!ps->wmm_tbp || !ps->wmm_tep) {  /* Either entry point missing. */
+	ps->wmm = NULL;  /* Clear the handle so the next start repeats the lookup. */
+	return;  /* NOTE(review): returns without creating the lock or the thread -- confirm callers cope. */
+      }
+    }
+  }
+#endif
+  InitializeCriticalSection(&ps->lock);
+  ps->abort = 0;
+  ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL);  /* NOTE(review): result is not checked. */
+}
+
+/* Stop profiling timer thread: request abort, wait for the thread, delete the lock. */
+static void profile_timer_stop(ProfileState *ps)
+{
+  ps->abort = 1;  /* The thread sees this after its current sleep ends. */
+  WaitForSingleObject(ps->thread, INFINITE);  /* NOTE(review): the thread handle is not closed here -- confirm. */
+  DeleteCriticalSection(&ps->lock);
+}
+
+#endif
+
+/* -- Public profiling API ------------------------------------------------ */
+
+/* Start profiling: parse the mode string, take over the static state, start the timer. */
+LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
+				  luaJIT_profile_callback cb, void *data)
+{
+  ProfileState *ps = &profile_state;
+  int interval = LJ_PROFILE_INTERVAL_DEFAULT;
+  while (*mode) {  /* Mode chars are applied before the in-use check below. */
+    int m = *mode++;
+    switch (m) {
+    case 'i':  /* 'i' followed by decimal digits: sample interval in milliseconds. */
+      interval = 0;
+      while (*mode >= '0' && *mode <= '9')  /* NOTE(review): no overflow check on the accumulation. */
+	interval = interval * 10 + (*mode++ - '0');
+      if (interval <= 0) interval = 1;  /* Missing or zero value means 1 ms. */
+      break;
+#if LJ_HASJIT
+    case 'l': case 'f':  /* Stored as J->prof_mode; the recorder checks for these two values. */
+      L2J(L)->prof_mode = m;
+      lj_trace_flushall(L);  /* Flush all traces after changing the mode. */
+      break;
+#endif
+    default:  /* Ignore unknown mode chars. */
+      break;
+    }
+  }
+  if (ps->g) {  /* Already profiling: try to stop it first (only stops this VM's profiler). */
+    luaJIT_profile_stop(L);
+    if (ps->g) return;  /* Profiler in use by another VM. */
+  }
+  ps->g = G(L);  /* Claim the static state for this VM. */
+  ps->interval = interval;
+  ps->cb = cb;
+  ps->data = data;
+  ps->samples = 0;
+  lj_buf_init(L, &ps->sb);  /* Buffer used by luaJIT_profile_dumpstack(). */
+  profile_timer_start(ps);
+}
+
+/* Stop profiling: stop the timer, clear the hook, free the buffer, release the state. */
+LUA_API void luaJIT_profile_stop(lua_State *L)
+{
+  ProfileState *ps = &profile_state;
+  global_State *g = ps->g;
+  if (G(L) == g) {  /* Only stop profiler if started by this VM. */
+    profile_timer_stop(ps);  /* Stop the timer before touching the hook mask. */
+    g->hookmask &= ~HOOK_PROFILE;  /* Clear the profile hook bit. */
+    lj_dispatch_update(g);
+#if LJ_HASJIT
+    G2J(g)->prof_mode = 0;  /* Turn off the recorder's profile mode ... */
+    lj_trace_flushall(L);  /* ... and flush all traces. */
+#endif
+    lj_buf_free(g, &ps->sb);
+    setmref(ps->sb.b, NULL);  /* Null the buffer pointers after freeing. */
+    setmref(ps->sb.e, NULL);
+    ps->g = NULL;  /* Mark the profiler as idle. */
+  }
+}
+
+/* Return a compact stack dump; the text is in the profiler's buffer, its length in *len. */
+LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
+					     int depth, size_t *len)
+{
+  ProfileState *ps = &profile_state;
+  SBuf *sb = &ps->sb;  /* Shared buffer of the static profiler state. */
+  setsbufL(sb, L);
+  lj_buf_reset(sb);  /* Each call starts from an empty buffer, replacing the previous dump. */
+  lj_debug_dumpstack(L, sb, fmt, depth);
+  *len = (size_t)sbuflen(sb);  /* len is written unconditionally. */
+  return sbufB(sb);  /* Start of the buffer contents. */
+}
+
+#endif
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_profile.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_profile.h
new file mode 100644
index 0000000000000000000000000000000000000000..26cb9db380cccb876711c6e31d8cd8251a390aac
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_profile.h
@@ -0,0 +1,21 @@
+/*
+** Low-overhead profiling.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_PROFILE_H
+#define _LJ_PROFILE_H
+
+#include "lj_obj.h"
+
+#if LJ_HASPROFILE
+
+LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L);
+#if !LJ_PROFILE_SIGPROF
+LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g);
+LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g);
+#endif
+
+#endif
+
+#endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_record.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_record.c
similarity index 85%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_record.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_record.c
index 843108c85a79bb9652f08385d2497cd7b7c31bf4..dc5f2d547ae4fa262ecdd6096dc246320b8b4935 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_record.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_record.c
@@ -20,6 +20,9 @@
 #endif
 #include "lj_bc.h"
 #include "lj_ff.h"
+#if LJ_HASPROFILE
+#include "lj_debug.h"
+#endif
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_ircall.h"
@@ -230,8 +233,12 @@ static void canonicalize_slots(jit_State *J)
 }
 
 /* Stop recording. */
-static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
+void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
 {
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+  if (J->retryrec)
+    lj_trace_err(J, LJ_TRERR_RETRY);
+#endif
   lj_trace_end(J);
   J->cur.linktype = (uint8_t)linktype;
   J->cur.link = (uint16_t)lnk;
@@ -499,8 +506,8 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
 static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
 {
   BCReg ra = bc_a(iterins);
-  lua_assert(J->base[ra] != 0);
-  if (!tref_isnil(J->base[ra])) {  /* Looping back? */
+  lua_assert(!LJ_FR2);  /* TODO_FR2: handle different frame setup. */
+  if (!tref_isnil(getslot(J, ra))) {  /* Looping back? */
     J->base[ra-1] = J->base[ra];  /* Copy result of ITERC to control var. */
     J->maxslot = ra-1+bc_b(J->pc[-1]);
     J->pc += bc_j(iterins)+1;
@@ -538,12 +545,12 @@ static int innerloopleft(jit_State *J, const BCIns *pc)
 /* Handle the case when an interpreted loop op is hit. */
 static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
 {
-  if (J->parent == 0) {
+  if (J->parent == 0 && J->exitno == 0) {
     if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
       /* Same loop? */
       if (ev == LOOPEV_LEAVE)  /* Must loop back to form a root trace. */
 	lj_trace_err(J, LJ_TRERR_LLEAVE);
-      rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno);  /* Looping root trace. */
+      lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno);  /* Looping trace. */
     } else if (ev != LOOPEV_LEAVE) {  /* Entering inner loop? */
       /* It's usually better to abort here and wait until the inner loop
       ** is traced. But if the inner loop repeatedly didn't loop back,
@@ -568,18 +575,64 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
 /* Handle the case when an already compiled loop op is hit. */
 static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
 {
-  if (J->parent == 0) {  /* Root trace hit an inner loop. */
+  if (J->parent == 0 && J->exitno == 0) {  /* Root trace hit an inner loop. */
     /* Better let the inner loop spawn a side trace back here. */
     lj_trace_err(J, LJ_TRERR_LINNER);
   } else if (ev != LOOPEV_LEAVE) {  /* Side trace enters a compiled loop. */
     J->instunroll = 0;  /* Cannot continue across a compiled loop op. */
     if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
-      rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno);  /* Form an extra loop. */
+      lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno);  /* Form extra loop. */
     else
-      rec_stop(J, LJ_TRLINK_ROOT, lnk);  /* Link to the loop. */
+      lj_record_stop(J, LJ_TRLINK_ROOT, lnk);  /* Link to the loop. */
   }  /* Side trace continues across a loop that's left or not entered. */
 }
 
+/* -- Record profiler hook checks ----------------------------------------- */
+
+#if LJ_HASPROFILE
+
+/* Need to insert profiler hook check? Returns 1 if so and updates J->prev_pt/J->prev_line. */
+static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc)
+{
+  GCproto *ppt;
+  lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l');
+  if (!pt)  /* No prototype: never insert a check. */
+    return 0;
+  ppt = J->prev_pt;  /* Prototype seen by the previous call. */
+  J->prev_pt = pt;
+  if (pt != ppt && ppt) {  /* Prototype changed (the very first prototype does not count). */
+    J->prev_line = -1;  /* Reset the remembered line. */
+    return 1;
+  }
+  if (J->prof_mode == 'l') {  /* Mode 'l' additionally checks for line changes. */
+    BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc));
+    BCLine pline = J->prev_line;
+    J->prev_line = line;
+    if (pline != line)  /* Line differs from the previous call. */
+      return 1;
+  }
+  return 0;
+}
+
+static void rec_profile_ins(jit_State *J, const BCIns *pc)  /* Per-instruction profiler check. */
+{
+  if (J->prof_mode && rec_profile_need(J, J->pt, pc)) {  /* Only while a profile mode is set. */
+    emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);  /* Emit IR_PROF ... */
+    lj_snap_add(J);  /* ... followed by a snapshot. */
+  }
+}
+
+static void rec_profile_ret(jit_State *J)  /* Profiler check on return; mode 'f' only. */
+{
+  if (J->prof_mode == 'f') {
+    emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);  /* Emit IR_PROF ... */
+    J->prev_pt = NULL;  /* Forget the previous prototype tracked by rec_profile_need(). */
+    lj_snap_add(J);  /* ... followed by a snapshot. */
+  }
+}
+
+#endif
+
 /* -- Record calls and returns -------------------------------------------- */
 
 /* Specialize to the runtime value of the called function or its prototype. */
@@ -595,6 +648,21 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
       (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);  /* Prevent GC of proto. */
       return tr;
     }
+  } else {
+    /* Don't specialize to non-monomorphic builtins. */
+    switch (fn->c.ffid) {
+    case FF_coroutine_wrap_aux:
+    case FF_string_gmatch_aux:
+      /* NYI: io_file_iter doesn't have an ffid, yet. */
+      {  /* Specialize to the ffid. */
+	TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID);
+	emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid));
+      }
+      return tr;
+    default:
+      /* NYI: don't specialize to non-monomorphic C functions. */
+      break;
+    }
   }
   /* Otherwise specialize to the function (closure) value itself. */
   kfunc = lj_ir_kfunc(J, fn);
@@ -609,6 +677,7 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
   TValue *functv = &J->L->base[func];
   TRef *fbase = &J->base[func];
   ptrdiff_t i;
+  lua_assert(!LJ_FR2);  /* TODO_FR2: handle different frame setup. */
   for (i = 0; i <= nargs; i++)
     (void)getslot(J, func+i);  /* Ensure func and all args have a reference. */
   if (!tref_isfunc(fbase[0])) {  /* Resolve __call metamethod. */
@@ -678,6 +747,8 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt)
   return 0;
 }
 
+static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot);
+
 /* Record return. */
 void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
 {
@@ -700,12 +771,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
   /* Return to lower frame via interpreter for unhandled cases. */
   if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
        (!frame_islua(frame) ||
-	(J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) {
+	(J->parent == 0 && J->exitno == 0 &&
+	 !bc_isret(bc_op(J->cur.startins))))) {
     /* NYI: specialize to frame type and return directly, not via RET*. */
     for (i = 0; i < (ptrdiff_t)rbase; i++)
       J->base[i] = 0;  /* Purge dead slots. */
     J->maxslot = rbase + (BCReg)gotresults;
-    rec_stop(J, LJ_TRLINK_RETURN, 0);  /* Return to interpreter. */
+    lj_record_stop(J, LJ_TRLINK_RETURN, 0);  /* Return to interpreter. */
     return;
   }
   if (frame_isvarg(frame)) {
@@ -722,14 +794,15 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
     BCIns callins = *(frame_pc(frame)-1);
     ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
     BCReg cbase = bc_a(callins);
-    GCproto *pt = funcproto(frame_func(frame - (cbase+1)));
+    GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2)));
+    lua_assert(!LJ_FR2);  /* TODO_FR2: handle different frame teardown. */
     if ((pt->flags & PROTO_NOJIT))
       lj_trace_err(J, LJ_TRERR_CJITOFF);
     if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
       if (check_downrec_unroll(J, pt)) {
 	J->maxslot = (BCReg)(rbase + gotresults);
 	lj_snap_purge(J);
-	rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno);  /* Down-recursion. */
+	lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno);  /* Down-rec. */
 	return;
       }
       lj_snap_add(J);
@@ -742,7 +815,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
       lua_assert(J->baseslot > cbase+1);
       J->baseslot -= cbase+1;
       J->base -= cbase+1;
-    } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
+    } else if (J->parent == 0 && J->exitno == 0 &&
+	       !bc_isret(bc_op(J->cur.startins))) {
       /* Return to lower frame would leave the loop in a root trace. */
       lj_trace_err(J, LJ_TRERR_LLEAVE);
     } else if (J->needsnap) {  /* Tailcalled to ff with side-effects. */
@@ -774,7 +848,24 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
     } else if (cont == lj_cont_nop) {
       /* Nothing to do here. */
     } else if (cont == lj_cont_cat) {
-      lua_assert(0);
+      BCReg bslot = bc_b(*(frame_contpc(frame)-1));
+      TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
+      if (bslot != cbase-2) {  /* Concatenate the remainder. */
+	TValue *b = J->L->base, save;  /* Simulate lower frame and result. */
+	J->base[cbase-2] = tr;
+	copyTV(J->L, &save, b-2);
+	if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2);
+	J->L->base = b - cbase;
+	tr = rec_cat(J, bslot, cbase-2);
+	b = J->L->base + cbase;  /* Undo. */
+	J->L->base = b;
+	copyTV(J->L, b-2, &save);
+      }
+      if (tr) {  /* Store final result. */
+	BCReg dst = bc_a(*(frame_contpc(frame)-1));
+	J->base[dst] = tr;
+	if (dst >= J->maxslot) J->maxslot = dst+1;
+      }  /* Otherwise continue with another __concat call. */
     } else {
       /* Result type already specialized. */
       lua_assert(cont == lj_cont_condf || cont == lj_cont_condt);
@@ -790,13 +881,11 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
 /* Prepare to record call to metamethod. */
 static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
 {
-  BCReg s, top = curr_proto(J->L)->framesize;
-  TRef trcont;
-  setcont(&J->L->base[top], cont);
+  BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
 #if LJ_64
-  trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin));
+  TRef trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
 #else
-  trcont = lj_ir_kptr(J, (void *)cont);
+  TRef trcont = lj_ir_kptr(J, (void *)cont);
 #endif
   J->base[top] = trcont | TREF_CONT;
   J->framedepth++;
@@ -877,7 +966,7 @@ nocheck:
 static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
 {
   /* Set up metamethod call first to save ix->tab and ix->tabv. */
-  BCReg func = rec_mm_prep(J, lj_cont_ra);
+  BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
   TRef *base = J->base + func;
   TValue *basev = J->L->base + func;
   base[1] = ix->tab; base[2] = ix->key;
@@ -893,6 +982,7 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
     lj_trace_err(J, LJ_TRERR_NOMM);
   }
 ok:
+  lua_assert(!LJ_FR2);  /* TODO_FR2: handle different frame setup. */
   base[0] = ix->mobj;
   copyTV(J->L, basev+0, &ix->mobjv);
   lj_record_call(J, func, 2);
@@ -909,6 +999,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
     BCReg func = rec_mm_prep(J, lj_cont_ra);
     TRef *base = J->base + func;
     TValue *basev = J->L->base + func;
+    lua_assert(!LJ_FR2);  /* TODO_FR2: handle different frame setup. */
     base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
     base[1] = tr; copyTV(J->L, basev+1, tv);
 #if LJ_52
@@ -931,6 +1022,7 @@ static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
   BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
   TRef *base = J->base + func;
   TValue *tv = J->L->base + func;
+  lua_assert(!LJ_FR2);  /* TODO_FR2: handle different frame setup. */
   base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
   copyTV(J->L, tv+0, &ix->mobjv);
   copyTV(J->L, tv+1, &ix->valv);
@@ -1039,6 +1131,72 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm)
 
 /* -- Indexed access ------------------------------------------------------ */
 
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+/* Bump table allocations in bytecode when they grow during recording. */
+static void rec_idx_bump(jit_State *J, RecordIndex *ix)
+{
+  RBCHashEntry *rbc = &J->rbchash[(ix->tab & (RBCHASH_SLOTS-1))];  /* Slot picked by the low bits of ix->tab. */
+  if (tref_ref(ix->tab) == rbc->ref) {  /* Slot holds an entry for this table's IR ref. */
+    const BCIns *pc = mref(rbc->pc, const BCIns);
+    GCtab *tb = tabV(&ix->tabv);  /* The runtime table being indexed. */
+    uint32_t nhbits;
+    IRIns *ir;
+    if (!tvisnil(&ix->keyv))
+      (void)lj_tab_set(J->L, tb, &ix->keyv);  /* Grow table right now. */
+    nhbits = tb->hmask > 0 ? lj_fls(tb->hmask)+1 : 0;  /* Hash part size in bits; 0 if hmask is 0. */
+    ir = IR(tref_ref(ix->tab));
+    if (ir->o == IR_TNEW) {
+      uint32_t ah = bc_d(*pc);
+      uint32_t asize = ah & 0x7ff, hbits = ah >> 11;  /* D operand: asize in low 11 bits, hbits above. */
+      if (nhbits > hbits) hbits = nhbits;  /* Sizes only ever grow. */
+      if (tb->asize > asize) {
+	asize = tb->asize <= 0x7ff ? tb->asize : 0x7ff;  /* Clamp to the 11-bit field. */
+      }
+      if ((asize | (hbits<<11)) != ah) {  /* Has the size changed? */
+	/* Patch bytecode, but continue recording (for more patching). */
+	setbc_d(pc, (asize | (hbits<<11)));
+	/* Patching TNEW operands is only safe if the trace is aborted. */
+	ir->op1 = asize; ir->op2 = hbits;
+	J->retryrec = 1;  /* Abort the trace at the end of recording. */
+      }
+    } else if (ir->o == IR_TDUP) {
+      GCtab *tpl = gco2tab(proto_kgc(&gcref(rbc->pt)->pt, ~(ptrdiff_t)bc_d(*pc)));  /* Template table constant. */
+      /* Grow template table, but preserve keys with nil values. */
+      if ((tb->asize > tpl->asize && (1u << nhbits)-1 == tpl->hmask) ||
+	  (tb->asize == tpl->asize && (1u << nhbits)-1 > tpl->hmask)) {  /* Exactly one of the two parts grew. */
+	Node *node = noderef(tpl->node);
+	uint32_t i, hmask = tpl->hmask, asize;
+	TValue *array;
+	for (i = 0; i <= hmask; i++) {  /* Mark nil-valued keys with the template itself as placeholder. */
+	  if (!tvisnil(&node[i].key) && tvisnil(&node[i].val))
+	    settabV(J->L, &node[i].val, tpl);
+	}
+	if (!tvisnil(&ix->keyv) && tref_isk(ix->key)) {  /* Constant key: add it to the template, too. */
+	  TValue *o = lj_tab_set(J->L, tpl, &ix->keyv);
+	  if (tvisnil(o)) settabV(J->L, o, tpl);
+	}
+	lj_tab_resize(J->L, tpl, tb->asize, nhbits);
+	node = noderef(tpl->node);  /* Re-read node pointer and mask after the resize. */
+	hmask = tpl->hmask;
+	for (i = 0; i <= hmask; i++) {  /* Turn table-valued placeholders back into nil. */
+	  /* This is safe, since template tables only hold immutable values. */
+	  if (tvistab(&node[i].val))
+	    setnilV(&node[i].val);
+	}
+	/* The shape of the table may have changed. Clean up array part, too. */
+	asize = tpl->asize;
+	array = tvref(tpl->array);
+	for (i = 0; i < asize; i++) {
+	  if (tvistab(&array[i]))
+	    setnilV(&array[i]);
+	}
+	J->retryrec = 1;  /* Abort the trace at the end of recording. */
+      }
+    }
+  }
+}
+#endif
+
 /* Record bounds-check. */
 static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
 {
@@ -1078,11 +1236,12 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
 }
 
 /* Record indexed key lookup. */
-static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
+static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref)
 {
   TRef key;
   GCtab *t = tabV(&ix->tabv);
   ix->oldv = lj_tab_get(J->L, t, &ix->keyv);  /* Lookup previous value. */
+  *rbref = 0;
 
   /* Integer keys are looked up in the array part first. */
   key = ix->key;
@@ -1132,8 +1291,9 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
     MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val);
     if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) &&
 	hslot <= 65535*(MSize)sizeof(Node)) {
-      TRef node, kslot;
-      TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
+      TRef node, kslot, hm;
+      *rbref = J->cur.nins;  /* Mark possible rollback point. */
+      hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
       emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
       node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE);
       kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
@@ -1166,6 +1326,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
 {
   TRef xref;
   IROp xrefop, loadop;
+  IRRef rbref;
   cTValue *oldv;
 
   while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
@@ -1178,6 +1339,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
       BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
       TRef *base = J->base + func;
       TValue *tv = J->L->base + func;
+      lua_assert(!LJ_FR2);  /* TODO_FR2: handle different frame setup. */
       base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
       setfuncV(J->L, tv+0, funcV(&ix->mobjv));
       copyTV(J->L, tv+1, &ix->tabv);
@@ -1211,7 +1373,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
   }
 
   /* Record the key lookup. */
-  xref = rec_idx_key(J, ix);
+  xref = rec_idx_key(J, ix, &rbref);
   xrefop = IR(tref_ref(xref))->o;
   loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD;
   /* The lj_meta_tset() inconsistency is gone, but better play safe. */
@@ -1226,6 +1388,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
     } else {
       res = emitir(IRTG(loadop, t), xref, 0);
     }
+    if (tref_ref(res) < rbref)  /* HREFK + load forwarded? */
+      lj_ir_rollback(J, rbref);  /* Rollback to eliminate hmask guard. */
     if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index))
       goto handlemm;
     if (irtype_ispri(t)) res = TREF_PRI(t);  /* Canonicalize primitives. */
@@ -1233,6 +1397,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
   } else {  /* Indexed store. */
     GCtab *mt = tabref(tabV(&ix->tabv)->metatable);
     int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val);
+    if (tref_ref(xref) < rbref)  /* HREFK forwarded? */
+      lj_ir_rollback(J, rbref);  /* Rollback to eliminate hmask guard. */
     if (tvisnil(oldv)) {  /* Previous value was nil? */
       /* Need to duplicate the hasmm check for the early guards. */
       int hasmm = 0;
@@ -1256,6 +1422,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
 	  key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
 	xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key);
 	keybarrier = 0;  /* NEWREF already takes care of the key barrier. */
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+	if ((J->flags & JIT_F_OPT_SINK))  /* Avoid a separate flag. */
+	  rec_idx_bump(J, ix);
+#endif
       }
     } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
       /* Cannot derive that the previous value was non-nil, must do checks. */
@@ -1290,6 +1460,31 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
   }
 }
 
+static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i)
+{  /* Record stores of slots ra..rn-1 into one table at keys i, i+1, ... */
+  RecordIndex ix;
+  cTValue *basev = J->L->base;
+  GCtab *t = tabV(&basev[ra-1]);  /* The target table is in slot ra-1. */
+  settabV(J->L, &ix.tabv, t);
+  ix.tab = getslot(J, ra-1);
+  ix.idxchain = 0;  /* NOTE(review): presumably no metamethod chain; confirm. */
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+  if ((J->flags & JIT_F_OPT_SINK)) {
+    if (t->asize < i+rn-ra)  /* Array part smaller than i + number of values? */
+      lj_tab_reasize(J->L, t, i+rn-ra);
+    setnilV(&ix.keyv);  /* A nil key makes rec_idx_bump() skip its key insert. */
+    rec_idx_bump(J, &ix);
+  }
+#endif
+  for (; ra < rn; i++, ra++) {  /* One recorded indexed store per value slot. */
+    setintV(&ix.keyv, i);
+    ix.key = lj_ir_kint(J, i);
+    copyTV(J->L, &ix.valv, &basev[ra]);
+    ix.val = getslot(J, ra);
+    lj_record_idx(J, &ix);
+  }
+}
+
 /* -- Upvalue access ------------------------------------------------------ */
 
 /* Check whether upvalue is immutable and ok to constify. */
@@ -1401,9 +1596,9 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
     if (count + J->tailcalled > J->param[JIT_P_recunroll]) {
       J->pc++;
       if (J->framedepth + J->retdepth == 0)
-	rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno);  /* Tail-recursion. */
+	lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno);  /* Tail-rec. */
       else
-	rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno);  /* Up-recursion. */
+	lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno);  /* Up-recursion. */
     }
   } else {
     if (count > J->param[JIT_P_callunroll]) {
@@ -1477,9 +1672,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk)
   }
   J->instunroll = 0;  /* Cannot continue across a compiled function. */
   if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
-    rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno);  /* Extra tail-recursion. */
+    lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno);  /* Extra tail-rec. */
   else
-    rec_stop(J, LJ_TRLINK_ROOT, lnk);  /* Link to the function. */
+    lj_record_stop(J, LJ_TRLINK_ROOT, lnk);  /* Link to the function. */
 }
 
 /* -- Vararg handling ----------------------------------------------------- */
@@ -1524,7 +1719,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
 	if (nvararg >= nresults)
 	  emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults));
 	else
-	  emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1)));
+	  emitir(IRTGI(IR_EQ), fr,
+		 lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
 	vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
 	vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8));
 	for (i = 0; i < nload; i++) {
@@ -1599,8 +1795,63 @@ static TRef rec_tnew(jit_State *J, uint32_t ah)
 {
   uint32_t asize = ah & 0x7ff;
   uint32_t hbits = ah >> 11;
+  TRef tr;
   if (asize == 0x7ff) asize = 0x801;
-  return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
+  tr = emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+  J->rbchash[(tr & (RBCHASH_SLOTS-1))].ref = tref_ref(tr);
+  setmref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pc, J->pc);
+  setgcref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt));
+#endif
+  return tr;
+}
+
+/* -- Concatenation ------------------------------------------------------- */
+
+static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
+{  /* Record concat of slots baseslot..topslot. Returns the result or 0. */
+  TRef *top = &J->base[topslot];
+  TValue savetv[5];  /* Backup of 5 stack slots around rec_mm_arith() below. */
+  BCReg s;
+  RecordIndex ix;
+  lua_assert(baseslot < topslot);
+  for (s = baseslot; s <= topslot; s++)
+    (void)getslot(J, s);  /* Ensure all arguments have a reference. */
+  if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) {
+    TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot];
+    /* Walk down from top: convert numbers to strings, stop at any other type. */
+    for (trp = top; trp >= base; trp--) {
+      if (tref_isnumber(*trp))
+	*trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp,
+		      tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT);
+      else if (!tref_isstr(*trp))
+	break;
+    }
+    xbase = ++trp;  /* Lowest slot that takes part in the buffer concat. */
+    tr = hdr = emitir(IRT(IR_BUFHDR, IRT_P32),
+		      lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
+    do {  /* Append the operands xbase..top in order. */
+      tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++);
+    } while (trp <= top);
+    tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+    J->maxslot = (BCReg)(xbase - J->base);
+    if (xbase == base) return tr;  /* Return simple concatenation result. */
+    /* Pass partial result: it becomes the right operand of the __concat call. */
+    topslot = J->maxslot--;
+    *xbase = tr;
+    top = xbase;
+    setstrV(J->L, &ix.keyv, &J2G(J)->strempty);  /* Simulate string result. */
+  } else {  /* Top two operands are not both number/string: metamethod only. */
+    J->maxslot = topslot-1;
+    copyTV(J->L, &ix.keyv, &J->L->base[topslot]);
+  }
+  copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]);
+  ix.tab = top[-1];  /* Left operand. */
+  ix.key = top[0];  /* Right operand. */
+  memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv));  /* Save slots. */
+  rec_mm_arith(J, &ix, MM_concat);  /* Call __concat metamethod. */
+  memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv));  /* Restore slots. */
+  return 0;  /* No result yet. */
 }
 
 /* -- Record bytecode ops ------------------------------------------------- */
@@ -1641,7 +1892,7 @@ void lj_record_ins(jit_State *J)
   if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) {
     switch (J->postproc) {
     case LJ_POST_FIXCOMP:  /* Fixup comparison. */
-      pc = frame_pc(&J2G(J)->tmptv);
+      pc = (const BCIns *)(uintptr_t)J2G(J)->tmptv.u64;
       rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1)));
       /* fallthrough */
     case LJ_POST_FIXGUARD:  /* Fixup and emit pending guard. */
@@ -1709,6 +1960,10 @@ void lj_record_ins(jit_State *J)
   rec_check_ir(J);
 #endif
 
+#if LJ_HASPROFILE
+  rec_profile_ins(J, pc);
+#endif
+
   /* Keep a copy of the runtime values of var/num/str operands. */
 #define rav	(&ix.valv)
 #define rbv	(&ix.tabv)
@@ -1735,7 +1990,7 @@ void lj_record_ins(jit_State *J)
   switch (bcmode_c(op)) {
   case BCMvar:
     copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
-  case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
+  case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
   case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
     copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
     lj_ir_knumint(J, numV(tv)); } break;
@@ -1830,6 +2085,18 @@ void lj_record_ins(jit_State *J)
       J->maxslot = bc_a(pc[1]);  /* Shrink used slots. */
     break;
 
+  case BC_ISTYPE: case BC_ISNUM:
+    /* These coercions need to correspond with lj_meta_istype(). */
+    if (LJ_DUALNUM && rc == ~LJ_TNUMX+1)
+      ra = lj_opt_narrow_toint(J, ra);
+    else if (rc == ~LJ_TNUMX+2)
+      ra = lj_ir_tonum(J, ra);
+    else if (rc == ~LJ_TSTR+1)
+      ra = lj_ir_tostr(J, ra);
+    /* else: type specialization suffices. */
+    J->base[bc_a(ins)] = ra;
+    break;
+
   /* -- Unary ops --------------------------------------------------------- */
 
   case BC_NOT:
@@ -1893,6 +2160,12 @@ void lj_record_ins(jit_State *J)
       rc = rec_mm_arith(J, &ix, MM_pow);
     break;
 
+  /* -- Miscellaneous ops ------------------------------------------------- */
+
+  case BC_CAT:
+    rc = rec_cat(J, rb, rc);
+    break;
+
   /* -- Constant and move ops --------------------------------------------- */
 
   case BC_MOV:
@@ -1941,6 +2214,14 @@ void lj_record_ins(jit_State *J)
     ix.idxchain = LJ_MAX_IDXCHAIN;
     rc = lj_record_idx(J, &ix);
     break;
+  case BC_TGETR: case BC_TSETR:
+    ix.idxchain = 0;
+    rc = lj_record_idx(J, &ix);
+    break;
+
+  case BC_TSETM:
+    rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo);
+    break;
 
   case BC_TNEW:
     rc = rec_tnew(J, rc);
@@ -1948,33 +2229,38 @@ void lj_record_ins(jit_State *J)
   case BC_TDUP:
     rc = emitir(IRTG(IR_TDUP, IRT_TAB),
 		lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0);
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+    J->rbchash[(rc & (RBCHASH_SLOTS-1))].ref = tref_ref(rc);
+    setmref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pc, pc);
+    setgcref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt));
+#endif
     break;
 
   /* -- Calls and vararg handling ----------------------------------------- */
 
   case BC_ITERC:
-    J->base[ra] = getslot(J, ra-3);
-    J->base[ra+1] = getslot(J, ra-2);
-    J->base[ra+2] = getslot(J, ra-1);
+    J->base[ra] = getslot(J, ra-3-LJ_FR2);
+    J->base[ra+1] = getslot(J, ra-2-LJ_FR2);
+    J->base[ra+2] = getslot(J, ra-1-LJ_FR2);
     { /* Do the actual copy now because lj_record_call needs the values. */
       TValue *b = &J->L->base[ra];
-      copyTV(J->L, b, b-3);
-      copyTV(J->L, b+1, b-2);
-      copyTV(J->L, b+2, b-1);
+      copyTV(J->L, b, b-3-LJ_FR2);
+      copyTV(J->L, b+1, b-2-LJ_FR2);
+      copyTV(J->L, b+2, b-1-LJ_FR2);
     }
     lj_record_call(J, ra, (ptrdiff_t)rc-1);
     break;
 
   /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */
   case BC_CALLM:
-    rc = (BCReg)(J->L->top - J->L->base) - ra;
+    rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
     /* fallthrough */
   case BC_CALL:
     lj_record_call(J, ra, (ptrdiff_t)rc-1);
     break;
 
   case BC_CALLMT:
-    rc = (BCReg)(J->L->top - J->L->base) - ra;
+    rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
     /* fallthrough */
   case BC_CALLT:
     lj_record_tailcall(J, ra, (ptrdiff_t)rc-1);
@@ -1991,6 +2277,9 @@ void lj_record_ins(jit_State *J)
     rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
     /* fallthrough */
   case BC_RET: case BC_RET0: case BC_RET1:
+#if LJ_HASPROFILE
+    rec_profile_ret(J);
+#endif
     lj_record_ret(J, ra, (ptrdiff_t)rc-1);
     break;
 
@@ -2003,7 +2292,7 @@ void lj_record_ins(jit_State *J)
   case BC_JFORI:
     lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
     if (rec_for(J, pc, 0) != LOOPEV_LEAVE)  /* Link to existing loop. */
-      rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
+      lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
     /* Continue tracing if the loop is not entered. */
     break;
 
@@ -2070,10 +2359,8 @@ void lj_record_ins(jit_State *J)
     /* fallthrough */
   case BC_ITERN:
   case BC_ISNEXT:
-  case BC_CAT:
   case BC_UCLO:
   case BC_FNEW:
-  case BC_TSETM:
     setintV(&J->errinfo, (int32_t)op);
     lj_trace_err_info(J, LJ_TRERR_NYIBC);
     break;
@@ -2139,6 +2426,12 @@ static const BCIns *rec_setup_root(jit_State *J)
     J->maxslot = J->pt->numparams;
     pc++;
     break;
+  case BC_CALLM:
+  case BC_CALL:
+  case BC_ITERC:
+    /* No bytecode range check for stitched traces. */
+    pc++;
+    break;
   default:
     lua_assert(0);
     break;
@@ -2154,6 +2447,9 @@ void lj_record_setup(jit_State *J)
   /* Initialize state related to current trace. */
   memset(J->slot, 0, sizeof(J->slot));
   memset(J->chain, 0, sizeof(J->chain));
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+  memset(J->rbchash, 0, sizeof(J->rbchash));
+#endif
   memset(J->bpropcache, 0, sizeof(J->bpropcache));
   J->scev.idx = REF_NIL;
   setmref(J->scev.pc, NULL);
@@ -2207,7 +2503,7 @@ void lj_record_setup(jit_State *J)
     if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
 	T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
 				    J->param[JIT_P_tryside]) {
-      rec_stop(J, LJ_TRLINK_INTERP, 0);
+      lj_record_stop(J, LJ_TRLINK_INTERP, 0);
     }
   } else {  /* Root trace. */
     J->cur.root = 0;
@@ -2219,9 +2515,15 @@ void lj_record_setup(jit_State *J)
     lj_snap_add(J);
     if (bc_op(J->cur.startins) == BC_FORL)
       rec_for_loop(J, J->pc-1, &J->scev, 1);
+    else if (bc_op(J->cur.startins) == BC_ITERC)
+      J->startpc = NULL;
     if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
       lj_trace_err(J, LJ_TRERR_STACKOV);
   }
+#if LJ_HASPROFILE
+  J->prev_pt = NULL;
+  J->prev_line = -1;
+#endif
 #ifdef LUAJIT_ENABLE_CHECKHOOK
   /* Regularly check for instruction/line hooks from compiled code and
   ** exit to the interpreter if the hooks are set.
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_record.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_record.h
similarity index 95%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_record.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_record.h
index c9f4882a9056afe22a879e30549525a92cb90ed6..732adb47ce293675a79d4c499a8f362f57d3123b 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_record.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_record.h
@@ -28,6 +28,7 @@ typedef struct RecordIndex {
 
 LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
 			     cTValue *av, cTValue *bv);
+LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
 LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
 
 LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_snap.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_snap.c
similarity index 97%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_snap.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_snap.c
index 5c870bafd4c41aacbcbc1c3075cb7fd6fc0cfee2..d8e7987c5c2591c560aead7c998a2376271c0308 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_snap.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_snap.c
@@ -97,8 +97,10 @@ static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
 {
   cTValue *frame = J->L->base - 1;
   cTValue *lim = J->L->base - J->baseslot;
-  cTValue *ftop = frame + funcproto(frame_func(frame))->framesize;
+  GCfunc *fn = frame_func(frame);
+  cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
   MSize f = 0;
+  lua_assert(!LJ_FR2);  /* TODO_FR2: store 64 bit PCs. */
   map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
   while (frame > lim) {  /* Backwards traversal of all frames above base. */
     if (frame_islua(frame)) {
@@ -240,7 +242,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
     case BCMbase:
       if (op >= BC_CALLM && op <= BC_VARG) {
 	BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
-		    maxslot : (bc_a(ins) + bc_c(ins));
+		    maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
+	if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
 	s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
 	for (; s < top; s++) USE_SLOT(s);
 	for (; s < maxslot; s++) DEF_SLOT(s);
@@ -599,6 +602,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
   }
   if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
     rs = snap_renameref(T, snapno, ref, rs);
+  lua_assert(!LJ_GC64);  /* TODO_GC64: handle 64 bit references. */
   if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
     int32_t *sps = &ex->spill[regsp_spill(rs)];
     if (irt_isinteger(t)) {
@@ -612,8 +616,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
       o->u64 = *(uint64_t *)sps;
     } else {
       lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
-      setgcrefi(o->gcr, *sps);
-      setitype(o, irt_toitype(t));
+      setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
     }
   } else {  /* Restore from register. */
     Reg r = regsp_reg(rs);
@@ -631,10 +634,10 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
     } else if (LJ_64 && irt_islightud(t)) {
       /* 64 bit lightuserdata which may escape already has the tag bits. */
       o->u64 = ex->gpr[r-RID_MIN_GPR];
+    } else if (irt_ispri(t)) {
+      setpriV(o, irt_toitype(t));
     } else {
-      if (!irt_ispri(t))
-	setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
-      setitype(o, irt_toitype(t));
+      setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
     }
   }
 }
@@ -795,7 +798,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
   MSize n, nent = snap->nent;
   SnapEntry *map = &T->snapmap[snap->mapofs];
   SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
-  int32_t ftsz0;
+  ptrdiff_t ftsz0;
   TValue *frame;
   BloomFilter rfilt = snap_renamefilter(T, snapno);
   const BCIns *pc = snap_pc(map[nent]);
@@ -836,8 +839,9 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
 	snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
 	o->u32.hi = tmp.u32.lo;
       } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+	lua_assert(!LJ_FR2);  /* TODO_FR2: store 64 bit PCs. */
 	/* Overwrite tag with frame link. */
-	o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0;
+	setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
 	L->base = o+1;
       }
     }
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_snap.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_snap.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_snap.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_snap.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_state.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_state.c
similarity index 91%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_state.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_state.c
index e654afaeb989fe1dd880e5cae988c9c66f1a1be0..84b4d113a2bee8f9f49eb0a62a55198bdba01a98 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_state.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_state.c
@@ -12,6 +12,7 @@
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_func.h"
@@ -26,6 +27,7 @@
 #include "lj_vm.h"
 #include "lj_lex.h"
 #include "lj_alloc.h"
+#include "luajit.h"
 
 /* -- Stack handling ------------------------------------------------------ */
 
@@ -47,6 +49,7 @@
 ** one extra slot if mobj is not a function. Only lj_meta_tset needs 5
 ** slots above top, but then mobj is always a function. So we can get by
 ** with 5 extra slots.
+** LJ_FR2: We need 2 more slots for the frame PC and the continuation PC.
 */
 
 /* Resize stack slots and adjust pointers in state. */
@@ -59,7 +62,7 @@ static void resizestack(lua_State *L, MSize n)
   GCobj *up;
   lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1);
   st = (TValue *)lj_mem_realloc(L, tvref(L->stack),
-				(MSize)(L->stacksize*sizeof(TValue)),
+				(MSize)(oldsize*sizeof(TValue)),
 				(MSize)(realsize*sizeof(TValue)));
   setmref(L->stack, st);
   delta = (char *)st - (char *)oldst;
@@ -67,12 +70,12 @@ static void resizestack(lua_State *L, MSize n)
   while (oldsize < realsize)  /* Clear new slots. */
     setnilV(st + oldsize++);
   L->stacksize = realsize;
+  if ((size_t)(mref(G(L)->jit_base, char) - (char *)oldst) < oldsize)
+    setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
   L->base = (TValue *)((char *)L->base + delta);
   L->top = (TValue *)((char *)L->top + delta);
   for (up = gcref(L->openupval); up != NULL; up = gcnext(up))
     setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta));
-  if (obj2gco(L) == gcref(G(L)->jit_L))
-    setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
 }
 
 /* Relimit stack after error, in case the limit was overdrawn. */
@@ -89,7 +92,8 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
     return;  /* Avoid stack shrinking while handling stack overflow. */
   if (4*used < L->stacksize &&
       2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize &&
-      obj2gco(L) != gcref(G(L)->jit_L))  /* Don't shrink stack of live trace. */
+      /* Don't shrink stack of live trace. */
+      (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->cur_L)))
     resizestack(L, L->stacksize >> 1);
 }
 
@@ -125,8 +129,9 @@ static void stack_init(lua_State *L1, lua_State *L)
   L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA;
   stend = st + L1->stacksize;
   setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1);
-  L1->base = L1->top = st+1;
-  setthreadV(L1, st, L1);  /* Needed for curr_funcisL() on empty stack. */
+  setthreadV(L1, st++, L1);  /* Needed for curr_funcisL() on empty stack. */
+  if (LJ_FR2) setnilV(st++);
+  L1->base = L1->top = st;
   while (st < stend)  /* Clear new slots. */
     setnilV(st++);
 }
@@ -164,7 +169,7 @@ static void close_state(lua_State *L)
   lj_ctype_freestate(g);
 #endif
   lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
-  lj_str_freebuf(g, &g->tmpbuf);
+  lj_buf_free(g, &g->tmpbuf);
   lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
   lua_assert(g->gc.total == sizeof(GG_State));
 #ifndef LUAJIT_USE_SYSMALLOC
@@ -184,7 +189,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
   GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State));
   lua_State *L = &GG->L;
   global_State *g = &GG->g;
-  if (GG == NULL || !checkptr32(GG)) return NULL;
+  if (GG == NULL || !checkptrGC(GG)) return NULL;
   memset(GG, 0, sizeof(GG_State));
   L->gct = ~LJ_TTHREAD;
   L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED;  /* Prevent free. */
@@ -202,8 +207,10 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
   setnilV(registry(L));
   setnilV(&g->nilnode.val);
   setnilV(&g->nilnode.key);
+#if !LJ_GC64
   setmref(g->nilnode.freetop, &g->nilnode);
-  lj_str_initbuf(&g->tmpbuf);
+#endif
+  lj_buf_init(NULL, &g->tmpbuf);
   g->gc.state = GCSpause;
   setgcref(g->gc.root, obj2gco(L));
   setmref(g->gc.sweep, &g->gc.root);
@@ -236,6 +243,10 @@ LUA_API void lua_close(lua_State *L)
   global_State *g = G(L);
   int i;
   L = mainthread(g);  /* Only the main thread can be closed. */
+#if LJ_HASPROFILE
+  luaJIT_profile_stop(L);
+#endif
+  setgcrefnull(g->cur_L);
   lj_func_closeuv(L, tvref(L->stack));
   lj_gc_separateudata(g, 1);  /* Separate udata which have GC metamethods. */
 #if LJ_HASJIT
@@ -246,8 +257,8 @@ LUA_API void lua_close(lua_State *L)
   for (i = 0;;) {
     hook_enter(g);
     L->status = 0;
+    L->base = L->top = tvref(L->stack) + 1 + LJ_FR2;
     L->cframe = NULL;
-    L->base = L->top = tvref(L->stack) + 1;
     if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) {
       if (++i >= 10) break;
       lj_gc_separateudata(g, 1);  /* Separate udata again. */
@@ -279,6 +290,8 @@ lua_State *lj_state_new(lua_State *L)
 void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
 {
   lua_assert(L != mainthread(g));
+  if (obj2gco(L) == gcref(g->cur_L))
+    setgcrefnull(g->cur_L);
   lj_func_closeuv(L, tvref(L->stack));
   lua_assert(gcref(L->openupval) == NULL);
   lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_state.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_state.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_state.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_state.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_str.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_str.c
similarity index 54%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_str.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_str.c
index 3239bfc957c63f1074a98ee35c809b3f4270dd95..3bc8079f37c87dc7fbe5946bfcad441677500d59 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_str.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_str.c
@@ -1,13 +1,8 @@
 /*
 ** String handling.
 ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
-**
-** Portions taken verbatim or adapted from the Lua interpreter.
-** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
 */
 
-#include <stdio.h>
-
 #define lj_str_c
 #define LUA_CORE
 
@@ -15,10 +10,9 @@
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_str.h"
-#include "lj_state.h"
 #include "lj_char.h"
 
-/* -- String interning ---------------------------------------------------- */
+/* -- String helpers ------------------------------------------------------ */
 
 /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
 int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
@@ -64,6 +58,40 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
   return 0;
 }
 
+/* Find fixed string p (plen bytes) inside s (slen bytes). NULL if not found. */
+const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
+{
+  if (plen <= slen) {
+    if (plen == 0) {
+      return s;  /* An empty pattern matches at the start of s. */
+    } else {
+      int c = *(const uint8_t *)p++;  /* First pattern byte; p = remainder. */
+      plen--; slen -= plen;  /* slen is now the number of candidate starts. */
+      while (slen) {
+	const char *q = (const char *)memchr(s, c, slen);
+	if (!q) break;
+	if (memcmp(q+1, p, plen) == 0) return q;  /* Remainder matches, too. */
+	q++; slen -= (MSize)(q-s); s = q;  /* Resume just past this candidate. */
+      }
+    }
+  }
+  return NULL;
+}
+
+/* Check whether a string has a pattern matching character. Returns 1 or 0. */
+int lj_str_haspattern(GCstr *s)
+{
+  const char *p = strdata(s), *q = p + s->len;  /* Scan the bytes in [p, q). */
+  while (p < q) {  /* NOTE(review): ispunct presumably keeps '\0' from strchr. */
+    int c = *(const uint8_t *)p++;
+    if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
+      return 1;  /* Found a pattern matching char. */
+  }
+  return 0;  /* No pattern matching chars found. */
+}
+
+/* -- String interning ---------------------------------------------------- */
+
 /* Resize the string hash table (grow and shrink). */
 void lj_str_resize(lua_State *L, MSize newmask)
 {
@@ -194,173 +222,3 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
   lj_mem_free(g, s, sizestring(s));
 }
 
-/* -- Type conversions ---------------------------------------------------- */
-
-/* Print number to buffer. Canonicalizes non-finite values. */
-size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
-{
-  if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) {  /* Finite? */
-    lua_Number n = o->n;
-#if __BIONIC__
-    if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
-#endif
-    return (size_t)lua_number2str(s, n);
-  } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
-    s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
-  } else if ((o->u32.hi & 0x80000000) == 0) {
-    s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
-  } else {
-    s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
-  }
-}
-
-/* Print integer to buffer. Returns pointer to start. */
-char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k)
-{
-  uint32_t u = (uint32_t)(k < 0 ? -k : k);
-  p += 1+10;
-  do { *--p = (char)('0' + u % 10); } while (u /= 10);
-  if (k < 0) *--p = '-';
-  return p;
-}
-
-/* Convert number to string. */
-GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
-{
-  char buf[LJ_STR_NUMBUF];
-  size_t len = lj_str_bufnum(buf, (TValue *)np);
-  return lj_str_new(L, buf, len);
-}
-
-/* Convert integer to string. */
-GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
-{
-  char s[1+10];
-  char *p = lj_str_bufint(s, k);
-  return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
-}
-
-GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
-{
-  return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n);
-}
-
-/* -- String formatting --------------------------------------------------- */
-
-static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
-{
-  char *p;
-  MSize i;
-  if (sb->n + len > sb->sz) {
-    MSize sz = sb->sz * 2;
-    while (sb->n + len > sz) sz = sz * 2;
-    lj_str_resizebuf(L, sb, sz);
-  }
-  p = sb->buf + sb->n;
-  sb->n += len;
-  for (i = 0; i < len; i++) p[i] = str[i];
-}
-
-static void addchar(lua_State *L, SBuf *sb, int c)
-{
-  if (sb->n + 1 > sb->sz) {
-    MSize sz = sb->sz * 2;
-    lj_str_resizebuf(L, sb, sz);
-  }
-  sb->buf[sb->n++] = (char)c;
-}
-
-/* Push formatted message as a string object to Lua stack. va_list variant. */
-const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
-{
-  SBuf *sb = &G(L)->tmpbuf;
-  lj_str_needbuf(L, sb, (MSize)strlen(fmt));
-  lj_str_resetbuf(sb);
-  for (;;) {
-    const char *e = strchr(fmt, '%');
-    if (e == NULL) break;
-    addstr(L, sb, fmt, (MSize)(e-fmt));
-    /* This function only handles %s, %c, %d, %f and %p formats. */
-    switch (e[1]) {
-    case 's': {
-      const char *s = va_arg(argp, char *);
-      if (s == NULL) s = "(null)";
-      addstr(L, sb, s, (MSize)strlen(s));
-      break;
-      }
-    case 'c':
-      addchar(L, sb, va_arg(argp, int));
-      break;
-    case 'd': {
-      char buf[LJ_STR_INTBUF];
-      char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
-      addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
-      break;
-      }
-    case 'f': {
-      char buf[LJ_STR_NUMBUF];
-      TValue tv;
-      MSize len;
-      tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
-      len = (MSize)lj_str_bufnum(buf, &tv);
-      addstr(L, sb, buf, len);
-      break;
-      }
-    case 'p': {
-#define FMTP_CHARS	(2*sizeof(ptrdiff_t))
-      char buf[2+FMTP_CHARS];
-      ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
-      ptrdiff_t i, lasti = 2+FMTP_CHARS;
-      if (p == 0) {
-	addstr(L, sb, "NULL", 4);
-	break;
-      }
-#if LJ_64
-      /* Shorten output for 64 bit pointers. */
-      lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
-#endif
-      buf[0] = '0';
-      buf[1] = 'x';
-      for (i = lasti-1; i >= 2; i--, p >>= 4)
-	buf[i] = "0123456789abcdef"[(p & 15)];
-      addstr(L, sb, buf, (MSize)lasti);
-      break;
-      }
-    case '%':
-      addchar(L, sb, '%');
-      break;
-    default:
-      addchar(L, sb, '%');
-      addchar(L, sb, e[1]);
-      break;
-    }
-    fmt = e+2;
-  }
-  addstr(L, sb, fmt, (MSize)strlen(fmt));
-  setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
-  incr_top(L);
-  return strVdata(L->top - 1);
-}
-
-/* Push formatted message as a string object to Lua stack. Vararg variant. */
-const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
-{
-  const char *msg;
-  va_list argp;
-  va_start(argp, fmt);
-  msg = lj_str_pushvf(L, fmt, argp);
-  va_end(argp);
-  return msg;
-}
-
-/* -- Buffer handling ----------------------------------------------------- */
-
-char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
-{
-  if (sz > sb->sz) {
-    if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
-    lj_str_resizebuf(L, sb, sz);
-  }
-  return sb->buf;
-}
-
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_str.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_str.h
new file mode 100644
index 0000000000000000000000000000000000000000..d8465de663bfd75ca1455474ec206daea6e9e28a
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_str.h
@@ -0,0 +1,27 @@
+/*
+** String handling.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_STR_H
+#define _LJ_STR_H
+
+#include <stdarg.h>
+
+#include "lj_obj.h"
+
+/* String helpers. */
+LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
+LJ_FUNC const char *lj_str_find(const char *s, const char *f,
+				MSize slen, MSize flen);
+LJ_FUNC int lj_str_haspattern(GCstr *s);
+
+/* String interning. */
+LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
+LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
+LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
+
+#define lj_str_newz(L, s)	(lj_str_new(L, s, strlen(s)))
+#define lj_str_newlit(L, s)	(lj_str_new(L, "" s, sizeof(s)-1))
+
+#endif
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_strfmt.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_strfmt.c
new file mode 100644
index 0000000000000000000000000000000000000000..d54e796ae2a31dce9e8b1d9546cdfc9d37d52615
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_strfmt.c
@@ -0,0 +1,554 @@
+/*
+** String formatting.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include <stdio.h>
+
+#define lj_strfmt_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_state.h"
+#include "lj_char.h"
+#include "lj_strfmt.h"
+
+/* -- Format parser ------------------------------------------------------- */
+
+static const uint8_t strfmt_map[('x'-'A')+1] = {
+  STRFMT_A,0,0,0,STRFMT_E,0,STRFMT_G,0,0,0,0,0,0,
+  0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
+  0,0,0,0,0,0,
+  STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
+  0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
+};
+
+SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
+{
+  const uint8_t *p = fs->p, *e = fs->e;
+  fs->str = (const char *)p;
+  for (; p < e; p++) {
+    if (*p == '%') {  /* Escape char? */
+      if (p[1] == '%') {  /* '%%'? */
+	fs->p = ++p+1;
+	goto retlit;
+      } else {
+	SFormat sf = 0;
+	uint32_t c;
+	if (p != (const uint8_t *)fs->str)
+	  break;
+	for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
+	  /* Parse flags. */
+	  if (*p == '-') sf |= STRFMT_F_LEFT;
+	  else if (*p == '+') sf |= STRFMT_F_PLUS;
+	  else if (*p == '0') sf |= STRFMT_F_ZERO;
+	  else if (*p == ' ') sf |= STRFMT_F_SPACE;
+	  else if (*p == '#') sf |= STRFMT_F_ALT;
+	  else break;
+	}
+	if ((uint32_t)*p - '0' < 10) {  /* Parse width. */
+	  uint32_t width = (uint32_t)*p++ - '0';
+	  if ((uint32_t)*p - '0' < 10)
+	    width = (uint32_t)*p++ - '0' + width*10;
+	  sf |= (width << STRFMT_SH_WIDTH);
+	}
+	if (*p == '.') {  /* Parse precision. */
+	  uint32_t prec = 0;
+	  p++;
+	  if ((uint32_t)*p - '0' < 10) {
+	    prec = (uint32_t)*p++ - '0';
+	    if ((uint32_t)*p - '0' < 10)
+	      prec = (uint32_t)*p++ - '0' + prec*10;
+	  }
+	  sf |= ((prec+1) << STRFMT_SH_PREC);
+	}
+	/* Parse conversion. */
+	c = (uint32_t)*p - 'A';
+	if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
+	  uint32_t sx = strfmt_map[c];
+	  if (sx) {
+	    fs->p = p+1;
+	    return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
+	  }
+	}
+	/* Return error location. */
+	if (*p >= 32) p++;
+	fs->len = (MSize)(p - (const uint8_t *)fs->str);
+	fs->p = fs->e;
+	return STRFMT_ERR;
+      }
+    }
+  }
+  fs->p = p;
+retlit:
+  fs->len = (MSize)(p - (const uint8_t *)fs->str);
+  return fs->len ? STRFMT_LIT : STRFMT_EOF;
+}
+
+/* -- Raw conversions ----------------------------------------------------- */
+
+/* Write number to buffer. */
+char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o)
+{
+  if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) {  /* Finite? */
+#if __BIONIC__
+    if (tvismzero(o)) { *p++ = '-'; *p++ = '0'; return p; }
+#endif
+    return p + lua_number2str(p, o->n);
+  } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
+    *p++ = 'n'; *p++ = 'a'; *p++ = 'n';
+  } else if ((o->u32.hi & 0x80000000) == 0) {
+    *p++ = 'i'; *p++ = 'n'; *p++ = 'f';
+  } else {
+    *p++ = '-'; *p++ = 'i'; *p++ = 'n'; *p++ = 'f';
+  }
+  return p;
+}
+
+#define WINT_R(x, sh, sc) \
+  { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
+
+/* Write integer to buffer (no NUL). Returns pointer past the last char. */
+char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
+{
+  uint32_t u = (uint32_t)k;
+  if (k < 0) { u = ~u+1u; *p++ = '-'; }  /* Unsigned negate: -k is UB for INT32_MIN. */
+  if (u < 10000) {
+    if (u < 10) goto dig1; if (u < 100) goto dig2; if (u < 1000) goto dig3;
+  } else {
+    uint32_t v = u / 10000; u -= v * 10000;
+    if (v < 10000) {
+      if (v < 10) goto dig5; if (v < 100) goto dig6; if (v < 1000) goto dig7;
+    } else {
+      uint32_t w = v / 10000; v -= w * 10000;
+      if (w >= 10) WINT_R(w, 10, 10)
+      *p++ = (char)('0'+w);
+    }
+    WINT_R(v, 23, 1000)
+    dig7: WINT_R(v, 12, 100)
+    dig6: WINT_R(v, 10, 10)
+    dig5: *p++ = (char)('0'+v);
+  }
+  WINT_R(u, 23, 1000)
+  dig3: WINT_R(u, 12, 100)
+  dig2: WINT_R(u, 10, 10)
+  dig1: *p++ = (char)('0'+u);
+  return p;
+}
+#undef WINT_R
+
+/* Write pointer to buffer as "0x" + lowercase hex, or "NULL" if null (no NUL). */
+char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
+{
+  ptrdiff_t x = (ptrdiff_t)v;
+  MSize i, n = STRFMT_MAXBUF_PTR;
+  if (x == 0) {
+    *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
+    return p;
+  }
+#if LJ_64
+  /* Shorten output for 64 bit pointers. */
+  n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
+#endif
+  p[0] = '0';
+  p[1] = 'x';
+  for (i = n-1; i >= 2; i--, x >>= 4)
+    p[i] = "0123456789abcdef"[(x & 15)];
+  return p+n;
+}
+
+/* Write ULEB128 to buffer: 7 bits per byte, low bits first, 0x80 = more follow. */
+char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
+{
+  for (; v >= 0x80; v >>= 7)
+    *p++ = (char)((v & 0x7f) | 0x80);
+  *p++ = (char)v;
+  return p;
+}
+
+/* Return string or write number to buffer and return pointer to start. */
+const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp)
+{
+  if (tvisstr(o)) {
+    *lenp = strV(o)->len;
+    return strVdata(o);
+  } else if (tvisint(o)) {
+    *lenp = (MSize)(lj_strfmt_wint(buf, intV(o)) - buf);
+    return buf;
+  } else if (tvisnum(o)) {
+    *lenp = (MSize)(lj_strfmt_wnum(buf, o) - buf);
+    return buf;
+  } else {
+    return NULL;
+  }
+}
+
+/* -- Unformatted conversions to buffer ----------------------------------- */
+
+/* Add integer to buffer. */
+SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
+{
+  setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k));
+  return sb;
+}
+
+#if LJ_HASJIT
+/* Add number to buffer. */
+SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
+{
+  setsbufP(sb, lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM), o));
+  return sb;
+}
+#endif
+
+SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
+{
+  setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v));
+  return sb;
+}
+
+/* Add quoted string to buffer. */
+SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
+{
+  const char *s = strdata(str);
+  MSize len = str->len;
+  lj_buf_putb(sb, '"');
+  while (len--) {
+    uint32_t c = (uint32_t)(uint8_t)*s++;
+    char *p = lj_buf_more(sb, 4);
+    if (c == '"' || c == '\\' || c == '\n') {
+      *p++ = '\\';
+    } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
+      uint32_t d;
+      *p++ = '\\';
+      if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
+	*p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
+	goto tens;
+      } else if (c >= 10) {
+      tens:
+	d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
+      }
+      c += '0';
+    }
+    *p++ = (char)c;
+    setsbufP(sb, p);
+  }
+  lj_buf_putb(sb, '"');
+  return sb;
+}
+
+/* -- Formatted conversions to buffer ------------------------------------- */
+
+/* Add formatted char to buffer. */
+SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
+{
+  MSize width = STRFMT_WIDTH(sf);
+  char *p = lj_buf_more(sb, width > 1 ? width : 1);
+  if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
+  while (width-- > 1) *p++ = ' ';
+  if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
+  setsbufP(sb, p);
+  return sb;
+}
+
+/* Add formatted string to buffer. */
+SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
+{
+  MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
+  MSize width = STRFMT_WIDTH(sf);
+  char *p = lj_buf_more(sb, width > len ? width : len);
+  if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
+  while (width-- > len) *p++ = ' ';
+  if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
+  setsbufP(sb, p);
+  return sb;
+}
+
+/* Add formatted signed/unsigned integer to buffer. */
+SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
+{
+  char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p;
+#ifdef LUA_USE_ASSERT
+  char *ps;
+#endif
+  MSize prefix = 0, len, prec, pprec, width, need;
+
+  /* Figure out signed prefixes. Bits 8+ = prefix length, low byte = prefix char. */
+  if (STRFMT_TYPE(sf) == STRFMT_INT) {
+    if ((int64_t)k < 0) {
+      k = ~k+1u;  /* Unsigned negate: -(int64_t)k is UB for INT64_MIN. */
+      prefix = 256 + '-';
+    } else if ((sf & STRFMT_F_PLUS)) {
+      prefix = 256 + '+';
+    } else if ((sf & STRFMT_F_SPACE)) {
+      prefix = 256 + ' ';
+    }
+  }
+
+  /* Convert number and store to fixed-size buffer in reverse order. */
+  prec = STRFMT_PREC(sf);
+  if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
+  if (k == 0) {  /* Special-case zero argument. */
+    if (prec != 0 ||
+	(sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
+      *--q = '0';
+  } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) {  /* Decimal. */
+    uint32_t k2;
+    while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
+    k2 = (uint32_t)k;
+    do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
+  } else if ((sf & STRFMT_T_HEX)) {  /* Hex. */
+    const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
+						 "0123456789abcdef";
+    do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
+    if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
+  } else {  /* Octal. */
+    do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
+    if ((sf & STRFMT_F_ALT)) *--q = '0';
+  }
+
+  /* Calculate sizes. */
+  len = (MSize)(buf + sizeof(buf) - q);
+  if ((int32_t)len >= (int32_t)prec) prec = len;
+  width = STRFMT_WIDTH(sf);
+  pprec = prec + (prefix >> 8);
+  need = width > pprec ? width : pprec;
+  p = lj_buf_more(sb, need);
+#ifdef LUA_USE_ASSERT
+  ps = p;
+#endif
+
+  /* Format number with leading/trailing whitespace and zeros. */
+  if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
+    while (width-- > pprec) *p++ = ' ';
+  if (prefix) {
+    if ((char)prefix >= 'X') *p++ = '0';
+    *p++ = (char)prefix;
+  }
+  if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
+    while (width-- > pprec) *p++ = '0';
+  while (prec-- > len) *p++ = '0';
+  while (q < buf + sizeof(buf)) *p++ = *q++;  /* Add number itself. */
+  if ((sf & STRFMT_F_LEFT))
+    while (width-- > pprec) *p++ = ' ';
+
+  lua_assert(need == (MSize)(p - ps));
+  setsbufP(sb, p);
+  return sb;
+}
+
+/* Add number formatted as signed integer to buffer. */
+SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
+{
+  int64_t k = (int64_t)n;
+  if (checki32(k) && sf == STRFMT_INT)
+    return lj_strfmt_putint(sb, (int32_t)k);  /* Shortcut for plain %d. */
+  else
+    return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
+}
+
+/* Add number formatted as unsigned integer to buffer. */
+SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
+{
+  int64_t k;
+  if (n >= 9223372036854775808.0)
+    k = (int64_t)(n - 18446744073709551616.0);
+  else
+    k = (int64_t)n;
+  return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
+}
+
+/* Max. sprintf buffer size needed. At least #string.format("%.99f", -1e308). */
+#define STRFMT_FMTNUMBUF	512
+
+/* Add formatted floating-point number to buffer. */
+SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n)
+{
+  TValue tv;
+  tv.n = n;
+  if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
+    /* Canonicalize output of non-finite values. */
+    MSize width = STRFMT_WIDTH(sf), len = 3;
+    int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;
+    char *p;
+    if (((tv.u32.hi & 0x000fffff) | tv.u32.lo) != 0) {
+      ch ^= ('n' << 16) | ('a' << 8) | 'n';
+      if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+    } else {
+      ch ^= ('i' << 16) | ('n' << 8) | 'f';
+      if ((tv.u32.hi & 0x80000000)) prefix = '-';
+      else if ((sf & STRFMT_F_PLUS)) prefix = '+';
+      else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+    }
+    if (prefix) len = 4;
+    p = lj_buf_more(sb, width > len ? width : len);
+    if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
+    if (prefix) *p++ = prefix;
+    *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch;
+    if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
+    setsbufP(sb, p);
+  } else {  /* Delegate to sprintf() for now. */
+    uint8_t width = (uint8_t)STRFMT_WIDTH(sf), prec = (uint8_t)STRFMT_PREC(sf);
+    char fmt[1+5+2+3+1+1], *p = fmt;
+    *p++ = '%';
+    if ((sf & STRFMT_F_LEFT)) *p++ = '-';
+    if ((sf & STRFMT_F_PLUS)) *p++ = '+';
+    if ((sf & STRFMT_F_ZERO)) *p++ = '0';
+    if ((sf & STRFMT_F_SPACE)) *p++ = ' ';
+    if ((sf & STRFMT_F_ALT)) *p++ = '#';
+    if (width) {
+      uint8_t x = width / 10, y = width % 10;
+      if (x) *p++ = '0' + x;
+      *p++ = '0' + y;
+    }
+    if (prec != 255) {
+      uint8_t x = prec / 10, y = prec % 10;
+      *p++ = '.';
+      if (x) *p++ = '0' + x;
+      *p++ = '0' + y;
+    }
+    *p++ = (0x67666561 >> (STRFMT_FP(sf)<<3)) ^ ((sf & STRFMT_F_UPPER)?0x20:0);
+    *p = '\0';
+    p = lj_buf_more(sb, STRFMT_FMTNUMBUF);
+    setsbufP(sb, p + sprintf(p, fmt, n));
+  }
+  return sb;
+}
+
+/* -- Conversions to strings ---------------------------------------------- */
+
+/* Convert integer to string. */
+GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
+{
+  char buf[STRFMT_MAXBUF_INT];
+  MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
+  return lj_str_new(L, buf, len);
+}
+
+/* Convert number to string. */
+GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o)
+{
+  char buf[STRFMT_MAXBUF_NUM];
+  MSize len = (MSize)(lj_strfmt_wnum(buf, o) - buf);
+  return lj_str_new(L, buf, len);
+}
+
+/* Convert integer or number to string. */
+GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
+{
+  return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
+}
+
+#if LJ_HASJIT
+/* Convert char value to string. */
+GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
+{
+  char buf[1];
+  buf[0] = c;
+  return lj_str_new(L, buf, 1);
+}
+#endif
+
+/* Raw conversion of object to string. */
+GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
+{
+  if (tvisstr(o)) {
+    return strV(o);
+  } else if (tvisnumber(o)) {
+    return lj_strfmt_number(L, o);
+  } else if (tvisnil(o)) {
+    return lj_str_newlit(L, "nil");
+  } else if (tvisfalse(o)) {
+    return lj_str_newlit(L, "false");
+  } else if (tvistrue(o)) {
+    return lj_str_newlit(L, "true");
+  } else {
+    char buf[8+2+2+16], *p = buf;
+    p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
+    *p++ = ':'; *p++ = ' ';
+    if (tvisfunc(o) && isffunc(funcV(o))) {
+      p = lj_buf_wmem(p, "builtin#", 8);
+      p = lj_strfmt_wint(p, funcV(o)->c.ffid);
+    } else {
+      p = lj_strfmt_wptr(p, lj_obj_ptr(o));
+    }
+    return lj_str_new(L, buf, (size_t)(p - buf));
+  }
+}
+
+/* -- Internal string formatting ------------------------------------------ */
+
+/*
+** These functions are only used for lua_pushfstring(), lua_pushvfstring()
+** and for internal string formatting (e.g. error messages). Caveat: unlike
+** string.format(), only a limited subset of formats and flags are supported!
+**
+** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
+** - %d %u %o %x with full formatting, 32 bit integers only.
+** - %f and other FP formats are really %.14g.
+** - %s %c %p without formatting.
+*/
+
+/* Push formatted message as a string object to Lua stack. va_list variant. */
+const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
+{
+  SBuf *sb = lj_buf_tmp_(L);
+  FormatState fs;
+  SFormat sf;
+  GCstr *str;
+  lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
+  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+    switch (STRFMT_TYPE(sf)) {
+    case STRFMT_LIT:
+      lj_buf_putmem(sb, fs.str, fs.len);
+      break;
+    case STRFMT_INT:
+      lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
+      break;
+    case STRFMT_UINT:
+      lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
+      break;
+    case STRFMT_NUM: {
+      TValue tv;
+      tv.n = va_arg(argp, lua_Number);
+      setsbufP(sb, lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM), &tv));
+      break;
+      }
+    case STRFMT_STR: {
+      const char *s = va_arg(argp, char *);
+      if (s == NULL) s = "(null)";
+      lj_buf_putmem(sb, s, (MSize)strlen(s));
+      break;
+      }
+    case STRFMT_CHAR:
+      lj_buf_putb(sb, va_arg(argp, int));
+      break;
+    case STRFMT_PTR:
+      lj_strfmt_putptr(sb, va_arg(argp, void *));
+      break;
+    case STRFMT_ERR:
+    default:
+      lj_buf_putb(sb, '?');
+      lua_assert(0);
+      break;
+    }
+  }
+  str = lj_buf_str(L, sb);
+  setstrV(L, L->top, str);
+  incr_top(L);
+  return strdata(str);
+}
+
+/* Push formatted message as a string object to Lua stack. Vararg variant. */
+const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
+{
+  const char *msg;
+  va_list argp;
+  va_start(argp, fmt);
+  msg = lj_strfmt_pushvf(L, fmt, argp);
+  va_end(argp);
+  return msg;
+}
+
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_strfmt.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_strfmt.h
new file mode 100644
index 0000000000000000000000000000000000000000..dcfaf2e393b1a97e71a37a48d75888ee9f318f69
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_strfmt.h
@@ -0,0 +1,125 @@
+/*
+** String formatting.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_STRFMT_H
+#define _LJ_STRFMT_H
+
+#include "lj_obj.h"
+
+typedef uint32_t SFormat;  /* Format indicator. */
+
+/* Format parser state. */
+typedef struct FormatState {
+  const uint8_t *p;	/* Current format string pointer. */
+  const uint8_t *e;	/* End of format string. */
+  const char *str;	/* Returned literal string. */
+  MSize len;		/* Size of literal string. */
+} FormatState;
+
+/* Format types (max. 16). */
+typedef enum FormatType {
+  STRFMT_EOF, STRFMT_ERR, STRFMT_LIT,
+  STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR
+} FormatType;
+
+/* Format subtypes (bits are reused). */
+#define STRFMT_T_HEX	0x0010	/* STRFMT_UINT */
+#define STRFMT_T_OCT	0x0020	/* STRFMT_UINT */
+#define STRFMT_T_FP_A	0x0000	/* STRFMT_NUM */
+#define STRFMT_T_FP_E	0x0010	/* STRFMT_NUM */
+#define STRFMT_T_FP_F	0x0020	/* STRFMT_NUM */
+#define STRFMT_T_FP_G	0x0030	/* STRFMT_NUM */
+#define STRFMT_T_QUOTED	0x0010	/* STRFMT_STR */
+
+/* Format flags. */
+#define STRFMT_F_LEFT	0x0100
+#define STRFMT_F_PLUS	0x0200
+#define STRFMT_F_ZERO	0x0400
+#define STRFMT_F_SPACE	0x0800
+#define STRFMT_F_ALT	0x1000
+#define STRFMT_F_UPPER	0x2000
+
+/* Format indicator fields. */
+#define STRFMT_SH_WIDTH	16
+#define STRFMT_SH_PREC	24
+
+#define STRFMT_TYPE(sf)		((FormatType)((sf) & 15))
+#define STRFMT_WIDTH(sf)	(((sf) >> STRFMT_SH_WIDTH) & 255u)
+#define STRFMT_PREC(sf)		((((sf) >> STRFMT_SH_PREC) & 255u) - 1u)
+#define STRFMT_FP(sf)		(((sf) >> 4) & 3)
+
+/* Formats for conversion characters. */
+#define STRFMT_A	(STRFMT_NUM|STRFMT_T_FP_A)
+#define STRFMT_C	(STRFMT_CHAR)
+#define STRFMT_D	(STRFMT_INT)
+#define STRFMT_E	(STRFMT_NUM|STRFMT_T_FP_E)
+#define STRFMT_F	(STRFMT_NUM|STRFMT_T_FP_F)
+#define STRFMT_G	(STRFMT_NUM|STRFMT_T_FP_G)
+#define STRFMT_I	STRFMT_D
+#define STRFMT_O	(STRFMT_UINT|STRFMT_T_OCT)
+#define STRFMT_P	(STRFMT_PTR)
+#define STRFMT_Q	(STRFMT_STR|STRFMT_T_QUOTED)
+#define STRFMT_S	(STRFMT_STR)
+#define STRFMT_U	(STRFMT_UINT)
+#define STRFMT_X	(STRFMT_UINT|STRFMT_T_HEX)
+
+/* Maximum buffer sizes for conversions. */
+#define STRFMT_MAXBUF_XINT	(1+22)  /* '0' prefix + uint64_t in octal. */
+#define STRFMT_MAXBUF_INT	(1+10)  /* Sign + int32_t in decimal. */
+#define STRFMT_MAXBUF_NUM	LUAI_MAXNUMBER2STR
+#define STRFMT_MAXBUF_PTR	(2+2*sizeof(ptrdiff_t))  /* "0x" + hex ptr. */
+
+/* Format parser. */
+LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs);
+
+static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len)
+{
+  fs->p = (const uint8_t *)p;
+  fs->e = (const uint8_t *)p + len;
+  lua_assert(*fs->e == 0);  /* Must be NUL-terminated (may have NULs inside). */
+}
+
+/* Raw conversions. */
+LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k);
+LJ_FUNC char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o);
+LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v);
+LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v);
+LJ_FUNC const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp);
+
+/* Unformatted conversions to buffer. */
+LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k);
+#if LJ_HASJIT
+LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o);
+#endif
+LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v);
+LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str);
+
+/* Formatted conversions to buffer. */
+LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k);
+LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n);
+LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n);
+LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n);
+LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c);
+LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str);
+
+/* Conversions to strings. */
+LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);
+LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o);
+LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o);
+#if LJ_HASJIT
+LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c);
+#endif
+LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o);
+
+/* Internal string formatting. */
+LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt,
+				     va_list argp);
+LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
+#ifdef __GNUC__
+  __attribute__ ((format (printf, 2, 3)))
+#endif
+  ;
+
+#endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_strscan.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_strscan.c
similarity index 90%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_strscan.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_strscan.c
index 568f647d6a935bbac3d66b34a2a77b8cec153f5e..d3c5ba9124fa1457f3688665317ed6df88bb167c 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_strscan.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_strscan.c
@@ -140,7 +140,7 @@ static StrScanFmt strscan_hex(const uint8_t *p, TValue *o,
     break;
   }
 
-  /* Reduce range then convert to double. */
+  /* Reduce range, then convert to double. */
   if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; }
   strscan_double(x, o, ex2, neg);
   return fmt;
@@ -326,6 +326,49 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
   return fmt;
 }
 
+/* Parse binary number. Negation is done unsigned to avoid signed overflow. */
+static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
+			      StrScanFmt fmt, uint32_t opt,
+			      int32_t ex2, int32_t neg, uint32_t dig)
+{
+  uint64_t x = 0;
+  uint32_t i;
+
+  if (ex2 || dig > 64) return STRSCAN_ERROR;
+
+  /* Scan binary digits. */
+  for (i = dig; i; i--, p++) {
+    if ((*p & ~1) != '0') return STRSCAN_ERROR;
+    x = (x << 1) | (*p & 1);
+  }
+
+  /* Format-specific handling. */
+  switch (fmt) {
+  case STRSCAN_INT:
+    if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
+      o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
+      return STRSCAN_INT;  /* Fast path for 32 bit integers. */
+    }
+    if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
+    /* fallthrough */
+  case STRSCAN_U32:
+    if (dig > 32) return STRSCAN_ERROR;
+    o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
+    return STRSCAN_U32;
+  case STRSCAN_I64:
+  case STRSCAN_U64:
+    o->u64 = neg ? ~x+1u : x;
+    return fmt;
+  default:
+    break;
+  }
+
+  /* Reduce range, then convert to double. */
+  if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; }
+  strscan_double(x, o, ex2, neg);
+  return fmt;
+}
+
 /* Scan string containing a number. Returns format. Returns value in o. */
 StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
 {
@@ -364,8 +407,12 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
 
     /* Determine base and skip leading zeros. */
     if (LJ_UNLIKELY(*p <= '0')) {
-      if (*p == '0' && casecmp(p[1], 'x'))
-	base = 16, cmask = LJ_CHAR_XDIGIT, p += 2;
+      if (*p == '0') {
+	if (casecmp(p[1], 'x'))
+	  base = 16, cmask = LJ_CHAR_XDIGIT, p += 2;
+	else if (casecmp(p[1], 'b'))
+	  base = 2, cmask = LJ_CHAR_DIGIT, p += 2;
+      }
       for ( ; ; p++) {
 	if (*p == '0') {
 	  hasdig = 1;
@@ -403,7 +450,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
     }
 
     /* Parse exponent. */
-    if (casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) {
+    if (base >= 10 && casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) {
       uint32_t xx;
       int negx = 0;
       fmt = STRSCAN_NUM; p++;
@@ -460,6 +507,8 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
       return strscan_oct(sp, o, fmt, neg, dig);
     if (base == 16)
       fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig);
+    else if (base == 2)
+      fmt = strscan_bin(sp, o, fmt, opt, ex, neg, dig);
     else
       fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig);
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_strscan.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_strscan.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_strscan.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_strscan.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_tab.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_tab.c
similarity index 93%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_tab.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_tab.c
index a8062db7cf711f65d58809921e894813f9204fd2..88bf1089b5bf9354ddcb18f07f0590b6fc12392a 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_tab.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_tab.c
@@ -29,7 +29,12 @@ static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
 #define hashlohi(t, lo, hi)	hashmask((t), hashrot((lo), (hi)))
 #define hashnum(t, o)		hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
 #define hashptr(t, p)		hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS)
+#if LJ_GC64
+#define hashgcref(t, r) \
+  hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
+#else
 #define hashgcref(t, r)		hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
+#endif
 
 /* Hash an arbitrary key and return its anchor position in the hash table. */
 static Node *hashkey(const GCtab *t, cTValue *key)
@@ -58,8 +63,8 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits)
     lj_err_msg(L, LJ_ERR_TABOV);
   hsize = 1u << hbits;
   node = lj_mem_newvec(L, hsize, Node);
-  setmref(node->freetop, &node[hsize]);
   setmref(t->node, node);
+  setfreetop(t, node, &node[hsize]);
   t->hmask = hsize-1;
 }
 
@@ -98,6 +103,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
   GCtab *t;
   /* First try to colocate the array part. */
   if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) {
+    Node *nilnode;
     lua_assert((sizeof(GCtab) & 7) == 0);
     t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize));
     t->gct = ~LJ_TTAB;
@@ -107,8 +113,13 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
     setgcrefnull(t->metatable);
     t->asize = asize;
     t->hmask = 0;
-    setmref(t->node, &G(L)->nilnode);
+    nilnode = &G(L)->nilnode;
+    setmref(t->node, nilnode);
+#if LJ_GC64
+    setmref(t->freetop, nilnode);
+#endif
   } else {  /* Otherwise separately allocate the array part. */
+    Node *nilnode;
     t = lj_mem_newobj(L, GCtab);
     t->gct = ~LJ_TTAB;
     t->nomm = (uint8_t)~0;
@@ -117,7 +128,11 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
     setgcrefnull(t->metatable);
     t->asize = 0;  /* In case the array allocation fails. */
     t->hmask = 0;
-    setmref(t->node, &G(L)->nilnode);
+    nilnode = &G(L)->nilnode;
+    setmref(t->node, nilnode);
+#if LJ_GC64
+    setmref(t->freetop, nilnode);
+#endif
     if (asize > 0) {
       if (asize > LJ_MAX_ASIZE)
 	lj_err_msg(L, LJ_ERR_TABOV);
@@ -149,6 +164,12 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
   return t;
 }
 
+/* The API of this function conforms to lua_createtable(). */
+GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h)
+{
+  return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h));
+}
+
 #if LJ_HASJIT
 GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)
 {
@@ -185,7 +206,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
     Node *node = noderef(t->node);
     Node *knode = noderef(kt->node);
     ptrdiff_t d = (char *)node - (char *)knode;
-    setmref(node->freetop, (Node *)((char *)noderef(knode->freetop) + d));
+    setfreetop(t, node, (Node *)((char *)getfreetop(kt, knode) + d));
     for (i = 0; i <= hmask; i++) {
       Node *kn = &knode[i];
       Node *n = &node[i];
@@ -198,6 +219,17 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
   return t;
 }
 
+/* Clear a table. */
+void LJ_FASTCALL lj_tab_clear(GCtab *t)
+{
+  clearapart(t);
+  if (t->hmask > 0) {
+    Node *node = noderef(t->node);
+    setfreetop(t, node, &node[t->hmask+1]);
+    clearhpart(t);
+  }
+}
+
 /* Free a table. */
 void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
 {
@@ -214,7 +246,7 @@ void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
 /* -- Table resizing ------------------------------------------------------ */
 
 /* Resize a table to fit the new array/hash part sizes. */
-static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
+void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
 {
   Node *oldnode = noderef(t->node);
   uint32_t oldasize = t->asize;
@@ -247,6 +279,9 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
   } else {
     global_State *g = G(L);
     setmref(t->node, &g->nilnode);
+#if LJ_GC64
+    setmref(t->freetop, &g->nilnode);
+#endif
     t->hmask = 0;
   }
   if (asize < oldasize) {  /* Array part shrinks? */
@@ -348,7 +383,7 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek)
   asize += countint(ek, bins);
   na = bestasize(bins, &asize);
   total -= na;
-  resizetab(L, t, asize, hsize2hbits(total));
+  lj_tab_resize(L, t, asize, hsize2hbits(total));
 }
 
 #if LJ_HASFFI
@@ -360,7 +395,7 @@ void lj_tab_rehash(lua_State *L, GCtab *t)
 
 void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize)
 {
-  resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0);
+  lj_tab_resize(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0);
 }
 
 /* -- Table getters ------------------------------------------------------- */
@@ -428,7 +463,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
   Node *n = hashkey(t, key);
   if (!tvisnil(&n->val) || t->hmask == 0) {
     Node *nodebase = noderef(t->node);
-    Node *collide, *freenode = noderef(nodebase->freetop);
+    Node *collide, *freenode = getfreetop(t, nodebase);
     lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1);
     do {
       if (freenode == nodebase) {  /* No free node found? */
@@ -436,7 +471,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
 	return lj_tab_set(L, t, key);  /* Retry key insertion. */
       }
     } while (!tvisnil(&(--freenode)->key));
-    setmref(nodebase->freetop, freenode);
+    setfreetop(t, nodebase, freenode);
     lua_assert(freenode != &G(L)->nilnode);
     collide = hashkey(t, &n->key);
     if (collide != n) {  /* Colliding node not the main node? */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_tab.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_tab.h
similarity index 89%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_tab.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_tab.h
index f0d228eb20a943c90c34c8c7ce4b9ed4994967cc..7cf031be9afb137a81b2dea3704af85bb306204b 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_tab.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_tab.h
@@ -34,14 +34,17 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi)
 #define hsize2hbits(s)	((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
 
 LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
+LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h);
 #if LJ_HASJIT
 LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
 #endif
 LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
+LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t);
 LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
 #if LJ_HASFFI
 LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t);
 #endif
+LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits);
 LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
 
 /* Caveat: all getters except lj_tab_get() can return NULL! */
@@ -53,7 +56,7 @@ LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
 /* Caveat: all setters require a write barrier for the stored value. */
 
 LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
-LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
+LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
 LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
 LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_target.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target.h
similarity index 99%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_target.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target.h
index 1a242325f5b1675a82ec223cead1aab68adbab8b..0daecb11c863205d5029e7f3b41eb857f0f5f548 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_target.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target.h
@@ -138,6 +138,8 @@ typedef uint32_t RegCost;
 #include "lj_target_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_target_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_target_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_target_ppc.h"
 #elif LJ_TARGET_MIPS
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_target_arm.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_arm.h
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_target_arm.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_arm.h
index 6d4d0aed00e69268a3ff67e2aa0e445b41fff7b8..0a243b3740b50337001681a1fb3d048a5c33c9d5 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_target_arm.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_arm.h
@@ -243,10 +243,6 @@ typedef enum ARMIns {
   ARMI_VCVT_S32_F64 = 0xeebd0bc0,
   ARMI_VCVT_U32_F32 = 0xeebc0ac0,
   ARMI_VCVT_U32_F64 = 0xeebc0bc0,
-  ARMI_VCVTR_S32_F32 = 0xeebd0a40,
-  ARMI_VCVTR_S32_F64 = 0xeebd0b40,
-  ARMI_VCVTR_U32_F32 = 0xeebc0a40,
-  ARMI_VCVTR_U32_F64 = 0xeebc0b40,
   ARMI_VCVT_F32_S32 = 0xeeb80ac0,
   ARMI_VCVT_F64_S32 = 0xeeb80bc0,
   ARMI_VCVT_F32_U32 = 0xeeb80a40,
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_arm64.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_arm64.h
new file mode 100644
index 0000000000000000000000000000000000000000..99e0adc9d6653d3eb858206a9456eed2e881be7b
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_arm64.h
@@ -0,0 +1,97 @@
+/*
+** Definitions for ARM64 CPUs.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_TARGET_ARM64_H
+#define _LJ_TARGET_ARM64_H
+
+/* -- Register IDs -------------------------------------------------------- */
+
+#define GPRDEF(_) \
+  _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \
+  _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
+  _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
+  _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP)
+#define FPRDEF(_) \
+  _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
+  _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \
+  _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \
+  _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31)
+#define VRIDDEF(_)
+
+#define RIDENUM(name)	RID_##name,
+
+enum {
+  GPRDEF(RIDENUM)		/* General-purpose registers (GPRs). */
+  FPRDEF(RIDENUM)		/* Floating-point registers (FPRs). */
+  RID_MAX,
+  RID_TMP = RID_LR,
+  RID_ZERO = RID_SP,
+
+  /* Calling conventions. */
+  RID_RET = RID_X0,
+  RID_FPRET = RID_D0,
+
+  /* These definitions must match with the *.dasc file(s): */
+  RID_BASE = RID_X19,		/* Interpreter BASE. */
+  RID_LPC = RID_X21,		/* Interpreter PC. */
+  RID_GL = RID_X22,		/* Interpreter GL. */
+  RID_LREG = RID_X23,		/* Interpreter L. */
+
+  /* Register ranges [min, max) and number of registers. */
+  RID_MIN_GPR = RID_X0,
+  RID_MAX_GPR = RID_SP+1,
+  RID_MIN_FPR = RID_MAX_GPR,
+  RID_MAX_FPR = RID_D31+1,
+  RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
+  RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
+};
+
+#define RID_NUM_KREF		RID_NUM_GPR
+#define RID_MIN_KREF		RID_X0
+
+/* -- Register sets ------------------------------------------------------- */
+
+/* Make use of all registers, except for x18, fp, lr and sp. */
+#define RSET_FIXED \
+  (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP))
+#define RSET_GPR	(RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
+#define RSET_FPR	RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
+#define RSET_ALL	(RSET_GPR|RSET_FPR)
+#define RSET_INIT	RSET_ALL
+
+/* lr is an implicit scratch register. */
+#define RSET_SCRATCH_GPR	(RSET_RANGE(RID_X0, RID_X17+1))
+#define RSET_SCRATCH_FPR \
+  (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1))
+#define RSET_SCRATCH		(RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
+#define REGARG_FIRSTGPR		RID_X0
+#define REGARG_LASTGPR		RID_X7
+#define REGARG_NUMGPR		8
+#define REGARG_FIRSTFPR		RID_D0
+#define REGARG_LASTFPR		RID_D7
+#define REGARG_NUMFPR		8
+
+/* -- Instructions -------------------------------------------------------- */
+
+/* Instruction fields. */
+#define A64F_D(r)	(r)
+#define A64F_N(r)       ((r) << 5)
+#define A64F_A(r)       ((r) << 10)
+#define A64F_M(r)       ((r) << 16)
+#define A64F_U16(x)	((x) << 5)
+#define A64F_S26(x)	(x)
+#define A64F_S19(x)	((x) << 5)
+
+typedef enum A64Ins {
+  A64I_MOVZw = 0x52800000,
+  A64I_MOVZx = 0xd2800000,
+  A64I_LDRLw = 0x18000000,
+  A64I_LDRLx = 0x58000000,
+  A64I_NOP = 0xd503201f,
+  A64I_B = 0x14000000,
+  A64I_BR = 0xd61f0000,
+} A64Ins;
+
+#endif
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_target_mips.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_mips.h
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_target_mips.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_mips.h
index 0ab140bf32741b217606fff0dfe3f4c53d6cb838..76645bcac5ea38ffd5d52aa74c1a2e423ff16c4e 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_target_mips.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_mips.h
@@ -169,6 +169,9 @@ typedef enum MIPSIns {
   MIPSI_SLTU = 0x0000002b,
   MIPSI_MOVZ = 0x0000000a,
   MIPSI_MOVN = 0x0000000b,
+  MIPSI_MFHI = 0x00000010,
+  MIPSI_MFLO = 0x00000012,
+  MIPSI_MULT = 0x00000018,
 
   MIPSI_SLL = 0x00000000,
   MIPSI_SRL = 0x00000002,
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_target_ppc.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_ppc.h
similarity index 99%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_target_ppc.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_ppc.h
index 2caeeb044c7c73f9f5c0404310b0268288465269..99867688b7d3b208d49c5d2f75f4c2937fdb9bb3 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_target_ppc.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_ppc.h
@@ -104,7 +104,7 @@ enum {
 /* This definition must match with the *.dasc file(s). */
 typedef struct {
   lua_Number fpr[RID_NUM_FPR];	/* Floating-point registers. */
-  int32_t gpr[RID_NUM_GPR];	/* General-purpose registers. */
+  intptr_t gpr[RID_NUM_GPR];	/* General-purpose registers. */
   int32_t spill[256];		/* Spill slots. */
 } ExitState;
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_target_x86.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_x86.h
similarity index 99%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_target_x86.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_x86.h
index d12a1b87c0018aa5c14ef171d49bb350f5efb55a..fc9d37024b011ba99ac92198105ad3e52b8be7e9 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_target_x86.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_target_x86.h
@@ -33,6 +33,7 @@ enum {
   RID_MRM = RID_MAX,		/* Pseudo-id for ModRM operand. */
 
   /* Calling conventions. */
+  RID_SP = RID_ESP,
   RID_RET = RID_EAX,
 #if LJ_64
   RID_FPRET = RID_XMM0,
@@ -131,7 +132,11 @@ enum {
 #define SPS_FIXED	(4*2)
 #define SPS_FIRST	(4*2)	/* Don't use callee register save area. */
 #else
+#if LJ_GC64
+#define SPS_FIXED	2
+#else
 #define SPS_FIXED	4
+#endif
 #define SPS_FIRST	2
 #endif
 #else
@@ -277,10 +282,8 @@ typedef enum {
   XO_ROUNDSD =	0x0b3a0ffc,  /* Really 66 0f 3a 0b. See asm_fpmath. */
   XO_UCOMISD =	XO_660f(2e),
   XO_CVTSI2SD =	XO_f20f(2a),
-  XO_CVTSD2SI =	XO_f20f(2d),
   XO_CVTTSD2SI=	XO_f20f(2c),
   XO_CVTSI2SS =	XO_f30f(2a),
-  XO_CVTSS2SI =	XO_f30f(2d),
   XO_CVTTSS2SI=	XO_f30f(2c),
   XO_CVTSS2SD =	XO_f30f(5a),
   XO_CVTSD2SS =	XO_f20f(5a),
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_trace.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_trace.c
similarity index 92%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_trace.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_trace.c
index e51ec5467a01dbde857203dbe9fc94eadba143f8..42f4321d5d2f7520f90c5609945c1ef977531578 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_trace.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_trace.c
@@ -117,15 +117,22 @@ static void perftools_addtrace(GCtrace *T)
 }
 #endif
 
-/* Save current trace by copying and compacting it. */
-static void trace_save(jit_State *J)
+/* Allocate space for copy of trace. */
+static GCtrace *trace_save_alloc(jit_State *J)
 {
   size_t sztr = ((sizeof(GCtrace)+7)&~7);
   size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns);
   size_t sz = sztr + szins +
 	      J->cur.nsnap*sizeof(SnapShot) +
 	      J->cur.nsnapmap*sizeof(SnapEntry);
-  GCtrace *T = lj_mem_newt(J->L, (MSize)sz, GCtrace);
+  return lj_mem_newt(J->L, (MSize)sz, GCtrace);
+}
+
+/* Save current trace by copying and compacting it. */
+static void trace_save(jit_State *J, GCtrace *T)
+{
+  size_t sztr = ((sizeof(GCtrace)+7)&~7);
+  size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns);
   char *p = (char *)T + sztr;
   memcpy(T, &J->cur, sizeof(GCtrace));
   setgcrefr(T->nextgc, J2G(J)->gc.root);
@@ -360,7 +367,7 @@ static void trace_start(jit_State *J)
   TraceNo traceno;
 
   if ((J->pt->flags & PROTO_NOJIT)) {  /* JIT disabled for this proto? */
-    if (J->parent == 0) {
+    if (J->parent == 0 && J->exitno == 0) {
       /* Lazy bytecode patching to disable hotcount events. */
       lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
 		 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF);
@@ -394,6 +401,7 @@ static void trace_start(jit_State *J)
   J->guardemit.irt = 0;
   J->postproc = LJ_POST_NONE;
   lj_resetsplit(J);
+  J->retryrec = 0;
   setgcref(J->cur.startpt, obj2gco(J->pt));
 
   L = J->L;
@@ -417,6 +425,7 @@ static void trace_stop(jit_State *J)
   BCOp op = bc_op(J->cur.startins);
   GCproto *pt = &gcref(J->cur.startpt)->pt;
   TraceNo traceno = J->cur.traceno;
+  GCtrace *T = trace_save_alloc(J);  /* Do this first. May throw OOM. */
   lua_State *L;
 
   switch (op) {
@@ -453,6 +462,12 @@ static void trace_stop(jit_State *J)
       root->nextside = (TraceNo1)traceno;
     }
     break;
+  case BC_CALLM:
+  case BC_CALL:
+  case BC_ITERC:
+    /* Trace stitching: patch link of previous trace. */
+    traceref(J, J->exitno)->link = traceno;
+    break;
   default:
     lua_assert(0);
     break;
@@ -461,12 +476,13 @@ static void trace_stop(jit_State *J)
   /* Commit new mcode only after all patching is done. */
   lj_mcode_commit(J, J->cur.mcode);
   J->postproc = LJ_POST_NONE;
-  trace_save(J);
+  trace_save(J, T);
 
   L = J->L;
   lj_vmevent_send(L, TRACE,
     setstrV(L, L->top++, lj_str_newlit(L, "stop"));
     setintV(L->top++, traceno);
+    setfuncV(L, L->top++, J->fn);
   );
 }
 
@@ -502,8 +518,17 @@ static int trace_abort(jit_State *J)
     return 1;  /* Retry ASM with new MCode area. */
   }
   /* Penalize or blacklist starting bytecode instruction. */
-  if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins)))
-    penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e);
+  if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
+    if (J->exitno == 0) {
+      BCIns *startpc = mref(J->cur.startpc, BCIns);
+      if (e == LJ_TRERR_RETRY)
+	hotcount_set(J2GG(J), startpc+1, 1);  /* Immediate retry. */
+      else
+	penalty_pc(J, &gcref(J->cur.startpt)->pt, startpc, e);
+    } else {
+      traceref(J, J->exitno)->link = J->exitno;  /* Self-link is blacklisted. */
+    }
+  }
 
   /* Is there anything to abort? */
   traceno = J->cur.traceno;
@@ -672,6 +697,7 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
 {
   SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno];
   if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) &&
+      isluafunc(curr_func(J->L)) &&
       snap->count != SNAPCOUNT_DONE &&
       ++snap->count >= J->param[JIT_P_hotexit]) {
     lua_assert(J->state == LJ_TRACE_IDLE);
@@ -681,6 +707,20 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
   }
 }
 
+/* Stitch a new trace to the previous trace. No-op unless the checks below pass. */
+void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc)
+{
+  /* Only start a new trace if idle (not recording) and not inside a __gc call or vmevent. */
+  if (J->state == LJ_TRACE_IDLE &&
+      !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
+    J->parent = 0;  /* Have to treat it like a root trace. */
+    /* J->exitno is expected to already hold the invoking trace; it is not set here. */
+    J->state = LJ_TRACE_START;
+    lj_trace_ins(J, pc);
+  }
+}
+
+
 /* Tiny struct to pass data to protected call. */
 typedef struct ExitDataCP {
   jit_State *J;
@@ -767,17 +807,20 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   if (errcode)
     return -errcode;  /* Return negated error code. */
 
-  lj_vmevent_send(L, TEXIT,
-    lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
-    setintV(L->top++, J->parent);
-    setintV(L->top++, J->exitno);
-    trace_exit_regs(L, ex);
-  );
+  if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
+    lj_vmevent_send(L, TEXIT,
+      lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
+      setintV(L->top++, J->parent);
+      setintV(L->top++, J->exitno);
+      trace_exit_regs(L, ex);
+    );
 
   pc = exd.pc;
   cf = cframe_raw(L->cframe);
   setcframe_pc(cf, pc);
-  if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
+  if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
+    /* Just exit to interpreter. */
+  } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
     if (!(G(L)->hookmask & HOOK_GC))
       lj_gc_step(L);  /* Exited because of GC: drive GC forward. */
   } else {
@@ -801,7 +844,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   ERRNO_RESTORE
   switch (bc_op(*pc)) {
   case BC_CALLM: case BC_CALLMT:
-    return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc));
+    return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) + LJ_FR2);
   case BC_RETM:
     return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc));
   case BC_TSETM:
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_trace.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_trace.h
similarity index 95%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_trace.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_trace.h
index 4fbe5cf257a396f76e69b231222449c1c489a70d..9eaf91b0e33495563966e669c4ab3fb118f42870 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_trace.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_trace.h
@@ -34,6 +34,7 @@ LJ_FUNC void lj_trace_freestate(global_State *g);
 /* Event handling. */
 LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
 LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
+LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
 LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
 
 /* Signal asynchronous abort of trace or end of trace. */
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_traceerr.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_traceerr.h
similarity index 95%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_traceerr.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_traceerr.h
index 9bef117aba1293c49980b6030b777964ecc60a6e..12e90d0395c572090ebbc8fbdb0eae045a1d9d4d 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_traceerr.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_traceerr.h
@@ -7,10 +7,12 @@
 
 /* Recording. */
 TREDEF(RECERR,	"error thrown or hook called during recording")
+TREDEF(TRACEUV,	"trace too short")
 TREDEF(TRACEOV,	"trace too long")
 TREDEF(STACKOV,	"trace too deep")
 TREDEF(SNAPOV,	"too many snapshots")
 TREDEF(BLACKL,	"blacklisted")
+TREDEF(RETRY,	"retry recording")
 TREDEF(NYIBC,	"NYI: bytecode %d")
 
 /* Recording loop ops. */
@@ -23,7 +25,7 @@ TREDEF(BADTYPE,	"bad argument type")
 TREDEF(CJITOFF,	"JIT compilation disabled for function")
 TREDEF(CUNROLL,	"call unroll limit reached")
 TREDEF(DOWNREC,	"down-recursion, restarting")
-TREDEF(NYICF,	"NYI: C function %p")
+TREDEF(NYICF,	"NYI: C function %s")
 TREDEF(NYIFF,	"NYI: FastFunc %s")
 TREDEF(NYIFFU,	"NYI: unsupported variant of FastFunc %s")
 TREDEF(NYIRETL,	"NYI: return to lower frame")
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_udata.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_udata.c
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_udata.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_udata.c
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_udata.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_udata.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_udata.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_udata.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_vm.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_vm.h
similarity index 91%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_vm.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_vm.h
index 036cabc57fe27a4bac872313659603783df25068..b31e22f70f7c60d4022c45e3f94c2abaa3387597 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_vm.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_vm.h
@@ -43,13 +43,14 @@ LJ_ASMF void lj_vm_record(void);
 LJ_ASMF void lj_vm_inshook(void);
 LJ_ASMF void lj_vm_rethook(void);
 LJ_ASMF void lj_vm_callhook(void);
+LJ_ASMF void lj_vm_profhook(void);
 
 /* Trace exit handling. */
 LJ_ASMF void lj_vm_exit_handler(void);
 LJ_ASMF void lj_vm_exit_interp(void);
 
 /* Internal math helper functions. */
-#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC
+#if LJ_TARGET_PPC || LJ_TARGET_ARM64
 #define lj_vm_floor	floor
 #define lj_vm_ceil	ceil
 #else
@@ -60,7 +61,7 @@ LJ_ASMF double lj_vm_floor_sf(double);
 LJ_ASMF double lj_vm_ceil_sf(double);
 #endif
 #endif
-#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64
+#ifdef LUAJIT_NO_LOG2
 LJ_ASMF double lj_vm_log2(double);
 #else
 #define lj_vm_log2	log2
@@ -71,12 +72,12 @@ LJ_ASMF double lj_vm_log2(double);
 LJ_ASMF void lj_vm_floor_sse(void);
 LJ_ASMF void lj_vm_ceil_sse(void);
 LJ_ASMF void lj_vm_trunc_sse(void);
-LJ_ASMF void lj_vm_exp_x87(void);
-LJ_ASMF void lj_vm_exp2_x87(void);
-LJ_ASMF void lj_vm_pow_sse(void);
 LJ_ASMF void lj_vm_powi_sse(void);
+#define lj_vm_powi	NULL
 #else
-#if LJ_TARGET_PPC
+LJ_ASMF double lj_vm_powi(double, int32_t);
+#endif
+#if LJ_TARGET_PPC || LJ_TARGET_ARM64
 #define lj_vm_trunc	trunc
 #else
 LJ_ASMF double lj_vm_trunc(double);
@@ -84,13 +85,11 @@ LJ_ASMF double lj_vm_trunc(double);
 LJ_ASMF double lj_vm_trunc_sf(double);
 #endif
 #endif
-LJ_ASMF double lj_vm_powi(double, int32_t);
 #ifdef LUAJIT_NO_EXP2
 LJ_ASMF double lj_vm_exp2(double);
 #else
 #define lj_vm_exp2	exp2
 #endif
-#endif
 LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
 #if LJ_HASFFI
 LJ_ASMF int lj_vm_errno(void);
@@ -104,8 +103,7 @@ LJ_ASMF void lj_cont_nop(void);  /* Do nothing, just continue execution. */
 LJ_ASMF void lj_cont_condt(void);  /* Branch if result is true. */
 LJ_ASMF void lj_cont_condf(void);  /* Branch if result is false. */
 LJ_ASMF void lj_cont_hook(void);  /* Continue from hook yield. */
-
-enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
+LJ_ASMF void lj_cont_stitch(void);  /* Trace stitching. */
 
 /* Start of the ASM code. */
 LJ_ASMF char lj_vm_asm_begin[];
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_vmevent.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_vmevent.c
similarity index 97%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_vmevent.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_vmevent.c
index 81fe47d4dc9c37d018d4cb77454d7c296b99ec90..87ebcfbd2864ae36f26884fcf855489c59c4b78d 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_vmevent.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_vmevent.c
@@ -27,6 +27,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev)
     if (tv && tvisfunc(tv)) {
       lj_state_checkstack(L, LUA_MINSTACK);
       setfuncV(L, L->top++, funcV(tv));
+      if (LJ_FR2) setnilV(L->top++);
       return savestack(L, L->top);
     }
   }
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_vmevent.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_vmevent.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_vmevent.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_vmevent.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lj_vmmath.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_vmmath.c
similarity index 71%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lj_vmmath.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_vmmath.c
index 31c6029fce19b00d51faf4c2695502a22bd87891..ecad2950d93f52aa6e433ae52afc0678ea951e4c 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/lj_vmmath.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lj_vmmath.c
@@ -13,16 +13,29 @@
 #include "lj_ir.h"
 #include "lj_vm.h"
 
-/* -- Helper functions for generated machine code ------------------------- */
+/* -- Wrapper functions --------------------------------------------------- */
 
-#if LJ_TARGET_X86ORX64
-/* Wrapper functions to avoid linker issues on OSX. */
-LJ_FUNCA double lj_vm_sinh(double x) { return sinh(x); }
-LJ_FUNCA double lj_vm_cosh(double x) { return cosh(x); }
-LJ_FUNCA double lj_vm_tanh(double x) { return tanh(x); }
+#if LJ_TARGET_X86 && __ELF__ && __PIC__
+/* Wrapper functions to deal with the ELF/x86 PIC disaster. */
+LJ_FUNCA double lj_wrap_log(double x) { return log(x); }
+LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); }
+LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); }
+LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); }
+LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); }
+LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); }
+LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); }
+LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); }
+LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); }
+LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
+LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
+LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
+LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
+LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
+LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
 #endif
 
-#if !LJ_TARGET_X86ORX64
+/* -- Helper functions for generated machine code ------------------------- */
+
 double lj_vm_foldarith(double x, double y, int op)
 {
   switch (op) {
@@ -43,7 +56,6 @@ double lj_vm_foldarith(double x, double y, int op)
   default: return x;
   }
 }
-#endif
 
 #if LJ_HASJIT
 
@@ -61,7 +73,7 @@ double lj_vm_exp2(double a)
 }
 #endif
 
-#if !(LJ_TARGET_ARM || LJ_TARGET_PPC)
+#if !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)
 int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
 {
   uint32_t y, ua, ub;
@@ -107,6 +119,7 @@ double lj_vm_powi(double x, int32_t k)
   else
     return 1.0 / lj_vm_powui(x, (uint32_t)-k);
 }
+#endif
 
 /* Computes fpm(x) for extended math functions. */
 double lj_vm_foldfpm(double x, int fpm)
@@ -128,7 +141,6 @@ double lj_vm_foldfpm(double x, int fpm)
   }
   return 0;
 }
-#endif
 
 #if LJ_HASFFI
 int lj_vm_errno(void)
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/ljamalg.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/ljamalg.c
similarity index 97%
rename from source/libs/luajit/LuaJIT-2.0.4/src/ljamalg.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/ljamalg.c
index 9b237b7e9d234bb71cd4d6bf3b870e672fddc633..be0c52d743efe79d3b8841a147e12afa5b37f2e2 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/ljamalg.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/ljamalg.c
@@ -33,6 +33,7 @@
 #include "lj_char.c"
 #include "lj_bc.c"
 #include "lj_obj.c"
+#include "lj_buf.c"
 #include "lj_str.c"
 #include "lj_tab.c"
 #include "lj_func.c"
@@ -44,7 +45,9 @@
 #include "lj_vmevent.c"
 #include "lj_vmmath.c"
 #include "lj_strscan.c"
+#include "lj_strfmt.c"
 #include "lj_api.c"
+#include "lj_profile.c"
 #include "lj_lex.c"
 #include "lj_parse.c"
 #include "lj_bcread.c"
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lua.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lua.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lua.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lua.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lua.hpp b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lua.hpp
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lua.hpp
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lua.hpp
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/luaconf.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/luaconf.h
similarity index 99%
rename from source/libs/luajit/LuaJIT-2.0.4/src/luaconf.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/luaconf.h
index 84fa6418766a2e3639c2d39240e415ec38782640..79f514825f87394d8971fda3e8b7a0bb16cef99f 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/luaconf.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/luaconf.h
@@ -37,7 +37,7 @@
 #endif
 #define LUA_LROOT	"/usr/local"
 #define LUA_LUADIR	"/lua/5.1/"
-#define LUA_LJDIR	"/luajit-2.0.4/"
+#define LUA_LJDIR	"/luajit-2.1.0-beta1/"
 
 #ifdef LUA_ROOT
 #define LUA_JROOT	LUA_ROOT
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/luajit.c b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/luajit.c
similarity index 98%
rename from source/libs/luajit/LuaJIT-2.0.4/src/luajit.c
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/luajit.c
index 85d713fb42142bc50c00b6178668c212618e134f..0ebc73005f864a0106eb265dd1dd968543b1825a 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/luajit.c
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/luajit.c
@@ -61,8 +61,9 @@ static void laction(int i)
 
 static void print_usage(void)
 {
-  fprintf(stderr,
-  "usage: %s [options]... [script [args]...].\n"
+  fputs("usage: ", stderr);
+  fputs(progname, stderr);
+  fputs(" [options]... [script [args]...].\n"
   "Available options are:\n"
   "  -e chunk  Execute string " LUA_QL("chunk") ".\n"
   "  -l name   Require library " LUA_QL("name") ".\n"
@@ -73,16 +74,14 @@ static void print_usage(void)
   "  -v        Show version information.\n"
   "  -E        Ignore environment variables.\n"
   "  --        Stop handling options.\n"
-  "  -         Execute stdin and stop handling options.\n"
-  ,
-  progname);
+  "  -         Execute stdin and stop handling options.\n", stderr);
   fflush(stderr);
 }
 
 static void l_message(const char *pname, const char *msg)
 {
-  if (pname) fprintf(stderr, "%s: ", pname);
-  fprintf(stderr, "%s\n", msg);
+  if (pname) { fputs(pname, stderr); fputc(':', stderr); fputc(' ', stderr); }
+  fputs(msg, stderr); fputc('\n', stderr);
   fflush(stderr);
 }
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/luajit.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/luajit.h
similarity index 81%
rename from source/libs/luajit/LuaJIT-2.0.4/src/luajit.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/luajit.h
index 9ced18eb8262d6f85349deb6f29ed7729efe70c5..960418519ee6a419bb0f800f7a284cc680500927 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/luajit.h
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/luajit.h
@@ -30,9 +30,9 @@
 
 #include "lua.h"
 
-#define LUAJIT_VERSION		"LuaJIT 2.0.4"
-#define LUAJIT_VERSION_NUM	20004  /* Version 2.0.4 = 02.00.04. */
-#define LUAJIT_VERSION_SYM	luaJIT_version_2_0_4
+#define LUAJIT_VERSION		"LuaJIT 2.1.0-beta1"
+#define LUAJIT_VERSION_NUM	20100  /* Version 2.1.0 = 02.01.00. */
+#define LUAJIT_VERSION_SYM	luaJIT_version_2_1_0_beta1
 #define LUAJIT_COPYRIGHT	"Copyright (C) 2005-2015 Mike Pall"
 #define LUAJIT_URL		"http://luajit.org/"
 
@@ -64,6 +64,15 @@ enum {
 /* Control the JIT engine. */
 LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
 
+/* Low-overhead profiling API. */
+typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
+					int samples, int vmstate);
+LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
+				  luaJIT_profile_callback cb, void *data);
+LUA_API void luaJIT_profile_stop(lua_State *L);
+LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
+					     int depth, size_t *len);
+
 /* Enforce (dynamic) linker error for version mismatches. Call from main. */
 LUA_API void LUAJIT_VERSION_SYM(void);
 
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/lualib.h b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/lualib.h
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/lualib.h
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/lualib.h
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/msvcbuild.bat b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/msvcbuild.bat
similarity index 95%
rename from source/libs/luajit/LuaJIT-2.0.4/src/msvcbuild.bat
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/msvcbuild.bat
index 4b501855ad252855d303d1edd814bc1d42255dc1..0360d7e33b8487b5d9ca60ffa133eb9f3af02f66 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/msvcbuild.bat
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/msvcbuild.bat
@@ -37,6 +37,7 @@ if exist minilua.exe.manifest^
 @if errorlevel 8 goto :X64
 @set DASMFLAGS=-D WIN -D JIT -D FFI
 @set LJARCH=x86
+@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
 :X64
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
 @if errorlevel 1 goto :BAD
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/ps4build.bat b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/ps4build.bat
similarity index 94%
rename from source/libs/luajit/LuaJIT-2.0.4/src/ps4build.bat
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/ps4build.bat
index 42fc9a64a99b815790ef947756ab67873d2d568e..337a44fa72d44f11819339c643c3217ab222b14a 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/ps4build.bat
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/ps4build.bat
@@ -27,11 +27,11 @@ if exist minilua.exe.manifest^
 @minilua
 @if not errorlevel 8 goto :FAIL
 
-@set DASMFLAGS=-D P64
+@set DASMFLAGS=-D P64 -D NO_UNWIND
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
 @if errorlevel 1 goto :BAD
 
-%LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI host\buildvm*.c
+%LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
 @if errorlevel 1 goto :BAD
 %LJLINK% /out:buildvm.exe buildvm*.obj
 @if errorlevel 1 goto :BAD
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/psvitabuild.bat b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/psvitabuild.bat
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/psvitabuild.bat
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/psvitabuild.bat
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/vm_arm.dasc b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_arm.dasc
similarity index 95%
rename from source/libs/luajit/LuaJIT-2.0.4/src/vm_arm.dasc
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_arm.dasc
index 457efa63ce2725fc2b88052358c42971a3da665a..0bd9b147ad39e1078c5effab7b1945227c737645 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/vm_arm.dasc
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_arm.dasc
@@ -99,6 +99,7 @@
 |.type NODE,		Node
 |.type NARGS8,		int
 |.type TRACE,		GCtrace
+|.type SBUF,		SBuf
 |
 |//-----------------------------------------------------------------------
 |
@@ -418,13 +419,14 @@ static void build_subroutines(BuildCtx *ctx)
   |    add CARG2, sp, #CFRAME_RESUME
   |  ldrb CARG1, L->status
   |   str CARG3, SAVE_ERRF
-  |    str CARG2, L->cframe
+  |   str L, SAVE_PC			// Any value outside of bytecode is ok.
   |   str CARG3, SAVE_CFRAME
   |  cmp CARG1, #0
-  |   str L, SAVE_PC			// Any value outside of bytecode is ok.
+  |    str CARG2, L->cframe
   |  beq >3
   |
   |  // Resume after yield (like a return).
+  |  str L, [DISPATCH, #DISPATCH_GL(cur_L)]
   |  mov RA, BASE
   |   ldr BASE, L->base
   |   ldr CARG1, L->top
@@ -458,14 +460,15 @@ static void build_subroutines(BuildCtx *ctx)
   |   str CARG3, SAVE_NRES
   |    mov L, CARG1
   |   str CARG1, SAVE_L
-  |    mov BASE, CARG2
-  |  str sp, L->cframe			// Add our C frame to cframe chain.
   |    ldr DISPATCH, L->glref		// Setup pointer to dispatch table.
+  |     mov BASE, CARG2
   |   str CARG1, SAVE_PC		// Any value outside of bytecode is ok.
   |  str RC, SAVE_CFRAME
   |    add DISPATCH, DISPATCH, #GG_G2DISP
+  |  str sp, L->cframe			// Add our C frame to cframe chain.
   |
   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+  |  str L, [DISPATCH, #DISPATCH_GL(cur_L)]
   |  ldr RB, L->base			// RB = old base (for vmeta_call).
   |   ldr CARG1, L->top
   |    mov MASKR8, #255
@@ -491,20 +494,21 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L, CARG1
   |   ldr RA, L:CARG1->stack
   |  str CARG1, SAVE_L
+  |    ldr DISPATCH, L->glref		// Setup pointer to dispatch table.
   |   ldr RB, L->top
   |  str CARG1, SAVE_PC			// Any value outside of bytecode is ok.
   |  ldr RC, L->cframe
+  |    add DISPATCH, DISPATCH, #GG_G2DISP
   |   sub RA, RA, RB			// Compute -savestack(L, L->top).
-  |  str sp, L->cframe			// Add our C frame to cframe chain.
   |  mov RB, #0
   |   str RA, SAVE_NRES			// Neg. delta means cframe w/o frame.
   |  str RB, SAVE_ERRF			// No error function.
   |  str RC, SAVE_CFRAME
+  |  str sp, L->cframe			// Add our C frame to cframe chain.
+  |    str L, [DISPATCH, #DISPATCH_GL(cur_L)]
   |  blx CARG4			// (lua_State *L, lua_CFunction func, void *ud)
-  |   ldr DISPATCH, L->glref		// Setup pointer to dispatch table.
   |  movs BASE, CRET1
-  |    mov PC, #FRAME_CP
-  |   add DISPATCH, DISPATCH, #GG_G2DISP
+  |   mov PC, #FRAME_CP
   |  bne <3				// Else continue with the call.
   |  b ->vm_leave_cp			// No base? Just remove C frame.
   |
@@ -614,6 +618,16 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Guaranteed to be a function here.
   |  b ->vm_call_dispatch_f
   |
+  |->vmeta_tgetr:			// BC_TGETR fallback: CARG1 = table, CARG2 = key.
+  |  .IOS mov RC, BASE			// Keep a copy of BASE in RC across the call.
+  |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
+  |  // Returns cTValue * or NULL.
+  |  .IOS mov BASE, RC
+  |  cmp CRET1, #0
+  |  ldrdne CARG12, [CRET1]		// Non-NULL: load the TValue that was found.
+  |  mvneq CARG2, #~LJ_TNIL		// NULL: the result is nil.
+  |  b ->BC_TGETR_Z			// Store the result and dispatch.
+  |
   |//-----------------------------------------------------------------------
   |
   |->vmeta_tsets1:
@@ -671,6 +685,15 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Guaranteed to be a function here.
   |  b ->vm_call_dispatch_f
   |
+  |->vmeta_tsetr:			// BC_TSETR fallback: CARG2 = table, CARG3 = key.
+  |  str BASE, L->base
+  |  .IOS mov RC, BASE
+  |  mov CARG1, L			// BC_TSETR leaves the array slot address in CARG1.
+  |  str PC, SAVE_PC
+  |  bl extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key) -> TValue *
+  |  .IOS mov BASE, RC
+  |  b ->BC_TSETR_Z			// Store the value to the returned slot.
+  |
   |//-- Comparison metamethods ---------------------------------------------
   |
   |->vmeta_comp:
@@ -735,6 +758,17 @@ static void build_subroutines(BuildCtx *ctx)
   |  b <3
   |.endif
   |
+  |->vmeta_istype:
+  |  sub PC, PC, #4
+  |   str BASE, L->base
+  |   mov CARG1, L
+  |   lsr CARG2, RA, #3
+  |   mov CARG3, RC
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
+  |  .IOS ldr BASE, L->base
+  |  b ->cont_nop
+  |
   |//-- Arithmetic metamethods ---------------------------------------------
   |
   |->vmeta_arith_vn:
@@ -1052,7 +1086,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  ffgccheck
   |  mov CARG1, L
   |  mov CARG2, BASE
-  |  bl extern lj_str_fromnumber	// (lua_State *L, cTValue *o)
+  |  bl extern lj_strfmt_number		// (lua_State *L, cTValue *o)
   |  // Returns GCstr *.
   |  ldr BASE, L->base
   |  mvn CARG2, #~LJ_TSTR
@@ -1230,9 +1264,10 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr CARG3, L:RA->base
   |    mv_vmstate CARG2, INTERP
   |  ldr CARG4, L:RA->top
-  |    st_vmstate CARG2
   |   cmp CRET1, #LUA_YIELD
   |  ldr BASE, L->base
+  |    str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+  |    st_vmstate CARG2
   |   bhi >8
   |  subs RC, CARG4, CARG3
   |   ldr CARG1, L->maxstack
@@ -1500,19 +1535,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  math_extern2 atan2
   |  math_extern2 fmod
   |
-  |->ff_math_deg:
-  |.if FPU
-  |  .ffunc_d math_rad
-  |  vldr d1, CFUNC:CARG3->upvalue[0]
-  |  vmul.f64 d0, d0, d1
-  |  b ->fff_resd
-  |.else
-  |  .ffunc_n math_rad
-  |  ldrd CARG34, CFUNC:CARG3->upvalue[0]
-  |  bl extern __aeabi_dmul
-  |  b ->fff_restv
-  |.endif
-  |
   |.if HFABI
   |  .ffunc math_ldexp
   |  ldr CARG4, [BASE, #4]
@@ -1687,12 +1709,6 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |//-- String library -----------------------------------------------------
   |
-  |.ffunc_1 string_len
-  |  checkstr CARG2, ->fff_fallback
-  |  ldr CARG1, STR:CARG1->len
-  |  mvn CARG2, #~LJ_TISNUM
-  |  b ->fff_restv
-  |
   |.ffunc string_byte			// Only handle the 1-arg case here.
   |  ldrd CARG12, [BASE]
   |    ldr PC, [BASE, FRAME_PC]
@@ -1725,6 +1741,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov CARG1, L
   |   str PC, SAVE_PC
   |  bl extern lj_str_new		// (lua_State *L, char *str, size_t l)
+  |->fff_resstr:
   |  // Returns GCstr *.
   |  ldr BASE, L->base
   |   mvn CARG2, #~LJ_TSTR
@@ -1768,91 +1785,28 @@ static void build_subroutines(BuildCtx *ctx)
   |  mvn CARG2, #~LJ_TSTR
   |  b ->fff_restv
   |
-  |.ffunc string_rep			// Only handle the 1-char case inline.
-  |  ffgccheck
-  |  ldrd CARG12, [BASE]
-  |   ldrd CARG34, [BASE, #8]
-  |    cmp NARGS8:RC, #16
-  |    bne ->fff_fallback		// Exactly 2 arguments
-  |  checktp CARG2, LJ_TSTR
-  |   checktpeq CARG4, LJ_TISNUM
-  |   bne ->fff_fallback
-  |  subs CARG4, CARG3, #1
-  |   ldr CARG2, STR:CARG1->len
-  |  blt ->fff_emptystr			// Count <= 0?
-  |   cmp CARG2, #1
-  |   blo ->fff_emptystr		// Zero-length string?
-  |   bne ->fff_fallback		// Fallback for > 1-char strings.
-  |  ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
-  |   ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
-  |   ldr CARG1, STR:CARG1[1]
-  |  cmp RB, CARG3
-  |  blo ->fff_fallback
-  |1:  // Fill buffer with char.
-  |   strb CARG1, [CARG2, CARG4]
-  |  subs CARG4, CARG4, #1
-  |  bge <1
-  |  b ->fff_newstr
-  |
-  |.ffunc string_reverse
+  |.macro ffstring_op, name
+  |  .ffunc string_ .. name
   |  ffgccheck
-  |  ldrd CARG12, [BASE]
+  |  ldr CARG3, [BASE, #4]
   |   cmp NARGS8:RC, #8
+  |  ldr STR:CARG2, [BASE]
   |   blo ->fff_fallback
-  |  checkstr CARG2, ->fff_fallback
-  |  ldr CARG3, STR:CARG1->len
-  |   ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
-  |    ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
-  |  mov CARG4, CARG3
-  |  add CARG1, STR:CARG1, #sizeof(GCstr)
-  |   cmp RB, CARG3
-  |   blo ->fff_fallback
-  |1:  // Reverse string copy.
-  |  ldrb RB, [CARG1], #1
-  |   subs CARG4, CARG4, #1
-  |   blt ->fff_newstr
-  |  strb RB, [CARG2, CARG4]
-  |  b <1
-  |
-  |.macro ffstring_case, name, lo
-  |  .ffunc name
-  |  ffgccheck
-  |  ldrd CARG12, [BASE]
-  |   cmp NARGS8:RC, #8
-  |   blo ->fff_fallback
-  |  checkstr CARG2, ->fff_fallback
-  |  ldr CARG3, STR:CARG1->len
-  |   ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
-  |    ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
-  |  mov CARG4, #0
-  |  add CARG1, STR:CARG1, #sizeof(GCstr)
-  |   cmp RB, CARG3
-  |   blo ->fff_fallback
-  |1:  // ASCII case conversion.
-  |  ldrb RB, [CARG1, CARG4]
-  |   cmp CARG4, CARG3
-  |   bhs ->fff_newstr
-  |  sub RC, RB, #lo
-  |  cmp RC, #26
-  |  eorlo RB, RB, #0x20
-  |  strb RB, [CARG2, CARG4]
-  |   add CARG4, CARG4, #1
-  |  b <1
+  |  sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf)
+  |  checkstr CARG3, ->fff_fallback
+  |  ldr CARG4, SBUF:CARG1->b
+  |   str BASE, L->base
+  |   str PC, SAVE_PC
+  |   str L, SBUF:CARG1->L
+  |  str CARG4, SBUF:CARG1->p
+  |  bl extern lj_buf_putstr_ .. name
+  |  bl extern lj_buf_tostr
+  |  b ->fff_resstr
   |.endmacro
   |
-  |ffstring_case string_lower, 65
-  |ffstring_case string_upper, 97
-  |
-  |//-- Table library ------------------------------------------------------
-  |
-  |.ffunc_1 table_getn
-  |  checktab CARG2, ->fff_fallback
-  |  .IOS mov RA, BASE
-  |  bl extern lj_tab_len		// (GCtab *t)
-  |  // Returns uint32_t (but less than 2^31).
-  |  .IOS mov BASE, RA
-  |  mvn CARG2, #~LJ_TISNUM
-  |  b ->fff_restv
+  |ffstring_op reverse
+  |ffstring_op lower
+  |ffstring_op upper
   |
   |//-- Bit library --------------------------------------------------------
   |
@@ -2126,6 +2080,69 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
   |   ldr INS, [PC, #-4]
   |  bx CRET1
+  |
+  |->cont_stitch:			// Trace stitching.
+  |.if JIT
+  |  // RA = resultptr, CARG4 = meta base
+  |   ldr RB, SAVE_MULTRES
+  |  ldr INS, [PC, #-4]
+  |    ldr CARG3, [CARG4, #-24]		// Save previous trace number.
+  |   subs RB, RB, #8
+  |  decode_RA8 RC, INS			// Call base.
+  |   beq >2
+  |1:  // Move results down.
+  |  ldrd CARG12, [RA]
+  |    add RA, RA, #8
+  |   subs RB, RB, #8
+  |  strd CARG12, [BASE, RC]
+  |    add RC, RC, #8
+  |   bne <1
+  |2:
+  |   decode_RA8 RA, INS
+  |   decode_RB8 RB, INS
+  |   add RA, RA, RB
+  |  ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
+  |3:
+  |   cmp RA, RC
+  |  mvn CARG2, #~LJ_TNIL
+  |   bhi >9				// More results wanted?
+  |
+  |  ldr TRACE:RA, [CARG1, CARG3, lsl #2]
+  |  cmp TRACE:RA, #0
+  |  beq ->cont_nop
+  |  ldrh RC, TRACE:RA->link
+  |  cmp RC, CARG3
+  |  beq ->cont_nop			// Blacklisted.
+  |  cmp RC, #0
+  |  bne =>BC_JLOOP			// Jump to stitched trace.
+  |
+  |  // Stitch a new trace to the previous trace.
+  |  str CARG3, [DISPATCH, #DISPATCH_J(exitno)]
+  |  str L, [DISPATCH, #DISPATCH_J(L)]
+  |  str BASE, L->base
+  |  sub CARG1, DISPATCH, #-GG_DISP2J
+  |  mov CARG2, PC
+  |  bl extern lj_dispatch_stitch	// (jit_State *J, const BCIns *pc)
+  |  ldr BASE, L->base
+  |  b ->cont_nop
+  |
+  |9:  // Fill up results with nil.
+  |  strd CARG12, [BASE, RC]
+  |  add RC, RC, #8
+  |  b <3
+  |.endif
+  |
+  |->vm_profhook:			// Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+  |  mov CARG1, L
+  |   str BASE, L->base
+  |  mov CARG2, PC
+  |  bl extern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
+  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+  |  ldr BASE, L->base
+  |  sub PC, PC, #4
+  |  b ->cont_nop
+#endif
   |
   |//-----------------------------------------------------------------------
   |//-- Trace exit handler -------------------------------------------------
@@ -2151,14 +2168,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  add CARG1, CARG1, CARG2, asr #6
   |   ldr CARG2, [lr, #4]	// Load exit stub group offset.
   |   sub CARG1, CARG1, lr
-  |  ldr L, [DISPATCH, #DISPATCH_GL(jit_L)]
+  |  ldr L, [DISPATCH, #DISPATCH_GL(cur_L)]
   |   add CARG1, CARG2, CARG1, lsr #2	// Compute exit number.
   |    ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
   |   str CARG1, [DISPATCH, #DISPATCH_J(exitno)]
   |   mov CARG4, #0
-  |  str L, [DISPATCH, #DISPATCH_J(L)]
   |    str BASE, L->base
-  |   str CARG4, [DISPATCH, #DISPATCH_GL(jit_L)]
+  |  str L, [DISPATCH, #DISPATCH_J(L)]
+  |   str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)]
   |  sub CARG1, DISPATCH, #-GG_DISP2J
   |  mov CARG2, sp
   |  bl extern lj_trace_exit		// (jit_State *J, ExitState *ex)
@@ -2177,13 +2194,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr L, SAVE_L
   |1:
   |  cmp CARG1, #0
-  |  blt >3				// Check for error from exit.
+  |  blt >9				// Check for error from exit.
   |   lsl RC, CARG1, #3
   |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
   |   str RC, SAVE_MULTRES
   |   mov CARG3, #0
+  |   str BASE, L->base
   |  ldr CARG2, LFUNC:CARG2->field_pc
-  |   str CARG3, [DISPATCH, #DISPATCH_GL(jit_L)]
+  |   str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)]
   |    mv_vmstate CARG4, INTERP
   |  ldr KBASE, [CARG2, #PC2PROTO(k)]
   |  // Modified copy of ins_next which handles function header dispatch, too.
@@ -2192,15 +2210,32 @@ static void build_subroutines(BuildCtx *ctx)
   |   ldr INS, [PC], #4
   |     lsl MASKR8, MASKR8, #3		// MASKR8 = 255*8.
   |    st_vmstate CARG4
+  |  cmp OP, #BC_FUNCC+2		// Fast function?
+  |  bhs >4
+  |2:
   |  cmp OP, #BC_FUNCF			// Function header?
   |  ldr OP, [DISPATCH, OP, lsl #2]
   |   decode_RA8 RA, INS
   |   lsrlo RC, INS, #16	// No: Decode operands A*8 and D.
   |   subhs RC, RC, #8
   |   addhs RA, RA, BASE	// Yes: RA = BASE+framesize*8, RC = nargs*8
+  |   ldrhs CARG3, [BASE, FRAME_FUNC]
   |  bx OP
   |
-  |3:  // Rethrow error from the right C frame.
+  |4:  // Check frame below fast function.
+  |  ldr CARG1, [BASE, FRAME_PC]
+  |  ands CARG2, CARG1, #FRAME_TYPE
+  |  bne <2			// Trace stitching continuation?
+  |  // Otherwise set KBASE for Lua function below fast function.
+  |  ldr CARG3, [CARG1, #-4]
+  |  decode_RA8 CARG1, CARG3
+  |  sub CARG2, BASE, CARG1
+  |  ldr LFUNC:CARG3, [CARG2, #-16]
+  |  ldr CARG3, LFUNC:CARG3->field_pc
+  |  ldr KBASE, [CARG3, #PC2PROTO(k)]
+  |  b <2
+  |
+  |9:  // Rethrow error from the right C frame.
   |  rsb CARG2, CARG1, #0
   |  mov CARG1, L
   |  bl extern lj_err_throw		// (lua_State *L, int errcode)
@@ -2833,6 +2868,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_next
     break;
 
+  case BC_ISTYPE:
+    |  // RA = src*8, RC = -type
+    |  ldrd CARG12, [BASE, RA]
+    |   ins_next1
+    |  cmn CARG2, RC
+    |   ins_next2
+    |  bne ->vmeta_istype
+    |   ins_next3
+    break;
+  case BC_ISNUM:
+    |  // RA = src*8, RC = -(TISNUM-1)
+    |  ldrd CARG12, [BASE, RA]
+    |   ins_next1
+    |  checktp CARG2, LJ_TISNUM
+    |   ins_next2
+    |  bhs ->vmeta_istype
+    |   ins_next3
+    break;
+
   /* -- Unary ops --------------------------------------------------------- */
 
   case BC_MOV:
@@ -3503,6 +3557,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  bne <1				// 'no __index' flag set: done.
     |  b ->vmeta_tgetb
     break;
+  case BC_TGETR:
+    |  decode_RB8 RB, INS
+    |   decode_RC8 RC, INS
+    |  // RA = dst*8, RB = table*8, RC = key*8
+    |  ldr TAB:CARG1, [BASE, RB]	// CARG1 = table. Its type is not checked here.
+    |   ldr CARG2, [BASE, RC]		// CARG2 = key. Its type is not checked here.
+    |  ldr CARG4, TAB:CARG1->array
+    |    ldr CARG3, TAB:CARG1->asize
+    |  add CARG4, CARG4, CARG2, lsl #3	// CARG4 = array + key*8.
+    |    cmp CARG2, CARG3		// In array part?
+    |    bhs ->vmeta_tgetr		// Unsigned compare: negative keys fall back, too.
+    |  ldrd CARG12, [CARG4]
+    |->BC_TGETR_Z:			// CARG12 = result TValue (also entered from vmeta_tgetr).
+    |   ins_next1
+    |   ins_next2
+    |  strd CARG12, [BASE, RA]
+    |   ins_next3
+    break;
 
   case BC_TSETV:
     |  decode_RB8 RB, INS
@@ -3673,6 +3745,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  barrierback TAB:CARG1, INS, CARG3
     |  b <2
     break;
+  case BC_TSETR:
+    |  decode_RB8 RB, INS
+    |   decode_RC8 RC, INS
+    |  // RA = src*8, RB = table*8, RC = key*8
+    |  ldr TAB:CARG2, [BASE, RB]	// CARG2 = table. Its type is not checked here.
+    |   ldr CARG3, [BASE, RC]		// CARG3 = key. Its type is not checked here.
+    |     ldrb INS, TAB:CARG2->marked	// INS doubles as a temp for the GC mark byte.
+    |  ldr CARG1, TAB:CARG2->array
+    |    ldr CARG4, TAB:CARG2->asize
+    |     tst INS, #LJ_GC_BLACK		// isblack(table)
+    |  add CARG1, CARG1, CARG3, lsl #3	// CARG1 = array + key*8 (destination slot).
+    |     bne >7
+    |2:
+    |    cmp CARG3, CARG4		// In array part?
+    |    bhs ->vmeta_tsetr		// Unsigned compare: negative keys fall back, too.
+    |->BC_TSETR_Z:			// CARG1 = destination slot (also entered from vmeta_tsetr).
+    |  ldrd CARG34, [BASE, RA]	// Load the value to store.
+    |   ins_next1
+    |   ins_next2
+    |  strd CARG34, [CARG1]
+    |   ins_next3
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:CARG2, INS, RB	// RB (table*8) is no longer needed and serves as tmp.
+    |  b <2
+    break;
 
   case BC_TSETM:
     |  // RA = base*8 (table at base-1), RC = num_const (start index)
@@ -4270,7 +4368,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   st_vmstate CARG2
     |  ldr RA, TRACE:RC->mcode
     |   str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
-    |   str L, [DISPATCH, #DISPATCH_GL(jit_L)]
+    |   str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)]
     |  bx RA
     |.endif
     break;
@@ -4388,6 +4486,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ldr BASE, L->base
     |    mv_vmstate CARG3, INTERP
     |   ldr CRET2, L->top
+    |    str L, [DISPATCH, #DISPATCH_GL(cur_L)]
     |   lsl RC, CRET1, #3
     |    st_vmstate CARG3
     |  ldr PC, [BASE, FRAME_PC]
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_arm64.dasc b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_arm64.dasc
new file mode 100644
index 0000000000000000000000000000000000000000..f1251f2c48acff274fda5122f7a07a7d0b9b21e8
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_arm64.dasc
@@ -0,0 +1,3764 @@
+|// Low-level VM code for ARM64 CPUs.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+|
+|.arch arm64
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|// Note: The ragged indentation of the instructions is intentional.
+|//       The starting columns indicate data dependencies.
+|
+|//-----------------------------------------------------------------------
+|
+|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance:
+|//
+|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr
+|// x18 is reserved on most platforms. Don't use it, save it or restore it.
+|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp,
+|// depending on the instruction.
+|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp
+|//
+|// x0-x7/v0-v7 hold parameters and results.
+|
+|// Fixed register assignments for the interpreter.
+|
+|// The following must be C callee-save.
+|.define BASE,		x19	// Base of current Lua stack frame.
+|.define KBASE,		x20	// Constants of current Lua function.
+|.define PC,		x21	// Next PC.
+|.define GLREG,		x22	// Global state.
+|.define LREG,		x23	// Register holding lua_State (also in SAVE_L).
+|.define TISNUM,	x24	// Constant LJ_TISNUM << 47.
+|.define TISNUMhi,	x25	// Constant LJ_TISNUM << 15.
+|.define TISNIL,	x26	// Constant -1LL.
+|.define fp,		x29	// Yes, we have to maintain a frame pointer.
+|
+|.define ST_INTERP,	w26	// Constant -1.
+|
+|// The following temporaries are not saved across C calls, except for RA/RC.
+|.define RA,		x27
+|.define RC,		x28
+|.define RB,		x17
+|.define RAw,		w27
+|.define RCw,		w28
+|.define RBw,		w17
+|.define INS,		x16
+|.define INSw,		w16
+|.define ITYPE,		x15
+|.define TMP0,		x8
+|.define TMP1,		x9
+|.define TMP2,		x10
+|.define TMP3,		x11
+|.define TMP0w,		w8
+|.define TMP1w,		w9
+|.define TMP2w,		w10
+|.define TMP3w,		w11
+|
+|// Calling conventions. Also used as temporaries.
+|.define CARG1,		x0
+|.define CARG2,		x1
+|.define CARG3,		x2
+|.define CARG4,		x3
+|.define CARG5,		x4
+|.define CARG1w,	w0
+|.define CARG2w,	w1
+|.define CARG3w,	w2
+|.define CARG4w,	w3
+|.define CARG5w,	w4
+|
+|.define FARG1,		d0
+|.define FARG2,		d1
+|
+|.define CRET1,		x0
+|.define CRET1w,	w0
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|
+|.define CFRAME_SPACE,	208
+|//----- 16 byte aligned, <-- sp entering interpreter
+|// Unused		[sp, #204]	// 32 bit values
+|.define SAVE_NRES,	[sp, #200]
+|.define SAVE_ERRF,	[sp, #196]
+|.define SAVE_MULTRES,	[sp, #192]
+|.define TMPD,		[sp, #184]	// 64 bit values
+|.define SAVE_L,	[sp, #176]
+|.define SAVE_PC,	[sp, #168]
+|.define SAVE_CFRAME,	[sp, #160]
+|.define SAVE_FPR_,	96		// 96+8*8: 64 bit FPR saves
+|.define SAVE_GPR_,	16		// 16+10*8: 64 bit GPR saves
+|.define SAVE_LR,	[sp, #8]
+|.define SAVE_FP,	[sp]
+|//----- 16 byte aligned, <-- sp while in interpreter.
+|
+|.define TMPDofs,	#184
+|
+|.macro save_, gpr1, gpr2, fpr1, fpr2	// Save one GPR pair and one FPR pair.
+|  stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]	// FPR slots are indexed from d8.
+|  stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]	// GPR slots are indexed from x19.
+|.endmacro
+|.macro rest_, gpr1, gpr2, fpr1, fpr2	// Reload what save_ stored, same slots.
+|  ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
+|  ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
+|.endmacro
+|
+|.macro saveregs			// Allocate the C frame and save fp, lr, x19-x28, d8-d15.
+|  stp fp, lr, [sp, #-CFRAME_SPACE]!	// sp -= CFRAME_SPACE, then fill SAVE_FP/SAVE_LR.
+|  add fp, sp, #0			// fp = sp.
+|  stp x19, x20, [sp, # SAVE_GPR_]	// First GPR pair has no matching FPR pair.
+|  save_ 21, 22, 8, 9
+|  save_ 23, 24, 10, 11
+|  save_ 25, 26, 12, 13
+|  save_ 27, 28, 14, 15
+|.endmacro
+|.macro restoreregs			// Inverse of saveregs.
+|  ldp x19, x20, [sp, # SAVE_GPR_]
+|  rest_ 21, 22, 8, 9
+|  rest_ 23, 24, 10, 11
+|  rest_ 25, 26, 12, 13
+|  rest_ 27, 28, 14, 15
+|  ldp fp, lr, [sp], # CFRAME_SPACE	// Reload fp/lr, then sp += CFRAME_SPACE.
+|.endmacro
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L,		lua_State,	LREG
+|.type GL,		global_State,	GLREG
+|.type TVALUE,		TValue
+|.type GCOBJ,		GCobj
+|.type STR,		GCstr
+|.type TAB,		GCtab
+|.type LFUNC,		GCfuncL
+|.type CFUNC,		GCfuncC
+|.type PROTO,		GCproto
+|.type UPVAL,		GCupval
+|.type NODE,		Node
+|.type NARGS8,		int
+|.type TRACE,		GCtrace
+|.type SBUF,		SBuf
+|
+|//-----------------------------------------------------------------------
+|
+|// Trap for not-yet-implemented parts.
+|.macro NYI; brk; .endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Access to frame relative to BASE.
+|.define FRAME_FUNC,	#-16
+|.define FRAME_PC,	#-8
+|
+|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro
+|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro
+|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro
+|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro
+|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro
+|
+|// Instruction decode+dispatch.
+|.macro ins_NEXT
+|  ldr INSw, [PC], #4			// Fetch the 32 bit instruction, then PC += 4.
+|  add TMP1, GL, INS, uxtb #3		// TMP1 = GL + (low byte of INS)*8.
+|   decode_RA RA, INS			// RA = bits 8-15 of INS.
+|  ldr TMP0, [TMP1, #GG_G2DISP]	// Load the branch target from the table at GL+GG_G2DISP.
+|   decode_RD RC, INS			// RC = bits 16-31 of INS.
+|  br TMP0
+|.endmacro
+|
+|// Instruction footer.
+|.if 1
+|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+|  .define ins_next, ins_NEXT
+|  .define ins_next_, ins_NEXT
+|.else
+|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+|  // Affects only certain kinds of benchmarks (and only with -j off).
+|  .macro ins_next
+|    b ->ins_next
+|  .endmacro
+|  .macro ins_next_
+|  ->ins_next:
+|    ins_NEXT
+|  .endmacro
+|.endif
+|
+|// Call decode and dispatch.
+|.macro ins_callt
+|  // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+|  ldr PC, LFUNC:CARG3->pc
+|  ldr INSw, [PC], #4
+|  add TMP1, GL, INS, uxtb #3
+|   decode_RA RA, INS
+|  ldr TMP0, [TMP1, #GG_G2DISP]
+|   add RA, BASE, RA, lsl #3
+|  br TMP0
+|.endmacro
+|
+|.macro ins_call
+|  // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
+|  str PC, [BASE, FRAME_PC]
+|  ins_callt
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Macros to check the TValue type and extract the GCobj. Branch on failure.
+|.macro checktp, reg, tp, target
+|  asr ITYPE, reg, #47			// ITYPE = reg >> 47 (arithmetic shift).
+|  cmn ITYPE, #-tp			// Z is set iff ITYPE == tp.
+|   and reg, reg, #LJ_GCVMASK		// Masked in place, whether or not the check passes.
+|  bne target
+|.endmacro
+|.macro checktp, dst, reg, tp, target	// Same check, but the masked value goes to dst.
+|  asr ITYPE, reg, #47
+|  cmn ITYPE, #-tp
+|   and dst, reg, #LJ_GCVMASK
+|  bne target
+|.endmacro
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
+|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
+|.macro checkint, reg, target
+|  cmp TISNUMhi, reg, lsr #32		// Compare TISNUMhi with the upper 32 bits of reg.
+|  bne target				// Fail unless they are equal.
+|.endmacro
+|.macro checknum, reg, target
+|  cmp TISNUMhi, reg, lsr #32
+|  bls target				// Fail unless the upper word is below TISNUMhi (unsigned).
+|.endmacro
+|.macro checknumber, reg, target
+|  cmp TISNUMhi, reg, lsr #32
+|  blo target				// Fail only if the upper word is above TISNUMhi (unsigned).
+|.endmacro
+|
+|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro
+|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro
+|
+#define GL_J(field)	(GG_OFS(J) + (int)offsetof(jit_State, field))
+|
+#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
+|.macro hotcheck, delta
+|  NYI
+|.endmacro
+|
+|.macro hotloop
+|  hotcheck HOTCOUNT_LOOP
+|  blo ->vm_hotloop
+|.endmacro
+|
+|.macro hotcall
+|  hotcheck HOTCOUNT_CALL
+|  blo ->vm_hotcall
+|.endmacro
+|
+|// Set current VM state.
+|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro
+|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro
+|
+|// Move table write barrier back. Overwrites mark and tmp.
+|.macro barrierback, tab, mark, tmp
+|  ldr tmp, GL->gc.grayagain
+|   and mark, mark, #~LJ_GC_BLACK	// black2gray(tab)
+|  str tab, GL->gc.grayagain
+|   strb mark, tab->marked
+|  str tmp, tab->gclist
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+#if !LJ_DUALNUM
+#error "Only dual-number mode supported for ARM64 target"
+#endif
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+  |.code_sub
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Return handling ----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_returnp:
+  |  // See vm_return. Also: RB = previous base.
+  |  tbz PC, #2, ->cont_dispatch	// (PC & FRAME_P) == 0?
+  |
+  |  // Return from pcall or xpcall fast func.
+  |  ldr PC, [RB, FRAME_PC]		// Fetch PC of previous frame.
+  |   mov_true TMP0
+  |  mov BASE, RB			// Continue with the previous frame as BASE.
+  |  // Prepending may overwrite the pcall frame, so do it at the end.
+  |   str TMP0, [RA, #-8]!		// Prepend true to results.
+  |
+  |->vm_returnc:
+  |  adds RC, RC, #8			// RC = (nresults+1)*8.
+  |  mov CRET1, #LUA_YIELD		// Status returned if the branch below is taken.
+  |  beq ->vm_unwind_c_eh		// RC is zero after the add: leave via the unwind pad.
+  |  str RCw, SAVE_MULTRES
+  |  ands CARG1, PC, #FRAME_TYPE	// CARG1 = PC & FRAME_TYPE, as vm_return expects.
+  |  beq ->BC_RET_Z			// Handle regular return to Lua.
+  |
+  |->vm_return:
+  |  // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
+  |  // CARG1 = PC & FRAME_TYPE
+  |  and RB, PC, #~FRAME_TYPEP		// RB = PC with the FRAME_TYPEP bits cleared.
+  |   cmp CARG1, #FRAME_C
+  |  sub RB, BASE, RB			// RB = previous base.
+  |   bne ->vm_returnp			// Frame type is not FRAME_C.
+  |
+  |  str RB, L->base			// L->base = previous base.
+  |   ldrsw CARG2, SAVE_NRES		// CARG2 = nresults+1.
+  |    mv_vmstate TMP0w, C
+  |   sub BASE, BASE, #16		// BASE becomes the write pointer for the results.
+  |  subs TMP2, RC, #8			// TMP2 = nresults*8.
+  |    st_vmstate TMP0w
+  |  beq >2				// Nothing to copy.
+  |1:  // Copy results down, 8 bytes at a time.
+  |  subs TMP2, TMP2, #8
+  |   ldr TMP0, [RA], #8
+  |   str TMP0, [BASE], #8
+  |  bne <1
+  |2:
+  |  cmp RC, CARG2, lsl #3		// More/less results wanted?
+  |  bne >6
+  |3:
+  |  str BASE, L->top			// Store new top.
+  |
+  |->vm_leave_cp:
+  |  ldr RC, SAVE_CFRAME		// Restore previous C frame.
+  |   mov CRET1, #0			// Ok return status for vm_pcall.
+  |  str RC, L->cframe
+  |
+  |->vm_leave_unw:
+  |  restoreregs
+  |  ret				// Status is in CRET1.
+  |
+  |6:
+  |  bgt >7				// Less results wanted?
+  |  // More results wanted. Check stack size and fill up results with nil.
+  |  ldr CARG3, L->maxstack
+  |  cmp BASE, CARG3
+  |  bhs >8
+  |   str TISNIL, [BASE], #8		// Append one nil.
+  |  add RC, RC, #8			// Count it as a result.
+  |  b <2
+  |
+  |7:  // Less results wanted.
+  |  cbz CARG2, <3			// LUA_MULTRET+1 case?
+  |  sub CARG1, RC, CARG2, lsl #3	// CARG1 = surplus in bytes.
+  |  sub BASE, BASE, CARG1		// Shrink top.
+  |  b <3
+  |
+  |8:  // Corner case: need to grow stack for filling up results.
+  |  // This can happen if:
+  |  // - A C function grows the stack (a lot).
+  |  // - The GC shrinks the stack in between.
+  |  // - A return back from a lua_call() with (high) nresults adjustment.
+  |  str BASE, L->top			// Save current top held in BASE (yes).
+  |  mov CARG1, L			// CARG2 (n) still holds the wanted nresults+1.
+  |  bl extern lj_state_growstack	// (lua_State *L, int n)
+  |  ldr BASE, L->top			// Need the (realloced) L->top in BASE.
+  |  ldrsw CARG2, SAVE_NRES		// CARG2 is a temp register, so reload it after the call.
+  |  b <2
+  |
+  |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
+  |  // (void *cframe, int errcode)
+  |  mov sp, CARG1			// sp = cframe.
+  |  mov CRET1, CARG2			// Return errcode as the status.
+  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
+  |  ldr L, SAVE_L			// Reload L from the C frame.
+  |   mv_vmstate TMP0w, C
+  |  ldr GL, L->glref			// Setup pointer to global state.
+  |   st_vmstate TMP0w
+  |  b ->vm_leave_unw			// Restore registers and return CRET1.
+  |
+  |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
+  |  // (void *cframe)
+  |  and sp, CARG1, #CFRAME_RAWMASK	// sp = cframe masked with CFRAME_RAWMASK.
+  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
+  |  ldr L, SAVE_L
+  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48	// Setup the constant registers.
+  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+  |    movn TISNIL, #0
+  |    mov RC, #16			// 2 results: false + error message.
+  |  ldr BASE, L->base
+  |   ldr GL, L->glref			// Setup pointer to global state.
+  |    mov_false TMP0
+  |  sub RA, BASE, #8			// Results start at BASE-8.
+  |  ldr PC, [BASE, FRAME_PC]		// Fetch PC of previous frame.
+  |    str TMP0, [BASE, #-8]		// Prepend false to error message.
+  |   st_vmstate ST_INTERP		// ST_INTERP is w26, the low half of TISNIL set above.
+  |  b ->vm_returnc
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Grow stack for calls -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_growstack_c:			// Grow stack for C function.
+  |  // CARG1 = L
+  |  mov CARG2, #LUA_MINSTACK		// n = LUA_MINSTACK.
+  |  b >2
+  |
+  |->vm_growstack_l:			// Grow stack for Lua function.
+  |  // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
+  |  add RC, BASE, RC			// RC = BASE+nargs*8 = top.
+  |   sub RA, RA, BASE			// RA = framesize*8.
+  |    mov CARG1, L
+  |  stp BASE, RC, L->base		// One paired store for L->base and L->top.
+  |   add PC, PC, #4			// Must point after first instruction.
+  |   lsr CARG2, RA, #3			// n = framesize.
+  |2:
+  |  // L->base = new base, L->top = top
+  |  str PC, SAVE_PC
+  |  bl extern lj_state_growstack	// (lua_State *L, int n)
+  |  ldp BASE, RC, L->base		// Reload base and top after the call.
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+  |   sub NARGS8:RC, RC, BASE		// RC = nargs*8 again.
+  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK	// Mask down to the function object.
+  |  // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+  |  ins_callt				// Just retry the call.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Entry points into the assembler VM ---------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_resume:				// Setup C frame and resume thread.
+  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+  |  // Initializes the C frame slots (NRES, ERRF, CFRAME = 0, PC = L) and
+  |  // links sp+CFRAME_RESUME into L->cframe. If L->status is zero the code
+  |  // branches forward to the shared call setup at label 3, otherwise it
+  |  // resumes after a yield via the return path.
+  |  saveregs
+  |  mov L, CARG1
+  |    ldr GL, L->glref			// Setup pointer to global state.
+  |  mov BASE, CARG2
+  |   str L, SAVE_L
+  |  mov PC, #FRAME_CP
+  |   str wzr, SAVE_NRES
+  |    add TMP0, sp, #CFRAME_RESUME
+  |  ldrb TMP1w, L->status
+  |   str wzr, SAVE_ERRF
+  |   str L, SAVE_PC			// Any value outside of bytecode is ok.
+  |   str xzr, SAVE_CFRAME
+  |    str TMP0, L->cframe
+  |  cbz TMP1w, >3
+  |
+  |  // Resume after yield (like a return).
+  |  str L, GL->cur_L
+  |  mov RA, BASE
+  |   ldp BASE, CARG1, L->base
+  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+  |  ldr PC, [BASE, FRAME_PC]
+  |     strb wzr, L->status
+  |    movn TISNIL, #0
+  |   sub RC, CARG1, BASE
+  |  // The flags set here select between BC_RET_Z and vm_return below.
+  |  ands CARG1, PC, #FRAME_TYPE
+  |   add RC, RC, #8
+  |     st_vmstate ST_INTERP
+  |   str RCw, SAVE_MULTRES
+  |  beq ->BC_RET_Z
+  |  b ->vm_return
+  |
+  |// vm_pcall and vm_call differ only in the frame type loaded into PC and in
+  |// vm_pcall storing the error function argument; both continue at label 1.
+  |->vm_pcall:				// Setup protected C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+  |  saveregs
+  |  mov PC, #FRAME_CP
+  |  str CARG4w, SAVE_ERRF
+  |  b >1
+  |
+  |->vm_call:				// Setup C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1)
+  |  saveregs
+  |  mov PC, #FRAME_C
+  |
+  |1:  // Entry point for vm_pcall above (PC = ftype).
+  |  ldr RC, L:CARG1->cframe
+  |   str CARG3w, SAVE_NRES
+  |    mov L, CARG1
+  |   str CARG1, SAVE_L
+  |    ldr GL, L->glref			// Setup pointer to global state.
+  |     mov BASE, CARG2
+  |   str CARG1, SAVE_PC		// Any value outside of bytecode is ok.
+  |  str RC, SAVE_CFRAME
+  |  str fp, L->cframe			// Add our C frame to cframe chain.
+  |
+  |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+  |  str L, GL->cur_L
+  |  ldp RB, CARG1, L->base		// RB = old base (for vmeta_call).
+  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+  |  add PC, PC, BASE
+  |    movn TISNIL, #0
+  |  sub PC, PC, RB			// PC = frame delta + frame type
+  |   sub NARGS8:RC, CARG1, BASE
+  |    st_vmstate ST_INTERP
+  |
+  |->vm_call_dispatch:
+  |  // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
+  |  // Non-function values are diverted to vmeta_call by checkfunc.
+  |  ldr CARG3, [BASE, FRAME_FUNC]
+  |  checkfunc CARG3, ->vmeta_call
+  |
+  |->vm_call_dispatch_f:
+  |  ins_call
+  |  // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC
+  |
+  |->vm_cpcall:				// Setup protected C frame, call C.
+  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+  |  // Calls cp (CARG4) with the first three arguments left untouched in
+  |  // CARG1-CARG3. A non-zero result is used as the new BASE and the call
+  |  // continues at label 3 above; a zero result just removes the C frame.
+  |  saveregs
+  |  mov L, CARG1
+  |   ldr RA, L:CARG1->stack
+  |  str CARG1, SAVE_L
+  |    ldr GL, L->glref			// Setup pointer to global state.
+  |   ldr RB, L->top
+  |  str CARG1, SAVE_PC			// Any value outside of bytecode is ok.
+  |  ldr RC, L->cframe
+  |   sub RA, RA, RB			// Compute -savestack(L, L->top).
+  |   str RAw, SAVE_NRES		// Neg. delta means cframe w/o frame.
+  |  str wzr, SAVE_ERRF			// No error function.
+  |  str RC, SAVE_CFRAME
+  |  str fp, L->cframe			// Add our C frame to cframe chain.
+  |    str L, GL->cur_L
+  |  blr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
+  |  mov BASE, CRET1
+  |   mov PC, #FRAME_CP
+  |  cbnz BASE, <3			// Else continue with the call.
+  |  b ->vm_leave_cp			// No base? Just remove C frame.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Metamethod handling ------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |//-- Continuation dispatch ----------------------------------------------
+  |
+  |->cont_dispatch:
+  |  // BASE = meta base, RA = resultptr, RC = (nresults+1)*8
+  |  // Loads the continuation from [BASE-32] and the saved PC from [BASE-24],
+  |  // restores the caller's BASE and KBASE and jumps to the continuation.
+  |  // With FFI, continuation values 0 and 1 are handled at label 1 instead.
+  |  ldr LFUNC:CARG3, [RB, FRAME_FUNC]
+  |    ldr CARG1, [BASE, #-32]		// Get continuation.
+  |   mov CARG4, BASE
+  |   mov BASE, RB			// Restore caller BASE.
+  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+  |.if FFI
+  |    cmp CARG1, #1
+  |.endif
+  |   ldr PC, [CARG4, #-24]		// Restore PC from [cont|PC].
+  |  ldr CARG3, LFUNC:CARG3->pc
+  |    add TMP0, RA, RC
+  |    str TISNIL, [TMP0, #-8]		// Ensure one valid arg.
+  |.if FFI
+  |    bls >1
+  |.endif
+  |  ldr KBASE, [CARG3, #PC2PROTO(k)]
+  |  // BASE = base, RA = resultptr, CARG4 = meta base
+  |    br CARG1
+  |
+  |.if FFI
+  |1:
+  |  // Flags are still those of the 'cmp CARG1, #1' above.
+  |  beq ->cont_ffi_callback		// cont = 1: return from FFI callback.
+  |  // cont = 0: tailcall from C function.
+  |   sub CARG4, CARG4, #32
+  |   sub RC, CARG4, BASE
+  |  b ->vm_call_tail
+  |.endif
+  |
+  |->cont_cat:				// RA = resultptr, CARG4 = meta base
+  |  // Re-decodes the instruction before PC. CARG2 = meta base - 32 is compared
+  |  // against BASE+RB*8: if they differ, the result is stored to [CARG2] and
+  |  // control goes to BC_CAT_Z with CARG3 = distance in slots; if they are
+  |  // equal, the result is stored to slot RA and dispatch continues.
+  |  ldr INSw, [PC, #-4]
+  |   sub CARG2, CARG4, #32
+  |   ldr TMP0, [RA]
+  |     str BASE, L->base
+  |  decode_RB RB, INS
+  |   decode_RA RA, INS
+  |  add TMP1, BASE, RB, lsl #3
+  |  subs TMP1, CARG2, TMP1
+  |  beq >1
+  |   str TMP0, [CARG2]
+  |  lsr CARG3, TMP1, #3
+  |  b ->BC_CAT_Z
+  |
+  |1:
+  |   str TMP0, [BASE, RA, lsl #3]
+  |  b ->cont_nop
+  |
+  |//-- Table indexing metamethods -----------------------------------------
+  |
+  |// The entry points below only differ in how CARG2 (pointer to the object)
+  |// and CARG3 (pointer to the key) are set up. All of them join at label 1,
+  |// which calls lj_meta_tget.
+  |->vmeta_tgets1:
+  |  // Build a tagged string key from STR:RC in CARG4; object is slot RB.
+  |  movn CARG4, #~LJ_TSTR
+  |   add CARG2, BASE, RB, lsl #3
+  |  add CARG4, STR:RC, CARG4, lsl #47
+  |  b >2
+  |
+  |->vmeta_tgets:
+  |  // Tag CARG2 as a table and pass it via GL->tmptv.
+  |  // NOTE(review): CARG4 is not set here; presumably the caller passes the
+  |  // tagged string key in it -- confirm against the bytecode handlers.
+  |  movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48
+  |  str CARG2, GL->tmptv
+  |  add CARG2, GL, #offsetof(global_State, tmptv)
+  |2:
+  |  // The key is spilled to the TMPD stack slot and passed by address.
+  |   add CARG3, sp, TMPDofs
+  |  str CARG4, TMPD
+  |  b >1
+  |
+  |->vmeta_tgetb:			// RB = table, RC = index
+  |  add RC, RC, TISNUM
+  |   add CARG2, BASE, RB, lsl #3
+  |   add CARG3, sp, TMPDofs
+  |  str RC, TMPD
+  |  b >1
+  |
+  |->vmeta_tgetv:			// RB = table, RC = key
+  |  add CARG2, BASE, RB, lsl #3
+  |   add CARG3, BASE, RC, lsl #3
+  |1:
+  |   str BASE, L->base
+  |  mov CARG1, L
+  |   str PC, SAVE_PC
+  |  bl extern lj_meta_tget		// (lua_State *L, TValue *o, TValue *k)
+  |  // Returns TValue * (finished) or NULL (metamethod).
+  |  cbz CRET1, >3
+  |  // Finished: copy the result into slot RA.
+  |  ldr TMP0, [CRET1]
+  |  str TMP0, [BASE, RA, lsl #3]
+  |  ins_next
+  |
+  |3:  // Call __index metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k
+  |   sub TMP1, BASE, #FRAME_CONT
+  |  ldr BASE, L->top
+  |    mov NARGS8:RC, #16		// 2 args for func(t, k).
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Guaranteed to be a function here.
+  |    str PC, [BASE, #-24]		// [cont|PC]
+  |   sub PC, BASE, TMP1
+  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+  |  b ->vm_call_dispatch_f
+  |
+  |->vmeta_tgetr:
+  |  // Looks up the sign-extended integer key from TMP1w via lj_tab_getinth.
+  |  // TMP0 = loaded value, or nil (TISNIL) if the lookup returned NULL.
+  |  // NOTE(review): CARG1 (the table) is presumably set by the caller.
+  |  sxtw CARG2, TMP1w
+  |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
+  |  // Returns cTValue * or NULL.
+  |  mov TMP0, TISNIL
+  |  cbz CRET1, ->BC_TGETR_Z
+  |  ldr TMP0, [CRET1]
+  |  b ->BC_TGETR_Z
+  |
+  |//-----------------------------------------------------------------------
+  |
+  |// Store counterparts of the vmeta_tget* entry points: CARG2 points to the
+  |// object, CARG3 points to the key, and all variants join at label 1, which
+  |// calls lj_meta_tset.
+  |->vmeta_tsets1:
+  |  // Build a tagged string key from STR:RC in CARG4; object is slot RB.
+  |  movn CARG4, #~LJ_TSTR
+  |   add CARG2, BASE, RB, lsl #3
+  |  add CARG4, STR:RC, CARG4, lsl #47
+  |  b >2
+  |
+  |->vmeta_tsets:
+  |  // Tag CARG2 as a table and pass it via GL->tmptv.
+  |  // NOTE(review): CARG4 is not set here; presumably the caller passes the
+  |  // tagged string key in it -- confirm against the bytecode handlers.
+  |  movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48
+  |  str CARG2, GL->tmptv
+  |  add CARG2, GL, #offsetof(global_State, tmptv)
+  |2:
+  |  // The key is spilled to the TMPD stack slot and passed by address.
+  |   add CARG3, sp, TMPDofs
+  |  str CARG4, TMPD
+  |  b >1
+  |
+  |->vmeta_tsetb:			// RB = table, RC = index
+  |  add RC, RC, TISNUM
+  |   add CARG2, BASE, RB, lsl #3
+  |   add CARG3, sp, TMPDofs
+  |  str RC, TMPD
+  |  b >1
+  |
+  |->vmeta_tsetv:
+  |  add CARG2, BASE, RB, lsl #3
+  |   add CARG3, BASE, RC, lsl #3
+  |1:
+  |   str BASE, L->base
+  |  mov CARG1, L
+  |   str PC, SAVE_PC
+  |  bl extern lj_meta_tset		// (lua_State *L, TValue *o, TValue *k)
+  |  // Returns TValue * (finished) or NULL (metamethod).
+  |  // The value to store (slot RA) is loaded for both outcomes.
+  |   ldr TMP0, [BASE, RA, lsl #3]
+  |  cbz CRET1, >3
+  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
+  |   str TMP0, [CRET1]
+  |  ins_next
+  |
+  |3:  // Call __newindex metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+  |   sub TMP1, BASE, #FRAME_CONT
+  |  ldr BASE, L->top
+  |    mov NARGS8:RC, #24		// 3 args for func(t, k, v).
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Guaranteed to be a function here.
+  |   str TMP0, [BASE, #16]		// Copy value to third argument.
+  |    str PC, [BASE, #-24]		// [cont|PC]
+  |   sub PC, BASE, TMP1
+  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+  |  b ->vm_call_dispatch_f
+  |
+  |->vmeta_tsetr:
+  |  // Sync the VM state (PC and BASE) before calling out, then pass the
+  |  // sign-extended integer key from TMP1w as the third argument.
+  |  // NOTE(review): CARG1/CARG2 are presumably already set by the caller.
+  |  str PC, SAVE_PC
+  |  str BASE, L->base
+  |  sxtw CARG3, TMP1w
+  |  bl extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
+  |  // Returns TValue *.
+  |  b ->BC_TSETR_Z
+  |
+  |//-- Comparison metamethods ---------------------------------------------
+  |
+  |->vmeta_comp:
+  |  // Calls lj_meta_comp with pointers to slots RA and RC and the low byte
+  |  // of INS as the op argument. PC is moved back by one instruction first.
+  |  add CARG2, BASE, RA, lsl #3
+  |   sub PC, PC, #4
+  |  add CARG3, BASE, RC, lsl #3
+  |   str BASE, L->base
+  |  mov CARG1, L
+  |   str PC, SAVE_PC
+  |  uxtb CARG4w, INSw
+  |  bl extern lj_meta_comp  // (lua_State *L, TValue *o1, *o2, int op)
+  |  // Returns 0/1 or TValue * (metamethod).
+  |3:
+  |  // Results above 1 are a TValue * and are handled by vmeta_binop.
+  |  cmp CRET1, #1
+  |  bhi ->vmeta_binop
+  |4:
+  |  // Entered with flags set by the caller: 'lo' steps PC over the following
+  |  // instruction, otherwise PC takes the branch target computed from its
+  |  // 16 bit operand (biased by 0x20000 bytes).
+  |   ldrh RBw, [PC, #2]
+  |    add PC, PC, #4
+  |   add RB, PC, RB, lsl #2
+  |   sub RB, RB, #0x20000
+  |  csel PC, PC, RB, lo
+  |->cont_nop:
+  |  ins_next
+  |
+  |->cont_ra:				// RA = resultptr
+  |  // Copy the result at [RA] into the slot named by the RA operand of the
+  |  // instruction before PC.
+  |  ldr INSw, [PC, #-4]
+  |   ldr TMP0, [RA]
+  |  decode_RA TMP1, INS
+  |   str TMP0, [BASE, TMP1, lsl #3]
+  |  b ->cont_nop
+  |
+  |// Both continuations compare the result against a boolean constant and
+  |// reuse the conditional PC update at label 4 of vmeta_comp. Note that the
+  |// operand order of the two compares differs.
+  |->cont_condt:			// RA = resultptr
+  |  ldr TMP0, [RA]
+  |   mov_true TMP1
+  |  cmp TMP1, TMP0			// Branch if result is true.
+  |  b <4
+  |
+  |->cont_condf:			// RA = resultptr
+  |  ldr TMP0, [RA]
+  |   mov_false TMP1
+  |  cmp TMP0, TMP1			// Branch if result is false.
+  |  b <4
+  |
+  |->vmeta_equal:
+  |  // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
+  |  // Only CARG3 is masked with LJ_GCVMASK here. The result is evaluated by
+  |  // the shared code at label 3 of vmeta_comp.
+  |  and TAB:CARG3, CARG3, #LJ_GCVMASK
+  |  sub PC, PC, #4
+  |   str BASE, L->base
+  |   mov CARG1, L
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_equal  // (lua_State *L, GCobj *o1, *o2, int ne)
+  |  // Returns 0/1 or TValue * (metamethod).
+  |  b <3
+  |
+  |->vmeta_equal_cd:
+  |  // Only has a body in FFI builds: passes the whole instruction word to
+  |  // lj_meta_equal_cd and evaluates the result at label 3 of vmeta_comp.
+  |.if FFI
+  |  sub PC, PC, #4
+  |   str BASE, L->base
+  |   mov CARG1, L
+  |   mov CARG2, INS
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_equal_cd		// (lua_State *L, BCIns op)
+  |  // Returns 0/1 or TValue * (metamethod).
+  |  b <3
+  |.endif
+  |
+  |->vmeta_istype:
+  |  // Marshal the arguments first, then sync the VM state: PC is moved back
+  |  // by one instruction and saved together with BASE before the C call.
+  |  mov CARG1, L
+  |  mov CARG2, RA
+  |  mov CARG3, RC
+  |  sub PC, PC, #4
+  |  str BASE, L->base
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
+  |  b ->cont_nop
+  |
+  |//-- Arithmetic metamethods ---------------------------------------------
+  |
+  |// The entry points below set CARG3/CARG4 to the two operand pointers (stack
+  |// slot via BASE, constant via KBASE) and join at label 1, which calls
+  |// lj_meta_arith with the destination slot RA and the opcode byte of INS.
+  |->vmeta_arith_vn:
+  |  add CARG3, BASE, RB, lsl #3
+  |   add CARG4, KBASE, RC, lsl #3
+  |  b >1
+  |
+  |->vmeta_arith_nv:
+  |  add CARG4, BASE, RB, lsl #3
+  |   add CARG3, KBASE, RC, lsl #3
+  |  b >1
+  |
+  |->vmeta_unm:
+  |  // Unary minus passes the same operand pointer twice.
+  |  add CARG3, BASE, RC, lsl #3
+  |  mov CARG4, CARG3
+  |  b >1
+  |
+  |->vmeta_arith_vv:
+  |  add CARG3, BASE, RB, lsl #3
+  |   add CARG4, BASE, RC, lsl #3
+  |1:
+  |  uxtb CARG5w, INSw
+  |   add CARG2, BASE, RA, lsl #3
+  |    str BASE, L->base
+  |   mov CARG1, L
+  |    str PC, SAVE_PC
+  |  bl extern lj_meta_arith  // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+  |  // Returns NULL (finished) or TValue * (metamethod).
+  |  cbz CRET1, ->cont_nop
+  |
+  |  // Call metamethod for binary op.
+  |->vmeta_binop:
+  |  // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
+  |  // PC becomes the frame delta plus FRAME_CONT; the old PC is saved in the
+  |  // [cont|PC] slot of the new frame.
+  |  sub TMP1, CRET1, BASE
+  |   str PC, [CRET1, #-24]		// [cont|PC]
+  |  add PC, TMP1, #FRAME_CONT
+  |  mov BASE, CRET1
+  |   mov NARGS8:RC, #16		// 2 args for func(o1, o2).
+  |  b ->vm_call_dispatch
+  |
+  |->vmeta_len:
+  |  // Calls lj_meta_len on slot RC. With LJ_52 the table in CARG1 is kept in
+  |  // RC across the call and a NULL result retries at BC_LEN_Z; without
+  |  // LJ_52 the result always goes to vmeta_binop.
+  |  add CARG2, BASE, RC, lsl #3
+#if LJ_52
+  |  mov TAB:RC, TAB:CARG1		// Save table (ignored for other types).
+#endif
+  |   str BASE, L->base
+  |  mov CARG1, L
+  |   str PC, SAVE_PC
+  |  bl extern lj_meta_len		// (lua_State *L, TValue *o)
+  |  // Returns NULL (retry) or TValue * (metamethod base).
+#if LJ_52
+  |  cbnz CRET1, ->vmeta_binop		// Binop call for compatibility.
+  |  mov TAB:CARG1, TAB:RC
+  |  b ->BC_LEN_Z
+#else
+  |  b ->vmeta_binop			// Binop call for compatibility.
+#endif
+  |
+  |//-- Call metamethod ----------------------------------------------------
+  |
+  |->vmeta_call:			// Resolve and call __call metamethod.
+  |  // RB = old base, BASE = new base, RC = nargs*8
+  |  // lj_meta_call gets func = BASE-16 and top = BASE+nargs*8. Afterwards the
+  |  // function is reloaded from the frame and the argument count is bumped.
+  |  mov CARG1, L
+  |   str RB, L->base			// This is the callers base!
+  |  sub CARG2, BASE, #16
+  |   str PC, SAVE_PC
+  |  add CARG3, BASE, NARGS8:RC
+  |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Guaranteed to be a function here.
+  |   add NARGS8:RC, NARGS8:RC, #8	// Got one more argument now.
+  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+  |  ins_call
+  |
+  |->vmeta_callt:			// Resolve __call for BC_CALLT.
+  |  // BASE = old base, RA = new base, RC = nargs*8
+  |  // Same as vmeta_call, but relative to RA. PC is reloaded from the old
+  |  // frame before continuing at BC_CALLT2_Z.
+  |  mov CARG1, L
+  |   str BASE, L->base
+  |  sub CARG2, RA, #16
+  |   str PC, SAVE_PC
+  |  add CARG3, RA, NARGS8:RC
+  |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
+  |  ldr TMP1, [RA, FRAME_FUNC]		// Guaranteed to be a function here.
+  |   ldr PC, [BASE, FRAME_PC]
+  |   add NARGS8:RC, NARGS8:RC, #8	// Got one more argument now.
+  |  and LFUNC:CARG3, TMP1, #LJ_GCVMASK
+  |  b ->BC_CALLT2_Z
+  |
+  |//-- Argument coercion for 'for' statement ------------------------------
+  |
+  |->vmeta_for:
+  |  // Calls lj_meta_for(L, RA) to coerce the loop arguments, then re-decodes
+  |  // the instruction before PC and jumps back to its handler: BC_JFORI in
+  |  // JIT builds if the opcode byte matches, BC_FORI otherwise.
+  |  mov CARG1, L
+  |   str BASE, L->base
+  |  mov CARG2, RA
+  |   str PC, SAVE_PC
+  |  bl extern lj_meta_for	// (lua_State *L, TValue *base)
+  |  ldr INSw, [PC, #-4]
+  |.if JIT
+  |  // UXTB only exists with 32 bit operands on A64, so use the w views
+  |  // (as the other uxtb uses in this file do). Writing TMP0w zeroes the
+  |  // upper half of TMP0.
+  |   uxtb TMP0w, INSw
+  |.endif
+  |  decode_RA RA, INS
+  |  decode_RD RC, INS
+  |.if JIT
+  |   cmp TMP0w, #BC_JFORI
+  |   beq =>BC_JFORI
+  |.endif
+  |  b =>BC_FORI
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Fast functions -----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// Fast function entry macros. Each defines the global label ff_<name>.
+  |// The _1/_2 variants preload one/two arguments into CARG1(/CARG2) and
+  |// branch to fff_fallback if fewer arguments were passed. The _n/_nn
+  |// variants additionally load the arguments into FARG1(/FARG2) and fall
+  |// back unless checknum passes for every argument.
+  |.macro .ffunc, name
+  |->ff_ .. name:
+  |.endmacro
+  |
+  |.macro .ffunc_1, name
+  |->ff_ .. name:
+  |  ldr CARG1, [BASE]
+  |   cmp NARGS8:RC, #8
+  |   blo ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_2, name
+  |->ff_ .. name:
+  |  ldp CARG1, CARG2, [BASE]
+  |   cmp NARGS8:RC, #16
+  |   blo ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_n, name
+  |  .ffunc name
+  |  ldr CARG1, [BASE]
+  |   cmp NARGS8:RC, #8
+  |  ldr FARG1, [BASE]
+  |   blo ->fff_fallback
+  |  checknum CARG1, ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_nn, name
+  |  .ffunc name
+  |  ldp CARG1, CARG2, [BASE]
+  |   cmp NARGS8:RC, #16
+  |  ldp FARG1, FARG2, [BASE]
+  |   blo ->fff_fallback
+  |  checknum CARG1, ->fff_fallback
+  |  checknum CARG2, ->fff_fallback
+  |.endmacro
+  |
+  |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
+  |// Calls fff_gcstep unless gc.total compares less than (signed) the value
+  |// loaded right after it. Also uses the local label 1.
+  |.macro ffgccheck
+  |  ldp CARG1, CARG2, GL->gc.total	// Assumes threshold follows total.
+  |  cmp CARG1, CARG2
+  |  blt >1
+  |  bl ->fff_gcstep
+  |1:
+  |.endmacro
+  |
+  |//-- Base library: checks -----------------------------------------------
+  |
+  |// assert: falls back if the first argument compares unsigned >= the false
+  |// constant. Otherwise all arguments are returned: the first one is stored
+  |// to BASE-16 and the loop moves the remaining ones down by two slots.
+  |.ffunc_1 assert
+  |   ldr PC, [BASE, FRAME_PC]
+  |  mov_false TMP1
+  |  cmp CARG1, TMP1
+  |  bhs ->fff_fallback
+  |  str CARG1, [BASE, #-16]
+  |  sub RB, BASE, #8
+  |  subs RA, NARGS8:RC, #8
+  |   add RC, NARGS8:RC, #8		// Compute (nresults+1)*8.
+  |  cbz RA, ->fff_res			// Done if exactly 1 argument.
+  |1:
+  |  // RA = remaining bytes to copy, RB = destination (post-incremented).
+  |   ldr CARG1, [RB, #16]
+  |  sub RA, RA, #8
+  |   str CARG1, [RB], #8
+  |  cbnz RA, <1
+  |  b ->fff_res
+  |
+  |// type: computes an index from the tag in the top bits of the argument
+  |// (all values selected by the 'lo' condition share the index ~LJ_TISNUM)
+  |// and returns the upvalue of the C function in CARG3 at that index.
+  |// NOTE(review): the upvalues presumably hold the type name strings.
+  |.ffunc_1 type
+  |  mov TMP0, #~LJ_TISNUM
+  |  asr ITYPE, CARG1, #47
+  |  cmn ITYPE, #~LJ_TISNUM
+  |  csinv TMP1, TMP0, ITYPE, lo
+  |  add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8
+  |  ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3]
+  |  b ->fff_restv
+  |
+  |//-- Base library: getters and setters ---------------------------------
+  |
+  |// getmetatable: tables and userdata use their own metatable field, all
+  |// other types use the per-type base metatable from the GC roots (label 6).
+  |// If the metatable has a non-nil __metatable field, that value is returned
+  |// instead of the metatable itself.
+  |.ffunc_1 getmetatable
+  |  asr ITYPE, CARG1, #47
+  |  cmn ITYPE, #-LJ_TTAB
+  |  ccmn ITYPE, #-LJ_TUDATA, #4, ne
+  |   and TAB:CARG1, CARG1, #LJ_GCVMASK
+  |  bne >6
+  |1:  // Field metatable must be at same offset for GCtab and GCudata!
+  |  ldr TAB:RB, TAB:CARG1->metatable
+  |2:
+  |  // No metatable: return nil.
+  |   mov CARG1, TISNIL
+  |   ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
+  |  cbz TAB:RB, ->fff_restv
+  |  ldr TMP1w, TAB:RB->hmask
+  |   ldr TMP2w, STR:RC->hash
+  |    ldr NODE:CARG3, TAB:RB->node
+  |  and TMP1w, TMP1w, TMP2w		// idx = str->hash & tab->hmask
+  |  add TMP1, TMP1, TMP1, lsl #1
+  |  movn CARG4, #~LJ_TSTR
+  |    add NODE:CARG3, NODE:CARG3, TMP1, lsl #3  // node = tab->node + idx*3*8
+  |  add CARG4, STR:RC, CARG4, lsl #47	// Tagged key to look for.
+  |3:  // Rearranged logic, because we expect _not_ to find the key.
+  |  // CARG1 = node value, TMP0 = node key.
+  |  ldp CARG1, TMP0, NODE:CARG3->val
+  |   ldr NODE:CARG3, NODE:CARG3->next
+  |  cmp TMP0, CARG4
+  |  beq >5
+  |  cbnz NODE:CARG3, <3
+  |4:
+  |  mov CARG1, RB			// Use metatable as default result.
+  |  movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48
+  |  b ->fff_restv
+  |5:
+  |  // Key found. Check the *value*: the key in TMP0 just matched the tagged
+  |  // string and can never be nil. A nil value means the field is unset, so
+  |  // the metatable itself is returned.
+  |  cmp CARG1, TISNIL
+  |  bne ->fff_restv
+  |  b <4
+  |
+  |6:
+  |  // Not a table or userdata: clamp the type tag and index the base
+  |  // metatable array, which is addressed by the negated tag.
+  |  movn TMP0, #~LJ_TISNUM
+  |  cmp ITYPE, TMP0
+  |  csel ITYPE, ITYPE, TMP0, hs
+  |  sub TMP1, GL, ITYPE, lsl #3
+  |  ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8]
+  |  b <2
+  |
+  |.ffunc_2 setmetatable
+  |  // Fast path: no mt for table yet and not clearing the mt.
+  |  // Falls back unless arg 1 is a table, arg 2 is a table and the current
+  |  // metatable field is NULL. The first argument (still tagged in CARG1) is
+  |  // the result.
+  |  checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
+  |   ldr TAB:TMP0, TAB:TMP1->metatable
+  |  asr ITYPE, CARG2, #47
+  |   ldrb TMP2w, TAB:TMP1->marked
+  |  cmn ITYPE, #-LJ_TTAB
+  |    and TAB:CARG2, CARG2, #LJ_GCVMASK
+  |  ccmp TAB:TMP0, #0, #0, eq
+  |  bne ->fff_fallback
+  |    str TAB:CARG2, TAB:TMP1->metatable
+  |   tbz TMP2w, #2, ->fff_restv	// isblack(table)
+  |  barrierback TAB:TMP1, TMP2w, TMP0
+  |  b ->fff_restv
+  |
+  |// rawget: needs at least two arguments and a table as the first one. The
+  |// key is passed by address (BASE+8) to lj_tab_get and the TValue it
+  |// returns is loaded as the result.
+  |.ffunc rawget
+  |  ldr CARG2, [BASE]
+  |   cmp NARGS8:RC, #16
+  |   blo ->fff_fallback
+  |  checktab CARG2, ->fff_fallback
+  |   mov CARG1, L
+  |   add CARG3, BASE, #8
+  |  bl extern lj_tab_get  // (lua_State *L, GCtab *t, cTValue *key)
+  |  // Returns cTValue *.
+  |  ldr CARG1, [CRET1]
+  |  b ->fff_restv
+  |
+  |//-- Base library: conversions ------------------------------------------
+  |
+  |.ffunc tonumber
+  |  // Only handles the number case inline (without a base argument).
+  |  // Anything but exactly one argument passing checknumber falls back; the
+  |  // argument itself is returned unchanged.
+  |  ldr CARG1, [BASE]
+  |   cmp NARGS8:RC, #8
+  |   bne ->fff_fallback
+  |  checknumber CARG1, ->fff_fallback
+  |  b ->fff_restv
+  |
+  |.ffunc_1 tostring
+  |  // Only handles the string or number case inline.
+  |  asr ITYPE, CARG1, #47
+  |  cmn ITYPE, #-LJ_TSTR
+  |  // A __tostring method in the string base metatable is ignored.
+  |  beq ->fff_restv
+  |  // Handle numbers inline, unless a number base metatable is present.
+  |  ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
+  |   str BASE, L->base
+  |  cmn ITYPE, #-LJ_TISNUM
+  |  // Combined test: falls back unless the type check yields 'ls' and the
+  |  // base metatable pointer in TMP1 is zero.
+  |  ccmp TMP1, #0, #0, ls
+  |   str PC, SAVE_PC			// Redundant (but a defined value).
+  |  bne ->fff_fallback
+  |  ffgccheck
+  |  mov CARG1, L
+  |  mov CARG2, BASE
+  |  bl extern lj_strfmt_number		// (lua_State *L, cTValue *o)
+  |  // Returns GCstr *.
+  |  // Tag the returned string pointer with LJ_TSTR.
+  |   movn TMP1, #~LJ_TSTR
+  |  ldr BASE, L->base
+  |   add CARG1, CARG1, TMP1, lsl #47
+  |  b ->fff_restv
+  |
+  |//-- Base library: iterators -------------------------------------------
+  |
+  |// next: requires a table as the first argument. The key slot at BASE+8 is
+  |// passed by address to lj_tab_next; on a non-zero result the two slots at
+  |// BASE+8/BASE+16 are copied to the result slots.
+  |.ffunc_1 next
+  |  checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback
+  |  str TISNIL, [BASE, NARGS8:RC]	// Set missing 2nd arg to nil.
+  |  ldr PC, [BASE, FRAME_PC]
+  |   stp BASE, BASE, L->base		// Add frame since C call can throw.
+  |  mov CARG1, L
+  |  add CARG3, BASE, #8
+  |   str PC, SAVE_PC
+  |  bl extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
+  |  // Returns 0 at end of traversal.
+  |  str TISNIL, [BASE, #-16]
+  |  cbz CRET1, ->fff_res1		// End of traversal: return nil.
+  |  ldp CARG1, CARG2, [BASE, #8]	// Copy key and value to results.
+  |    mov RC, #(2+1)*8
+  |  stp CARG1, CARG2, [BASE, #-16]
+  |  b ->fff_res
+  |
+  |// pairs: for a table argument returns three results: upvalue[0] of the
+  |// function in CARG3, the table itself and nil. With LJ_52 a table that has
+  |// a metatable takes the fallback path instead.
+  |.ffunc_1 pairs
+  |  checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
+#if LJ_52
+  |  ldr TAB:CARG2, TAB:TMP1->metatable
+#endif
+  |   ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
+  |    ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+  |  cbnz TAB:CARG2, ->fff_fallback
+#endif
+  |  mov RC, #(3+1)*8
+  |  stp CARG1, TISNIL, [BASE, #-8]
+  |   str CFUNC:CARG4, [BASE, #-16]
+  |  b ->fff_res
+  |
+  |// ipairs_aux: takes (table, int index). Increments the index and stores it
+  |// as the first result. A non-nil value at the new index is returned as the
+  |// second result; otherwise no results are returned.
+  |.ffunc_2 ipairs_aux
+  |  checktab CARG1, ->fff_fallback
+  |   checkint CARG2, ->fff_fallback
+  |  ldr TMP1w, TAB:CARG1->asize
+  |   ldr CARG3, TAB:CARG1->array
+  |    ldr TMP0w, TAB:CARG1->hmask
+  |  add CARG2w, CARG2w, #1
+  |  cmp CARG2w, TMP1w
+  |    ldr PC, [BASE, FRAME_PC]
+  |     add TMP2, CARG2, TISNUM
+  |   mov RC, #(0+1)*8
+  |     str TMP2, [BASE, #-16]
+  |  bhs >2				// Not in array part?
+  |  ldr TMP0, [CARG3, CARG2, lsl #3]
+  |1:
+  |  // TMP0 = value. RC stays at 0 results if it is nil, else 2 results.
+  |   mov TMP1, #(2+1)*8
+  |   cmp TMP0, TISNIL
+  |  str TMP0, [BASE, #-8]
+  |   csel RC, RC, TMP1, eq
+  |  b ->fff_res
+  |2:  // Check for empty hash part first. Otherwise call C function.
+  |  // CARG1 = table and CARG2 = new index are already in place for the call.
+  |  cbz TMP0w, ->fff_res
+  |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
+  |  // Returns cTValue * or NULL.
+  |  cbz CRET1, ->fff_res
+  |  ldr TMP0, [CRET1]
+  |  b <1
+  |
+  |// ipairs: same shape as pairs, but the third result is TISNUM, i.e. an
+  |// integer-tagged value with a zero payload, instead of nil.
+  |.ffunc_1 ipairs
+  |  checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
+#if LJ_52
+  |  ldr TAB:CARG2, TAB:TMP1->metatable
+#endif
+  |   ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
+  |    ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+  |  cbnz TAB:CARG2, ->fff_fallback
+#endif
+  |  mov RC, #(3+1)*8
+  |  stp CARG1, TISNUM, [BASE, #-8]
+  |   str CFUNC:CARG4, [BASE, #-16]
+  |  b ->fff_res
+  |
+  |//-- Base library: catch errors ----------------------------------------
+  |
+  |// pcall/xpcall: set up a protected call frame and enter vm_call_dispatch.
+  |// The new BASE is 16 (pcall) or 24 (xpcall) bytes above the old one, and
+  |// PC is that delta plus FRAME_PCALL plus the hook-active bit extracted
+  |// from GL->hookmask. xpcall shares the argument move loop of pcall.
+  |.ffunc pcall
+  |  ldrb TMP0w, GL->hookmask
+  |   subs NARGS8:RC, NARGS8:RC, #8
+  |   blo ->fff_fallback
+  |    mov RB, BASE
+  |    add BASE, BASE, #16
+  |  ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
+  |  add PC, TMP0, #16+FRAME_PCALL
+  |  // Flags are still from the subs above: eq = no arguments left to move.
+  |   beq ->vm_call_dispatch
+  |1:
+  |   add TMP2, BASE, NARGS8:RC
+  |2:
+  |  // Move each slot up by 8 bytes, walking from the top down to BASE.
+  |   ldr TMP0, [TMP2, #-16]
+  |   str TMP0, [TMP2, #-8]!
+  |  cmp TMP2, BASE
+  |  bne <2
+  |  b ->vm_call_dispatch
+  |
+  |.ffunc xpcall
+  |     ldp CARG1, CARG2, [BASE]
+  |  ldrb TMP0w, GL->hookmask
+  |   subs NARGS8:RC, NARGS8:RC, #16
+  |   blo ->fff_fallback
+  |    mov RB, BASE
+  |    add BASE, BASE, #24
+  |     asr ITYPE, CARG2, #47
+  |  ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
+  |     cmn ITYPE, #-LJ_TFUNC
+  |  add PC, TMP0, #24+FRAME_PCALL
+  |     bne ->fff_fallback		// Traceback must be a function.
+  |     stp CARG2, CARG1, [RB]		// Swap function and traceback.
+  |   cbz NARGS8:RC, ->vm_call_dispatch
+  |  b <1
+  |
+  |//-- Coroutine library --------------------------------------------------
+  |
+  |// Shared body for coroutine.resume (resume = 1) and the function returned
+  |// by coroutine.wrap (resume = 0). The variants differ in where the thread
+  |// comes from (first argument vs. upvalue[0]) and in how results and errors
+  |// are returned (true/false prepended vs. plain results / error rethrown).
+  |.macro coroutine_resume_wrap, resume
+  |.if resume
+  |.ffunc_1 coroutine_resume
+  |  checktp CARG1, LJ_TTHREAD, ->fff_fallback
+  |.else
+  |.ffunc coroutine_wrap_aux
+  |  ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr
+  |  and L:CARG1, CARG1, #LJ_GCVMASK
+  |.endif
+  |   ldr PC, [BASE, FRAME_PC]
+  |     str BASE, L->base
+  |  ldp RB, CARG2, L:CARG1->base
+  |   ldrb TMP1w, L:CARG1->status
+  |  // Fall back if co->top + co->status == co->base.
+  |  add TMP0, CARG2, TMP1
+  |   str PC, SAVE_PC
+  |  cmp TMP0, RB
+  |  beq ->fff_fallback
+  |   cmp TMP1, #LUA_YIELD
+  |    add TMP0, CARG2, #8
+  |   csel CARG2, CARG2, TMP0, hs
+  |   ldr CARG4, L:CARG1->maxstack
+  |   add CARG3, CARG2, NARGS8:RC
+  |    ldr RB, L:CARG1->cframe
+  |  // Chained compare: fall back if status > LUA_YIELD, if the new top would
+  |  // exceed co->maxstack or if co->cframe is non-zero.
+  |   ccmp CARG3, CARG4, #2, ls
+  |    ccmp RB, #0, #2, ls
+  |    bhi ->fff_fallback
+  |.if resume
+  |  sub CARG3, CARG3, #8		// Keep resumed thread in stack for GC.
+  |  add BASE, BASE, #8
+  |  sub NARGS8:RC, NARGS8:RC, #8
+  |.endif
+  |  str CARG3, L:CARG1->top
+  |  str BASE, L->top
+  |  cbz NARGS8:RC, >3
+  |2:  // Move args to coroutine.
+  |  // RB (zero here, checked above) serves as the byte offset.
+  |   ldr TMP0, [BASE, RB]
+  |  cmp RB, NARGS8:RC
+  |   str TMP0, [CARG2, RB]
+  |   add RB, RB, #8
+  |  bne <2
+  |3:
+  |  mov CARG3, #0
+  |   mov L:RA, L:CARG1
+  |  mov CARG4, #0
+  |  bl ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
+  |  // Returns thread status.
+  |4:
+  |  ldp CARG3, CARG4, L:RA->base
+  |   cmp CRET1, #LUA_YIELD
+  |  ldr BASE, L->base
+  |    str L, GL->cur_L
+  |    st_vmstate ST_INTERP
+  |   bhi >8
+  |  // RC = size of the results in bytes (co->top - co->base).
+  |  sub RC, CARG4, CARG3
+  |   ldr CARG1, L->maxstack
+  |   add CARG2, BASE, RC
+  |  cbz RC, >6				// No results?
+  |  cmp CARG2, CARG1
+  |   mov RB, #0
+  |  bhi >9				// Need to grow stack?
+  |
+  |  sub CARG4, RC, #8
+  |   str CARG3, L:RA->top		// Clear coroutine stack.
+  |5:  // Move results from coroutine.
+  |   ldr TMP0, [CARG3, RB]
+  |  cmp RB, CARG4
+  |   str TMP0, [BASE, RB]
+  |   add RB, RB, #8
+  |  bne <5
+  |6:
+  |.if resume
+  |  mov_true TMP1
+  |   add RC, RC, #16
+  |7:
+  |  str TMP1, [BASE, #-8]		// Prepend true/false to results.
+  |   sub RA, BASE, #8
+  |.else
+  |   mov RA, BASE
+  |   add RC, RC, #8
+  |.endif
+  |  ands CARG1, PC, #FRAME_TYPE
+  |   str PC, SAVE_PC
+  |   str RCw, SAVE_MULTRES
+  |  beq ->BC_RET_Z
+  |  b ->vm_return
+  |
+  |8:  // Coroutine returned with error (at co->top-1).
+  |.if resume
+  |  ldr TMP0, [CARG4, #-8]!
+  |   mov_false TMP1
+  |    mov RC, #(2+1)*8
+  |  str CARG4, L:RA->top		// Remove error from coroutine stack.
+  |  str TMP0, [BASE]			// Copy error message.
+  |  b <7
+  |.else
+  |  mov CARG1, L
+  |  mov CARG2, L:RA
+  |  bl extern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co)
+  |  // Never returns.
+  |.endif
+  |
+  |9:  // Handle stack expansion on return from yield.
+  |  // Grows the stack by RC/8 slots, then retries at label 4 with a zero
+  |  // status in CRET1.
+  |  mov CARG1, L
+  |  lsr CARG2, RC, #3
+  |  bl extern lj_state_growstack	// (lua_State *L, int n)
+  |  mov CRET1, #0
+  |  b <4
+  |.endmacro
+  |
+  |  coroutine_resume_wrap 1		// coroutine.resume
+  |  coroutine_resume_wrap 0		// coroutine.wrap
+  |
+  |// coroutine.yield: falls back unless bit 0 of L->cframe is set. Otherwise
+  |// stores base/top, clears L->cframe, sets L->status to LUA_YIELD and leaves
+  |// the VM with LUA_YIELD in CRET1.
+  |.ffunc coroutine_yield
+  |  ldr TMP0, L->cframe
+  |   add TMP1, BASE, NARGS8:RC
+  |    mov CRET1, #LUA_YIELD
+  |   stp BASE, TMP1, L->base
+  |  tbz TMP0, #0, ->fff_fallback
+  |   str xzr, L->cframe
+  |    strb CRET1w, L->status
+  |  b ->vm_leave_unw
+  |
+  |//-- Math library -------------------------------------------------------
+  |
+  |// Defines ff_math_<func>, which applies the FP rounding instruction
+  |// 'round' to the first argument. An argument whose upper 32 bits equal
+  |// TISNUMhi is returned unchanged; if TISNUMhi compares unsigned lower than
+  |// those bits, the fallback is taken.
+  |.macro math_round, func, round
+  |  .ffunc math_ .. func
+  |  ldr CARG1, [BASE]
+  |   cmp NARGS8:RC, #8
+  |  ldr d0, [BASE]
+  |   blo ->fff_fallback
+  |  cmp TISNUMhi, CARG1, lsr #32
+  |  beq ->fff_restv
+  |  blo ->fff_fallback
+  |  round d0, d0
+  |  b ->fff_resn
+  |.endmacro
+  |
+  |  math_round floor, frintm
+  |  math_round ceil, frintp
+  |
+  |// math.abs: the 'and' clears the sign bit without touching the flags, so
+  |// the 'bne' still tests the outcome of checknumber.
+  |// NOTE(review): 'ne' presumably means "not an integer" -- confirm against
+  |// the checknumber macro. The remaining path computes the 32 bit integer
+  |// absolute value and substitutes the FP constant 2^31 if the result is
+  |// negative.
+  |.ffunc_1 math_abs
+  |  checknumber CARG1, ->fff_fallback
+  |  and CARG1, CARG1, #U64x(7fffffff,ffffffff)
+  |  bne ->fff_restv
+  |  eor CARG2w, CARG1w, CARG1w, asr #31
+  |   movz CARG3, #0x41e0, lsl #48	// 2^31.
+  |  subs CARG1w, CARG2w, CARG1w, asr #31
+  |   add CARG1, CARG1, TISNUM
+  |  csel CARG1, CARG1, CARG3, pl
+  |  // Fallthrough.
+  |
+  |// Common return paths for fast functions. Results start at BASE-16.
+  |->fff_restv:
+  |  // CARG1 = TValue result.
+  |  ldr PC, [BASE, FRAME_PC]
+  |  str CARG1, [BASE, #-16]
+  |->fff_res1:
+  |  // PC = return.
+  |  mov RC, #(1+1)*8
+  |->fff_res:
+  |  // RC = (nresults+1)*8, PC = return.
+  |  // A non-zero frame type goes through vm_return. Otherwise the calling
+  |  // instruction is decoded and the return is completed inline.
+  |  ands CARG1, PC, #FRAME_TYPE
+  |   str RCw, SAVE_MULTRES
+  |   sub RA, BASE, #16
+  |  bne ->vm_return
+  |  ldr INSw, [PC, #-4]
+  |  decode_RB RB, INS
+  |5:
+  |  cmp RC, RB, lsl #3			// More results expected?
+  |  blo >6
+  |  decode_RA TMP1, INS
+  |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
+  |  sub BASE, RA, TMP1, lsl #3
+  |  ins_next
+  |
+  |6:  // Fill up results with nil.
+  |  add TMP1, RA, RC
+  |   add RC, RC, #8
+  |  str TISNIL, [TMP1, #-8]
+  |  b <5
+  |
+  |// Define ff_math_<func> as a call to the external function of the same
+  |// name with one (math_extern) or two (math_extern2) checked number
+  |// arguments. The result is returned via fff_resn.
+  |.macro math_extern, func
+  |  .ffunc_n math_ .. func
+  |  bl extern func
+  |  b ->fff_resn
+  |.endmacro
+  |
+  |.macro math_extern2, func
+  |  .ffunc_nn math_ .. func
+  |  bl extern func
+  |  b ->fff_resn
+  |.endmacro
+  |
+  |// math.sqrt, followed by the shared return path for an FP result in d0.
+  |.ffunc_n math_sqrt
+  |  fsqrt d0, d0
+  |->fff_resn:
+  |  // d0 = number result. Store it as the single result at BASE-16.
+  |  ldr PC, [BASE, FRAME_PC]
+  |  str d0, [BASE, #-16]
+  |  b ->fff_res1
+  |
+  |// math.log: only the one-argument form is handled inline; any other
+  |// argument count or a failed checknum takes the fallback.
+  |.ffunc math_log
+  |  ldr CARG1, [BASE]
+  |   cmp NARGS8:RC, #8
+  |  ldr FARG1, [BASE]
+  |   bne ->fff_fallback			// Need exactly 1 argument.
+  |  checknum CARG1, ->fff_fallback
+  |  bl extern log
+  |  b ->fff_resn
+  |
+  |  // Math functions that are plain calls to the external function of the
+  |  // same name: one-argument forms first, then the two-argument forms.
+  |  math_extern log10
+  |  math_extern exp
+  |  math_extern sin
+  |  math_extern cos
+  |  math_extern tan
+  |  math_extern asin
+  |  math_extern acos
+  |  math_extern atan
+  |  math_extern sinh
+  |  math_extern cosh
+  |  math_extern tanh
+  |  math_extern2 pow
+  |  math_extern2 atan2
+  |  math_extern2 fmod
+  |
+  |// math.ldexp: first argument must pass checknum, second must pass
+  |// checkint. The sign-extended integer is passed in CARG1, x in FARG1.
+  |.ffunc_2 math_ldexp
+  |  ldr FARG1, [BASE]
+  |  checknum CARG1, ->fff_fallback
+  |  checkint CARG2, ->fff_fallback
+  |  sxtw CARG1, CARG2w
+  |  bl extern ldexp			// (double x, int exp)
+  |  b ->fff_resn
+  |
+  |// math.frexp: frexp gets the address of the TMPD stack slot for its second
+  |// output. Returns two results: d0 and the 32 bit value read back from
+  |// TMPD, tagged as an integer.
+  |.ffunc_n math_frexp
+  |  add CARG1, sp, TMPDofs
+  |  bl extern frexp
+  |   ldr CARG2w, TMPD
+  |    ldr PC, [BASE, FRAME_PC]
+  |  str d0, [BASE, #-16]
+  |    mov RC, #(2+1)*8
+  |   add CARG2, CARG2, TISNUM
+  |   str CARG2, [BASE, #-8]
+  |  b ->fff_res
+  |
+  |// math.modf: modf gets the address of the first result slot (BASE-16) for
+  |// its pointer output; the value it returns in d0 becomes the second result.
+  |.ffunc_n math_modf
+  |  sub CARG1, BASE, #16
+  |   ldr PC, [BASE, FRAME_PC]
+  |  bl extern modf
+  |   mov RC, #(2+1)*8
+  |  str d0, [BASE, #-8]
+  |  b ->fff_res
+  |
+  |// Defines a min/max style fast function over all arguments. 'cond' is the
+  |// condition under which the next integer argument replaces the current
+  |// integer result, 'fcond' the same for the FP path. The loop starts in the
+  |// integer path and switches to the FP path (d0 = accumulator) on the first
+  |// argument that fails checkint. RA walks the arguments, RB = end.
+  |// NOTE(review): the 'blo ->fff_fallback' tests rely on the flags left by
+  |// the checkint/checknum macros (scvtf and ldr do not change flags);
+  |// presumably 'lo' means "not a number at all" -- confirm in the macros.
+  |.macro math_minmax, name, cond, fcond
+  |  .ffunc_1 name
+  |   add RB, BASE, RC
+  |   add RA, BASE, #8
+  |  checkint CARG1, >4
+  |1:  // Handle integers.
+  |  ldr CARG2, [RA]
+  |   cmp RA, RB
+  |   bhs ->fff_restv
+  |  checkint CARG2, >3
+  |  cmp CARG1w, CARG2w
+  |   add RA, RA, #8
+  |  csel CARG1, CARG2, CARG1, cond
+  |  b <1
+  |3:  // Convert intermediate result to number and continue below.
+  |  scvtf d0, CARG1w
+  |  blo ->fff_fallback
+  |  ldr d1, [RA]
+  |  b >6
+  |
+  |4:
+  |  ldr d0, [BASE]
+  |  blo ->fff_fallback
+  |5:  // Handle numbers.
+  |  ldr CARG2, [RA]
+  |  ldr d1, [RA]
+  |   cmp RA, RB
+  |   bhs ->fff_resn
+  |  checknum CARG2, >7
+  |6:
+  |  fcmp d0, d1
+  |   add RA, RA, #8
+  |  fcsel d0, d1, d0, fcond
+  |  b <5
+  |7:  // Convert integer to number and continue above.
+  |  scvtf d1, CARG2w
+  |  blo ->fff_fallback
+  |  b <6
+  |.endmacro
+  |
+  |  math_minmax math_min, gt, hi
+  |  math_minmax math_max, lt, lo
+  |
+  |//-- String library -----------------------------------------------------
+  |
+  |// string.byte(s): requires exactly one argument and a string. Returns the
+  |// first byte as an integer, or no results if the length is zero.
+  |.ffunc string_byte			// Only handle the 1-arg case here.
+  |  ldp PC, CARG1, [BASE, FRAME_PC]
+  |   cmp NARGS8:RC, #8
+  |  asr ITYPE, CARG1, #47
+  |  ccmn ITYPE, #-LJ_TSTR, #0, eq
+  |   and STR:CARG1, CARG1, #LJ_GCVMASK
+  |  bne ->fff_fallback
+  |  ldrb TMP0w, STR:CARG1[1]		// Access is always ok (NUL at end).
+  |   ldr CARG3w, STR:CARG1->len
+  |  add TMP0, TMP0, TISNUM
+  |  str TMP0, [BASE, #-16]
+  |  mov RC, #(0+1)*8
+  |   cbz CARG3, ->fff_res
+  |  b ->fff_res1
+  |
+  |// string.char(c): requires exactly one integer argument with a low word
+  |// <= 255 (unsigned). The one-byte string is created directly from the
+  |// argument's stack slot. fff_newstr and fff_resstr are shared with other
+  |// string fast functions.
+  |.ffunc string_char			// Only handle the 1-arg case here.
+  |  ffgccheck
+  |  ldp PC, CARG1, [BASE, FRAME_PC]
+  |  cmp CARG1w, #255
+  |   ccmp NARGS8:RC, #8, #0, ls		// Need exactly 1 argument.
+  |  bne ->fff_fallback
+  |  checkint CARG1, ->fff_fallback
+  |  mov CARG3, #1
+  |  mov CARG2, BASE			// Points to stack. Little-endian.
+  |->fff_newstr:
+  |  // CARG2 = str, CARG3 = len.
+  |   str BASE, L->base
+  |  mov CARG1, L
+  |   str PC, SAVE_PC
+  |  bl extern lj_str_new		// (lua_State *L, char *str, size_t l)
+  |->fff_resstr:
+  |  // Returns GCstr *.
+  |  // Reload BASE and tag the string pointer with LJ_TSTR.
+  |  ldr BASE, L->base
+  |   movn TMP1, #~LJ_TSTR
+  |  add CARG1, CARG1, TMP1, lsl #47
+  |  b ->fff_restv
+  |
+  |// string.sub(s, start [, end]): end defaults to -1 when only two arguments
+  |// are given. Both indexes must be integers. After normalization the
+  |// substring is created via fff_newstr, or the empty string is returned if
+  |// end - start is negative.
+  |.ffunc string_sub
+  |  ffgccheck
+  |  ldr CARG1, [BASE]
+  |    ldr CARG3, [BASE, #16]
+  |   cmp NARGS8:RC, #16
+  |    movn RB, #0
+  |   beq >1
+  |   blo ->fff_fallback
+  |    checkint CARG3, ->fff_fallback
+  |    sxtw RB, CARG3w
+  |1:
+  |  ldr CARG2, [BASE, #8]
+  |  checkstr CARG1, ->fff_fallback
+  |   ldr TMP1w, STR:CARG1->len
+  |  checkint CARG2, ->fff_fallback
+  |  sxtw CARG2, CARG2w
+  |  // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end
+  |   add TMP2, RB, TMP1
+  |   cmp RB, #0
+  |  add TMP0, CARG2, TMP1
+  |   csinc RB, RB, TMP2, ge		// if (end < 0) end += len+1
+  |  cmp CARG2, #0
+  |  csinc CARG2, CARG2, TMP0, ge	// if (start < 0) start += len+1
+  |   cmp RB, #0
+  |   csel RB, RB, xzr, ge		// if (end < 0) end = 0
+  |  cmp CARG2, #1
+  |  csinc CARG2, CARG2, xzr, ge	// if (start < 1) start = 1
+  |   cmp RB, TMP1
+  |   csel RB, RB, TMP1, le		// if (end > len) end = len
+  |  // CARG1 + start then addresses the first character (start is 1-based).
+  |  add CARG1, STR:CARG1, #sizeof(GCstr)-1
+  |   subs CARG3, RB, CARG2		// len = end - start
+  |  add CARG2, CARG1, CARG2
+  |   add CARG3, CARG3, #1		// len += 1
+  |   bge ->fff_newstr
+  |  // Negative length: return the tagged address of GL's strempty field.
+  |  add STR:CARG1, GL, #offsetof(global_State, strempty)
+  |   movn TMP1, #~LJ_TSTR
+  |  add CARG1, CARG1, TMP1, lsl #47
+  |  b ->fff_restv
+  |
+  |// Defines the fast function string_<name>, implemented by calling
+  |// lj_buf_putstr_<name> on the global temporary buffer followed by
+  |// lj_buf_tostr. Requires at least one argument, the first being a string.
+  |.macro ffstring_op, name
+  |  .ffunc string_ .. name
+  |  ffgccheck
+  |  ldr CARG2, [BASE]
+  |   cmp NARGS8:RC, #8
+  |  asr ITYPE, CARG2, #47
+  |  // The type tag is only compared if nargs*8 >= 8; otherwise ccmn forces NE.
+  |  ccmn ITYPE, #-LJ_TSTR, #0, hs
+  |   and STR:CARG2, CARG2, #LJ_GCVMASK
+  |  bne ->fff_fallback
+  |  // Reset the temporary buffer (p = b) and record the current L in it.
+  |  ldr TMP0, GL->tmpbuf.b
+  |   add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf)
+  |   str BASE, L->base
+  |   str PC, SAVE_PC
+  |   str L, GL->tmpbuf.L
+  |  str TMP0, GL->tmpbuf.p
+  |  // CARG1 = &g->tmpbuf, CARG2 = string object.
+  |  bl extern lj_buf_putstr_ .. name
+  |  // No argument setup in between: lj_buf_tostr receives whatever the first
+  |  // call left in the first argument register (presumably the buffer).
+  |  bl extern lj_buf_tostr
+  |  b ->fff_resstr
+  |.endmacro
+  |
+  |ffstring_op reverse
+  |ffstring_op lower
+  |ffstring_op upper
+  |
+  |//-- Bit library --------------------------------------------------------
+  |
+  |// FP number to bit conversion for soft-float. Clobbers CARG1-CARG3
+  |// Uses integer instructions only. Entered from a failed checkint with the
+  |// flags of that check still live and with lr preloaded by the caller
+  |// (adr lr, >N); returns there via "br lr" with the result in CARG1w.
+  |// In:  CARG1 = raw 64 bit TValue. Out: CARG1w = 32 bit integer result.
+  |->vm_tobit_fb:
+  |  bls ->fff_fallback			// Not a number: use the fallback.
+  |  add CARG2, CARG1, CARG1		// Shift out the sign bit.
+  |  // CARG3 = 1076 - biased exponent = right shift for the shifted mantissa.
+  |  mov CARG3, #1076
+  |  sub CARG3, CARG3, CARG2, lsr #53
+  |  cmp CARG3, #53
+  |  bhi >1				// Shift out of range: result is 0.
+  |  // Isolate the (left-shifted) mantissa and set the implicit leading one.
+  |  and CARG2, CARG2, #U64x(001fffff,ffffffff)
+  |  orr CARG2, CARG2, #U64x(00200000,00000000)
+  |   cmp CARG1, #0
+  |  lsr CARG2, CARG2, CARG3
+  |   cneg CARG1w, CARG2w, mi		// Negate if the input was negative.
+  |  br lr
+  |1:
+  |  mov CARG1w, #0
+  |  br lr
+  |
+  |// Prologue for one-argument bit.* fast functions. After label 1 the first
+  |// argument is available as a 32 bit integer in CARG1w: integers fall
+  |// through, everything else goes through vm_tobit_fb, which returns to
+  |// label 1 via the lr set up here (or leaves to fff_fallback).
+  |.macro .ffunc_bit, name
+  |  .ffunc_1 bit_..name
+  |  adr lr, >1
+  |  checkint CARG1, ->vm_tobit_fb
+  |1:
+  |.endmacro
+  |
+  |// Variadic bit operation (band/bor/bxor): folds all arguments into TMP0w
+  |// with instruction "ins". RA is the byte offset of the next argument.
+  |// The loop exits to local label 9, which is defined after the
+  |// instantiations below (inside bit.tobit).
+  |.macro .ffunc_bit_op, name, ins
+  |  .ffunc_bit name
+  |  mov RA, #8
+  |  mov TMP0w, CARG1w
+  |  adr lr, >2				// Return point for vm_tobit_fb.
+  |1:
+  |  // The slot is loaded before the bounds check; the value is unused if
+  |  // RA has already reached nargs*8.
+  |  ldr CARG1, [BASE, RA]
+  |   cmp RA, NARGS8:RC
+  |    add RA, RA, #8
+  |   bge >9
+  |  checkint CARG1, ->vm_tobit_fb
+  |2:
+  |  ins TMP0w, TMP0w, CARG1w
+  |  b <1
+  |.endmacro
+  |
+  |.ffunc_bit_op band, and
+  |.ffunc_bit_op bor, orr
+  |.ffunc_bit_op bxor, eor
+  |
+  |// bit.tobit: return the converted first argument as an integer.
+  |// Writing TMP0w zeroes the upper half, so adding TISNUM yields the tagged
+  |// integer result.
+  |.ffunc_bit tobit
+  |  mov TMP0w, CARG1w
+  |9:  // Label reused by .ffunc_bit_op users.
+  |  add CARG1, TMP0, TISNUM
+  |  b ->fff_restv
+  |
+  |// bit.bswap: byte-reverse the 32 bit value.
+  |.ffunc_bit bswap
+  |  rev TMP0w, CARG1w
+  |  add CARG1, TMP0, TISNUM
+  |  b ->fff_restv
+  |
+  |// bit.bnot: bitwise complement of the 32 bit value.
+  |.ffunc_bit bnot
+  |  mvn TMP0w, CARG1w
+  |  add CARG1, TMP0, TISNUM
+  |  b ->fff_restv
+  |
+  |// Two-argument shift/rotate fast functions. "ins" is the shift instruction.
+  |// With shmod != 0 the shift count is negated first, which lets bit.rol be
+  |// implemented with the "ror" instruction.
+  |// The second argument (count) is converted first, then the first argument
+  |// (value); TMP0 keeps the raw first argument across the first conversion.
+  |.macro .ffunc_bit_sh, name, ins, shmod
+  |  .ffunc bit_..name
+  |  ldp TMP0, CARG1, [BASE]
+  |   cmp NARGS8:RC, #16
+  |   blo ->fff_fallback
+  |  adr lr, >1
+  |  checkint CARG1, ->vm_tobit_fb
+  |1:
+  |.if shmod == 0
+  |  mov TMP1, CARG1
+  |.else
+  |  neg TMP1, CARG1
+  |.endif
+  |  mov CARG1, TMP0
+  |  adr lr, >2
+  |  checkint CARG1, ->vm_tobit_fb
+  |2:
+  |  ins TMP0w, CARG1w, TMP1w
+  |  add CARG1, TMP0, TISNUM
+  |  b ->fff_restv
+  |.endmacro
+  |
+  |.ffunc_bit_sh lshift, lsl, 0
+  |.ffunc_bit_sh rshift, lsr, 0
+  |.ffunc_bit_sh arshift, asr, 0
+  |.ffunc_bit_sh rol, ror, 1
+  |.ffunc_bit_sh ror, ror, 0
+  |
+  |//-----------------------------------------------------------------------
+  |
+  |// Generic fallback for fast functions: calls the C function stored in the
+  |// CFUNC object of the current frame. Sets L->base and L->top (top =
+  |// BASE + nargs*8) and grows the stack first if fewer than LUA_MINSTACK
+  |// slots are left above top.
+  |->fff_fallback:			// Call fast function fallback handler.
+  |  // BASE = new base, RC = nargs*8
+  |   ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC]	// Fallback may overwrite PC.
+  |  ldr TMP2, L->maxstack
+  |  add TMP1, BASE, NARGS8:RC
+  |  stp BASE, TMP1, L->base
+  |   and CFUNC:CARG3, CARG3, #LJ_GCVMASK
+  |  add TMP1, TMP1, #8*LUA_MINSTACK
+  |   ldr CARG3, CFUNC:CARG3->f
+  |    str PC, SAVE_PC			// Redundant (but a defined value).
+  |  cmp TMP1, TMP2
+  |   mov CARG1, L
+  |  bhi >5				// Need to grow stack.
+  |   blr CARG3				// (lua_State *L)
+  |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+  |   ldr BASE, L->base
+  |  cmp CRET1w, #0
+  |   lsl RC, CRET1, #3
+  |   sub RA, BASE, #16
+  |  bgt ->fff_res			// Returned nresults+1?
+  |1:  // Returned 0 or -1: retry fast path.
+  |  // The flags from "cmp CRET1w, #0" (or from the forced compare at label 5)
+  |  // are still live here: the loads and the sub below do not modify them.
+  |   ldr CARG1, L->top
+  |    ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
+  |   sub NARGS8:RC, CARG1, BASE
+  |  bne ->vm_call_tail			// Returned -1?
+  |    and CFUNC:CARG3, CARG3, #LJ_GCVMASK
+  |  ins_callt				// Returned 0: retry fast path.
+  |
+  |// Reconstruct previous base for vmeta_call during tailcall.
+  |// TMP1 = distance from the previous base: taken from PC with the frame
+  |// type bits masked off if any FRAME_TYPE bit is set, otherwise computed as
+  |// 8 * (byte at PC-3) + 16 (presumably the A operand of the calling
+  |// bytecode instruction).
+  |->vm_call_tail:
+  |  ands TMP0, PC, #FRAME_TYPE
+  |   and TMP1, PC, #~FRAME_TYPEP
+  |  bne >3
+  |  ldrb RAw, [PC, #-3]
+  |  lsl RA, RA, #3
+  |  add TMP1, RA, #16
+  |3:
+  |  sub RB, BASE, TMP1
+  |  b ->vm_call_dispatch		// Resolve again for tailcall.
+  |
+  |5:  // Grow stack for fallback handler.
+  |  mov CARG2, #LUA_MINSTACK
+  |  bl extern lj_state_growstack	// (lua_State *L, int n)
+  |  ldr BASE, L->base
+  |  cmp CARG1, CARG1			// Set zero-flag to force retry.
+  |  b <1
+  |
+  |// Runs one GC step on behalf of a fast function and returns to its caller
+  |// with "ret". lr is parked in RA across the C call. BASE, nargs*8 (RC) and
+  |// the masked function object (CARG3) are reloaded/recomputed afterwards
+  |// from L->base, L->top and the frame slot.
+  |->fff_gcstep:			// Call GC step function.
+  |  // BASE = new base, RC = nargs*8
+  |   add CARG2, BASE, NARGS8:RC	// Calculate L->top.
+  |  mov RA, lr
+  |   stp BASE, CARG2, L->base
+  |   str PC, SAVE_PC			// Redundant (but a defined value).
+  |  mov CARG1, L
+  |  bl extern lj_gc_step		// (lua_State *L)
+  |  ldp BASE, CARG2, L->base
+  |   ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
+  |  mov lr, RA				// Help return address predictor.
+  |  sub NARGS8:RC, CARG2, BASE		// Calculate nargs*8.
+  |   and CFUNC:CARG3, CARG3, #LJ_GCVMASK
+  |  ret
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Special dispatch targets -------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_record:				// Dispatch target for recording phase.
+  |  NYI
+  |
+  |// The three hook entry points below share local labels: vm_rethook jumps
+  |// forward to label 1 inside vm_inshook, vm_inshook jumps back to label 5
+  |// inside vm_rethook, and cont_hook jumps back to label 4.
+  |->vm_rethook:			// Dispatch target for return hooks.
+  |  ldrb TMP2w, GL->hookmask
+  |  tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1	// Hook already active?
+  |5:  // Re-dispatch to static ins.
+  |  // NOTE(review): TMP1 is not set in this block; presumably the dispatching
+  |  // code left GL + opcode*8 in it, as computed at label 4 below -- confirm.
+  |  ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
+  |  br TMP0
+  |
+  |->vm_inshook:			// Dispatch target for instr/line hooks.
+  |  ldrb TMP2w, GL->hookmask
+  |   ldr TMP3w, GL->hookcount
+  |  tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5	// Hook already active?
+  |  tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT
+  |  beq <5
+  |  // Decrement and store the hook count; call the hook when it reaches zero,
+  |  // otherwise only if the LUA_HOOKLINE bit is set in the mask.
+  |   sub TMP3w, TMP3w, #1
+  |   str TMP3w, GL->hookcount
+  |   cbz TMP3w, >1
+  |  tbz TMP2w, #LUA_HOOKLINE, <5
+  |1:
+  |  mov CARG1, L
+  |   str BASE, L->base
+  |  mov CARG2, PC
+  |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+  |  bl extern lj_dispatch_ins		// (lua_State *L, const BCIns *pc)
+  |3:
+  |  ldr BASE, L->base
+  |4:  // Re-dispatch to static ins.
+  |  // Reload the instruction word at PC-4, re-decode RA and RC, and jump
+  |  // through the static dispatch table entry selected by its low byte.
+  |  ldr INSw, [PC, #-4]
+  |  add TMP1, GL, INS, uxtb #3
+  |   decode_RA RA, INS
+  |  ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
+  |   decode_RD RC, INS
+  |  br TMP0
+  |
+  |->cont_hook:				// Continue from hook yield.
+  |  ldr CARG1, [CARG4, #-40]
+  |   add PC, PC, #4
+  |  str CARG1w, SAVE_MULTRES		// Restore MULTRES for *M ins.
+  |  b <4
+  |
+  |->vm_hotloop:			// Hot loop counter underflow.
+  |  NYI
+  |
+  |// Call hook and hot-call entry points; both end up calling
+  |// lj_dispatch_call and then jump to the ASMFunction it returns.
+  |// With JIT enabled, vm_hotcall passes PC with bit 0 set, vm_callhook the
+  |// plain PC. Without JIT, vm_callhook falls through into vm_hotcall.
+  |->vm_callhook:			// Dispatch target for call hooks.
+  |  mov CARG2, PC
+  |.if JIT
+  |  b >1
+  |.endif
+  |
+  |->vm_hotcall:			// Hot call counter underflow.
+  |.if JIT
+  |  orr CARG2, PC, #1
+  |1:
+  |.endif
+  |  add TMP1, BASE, NARGS8:RC
+  |   str PC, SAVE_PC
+  |   mov CARG1, L
+  |  // Keep RA as an offset from BASE across the call: BASE is reloaded from
+  |  // L->base afterwards and RA is rebuilt from it.
+  |   sub RA, RA, BASE
+  |  stp BASE, TMP1, L->base
+  |  bl extern lj_dispatch_call		// (lua_State *L, const BCIns *pc)
+  |  // Returns ASMFunction.
+  |  ldp BASE, TMP1, L->base
+  |   str xzr, SAVE_PC			// Invalidate for subsequent line hook.
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+  |  add RA, BASE, RA
+  |  sub NARGS8:RC, TMP1, BASE
+  |   ldr INSw, [PC, #-4]
+  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+  |  br CRET1
+  |
+  |->cont_stitch:			// Trace stitching.
+  |  NYI
+  |
+  |// Only has a body when LJ_HASPROFILE is set: calls lj_dispatch_profile,
+  |// then backs PC up by one instruction and continues at cont_nop.
+  |->vm_profhook:			// Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+  |  mov CARG1, L
+  |   str BASE, L->base
+  |  mov CARG2, PC
+  |  bl extern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
+  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+  |  ldr BASE, L->base
+  |  sub PC, PC, #4
+  |  b ->cont_nop
+#endif
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Trace exit handler -------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_exit_handler:
+  |  NYI
+  |->vm_exit_interp:
+  |  NYI
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Math helper functions ----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |  // int lj_vm_modi(int dividend, int divisor);
+  |  // In: CARG1w = dividend, CARG2w = divisor. Out: CARG1w = result, which is
+  |  // zero or has the sign of the divisor. Clobbers CARG3, CARG4 and flags.
+  |  // There is no zero check here: the visible caller (ins_arithdn) tests the
+  |  // divisor with cbz before calling.
+  |->vm_modi:
+  |    eor CARG4w, CARG1w, CARG2w
+  |    cmp CARG4w, #0			// N set if the operand signs differ.
+  |  // CARG3w = abs(dividend), CARG4w = abs(divisor). None of the next six
+  |  // instructions sets flags, so the sign test above stays live for ccmp.
+  |  eor CARG3w, CARG1w, CARG1w, asr #31
+  |   eor CARG4w, CARG2w, CARG2w, asr #31
+  |  sub CARG3w, CARG3w, CARG1w, asr #31
+  |   sub CARG4w, CARG4w, CARG2w, asr #31
+  |  udiv CARG1w, CARG3w, CARG4w
+  |  msub CARG1w, CARG1w, CARG4w, CARG3w	// Unsigned remainder.
+  |  // Signs differ and remainder != 0: use remainder - abs(divisor) instead.
+  |  // Otherwise (ccmp forces Z=1 via #4 when signs are equal) keep it.
+  |    ccmp CARG1w, #0, #4, mi
+  |    sub CARG3w, CARG1w, CARG4w
+  |    csel CARG1w, CARG1w, CARG3w, eq
+  |  // Finally negate if the sign of the result differs from the divisor's.
+  |  eor CARG3w, CARG1w, CARG2w
+  |  cmp CARG3w, #0
+  |  cneg CARG1w, CARG1w, mi
+  |  ret
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Miscellaneous functions --------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |//-----------------------------------------------------------------------
+  |//-- FFI helper functions -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// Handler for callback functions.
+  |// Callback slot number in w9, g in x10. saveregs is performed here.
+  |// Saves the incoming argument registers x0-x7/d0-d7 and the address
+  |// sp + CFRAME_SPACE into the CTState callback area, calls
+  |// lj_ccallback_enter and then dispatches to the Lua function of the new
+  |// frame via ins_callt.
+  |->vm_ffi_callback:
+  |.if FFI
+  |.type CTSTATE, CTState, PC
+  |  saveregs
+  |  ldr CTSTATE, GL:x10->ctype_state
+  |  mov GL, x10
+  |    add x10, sp, # CFRAME_SPACE
+  |  str w9, CTSTATE->cb.slot
+  |  stp x0, x1, CTSTATE->cb.gpr[0]
+  |   stp d0, d1, CTSTATE->cb.fpr[0]
+  |  stp x2, x3, CTSTATE->cb.gpr[2]
+  |   stp d2, d3, CTSTATE->cb.fpr[2]
+  |  stp x4, x5, CTSTATE->cb.gpr[4]
+  |   stp d4, d5, CTSTATE->cb.fpr[4]
+  |  stp x6, x7, CTSTATE->cb.gpr[6]
+  |   stp d6, d7, CTSTATE->cb.fpr[6]
+  |    str x10, CTSTATE->cb.stack
+  |  mov CARG1, CTSTATE
+  |   str CTSTATE, SAVE_PC		// Any value outside of bytecode is ok.
+  |  mov CARG2, sp
+  |  bl extern lj_ccallback_enter	// (CTState *cts, void *cf)
+  |  // Returns lua_State *.
+  |  ldp BASE, RC, L:CRET1->base
+  |  // Re-establish the interpreter's constant registers.
+  |   movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+  |   movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+  |   movn TISNIL, #0
+  |   mov L, CRET1
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+  |  sub RC, RC, BASE
+  |   st_vmstate ST_INTERP
+  |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+  |  ins_callt
+  |.endif
+  |
+  |// Continuation run when the Lua function invoked by a callback returns:
+  |// hands the result slot (RA) to lj_ccallback_leave, reloads the C return
+  |// registers x0/x1 and d0/d1 from the CTState callback area and leaves the
+  |// VM through vm_leave_unw.
+  |->cont_ffi_callback:			// Return from FFI callback.
+  |.if FFI
+  |  ldr CTSTATE, GL->ctype_state
+  |   stp BASE, CARG4, L->base
+  |  str L, CTSTATE->L
+  |  mov CARG1, CTSTATE
+  |  mov CARG2, RA
+  |  bl extern lj_ccallback_leave       // (CTState *cts, TValue *o)
+  |  ldp x0, x1, CTSTATE->cb.gpr[0]
+  |   ldp d0, d1, CTSTATE->cb.fpr[0]
+  |  b ->vm_leave_unw
+  |.endif
+  |
+  |// In: x0 = CCallState *. Builds the outgoing C call described by it: copies
+  |// nsp stack slots, loads x0-x7, d0-d7 and x8 (retp), calls func, then
+  |// stores x0/x1 and d0-d3 back into the CCallState.
+  |// Frame: 32 bytes; fp/lr at [sp], the caller's x19 at [sp, #16].
+  |->vm_ffi_call:			// Call C function via FFI.
+  |  // Caveat: needs special frame unwinding, see below.
+  |.if FFI
+  |  .type CCSTATE, CCallState, x19
+  |  stp fp, lr, [sp, #-32]!
+  |  add fp, sp, #0
+  |  str CCSTATE, [sp, #16]
+  |  mov CCSTATE, x0
+  |  ldr TMP0w, CCSTATE:x0->spadj
+  |   ldrb TMP1w, CCSTATE->nsp
+  |    add TMP2, CCSTATE, #offsetof(CCallState, stack)
+  |   subs TMP1, TMP1, #1
+  |    ldr TMP3, CCSTATE->func
+  |  sub sp, fp, TMP0			// Reserve spadj bytes below fp.
+  |   bmi >2				// nsp == 0: nothing to copy.
+  |1:  // Copy stack slots
+  |  // Copies 8 byte slots from index nsp-1 down to 0.
+  |  ldr TMP0, [TMP2, TMP1, lsl #3]
+  |  str TMP0, [sp, TMP1, lsl #3]
+  |  subs TMP1, TMP1, #1
+  |  bpl <1
+  |2:
+  |  ldp x0, x1, CCSTATE->gpr[0]
+  |   ldp d0, d1, CCSTATE->fpr[0]
+  |  ldp x2, x3, CCSTATE->gpr[2]
+  |   ldp d2, d3, CCSTATE->fpr[2]
+  |  ldp x4, x5, CCSTATE->gpr[4]
+  |   ldp d4, d5, CCSTATE->fpr[4]
+  |  ldp x6, x7, CCSTATE->gpr[6]
+  |   ldp d6, d7, CCSTATE->fpr[6]
+  |  ldr x8, CCSTATE->retp
+  |  blr TMP3
+  |  mov sp, fp				// Drop the outgoing argument area.
+  |  stp x0, x1, CCSTATE->gpr[0]
+  |   stp d0, d1, CCSTATE->fpr[0]
+  |   stp d2, d3, CCSTATE->fpr[2]
+  |  ldr CCSTATE, [sp, #16]
+  |  ldp fp, lr, [sp], #32
+  |  ret
+  |.endif
+  |// Note: vm_ffi_call must be the last function in this object file!
+  |
+  |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+  int vk = 0;
+  |=>defop:
+
+  switch (op) {
+
+  /* -- Comparison ops ---------------------------------------------------- */
+
+  /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+  /* Ordered comparisons. The branch target is taken from the following
+  ** instruction word: RB = PC + 4*(halfword at [PC, #2]) - 0x20000, with PC
+  ** already advanced past that word. PC is then selected with csel instead
+  ** of a conditional branch. Integer operands use signed conditions, FP
+  ** operands the unsigned-style conditions chosen for NaN handling below.
+  */
+  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+    |  // RA = src1, RC = src2, JMP with RC = target
+    |  ldr CARG1, [BASE, RA, lsl #3]
+    |    ldrh RBw, [PC, #2]
+    |   ldr CARG2, [BASE, RC, lsl #3]
+    |    add PC, PC, #4
+    |    add RB, PC, RB, lsl #2
+    |    sub RB, RB, #0x20000
+    |  checkint CARG1, >3
+    |   checkint CARG2, >4
+    |  cmp CARG1w, CARG2w
+    if (op == BC_ISLT) {
+      |  csel PC, RB, PC, lt
+    } else if (op == BC_ISGE) {
+      |  csel PC, RB, PC, ge
+    } else if (op == BC_ISLE) {
+      |  csel PC, RB, PC, le
+    } else {
+      |  csel PC, RB, PC, gt
+    }
+    |1:
+    |  ins_next
+    |
+    |3:  // RA not int.
+    |  // The flags of the failed checkint are still live: LO means RA is not
+    |  // a number at all, so the metamethod handler is used.
+    |    ldr FARG1, [BASE, RA, lsl #3]
+    |  blo ->vmeta_comp
+    |    ldr FARG2, [BASE, RC, lsl #3]
+    |   cmp TISNUMhi, CARG2, lsr #32
+    |   bhi >5
+    |   bne ->vmeta_comp
+    |  // RA number, RC int.
+    |  scvtf FARG2, CARG2w
+    |  b >5
+    |
+    |4:  // RA int, RC not int
+    |    ldr FARG2, [BASE, RC, lsl #3]
+    |   blo ->vmeta_comp
+    |  // RA int, RC number.
+    |  scvtf FARG1, CARG1w
+    |
+    |5:  // RA number, RC number
+    |  fcmp FARG1, FARG2
+    |  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+    if (op == BC_ISLT) {
+      |  csel PC, RB, PC, lo
+    } else if (op == BC_ISGE) {
+      |  csel PC, RB, PC, hs
+    } else if (op == BC_ISLE) {
+      |  csel PC, RB, PC, ls
+    } else {
+      |  csel PC, RB, PC, hi
+    }
+    |  b <1
+    break;
+
+  /* Equality of two variable slots. vk is 1 for ISEQV and 0 for ISNEV.
+  ** If RC holds a number the code continues in the shared ->BC_ISEQN_Z or
+  ** ->BC_ISNEN_Z tail emitted by BC_ISEQN/BC_ISNEN. The ->BC_ISEQV_Z label
+  ** (perform the branch) is emitted only for ISEQV and reused by ISNEV.
+  */
+  case BC_ISEQV: case BC_ISNEV:
+    vk = op == BC_ISEQV;
+    |  // RA = src1, RC = src2, JMP with RC = target
+    |  ldr CARG1, [BASE, RA, lsl #3]
+    |   add RC, BASE, RC, lsl #3
+    |    ldrh RBw, [PC, #2]
+    |   ldr CARG3, [RC]
+    |    add PC, PC, #4
+    |    add RB, PC, RB, lsl #2
+    |    sub RB, RB, #0x20000
+    |  asr ITYPE, CARG3, #47
+    |  cmn ITYPE, #-LJ_TISNUM
+    if (vk) {
+      |  bls ->BC_ISEQN_Z
+    } else {
+      |  bls ->BC_ISNEN_Z
+    }
+    |  // RC is not a number.
+    |   asr TMP0, CARG1, #47
+    |.if FFI
+    |  // Check if RC or RA is a cdata.
+    |  cmn ITYPE, #-LJ_TCDATA
+    |   ccmn TMP0, #-LJ_TCDATA, #4, ne
+    |  beq ->vmeta_equal_cd
+    |.endif
+    |  cmp CARG1, CARG3
+    |  bne >2
+    |  // Tag and value are equal.
+    if (vk) {
+      |->BC_ISEQV_Z:
+      |  mov PC, RB			// Perform branch.
+    }
+    |1:
+    |  ins_next
+    |
+    |2:  // Check if the tags are the same and it's a table or userdata.
+    |  cmp ITYPE, TMP0
+    |  ccmn ITYPE, #-LJ_TISTABUD, #2, eq
+    if (vk) {
+      |  bhi <1
+    } else {
+      |  bhi ->BC_ISEQV_Z		// Reuse code from opposite instruction.
+    }
+    |  // Different tables or userdatas. Need to check __eq metamethod.
+    |  // Field metatable must be at same offset for GCtab and GCudata!
+    |  and TAB:CARG2, CARG1, #LJ_GCVMASK
+    |  ldr TAB:TMP2, TAB:CARG2->metatable
+    if (vk) {
+      |  cbz TAB:TMP2, <1		// No metatable?
+      |  ldrb TMP1w, TAB:TMP2->nomm
+      |   mov CARG4, #0			// ne = 0
+      |  tbnz TMP1w, #MM_eq, <1		// 'no __eq' flag set: done.
+    } else {
+      |  cbz TAB:TMP2, ->BC_ISEQV_Z	// No metatable?
+      |  ldrb TMP1w, TAB:TMP2->nomm
+      |   mov CARG4, #1			// ne = 1.
+      |  tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z	// 'no __eq' flag set: done.
+    }
+    |  b ->vmeta_equal
+    break;
+
+  /* Compare a slot against a string constant. The constant's pointer is
+  ** tagged with LJ_TSTR and compared as a full 64 bit value, so tag and
+  ** pointer are checked with a single cmp. With FFI enabled, a cdata in RA
+  ** is diverted to ->vmeta_equal_cd first.
+  */
+  case BC_ISEQS: case BC_ISNES:
+    vk = op == BC_ISEQS;
+    |  // RA = src, RC = str_const (~), JMP with RC = target
+    |  ldr CARG1, [BASE, RA, lsl #3]
+    |   mvn RC, RC
+    |    ldrh RBw, [PC, #2]
+    |   ldr CARG2, [KBASE, RC, lsl #3]
+    |    add PC, PC, #4
+    |   movn TMP0, #~LJ_TSTR
+    |.if FFI
+    |  asr ITYPE, CARG1, #47
+    |.endif
+    |    add RB, PC, RB, lsl #2
+    |   add CARG2, CARG2, TMP0, lsl #47
+    |    sub RB, RB, #0x20000
+    |.if FFI
+    |  cmn ITYPE, #-LJ_TCDATA
+    |  beq ->vmeta_equal_cd
+    |.endif
+    |  cmp CARG1, CARG2
+    if (vk) {
+      |  csel PC, RB, PC, eq
+    } else {
+      |  csel PC, RB, PC, ne
+    }
+    |  ins_next
+    break;
+
+  /* Compare a slot against a number constant. Also provides the
+  ** ->BC_ISEQN_Z / ->BC_ISNEN_Z entry points used by BC_ISEQV/BC_ISNEV, which
+  ** expect CARG1 = value of RA, CARG3 = value at [RC], RC = address of the
+  ** second operand and RB = branch target.
+  ** Label 1 expects the comparison flags to be set. Label 2 is the target
+  ** for "RA is not a number": for ISEQN it sits after the csel (no branch),
+  ** for ISNEN before it, where the still-live NE flags select the branch.
+  */
+  case BC_ISEQN: case BC_ISNEN:
+    vk = op == BC_ISEQN;
+    |  // RA = src, RC = num_const (~), JMP with RC = target
+    |  ldr CARG1, [BASE, RA, lsl #3]
+    |   add RC, KBASE, RC, lsl #3
+    |    ldrh RBw, [PC, #2]
+    |   ldr CARG3, [RC]
+    |    add PC, PC, #4
+    |    add RB, PC, RB, lsl #2
+    |    sub RB, RB, #0x20000
+    if (vk) {
+      |->BC_ISEQN_Z:
+    } else {
+      |->BC_ISNEN_Z:
+    }
+    |  checkint CARG1, >4
+    |   checkint CARG3, >6
+    |  cmp CARG1w, CARG3w
+    |1:
+    if (vk) {
+      |  csel PC, RB, PC, eq
+      |2:
+    } else {
+      |2:
+      |  csel PC, RB, PC, ne
+    }
+    |3:
+    |  ins_next
+    |
+    |4:  // RA not int.
+    |.if FFI
+    |  blo >7
+    |.else
+    |  blo <2
+    |.endif
+    |    ldr FARG1, [BASE, RA, lsl #3]
+    |    ldr FARG2, [RC]
+    |   cmp TISNUMhi, CARG3, lsr #32
+    |   bne >5
+    |  // RA number, RC int.
+    |  scvtf FARG2, CARG3w
+    |5:
+    |  // RA number, RC number.
+    |  fcmp FARG1, FARG2
+    |  b <1
+    |
+    |6:  // RA int, RC number
+    |  ldr FARG2, [RC]
+    |  scvtf FARG1, CARG1w
+    |  fcmp FARG1, FARG2
+    |  b <1
+    |
+    |.if FFI
+    |7:
+    |  // RA is not a number: only a cdata needs the metamethod handler.
+    |  asr ITYPE, CARG1, #47
+    |  cmn ITYPE, #-LJ_TCDATA
+    |  bne <2
+    |  b ->vmeta_equal_cd
+    |.endif
+    break;
+
+  /* Compare the type tag of a slot against a primitive type.
+  ** The cmn computes (RC+1) + itype, which is zero iff itype == ~RC.
+  */
+  case BC_ISEQP: case BC_ISNEP:
+    vk = op == BC_ISEQP;
+    |  // RA = src, RC = primitive_type (~), JMP with RC = target
+    |  ldr TMP0, [BASE, RA, lsl #3]
+    |   ldrh RBw, [PC, #2]
+    |   add PC, PC, #4
+    |  add RC, RC, #1
+    |   add RB, PC, RB, lsl #2
+    |.if FFI
+    |  asr ITYPE, TMP0, #47
+    |  cmn ITYPE, #-LJ_TCDATA
+    |  beq ->vmeta_equal_cd
+    |  cmn RC, ITYPE
+    |.else
+    |  cmn RC, TMP0, asr #47
+    |.endif
+    |   sub RB, RB, #0x20000
+    if (vk) {
+      |  csel PC, RB, PC, eq
+    } else {
+      |  csel PC, RB, PC, ne
+    }
+    |  ins_next
+    break;
+
+  /* -- Unary test and copy ops ------------------------------------------- */
+
+  /* Truth tests. The source value is compared (unsigned) against the "false"
+  ** TValue: LO selects the branch for IST/ISTC, HS for ISF/ISFC.
+  ** For the copying variants the store is unconditional: the destination
+  ** index is RA when the condition holds and RC (the source slot itself)
+  ** otherwise, so in the latter case the value is written back unchanged.
+  */
+  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+    |  // RA = dst or unused, RC = src, JMP with RC = target
+    |   ldrh RBw, [PC, #2]
+    |  ldr TMP0, [BASE, RC, lsl #3]
+    |   add PC, PC, #4
+    |  mov_false TMP1
+    |   add RB, PC, RB, lsl #2
+    |  cmp TMP0, TMP1
+    |   sub RB, RB, #0x20000
+    if (op == BC_ISTC || op == BC_IST) {
+      if (op == BC_ISTC) {
+	|  csel RA, RA, RC, lo
+      }
+      |  csel PC, RB, PC, lo
+    } else {
+      if (op == BC_ISFC) {
+	|  csel RA, RA, RC, hs
+      }
+      |  csel PC, RB, PC, hs
+    }
+    if (op == BC_ISTC || op == BC_ISFC) {
+      |  str TMP0, [BASE, RA, lsl #3]
+    }
+    |  ins_next
+    break;
+
+  /* Type assertion: continue if the tag of slot RA equals -RC, otherwise
+  ** go to ->vmeta_istype.
+  */
+  case BC_ISTYPE:
+    |  // RA = src, RC = -type
+    |  ldr TMP0, [BASE, RA, lsl #3]
+    |  cmn RC, TMP0, asr #47
+    |  bne ->vmeta_istype
+    |  ins_next
+    break;
+  case BC_ISNUM:
+    |  // RA = src, RC = -(TISNUM-1)
+    |  ldr TMP0, [BASE, RA]
+    |  checknum TMP0, ->vmeta_istype
+    |  ins_next
+    break;
+
+  /* -- Unary ops --------------------------------------------------------- */
+
+  /* Copy slot RC to slot RA. */
+  case BC_MOV:
+    |  // RA = dst, RC = src
+    |  ldr TMP0, [BASE, RC, lsl #3]
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    break;
+  /* Logical not: stores "false" if the source compares (unsigned) below the
+  ** "false" TValue, otherwise stores "true".
+  */
+  case BC_NOT:
+    |  // RA = dst, RC = src
+    |  ldr TMP0, [BASE, RC, lsl #3]
+    |   mov_false TMP1
+    |   mov_true TMP2
+    |  cmp TMP0, TMP1
+    |  csel TMP0, TMP1, TMP2, lo
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    break;
+  /* Unary minus. Non-numbers go to ->vmeta_unm. For FP numbers the sign bit
+  ** is flipped. For integers the low word is negated and re-tagged; if the
+  ** negation overflows, the FP constant 2^31 is stored instead.
+  */
+  case BC_UNM:
+    |  // RA = dst, RC = src
+    |  ldr TMP0, [BASE, RC, lsl #3]
+    |  asr ITYPE, TMP0, #47
+    |  cmn ITYPE, #-LJ_TISNUM
+    |  bhi ->vmeta_unm
+    |  // eor does not set flags: the bne below still tests the cmn result
+    |  // (EQ = integer).
+    |  eor TMP0, TMP0, #U64x(80000000,00000000)
+    |  bne >5
+    |  negs TMP0w, TMP0w
+    |   movz CARG3, #0x41e0, lsl #48	// 2^31.
+    |   add TMP0, TMP0, TISNUM
+    |  csel TMP0, TMP0, CARG3, vc
+    |5:
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    break;
+  /* Length operator. Strings: read str->len inline. Tables: call lj_tab_len
+  ** (with LJ_52, only if there is no metatable or its 'no __len' flag is
+  ** set). Everything else goes to ->vmeta_len. The result is stored as a
+  ** tagged integer.
+  */
+  case BC_LEN:
+    |  // RA = dst, RC = src
+    |  ldr CARG1, [BASE, RC, lsl #3]
+    |  asr ITYPE, CARG1, #47
+    |  cmn ITYPE, #-LJ_TSTR
+    |   and CARG1, CARG1, #LJ_GCVMASK
+    |  bne >2
+    |  ldr CARG1w, STR:CARG1->len
+    |1:
+    |  add CARG1, CARG1, TISNUM
+    |  str CARG1, [BASE, RA, lsl #3]
+    |  ins_next
+    |
+    |2:
+    |  cmn ITYPE, #-LJ_TTAB
+    |  bne ->vmeta_len
+#if LJ_52
+    |  ldr TAB:CARG2, TAB:CARG1->metatable
+    |  cbnz TAB:CARG2, >9
+    |3:
+#endif
+    |->BC_LEN_Z:
+    |  // CARG1 = table pointer (already masked above).
+    |  bl extern lj_tab_len		// (GCtab *t)
+    |  // Returns uint32_t (but less than 2^31).
+    |  b <1
+    |
+#if LJ_52
+    |9:
+    |  ldrb TMP1w, TAB:CARG2->nomm
+    |  tbnz TMP1w, #MM_len, <3		// 'no __len' flag set: done.
+    |  b ->vmeta_len
+#endif
+    break;
+
+  /* -- Binary ops -------------------------------------------------------- */
+
+    |// Helper macros for the arithmetic bytecodes below. They rely on the C
+    |// variable vk, set by ins_arithhead: 0 = VN, 1 = NV, otherwise VV.
+    |
+    |// Branch to target unless both CARG1 and CARG2 hold integers.
+    |.macro ins_arithcheck_int, target
+    |  checkint CARG1, target
+    |  checkint CARG2, target
+    |.endmacro
+    |
+    |// Branch to target unless both CARG1 and CARG2 hold numbers.
+    |.macro ins_arithcheck_num, target
+    |  checknum CARG1, target
+    |  checknum CARG2, target
+    |.endmacro
+    |
+    |// Branch to target if the integer divisor in CARG2w is zero.
+    |.macro ins_arithcheck_nzdiv, target
+    |  cbz CARG2w, target
+    |.endmacro
+    |
+    |// Compute vk from the opcode and decode RB and the 8 bit RC operand.
+    |// Only the order of the two instructions depends on vk.
+    |.macro ins_arithhead
+    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+    ||if (vk == 1) {
+    |   and RC, RC, #255
+    |    decode_RB RB, INS
+    ||} else {
+    |   decode_RB RB, INS
+    |    and RC, RC, #255
+    ||}
+    |.endmacro
+    |
+    |// Load the two operands into reg1/reg2. For vk == 1 (NV) the constant
+    |// is the first operand, for vk == 0 (VN) it is the second.
+    |.macro ins_arithload, reg1, reg2
+    |  // RA = dst, RB = src1, RC = src2 | num_const
+    ||switch (vk) {
+    ||case 0:
+    |   ldr reg1, [BASE, RB, lsl #3]
+    |    ldr reg2, [KBASE, RC, lsl #3]
+    ||  break;
+    ||case 1:
+    |   ldr reg1, [KBASE, RC, lsl #3]
+    |    ldr reg2, [BASE, RB, lsl #3]
+    ||  break;
+    ||default:
+    |   ldr reg1, [BASE, RB, lsl #3]
+    |    ldr reg2, [BASE, RC, lsl #3]
+    ||  break;
+    ||}
+    |.endmacro
+    |
+    |// Emit "ins" (a branch instruction or one of the check macros above)
+    |// with the metamethod handler matching vk as its target.
+    |.macro ins_arithfallback, ins
+    ||switch (vk) {
+    ||case 0:
+    |   ins ->vmeta_arith_vn
+    ||  break;
+    ||case 1:
+    |   ins ->vmeta_arith_nv
+    ||  break;
+    ||default:
+    |   ins ->vmeta_arith_vv
+    ||  break;
+    ||}
+    |.endmacro
+    |
+    |// FP modulo: res = reg1 - floor(reg1/reg2)*reg2. Clobbers d2.
+    |.macro ins_arithmod, res, reg1, reg2
+    |  fdiv d2, reg1, reg2
+    |  frintm d2, d2
+    |  fmsub res, d2, reg2, reg1
+    |.endmacro
+    |
+    |// Dual-number arithmetic: integer fast path with overflow detection,
+    |// FP variant at label 5 if either operand is not an integer.
+    |// The pseudo instruction "smull" selects the multiply sequence (overflow
+    |// = 64 bit product differs from its sign-extended low word), and
+    |// "ins_arithmodi" selects the call to ->vm_modi after a zero-divisor
+    |// check. Overflow and zero divisors go to the metamethod handler.
+    |.macro ins_arithdn, intins, fpins
+    |  ins_arithhead
+    |  ins_arithload CARG1, CARG2
+    |  ins_arithcheck_int >5
+    |.if "intins" == "smull"
+    |  smull CARG1, CARG1w, CARG2w
+    |  cmp CARG1, CARG1, sxtw
+    |   mov CARG1w, CARG1w
+    |  ins_arithfallback bne
+    |.elif "intins" == "ins_arithmodi"
+    |  ins_arithfallback ins_arithcheck_nzdiv
+    |  bl ->vm_modi
+    |.else
+    |  intins CARG1w, CARG1w, CARG2w
+    |  ins_arithfallback bvs
+    |.endif
+    |  add CARG1, CARG1, TISNUM
+    |  str CARG1, [BASE, RA, lsl #3]
+    |4:
+    |  ins_next
+    |
+    |5:  // FP variant.
+    |  ins_arithload FARG1, FARG2
+    |  ins_arithfallback ins_arithcheck_num
+    |  fpins FARG1, FARG1, FARG2
+    |  str FARG1, [BASE, RA, lsl #3]
+    |  b <4
+    |.endmacro
+    |
+    |// FP-only arithmetic. The operands are loaded twice: into CARG1/CARG2
+    |// for the type check and into FARG1/FARG2 for the operation. The pseudo
+    |// instruction "fpow" selects a call to the C function pow.
+    |.macro ins_arithfp, fpins
+    |  ins_arithhead
+    |  ins_arithload CARG1, CARG2
+    |  ins_arithload FARG1, FARG2
+    |  ins_arithfallback ins_arithcheck_num
+    |.if "fpins" == "fpow"
+    |  bl extern pow
+    |.else
+    |  fpins FARG1, FARG1, FARG2
+    |.endif
+    |  str FARG1, [BASE, RA, lsl #3]
+    |  ins_next
+    |.endmacro
+
+  /* Arithmetic bytecodes. ADD/SUB/MUL/MOD have an integer fast path plus an
+  ** FP variant (ins_arithdn); DIV and POW are FP only (ins_arithfp).
+  */
+  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+    |  ins_arithdn adds, fadd
+    break;
+  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+    |  ins_arithdn subs, fsub
+    break;
+  case BC_MULVN: case BC_MULNV: case BC_MULVV:
+    |  ins_arithdn smull, fmul
+    break;
+  case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+    |  ins_arithfp fdiv
+    break;
+  case BC_MODVN: case BC_MODNV: case BC_MODVV:
+    |  ins_arithdn ins_arithmodi, ins_arithmod
+    break;
+  case BC_POW:
+    |  // NYI: (partial) integer arithmetic.
+    |  ins_arithfp fpow
+    break;
+
+  /* Concatenation of slots RB..RC via lj_meta_cat. A non-NULL return value
+  ** is passed on to ->vmeta_binop. On completion the value of slot RB is
+  ** copied to RA; RB is re-read from the byte at PC-1 because the register
+  ** does not survive the C call.
+  */
+  case BC_CAT:
+    |  decode_RB RB, INS
+    |   and RC, RC, #255
+    |  // RA = dst, RB = src_start, RC = src_end
+    |   str BASE, L->base
+    |  sub CARG3, RC, RB
+    |  add CARG2, BASE, RC, lsl #3
+    |->BC_CAT_Z:
+    |  // RA = dst, CARG2 = top-1, CARG3 = left
+    |  mov CARG1, L
+    |   str PC, SAVE_PC
+    |  bl extern lj_meta_cat		// (lua_State *L, TValue *top, int left)
+    |  // Returns NULL (finished) or TValue * (metamethod).
+    |  ldrb RBw, [PC, #-1]
+    |   ldr BASE, L->base
+    |   cbnz CRET1, ->vmeta_binop
+    |  ldr TMP0, [BASE, RB, lsl #3]
+    |  str TMP0, [BASE, RA, lsl #3]	// Copy result to RA.
+    |  ins_next
+    break;
+
+  /* -- Constant ops ------------------------------------------------------ */
+
+  /* Load a string constant: the constant index is the complement of RC and
+  ** the loaded pointer is tagged with LJ_TSTR << 47.
+  */
+  case BC_KSTR:
+    |  // RA = dst, RC = str_const (~)
+    |  mvn RC, RC
+    |  ldr TMP0, [KBASE, RC, lsl #3]
+    |   movn TMP1, #~LJ_TSTR
+    |  add TMP0, TMP0, TMP1, lsl #47
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    break;
+  /* Same as BC_KSTR, but tags with LJ_TCDATA. Emits no code without FFI. */
+  case BC_KCDATA:
+    |.if FFI
+    |  // RA = dst, RC = cdata_const (~)
+    |  mvn RC, RC
+    |  ldr TMP0, [KBASE, RC, lsl #3]
+    |   movn TMP1, #~LJ_TCDATA
+    |  add TMP0, TMP0, TMP1, lsl #47
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    |.endif
+    break;
+  /* Load a sign-extended 16 bit literal as a tagged integer. */
+  case BC_KSHORT:
+    |  // RA = dst, RC = int16_literal
+    |  sxth RCw, RCw
+    |  add TMP0, RC, TISNUM
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    break;
+  /* Copy a number constant from the constant table. */
+  case BC_KNUM:
+    |  // RA = dst, RC = num_const
+    |  ldr TMP0, [KBASE, RC, lsl #3]
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    break;
+  /* Store a primitive value, built as ~(RC << 47). */
+  case BC_KPRI:
+    |  // RA = dst, RC = primitive_type (~)
+    |  mvn TMP0, RC, lsl #47
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    break;
+  /* Set slots RA..RC (inclusive) to nil. The first two stores are
+  ** unconditional, so at least two slots are always written.
+  */
+  case BC_KNIL:
+    |  // RA = base, RC = end
+    |  add RA, BASE, RA, lsl #3
+    |   add RC, BASE, RC, lsl #3
+    |  str TISNIL, [RA], #8
+    |1:
+    |   cmp RA, RC
+    |  str TISNIL, [RA], #8
+    |   blt <1
+    |  ins_next_
+    break;
+
+  /* -- Upvalue and function ops ------------------------------------------ */
+
+  /* Read an upvalue: fetch the GCupval pointer from the uvptr array of the
+  ** current function, follow its v pointer and copy the value to RA.
+  */
+  case BC_UGET:
+    |  // RA = dst, RC = uvnum
+    |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+    |   add RC, RC, #offsetof(GCfuncL, uvptr)/8
+    |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+    |  ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3]
+    |  ldr CARG2, UPVAL:CARG2->v
+    |  ldr TMP0, [CARG2]
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    break;
+  /* Store a slot value into an upvalue. The store itself is unconditional.
+  ** The slow path at label 2 is taken only if the upvalue is black and
+  ** closed and the value is a GC object; there lj_gc_barrieruv is called if
+  ** that object is white. The three conditions are chained through the
+  ** flags with tst/ccmp/ccmn.
+  */
+  case BC_USETV:
+    |  // RA = uvnum, RC = src
+    |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+    |   add RA, RA, #offsetof(GCfuncL, uvptr)/8
+    |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+    |  ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
+    |   ldr CARG3, [BASE, RC, lsl #3]
+    |    ldr CARG2, UPVAL:CARG1->v
+    |  ldrb TMP2w, UPVAL:CARG1->marked
+    |  ldrb TMP0w, UPVAL:CARG1->closed
+    |    asr ITYPE, CARG3, #47
+    |   str CARG3, [CARG2]
+    |    add ITYPE, ITYPE, #-LJ_TISGCV
+    |  tst TMP2w, #LJ_GC_BLACK		// isblack(uv)
+    |  ccmp TMP0w, #0, #4, ne		// && uv->closed
+    |    ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne	// && tvisgcv(v)
+    |  bhi >2
+    |1:
+    |  ins_next
+    |
+    |2:  // Check if new value is white.
+    |  and GCOBJ:CARG3, CARG3, #LJ_GCVMASK
+    |  ldrb TMP1w, GCOBJ:CARG3->gch.marked
+    |  tst TMP1w, #LJ_GC_WHITES		// iswhite(str)
+    |  beq <1
+    |  // Crossed a write barrier. Move the barrier forward.
+    |  // CARG2 still holds uv->v, the TValue pointer argument.
+    |  mov CARG1, GL
+    |  bl extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
+    |  b <1
+    break;
+  /* Store a string constant into an upvalue. The store itself is
+  ** unconditional. If the upvalue is black, the slow path at label 2 calls
+  ** lj_gc_barrieruv, but only if the string is white AND the upvalue is
+  ** closed.
+  */
+  case BC_USETS:
+    |  // RA = uvnum, RC = str_const (~)
+    |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+    |   add RA, RA, #offsetof(GCfuncL, uvptr)/8
+    |    mvn RC, RC
+    |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+    |  ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
+    |   ldr STR:CARG3, [KBASE, RC, lsl #3]
+    |   movn TMP0, #~LJ_TSTR
+    |    ldr CARG2, UPVAL:CARG1->v
+    |  ldrb TMP2w, UPVAL:CARG1->marked
+    |   add TMP0, STR:CARG3, TMP0, lsl #47
+    |    ldrb TMP1w, STR:CARG3->marked
+    |   str TMP0, [CARG2]
+    |  tbnz TMP2w, #2, >2		// isblack(uv)
+    |1:
+    |  ins_next
+    |
+    |2:  // Check if string is white and ensure upvalue is closed.
+    |  ldrb TMP0w, UPVAL:CARG1->closed
+    |    tst TMP1w, #LJ_GC_WHITES	// iswhite(str)
+    |  // If the string is not white the ccmp condition (ne) fails and the
+    |  // flags are taken from the immediate. It must be #4 (Z set) so that
+    |  // the beq below skips the barrier; with #0 the barrier would be called
+    |  // for non-white strings, even when the upvalue is still open.
+    |  ccmp TMP0w, #0, #4, ne
+    |  beq <1
+    |  // Crossed a write barrier. Move the barrier forward.
+    |  mov CARG1, GL
+    |  bl extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
+    |  b <1
+    break;
+  /* Store a number constant into an upvalue. No GC barrier code is emitted
+  ** for this case.
+  */
+  case BC_USETN:
+    |  // RA = uvnum, RC = num_const
+    |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+    |   add RA, RA, #offsetof(GCfuncL, uvptr)/8
+    |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+    |  ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
+    |   ldr TMP0, [KBASE, RC, lsl #3]
+    |  ldr CARG2, UPVAL:CARG2->v
+    |   str TMP0, [CARG2]
+    |  ins_next
+    break;
+  /* Store a primitive value, built as ~(RC << 47), into an upvalue. */
+  case BC_USETP:
+    |  // RA = uvnum, RC = primitive_type (~)
+    |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+    |   add RA, RA, #offsetof(GCfuncL, uvptr)/8
+    |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+    |  ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
+    |   mvn TMP0, RC, lsl #47
+    |  ldr CARG2, UPVAL:CARG2->v
+    |   str TMP0, [CARG2]
+    |  ins_next
+    break;
+
+  /* Close upvalues and jump. PC is always set to the jump target
+  ** (PC + 4*RC - 0x20000). lj_func_closeuv is only called if L->openupval
+  ** is non-NULL.
+  */
+  case BC_UCLO:
+    |  // RA = level, RC = target
+    |  ldr CARG3, L->openupval
+    |   add RC, PC, RC, lsl #2
+    |    str BASE, L->base
+    |   sub PC, RC, #0x20000
+    |  cbz CARG3, >1
+    |  mov CARG1, L
+    |  add CARG2, BASE, RA, lsl #3
+    |  bl extern lj_func_closeuv	// (lua_State *L, TValue *level)
+    |  ldr BASE, L->base
+    |1:
+    |  ins_next
+    break;
+
+  /* Create a closure from a prototype constant with the current function as
+  ** parent. The returned GCfuncL pointer is tagged with LJ_TFUNC << 47 and
+  ** stored to RA.
+  */
+  case BC_FNEW:
+    |  // RA = dst, RC = proto_const (~) (holding function prototype)
+    |  mvn RC, RC
+    |   str BASE, L->base
+    |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+    |    str PC, SAVE_PC
+    |   ldr CARG2, [KBASE, RC, lsl #3]
+    |    mov CARG1, L
+    |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+    |  // (lua_State *L, GCproto *pt, GCfuncL *parent)
+    |  bl extern lj_func_newL_gc
+    |  // Returns GCfuncL *.
+    |  ldr BASE, L->base
+    |   movn TMP0, #~LJ_TFUNC
+    |   add CRET1, CRET1, TMP0, lsl #47
+    |  str CRET1, [BASE, RA, lsl #3]
+    |  ins_next
+    break;
+
+  /* -- Table ops --------------------------------------------------------- */
+
+  /* Create a new table (TNEW) or duplicate a template table (TDUP).
+  ** A GC step (label 5) runs first if gc.total >= gc.threshold.
+  ** TNEW: asize = low 11 bits of RC, with the value 0x7ff replaced by 0x801;
+  ** hbits = RC >> 11.
+  */
+  case BC_TNEW:
+  case BC_TDUP:
+    |  // RA = dst, RC = (hbits|asize) | tab_const (~)
+    |  ldp CARG3, CARG4, GL->gc.total	// Assumes threshold follows total.
+    |   str BASE, L->base
+    |   str PC, SAVE_PC
+    |   mov CARG1, L
+    |  cmp CARG3, CARG4
+    |  bhs >5
+    |1:
+    if (op == BC_TNEW) {
+      |  and CARG2, RC, #0x7ff
+      |   lsr CARG3, RC, #11
+      |  cmp CARG2, #0x7ff
+      |  mov TMP0, #0x801
+      |  csel CARG2, CARG2, TMP0, ne
+      |  bl extern lj_tab_new  // (lua_State *L, int32_t asize, uint32_t hbits)
+      |  // Returns GCtab *.
+    } else {
+      |  mvn RC, RC
+      |  ldr CARG2, [KBASE, RC, lsl #3]
+      |  bl extern lj_tab_dup  // (lua_State *L, Table *kt)
+      |  // Returns GCtab *.
+    }
+    |  ldr BASE, L->base
+    |  // Tag the result by writing the top 16 bits of the pointer.
+    |   movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48
+    |  str CRET1, [BASE, RA, lsl #3]
+    |  ins_next
+    |
+    |5:
+    |  bl extern lj_gc_step_fixtop  // (lua_State *L)
+    |  mov CARG1, L			// Reload the first argument after the call.
+    |  b <1
+    break;
+
+  case BC_GGET:
+    |  // RA = dst, RC = str_const (~)
+  case BC_GSET:
+    |  // RA = src, RC = str_const (~)
+    |  ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
+    |   mvn RC, RC
+    |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+    |  ldr TAB:CARG2, LFUNC:CARG1->env
+    |   ldr STR:RC, [KBASE, RC, lsl #3]
+    if (op == BC_GGET) {
+      |  b ->BC_TGETS_Z
+    } else {
+      |  b ->BC_TSETS_Z
+    }
+    break;
+
+  case BC_TGETV:
+    |  decode_RB RB, INS
+    |   and RC, RC, #255
+    |  // RA = dst, RB = table, RC = key
+    |  ldr CARG2, [BASE, RB, lsl #3]
+    |   ldr TMP1, [BASE, RC, lsl #3]
+    |  checktab CARG2, ->vmeta_tgetv
+    |  checkint TMP1, >9		// Integer key?
+    |  ldr CARG3, TAB:CARG2->array
+    |   ldr CARG1w, TAB:CARG2->asize
+    |  add CARG3, CARG3, TMP1, uxtw #3
+    |   cmp TMP1w, CARG1w		// In array part?
+    |   bhs ->vmeta_tgetv
+    |  ldr TMP0, [CARG3]
+    |  cmp TMP0, TISNIL
+    |  beq >5
+    |1:
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    |
+    |5:  // Check for __index if table value is nil.
+    |  ldr TAB:CARG1, TAB:CARG2->metatable
+    |  cbz TAB:CARG1, <1		// No metatable: done.
+    |  ldrb TMP1w, TAB:CARG1->nomm
+    |  tbnz TMP1w, #MM_index, <1	// 'no __index' flag set: done.
+    |  b ->vmeta_tgetv
+    |
+    |9:
+    |  asr ITYPE, TMP1, #47
+    |  cmn ITYPE, #-LJ_TSTR		// String key?
+    |  bne ->vmeta_tgetv
+    |   and STR:RC, TMP1, #LJ_GCVMASK
+    |  b ->BC_TGETS_Z
+    break;
+  case BC_TGETS:
+    |  decode_RB RB, INS
+    |   and RC, RC, #255
+    |  // RA = dst, RB = table, RC = str_const (~)
+    |  ldr CARG2, [BASE, RB, lsl #3]
+    |   mvn RC, RC
+    |   ldr STR:RC, [KBASE, RC, lsl #3]
+    |  checktab CARG2, ->vmeta_tgets1
+    |->BC_TGETS_Z:
+    |  // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst
+    |  ldr TMP1w, TAB:CARG2->hmask
+    |   ldr TMP2w, STR:RC->hash
+    |    ldr NODE:CARG3, TAB:CARG2->node
+    |  and TMP1w, TMP1w, TMP2w		// idx = str->hash & tab->hmask
+    |  add TMP1, TMP1, TMP1, lsl #1
+    |  movn CARG4, #~LJ_TSTR
+    |    add NODE:CARG3, NODE:CARG3, TMP1, lsl #3  // node = tab->node + idx*3*8
+    |  add CARG4, STR:RC, CARG4, lsl #47	// Tagged key to look for.
+    |1:
+    |  ldp TMP0, CARG1, NODE:CARG3->val
+    |   ldr NODE:CARG3, NODE:CARG3->next
+    |  cmp CARG1, CARG4
+    |  bne >4
+    |  cmp TMP0, TISNIL
+    |  beq >5
+    |3:
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    |
+    |4:  // Follow hash chain.
+    |  cbnz NODE:CARG3, <1
+    |  // End of hash chain: key not found, nil result.
+    |   mov TMP0, TISNIL
+    |
+    |5:  // Check for __index if table value is nil.
+    |  ldr TAB:CARG1, TAB:CARG2->metatable
+    |  cbz TAB:CARG1, <3		// No metatable: done.
+    |  ldrb TMP1w, TAB:CARG1->nomm
+    |  tbnz TMP1w, #MM_index, <3	// 'no __index' flag set: done.
+    |  b ->vmeta_tgets
+    break;
+  case BC_TGETB:
+    |  decode_RB RB, INS
+    |   and RC, RC, #255
+    |  // RA = dst, RB = table, RC = index
+    |  ldr CARG2, [BASE, RB, lsl #3]
+    |  checktab CARG2, ->vmeta_tgetb
+    |  ldr CARG3, TAB:CARG2->array
+    |   ldr CARG1w, TAB:CARG2->asize
+    |  add CARG3, CARG3, RC, lsl #3
+    |   cmp RCw, CARG1w			// In array part?
+    |   bhs ->vmeta_tgetb
+    |  ldr TMP0, [CARG3]
+    |  cmp TMP0, TISNIL
+    |  beq >5
+    |1:
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    |
+    |5:  // Check for __index if table value is nil.
+    |  ldr TAB:CARG1, TAB:CARG2->metatable
+    |  cbz TAB:CARG1, <1		// No metatable: done.
+    |  ldrb TMP1w, TAB:CARG1->nomm
+    |  tbnz TMP1w, #MM_index, <1	// 'no __index' flag set: done.
+    |  b ->vmeta_tgetb
+    break;
+  case BC_TGETR:
+    |  decode_RB RB, INS
+    |   and RC, RC, #255
+    |  // RA = dst, RB = table, RC = key
+    |  ldr CARG1, [BASE, RB, lsl #3]
+    |   ldr TMP1, [BASE, RC, lsl #3]
+    |  and TAB:CARG1, CARG1, #LJ_GCVMASK
+    |  ldr CARG3, TAB:CARG1->array
+    |   ldr TMP2w, TAB:CARG1->asize
+    |  add CARG3, CARG3, TMP1w, uxtw #3
+    |   cmp TMP1w, TMP2w		// In array part?
+    |   bhs ->vmeta_tgetr
+    |  ldr TMP0, [CARG3]
+    |->BC_TGETR_Z:
+    |  str TMP0, [BASE, RA, lsl #3]
+    |  ins_next
+    break;
+
+  case BC_TSETV:
+    |  decode_RB RB, INS
+    |   and RC, RC, #255
+    |  // RA = src, RB = table, RC = key
+    |  ldr CARG2, [BASE, RB, lsl #3]
+    |   ldr TMP1, [BASE, RC, lsl #3]
+    |  checktab CARG2, ->vmeta_tsetv
+    |  checkint TMP1, >9		// Integer key?
+    |  ldr CARG3, TAB:CARG2->array
+    |   ldr CARG1w, TAB:CARG2->asize
+    |  add CARG3, CARG3, TMP1, uxtw #3
+    |   cmp TMP1w, CARG1w		// In array part?
+    |   bhs ->vmeta_tsetv
+    |  ldr TMP1, [CARG3]
+    |   ldr TMP0, [BASE, RA, lsl #3]
+    |    ldrb TMP2w, TAB:CARG2->marked
+    |  cmp TMP1, TISNIL			// Previous value is nil?
+    |  beq >5
+    |1:
+    |   str TMP0, [CARG3]
+    |    tbnz TMP2w, #2, >7		// isblack(table)
+    |2:
+    |   ins_next
+    |
+    |5:  // Check for __newindex if previous value is nil.
+    |  ldr TAB:CARG1, TAB:CARG2->metatable
+    |  cbz TAB:CARG1, <1		// No metatable: done.
+    |  ldrb TMP1w, TAB:CARG1->nomm
+    |  tbnz TMP1w, #MM_newindex, <1	// 'no __newindex' flag set: done.
+    |  b ->vmeta_tsetv
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:CARG2, TMP2w, TMP1
+    |  b <2
+    |
+    |9:
+    |  asr ITYPE, TMP1, #47
+    |  cmn ITYPE, #-LJ_TSTR		// String key?
+    |  bne ->vmeta_tsetv
+    |   and STR:RC, TMP1, #LJ_GCVMASK
+    |  b ->BC_TSETS_Z
+    break;
+  case BC_TSETS:
+    |  decode_RB RB, INS
+    |   and RC, RC, #255
+    |  // RA = src, RB = table, RC = str_const (~)
+    |  ldr CARG2, [BASE, RB, lsl #3]
+    |   mvn RC, RC
+    |   ldr STR:RC, [KBASE, RC, lsl #3]
+    |  checktab CARG2, ->vmeta_tsets1
+    |->BC_TSETS_Z:
+    |  // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src
+    |  ldr TMP1w, TAB:CARG2->hmask
+    |   ldr TMP2w, STR:RC->hash
+    |    ldr NODE:CARG3, TAB:CARG2->node
+    |  and TMP1w, TMP1w, TMP2w		// idx = str->hash & tab->hmask
+    |  add TMP1, TMP1, TMP1, lsl #1
+    |  movn CARG4, #~LJ_TSTR
+    |    add NODE:CARG3, NODE:CARG3, TMP1, lsl #3  // node = tab->node + idx*3*8
+    |  add CARG4, STR:RC, CARG4, lsl #47	// Tagged key to look for.
+    |   strb wzr, TAB:CARG2->nomm	// Clear metamethod cache.
+    |1:
+    |  ldp TMP1, CARG1, NODE:CARG3->val
+    |   ldr NODE:TMP3, NODE:CARG3->next
+    |    ldrb TMP2w, TAB:CARG2->marked
+    |  cmp CARG1, CARG4
+    |  bne >5
+    |   ldr TMP0, [BASE, RA, lsl #3]
+    |  cmp TMP1, TISNIL			// Previous value is nil?
+    |  beq >4
+    |2:
+    |   str TMP0, NODE:CARG3->val
+    |    tbnz TMP2w, #2, >7		// isblack(table)
+    |3:
+    |  ins_next
+    |
+    |4:  // Check for __newindex if previous value is nil.
+    |  ldr TAB:CARG1, TAB:CARG2->metatable
+    |  cbz TAB:CARG1, <2		// No metatable: done.
+    |  ldrb TMP1w, TAB:CARG1->nomm
+    |  tbnz TMP1w, #MM_newindex, <2	// 'no __newindex' flag set: done.
+    |  b ->vmeta_tsets
+    |
+    |5:  // Follow hash chain.
+    |  mov NODE:CARG3, NODE:TMP3
+    |  cbnz NODE:TMP3, <1
+    |  // End of hash chain: key not found, add a new one.
+    |
+    |  // But check for __newindex first.
+    |  ldr TAB:CARG1, TAB:CARG2->metatable
+    |  cbz TAB:CARG1, >6		// No metatable: continue.
+    |  ldrb TMP1w, TAB:CARG1->nomm
+    |  // 'no __newindex' flag NOT set: check.
+    |  tbz TMP1w, #MM_newindex, ->vmeta_tsets
+    |6:
+    |  movn TMP1, #~LJ_TSTR
+    |   str PC, SAVE_PC
+    |  add TMP0, STR:RC, TMP1, lsl #47
+    |   str BASE, L->base
+    |   mov CARG1, L
+    |  str TMP0, TMPD
+    |   add CARG3, sp, TMPDofs
+    |  bl extern lj_tab_newkey		// (lua_State *L, GCtab *t, TValue *k)
+    |  // Returns TValue *.
+    |  ldr BASE, L->base
+    |  ldr TMP0, [BASE, RA, lsl #3]
+    |  str TMP0, [CRET1]
+    |  b <3				// No 2nd write barrier needed.
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:CARG2, TMP2w, TMP1
+    |  b <3
+    break;
+  case BC_TSETB:
+    |  decode_RB RB, INS
+    |   and RC, RC, #255
+    |  // RA = src, RB = table, RC = index
+    |  ldr CARG2, [BASE, RB, lsl #3]
+    |  checktab CARG2, ->vmeta_tsetb
+    |  ldr CARG3, TAB:CARG2->array
+    |   ldr CARG1w, TAB:CARG2->asize
+    |  add CARG3, CARG3, RC, lsl #3
+    |   cmp RCw, CARG1w			// In array part?
+    |   bhs ->vmeta_tsetb
+    |  ldr TMP1, [CARG3]
+    |   ldr TMP0, [BASE, RA, lsl #3]
+    |    ldrb TMP2w, TAB:CARG2->marked
+    |  cmp TMP1, TISNIL			// Previous value is nil?
+    |  beq >5
+    |1:
+    |   str TMP0, [CARG3]
+    |    tbnz TMP2w, #2, >7		// isblack(table)
+    |2:
+    |   ins_next
+    |
+    |5:  // Check for __newindex if previous value is nil.
+    |  ldr TAB:CARG1, TAB:CARG2->metatable
+    |  cbz TAB:CARG1, <1		// No metatable: done.
+    |  ldrb TMP1w, TAB:CARG1->nomm
+    |  tbnz TMP1w, #MM_newindex, <1	// 'no __newindex' flag set: done.
+    |  b ->vmeta_tsetb
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:CARG2, TMP2w, TMP1
+    |  b <2
+    break;
+  case BC_TSETR:
+    |  decode_RB RB, INS
+    |   and RC, RC, #255
+    |  // RA = src, RB = table, RC = key
+    |  ldr CARG2, [BASE, RB, lsl #3]
+    |   ldr TMP1, [BASE, RC, lsl #3]
+    |  and TAB:CARG2, CARG2, #LJ_GCVMASK
+    |  ldr CARG1, TAB:CARG2->array
+    |    ldrb TMP2w, TAB:CARG2->marked
+    |   ldr CARG4w, TAB:CARG2->asize
+    |  add CARG1, CARG1, TMP1, uxtw #3
+    |    tbnz TMP2w, #2, >7		// isblack(table)
+    |2:
+    |   cmp TMP1w, CARG4w		// In array part?
+    |   bhs ->vmeta_tsetr
+    |->BC_TSETR_Z:
+    |   ldr TMP0, [BASE, RA, lsl #3]
+    |   str TMP0, [CARG1]
+    |   ins_next
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:CARG2, TMP2w, TMP0
+    |  b <2
+    break;
+
+  case BC_TSETM:
+    |  // RA = base (table at base-1), RC = num_const (start index)
+    |  add RA, BASE, RA, lsl #3
+    |1:
+    |   ldr RBw, SAVE_MULTRES
+    |  ldr TAB:CARG2, [RA, #-8]		// Guaranteed to be a table.
+    |   ldr TMP1, [KBASE, RC, lsl #3]	// Integer constant is in lo-word.
+    |    sub RB, RB, #8
+    |    cbz RB, >4			// Nothing to copy?
+    |  and TAB:CARG2, CARG2, #LJ_GCVMASK
+    |  ldr CARG1w, TAB:CARG2->asize
+    |   add CARG3w, TMP1w, RBw, lsr #3
+    |   ldr CARG4, TAB:CARG2->array
+    |  cmp CARG3, CARG1
+    |    add RB, RA, RB
+    |  bhi >5
+    |   add TMP1, CARG4, TMP1w, uxtw #3
+    |    ldrb TMP2w, TAB:CARG2->marked
+    |3:  // Copy result slots to table.
+    |   ldr TMP0, [RA], #8
+    |   str TMP0, [TMP1], #8
+    |  cmp RA, RB
+    |  blo <3
+    |    tbnz TMP2w, #2, >7		// isblack(table)
+    |4:
+    |  ins_next
+    |
+    |5:  // Need to resize array part.
+    |   str BASE, L->base
+    |  mov CARG1, L
+    |   str PC, SAVE_PC
+    |  bl extern lj_tab_reasize		// (lua_State *L, GCtab *t, int nasize)
+    |  // Must not reallocate the stack.
+    |  b <1
+    |
+    |7:  // Possible table write barrier for any value. Skip valiswhite check.
+    |  barrierback TAB:CARG2, TMP2w, TMP1
+    |  b <4
+    break;
+
+  /* -- Calls and vararg handling ----------------------------------------- */
+
+  case BC_CALLM:
+    |  // RA = base, (RB = nresults+1,) RC = extra_nargs
+    |  ldr TMP0w, SAVE_MULTRES
+    |  decode_RC8RD NARGS8:RC, RC
+    |  add NARGS8:RC, NARGS8:RC, TMP0
+    |  b ->BC_CALL_Z
+    break;
+  case BC_CALL:
+    |  decode_RC8RD NARGS8:RC, RC
+    |  // RA = base, (RB = nresults+1,) RC = (nargs+1)*8
+    |->BC_CALL_Z:
+    |  mov RB, BASE			// Save old BASE for vmeta_call.
+    |  add BASE, BASE, RA, lsl #3
+    |  ldr CARG3, [BASE]
+    |   sub NARGS8:RC, NARGS8:RC, #8
+    |   add BASE, BASE, #16
+    |  checkfunc CARG3, ->vmeta_call
+    |  ins_call
+    break;
+
+  case BC_CALLMT:
+    |  // RA = base, (RB = 0,) RC = extra_nargs
+    |  ldr TMP0w, SAVE_MULTRES
+    |  add NARGS8:RC, TMP0, RC, lsl #3
+    |  b ->BC_CALLT1_Z
+    break;
+  case BC_CALLT:
+    |  lsl NARGS8:RC, RC, #3
+    |  // RA = base, (RB = 0,) RC = (nargs+1)*8
+    |->BC_CALLT1_Z:
+    |  add RA, BASE, RA, lsl #3
+    |  ldr TMP1, [RA]
+    |   sub NARGS8:RC, NARGS8:RC, #8
+    |   add RA, RA, #16
+    |  checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt
+    |  ldr PC, [BASE, FRAME_PC]
+    |->BC_CALLT2_Z:
+    |   mov RB, #0
+    |   ldrb TMP2w, LFUNC:CARG3->ffid
+    |  tst PC, #FRAME_TYPE
+    |  bne >7
+    |1:
+    |  str TMP1, [BASE, FRAME_FUNC]	// Copy function down, but keep PC.
+    |  cbz NARGS8:RC, >3
+    |2:
+    |  ldr TMP0, [RA, RB]
+    |   add TMP1, RB, #8
+    |   cmp TMP1, NARGS8:RC
+    |  str TMP0, [BASE, RB]
+    |    mov RB, TMP1
+    |   bne <2
+    |3:
+    |  cmp TMP2, #1			// (> FF_C) Calling a fast function?
+    |  bhi >5
+    |4:
+    |  ins_callt
+    |
+    |5:  // Tailcall to a fast function with a Lua frame below.
+    |  ldrb RAw, [PC, #-3]
+    |  sub CARG1, BASE, RA, lsl #3
+    |  ldr LFUNC:CARG1, [CARG1, #-32]
+    |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+    |  ldr CARG1, LFUNC:CARG1->pc
+    |  ldr KBASE, [CARG1, #PC2PROTO(k)]
+    |  b <4
+    |
+    |7:  // Tailcall from a vararg function.
+    |  eor PC, PC, #FRAME_VARG
+    |  tst PC, #FRAME_TYPEP		// Vararg frame below?
+    |  csel TMP2, RB, TMP2, ne		// Clear ffid if no Lua function below.
+    |  bne <1
+    |  sub BASE, BASE, PC
+    |  ldr PC, [BASE, FRAME_PC]
+    |  tst PC, #FRAME_TYPE
+    |  csel TMP2, RB, TMP2, ne		// Clear ffid if no Lua function below.
+    |  b <1
+    break;
+
+  case BC_ITERC:
+    |  // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+    |  add RA, BASE, RA, lsl #3
+    |  ldr CARG3, [RA, #-24]
+    |    mov RB, BASE			// Save old BASE for vmeta_call.
+    |   ldp CARG1, CARG2, [RA, #-16]
+    |    add BASE, RA, #16
+    |    mov NARGS8:RC, #16		// Iterators get 2 arguments.
+    |  str CARG3, [RA]			// Copy callable.
+    |   stp CARG1, CARG2, [RA, #16]	// Copy state and control var.
+    |  checkfunc CARG3, ->vmeta_call
+    |  ins_call
+    break;
+
+  case BC_ITERN:
+    |  // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+    |.if JIT
+    |  // NYI: add hotloop, record BC_ITERN.
+    |.endif
+    |  add RA, BASE, RA, lsl #3
+    |  ldr TAB:RB, [RA, #-16]
+    |    ldrh TMP3w, [PC, #2]
+    |  ldr CARG1w, [RA, #-8]		// Get index from control var.
+    |    add PC, PC, #4
+    |    add TMP3, PC, TMP3, lsl #2
+    |  and TAB:RB, RB, #LJ_GCVMASK
+    |    sub TMP3, TMP3, #0x20000
+    |  ldr TMP1w, TAB:RB->asize
+    |   ldr CARG2, TAB:RB->array
+    |1:  // Traverse array part.
+    |  subs RC, CARG1, TMP1
+    |   add CARG3, CARG2, CARG1, lsl #3
+    |  bhs >5				// Index points after array part?
+    |   ldr TMP0, [CARG3]
+    |   cmp TMP0, TISNIL
+    |   cinc CARG1, CARG1, eq		// Skip holes in array part.
+    |   beq <1
+    |   add CARG1, CARG1, TISNUM
+    |   stp CARG1, TMP0, [RA]
+    |    add CARG1, CARG1, #1
+    |3:
+    |    str CARG1w, [RA, #-8]		// Update control var.
+    |  mov PC, TMP3
+    |4:
+    |  ins_next
+    |
+    |5:  // Traverse hash part.
+    |  ldr TMP2w, TAB:RB->hmask
+    |   ldr NODE:RB, TAB:RB->node
+    |6:
+    |   add CARG1, RC, RC, lsl #1
+    |  cmp RC, TMP2			// End of iteration? Branch to ITERN+1.
+    |   add NODE:CARG3, NODE:RB, CARG1, lsl #3  // node = tab->node + idx*3*8
+    |  bhi <4
+    |  ldp TMP0, CARG1, NODE:CARG3->val
+    |  cmp TMP0, TISNIL
+    |   add RC, RC, #1
+    |  beq <6				// Skip holes in hash part.
+    |  stp CARG1, TMP0, [RA]
+    |  add CARG1, RC, TMP1
+    |  b <3
+    break;
+
+  case BC_ISNEXT:
+    |  // RA = base, RC = target (points to ITERN)
+    |  add RA, BASE, RA, lsl #3
+    |  ldr CFUNC:CARG1, [RA, #-24]
+    |     add RC, PC, RC, lsl #2
+    |   ldp TAB:CARG3, CARG4, [RA, #-16]
+    |     sub RC, RC, #0x20000
+    |  checkfunc CFUNC:CARG1, >5
+    |   asr TMP0, TAB:CARG3, #47
+    |  ldrb TMP1w, CFUNC:CARG1->ffid
+    |   cmn TMP0, #-LJ_TTAB
+    |   ccmp CARG4, TISNIL, #0, eq
+    |  ccmp TMP1w, #FF_next_N, #0, eq
+    |  bne >5
+    |  mov TMP0w, #0xfffe7fff
+    |  lsl TMP0, TMP0, #32
+    |  str TMP0, [RA, #-8]		// Initialize control var.
+    |1:
+    |     mov PC, RC
+    |  ins_next
+    |
+    |5:  // Despecialize bytecode if any of the checks fail.
+    |  mov TMP0, #BC_JMP
+    |   mov TMP1, #BC_ITERC
+    |  strb TMP0w, [PC, #-4]
+    |   strb TMP1w, [RC]
+    |  b <1
+    break;
+
+  case BC_VARG:
+    |  decode_RB RB, INS
+    |   and RC, RC, #255
+    |  // RA = base, RB = (nresults+1), RC = numparams
+    |  ldr TMP1, [BASE, FRAME_PC]
+    |  add RC, BASE, RC, lsl #3
+    |   add RA, BASE, RA, lsl #3
+    |  add RC, RC, #FRAME_VARG
+    |   add TMP2, RA, RB, lsl #3
+    |  sub RC, RC, TMP1			// RC = vbase
+    |  // Note: RC may now be even _above_ BASE if nargs was < numparams.
+    |   sub TMP3, BASE, #16		// TMP3 = vtop
+    |  cbz RB, >5
+    |   sub TMP2, TMP2, #16
+    |1:  // Copy vararg slots to destination slots.
+    |  cmp RC, TMP3
+    |  ldr TMP0, [RC], #8
+    |  csel TMP0, TMP0, TISNIL, lo
+    |   cmp RA, TMP2
+    |  str TMP0, [RA], #8
+    |   blo <1
+    |2:
+    |  ins_next
+    |
+    |5:  // Copy all varargs.
+    |  ldr TMP0, L->maxstack
+    |   subs TMP2, TMP3, RC
+    |   csel RB, xzr, TMP2, le		// MULTRES = (max(vtop-vbase,0)+1)*8
+    |   add RB, RB, #8
+    |  add TMP1, RA, TMP2
+    |   str RBw, SAVE_MULTRES
+    |   ble <2				// Nothing to copy.
+    |  cmp TMP1, TMP0
+    |  bhi >7
+    |6:
+    |  ldr TMP0, [RC], #8
+    |  str TMP0, [RA], #8
+    |  cmp RC, TMP3
+    |  blo <6
+    |  b <2
+    |
+    |7:  // Grow stack for varargs.
+    |  lsr CARG2, TMP2, #3
+    |   stp BASE, RA, L->base
+    |  mov CARG1, L
+    |  sub RC, RC, BASE			// Need delta, because BASE may change.
+    |   str PC, SAVE_PC
+    |  bl extern lj_state_growstack	// (lua_State *L, int n)
+    |  ldp BASE, RA, L->base
+    |  add RC, BASE, RC
+    |  sub TMP3, BASE, #16
+    |  b <6
+    break;
+
+  /* -- Returns ----------------------------------------------------------- */
+
+  case BC_RETM:
+    |  // RA = results, RC = extra results
+    |  ldr TMP0w, SAVE_MULTRES
+    |   ldr PC, [BASE, FRAME_PC]
+    |    add RA, BASE, RA, lsl #3
+    |  add RC, TMP0, RC, lsl #3
+    |  b ->BC_RETM_Z
+    break;
+
+  case BC_RET:
+    |  // RA = results, RC = nresults+1
+    |  ldr PC, [BASE, FRAME_PC]
+    |   lsl RC, RC, #3
+    |    add RA, BASE, RA, lsl #3
+    |->BC_RETM_Z:
+    |   str RCw, SAVE_MULTRES
+    |1:
+    |  ands CARG1, PC, #FRAME_TYPE
+    |   eor CARG2, PC, #FRAME_VARG
+    |  bne ->BC_RETV2_Z
+    |
+    |->BC_RET_Z:
+    |  // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
+    |  ldr INSw, [PC, #-4]
+    |  subs TMP1, RC, #8
+    |   sub CARG3, BASE, #16
+    |  beq >3
+    |2:
+    |  ldr TMP0, [RA], #8
+    |   add BASE, BASE, #8
+    |   sub TMP1, TMP1, #8
+    |  str TMP0, [BASE, #-24]
+    |   cbnz TMP1, <2
+    |3:
+    |  decode_RA RA, INS
+    |  sub CARG4, CARG3, RA, lsl #3
+    |   decode_RB RB, INS
+    |  ldr LFUNC:CARG1, [CARG4, FRAME_FUNC]
+    |5:
+    |  cmp RC, RB, lsl #3		// More results expected?
+    |  blo >6
+    |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+    |  mov BASE, CARG4
+    |  ldr CARG2, LFUNC:CARG1->pc
+    |  ldr KBASE, [CARG2, #PC2PROTO(k)]
+    |   ins_next
+    |
+    |6:  // Fill up results with nil.
+    |  add BASE, BASE, #8
+    |   add RC, RC, #8
+    |  str TISNIL, [BASE, #-24]
+    |  b <5
+    |
+    |->BC_RETV1_Z:  // Non-standard return case.
+    |  add RA, BASE, RA, lsl #3
+    |->BC_RETV2_Z:
+    |  tst CARG2, #FRAME_TYPEP
+    |  bne ->vm_return
+    |  // Return from vararg function: relocate BASE down.
+    |  sub BASE, BASE, CARG2
+    |  ldr PC, [BASE, FRAME_PC]
+    |  b <1
+    break;
+
+  case BC_RET0: case BC_RET1:
+    |  // RA = results, RC = nresults+1
+    |  ldr PC, [BASE, FRAME_PC]
+    |   lsl RC, RC, #3
+    |   str RCw, SAVE_MULTRES
+    |  ands CARG1, PC, #FRAME_TYPE
+    |   eor CARG2, PC, #FRAME_VARG
+    |  bne ->BC_RETV1_Z
+    |   ldr INSw, [PC, #-4]
+    if (op == BC_RET1) {
+      |  ldr TMP0, [BASE, RA, lsl #3]
+    }
+    |  sub CARG4, BASE, #16
+    |   decode_RA RA, INS
+    |  sub BASE, CARG4, RA, lsl #3
+    if (op == BC_RET1) {
+      |  str TMP0, [CARG4], #8
+    }
+    |   decode_RB RB, INS
+    |  ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
+    |5:
+    |  cmp RC, RB, lsl #3
+    |  blo >6
+    |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+    |  ldr CARG2, LFUNC:CARG1->pc
+    |  ldr KBASE, [CARG2, #PC2PROTO(k)]
+    |  ins_next
+    |
+    |6:  // Fill up results with nil.
+    |  add RC, RC, #8
+    |  str TISNIL, [CARG4], #8
+    |  b <5
+    break;
+
+  /* -- Loops and branches ------------------------------------------------ */
+
+  |.define FOR_IDX,  [RA];      .define FOR_TIDX,  [RA, #4]
+  |.define FOR_STOP, [RA, #8];  .define FOR_TSTOP, [RA, #12]
+  |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20]
+  |.define FOR_EXT,  [RA, #24]; .define FOR_TEXT,  [RA, #28]
+
+  case BC_FORL:
+    |.if JIT
+    |  hotloop
+    |.endif
+    |  // Fall through. Assumes BC_IFORL follows.
+    break;
+
+  case BC_JFORI:
+  case BC_JFORL:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_FORI:
+  case BC_IFORL:
+    |  // RA = base, RC = target (after end of loop or start of loop)
+    vk = (op == BC_IFORL || op == BC_JFORL);
+    |  add RA, BASE, RA, lsl #3
+    |  ldp CARG1, CARG2, FOR_IDX		// CARG1 = IDX, CARG2 = STOP
+    |   ldr CARG3, FOR_STEP			// CARG3 = STEP
+    if (op != BC_JFORL) {
+      |   add RC, PC, RC, lsl #2
+      |   sub RC, RC, #0x20000
+    }
+    |  checkint CARG1, >5
+    if (!vk) {
+      |  checkint CARG2, ->vmeta_for
+      |   checkint CARG3, ->vmeta_for
+      |  tbnz CARG3w, #31, >4
+      |  cmp CARG1w, CARG2w
+    } else {
+      |  adds CARG1w, CARG1w, CARG3w
+      |  bvs >2
+      |   add TMP0, CARG1, TISNUM
+      |  tbnz CARG3w, #31, >4
+      |  cmp CARG1w, CARG2w
+    }
+    |1:
+    if (op == BC_FORI) {
+      |  csel PC, RC, PC, gt
+    } else if (op == BC_JFORI) {
+      |  ldrh RCw, [RC, #-2]
+    } else if (op == BC_IFORL) {
+      |  csel PC, RC, PC, le
+    }
+    if (vk) {
+      |   str TMP0, FOR_IDX
+      |   str TMP0, FOR_EXT
+    } else {
+      |  str CARG1, FOR_EXT
+    }
+    if (op == BC_JFORI || op == BC_JFORL) {
+      |  ble =>BC_JLOOP
+    }
+    |2:
+    |   ins_next
+    |
+    |4:  // Invert check for negative step.
+    |  cmp CARG2w, CARG1w
+    |  b <1
+    |
+    |5:  // FP loop.
+    |  ldp d0, d1, FOR_IDX
+    |  blo ->vmeta_for
+    if (!vk) {
+      |  checknum CARG2, ->vmeta_for
+      |   checknum CARG3, ->vmeta_for
+      |  str d0, FOR_EXT
+    } else {
+      |  ldr d2, FOR_STEP
+      |  fadd d0, d0, d2
+    }
+    |  tbnz CARG3, #63, >7
+    |  fcmp d0, d1
+    |6:
+    if (vk) {
+      |  str d0, FOR_IDX
+      |  str d0, FOR_EXT
+    }
+    if (op == BC_FORI) {
+      |  csel PC, RC, PC, hi
+    } else if (op == BC_JFORI) {
+      |  ldrh RCw, [RC, #-2]
+      |  bls =>BC_JLOOP
+    } else if (op == BC_IFORL) {
+      |  csel PC, RC, PC, ls
+    } else {
+      |  bls =>BC_JLOOP
+    }
+    |  b <2
+    |
+    |7:  // Invert check for negative step.
+    |  fcmp d1, d0
+    |  b <6
+    break;
+
+  case BC_ITERL:
+    |.if JIT
+    |  hotloop
+    |.endif
+    |  // Fall through. Assumes BC_IITERL follows.
+    break;
+
+  case BC_JITERL:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_IITERL:
+    |  // RA = base, RC = target
+    |  ldr CARG1, [BASE, RA, lsl #3]
+    |   add TMP1, BASE, RA, lsl #3
+    |  cmp CARG1, TISNIL
+    |  beq >1				// Stop if iterator returned nil.
+    if (op == BC_JITERL) {
+      |  str CARG1, [TMP1, #-8]
+      |  b =>BC_JLOOP
+    } else {
+      |  add TMP0, PC, RC, lsl #2	// Otherwise save control var + branch.
+      |  sub PC, TMP0, #0x20000
+      |  str CARG1, [TMP1, #-8]
+    }
+    |1:
+    |  ins_next
+    break;
+
+  case BC_LOOP:
+    |  // RA = base, RC = target (loop extent)
+    |  // Note: RA/RC is only used by trace recorder to determine scope/extent
+    |  // This opcode does NOT jump, its only purpose is to detect a hot loop.
+    |.if JIT
+    |  hotloop
+    |.endif
+    |  // Fall through. Assumes BC_ILOOP follows.
+    break;
+
+  case BC_ILOOP:
+    |  // RA = base, RC = target (loop extent)
+    |  ins_next
+    break;
+
+  case BC_JLOOP:
+    |.if JIT
+    |  NYI
+    |.endif
+    break;
+
+  case BC_JMP:
+    |  // RA = base (only used by trace recorder), RC = target
+    |  add RC, PC, RC, lsl #2
+    |  sub PC, RC, #0x20000
+    |  ins_next
+    break;
+
+  /* -- Function headers -------------------------------------------------- */
+
+  case BC_FUNCF:
+    |.if JIT
+    |  hotcall
+    |.endif
+  case BC_FUNCV:  /* NYI: compiled vararg functions. */
+    |  // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
+    break;
+
+  case BC_JFUNCF:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_IFUNCF:
+    |  // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
+    |  ldr CARG1, L->maxstack
+    |   ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
+    |    ldr KBASE, [PC, #-4+PC2PROTO(k)]
+    |  cmp RA, CARG1
+    |  bhi ->vm_growstack_l
+    |2:
+    |  cmp NARGS8:RC, TMP1, lsl #3	// Check for missing parameters.
+    |  blo >3
+    if (op == BC_JFUNCF) {
+      |  decode_RD RC, INS
+      |  b =>BC_JLOOP
+    } else {
+      |  ins_next
+    }
+    |
+    |3:  // Clear missing parameters.
+    |  str TISNIL, [BASE, NARGS8:RC]
+    |  add NARGS8:RC, NARGS8:RC, #8
+    |  b <2
+    break;
+
+  case BC_JFUNCV:
+#if !LJ_HASJIT
+    break;
+#endif
+    |  NYI  // NYI: compiled vararg functions
+    break;  /* NYI: compiled vararg functions. */
+
+  case BC_IFUNCV:
+    |  // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
+    |  ldr CARG1, L->maxstack
+    |   add TMP2, BASE, RC
+    |  add RA, RA, RC
+    |   add TMP0, RC, #16+FRAME_VARG
+    |   str LFUNC:CARG3, [TMP2], #8	// Store (untagged) copy of LFUNC.
+    |    ldr KBASE, [PC, #-4+PC2PROTO(k)]
+    |  cmp RA, CARG1
+    |   str TMP0, [TMP2], #8		// Store delta + FRAME_VARG.
+    |  bhs ->vm_growstack_l
+    |   sub RC, TMP2, #16
+    |  ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
+    |   mov RA, BASE
+    |   mov BASE, TMP2
+    |  cbz TMP1, >2
+    |1:
+    |  cmp RA, RC			// Less args than parameters?
+    |  bhs >3
+    |   ldr TMP0, [RA]
+    |  sub TMP1, TMP1, #1
+    |    str TISNIL, [RA], #8		// Clear old fixarg slot (help the GC).
+    |   str TMP0, [TMP2], #8
+    |  cbnz TMP1, <1
+    |2:
+    |  ins_next
+    |
+    |3:
+    |  sub TMP1, TMP1, #1
+    |   str TISNIL, [TMP2], #8
+    |  cbz TMP1, <2
+    |  b <3
+    break;
+
+  case BC_FUNCC:
+  case BC_FUNCCW:
+    |  // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
+    if (op == BC_FUNCC) {
+      |  ldr CARG4, CFUNC:CARG3->f
+    } else {
+      |  ldr CARG4, GL->wrapf
+    }
+    |   add CARG2, RA, NARGS8:RC
+    |   ldr CARG1, L->maxstack
+    |  add RC, BASE, NARGS8:RC
+    |   cmp CARG2, CARG1
+    |  stp BASE, RC, L->base
+    if (op == BC_FUNCCW) {
+      |  ldr CARG2, CFUNC:CARG3->f
+    }
+    |    mv_vmstate TMP0w, C
+    |  mov CARG1, L
+    |   bhi ->vm_growstack_c		// Need to grow stack.
+    |    st_vmstate TMP0w
+    |  blr CARG4			// (lua_State *L [, lua_CFunction f])
+    |  // Returns nresults.
+    |  ldp BASE, TMP1, L->base
+    |    str L, GL->cur_L
+    |   sbfiz RC, CRET1, #3, #32
+    |    st_vmstate ST_INTERP
+    |  ldr PC, [BASE, FRAME_PC]
+    |   sub RA, TMP1, RC		// RA = L->top - nresults*8
+    |  b ->vm_returnc
+    break;
+
+  /* ---------------------------------------------------------------------- */
+
+  default:
+    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+    exit(2);
+    break;
+  }
+}
+
+/* Build the whole backend: subroutines first, then one handler per opcode. */
+static int build_backend(BuildCtx *ctx)
+{
+  int bc = 0;  /* Current opcode number. */
+  dasm_growpc(Dst, BC__MAX);  /* Grow the DynASM pc label table to BC__MAX. */
+  build_subroutines(ctx);
+
+  |.code_op
+  while (bc < BC__MAX) {  /* Emit handlers in ascending opcode order. */
+    build_ins(ctx, (BCOp)bc, bc);
+    bc++;
+  }
+  return BC__MAX;
+}
+
+/* Emit pseudo frame-info (DWARF CFI in .debug_frame and .eh_frame) for all assembler functions. */
+static void emit_asm_debug(BuildCtx *ctx)
+{
+  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);  /* Byte offset of vm_ffi_call in the code. */
+  int i, cf = CFRAME_SIZE >> 3;  /* CFRAME_SIZE in 8-byte units (data alignment factor is -8). */
+  switch (ctx->mode) {
+  case BUILD_elfasm:  /* Only this mode emits frame info; all others fall to default. */
+    fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
+    fprintf(ctx->fp,
+	".Lframe0:\n"				/* CIE shared by both .debug_frame FDEs. */
+	"\t.long .LECIE0-.LSCIE0\n"
+	".LSCIE0:\n"
+	"\t.long 0xffffffff\n"			/* CIE id (.debug_frame) */
+	"\t.byte 0x1\n"				/* version */
+	"\t.string \"\"\n"			/* no augmentation */
+	"\t.uleb128 0x1\n"			/* code alignment factor */
+	"\t.sleb128 -8\n"			/* data alignment factor */
+	"\t.byte 30\n"				/* Return address is in lr. */
+	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.align 3\n"
+	".LECIE0:\n\n");
+    fprintf(ctx->fp,
+	".LSFDE0:\n"				/* FDE: fcofs bytes starting at .Lbegin. */
+	"\t.long .LEFDE0-.LASFDE0\n"
+	".LASFDE0:\n"
+	"\t.long .Lframe0\n"			/* CIE pointer */
+	"\t.quad .Lbegin\n"			/* initial location */
+	"\t.quad %d\n"				/* address range (fcofs) */
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
+	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
+	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
+	fcofs, CFRAME_SIZE, cf, cf-1);
+    for (i = 19; i <= 28; i++)  /* offset x19-x28 */
+      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);  /* 0x80+i = DW_CFA_offset for register i. */
+    for (i = 8; i <= 15; i++)  /* offset d8-d15 */
+      fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",  /* 5 = DW_CFA_offset_extended. */
+	      64+i, cf-i-4);  /* 64+i: DWARF register number used for d<i> here. */
+    fprintf(ctx->fp,
+	"\t.align 3\n"
+	".LEFDE0:\n\n");
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".LSFDE1:\n"				/* FDE for lj_vm_ffi_call up to the end of the code. */
+	"\t.long .LEFDE1-.LASFDE1\n"
+	".LASFDE1:\n"
+	"\t.long .Lframe0\n"			/* CIE pointer */
+	"\t.quad lj_vm_ffi_call\n"		/* initial location */
+	"\t.quad %d\n"				/* address range (codesz - fcofs) */
+	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
+	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
+	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
+	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
+	"\t.align 3\n"
+	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n");
+    fprintf(ctx->fp,
+	".Lframe1:\n"				/* .eh_frame CIE with personality routine. */
+	"\t.long .LECIE1-.LSCIE1\n"
+	".LSCIE1:\n"
+	"\t.long 0\n"				/* CIE id (.eh_frame) */
+	"\t.byte 0x1\n"				/* version */
+	"\t.string \"zPR\"\n"			/* augmentation: personality + FDE encoding */
+	"\t.uleb128 0x1\n"			/* code alignment factor */
+	"\t.sleb128 -8\n"			/* data alignment factor */
+	"\t.byte 30\n"				/* Return address is in lr. */
+	"\t.uleb128 6\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.long lj_err_unwind_dwarf-.\n"	/* personality routine */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.align 3\n"
+	".LECIE1:\n\n");
+    fprintf(ctx->fp,
+	".LSFDE2:\n"				/* FDE: fcofs bytes starting at .Lbegin. */
+	"\t.long .LEFDE2-.LASFDE2\n"
+	".LASFDE2:\n"
+	"\t.long .LASFDE2-.Lframe1\n"		/* CIE pointer (relative) */
+	"\t.long .Lbegin-.\n"			/* initial location (pcrel) */
+	"\t.long %d\n"				/* address range (fcofs) */
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
+	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
+	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
+	fcofs, CFRAME_SIZE, cf, cf-1);
+    for (i = 19; i <= 28; i++)  /* offset x19-x28 */
+      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);  /* Same register rules as in .debug_frame. */
+    for (i = 8; i <= 15; i++)  /* offset d8-d15 */
+      fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
+	      64+i, cf-i-4);
+    fprintf(ctx->fp,
+	"\t.align 3\n"
+	".LEFDE2:\n\n");
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".Lframe2:\n"				/* .eh_frame CIE without personality routine. */
+	"\t.long .LECIE2-.LSCIE2\n"
+	".LSCIE2:\n"
+	"\t.long 0\n"				/* CIE id (.eh_frame) */
+	"\t.byte 0x1\n"				/* version */
+	"\t.string \"zR\"\n"			/* augmentation: FDE encoding only */
+	"\t.uleb128 0x1\n"			/* code alignment factor */
+	"\t.sleb128 -8\n"			/* data alignment factor */
+	"\t.byte 30\n"				/* Return address is in lr. */
+	"\t.uleb128 1\n"                        /* augmentation length */
+	"\t.byte 0x1b\n"                        /* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.align 3\n"
+	".LECIE2:\n\n");
+    fprintf(ctx->fp,
+	".LSFDE3:\n"				/* FDE for lj_vm_ffi_call up to the end of the code. */
+	"\t.long .LEFDE3-.LASFDE3\n"
+	".LASFDE3:\n"
+	"\t.long .LASFDE3-.Lframe2\n"		/* CIE pointer (relative) */
+	"\t.long lj_vm_ffi_call-.\n"		/* initial location (pcrel) */
+	"\t.long %d\n"				/* address range (codesz - fcofs) */
+	"\t.uleb128 0\n"                        /* augmentation length */
+	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
+	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
+	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
+	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
+	"\t.align 3\n"
+	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
+    break;
+  default:  /* No frame info for any other output mode. */
+    break;
+  }
+}
+
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/vm_mips.dasc b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_mips.dasc
similarity index 94%
rename from source/libs/luajit/LuaJIT-2.0.4/src/vm_mips.dasc
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_mips.dasc
index ac8346bbddc56fcd46453cc774ccca2832ecf8e4..7cfdf4b18b5a3b476200a427738ca75210224111 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/vm_mips.dasc
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_mips.dasc
@@ -138,6 +138,7 @@
 |.type NODE,		Node
 |.type NARGS8,		int
 |.type TRACE,		GCtrace
+|.type SBUF,		SBuf
 |
 |//-----------------------------------------------------------------------
 |
@@ -486,12 +487,13 @@ static void build_subroutines(BuildCtx *ctx)
   |    addiu DISPATCH, DISPATCH, GG_G2DISP
   |   sw r0, SAVE_NRES
   |   sw r0, SAVE_ERRF
-  |  sw TMP0, L->cframe
+  |   sw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
   |   sw r0, SAVE_CFRAME
   |    beqz TMP1, >3
-  |.  sw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
+  |. sw TMP0, L->cframe
   |
   |  // Resume after yield (like a return).
+  |  sw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  move RA, BASE
   |   lw BASE, L->base
   |   lw TMP1, L->top
@@ -525,17 +527,18 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |1:  // Entry point for vm_pcall above (PC = ftype).
   |  lw TMP1, L:CARG1->cframe
-  |   sw CARG3, SAVE_NRES
   |    move L, CARG1
-  |   sw CARG1, SAVE_L
-  |    move BASE, CARG2
-  |  sw sp, L->cframe			// Add our C frame to cframe chain.
+  |   sw CARG3, SAVE_NRES
   |    lw DISPATCH, L->glref		// Setup pointer to dispatch table.
+  |   sw CARG1, SAVE_L
+  |     move BASE, CARG2
+  |    addiu DISPATCH, DISPATCH, GG_G2DISP
   |   sw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
   |  sw TMP1, SAVE_CFRAME
-  |    addiu DISPATCH, DISPATCH, GG_G2DISP
+  |  sw sp, L->cframe			// Add our C frame to cframe chain.
   |
   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+  |  sw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  lw TMP2, L->base			// TMP2 = old base (used in vmeta_call).
   |     lui TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
   |   lw TMP1, L->top
@@ -566,20 +569,21 @@ static void build_subroutines(BuildCtx *ctx)
   |   lw TMP0, L:CARG1->stack
   |  sw CARG1, SAVE_L
   |   lw TMP1, L->top
+  |     lw DISPATCH, L->glref		// Setup pointer to dispatch table.
   |  sw CARG1, SAVE_PC			// Any value outside of bytecode is ok.
   |   subu TMP0, TMP0, TMP1		// Compute -savestack(L, L->top).
   |    lw TMP1, L->cframe
-  |    sw sp, L->cframe			// Add our C frame to cframe chain.
+  |     addiu DISPATCH, DISPATCH, GG_G2DISP
   |   sw TMP0, SAVE_NRES		// Neg. delta means cframe w/o frame.
   |  sw r0, SAVE_ERRF			// No error function.
-  |  move CFUNCADDR, CARG4
+  |    sw TMP1, SAVE_CFRAME
+  |    sw sp, L->cframe			// Add our C frame to cframe chain.
+  |     sw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  jalr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
-  |.   sw TMP1, SAVE_CFRAME
+  |.  move CFUNCADDR, CARG4
   |  move BASE, CRET1
-  |   lw DISPATCH, L->glref		// Setup pointer to dispatch table.
-  |    li PC, FRAME_CP
   |  bnez CRET1, <3			// Else continue with the call.
-  |.  addiu DISPATCH, DISPATCH, GG_G2DISP
+  |.  li PC, FRAME_CP
   |  b ->vm_leave_cp			// No base? Just remove C frame.
   |.  nop
   |
@@ -688,6 +692,16 @@ static void build_subroutines(BuildCtx *ctx)
   |  b ->vm_call_dispatch_f
   |.  li NARGS8:RC, 16			// 2 args for func(t, k).
   |
+  |->vmeta_tgetr:
+  |  load_got lj_tab_getinth
+  |  call_intern lj_tab_getinth		// (GCtab *t, int32_t key)
+  |.  nop
+  |  // Returns cTValue * or NULL. Substitute the global nil TValue for NULL.
+  |  addiu AT, DISPATCH, DISPATCH_GL(nilnode.val)
+  |  movz CRET1, AT, CRET1		// CRET1 == NULL? Use &g->nilnode.val.
+  |  b ->BC_TGETR_Z			// Common exit stores f0 to the dst slot.
+  |.  ldc1 f0, 0(CRET1)
+  |
   |//-----------------------------------------------------------------------
   |
   |->vmeta_tsets1:
@@ -740,6 +754,16 @@ static void build_subroutines(BuildCtx *ctx)
   |  b ->vm_call_dispatch_f
   |.  li NARGS8:RC, 24			// 3 args for func(t, k, v)
   |
+  |->vmeta_tsetr:
+  |  load_got lj_tab_setinth
+  |  sw BASE, L->base			// Save BASE and PC before the C call.
+  |  sw PC, SAVE_PC
+  |  call_intern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
+  |.  move CARG1, L			// Delay slot: 1st argument is L.
+  |  // Returns TValue *.
+  |  b ->BC_TSETR_Z			// Store the value through CRET1.
+  |.  nop
+  |
   |//-- Comparison metamethods ---------------------------------------------
   |
   |->vmeta_comp:
@@ -813,6 +837,18 @@ static void build_subroutines(BuildCtx *ctx)
   |.  nop
   |.endif
   |
+  |->vmeta_istype:
+  |  load_got lj_meta_istype
+  |  addiu PC, PC, -4			// Step PC back 4 bytes; saved below.
+  |   sw BASE, L->base
+  |   srl CARG2, RA, 3			// ra = RA>>3 (RA is src*8).
+  |   srl CARG3, RD, 3			// tp = RD>>3 (logical shift).
+  |  sw PC, SAVE_PC
+  |  call_intern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
+  |.  move CARG1, L			// Delay slot: 1st argument is L.
+  |  b ->cont_nop
+  |.  nop
+  |
   |//-- Arithmetic metamethods ---------------------------------------------
   |
   |->vmeta_unm:
@@ -1119,9 +1155,9 @@ static void build_subroutines(BuildCtx *ctx)
   |.  sw BASE, L->base			// Add frame since C call can throw.
   |  ffgccheck
   |.  sw PC, SAVE_PC			// Redundant (but a defined value).
-  |  load_got lj_str_fromnum
+  |  load_got lj_strfmt_num
   |  move CARG1, L
-  |  call_intern lj_str_fromnum		// (lua_State *L, lua_Number *np)
+  |  call_intern lj_strfmt_num		// (lua_State *L, lua_Number *np)
   |.  move CARG2, BASE
   |  // Returns GCstr *.
   |  li CARG3, LJ_TSTR
@@ -1188,7 +1224,7 @@ static void build_subroutines(BuildCtx *ctx)
   |   mtc1 TMP0, FARG1
   |  beqz AT, ->fff_fallback
   |.  lw PC, FRAME_PC(BASE)
-  |   cvt.w.d FRET1, FARG2
+  |   trunc.w.d FRET1, FARG2
   |  cvt.d.w FARG1, FARG1
   |   lw TMP0, TAB:CARG1->asize
   |   lw TMP1, TAB:CARG1->array
@@ -1331,6 +1367,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  lw TMP3, L:RA->top
   |    li_vmstate INTERP
   |  lw BASE, L->base
+  |    sw L, DISPATCH_GL(cur_L)(DISPATCH)
   |    st_vmstate
   |   beqz AT, >8
   |. subu RD, TMP3, TMP2
@@ -1521,14 +1558,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  b ->fff_resn
   |.  nop
   |
-  |->ff_math_deg:
-  |.ffunc_n math_rad
-  |.  ldc1 FARG2, CFUNC:RB->upvalue[0]
-  |  b ->fff_resn
-  |.  mul.d FRET1, FARG1, FARG2
-  |
   |.ffunc_nn math_ldexp
-  |  cvt.w.d FARG2, FARG2
+  |  trunc.w.d FARG2, FARG2
   |  load_got ldexp
   |  mfc1 CARG3, FARG2
   |  call_extern
@@ -1592,13 +1623,6 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |//-- String library -----------------------------------------------------
   |
-  |.ffunc_1 string_len
-  |  li AT, LJ_TSTR
-  |  bne CARG3, AT, ->fff_fallback
-  |.  nop
-  |  b ->fff_resi
-  |.  lw CRET1, STR:CARG1->len
-  |
   |.ffunc string_byte			// Only handle the 1-arg case here.
   |  lw CARG3, HI(BASE)
   |   lw STR:CARG1, LO(BASE)
@@ -1628,7 +1652,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.  sltiu AT, CARG3, LJ_TISNUM
   |  beqz AT, ->fff_fallback
   |.  li CARG3, 1
-  |   cvt.w.d FARG1, FARG1
+  |   trunc.w.d FARG1, FARG1
   |  addiu CARG2, sp, ARG5_OFS
   |  sltiu AT, TMP0, 256
   |   mfc1 TMP0, FARG1
@@ -1642,6 +1666,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.  move CARG1, L
   |  // Returns GCstr *.
   |  lw BASE, L->base
+  |->fff_resstr:
   |  move CARG1, CRET1
   |  b ->fff_restv
   |.  li CARG3, LJ_TSTR
@@ -1658,7 +1683,7 @@ static void build_subroutines(BuildCtx *ctx)
   |    ldc1 f2, 8(BASE)
   |  beqz AT, >1
   |.  li CARG4, -1
-  |   cvt.w.d f0, f0
+  |   trunc.w.d f0, f0
   |  sltiu AT, CARG3, LJ_TISNUM
   |  beqz AT, ->fff_fallback
   |.  mfc1 CARG4, f0
@@ -1666,7 +1691,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  sltiu AT, CARG2, LJ_TISNUM
   |  beqz AT, ->fff_fallback
   |.  li AT, LJ_TSTR
-  |  cvt.w.d f2, f2
+  |  trunc.w.d f2, f2
   |  bne TMP0, AT, ->fff_fallback
   |.  lw CARG2, STR:CARG1->len
   |  mfc1 CARG3, f2
@@ -1695,108 +1720,32 @@ static void build_subroutines(BuildCtx *ctx)
   |  b ->fff_restv
   |.  li CARG3, LJ_TSTR
   |
-  |.ffunc string_rep			// Only handle the 1-char case inline.
-  |  ffgccheck
-  |  lw TMP0, HI(BASE)
-  |   addiu AT, NARGS8:RC, -16		// Exactly 2 arguments.
-  |  lw CARG4, 8+HI(BASE)
-  |   lw STR:CARG1, LO(BASE)
-  |  addiu TMP0, TMP0, -LJ_TSTR
-  |   ldc1 f0, 8(BASE)
-  |  or AT, AT, TMP0
-  |  bnez AT, ->fff_fallback
-  |.  sltiu AT, CARG4, LJ_TISNUM
-  |   cvt.w.d f0, f0
-  |  beqz AT, ->fff_fallback
-  |.  lw TMP0, STR:CARG1->len
-  |   mfc1 CARG3, f0
-  |  lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
-  |  li AT, 1
-  |   blez CARG3, ->fff_emptystr	// Count <= 0?
-  |.   sltu AT, AT, TMP0
-  |  beqz TMP0, ->fff_emptystr		// Zero length string?
-  |.  sltu TMP0, TMP1, CARG3
-  |  or AT, AT, TMP0
-  |   lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
-  |  bnez AT, ->fff_fallback		// Fallback for > 1-char strings.
-  |.  lbu TMP0, STR:CARG1[1]
-  |  addu TMP2, CARG2, CARG3
-  |1:  // Fill buffer with char. Yes, this is suboptimal code (do you care?).
-  |  addiu TMP2, TMP2, -1
-  |  sltu AT, CARG2, TMP2
-  |  bnez AT, <1
-  |.  sb TMP0, 0(TMP2)
-  |  b ->fff_newstr
-  |.  nop
-  |
-  |.ffunc string_reverse
-  |  ffgccheck
-  |  lw CARG3, HI(BASE)
-  |   lw STR:CARG1, LO(BASE)
-  |  beqz NARGS8:RC, ->fff_fallback
-  |.  li AT, LJ_TSTR
-  |  bne CARG3, AT, ->fff_fallback
-  |.  lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
-  |  lw CARG3, STR:CARG1->len
-  |   addiu CARG1, STR:CARG1, #STR
-  |   lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
-  |  sltu AT, TMP1, CARG3
-  |  bnez AT, ->fff_fallback
-  |.  addu TMP3, CARG1, CARG3
-  |  addu CARG4, CARG2, CARG3
-  |1:  // Reverse string copy.
-  |   lbu TMP1, 0(CARG1)
-  |  sltu AT, CARG1, TMP3
-  |  beqz AT, ->fff_newstr
-  |.  addiu CARG1, CARG1, 1
-  |  addiu CARG4, CARG4, -1
-  |  b <1
-  |   sb TMP1, 0(CARG4)
-  |
-  |.macro ffstring_case, name, lo
-  |  .ffunc name
+  |.macro ffstring_op, name
+  |  .ffunc string_ .. name
   |  ffgccheck
   |  lw CARG3, HI(BASE)
-  |   lw STR:CARG1, LO(BASE)
+  |   lw STR:CARG2, LO(BASE)
   |  beqz NARGS8:RC, ->fff_fallback
   |.  li AT, LJ_TSTR
   |  bne CARG3, AT, ->fff_fallback
-  |.  lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
-  |  lw CARG3, STR:CARG1->len
-  |   addiu CARG1, STR:CARG1, #STR
-  |   lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
-  |  sltu AT, TMP1, CARG3
-  |  bnez AT, ->fff_fallback
-  |.  addu TMP3, CARG1, CARG3
-  |  move CARG4, CARG2
-  |1:  // ASCII case conversion.
-  |   lbu TMP1, 0(CARG1)
-  |  sltu AT, CARG1, TMP3
-  |  beqz AT, ->fff_newstr
-  |.  addiu TMP0, TMP1, -lo
-  |   xori TMP2, TMP1, 0x20
-  |   sltiu AT, TMP0, 26
-  |   movn TMP1, TMP2, AT
-  |  addiu CARG1, CARG1, 1
-  |   sb TMP1, 0(CARG4)
-  |  b <1
-  |.  addiu CARG4, CARG4, 1
+  |.  addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
+  |  load_got lj_buf_putstr_ .. name
+  |  lw TMP0, SBUF:CARG1->b
+  |   sw L, SBUF:CARG1->L
+  |   sw BASE, L->base
+  |  sw TMP0, SBUF:CARG1->p
+  |  call_intern extern lj_buf_putstr_ .. name
+  |.  sw PC, SAVE_PC
+  |  load_got lj_buf_tostr
+  |  call_intern lj_buf_tostr
+  |.  move SBUF:CARG1, SBUF:CRET1
+  |  b ->fff_resstr
+  |.  lw BASE, L->base
   |.endmacro
   |
-  |ffstring_case string_lower, 65
-  |ffstring_case string_upper, 97
-  |
-  |//-- Table library ------------------------------------------------------
-  |
-  |.ffunc_1 table_getn
-  |  li AT, LJ_TTAB
-  |  bne CARG3, AT, ->fff_fallback
-  |.  load_got lj_tab_len
-  |  call_intern lj_tab_len		// (GCtab *t)
-  |.  nop
-  |  // Returns uint32_t (but less than 2^31).
-  |  b ->fff_resi
-  |.  nop
+  |ffstring_op reverse
+  |ffstring_op lower
+  |ffstring_op upper
   |
   |//-- Bit library --------------------------------------------------------
   |
@@ -2061,6 +2010,76 @@ static void build_subroutines(BuildCtx *ctx)
   |  lw LFUNC:RB, FRAME_FUNC(BASE)
   |  jr CRET1
   |.  lw INS, -4(PC)
+  |
+  |->cont_stitch:			// Trace stitching.
+  |.if JIT
+  |  // RA = resultptr, RB = meta base
+  |  lw INS, -4(PC)
+  |    lw TMP3, -24+LO(RB)		// Save previous trace number.
+  |  decode_RA8a RC, INS
+  |   addiu AT, MULTRES, -8
+  |  decode_RA8b RC
+  |   beqz AT, >2
+  |. addu RC, BASE, RC			// Call base.
+  |1:  // Move results down.
+  |  ldc1 f0, 0(RA)
+  |   addiu AT, AT, -8
+  |    addiu RA, RA, 8
+  |  sdc1 f0, 0(RC)
+  |   bnez AT, <1
+  |.   addiu RC, RC, 8
+  |2:
+  |   decode_RA8a RA, INS
+  |    decode_RB8a RB, INS
+  |   decode_RA8b RA
+  |    decode_RB8b RB
+  |   addu RA, RA, RB
+  |  lw TMP1, DISPATCH_J(trace)(DISPATCH)
+  |   addu RA, BASE, RA
+  |3:
+  |   sltu AT, RC, RA
+  |   bnez AT, >9			// More results wanted?
+  |. sll TMP2, TMP3, 2
+  |
+  |  addu TMP2, TMP1, TMP2
+  |  lw TRACE:TMP2, 0(TMP2)
+  |  beqz TRACE:TMP2, ->cont_nop
+  |.  nop
+  |  lhu RD, TRACE:TMP2->link
+  |  beq RD, TMP3, ->cont_nop		// Blacklisted.
+  |.  load_got lj_dispatch_stitch
+  |  bnez RD, =>BC_JLOOP		// Jump to stitched trace.
+  |.  sll RD, RD, 3
+  |
+  |  // Stitch a new trace to the previous trace.
+  |  sw TMP3, DISPATCH_J(exitno)(DISPATCH)
+  |  sw L, DISPATCH_J(L)(DISPATCH)
+  |  sw BASE, L->base
+  |  addiu CARG1, DISPATCH, GG_DISP2J
+  |  call_intern lj_dispatch_stitch	// (jit_State *J, const BCIns *pc)
+  |.  move CARG2, PC
+  |  b ->cont_nop
+  |.  lw BASE, L->base
+  |
+  |9:
+  |  sw TISNIL, HI(RC)
+  |  b <3
+  |.  addiu RC, RC, 8
+  |.endif
+  |
+  |->vm_profhook:			// Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+  |  load_got lj_dispatch_profile
+  |   sw MULTRES, SAVE_MULTRES
+  |  move CARG2, PC
+  |   sw BASE, L->base
+  |  call_intern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
+  |.  move CARG1, L
+  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+  |  addiu PC, PC, -4
+  |  b ->cont_nop
+  |.  lw BASE, L->base
+#endif
   |
   |//-----------------------------------------------------------------------
   |//-- Trace exit handler -------------------------------------------------
@@ -2100,14 +2119,15 @@ static void build_subroutines(BuildCtx *ctx)
   |  lw TMP1, 0(TMP2)			// Load exit number.
   |  st_vmstate
   |   sw TMP2, 16+32*8+29*4(sp)		// Store sp in RID_SP.
-  |  lw L, DISPATCH_GL(jit_L)(DISPATCH)
-  |  lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
+  |  lw L, DISPATCH_GL(cur_L)(DISPATCH)
+  |   lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
   |  load_got lj_trace_exit
   |  sw L, DISPATCH_J(L)(DISPATCH)
   |  sw ra, DISPATCH_J(parent)(DISPATCH)  // Store trace number.
+  |   sw BASE, L->base
   |  sw TMP1, DISPATCH_J(exitno)(DISPATCH)  // Store exit number.
   |  addiu CARG1, DISPATCH, GG_DISP2J
-  |  sw BASE, L->base
+  |   sw r0, DISPATCH_GL(jit_base)(DISPATCH)
   |  call_intern lj_trace_exit		// (jit_State *J, ExitState *ex)
   |.  addiu CARG2, sp, 16
   |  // Returns MULTRES (unscaled) or negated error code.
@@ -2123,17 +2143,18 @@ static void build_subroutines(BuildCtx *ctx)
   |.if JIT
   |  // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
   |  lw L, SAVE_L
-  |  addiu DISPATCH, JGL, -GG_DISP2G-32768
+  |   addiu DISPATCH, JGL, -GG_DISP2G-32768
+  |  sw BASE, L->base
   |1:
-  |  bltz CRET1, >3			// Check for error from exit.
-  |.  lw LFUNC:TMP1, FRAME_FUNC(BASE)
+  |  bltz CRET1, >9			// Check for error from exit.
+  |.  lw LFUNC:RB, FRAME_FUNC(BASE)
   |    lui TMP3, 0x59c0			// TOBIT = 2^52 + 2^51 (float).
   |  sll MULTRES, CRET1, 3
   |    li TISNIL, LJ_TNIL
   |  sw MULTRES, SAVE_MULTRES
   |    mtc1 TMP3, TOBIT
-  |  lw TMP1, LFUNC:TMP1->pc
-  |   sw r0, DISPATCH_GL(jit_L)(DISPATCH)
+  |  lw TMP1, LFUNC:RB->pc
+  |   sw r0, DISPATCH_GL(jit_base)(DISPATCH)
   |  lw KBASE, PC2PROTO(k)(TMP1)
   |    cvt.d.s TOBIT, TOBIT
   |  // Modified copy of ins_next which handles function header dispatch, too.
@@ -2153,11 +2174,27 @@ static void build_subroutines(BuildCtx *ctx)
   |  jr AT
   |.  decode_RD8b RD
   |2:
+  |  sltiu TMP2, TMP1, (BC_FUNCC+2)*4	// Fast function?
+  |  bnez TMP2, >3
+  |.  lw TMP1, FRAME_PC(BASE)
+  |  // Check frame below fast function.
+  |  andi TMP0, TMP1, FRAME_TYPE
+  |  bnez TMP0, >3			// Trace stitching continuation?
+  |.  nop
+  |  // Otherwise set KBASE for Lua function below fast function.
+  |  lw TMP2, -4(TMP1)
+  |  decode_RA8a TMP0, TMP2
+  |  decode_RA8b TMP0
+  |  subu TMP1, BASE, TMP0
+  |  lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1)
+  |  lw TMP1, LFUNC:TMP2->pc
+  |  lw KBASE, PC2PROTO(k)(TMP1)
+  |3:
   |  addiu RC, MULTRES, -8
   |  jr AT
   |.  addu RA, RA, BASE
   |
-  |3:  // Rethrow error from the right C frame.
+  |9:  // Rethrow error from the right C frame.
   |  load_got lj_err_throw
   |  negu CARG2, CRET1
   |  call_intern lj_err_throw		// (lua_State *L, int errcode)
@@ -2572,6 +2609,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_next
     break;
 
+  case BC_ISTYPE:
+    |  // RA = src*8, RD = -type*8
+    |  addu TMP2, BASE, RA
+    |  srl TMP1, RD, 3
+    |  lw TMP0, HI(TMP2)
+    |  ins_next1
+    |  addu AT, TMP0, TMP1
+    |  bnez AT, ->vmeta_istype
+    |.  ins_next2
+    break;
+  case BC_ISNUM:
+    |  // RA = src*8, RD = -(TISNUM-1)*8
+    |  addu TMP2, BASE, RA
+    |  lw TMP0, HI(TMP2)
+    |  ins_next1
+    |  sltiu AT, TMP0, LJ_TISNUM
+    |  beqz AT, ->vmeta_istype
+    |.  ins_next2
+    break;
+
   /* -- Unary ops --------------------------------------------------------- */
 
   case BC_MOV:
@@ -3210,6 +3267,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  b ->vmeta_tgetb			// Caveat: preserve TMP0!
     |.  nop
     break;
+  case BC_TGETR:
+    |  // RA = dst*8, RB = table*8, RC = key*8
+    |  decode_RB8a RB, INS
+    |  decode_RB8b RB
+    |   decode_RDtoRC8 RC, RD
+    |  addu CARG2, BASE, RB
+    |   addu CARG3, BASE, RC
+    |    lw TAB:CARG1, LO(CARG2)
+    |   ldc1 f0, 0(CARG3)
+    |  trunc.w.d f2, f0
+    |   lw TMP0, TAB:CARG1->asize
+    |  mfc1 CARG2, f2
+    |   lw TMP1, TAB:CARG1->array
+    |  sltu AT, CARG2, TMP0
+    |   sll TMP2, CARG2, 3
+    |  beqz AT, ->vmeta_tgetr		// In array part?
+    |.  addu TMP2, TMP1, TMP2
+    |   ldc1 f0, 0(TMP2)
+    |->BC_TGETR_Z:
+    |   addu RA, BASE, RA
+    |  ins_next1
+    |   sdc1 f0, 0(RA)
+    |  ins_next2
+    break;
 
   case BC_TSETV:
     |  // RA = src*8, RB = table*8, RC = key*8
@@ -3398,6 +3479,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |7:  // Possible table write barrier for the value. Skip valiswhite check.
     |  barrierback TAB:RB, TMP3, TMP0, <2
     break;
+  case BC_TSETR:
+    |  // RA = src*8, RB = table*8, RC = key*8
+    |  decode_RB8a RB, INS
+    |  decode_RB8b RB
+    |   decode_RDtoRC8 RC, RD
+    |  addu CARG1, BASE, RB
+    |   addu CARG3, BASE, RC
+    |    lw TAB:CARG2, LO(CARG1)		// CARG2 = table pointer.
+    |   ldc1 f0, 0(CARG3)
+    |  trunc.w.d f2, f0			// Convert the number key to an integer.
+    |    lbu TMP3, TAB:CARG2->marked
+    |   lw TMP0, TAB:CARG2->asize		// TMP0 stays live until the check at 2:.
+    |  mfc1 CARG3, f2
+    |   lw TMP1, TAB:CARG2->array
+    |  andi AT, TMP3, LJ_GC_BLACK	// isblack(table)
+    |  bnez AT, >7
+    |.  addu RA, BASE, RA
+    |2:
+    |  sltu AT, CARG3, TMP0
+    |   sll TMP2, CARG3, 3
+    |  beqz AT, ->vmeta_tsetr		// In array part?
+    |.  ldc1 f20, 0(RA)
+    |   addu CRET1, TMP1, TMP2
+    |->BC_TSETR_Z:
+    |  ins_next1
+    |   sdc1 f20, 0(CRET1)
+    |  ins_next2
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:CARG2, TMP3, CRET1, <2  // Table is in CARG2; keep TMP0.
+    break;
+
 
   case BC_TSETM:
     |  // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -3957,8 +4070,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   sw AT, DISPATCH_GL(vmstate)(DISPATCH)
     |  lw TRACE:TMP2, 0(TMP1)
     |   sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
-    |   sw L, DISPATCH_GL(jit_L)(DISPATCH)
     |  lw TMP2, TRACE:TMP2->mcode
+    |   sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
     |  jr TMP2
     |.  addiu JGL, DISPATCH, GG_DISP2G+32768
     |.endif
@@ -4084,6 +4197,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |    li_vmstate INTERP
     |  lw PC, FRAME_PC(BASE)		// Fetch PC of caller.
     |   subu RA, TMP1, RD		// RA = L->top - nresults*8
+    |    sw L, DISPATCH_GL(cur_L)(DISPATCH)
     |  b ->vm_returnc
     |.   st_vmstate
     break;
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/vm_ppc.dasc b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_ppc.dasc
similarity index 95%
rename from source/libs/luajit/LuaJIT-2.0.4/src/vm_ppc.dasc
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_ppc.dasc
index ad8a023e479bc935aca417529cfe49cc0746e608..9299c554e2b9c05d7ab7f82c95823078b8cecfb2 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/vm_ppc.dasc
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_ppc.dasc
@@ -1,4 +1,4 @@
-|// Low-level VM code for PowerPC CPUs.
+|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
 |// Bytecode interpreter, fast functions and helper functions.
 |// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 |
@@ -18,7 +18,7 @@
 |// DynASM defines used by the PPC port:
 |//
 |// P64     64 bit pointers (only for GPR64 testing).
-|//         Note: a full PPC64 _LP64 port is not planned.
+|//         Note: see vm_ppc64.dasc for a full PPC64 _LP64 port.
 |// GPR64   64 bit registers (but possibly 32 bit pointers, e.g. PS3).
 |//         Affects reg saves, stack layout, carry/overflow/dot flags etc.
 |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
@@ -316,16 +316,10 @@
 |.type NODE,		Node
 |.type NARGS8,		int
 |.type TRACE,		GCtrace
+|.type SBUF,		SBuf
 |
 |//-----------------------------------------------------------------------
 |
-|// These basic macros should really be part of DynASM.
-|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
-|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
-|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
-|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
-|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
-|
 |// Trap for not-yet-implemented parts.
 |.macro NYI; tw 4, sp, sp; .endmacro
 |
@@ -684,12 +678,13 @@ static void build_subroutines(BuildCtx *ctx)
   |   stw CARG3, SAVE_NRES
   |    cmplwi TMP1, 0
   |   stw CARG3, SAVE_ERRF
-  |  stp TMP0, L->cframe
   |   stp CARG3, SAVE_CFRAME
   |   stw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
+  |  stp TMP0, L->cframe
   |    beq >3
   |
   |  // Resume after yield (like a return).
+  |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  mr RA, BASE
   |   lp BASE, L->base
   |     li TISNUM, LJ_TISNUM		// Setup type comparison constants.
@@ -729,17 +724,18 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |1:  // Entry point for vm_pcall above (PC = ftype).
   |  lp TMP1, L:CARG1->cframe
-  |   stw CARG3, SAVE_NRES
   |    mr L, CARG1
-  |   stw CARG1, SAVE_L
-  |    mr BASE, CARG2
-  |  stp sp, L->cframe			// Add our C frame to cframe chain.
+  |   stw CARG3, SAVE_NRES
   |    lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
+  |   stw CARG1, SAVE_L
+  |     mr BASE, CARG2
+  |    addi DISPATCH, DISPATCH, GG_G2DISP
   |   stw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
   |  stp TMP1, SAVE_CFRAME
-  |    addi DISPATCH, DISPATCH, GG_G2DISP
+  |  stp sp, L->cframe			// Add our C frame to cframe chain.
   |
   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+  |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  lp TMP2, L->base			// TMP2 = old base (used in vmeta_call).
   |     li TISNUM, LJ_TISNUM		// Setup type comparison constants.
   |   lp TMP1, L->top
@@ -776,15 +772,18 @@ static void build_subroutines(BuildCtx *ctx)
   |   lwz TMP0, L:CARG1->stack
   |  stw CARG1, SAVE_L
   |   lp TMP1, L->top
+  |     lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
   |  stw CARG1, SAVE_PC			// Any value outside of bytecode is ok.
   |   sub TMP0, TMP0, TMP1		// Compute -savestack(L, L->top).
   |    lp TMP1, L->cframe
-  |    stp sp, L->cframe		// Add our C frame to cframe chain.
+  |     addi DISPATCH, DISPATCH, GG_G2DISP
   |  .toc lp CARG4, 0(CARG4)
   |  li TMP2, 0
   |   stw TMP0, SAVE_NRES		// Neg. delta means cframe w/o frame.
   |  stw TMP2, SAVE_ERRF		// No error function.
   |    stp TMP1, SAVE_CFRAME
+  |    stp sp, L->cframe		// Add our C frame to cframe chain.
+  |     stw L, DISPATCH_GL(cur_L)(DISPATCH)
   |  mtctr CARG4
   |  bctrl			// (lua_State *L, lua_CFunction func, void *ud)
   |.if PPE
@@ -793,9 +792,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.else
   |  mr. BASE, CRET1
   |.endif
-  |   lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
-  |    li PC, FRAME_CP
-  |   addi DISPATCH, DISPATCH, GG_G2DISP
+  |   li PC, FRAME_CP
   |  bne <3				// Else continue with the call.
   |  b ->vm_leave_cp			// No base? Just remove C frame.
   |
@@ -918,6 +915,17 @@ static void build_subroutines(BuildCtx *ctx)
   |   li NARGS8:RC, 16			// 2 args for func(t, k).
   |  b ->vm_call_dispatch_f
   |
+  |->vmeta_tgetr:
+  |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
+  |  // Returns cTValue * or NULL.
+  |  cmplwi CRET1, 0
+  |  beq >1				// NULL result?
+  |  lfd f14, 0(CRET1)			// Load the returned TValue.
+  |  b ->BC_TGETR_Z
+  |1:  // NULL: write TISNIL to the slot at BASE+RA instead.
+  |  stwx TISNIL, BASE, RA
+  |  b ->cont_nop
+  |
   |//-----------------------------------------------------------------------
   |
   |->vmeta_tsets1:
@@ -985,6 +993,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  stfd f0, 16(BASE)			// Copy value to third argument.
   |  b ->vm_call_dispatch_f
   |
+  |->vmeta_tsetr:
+  |  stp BASE, L->base
+  |  stw PC, SAVE_PC
+  |  bl extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
+  |  // Returns TValue *.
+  |  stfd f14, 0(CRET1)
+  |  b ->cont_nop
+  |
   |//-- Comparison metamethods ---------------------------------------------
   |
   |->vmeta_comp:
@@ -1063,6 +1079,16 @@ static void build_subroutines(BuildCtx *ctx)
   |  b <3
   |.endif
   |
+  |->vmeta_istype:
+  |  subi PC, PC, 4			// Step PC back 4 bytes; saved below.
+  |   stp BASE, L->base
+  |   srwi CARG2, RA, 3		// ra = RA>>3.
+  |   mr CARG1, L
+  |   srwi CARG3, RD, 3		// tp = RD>>3.
+  |  stw PC, SAVE_PC
+  |  bl extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
+  |  b ->cont_nop
+  |
   |//-- Arithmetic metamethods ---------------------------------------------
   |
   |->vmeta_arith_nv:
@@ -1387,9 +1413,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mr CARG1, L
   |  mr CARG2, BASE
   |.if DUALNUM
-  |  bl extern lj_str_fromnumber	// (lua_State *L, cTValue *o)
+  |  bl extern lj_strfmt_number		// (lua_State *L, cTValue *o)
   |.else
-  |  bl extern lj_str_fromnum		// (lua_State *L, lua_Number *np)
+  |  bl extern lj_strfmt_num		// (lua_State *L, lua_Number *np)
   |.endif
   |  // Returns GCstr *.
   |  li CARG3, LJ_TSTR
@@ -1622,6 +1648,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  lp TMP3, L:SAVE0->top
   |    li_vmstate INTERP
   |  lp BASE, L->base
+  |    stw L, DISPATCH_GL(cur_L)(DISPATCH)
   |    st_vmstate
   |   bgt >8
   |  sub RD, TMP3, TMP2
@@ -1893,12 +1920,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  math_extern2 atan2
   |  math_extern2 fmod
   |
-  |->ff_math_deg:
-  |.ffunc_n math_rad
-  |  lfd FARG2, CFUNC:RB->upvalue[0]
-  |  fmul FARG1, FARG1, FARG2
-  |  b ->fff_resn
-  |
   |.if DUALNUM
   |.ffunc math_ldexp
   |  cmplwi NARGS8:RC, 16
@@ -2044,11 +2065,6 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |//-- String library -----------------------------------------------------
   |
-  |.ffunc_1 string_len
-  |  checkstr CARG3; bne ->fff_fallback
-  |  lwz CRET1, STR:CARG1->len
-  |  b ->fff_resi
-  |
   |.ffunc string_byte			// Only handle the 1-arg case here.
   |  cmplwi NARGS8:RC, 8
   |   lwz CARG3, 0(BASE)
@@ -2103,6 +2119,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  stp BASE, L->base
   |  stw PC, SAVE_PC
   |  bl extern lj_str_new		// (lua_State *L, char *str, size_t l)
+  |->fff_resstr:
   |  // Returns GCstr *.
   |  lp BASE, L->base
   |  li CARG3, LJ_TSTR
@@ -2180,114 +2197,29 @@ static void build_subroutines(BuildCtx *ctx)
   |  addi TMP1, TMP1, 1			// start = 1 + (start ? start+len : 0)
   |  b <3
   |
-  |.ffunc string_rep			// Only handle the 1-char case inline.
-  |  ffgccheck
-  |  cmplwi NARGS8:RC, 16
-  |   lwz TMP0, 0(BASE)
-  |    lwz STR:CARG1, 4(BASE)
-  |   lwz CARG4, 8(BASE)
-  |.if DUALNUM
-  |    lwz CARG3, 12(BASE)
-  |.else
-  |    lfd FARG2, 8(BASE)
-  |.endif
-  |  bne ->fff_fallback			// Exactly 2 arguments.
-  |  checkstr TMP0; bne ->fff_fallback
-  |.if DUALNUM
-  |  checknum CARG4; bne ->fff_fallback
-  |.else
-  |  checknum CARG4; bge ->fff_fallback
-  |    toint CARG3, FARG2
-  |.endif
-  |   lwz TMP0, STR:CARG1->len
-  |  cmpwi CARG3, 0
-  |   lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
-  |  ble >2				// Count <= 0? (or non-int)
-  |   cmplwi TMP0, 1
-  |  subi TMP2, CARG3, 1
-  |   blt >2				// Zero length string?
-  |  cmplw cr1, TMP1, CARG3
-  |   bne ->fff_fallback		// Fallback for > 1-char strings.
-  |   lbz TMP0, STR:CARG1[1]
-  |   lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
-  |  blt cr1, ->fff_fallback
-  |1:  // Fill buffer with char. Yes, this is suboptimal code (do you care?).
-  |  cmplwi TMP2, 0
-  |   stbx TMP0, CARG2, TMP2
-  |   subi TMP2, TMP2, 1
-  |  bne <1
-  |  b ->fff_newstr
-  |2:  // Return empty string.
-  |  la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH)
-  |  li CARG3, LJ_TSTR
-  |  b ->fff_restv
-  |
-  |.ffunc string_reverse
+  |.macro ffstring_op, name
+  |  .ffunc string_ .. name
   |  ffgccheck
   |  cmplwi NARGS8:RC, 8
   |   lwz CARG3, 0(BASE)
-  |    lwz STR:CARG1, 4(BASE)
+  |    lwz STR:CARG2, 4(BASE)
   |  blt ->fff_fallback
   |  checkstr CARG3
-  |   lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
+  |   la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
   |  bne ->fff_fallback
-  |  lwz CARG3, STR:CARG1->len
-  |   la CARG1, #STR(STR:CARG1)
-  |   lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
-  |   li TMP2, 0
-  |  cmplw TMP1, CARG3
-  |   subi TMP3, CARG3, 1
-  |  blt ->fff_fallback
-  |1:  // Reverse string copy.
-  |  cmpwi TMP3, 0
-  |   lbzx TMP1, CARG1, TMP2
-  |  blty ->fff_newstr
-  |   stbx TMP1, CARG2, TMP3
-  |  subi TMP3, TMP3, 1
-  |  addi TMP2, TMP2, 1
-  |  b <1
-  |
-  |.macro ffstring_case, name, lo
-  |  .ffunc name
-  |  ffgccheck
-  |  cmplwi NARGS8:RC, 8
-  |   lwz CARG3, 0(BASE)
-  |    lwz STR:CARG1, 4(BASE)
-  |  blt ->fff_fallback
-  |  checkstr CARG3
-  |   lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
-  |  bne ->fff_fallback
-  |  lwz CARG3, STR:CARG1->len
-  |   la CARG1, #STR(STR:CARG1)
-  |   lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
-  |  cmplw TMP1, CARG3
-  |   li TMP2, 0
-  |  blt ->fff_fallback
-  |1:  // ASCII case conversion.
-  |  cmplw TMP2, CARG3
-  |   lbzx TMP1, CARG1, TMP2
-  |  bgey ->fff_newstr
-  |   subi TMP0, TMP1, lo
-  |    xori TMP3, TMP1, 0x20
-  |   addic TMP0, TMP0, -26
-  |   subfe TMP3, TMP3, TMP3
-  |   rlwinm TMP3, TMP3, 0, 26, 26	// x &= 0x20.
-  |   xor TMP1, TMP1, TMP3
-  |   stbx TMP1, CARG2, TMP2
-  |  addi TMP2, TMP2, 1
-  |  b <1
+  |   lwz TMP0, SBUF:CARG1->b
+  |  stw L, SBUF:CARG1->L
+  |  stp BASE, L->base
+  |  stw PC, SAVE_PC
+  |   stw TMP0, SBUF:CARG1->p
+  |  bl extern lj_buf_putstr_ .. name
+  |  bl extern lj_buf_tostr
+  |  b ->fff_resstr
   |.endmacro
   |
-  |ffstring_case string_lower, 65
-  |ffstring_case string_upper, 97
-  |
-  |//-- Table library ------------------------------------------------------
-  |
-  |.ffunc_1 table_getn
-  |  checktab CARG3; bne ->fff_fallback
-  |  bl extern lj_tab_len		// (GCtab *t)
-  |  // Returns uint32_t (but less than 2^31).
-  |  b ->fff_resi
+  |ffstring_op reverse
+  |ffstring_op lower
+  |ffstring_op upper
   |
   |//-- Bit library --------------------------------------------------------
   |
@@ -2588,6 +2520,70 @@ static void build_subroutines(BuildCtx *ctx)
   |  lwz INS, -4(PC)
   |  mtctr CRET1
   |  bctr
+  |
+  |->cont_stitch:			// Trace stitching.
+  |.if JIT
+  |  // RA = resultptr, RB = meta base
+  |  lwz INS, -4(PC)
+  |    lwz TMP3, -20(RB)		// Save previous trace number.
+  |   addic. TMP1, MULTRES, -8
+  |  decode_RA8 RC, INS			// Call base.
+  |   beq >2
+  |1:  // Move results down.
+  |  lfd f0, 0(RA)
+  |   addic. TMP1, TMP1, -8
+  |    addi RA, RA, 8
+  |  stfdx f0, BASE, RC
+  |    addi RC, RC, 8
+  |   bne <1
+  |2:
+  |   decode_RA8 RA, INS
+  |   decode_RB8 RB, INS
+  |   add RA, RA, RB
+  |  lwz TMP1, DISPATCH_J(trace)(DISPATCH)
+  |3:
+  |   cmplw RA, RC
+  |   bgt >9				// More results wanted?
+  |
+  |  slwi TMP2, TMP3, 2
+  |  lwzx TRACE:TMP2, TMP1, TMP2
+  |  cmpwi TRACE:TMP2, 0
+  |  beq ->cont_nop
+  |  lhz RD, TRACE:TMP2->link
+  |  cmpw RD, TMP3
+  |   cmpwi cr1, RD, 0
+  |  beq ->cont_nop			// Blacklisted.
+  |    slwi RD, RD, 3
+  |   bne cr1, =>BC_JLOOP		// Jump to stitched trace.
+  |
+  |  // Stitch a new trace to the previous trace.
+  |  stw TMP3, DISPATCH_J(exitno)(DISPATCH)
+  |  stp L, DISPATCH_J(L)(DISPATCH)
+  |  stp BASE, L->base
+  |  addi CARG1, DISPATCH, GG_DISP2J
+  |  mr CARG2, PC
+  |  bl extern lj_dispatch_stitch	// (jit_State *J, const BCIns *pc)
+  |  lp BASE, L->base
+  |  b ->cont_nop
+  |
+  |9:
+  |  stwx TISNIL, BASE, RC
+  |  addi RC, RC, 8
+  |  b <3
+  |.endif
+  |
+  |->vm_profhook:			// Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+  |  mr CARG1, L
+  |   stw MULTRES, SAVE_MULTRES
+  |  mr CARG2, PC
+  |   stp BASE, L->base
+  |  bl extern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
+  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+  |  lp BASE, L->base
+  |  subi PC, PC, 4
+  |  b ->cont_nop
+#endif
   |
   |//-----------------------------------------------------------------------
   |//-- Trace exit handler -------------------------------------------------
@@ -2623,16 +2619,16 @@ static void build_subroutines(BuildCtx *ctx)
   |  savex_ 20,21,22,23
   |   lhz CARG4, 2(CARG3)		// Load trace number.
   |  savex_ 24,25,26,27
-  |  lwz L, DISPATCH_GL(jit_L)(DISPATCH)
+  |  lwz L, DISPATCH_GL(cur_L)(DISPATCH)
   |  savex_ 28,29,30,31
   |   sub CARG3, TMP0, CARG3		// Compute exit number.
   |  lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
   |   srwi CARG3, CARG3, 2
-  |  stw L, DISPATCH_J(L)(DISPATCH)
+  |  stp L, DISPATCH_J(L)(DISPATCH)
   |   subi CARG3, CARG3, 2
-  |  stw TMP1, DISPATCH_GL(jit_L)(DISPATCH)
-  |   stw CARG4, DISPATCH_J(parent)(DISPATCH)
   |  stp BASE, L->base
+  |   stw CARG4, DISPATCH_J(parent)(DISPATCH)
+  |  stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
   |  addi CARG1, DISPATCH, GG_DISP2J
   |   stw CARG3, DISPATCH_J(exitno)(DISPATCH)
   |  addi CARG2, sp, 16
@@ -2656,15 +2652,16 @@ static void build_subroutines(BuildCtx *ctx)
   |  // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
   |  lwz L, SAVE_L
   |  addi DISPATCH, JGL, -GG_DISP2G-32768
+  |  stp BASE, L->base
   |1:
   |  cmpwi CARG1, 0
-  |  blt >3				// Check for error from exit.
-  |  lwz LFUNC:TMP1, FRAME_FUNC(BASE)
+  |  blt >9				// Check for error from exit.
+  |  lwz LFUNC:RB, FRAME_FUNC(BASE)
   |   slwi MULTRES, CARG1, 3
   |    li TMP2, 0
   |   stw MULTRES, SAVE_MULTRES
-  |  lwz TMP1, LFUNC:TMP1->pc
-  |    stw TMP2, DISPATCH_GL(jit_L)(DISPATCH)
+  |  lwz TMP1, LFUNC:RB->pc
+  |    stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
   |  lwz KBASE, PC2PROTO(k)(TMP1)
   |  // Setup type comparison constants.
   |  li TISNUM, LJ_TISNUM
@@ -2694,11 +2691,25 @@ static void build_subroutines(BuildCtx *ctx)
   |   decode_RC8 RC, INS
   |  bctr
   |2:
+  |  cmplwi TMP1, (BC_FUNCC+2)*4	// Fast function?
+  |  blt >3
+  |  // Check frame below fast function.
+  |  lwz TMP1, FRAME_PC(BASE)
+  |  andix. TMP0, TMP1, FRAME_TYPE
+  |  bney >3				// Trace stitching continuation?
+  |  // Otherwise set KBASE for Lua function below fast function.
+  |  lwz TMP2, -4(TMP1)
+  |  decode_RA8 TMP0, TMP2
+  |  sub TMP1, BASE, TMP0
+  |  lwz LFUNC:TMP2, -12(TMP1)
+  |  lwz TMP1, LFUNC:TMP2->pc
+  |  lwz KBASE, PC2PROTO(k)(TMP1)
+  |3:
   |   subi RC, MULTRES, 8
   |   add RA, RA, BASE
   |  bctr
   |
-  |3:  // Rethrow error from the right C frame.
+  |9:  // Rethrow error from the right C frame.
   |  neg CARG2, CARG1
   |  mr CARG1, L
   |  bl extern lj_err_throw		// (lua_State *L, int errcode)
@@ -3288,6 +3299,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_next
     break;
 
+  case BC_ISTYPE:
+    |  // RA = src*8, RD = -type*8
+    |  lwzx TMP0, BASE, RA		// Load the first word of the slot at BASE+RA.
+    |  srwi TMP1, RD, 3			// TMP1 = RD/8 (= -type per the operand note).
+    |  ins_next1
+    |.if not PPE and not GPR64
+    |  add. TMP0, TMP0, TMP1		// Record form: cr0 is 'eq' iff TMP0 + TMP1 == 0.
+    |.else
+    |  neg TMP1, TMP1			// Same test via explicit negate + compare.
+    |  cmpw TMP0, TMP1
+    |.endif
+    |  bne ->vmeta_istype		// Mismatch: leave the fast path.
+    |  ins_next2
+    break;
+  case BC_ISNUM:
+    |  // RA = src*8, RD = -(TISNUM-1)*8
+    |  lwzx TMP0, BASE, RA		// Load the first word of the slot at BASE+RA.
+    |  ins_next1
+    |  checknum TMP0			// Macro defined outside this view; sets the condition tested below.
+    |  bge ->vmeta_istype		// Presumably 'not a number' -- confirm against checknum.
+    |  ins_next2
+    break;
+
   /* -- Unary ops --------------------------------------------------------- */
 
   case BC_MOV:
@@ -4039,6 +4073,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  bne <1				// 'no __index' flag set: done.
     |  b ->vmeta_tgetb			// Caveat: preserve TMP0!
     break;
+  case BC_TGETR:
+    |  // RA = dst*8, RB = table*8, RC = key*8
+    |  add RB, BASE, RB
+    |  lwz TAB:CARG1, 4(RB)		// Table pointer from the second word of the slot.
+    |.if DUALNUM
+    |  add RC, BASE, RC
+    |  lwz TMP0, TAB:CARG1->asize
+    |  lwz CARG2, 4(RC)			// Integer key from the second word of the key slot.
+    |   lwz TMP1, TAB:CARG1->array
+    |.else
+    |  lfdx f0, BASE, RC
+    |  lwz TMP0, TAB:CARG1->asize
+    |  toint CARG2, f0			// Key is loaded as a double and converted to an integer.
+    |   lwz TMP1, TAB:CARG1->array
+    |.endif
+    |  cmplw TMP0, CARG2		// Unsigned compare: asize vs. key.
+    |   slwi TMP2, CARG2, 3		// Byte offset of the key in the array (8 bytes per slot).
+    |  ble ->vmeta_tgetr		// In array part?
+    |   lfdx f14, TMP1, TMP2		// f14 = array[key].
+    |->BC_TGETR_Z:
+    |  ins_next1
+    |   stfdx f14, BASE, RA		// Store f14 to the destination slot.
+    |  ins_next2
+    break;
 
   case BC_TSETV:
     |  // RA = src*8, RB = table*8, RC = key*8
@@ -4218,6 +4276,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  barrierback TAB:RB, TMP3, TMP0
     |  b <2
     break;
+  case BC_TSETR:
+    |  // RA = src*8, RB = table*8, RC = key*8
+    |  add RB, BASE, RB
+    |  lwz TAB:CARG2, 4(RB)		// Table pointer from the second word of the slot.
+    |.if DUALNUM
+    |  add RC, BASE, RC
+    |    lbz TMP3, TAB:CARG2->marked
+    |  lwz TMP0, TAB:CARG2->asize
+    |  lwz CARG3, 4(RC)			// Integer key from the second word of the key slot.
+    |   lwz TMP1, TAB:CARG2->array
+    |.else
+    |  lfdx f0, BASE, RC
+    |    lbz TMP3, TAB:CARG2->marked
+    |  lwz TMP0, TAB:CARG2->asize
+    |  toint CARG3, f0			// Key is loaded as a double and converted to an integer.
+    |   lwz TMP1, TAB:CARG2->array
+    |.endif
+    |  andix. TMP2, TMP3, LJ_GC_BLACK	// isblack(table)
+    |  bne >7
+    |2:
+    |  cmplw TMP0, CARG3		// Unsigned compare: asize vs. key.
+    |   slwi TMP2, CARG3, 3		// Byte offset of the key in the array (8 bytes per slot).
+    |   lfdx f14, BASE, RA		// f14 = value to store, read from BASE+RA.
+    |  ble ->vmeta_tsetr		// In array part?
+    |  ins_next1
+    |   stfdx f14, TMP1, TMP2		// array[key] = value.
+    |  ins_next2
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:CARG2, TMP3, TMP2
+    |  b <2
+    break;
+
 
   case BC_TSETM:
     |  // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -4859,8 +4950,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  lp TMP2, TRACE:TMP2->mcode
     |   stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
     |  mtctr TMP2
-    |   stw L, DISPATCH_GL(jit_L)(DISPATCH)
     |   addi JGL, DISPATCH, GG_DISP2G+32768
+    |   stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
     |  bctr
     |.endif
     break;
@@ -4995,6 +5086,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  lp TMP1, L->top
     |    li_vmstate INTERP
     |  lwz PC, FRAME_PC(BASE)		// Fetch PC of caller.
+    |    stw L, DISPATCH_GL(cur_L)(DISPATCH)
     |   sub RA, TMP1, RD		// RA = L->top - nresults*8
     |    st_vmstate
     |  b ->vm_returnc
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_x64.dasc b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_x64.dasc
new file mode 100644
index 0000000000000000000000000000000000000000..e7e990ae27faed80952d1ac38e7d446c38cefcb9
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_x64.dasc
@@ -0,0 +1,4902 @@
+|// Low-level VM code for x64 CPUs in LJ_GC64 mode.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+|
+|.arch x64
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|//-----------------------------------------------------------------------
+|
+|.if WIN
+|.define X64WIN, 1			// Windows/x64 calling conventions.
+|.endif
+|
+|// Fixed register assignments for the interpreter.
+|// This is very fragile and has many dependencies. Caveat emptor.
+|.define BASE,		rdx		// Not C callee-save, refetched anyway.
+|.if X64WIN
+|.define KBASE,		rdi		// Must be C callee-save.
+|.define PC,		rsi		// Must be C callee-save.
+|.define DISPATCH,	rbx		// Must be C callee-save.
+|.define KBASEd,	edi
+|.define PCd,		esi
+|.define DISPATCHd,	ebx
+|.else
+|.define KBASE,		r15		// Must be C callee-save.
+|.define PC,		rbx		// Must be C callee-save.
+|.define DISPATCH,	r14		// Must be C callee-save.
+|.define KBASEd,	r15d
+|.define PCd,		ebx
+|.define DISPATCHd,	r14d
+|.endif
+|
+|.define RA,		rcx
+|.define RAd,		ecx
+|.define RAH,		ch
+|.define RAL,		cl
+|.define RB,		rbp		// Must be rbp (C callee-save).
+|.define RBd,		ebp
+|.define RC,		rax		// Must be rax.
+|.define RCd,		eax
+|.define RCW,		ax
+|.define RCH,		ah
+|.define RCL,		al
+|.define OP,		RBd		// Opcode shares RB's register.
+|.define RD,		RC		// RD is an alias of RC.
+|.define RDd,		RCd
+|.define RDW,		RCW
+|.define RDL,		RCL
+|.define TMPR,		r10
+|.define TMPRd,		r10d
+|.define ITYPE,		r11		// Used by the settp/check* macros for the type tag.
+|.define ITYPEd,	r11d
+|
+|.if X64WIN
+|.define CARG1,		rcx		// x64/WIN64 C call arguments.
+|.define CARG2,		rdx
+|.define CARG3,		r8
+|.define CARG4,		r9		// No CARG5/CARG6 in this branch.
+|.define CARG1d,	ecx
+|.define CARG2d,	edx
+|.define CARG3d,	r8d
+|.define CARG4d,	r9d
+|.else
+|.define CARG1,		rdi		// x64/POSIX C call arguments.
+|.define CARG2,		rsi
+|.define CARG3,		rdx
+|.define CARG4,		rcx
+|.define CARG5,		r8
+|.define CARG6,		r9
+|.define CARG1d,	edi
+|.define CARG2d,	esi
+|.define CARG3d,	edx
+|.define CARG4d,	ecx
+|.define CARG5d,	r8d
+|.define CARG6d,	r9d
+|.endif
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L,		lua_State
+|.type GL,		global_State
+|.type TVALUE,		TValue
+|.type GCOBJ,		GCobj
+|.type STR,		GCstr
+|.type TAB,		GCtab
+|.type LFUNC,		GCfuncL
+|.type CFUNC,		GCfuncC
+|.type PROTO,		GCproto
+|.type UPVAL,		GCupval
+|.type NODE,		Node
+|.type NARGS,		int
+|.type TRACE,		GCtrace
+|.type SBUF,		SBuf
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|//-----------------------------------------------------------------------
+|.if X64WIN		// x64/Windows stack layout
+|
+|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
+|.macro saveregs_				// Push rdi, rsi, rbx, then reserve CFRAME_SPACE.
+|  push rdi; push rsi; push rbx
+|  sub rsp, CFRAME_SPACE
+|.endmacro
+|.macro saveregs				// Push rbp first, then saveregs_.
+|  push rbp; saveregs_
+|.endmacro
+|.macro restoreregs				// Exact inverse of saveregs.
+|  add rsp, CFRAME_SPACE
+|  pop rbx; pop rsi; pop rdi; pop rbp
+|.endmacro
+|
+|.define SAVE_CFRAME,	aword [rsp+aword*13]
+|.define SAVE_PC,	aword [rsp+aword*12]
+|.define SAVE_L,	aword [rsp+aword*11]
+|.define SAVE_ERRF,	dword [rsp+dword*21]
+|.define SAVE_NRES,	dword [rsp+dword*20]
+|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
+|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	aword [rsp+aword*8]
+|.define SAVE_R3,	aword [rsp+aword*7]
+|.define SAVE_R2,	aword [rsp+aword*6]
+|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define ARG5,		aword [rsp+aword*4]
+|.define CSAVE_4,	aword [rsp+aword*3]
+|.define CSAVE_3,	aword [rsp+aword*2]
+|.define CSAVE_2,	aword [rsp+aword*1]
+|.define CSAVE_1,	aword [rsp]		//<-- rsp while in interpreter.
+|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
+|
+|.define ARG5d,		dword [rsp+dword*8]	// Low dword of ARG5.
+|.define TMP1,		ARG5			// TMP1 overlaps ARG5
+|.define TMP1d,		ARG5d
+|.define TMP1hi,	dword [rsp+dword*9]	// High dword of TMP1/ARG5.
+|.define MULTRES,	TMP1d			// MULTRES overlaps TMP1d.
+|
+|//-----------------------------------------------------------------------
+|.else			// x64/POSIX stack layout
+|
+|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
+|.macro saveregs_				// Push rbx, r15, r14 (+ r13, r12 if NO_UNWIND), reserve CFRAME_SPACE.
+|  push rbx; push r15; push r14
+|.if NO_UNWIND
+|  push r13; push r12
+|.endif
+|  sub rsp, CFRAME_SPACE
+|.endmacro
+|.macro saveregs				// Push rbp first, then saveregs_.
+|  push rbp; saveregs_
+|.endmacro
+|.macro restoreregs				// Exact inverse of saveregs.
+|  add rsp, CFRAME_SPACE
+|.if NO_UNWIND
+|  pop r12; pop r13
+|.endif
+|  pop r14; pop r15; pop rbx; pop rbp
+|.endmacro
+|
+|//----- 16 byte aligned,
+|.if NO_UNWIND
+|.define SAVE_RET,	aword [rsp+aword*11]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	aword [rsp+aword*10]
+|.define SAVE_R3,	aword [rsp+aword*9]
+|.define SAVE_R2,	aword [rsp+aword*8]
+|.define SAVE_R1,	aword [rsp+aword*7]
+|.define SAVE_RU2,	aword [rsp+aword*6]
+|.define SAVE_RU1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.else
+|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	aword [rsp+aword*8]
+|.define SAVE_R3,	aword [rsp+aword*7]
+|.define SAVE_R2,	aword [rsp+aword*6]
+|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.endif
+|.define SAVE_CFRAME,	aword [rsp+aword*4]
+|.define SAVE_PC,	aword [rsp+aword*3]
+|.define SAVE_L,	aword [rsp+aword*2]
+|.define SAVE_ERRF,	dword [rsp+dword*3]
+|.define SAVE_NRES,	dword [rsp+dword*2]
+|.define TMP1,		aword [rsp]		//<-- rsp while in interpreter.
+|//----- 16 byte aligned
+|
+|.define TMP1d,		dword [rsp]		// Low dword of TMP1.
+|.define TMP1hi,	dword [rsp+dword*1]	// High dword of TMP1.
+|.define MULTRES,	TMP1d			// MULTRES overlaps TMP1d.
+|
+|.endif
+|
+|//-----------------------------------------------------------------------
+|
+|// Instruction headers. Operands arrive as set up by ins_NEXT: RA = A, RC = upper 16 bits.
+|.macro ins_A; .endmacro
+|.macro ins_AD; .endmacro
+|.macro ins_AJ; .endmacro
+|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro	// RB = high byte of RC, RC = low byte.
+|.macro ins_AB_; movzx RBd, RCH; .endmacro			// RB = high byte of RC only.
+|.macro ins_A_C; movzx RCd, RCL; .endmacro			// RC = low byte of RC only.
+|.macro ins_AND; not RD; .endmacro				// Bitwise complement of RD.
+|
+|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
+|.macro ins_NEXT
+|  mov RCd, [PC]			// Fetch the 32 bit instruction word.
+|  movzx RAd, RCH			// RA = bits 8..15.
+|  movzx OP, RCL			// OP = bits 0..7.
+|  add PC, 4				// PC now points past the fetched instruction.
+|  shr RCd, 16				// RC = bits 16..31.
+|  jmp aword [DISPATCH+OP*8]		// Indirect jump via 8 byte entries at DISPATCH.
+|.endmacro
+|
+|// Instruction footer.
+|.if 1
+|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+|  .define ins_next, ins_NEXT
+|  .define ins_next_, ins_NEXT
+|.else
+|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+|  // Affects only certain kinds of benchmarks (and only with -j off).
+|  // Around 10%-30% slower on Core2, a lot slower on P4.
+|  .macro ins_next
+|    jmp ->ins_next
+|  .endmacro
+|  .macro ins_next_
+|  ->ins_next:
+|    ins_NEXT
+|  .endmacro
+|.endif
+|
+|// Call decode and dispatch.
+|.macro ins_callt
+|  // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
+|  mov PC, LFUNC:RB->pc			// PC = first instruction of the callee.
+|  mov RAd, [PC]
+|  movzx OP, RAL			// Note: overwrites RB (OP = RBd).
+|  movzx RAd, RAH
+|  add PC, 4
+|  jmp aword [DISPATCH+OP*8]
+|.endmacro
+|
+|.macro ins_call
+|  // BASE = new base, RB = LFUNC, RD = nargs+1
+|  mov [BASE-8], PC			// Store the caller PC in the frame slot below BASE.
+|  ins_callt
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Macros to clear or set tags.
+|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro	// Zero the top 17 bits of reg.
+|.macro settp, reg, tp				// reg |= tp<<47. Clobbers ITYPE.
+|  mov64 ITYPE, ((int64_t)tp<<47)
+|  or reg, ITYPE
+|.endmacro
+|.macro settp, dst, reg, tp			// dst = reg | tp<<47. reg is left unchanged.
+|  mov64 dst, ((int64_t)tp<<47)
+|  or dst, reg
+|.endmacro
+|.macro setint, reg				// settp with LJ_TISNUM. Clobbers ITYPE.
+|  settp reg, LJ_TISNUM
+|.endmacro
+|.macro setint, dst, reg
+|  settp dst, reg, LJ_TISNUM
+|.endmacro
+|
+|// Macros to test operand types. All of them clobber ITYPE and the flags.
+|.macro checktp_nc, reg, tp, target		// Compare tag only; reg is not modified.
+|  mov ITYPE, reg
+|  sar ITYPE, 47
+|  cmp ITYPEd, tp
+|  jne target
+|.endmacro
+|.macro checktp, reg, tp, target		// Also strips the tag from reg, even if the check fails.
+|  mov ITYPE, reg
+|  cleartp reg
+|  sar ITYPE, 47
+|  cmp ITYPEd, tp
+|  jne target
+|.endmacro
+|.macro checktptp, src, tp, target		// Same instruction sequence as checktp_nc.
+|  mov ITYPE, src
+|  sar ITYPE, 47
+|  cmp ITYPEd, tp
+|  jne target
+|.endmacro
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
+|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
+|
+|.macro checknumx, reg, target, jump		// Compare the tag of reg with LJ_TISNUM, then 'jump target'.
+|  mov ITYPE, reg
+|  sar ITYPE, 47
+|  cmp ITYPEd, LJ_TISNUM
+|  jump target
+|.endmacro
+|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro	// Branch if tag != LJ_TISNUM.
+|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
+|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro	// Branch if tag >= LJ_TISNUM (unsigned).
+|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro
+|.macro checknumber, src, target; checknumx src, target, ja; .endmacro	// Branch if tag > LJ_TISNUM (unsigned).
+|
+|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro	// All bits set except bit 47.
+|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro	// All bits set except bit 48.
+|
+|// These operands must be used with movzx.
+|// They address the instruction at PC-4, i.e. the one fetched before PC was advanced.
+|.define PC_RA, byte [PC-3]
+|.define PC_RB, byte [PC-1]
+|.define PC_RC, byte [PC-2]
+|.define PC_RD, word [PC-2]
+|
+|.macro branchPC, reg				// PC += (reg - BCBIAS_J) * 4.
+|  lea PC, [PC+reg*4-BCBIAS_J*4]
+|.endmacro
+|
+|// Displacements from DISPATCH to a field. Assumes DISPATCH is relative to GL.
+#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
+#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
+|// Offset of a GCproto field relative to the end of the GCproto struct (negative).
+#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
+|// Decrement hashed hotcount and trigger trace recorder if zero.
+|.macro hotloop, reg				// Clobbers reg and the flags.
+|  mov reg, PCd
+|  shr reg, 1
+|  and reg, HOTCOUNT_PCMASK		// reg = (PC >> 1) & HOTCOUNT_PCMASK.
+|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
+|  jb ->vm_hotloop			// Taken when the 16 bit counter underflows.
+|.endmacro
+|
+|.macro hotcall, reg				// Same as hotloop, with HOTCOUNT_CALL and vm_hotcall.
+|  mov reg, PCd
+|  shr reg, 1
+|  and reg, HOTCOUNT_PCMASK
+|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
+|  jb ->vm_hotcall
+|.endmacro
+|
+|// Set current VM state. Stores ~LJ_VMST_<st> into the global_State vmstate field.
+|.macro set_vmstate, st
+|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
+|.endmacro
+|
+|.macro fpop1; fstp st1; .endmacro		// x87: store st0 into st1 and pop.
+|
+|// Synthesize SSE FP constants. Each macro clobbers the GPR passed as tmp.
+|.macro sseconst_abs, reg, tmp		// Synthesize abs mask.
+|  mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
+|.endmacro
+|
+|.macro sseconst_hi, reg, tmp, val	// Synthesize hi-32 bit const.
+|  mov64 tmp, U64x(val,00000000); movd reg, tmp
+|.endmacro
+|
+|.macro sseconst_sign, reg, tmp		// Synthesize sign mask.
+|  sseconst_hi reg, tmp, 80000000
+|.endmacro
+|.macro sseconst_1, reg, tmp		// Synthesize 1.0.
+|  sseconst_hi reg, tmp, 3ff00000
+|.endmacro
+|.macro sseconst_m1, reg, tmp		// Synthesize -1.0.
+|  sseconst_hi reg, tmp, bff00000
+|.endmacro
+|.macro sseconst_2p52, reg, tmp		// Synthesize 2^52.
+|  sseconst_hi reg, tmp, 43300000
+|.endmacro
+|.macro sseconst_tobit, reg, tmp	// Synthesize 2^52 + 2^51.
+|  sseconst_hi reg, tmp, 43380000
+|.endmacro
+|
+|// Move table write barrier back. Overwrites reg.
+|.macro barrierback, tab, reg
+|  and byte tab->marked, (uint8_t)~LJ_GC_BLACK	// black2gray(tab)
+|  mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]	// reg = old head of gc.grayagain.
+|  mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab	// tab becomes the new head.
+|  mov tab->gclist, reg				// Link the old head behind tab.
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+  |.code_sub
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Return handling ----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_returnp:			// Return through a non-C, non-Lua frame. Falls through to vm_returnc.
+  |  test PCd, FRAME_P
+  |  jz ->cont_dispatch			// FRAME_P bit clear: dispatch a continuation instead.
+  |
+  |  // Return from pcall or xpcall fast func.
+  |  and PC, -8				// Clear the low 3 bits, leaving the frame delta.
+  |  sub BASE, PC			// Restore caller base.
+  |  lea RA, [RA+PC-8]			// Rebase RA and prepend one result.
+  |  mov PC, [BASE-8]			// Fetch PC of previous frame.
+  |  // Prepending may overwrite the pcall frame, so do it at the end.
+  |  mov_true ITYPE
+  |  mov aword [BASE+RA], ITYPE		// Prepend true to results.
+  |
+  |->vm_returnc:			// Falls through to vm_return for non-Lua frames.
+  |  add RDd, 1				// RD = nresults+1
+  |  jz ->vm_unwind_yield		// Incoming RD was -1.
+  |  mov MULTRES, RDd
+  |  test PC, FRAME_TYPE
+  |  jz ->BC_RET_Z			// Handle regular return to Lua.
+  |
+  |->vm_return:
+  |  // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
+  |  xor PC, FRAME_C			// Flip FRAME_C: the type bits are now zero for a C frame.
+  |  test PCd, FRAME_TYPE
+  |  jnz ->vm_returnp			// Not a C frame.
+  |
+  |  // Return to C.
+  |  set_vmstate C
+  |  and PC, -8
+  |  sub PC, BASE
+  |  neg PC				// Previous base = BASE - delta.
+  |
+  |  sub RDd, 1
+  |  jz >2				// No results to move.
+  |1:  // Move results down.
+  |  mov RB, [BASE+RA]
+  |  mov [BASE-16], RB
+  |  add BASE, 8
+  |  sub RDd, 1
+  |  jnz <1
+  |2:
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, PC			// L->base = previous base computed above.
+  |3:
+  |  mov RDd, MULTRES
+  |  mov RAd, SAVE_NRES			// RA = wanted nresults+1
+  |4:
+  |  cmp RAd, RDd
+  |  jne >6				// More/less results wanted?
+  |5:
+  |  sub BASE, 16
+  |  mov L:RB->top, BASE		// L->top = final BASE-16.
+  |
+  |->vm_leave_cp:
+  |  mov RA, SAVE_CFRAME		// Restore previous C frame.
+  |  mov L:RB->cframe, RA
+  |  xor eax, eax			// Ok return status for vm_pcall.
+  |
+  |->vm_leave_unw:			// Expects the return status in eax.
+  |  restoreregs
+  |  ret
+  |
+  |6:
+  |  jb >7				// Less results wanted?
+  |  // More results wanted. Check stack size and fill up results with nil.
+  |  cmp BASE, L:RB->maxstack
+  |  ja >8
+  |  mov aword [BASE-16], LJ_TNIL
+  |  add BASE, 8
+  |  add RDd, 1
+  |  jmp <4
+  |
+  |7:  // Less results wanted.
+  |  test RAd, RAd
+  |  jz <5				// But check for LUA_MULTRET+1.
+  |  sub RA, RD				// Negative result!
+  |  lea BASE, [BASE+RA*8]		// Correct top.
+  |  jmp <5
+  |
+  |8:  // Corner case: need to grow stack for filling up results.
+  |  // This can happen if:
+  |  // - A C function grows the stack (a lot).
+  |  // - The GC shrinks the stack in between.
+  |  // - A return back from a lua_call() with (high) nresults adjustment.
+  |  mov L:RB->top, BASE		// Save current top held in BASE (yes).
+  |  mov MULTRES, RDd			// Need to fill only remainder with nil.
+  |  mov CARG2d, RAd
+  |  mov CARG1, L:RB
+  |  call extern lj_state_growstack	// (lua_State *L, int n)
+  |  mov BASE, L:RB->top		// Need the (realloced) L->top in BASE.
+  |  jmp <3
+  |
+  |->vm_unwind_yield:			// From vm_returnc eax (= RDd) is 0 here, so eax becomes LUA_YIELD.
+  |  mov al, LUA_YIELD
+  |  jmp ->vm_unwind_c_eh
+  |
+  |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
+  |  // (void *cframe, int errcode)
+  |  mov eax, CARG2d			// Error return status for vm_pcall.
+  |  mov rsp, CARG1			// Switch the stack pointer to the given cframe.
+  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
+  |  mov L:RB, SAVE_L
+  |  mov GL:RB, L:RB->glref
+  |  mov dword GL:RB->vmstate, ~LJ_VMST_C
+  |  jmp ->vm_leave_unw			// Restore registers and return with eax as status.
+  |
+  |->vm_unwind_rethrow:			// Body is emitted for non-WIN64 targets only.
+  |.if not X64WIN
+  |  mov CARG1, SAVE_L
+  |  mov CARG2d, eax			// Error code is taken from eax.
+  |  restoreregs
+  |  jmp extern lj_err_throw		// (lua_State *L, int errcode)
+  |.endif
+  |
+  |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
+  |  // (void *cframe)
+  |  and CARG1, CFRAME_RAWMASK		// Mask the cframe pointer before using it as rsp.
+  |  mov rsp, CARG1
+  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
+  |  mov L:RB, SAVE_L
+  |  mov RDd, 1+1			// Really 1+2 results, incr. later.
+  |  mov BASE, L:RB->base
+  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
+  |  add DISPATCH, GG_G2DISP
+  |  mov PC, [BASE-8]			// Fetch PC of previous frame.
+  |  mov_false RA
+  |  mov RB, [BASE]			// Error message is read from [BASE].
+  |  mov [BASE-16], RA			// Prepend false to error message.
+  |  mov [BASE-8], RB
+  |  mov RA, -16			// Results start at BASE+RA = BASE-16.
+  |  set_vmstate INTERP
+  |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Grow stack for calls -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_growstack_c:			// Grow stack for C function.
+  |  mov CARG2d, LUA_MINSTACK
+  |  jmp >2
+  |
+  |->vm_growstack_v:			// Grow stack for vararg Lua function.
+  |  sub RD, 8
+  |  jmp >1
+  |
+  |->vm_growstack_f:			// Grow stack for fixarg Lua function.
+  |  // BASE = new base, RD = nargs+1, RB = L, PC = first PC
+  |  lea RD, [BASE+NARGS:RD*8-8]		// RD = BASE + nargs*8 (becomes L->top below).
+  |1:
+  |  movzx RAd, byte [PC-4+PC2PROTO(framesize)]	// RA = framesize byte of the prototype.
+  |  add PC, 4				// Must point after first instruction.
+  |  mov L:RB->base, BASE
+  |  mov L:RB->top, RD
+  |  mov SAVE_PC, PC
+  |  mov CARG2, RA			// n = framesize.
+  |2:
+  |  // RB = L, L->base = new base, L->top = top
+  |  mov CARG1, L:RB
+  |  call extern lj_state_growstack	// (lua_State *L, int n)
+  |  mov BASE, L:RB->base		// Reload BASE and top after the call.
+  |  mov RD, L:RB->top
+  |  mov LFUNC:RB, [BASE-16]		// Reload the function from the slot below BASE.
+  |  cleartp LFUNC:RB
+  |  sub RD, BASE
+  |  shr RDd, 3
+  |  add NARGS:RDd, 1			// RD = (top - base)/8 + 1 = nargs+1.
+  |  // BASE = new base, RB = LFUNC, RD = nargs+1
+  |  ins_callt				// Just retry the call.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Entry points into the assembler VM ---------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_resume:				// Setup C frame and resume thread.
+  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+  |  saveregs
+  |  mov L:RB, CARG1			// Caveat: CARG1 may be RA.
+  |  mov SAVE_L, CARG1
+  |  mov RA, CARG2
+  |  mov PCd, FRAME_CP
+  |  xor RDd, RDd			// RD = 0, used as the zero source below.
+  |  lea KBASE, [rsp+CFRAME_RESUME]	// C frame pointer from the full 64 bit rsp, plus CFRAME_RESUME.
+  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
+  |  add DISPATCH, GG_G2DISP
+  |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
+  |  mov SAVE_CFRAME, RD
+  |  mov SAVE_NRES, RDd
+  |  mov SAVE_ERRF, RDd
+  |  mov L:RB->cframe, KBASE
+  |  cmp byte L:RB->status, RDL		// status == 0?
+  |  je >2				// Initial resume (like a call).
+  |
+  |  // Resume after yield (like a return).
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+  |  set_vmstate INTERP
+  |  mov byte L:RB->status, RDL		// Clear L->status.
+  |  mov BASE, L:RB->base
+  |  mov RD, L:RB->top
+  |  sub RD, RA
+  |  shr RDd, 3
+  |  add RDd, 1				// RD = nresults+1
+  |  sub RA, BASE			// RA = resultofs
+  |  mov PC, [BASE-8]
+  |  mov MULTRES, RDd
+  |  test PCd, FRAME_TYPE
+  |  jz ->BC_RET_Z			// Frame type bits clear: return to Lua.
+  |  jmp ->vm_return
+  |
+  |->vm_pcall:				// Setup protected C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+  |  saveregs
+  |  mov PCd, FRAME_CP
+  |  mov SAVE_ERRF, CARG4d		// Only the low 32 bits of ef are stored.
+  |  jmp >1
+  |
+  |->vm_call:				// Setup C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1)
+  |  saveregs
+  |  mov PCd, FRAME_C
+  |
+  |1:  // Entry point for vm_pcall above (PC = ftype).
+  |  mov SAVE_NRES, CARG3d
+  |  mov L:RB, CARG1			// Caveat: CARG1 may be RA.
+  |  mov SAVE_L, CARG1
+  |  mov RA, CARG2
+  |
+  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
+  |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
+  |  mov SAVE_CFRAME, KBASE
+  |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
+  |  add DISPATCH, GG_G2DISP
+  |  mov L:RB->cframe, rsp
+  |
+  |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+  |  set_vmstate INTERP
+  |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
+  |  add PC, RA
+  |  sub PC, BASE			// PC = frame delta + frame type
+  |
+  |  mov RD, L:RB->top
+  |  sub RD, RA
+  |  shr NARGS:RDd, 3
+  |  add NARGS:RDd, 1			// RD = nargs+1
+  |
+  |->vm_call_dispatch:
+  |  mov LFUNC:RB, [RA-16]		// Callee is read from the slot at RA-16.
+  |  checkfunc LFUNC:RB, ->vmeta_call	// Ensure KBASE defined and != BASE.
+  |
+  |->vm_call_dispatch_f:
+  |  mov BASE, RA
+  |  ins_call
+  |  // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
+  |
+  |->vm_cpcall:				// Setup protected C frame, call C.
+  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+  |  saveregs
+  |  mov L:RB, CARG1			// Caveat: CARG1 may be RA.
+  |  mov SAVE_L, CARG1
+  |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
+  |
+  |  mov KBASE, L:RB->stack		// Compute -savestack(L, L->top).
+  |  sub KBASE, L:RB->top
+  |   mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
+  |  mov SAVE_ERRF, 0			// No error function.
+  |  mov SAVE_NRES, KBASEd		// Neg. delta means cframe w/o frame.
+  |   add DISPATCH, GG_G2DISP
+  |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
+  |
+  |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
+  |  mov SAVE_CFRAME, KBASE
+  |  mov L:RB->cframe, rsp
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+  |
+  |  call CARG4			// (lua_State *L, lua_CFunction func, void *ud)
+  |  // TValue * (new base) or NULL returned in rax (RC).
+  |  test RC, RC
+  |  jz ->vm_leave_cp			// No base? Just remove C frame.
+  |  mov RA, RC
+  |  mov PCd, FRAME_CP
+  |  jmp <2				// Else continue with the call.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Metamethod handling ------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |//-- Continuation dispatch ----------------------------------------------
+  |
+  |->cont_dispatch:
+  |  // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
+  |  add RA, BASE			// RA = address of the first result.
+  |  and PC, -8				// Clear the low 3 bits, leaving the frame delta.
+  |  mov RB, BASE
+  |  sub BASE, PC			// Restore caller BASE.
+  |  mov aword [RA+RD*8-8], LJ_TNIL	// Ensure one valid arg.
+  |  mov RC, RA				// ... in [RC]
+  |  mov PC, [RB-24]			// Restore PC from [cont|PC].
+  |  mov RA, qword [RB-32]		// May be negative on WIN64 with debug.
+  |.if FFI
+  |  cmp RA, 1
+  |  jbe >1				// cont is 0 or 1: special cases below.
+  |.endif
+  |  mov LFUNC:KBASE, [BASE-16]
+  |  cleartp LFUNC:KBASE
+  |  mov KBASE, LFUNC:KBASE->pc
+  |  mov KBASE, [KBASE+PC2PROTO(k)]	// KBASE = the 'k' field of the caller's prototype.
+  |  // BASE = base, RC = result, RB = meta base
+  |  jmp RA				// Jump to continuation.
+  |
+  |.if FFI
+  |1:
+  |  je ->cont_ffi_callback		// cont = 1: return from FFI callback.
+  |  // cont = 0: Tail call from C function.
+  |  sub RB, BASE
+  |  shr RBd, 3
+  |  lea RDd, [RBd-3]			// RD = (meta base - base)/8 - 3.
+  |  jmp ->vm_call_tail
+  |.endif
+  |
+  |->cont_cat:				// BASE = base, RC = result, RB = mbase
+  |  movzx RAd, PC_RB			// Reload operand B of the current instruction.
+  |  sub RB, 32
+  |  lea RA, [BASE+RA*8]
+  |  sub RA, RB
+  |  je ->cont_ra			// RB-32 == BASE+B*8: store the result via cont_ra.
+  |  neg RA
+  |  shr RAd, 3				// RA = remaining distance in slots.
+  |.if X64WIN
+  |  mov CARG3d, RAd
+  |  mov L:CARG1, SAVE_L
+  |  mov L:CARG1->base, BASE
+  |  mov RC, [RC]
+  |  mov [RB], RC			// Copy the result to [RB].
+  |  mov CARG2, RB
+  |.else
+  |  mov L:CARG1, SAVE_L
+  |  mov L:CARG1->base, BASE
+  |  mov CARG3d, RAd
+  |  mov RA, [RC]
+  |  mov [RB], RA			// Copy the result to [RB].
+  |  mov CARG2, RB
+  |.endif
+  |  jmp ->BC_CAT_Z
+  |
+  |//-- Table indexing metamethods -----------------------------------------
+  |
+  |->vmeta_tgets:
+  |  settp STR:RC, LJ_TSTR		// STR:RC = GCstr *
+  |  mov TMP1, STR:RC
+  |  lea RC, TMP1			// RC = address of the tagged key in TMP1.
+  |  cmp PC_OP, BC_GGET
+  |  jne >1
+  |  settp TAB:RA, TAB:RB, LJ_TTAB	// TAB:RB = GCtab *
+  |  lea RB, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
+  |  mov [RB], TAB:RA
+  |  jmp >2
+  |
+  |->vmeta_tgetb:
+  |  movzx RCd, PC_RC
+  |.if DUALNUM
+  |  setint RC
+  |  mov TMP1, RC
+  |.else
+  |  cvtsi2sd xmm0, RCd
+  |  movsd TMP1, xmm0
+  |.endif
+  |  lea RC, TMP1			// RC = address of the key built in TMP1.
+  |  jmp >1
+  |
+  |->vmeta_tgetv:
+  |  movzx RCd, PC_RC			// Reload TValue *k from RC.
+  |  lea RC, [BASE+RC*8]
+  |1:
+  |  movzx RBd, PC_RB			// Reload TValue *t from RB.
+  |  lea RB, [BASE+RB*8]
+  |2:
+  |  mov L:CARG1, SAVE_L
+  |  mov L:CARG1->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
+  |  mov CARG2, RB
+  |  mov CARG3, RC
+  |  mov L:RB, L:CARG1
+  |  mov SAVE_PC, PC
+  |  call extern lj_meta_tget		// (lua_State *L, TValue *o, TValue *k)
+  |  // TValue * (finished) or NULL (metamethod) returned in rax (RC).
+  |  mov BASE, L:RB->base
+  |  test RC, RC
+  |  jz >3
+  |->cont_ra:				// BASE = base, RC = result
+  |  movzx RAd, PC_RA
+  |  mov RB, [RC]
+  |  mov [BASE+RA*8], RB		// Copy the result into slot A.
+  |  ins_next
+  |
+  |3:  // Call __index metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k
+  |  mov RA, L:RB->top
+  |  mov [RA-24], PC			// [cont|PC]
+  |  lea PC, [RA+FRAME_CONT]
+  |  sub PC, BASE			// PC = frame delta + FRAME_CONT.
+  |  mov LFUNC:RB, [RA-16]		// Guaranteed to be a function here.
+  |  mov NARGS:RDd, 2+1			// 2 args for func(t, k).
+  |  cleartp LFUNC:RB
+  |  jmp ->vm_call_dispatch_f
+  |
+  |->vmeta_tgetr:
+  |  mov CARG1, TAB:RB
+  |  mov RB, BASE			// Save BASE.
+  |  mov CARG2d, RCd			// Caveat: CARG2 == BASE
+  |  call extern lj_tab_getinth		// (GCtab *t, int32_t key)
+  |  // cTValue * or NULL returned in rax (RC).
+  |  movzx RAd, PC_RA
+  |  mov BASE, RB			// Restore BASE.
+  |  test RC, RC
+  |  jnz ->BC_TGETR_Z
+  |  mov ITYPE, LJ_TNIL			// NULL result: continue with nil in ITYPE.
+  |  jmp ->BC_TGETR2_Z
+  |
+  |//-----------------------------------------------------------------------
+  |
+  |->vmeta_tsets:
+  |  settp STR:RC, LJ_TSTR		// STR:RC = GCstr *
+  |  mov TMP1, STR:RC
+  |  lea RC, TMP1			// RC = address of the tagged key in TMP1.
+  |  cmp PC_OP, BC_GSET
+  |  jne >1
+  |  settp TAB:RA, TAB:RB, LJ_TTAB	// TAB:RB = GCtab *
+  |  lea RB, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
+  |  mov [RB], TAB:RA
+  |  jmp >2
+  |
+  |->vmeta_tsetb:
+  |  movzx RCd, PC_RC
+  |.if DUALNUM
+  |  setint RC
+  |  mov TMP1, RC
+  |.else
+  |  cvtsi2sd xmm0, RCd
+  |  movsd TMP1, xmm0
+  |.endif
+  |  lea RC, TMP1			// RC = address of the key built in TMP1.
+  |  jmp >1
+  |
+  |->vmeta_tsetv:
+  |  movzx RCd, PC_RC			// Reload TValue *k from RC.
+  |  lea RC, [BASE+RC*8]
+  |1:
+  |  movzx RBd, PC_RB			// Reload TValue *t from RB.
+  |  lea RB, [BASE+RB*8]
+  |2:
+  |  mov L:CARG1, SAVE_L
+  |  mov L:CARG1->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
+  |  mov CARG2, RB
+  |  mov CARG3, RC
+  |  mov L:RB, L:CARG1
+  |  mov SAVE_PC, PC
+  |  call extern lj_meta_tset		// (lua_State *L, TValue *o, TValue *k)
+  |  // TValue * (finished) or NULL (metamethod) returned in rax (RC).
+  |  mov BASE, L:RB->base
+  |  test RC, RC
+  |  jz >3
+  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
+  |  movzx RAd, PC_RA
+  |  mov RB, [BASE+RA*8]
+  |  mov [RC], RB			// Store the value from slot A at the returned address.
+  |->cont_nop:				// BASE = base, (RC = result)
+  |  ins_next
+  |
+  |3:  // Call __newindex metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+  |  mov RA, L:RB->top
+  |  mov [RA-24], PC			// [cont|PC]
+  |  movzx RCd, PC_RA
+  |  // Copy value to third argument.
+  |  mov RB, [BASE+RC*8]
+  |  mov [RA+16], RB
+  |  lea PC, [RA+FRAME_CONT]
+  |  sub PC, BASE			// PC = frame delta + FRAME_CONT.
+  |  mov LFUNC:RB, [RA-16]		// Guaranteed to be a function here.
+  |  mov NARGS:RDd, 3+1			// 3 args for func(t, k, v).
+  |  cleartp LFUNC:RB
+  |  jmp ->vm_call_dispatch_f
+  |
+  |->vmeta_tsetr:			// TSETR helper: get slot for int key RC in table RB via lj_tab_setinth.
+  |.if X64WIN
+  |  mov L:CARG1, SAVE_L
+  |  mov CARG3d, RCd
+  |  mov L:CARG1->base, BASE
+  |  xchg CARG2, TAB:RB			// Caveat: CARG2 == BASE. Table -> CARG2, BASE saved in RB.
+  |.else
+  |  mov L:CARG1, SAVE_L
+  |  mov CARG2, TAB:RB
+  |  mov L:CARG1->base, BASE
+  |  mov RB, BASE			// Save BASE.
+  |  mov CARG3d, RCd			// Caveat: CARG3 == BASE.
+  |.endif
+  |  mov SAVE_PC, PC
+  |  call extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
+  |  // TValue * returned in eax (RC).
+  |  movzx RAd, PC_RA			// Reload RA from the instruction.
+  |  mov BASE, RB			// Restore BASE.
+  |  jmp ->BC_TSETR_Z
+  |
+  |//-- Comparison metamethods ---------------------------------------------
+  |
+  |->vmeta_comp:			// Comparison via lj_meta_comp on slots RA and RD.
+  |  movzx RDd, PC_RD
+  |  movzx RAd, PC_RA
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE		// Caveat: CARG2/CARG3 == BASE.
+  |.if X64WIN
+  |  lea CARG3, [BASE+RD*8]		// BASE is still needed below, so CARG3 first here.
+  |  lea CARG2, [BASE+RA*8]
+  |.else
+  |  lea CARG2, [BASE+RA*8]		// BASE is still needed below, so CARG2 first here.
+  |  lea CARG3, [BASE+RD*8]
+  |.endif
+  |  mov CARG1, L:RB			// Caveat: CARG1/CARG4 == RA.
+  |  movzx CARG4d, PC_OP
+  |  mov SAVE_PC, PC
+  |  call extern lj_meta_comp	// (lua_State *L, TValue *o1, *o2, int op)
+  |  // 0/1 or TValue * (metamethod) returned in eax (RC).
+  |3:					// Also entered from vmeta_equal and vmeta_equal_cd.
+  |  mov BASE, L:RB->base
+  |  cmp RC, 1
+  |  ja ->vmeta_binop			// RC > 1: a TValue * was returned, call the metamethod.
+  |4:					// Also entered from cont_condf with its own flags.
+  |  lea PC, [PC+4]			// lea does not modify the flags tested below.
+  |  jb >6				// Below: continue without branching.
+  |5:
+  |  movzx RDd, PC_RD
+  |  branchPC RD
+  |6:
+  |  ins_next
+  |
+  |->cont_condt:			// BASE = base, RC = result
+  |  add PC, 4
+  |  mov ITYPE, [RC]
+  |  sar ITYPE, 47			// ITYPE = type tag of the result.
+  |  cmp ITYPEd, LJ_TISTRUECOND		// Branch if result is true.
+  |  jb <5
+  |  jmp <6
+  |
+  |->cont_condf:			// BASE = base, RC = result
+  |  mov ITYPE, [RC]
+  |  sar ITYPE, 47			// ITYPE = type tag of the result.
+  |  cmp ITYPEd, LJ_TISTRUECOND		// Branch if result is false.
+  |  jmp <4				// Label 4 advances PC and tests these flags.
+  |
+  |->vmeta_equal:			// Equality via lj_meta_equal(L, o1 = RA, o2 = RD, ne = RB).
+  |  cleartp TAB:RD
+  |  sub PC, 4
+  |.if X64WIN
+  |  mov CARG3, RD
+  |  mov CARG4d, RBd
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE		// Caveat: CARG2 == BASE.
+  |  mov CARG2, RA
+  |  mov CARG1, L:RB			// Caveat: CARG1 == RA.
+  |.else
+  |  mov CARG2, RA
+  |  mov CARG4d, RBd			// Caveat: CARG4 == RA.
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE		// Caveat: CARG3 == BASE.
+  |  mov CARG3, RD
+  |  mov CARG1, L:RB
+  |.endif
+  |  mov SAVE_PC, PC
+  |  call extern lj_meta_equal	// (lua_State *L, GCobj *o1, *o2, int ne)
+  |  // 0/1 or TValue * (metamethod) returned in eax (RC).
+  |  jmp <3
+  |
+  |->vmeta_equal_cd:			// Equality via lj_meta_equal_cd; body only assembled with FFI.
+  |.if FFI
+  |  sub PC, 4
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE
+  |  mov CARG1, L:RB
+  |  mov CARG2d, dword [PC-4]		// Pass the 32 bit instruction word at PC-4 (BCIns ins).
+  |  mov SAVE_PC, PC
+  |  call extern lj_meta_equal_cd	// (lua_State *L, BCIns ins)
+  |  // 0/1 or TValue * (metamethod) returned in eax (RC).
+  |  jmp <3
+  |.endif
+  |
+  |->vmeta_istype:			// Calls lj_meta_istype(L, RA, RD), then continues with next ins.
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
+  |  mov CARG2d, RAd
+  |  mov CARG3d, RDd
+  |  mov L:CARG1, L:RB
+  |  mov SAVE_PC, PC
+  |  call extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
+  |  mov BASE, L:RB->base
+  |  jmp <6
+  |
+  |//-- Arithmetic metamethods ---------------------------------------------
+  |
+  |->vmeta_arith_vno:			// As vmeta_arith_vn, but reloads RB/RC first if DUALNUM.
+  |.if DUALNUM
+  |  movzx RBd, PC_RB
+  |  movzx RCd, PC_RC
+  |.endif
+  |->vmeta_arith_vn:			// rb = stack slot RB, rc = constant RC.
+  |  lea RC, [KBASE+RC*8]
+  |  jmp >1
+  |
+  |->vmeta_arith_nvo:			// As vmeta_arith_nv, but reloads RB/RC first if DUALNUM.
+  |.if DUALNUM
+  |  movzx RBd, PC_RB
+  |  movzx RCd, PC_RC
+  |.endif
+  |->vmeta_arith_nv:			// rb = constant RC, rc = stack slot RB.
+  |  lea TMPR, [KBASE+RC*8]
+  |  lea RC, [BASE+RB*8]
+  |  mov RB, TMPR
+  |  jmp >2
+  |
+  |->vmeta_unm:				// Unary minus: rb = rc = stack slot RD.
+  |  lea RC, [BASE+RD*8]
+  |  mov RB, RC
+  |  jmp >2
+  |
+  |->vmeta_arith_vvo:			// As vmeta_arith_vv, but reloads RB/RC first if DUALNUM.
+  |.if DUALNUM
+  |  movzx RBd, PC_RB
+  |  movzx RCd, PC_RC
+  |.endif
+  |->vmeta_arith_vv:			// rb = stack slot RB, rc = stack slot RC.
+  |  lea RC, [BASE+RC*8]
+  |1:
+  |  lea RB, [BASE+RB*8]
+  |2:					// RB = TValue *rb, RC = TValue *rc.
+  |  lea RA, [BASE+RA*8]		// RA = TValue *ra.
+  |.if X64WIN
+  |  mov CARG3, RB
+  |  mov CARG4, RC
+  |  movzx RCd, PC_OP
+  |  mov ARG5d, RCd			// Fifth argument (op) is stored via ARG5d here.
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE		// Caveat: CARG2 == BASE.
+  |  mov CARG2, RA
+  |  mov CARG1, L:RB			// Caveat: CARG1 == RA.
+  |.else
+  |  movzx CARG5d, PC_OP
+  |  mov CARG2, RA
+  |  mov CARG4, RC			// Caveat: CARG4 == RA.
+  |  mov L:CARG1, SAVE_L
+  |  mov L:CARG1->base, BASE		// Caveat: CARG3 == BASE.
+  |  mov CARG3, RB
+  |  mov L:RB, L:CARG1
+  |.endif
+  |  mov SAVE_PC, PC
+  |  call extern lj_meta_arith	// (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+  |  // NULL (finished) or TValue * (metamethod) returned in eax (RC).
+  |  mov BASE, L:RB->base
+  |  test RC, RC
+  |  jz ->cont_nop
+  |
+  |  // Call metamethod for binary op.
+  |->vmeta_binop:
+  |  // BASE = base, RC = new base, stack = cont/func/o1/o2
+  |  mov RA, RC
+  |  sub RC, BASE
+  |  mov [RA-24], PC			// [cont|PC]
+  |  lea PC, [RC+FRAME_CONT]		// PC = (new base - BASE) + FRAME_CONT.
+  |  mov NARGS:RDd, 2+1			// 2 args for func(o1, o2).
+  |  jmp ->vm_call_dispatch
+  |
+  |->vmeta_len:				// Length via lj_meta_len on slot RD.
+  |  movzx RDd, PC_RD
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE
+  |  lea CARG2, [BASE+RD*8]		// Caveat: CARG2 == BASE
+  |  mov L:CARG1, L:RB
+  |  mov SAVE_PC, PC
+  |  call extern lj_meta_len		// (lua_State *L, TValue *o)
+  |  // NULL (retry) or TValue * (metamethod) returned in eax (RC).
+  |  mov BASE, L:RB->base
+#if LJ_52
+  |  test RC, RC
+  |  jne ->vmeta_binop			// Binop call for compatibility.
+  |  movzx RDd, PC_RD			// NULL (retry): reload the operand ...
+  |  mov TAB:CARG1, [BASE+RD*8]		// ... and pass the untagged table in CARG1.
+  |  cleartp TAB:CARG1
+  |  jmp ->BC_LEN_Z
+#else
+  |  jmp ->vmeta_binop			// Binop call for compatibility.
+#endif
+  |
+  |//-- Call metamethod ----------------------------------------------------
+  |
+  |->vmeta_call_ra:			// Entry with RA as slot index: convert to new base pointer.
+  |  lea RA, [BASE+RA*8+16]
+  |->vmeta_call:			// Resolve and call __call metamethod.
+  |  // BASE = old base, RA = new base, RC = nargs+1, PC = return
+  |  mov TMP1d, NARGS:RDd		// Save RA, RC for us.
+  |  mov RB, RA
+  |.if X64WIN
+  |  mov L:TMPR, SAVE_L
+  |  mov L:TMPR->base, BASE		// Caveat: CARG2 is BASE.
+  |  lea CARG2, [RA-16]
+  |  lea CARG3, [RA+NARGS:RD*8-8]
+  |  mov CARG1, L:TMPR			// Caveat: CARG1 is RA.
+  |.else
+  |  mov L:CARG1, SAVE_L
+  |  mov L:CARG1->base, BASE		// Caveat: CARG3 is BASE.
+  |  lea CARG2, [RA-16]
+  |  lea CARG3, [RA+NARGS:RD*8-8]
+  |.endif
+  |  mov SAVE_PC, PC
+  |  call extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
+  |  mov RA, RB				// Restore RA (new base) saved above.
+  |  mov L:RB, SAVE_L
+  |  mov BASE, L:RB->base
+  |  mov NARGS:RDd, TMP1d		// Restore nargs+1 saved above.
+  |  mov LFUNC:RB, [RA-16]		// Load the function from slot RA-16 ...
+  |  cleartp LFUNC:RB			// ... and strip its type tag.
+  |  add NARGS:RDd, 1			// One more argument than before.
+  |  // This is fragile. L->base must not move, KBASE must always be defined.
+  |  cmp KBASE, BASE			// Continue with CALLT if flag set.
+  |  je ->BC_CALLT_Z
+  |  mov BASE, RA
+  |  ins_call				// Otherwise call resolved metamethod.
+  |
+  |//-- Argument coercion for 'for' statement ------------------------------
+  |
+  |->vmeta_for:				// Calls lj_meta_for on the slots at RA, then re-dispatches.
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE
+  |  mov CARG2, RA			// Caveat: CARG2 == BASE
+  |  mov L:CARG1, L:RB			// Caveat: CARG1 == RA
+  |  mov SAVE_PC, PC
+  |  call extern lj_meta_for	// (lua_State *L, TValue *base)
+  |  mov BASE, L:RB->base
+  |  mov RCd, [PC-4]			// Reload the instruction word at PC-4.
+  |  movzx RAd, RCH			// RA = second byte of the instruction word.
+  |  movzx OP, RCL			// OP = lowest byte of the instruction word.
+  |  shr RCd, 16			// RC = upper 16 bits of the instruction word.
+  |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]	// Retry FORI or JFORI.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Fast functions -----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |.macro .ffunc, name			// Define entry label ff_<name>, no argument checks.
+  |->ff_ .. name:
+  |.endmacro
+  |
+  |.macro .ffunc_1, name		// Entry label plus fallback if fewer than 1 argument.
+  |->ff_ .. name:
+  |  cmp NARGS:RDd, 1+1;  jb ->fff_fallback	// NARGS holds nargs+1. Flags stay set for the caller.
+  |.endmacro
+  |
+  |.macro .ffunc_2, name		// Entry label plus fallback if fewer than 2 arguments.
+  |->ff_ .. name:
+  |  cmp NARGS:RDd, 2+1;  jb ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_n, name, op		// 1 number argument, applies op xmm0, [BASE].
+  |  .ffunc_1 name
+  |  checknumtp [BASE], ->fff_fallback
+  |  op xmm0, qword [BASE]
+  |.endmacro
+  |
+  |.macro .ffunc_n, name		// 1 number argument, loaded into xmm0 with movsd.
+  |  .ffunc_n name, movsd
+  |.endmacro
+  |
+  |.macro .ffunc_nn, name		// 2 number arguments, loaded into xmm0 and xmm1.
+  |  .ffunc_2 name
+  |  checknumtp [BASE], ->fff_fallback
+  |  checknumtp [BASE+8], ->fff_fallback
+  |  movsd xmm0, qword [BASE]
+  |  movsd xmm1, qword [BASE+8]
+  |.endmacro
+  |
+  |// Inlined GC threshold check. Caveat: uses label 1.
+  |.macro ffgccheck			// Clobbers RB and the flags.
+  |  mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
+  |  cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
+  |  jb >1				// gc.total below gc.threshold: skip the GC step.
+  |  call ->fff_gcstep
+  |1:
+  |.endmacro
+  |
+  |//-- Base library: checks -----------------------------------------------
+  |
+  |.ffunc_1 assert			// Inline case: first arg is true, return all args.
+  |  mov ITYPE, [BASE]
+  |  mov RB, ITYPE
+  |  sar ITYPE, 47			// ITYPE = type tag of the first argument.
+  |  cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback	// Not a true value: use the fallback.
+  |  mov PC, [BASE-8]
+  |  mov MULTRES, RDd			// Result count = nargs+1, saved across the copy loop.
+  |  mov RB, [BASE]
+  |  mov [BASE-16], RB			// First result = first argument.
+  |  sub RDd, 2				// RD = number of remaining arguments to copy.
+  |  jz >2
+  |  mov RA, BASE
+  |1:					// Move each remaining argument down by two slots.
+  |  add RA, 8
+  |  mov RB, [RA]
+  |  mov [RA-16], RB
+  |  sub RDd, 1
+  |  jnz <1
+  |2:
+  |  mov RDd, MULTRES
+  |  jmp ->fff_res_			// MULTRES is already set, so skip its store.
+  |
+  |.ffunc_1 type			// Returns a string picked from the function's upvalues by type tag.
+  |  mov RC, [BASE]
+  |  sar RC, 47				// RC = type tag of the argument.
+  |  mov RBd, LJ_TISNUM
+  |  cmp RCd, RBd
+  |  cmovb RCd, RBd			// Tags below LJ_TISNUM (unsigned) are replaced by LJ_TISNUM.
+  |  not RCd				// Inverted tag is the upvalue index.
+  |2:
+  |  mov CFUNC:RB, [BASE-16]		// The function object sits at BASE-16.
+  |  cleartp CFUNC:RB
+  |  mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
+  |  mov PC, [BASE-8]
+  |  settp STR:RC, LJ_TSTR
+  |  mov [BASE-16], STR:RC
+  |  jmp ->fff_res1
+  |
+  |//-- Base library: getters and setters ---------------------------------
+  |
+  |.ffunc_1 getmetatable		// Returns mt.__metatable if set, else the metatable or nil.
+  |  mov TAB:RB, [BASE]
+  |  mov PC, [BASE-8]
+  |  checktab TAB:RB, >6			// Not a table: handle other types at label 6.
+  |1:  // Field metatable must be at same offset for GCtab and GCudata!
+  |  mov TAB:RB, TAB:RB->metatable
+  |2:
+  |  test TAB:RB, TAB:RB
+  |  mov aword [BASE-16], LJ_TNIL	// Default result is nil. The mov leaves the flags intact.
+  |  jz ->fff_res1			// No metatable: return nil.
+  |  settp TAB:RC, TAB:RB, LJ_TTAB
+  |  mov [BASE-16], TAB:RC		// Store metatable as default result.
+  |  mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
+  |  mov RAd, TAB:RB->hmask
+  |  and RAd, STR:RC->hash		// Hash slot index = hmask & hash of the key string.
+  |  settp STR:RC, LJ_TSTR
+  |  imul RAd, #NODE
+  |  add NODE:RA, TAB:RB->node		// NODE:RA = first node of the hash chain.
+  |3:  // Rearranged logic, because we expect _not_ to find the key.
+  |  cmp NODE:RA->key, STR:RC
+  |  je >5
+  |4:
+  |  mov NODE:RA, NODE:RA->next
+  |  test NODE:RA, NODE:RA
+  |  jnz <3
+  |  jmp ->fff_res1			// Not found, keep default result.
+  |5:
+  |  mov RB, NODE:RA->val
+  |  cmp RB, LJ_TNIL; je ->fff_res1	// Ditto for nil value.
+  |  mov [BASE-16], RB			// Return value of mt.__metatable.
+  |  jmp ->fff_res1
+  |
+  |6:					// Non-table argument. ITYPE is tested here after checktab.
+  |  cmp ITYPEd, LJ_TUDATA; je <1	// Userdata: read its metatable field like a table.
+  |  cmp ITYPEd, LJ_TISNUM; ja >7
+  |  mov ITYPEd, LJ_TISNUM		// Tags not above LJ_TISNUM all use the LJ_TISNUM entry.
+  |7:
+  |  not ITYPEd				// Inverted tag indexes the base metatable array.
+  |  mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
+  |  jmp <2
+  |
+  |.ffunc_2 setmetatable		// Inline case: table without metatable gets a table as metatable.
+  |  mov TAB:RB, [BASE]
+  |  mov TAB:TMPR, TAB:RB		// Keep the original first argument for the result.
+  |  checktab TAB:RB, ->fff_fallback
+  |  // Fast path: no mt for table yet and not clearing the mt.
+  |  cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
+  |  mov TAB:RA, [BASE+8]
+  |  checktab TAB:RA, ->fff_fallback
+  |  mov TAB:RB->metatable, TAB:RA
+  |  mov PC, [BASE-8]
+  |  mov [BASE-16], TAB:TMPR			// Return original table.
+  |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
+  |  jz >1
+  |  // Possible write barrier. Table is black, but skip iswhite(mt) check.
+  |  barrierback TAB:RB, RC
+  |1:
+  |  jmp ->fff_res1
+  |
+  |.ffunc_2 rawget			// Returns the slot value from lj_tab_get(L, t, key).
+  |.if X64WIN
+  |  mov TAB:RA, [BASE]
+  |  checktab TAB:RA, ->fff_fallback
+  |  mov RB, BASE			// Save BASE.
+  |  lea CARG3, [BASE+8]		// Key = second argument slot.
+  |  mov CARG2, TAB:RA			// Caveat: CARG2 == BASE.
+  |  mov CARG1, SAVE_L
+  |.else
+  |  mov TAB:CARG2, [BASE]
+  |  checktab TAB:CARG2, ->fff_fallback
+  |  mov RB, BASE			// Save BASE.
+  |  lea CARG3, [BASE+8]		// Caveat: CARG3 == BASE.
+  |  mov CARG1, SAVE_L
+  |.endif
+  |  call extern lj_tab_get	// (lua_State *L, GCtab *t, cTValue *key)
+  |  // cTValue * returned in eax (RD).
+  |  mov BASE, RB			// Restore BASE.
+  |  // Copy table slot.
+  |  mov RB, [RD]
+  |  mov PC, [BASE-8]
+  |  mov [BASE-16], RB
+  |  jmp ->fff_res1
+  |
+  |//-- Base library: conversions ------------------------------------------
+  |
+  |.ffunc tonumber			// Inline case: a single number argument is returned as is.
+  |  // Only handles the number case inline (without a base argument).
+  |  cmp NARGS:RDd, 1+1;  jne ->fff_fallback	// Exactly one argument.
+  |  mov RB, [BASE]
+  |  checknumber RB, ->fff_fallback
+  |  mov PC, [BASE-8]
+  |  mov [BASE-16], RB			// Result = the argument, unchanged.
+  |  jmp ->fff_res1
+  |
+  |.ffunc_1 tostring			// Strings returned as is, numbers formatted by a C call.
+  |  // Only handles the string or number case inline.
+  |  mov PC, [BASE-8]
+  |  mov STR:RB, [BASE]
+  |  checktp_nc STR:RB, LJ_TSTR, >3	// Not a string: try the number case at label 3.
+  |  // A __tostring method in the string base metatable is ignored.
+  |2:
+  |  mov [BASE-16], STR:RB
+  |  jmp ->fff_res1
+  |3:  // Handle numbers inline, unless a number base metatable is present.
+  |  cmp ITYPEd, LJ_TISNUM;  ja ->fff_fallback_1
+  |  cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
+  |  jne ->fff_fallback
+  |  ffgccheck				// Caveat: uses label 1.
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE		// Add frame since C call can throw.
+  |  mov SAVE_PC, PC			// Redundant (but a defined value).
+  |.if not X64WIN
+  |  mov CARG2, BASE			// Otherwise: CARG2 == BASE
+  |.endif
+  |  mov L:CARG1, L:RB
+  |.if DUALNUM
+  |  call extern lj_strfmt_number	// (lua_State *L, cTValue *o)
+  |.else
+  |  call extern lj_strfmt_num		// (lua_State *L, lua_Number *np)
+  |.endif
+  |  // GCstr returned in eax (RD).
+  |  mov BASE, L:RB->base
+  |  settp STR:RB, RD, LJ_TSTR		// Tag the returned string and store it via label 2.
+  |  jmp <2
+  |
+  |//-- Base library: iterators -------------------------------------------
+  |
+  |.ffunc_1 next			// Table traversal step via lj_tab_next.
+  |  je >2				// Missing 2nd arg?
+  |1:
+  |.if X64WIN
+  |  mov RA, [BASE]
+  |  checktab RA, ->fff_fallback
+  |.else
+  |  mov CARG2, [BASE]
+  |  checktab CARG2, ->fff_fallback
+  |.endif
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE		// Add frame since C call can throw.
+  |  mov L:RB->top, BASE		// Dummy frame length is ok.
+  |  mov PC, [BASE-8]
+  |.if X64WIN
+  |  lea CARG3, [BASE+8]
+  |  mov CARG2, RA			// Caveat: CARG2 == BASE.
+  |  mov CARG1, L:RB
+  |.else
+  |  lea CARG3, [BASE+8]		// Caveat: CARG3 == BASE.
+  |  mov CARG1, L:RB
+  |.endif
+  |  mov SAVE_PC, PC			// Needed for ITERN fallback.
+  |  call extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
+  |  // Flag returned in eax (RD).
+  |  mov BASE, L:RB->base
+  |  test RDd, RDd;  jz >3		// End of traversal?
+  |  // Copy key and value to results.
+  |  mov RB, [BASE+8]
+  |  mov RD, [BASE+16]
+  |  mov [BASE-16], RB
+  |  mov [BASE-8], RD
+  |->fff_res2:				// Return two results.
+  |  mov RDd, 1+2
+  |  jmp ->fff_res
+  |2:  // Set missing 2nd arg to nil.
+  |  mov aword [BASE+8], LJ_TNIL
+  |  jmp <1
+  |3:  // End of traversal: return nil.
+  |  mov aword [BASE-16], LJ_TNIL
+  |  jmp ->fff_res1
+  |
+  |.ffunc_1 pairs			// Returns upvalue[0] of this function, the table and nil.
+  |  mov TAB:RB, [BASE]
+  |  mov TMPR, TAB:RB			// Keep the original first argument for the result.
+  |  checktab TAB:RB, ->fff_fallback
+#if LJ_52
+  |  cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
+#endif
+  |  mov CFUNC:RD, [BASE-16]
+  |  cleartp CFUNC:RD
+  |  mov CFUNC:RD, CFUNC:RD->upvalue[0]
+  |  settp CFUNC:RD, LJ_TFUNC
+  |  mov PC, [BASE-8]
+  |  mov [BASE-16], CFUNC:RD		// Result 1: function from upvalue[0].
+  |  mov [BASE-8], TMPR			// Result 2: the table.
+  |  mov aword [BASE], LJ_TNIL		// Result 3: nil.
+  |  mov RDd, 1+3
+  |  jmp ->fff_res
+  |
+  |.ffunc_2 ipairs_aux			// Returns index+1 and t[index+1], or nothing for a nil slot.
+  |  mov TAB:RB, [BASE]
+  |  checktab TAB:RB, ->fff_fallback
+  |.if DUALNUM
+  |  mov RA, [BASE+8]
+  |  checkint RA, ->fff_fallback
+  |.else
+  |  checknumtp [BASE+8], ->fff_fallback
+  |  movsd xmm0, qword [BASE+8]
+  |.endif
+  |  mov PC, [BASE-8]
+  |.if DUALNUM
+  |  add RAd, 1				// Next index.
+  |  setint ITYPE, RA
+  |  mov [BASE-16], ITYPE		// First result: the new index.
+  |.else
+  |  sseconst_1 xmm1, TMPR
+  |  addsd xmm0, xmm1			// Next index.
+  |  cvttsd2si RAd, xmm0
+  |  movsd qword [BASE-16], xmm0	// First result: the new index.
+  |.endif
+  |  cmp RAd, TAB:RB->asize;  jae >2	// Not in array part?
+  |  mov RD, TAB:RB->array
+  |  lea RD, [RD+RA*8]
+  |1:
+  |  cmp aword [RD], LJ_TNIL;  je ->fff_res0
+  |  // Copy array slot.
+  |  mov RB, [RD]
+  |  mov [BASE-8], RB
+  |  jmp ->fff_res2
+  |2:  // Check for empty hash part first. Otherwise call C function.
+  |  cmp dword TAB:RB->hmask, 0; je ->fff_res0
+  |.if X64WIN
+  |  mov TMPR, BASE			// BASE is kept in TMPR while CARG2 is set up.
+  |  mov CARG2d, RAd
+  |  mov CARG1, TAB:RB
+  |  mov RB, TMPR			// Saved BASE ends up in RB.
+  |.else
+  |  mov CARG1, TAB:RB
+  |  mov RB, BASE			// Save BASE.
+  |  mov CARG2d, RAd			// Caveat: CARG2 == BASE
+  |.endif
+  |  call extern lj_tab_getinth		// (GCtab *t, int32_t key)
+  |  // cTValue * or NULL returned in eax (RD).
+  |  mov BASE, RB			// Restore BASE.
+  |  test RD, RD
+  |  jnz <1
+  |->fff_res0:				// Return no results.
+  |  mov RDd, 1+0
+  |  jmp ->fff_res
+  |
+  |.ffunc_1 ipairs			// Returns upvalue[0] of this function, the table and 0.
+  |  mov TAB:RB, [BASE]
+  |  mov TMPR, TAB:RB			// Keep the original first argument for the result.
+  |  checktab TAB:RB, ->fff_fallback
+#if LJ_52
+  |  cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
+#endif
+  |  mov CFUNC:RD, [BASE-16]
+  |  cleartp CFUNC:RD
+  |  mov CFUNC:RD, CFUNC:RD->upvalue[0]
+  |  settp CFUNC:RD, LJ_TFUNC
+  |  mov PC, [BASE-8]
+  |  mov [BASE-16], CFUNC:RD		// Result 1: function from upvalue[0].
+  |  mov [BASE-8], TMPR			// Result 2: the table.
+  |.if DUALNUM
+  |  mov64 RD, ((int64_t)LJ_TISNUM<<47)	// Tag LJ_TISNUM with a zero payload.
+  |  mov [BASE], RD
+  |.else
+  |  mov qword [BASE], 0		// All-zero bits, i.e. the double 0.0.
+  |.endif
+  |  mov RDd, 1+3
+  |  jmp ->fff_res
+  |
+  |//-- Base library: catch errors ----------------------------------------
+  |
+  |.ffunc_1 pcall			// Shifts func+args up one slot, then dispatches the call.
+  |  lea RA, [BASE+16]			// RA = new base.
+  |  sub NARGS:RDd, 1			// The called function is no longer counted as an argument.
+  |  mov PCd, 16+FRAME_PCALL
+  |1:					// Shared with xpcall.
+  |  movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+  |  shr RB, HOOK_ACTIVE_SHIFT
+  |  and RB, 1				// RB = 1 if the hook-active bit is set, else 0.
+  |  add PC, RB				// Remember active hook before pcall.
+  |  // Note: this does a (harmless) copy of the function to the PC slot, too.
+  |  mov KBASE, RD
+  |2:					// Copy slots upwards by 8 bytes, highest slot first.
+  |  mov RB, [RA+KBASE*8-24]
+  |  mov [RA+KBASE*8-16], RB
+  |  sub KBASE, 1
+  |  ja <2
+  |  jmp ->vm_call_dispatch
+  |
+  |.ffunc_2 xpcall			// As pcall, with the 2nd arg (must be a function) swapped to front.
+  |  mov LFUNC:RA, [BASE+8]
+  |  checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
+  |  mov LFUNC:RB, [BASE]		// Swap function and traceback.
+  |  mov [BASE], LFUNC:RA
+  |  mov [BASE+8], LFUNC:RB
+  |  lea RA, [BASE+24]			// RA = new base.
+  |  sub NARGS:RDd, 2
+  |  mov PCd, 24+FRAME_PCALL
+  |  jmp <1
+  |
+  |//-- Coroutine library --------------------------------------------------
+  |
+  |.macro coroutine_resume_wrap, resume	// resume = 1: coroutine.resume, 0: wrap helper.
+  |.if resume
+  |.ffunc_1 coroutine_resume
+  |  mov L:RB, [BASE]
+  |  cleartp L:RB
+  |.else
+  |.ffunc coroutine_wrap_aux
+  |  mov CFUNC:RB, [BASE-16]
+  |  cleartp CFUNC:RB
+  |  mov L:RB, CFUNC:RB->upvalue[0].gcr	// Coroutine comes from upvalue[0] of this function.
+  |  cleartp L:RB
+  |.endif
+  |  mov PC, [BASE-8]
+  |  mov SAVE_PC, PC
+  |  mov TMP1, L:RB			// TMP1 = coroutine, reloaded after the calls below.
+  |.if resume
+  |  checktptp [BASE], LJ_TTHREAD, ->fff_fallback
+  |.endif
+  |  cmp aword L:RB->cframe, 0; jne ->fff_fallback
+  |  cmp byte L:RB->status, LUA_YIELD;  ja ->fff_fallback
+  |  mov RA, L:RB->top			// The mov leaves the flags of the status compare intact.
+  |  je >1				// Status != LUA_YIELD (i.e. 0)?
+  |  cmp RA, L:RB->base			// Check for presence of initial func.
+  |  je ->fff_fallback
+  |  mov PC, [RA-8]			// Move initial function up.
+  |  mov [RA], PC
+  |  add RA, 8
+  |1:
+  |.if resume
+  |  lea PC, [RA+NARGS:RD*8-16]		// Check stack space (-1-thread).
+  |.else
+  |  lea PC, [RA+NARGS:RD*8-8]		// Check stack space (-1).
+  |.endif
+  |  cmp PC, L:RB->maxstack; ja ->fff_fallback
+  |  mov L:RB->top, PC			// New top of the coroutine stack.
+  |
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE
+  |.if resume
+  |  add BASE, 8			// Keep resumed thread in stack for GC.
+  |.endif
+  |  mov L:RB->top, BASE
+  |.if resume
+  |  lea RB, [BASE+NARGS:RD*8-24]	// RB = end of source for stack move.
+  |.else
+  |  lea RB, [BASE+NARGS:RD*8-16]	// RB = end of source for stack move.
+  |.endif
+  |  sub RB, PC			// Relative to PC.
+  |
+  |  cmp PC, RA
+  |  je >3				// Nothing to move.
+  |2:  // Move args to coroutine.
+  |  mov RC, [PC+RB]
+  |  mov [PC-8], RC
+  |  sub PC, 8
+  |  cmp PC, RA
+  |  jne <2
+  |3:
+  |  mov CARG2, RA
+  |  mov CARG1, TMP1
+  |  call ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
+  |
+  |  mov L:RB, SAVE_L
+  |  mov L:PC, TMP1			// L:PC = coroutine.
+  |  mov BASE, L:RB->base
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+  |  set_vmstate INTERP
+  |
+  |  cmp eax, LUA_YIELD
+  |  ja >8				// Status above LUA_YIELD: error case.
+  |4:
+  |  mov RA, L:PC->base
+  |  mov KBASE, L:PC->top
+  |  mov L:PC->top, RA			// Clear coroutine stack.
+  |  mov PC, KBASE
+  |  sub PC, RA				// PC = size of the results in bytes.
+  |  je >6				// No results?
+  |  lea RD, [BASE+PC]
+  |  shr PCd, 3				// PC = number of results.
+  |  cmp RD, L:RB->maxstack
+  |  ja >9				// Need to grow stack?
+  |
+  |  mov RB, BASE
+  |  sub RB, RA				// RB = offset from coroutine slots to our slots.
+  |5:  // Move results from coroutine.
+  |  mov RD, [RA]
+  |  mov [RA+RB], RD
+  |  add RA, 8
+  |  cmp RA, KBASE
+  |  jne <5
+  |6:
+  |.if resume
+  |  lea RDd, [PCd+2]			// nresults+1 = 1 + true + results.
+  |  mov_true ITYPE			// Prepend true to results.
+  |  mov [BASE-8], ITYPE
+  |.else
+  |  lea RDd, [PCd+1]			// nresults+1 = 1 + results.
+  |.endif
+  |7:
+  |  mov PC, SAVE_PC
+  |  mov MULTRES, RDd
+  |.if resume
+  |  mov RA, -8				// Results start at BASE-8 (the prepended status).
+  |.else
+  |  xor RAd, RAd			// Results start at BASE.
+  |.endif
+  |  test PCd, FRAME_TYPE
+  |  jz ->BC_RET_Z
+  |  jmp ->vm_return
+  |
+  |8:  // Coroutine returned with error (at co->top-1).
+  |.if resume
+  |  mov_false ITYPE			// Prepend false to results.
+  |  mov [BASE-8], ITYPE
+  |  mov RA, L:PC->top
+  |  sub RA, 8
+  |  mov L:PC->top, RA			// Clear error from coroutine stack.
+  |  // Copy error message.
+  |  mov RD, [RA]
+  |  mov [BASE], RD
+  |  mov RDd, 1+2			// nresults+1 = 1 + false + error.
+  |  jmp <7
+  |.else
+  |  mov CARG2, L:PC
+  |  mov CARG1, L:RB
+  |  call extern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co)
+  |  // Error function does not return.
+  |.endif
+  |
+  |9:  // Handle stack expansion on return from yield.
+  |  mov L:RA, TMP1
+  |  mov L:RA->top, KBASE		// Undo coroutine stack clearing.
+  |  mov CARG2, PC			// n = number of results computed above.
+  |  mov CARG1, L:RB
+  |  call extern lj_state_growstack	// (lua_State *L, int n)
+  |  mov L:PC, TMP1
+  |  mov BASE, L:RB->base
+  |  jmp <4				// Retry the stack move.
+  |.endmacro
+  |
+  |  coroutine_resume_wrap 1		// coroutine.resume
+  |  coroutine_resume_wrap 0		// coroutine.wrap
+  |
+  |.ffunc coroutine_yield		// Marks L as yielded and leaves via vm_leave_unw.
+  |  mov L:RB, SAVE_L
+  |  test aword L:RB->cframe, CFRAME_RESUME
+  |  jz ->fff_fallback			// CFRAME_RESUME bit not set: use the fallback.
+  |  mov L:RB->base, BASE
+  |  lea RD, [BASE+NARGS:RD*8-8]
+  |  mov L:RB->top, RD			// top = BASE + nargs*8.
+  |  xor RDd, RDd
+  |  mov aword L:RB->cframe, RD		// Clear L->cframe.
+  |  mov al, LUA_YIELD
+  |  mov byte L:RB->status, al
+  |  jmp ->vm_leave_unw
+  |
+  |//-- Math library -------------------------------------------------------
+  |
+  |  .ffunc_1 math_abs			// Absolute value of an integer or number argument.
+  |  mov RB, [BASE]
+  |.if DUALNUM
+  |  checkint RB, >3
+  |  cmp RBd, 0; jns ->fff_resi		// Non-negative integer: return it unchanged.
+  |  neg RBd; js >2			// Still negative after neg: return 2^31 as a double.
+  |->fff_resbit:
+  |->fff_resi:				// Return the integer in RBd.
+  |  setint RB
+  |->fff_resRB:				// Return the TValue in RB.
+  |  mov PC, [BASE-8]
+  |  mov [BASE-16], RB
+  |  jmp ->fff_res1
+  |2:
+  |  mov64 RB, U64x(41e00000,00000000)  // 2^31.
+  |  jmp ->fff_resRB
+  |3:					// Not an integer; flags are still those of checkint.
+  |  ja ->fff_fallback
+  |.else
+  |  checknum RB, ->fff_fallback
+  |.endif
+  |  shl RB, 1				// Shift the top bit (sign of the double) out ...
+  |  shr RB, 1				// ... and shift a zero back in.
+  |  mov PC, [BASE-8]
+  |  mov [BASE-16], RB
+  |  jmp ->fff_res1
+  |
+  |.ffunc_n math_sqrt, sqrtsd		// xmm0 = sqrtsd of the number argument.
+  |->fff_resxmm0:			// Return the double in xmm0.
+  |  mov PC, [BASE-8]
+  |  movsd qword [BASE-16], xmm0
+  |  // fallthrough
+  |
+  |->fff_res1:				// Return one result, already stored at BASE-16.
+  |  mov RDd, 1+1
+  |->fff_res:				// Return RD-1 results, PC already loaded from BASE-8.
+  |  mov MULTRES, RDd
+  |->fff_res_:				// As fff_res, with MULTRES already set.
+  |  test PCd, FRAME_TYPE
+  |  jnz >7
+  |5:
+  |  cmp PC_RB, RDL			// More results expected?
+  |  ja >6
+  |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
+  |  movzx RAd, PC_RA
+  |  neg RA
+  |  lea BASE, [BASE+RA*8-16]		// base = base - (RA+2)*8
+  |  ins_next
+  |
+  |6:  // Fill up results with nil.
+  |  mov aword [BASE+RD*8-24], LJ_TNIL
+  |  add RD, 1
+  |  jmp <5
+  |
+  |7:  // Non-standard return case.
+  |  mov RA, -16			// Results start at BASE+RA = BASE-16.
+  |  jmp ->vm_return
+  |
+  |.macro math_round, func		// Defines ff_math_<func>, rounding via vm_<func>_sse.
+  |  .ffunc math_ .. func
+  |.if DUALNUM
+  |  mov RB, [BASE]
+  |  checknumx RB, ->fff_resRB, je	// Return RB unchanged if the check's je condition holds.
+  |  ja ->fff_fallback
+  |.else
+  |  checknumtp [BASE], ->fff_fallback
+  |.endif
+  |  movsd xmm0, qword [BASE]
+  |  call ->vm_ .. func .. _sse
+  |.if DUALNUM
+  |  cvttsd2si RBd, xmm0
+  |  cmp RBd, 0x80000000		// Anything else is returned as an integer.
+  |  jne ->fff_resi
+  |  cvtsi2sd xmm1, RBd			// Got 0x80000000: convert back and compare.
+  |  ucomisd xmm0, xmm1
+  |  jp ->fff_resxmm0			// Unordered compare: return the double.
+  |  je ->fff_resi			// Equal: the integer result is exact.
+  |.endif
+  |  jmp ->fff_resxmm0
+  |.endmacro
+  |
+  |  math_round floor
+  |  math_round ceil
+  |
+  |.ffunc math_log			// Inline case: one number argument, calls log().
+  |  cmp NARGS:RDd, 1+1; jne ->fff_fallback	// Exactly one argument.
+  |  checknumtp [BASE], ->fff_fallback
+  |  movsd xmm0, qword [BASE]
+  |  mov RB, BASE			// Save BASE across the call.
+  |  call extern log
+  |  mov BASE, RB			// Restore BASE.
+  |  jmp ->fff_resxmm0
+  |
+  |.macro math_extern, func		// Defines ff_math_<func>: calls func with xmm0.
+  |  .ffunc_n math_ .. func
+  |  mov RB, BASE			// Save BASE across the call.
+  |  call extern func
+  |  mov BASE, RB			// Restore BASE.
+  |  jmp ->fff_resxmm0
+  |.endmacro
+  |
+  |.macro math_extern2, func		// Defines ff_math_<func>: calls func with xmm0, xmm1.
+  |  .ffunc_nn math_ .. func
+  |  mov RB, BASE			// Save BASE across the call.
+  |  call extern func
+  |  mov BASE, RB			// Restore BASE.
+  |  jmp ->fff_resxmm0
+  |.endmacro
+  |
+  |  math_extern log10
+  |  math_extern exp
+  |  math_extern sin
+  |  math_extern cos
+  |  math_extern tan
+  |  math_extern asin
+  |  math_extern acos
+  |  math_extern atan
+  |  math_extern sinh
+  |  math_extern cosh
+  |  math_extern tanh
+  |  math_extern2 pow
+  |  math_extern2 atan2
+  |  math_extern2 fmod
+  |
+  |.ffunc_2 math_ldexp			// Two number arguments, scaled with x87 fscale.
+  |  checknumtp [BASE], ->fff_fallback
+  |  checknumtp [BASE+8], ->fff_fallback
+  |  fld qword [BASE+8]			// Push the second argument first ...
+  |  fld qword [BASE]			// ... so the first argument ends up in st0.
+  |  fscale
+  |  fpop1
+  |  mov PC, [BASE-8]
+  |  fstp qword [BASE-16]		// Store the result and pop it.
+  |  jmp ->fff_res1
+  |
+  |.ffunc_n math_frexp			// Returns the two results of frexp(x, &exp).
+  |  // C signature: double frexp(double x, int *exp). x is already in xmm0
+  |  // (loaded by .ffunc_n). The exponent is written to TMP1 via the pointer.
+  |  mov RB, BASE			// Save BASE before an argument register overwrites it.
+  |.if X64WIN
+  |  // Win64 argument slots are positional: the double takes slot 1 (xmm0),
+  |  // so the pointer has to go into slot 2, not slot 1.
+  |  lea CARG2, TMP1			// Caveat: CARG2 == BASE
+  |.else
+  |  lea CARG1, TMP1
+  |.endif
+  |  call extern frexp
+  |  mov BASE, RB			// Restore BASE.
+  |  mov RBd, TMP1d			// RB = exponent written by frexp.
+  |  mov PC, [BASE-8]			// Load PC before the slot is overwritten below.
+  |  movsd qword [BASE-16], xmm0	// First result: value returned by frexp.
+  |.if DUALNUM
+  |  setint RB
+  |  mov [BASE-8], RB			// Second result: exponent as an integer.
+  |.else
+  |  cvtsi2sd xmm1, RBd
+  |  movsd qword [BASE-8], xmm1		// Second result: exponent as a double.
+  |.endif
+  |  mov RDd, 1+2
+  |  jmp ->fff_res
+  |
+  |.ffunc_n math_modf			// Returns the two results of modf(x, &ipart).
+  |  // C signature: double modf(double x, double *iptr). x is already in xmm0
+  |  // (loaded by .ffunc_n). The pointer targets the first result slot, so
+  |  // modf stores that result directly.
+  |  mov RB, BASE			// Save BASE before an argument register overwrites it.
+  |.if X64WIN
+  |  // Win64 argument slots are positional: the double takes slot 1 (xmm0),
+  |  // so the pointer has to go into slot 2, not slot 1.
+  |  lea CARG2, [BASE-16]		// Caveat: CARG2 == BASE
+  |.else
+  |  lea CARG1, [BASE-16]
+  |.endif
+  |  call extern modf
+  |  mov BASE, RB			// Restore BASE.
+  |  mov PC, [BASE-8]			// Load PC before the slot is overwritten below.
+  |  movsd qword [BASE-8], xmm0		// Second result: value returned by modf.
+  |  mov RDd, 1+2
+  |  jmp ->fff_res
+  |
+  |.macro math_minmax, name, cmovop, sseop	// Folds all arguments with cmovop (ints) or sseop (doubles).
+  |  .ffunc name
+  |  mov RAd, 2				// RA = index+1 of the next argument, compared against nargs+1.
+  |.if DUALNUM
+  |  mov RB, [BASE]
+  |  checkint RB, >4
+  |1:  // Handle integers.
+  |  cmp RAd, RDd; jae ->fff_resRB	// All arguments consumed: return RB.
+  |  mov TMPR, [BASE+RA*8-8]
+  |  checkint TMPR, >3
+  |  cmp RBd, TMPRd
+  |  cmovop RB, TMPR
+  |  add RAd, 1
+  |  jmp <1
+  |3:					// Next argument is not an integer.
+  |  ja ->fff_fallback
+  |  // Convert intermediate result to number and continue below.
+  |  cvtsi2sd xmm0, RBd
+  |  jmp >6
+  |4:					// First argument is not an integer.
+  |  ja ->fff_fallback
+  |.else
+  |  checknumtp [BASE], ->fff_fallback
+  |.endif
+  |
+  |  movsd xmm0, qword [BASE]
+  |5:  // Handle numbers or integers.
+  |  cmp RAd, RDd; jae ->fff_resxmm0	// All arguments consumed: return xmm0.
+  |.if DUALNUM
+  |  mov RB, [BASE+RA*8-8]
+  |  checknumx RB, >6, jb		// Jump to the double load at 6 if the check's jb condition holds.
+  |  ja ->fff_fallback
+  |  cvtsi2sd xmm1, RBd			// Integer argument: convert it to a double.
+  |  jmp >7
+  |.else
+  |  checknumtp [BASE+RA*8-8], ->fff_fallback
+  |.endif
+  |6:
+  |  movsd xmm1, qword [BASE+RA*8-8]
+  |7:
+  |  sseop xmm0, xmm1
+  |  add RAd, 1
+  |  jmp <5
+  |.endmacro
+  |
+  |  math_minmax math_min, cmovg, minsd
+  |  math_minmax math_max, cmovl, maxsd
+  |
+  |//-- String library -----------------------------------------------------
+  |
+  |.ffunc string_byte			// Only handle the 1-arg case here.
+  |  cmp NARGS:RDd, 1+1;  jne ->fff_fallback
+  |  mov STR:RB, [BASE]
+  |  checkstr STR:RB, ->fff_fallback
+  |  mov PC, [BASE-8]
+  |  cmp dword STR:RB->len, 1
+  |  jb ->fff_res0			// Return no results for empty string.
+  |  movzx RBd, byte STR:RB[1]		// Zero-extended byte that follows the string header.
+  |.if DUALNUM
+  |  jmp ->fff_resi
+  |.else
+  |  cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
+  |.endif
+  |
+  |.ffunc string_char			// Only handle the 1-arg case here.
+  |  ffgccheck
+  |  cmp NARGS:RDd, 1+1;  jne ->fff_fallback	// *Exactly* 1 arg.
+  |.if DUALNUM
+  |  mov RB, [BASE]
+  |  checkint RB, ->fff_fallback
+  |.else
+  |  checknumtp [BASE], ->fff_fallback
+  |  cvttsd2si RBd, qword [BASE]
+  |.endif
+  |  cmp RBd, 255;  ja ->fff_fallback	// Unsigned compare: values above 255 use the fallback.
+  |  mov TMP1d, RBd
+  |  mov TMPRd, 1			// Length = 1.
+  |  lea RD, TMP1			// Points to stack. Little-endian.
+  |->fff_newstr:			// RD = char *str, TMPRd = length.
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE
+  |  mov CARG3d, TMPRd			// Zero-extended to size_t.
+  |  mov CARG2, RD
+  |  mov CARG1, L:RB
+  |  mov SAVE_PC, PC
+  |  call extern lj_str_new		// (lua_State *L, char *str, size_t l)
+  |->fff_resstr:			// Also the return path of the ffstring_op functions.
+  |  // GCstr * returned in eax (RD).
+  |  mov BASE, L:RB->base
+  |  mov PC, [BASE-8]
+  |  settp STR:RD, LJ_TSTR
+  |  mov [BASE-16], STR:RD
+  |  jmp ->fff_res1
+  |
+  |.ffunc string_sub			// Substring of arg 1 from start (arg 2) to end (arg 3, default -1).
+  |  ffgccheck
+  |  mov TMPRd, -1			// Default end = -1.
+  |  cmp NARGS:RDd, 1+2;  jb ->fff_fallback
+  |  jna >1				// Exactly two arguments: keep the default end.
+  |.if DUALNUM
+  |  mov TMPR, [BASE+16]
+  |  checkint TMPR, ->fff_fallback
+  |.else
+  |  checknumtp [BASE+16], ->fff_fallback
+  |  cvttsd2si TMPRd, qword [BASE+16]
+  |.endif
+  |1:
+  |  mov STR:RB, [BASE]
+  |  checkstr STR:RB, ->fff_fallback
+  |.if DUALNUM
+  |  mov ITYPE, [BASE+8]
+  |  mov RAd, ITYPEd			// Must clear hiword for lea below.
+  |  sar ITYPE, 47
+  |  cmp ITYPEd, LJ_TISNUM
+  |  jne ->fff_fallback
+  |.else
+  |  checknumtp [BASE+8], ->fff_fallback
+  |  cvttsd2si RAd, qword [BASE+8]
+  |.endif
+  |  mov RCd, STR:RB->len
+  |  cmp RCd, TMPRd			// len < end? (unsigned compare)
+  |  jb >5
+  |2:
+  |  test RAd, RAd			// start <= 0?
+  |  jle >7
+  |3:
+  |  sub TMPRd, RAd			// start > end?
+  |  jl ->fff_emptystr
+  |  lea RD, [STR:RB+RAd+#STR-1]
+  |  add TMPRd, 1			// Length = end - start + 1.
+  |4:
+  |  jmp ->fff_newstr
+  |
+  |5:  // Negative end or overflow.
+  |  jl >6
+  |  lea TMPRd, [TMPRd+RCd+1]		// end = end+(len+1)
+  |  jmp <2
+  |6:  // Overflow.
+  |  mov TMPRd, RCd			// end = len
+  |  jmp <2
+  |
+  |7:  // Negative start or underflow.
+  |  je >8
+  |  add RAd, RCd			// start = start+(len+1)
+  |  add RAd, 1
+  |  jg <3				// start > 0?
+  |8:  // Underflow.
+  |  mov RAd, 1				// start = 1
+  |  jmp <3
+  |
+  |->fff_emptystr:  // Range underflow.
+  |  xor TMPRd, TMPRd			// Zero length. Any ptr in RD is ok.
+  |  jmp <4
+  |
+  |.macro ffstring_op, name		// Defines ff_string_<name> via lj_buf_putstr_<name>.
+  |  .ffunc_1 string_ .. name
+  |  ffgccheck
+  |.if X64WIN
+  |  mov STR:TMPR, [BASE]
+  |  checkstr STR:TMPR, ->fff_fallback
+  |.else
+  |  mov STR:CARG2, [BASE]
+  |  checkstr STR:CARG2, ->fff_fallback
+  |.endif
+  |  mov L:RB, SAVE_L
+  |   lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]	// CARG1 = the global tmpbuf.
+  |  mov L:RB->base, BASE
+  |.if X64WIN
+  |  mov STR:CARG2, STR:TMPR		// Caveat: CARG2 == BASE
+  |.endif
+  |   mov RC, SBUF:CARG1->b
+  |   mov SBUF:CARG1->L, L:RB
+  |   mov SBUF:CARG1->p, RC		// Set the buffer's p field to its b field.
+  |  mov SAVE_PC, PC
+  |  call extern lj_buf_putstr_ .. name
+  |  mov CARG1, rax			// Pass the returned value on to lj_buf_tostr.
+  |  call extern lj_buf_tostr
+  |  jmp ->fff_resstr
+  |.endmacro
+  |
+  |ffstring_op reverse
+  |ffstring_op lower
+  |ffstring_op upper
+  |
+  |//-- Bit library --------------------------------------------------------
+  |
+  |.macro .ffunc_bit, name, kind, fdef	// Entry via fdef; leaves the first arg as int in RBd.
+  |  fdef name
+  |.if kind == 2
+  |  sseconst_tobit xmm1, RB		// kind 2: load the constant before RB holds the argument.
+  |.endif
+  |.if DUALNUM
+  |  mov RB, [BASE]
+  |  checkint RB, >1
+  |.if kind > 0
+  |  jmp >2				// Integer argument: skip the conversion.
+  |.else
+  |  jmp ->fff_resbit			// kind 0: an integer argument is returned directly.
+  |.endif
+  |1:					// Not an integer; flags are still those of checkint.
+  |  ja ->fff_fallback
+  |  movd xmm0, RB
+  |.else
+  |  checknumtp [BASE], ->fff_fallback
+  |  movsd xmm0, qword [BASE]
+  |.endif
+  |.if kind < 2
+  |  sseconst_tobit xmm1, RB
+  |.endif
+  |  addsd xmm0, xmm1			// Add the tobit constant ...
+  |  movd RBd, xmm0			// ... and take the low 32 bits as the integer.
+  |2:
+  |.endmacro
+  |
+  |.macro .ffunc_bit, name, kind		// Two-argument form: fdef defaults to .ffunc_1.
+  |  .ffunc_bit name, kind, .ffunc_1
+  |.endmacro
+  |
+  |.ffunc_bit bit_tobit, 0
+  |  jmp ->fff_resbit
+  |
+  |.macro .ffunc_bit_op, name, ins	// Variadic bit op: folds all arguments into RBd with 'ins', last argument first.
+  |  .ffunc_bit name, 2
+  |  mov TMPRd, NARGS:RDd		// Save for fallback.
+  |  lea RD, [BASE+NARGS:RD*8-16]	// RD = address of the last argument slot (NARGS holds nargs+1).
+  |1:
+  |  cmp RD, BASE
+  |  jbe ->fff_resbit			// Back at the 1st argument (already in RBd): done.
+  |.if DUALNUM
+  |  mov RA, [RD]
+  |  checkint RA, >2
+  |  ins RBd, RAd
+  |  sub RD, 8				// Step to the previous argument.
+  |  jmp <1
+  |2:
+  |  ja ->fff_fallback_bit_op		// Uses the flags left by checkint.
+  |  movd xmm0, RA
+  |.else
+  |  checknumtp [RD], ->fff_fallback_bit_op
+  |  movsd xmm0, qword [RD]
+  |.endif
+  |  addsd xmm0, xmm1			// xmm1 = constant loaded by .ffunc_bit with kind 2.
+  |  movd RAd, xmm0
+  |  ins RBd, RAd
+  |  sub RD, 8				// Step to the previous argument.
+  |  jmp <1
+  |.endmacro
+  |
+  |.ffunc_bit_op bit_band, and
+  |.ffunc_bit_op bit_bor, or
+  |.ffunc_bit_op bit_bxor, xor
+  |
+  |.ffunc_bit bit_bswap, 1
+  |  bswap RBd				// Reverse the byte order of the 32 bit value.
+  |  jmp ->fff_resbit
+  |
+  |.ffunc_bit bit_bnot, 1
+  |  not RBd
+  |.if DUALNUM
+  |  jmp ->fff_resbit
+  |.else
+  |->fff_resbit:			// Non-DUALNUM: common exit of the bit.* functions, defined here so bit_bnot falls through.
+  |  cvtsi2sd xmm0, RBd			// Return the int32 in RBd as a double.
+  |  jmp ->fff_resxmm0
+  |.endif
+  |
+  |->fff_fallback_bit_op:		// Fallback entry for .ffunc_bit_op, which clobbered RD.
+  |  mov NARGS:RDd, TMPRd		// Restore for fallback
+  |  jmp ->fff_fallback
+  |
+  |.macro .ffunc_bit_sh, name, ins	// Shift/rotate: RBd = 1st argument, shift count taken from cl (2nd argument).
+  |.if DUALNUM
+  |  .ffunc_bit name, 1, .ffunc_2
+  |  // Note: no inline conversion from number for 2nd argument!
+  |  mov RA, [BASE+8]
+  |  checkint RA, ->fff_fallback
+  |.else
+  |  .ffunc_nn name
+  |  sseconst_tobit xmm2, RB
+  |  addsd xmm0, xmm2			// Convert both arguments with the same constant.
+  |  addsd xmm1, xmm2
+  |  movd RBd, xmm0
+  |  movd RAd, xmm1
+  |.endif
+  |  ins RBd, cl			// Assumes RA is ecx.
+  |  jmp ->fff_resbit
+  |.endmacro
+  |
+  |.ffunc_bit_sh bit_lshift, shl
+  |.ffunc_bit_sh bit_rshift, shr
+  |.ffunc_bit_sh bit_arshift, sar
+  |.ffunc_bit_sh bit_rol, rol
+  |.ffunc_bit_sh bit_ror, ror
+  |
+  |//-----------------------------------------------------------------------
+  |
+  |->fff_fallback_2:			// Fallback entry that forces RD to 1+2.
+  |  mov NARGS:RDd, 1+2			// Other args are ignored, anyway.
+  |  jmp ->fff_fallback
+  |->fff_fallback_1:			// Fallback entry that forces RD to 1+1; falls through.
+  |  mov NARGS:RDd, 1+1			// Other args are ignored, anyway.
+  |->fff_fallback:			// Call fast function fallback handler.
+  |  // BASE = new base, RD = nargs+1
+  |  mov L:RB, SAVE_L
+  |  mov PC, [BASE-8]			// Fallback may overwrite PC.
+  |  mov SAVE_PC, PC			// Redundant (but a defined value).
+  |  mov L:RB->base, BASE
+  |  lea RD, [BASE+NARGS:RD*8-8]	// RD = BASE + nargs*8; stored as L->top below.
+  |  lea RA, [RD+8*LUA_MINSTACK]	// Ensure enough space for handler.
+  |  mov L:RB->top, RD
+  |  mov CFUNC:RD, [BASE-16]		// Function object is read from the slot at BASE-16.
+  |  cleartp CFUNC:RD
+  |  cmp RA, L:RB->maxstack
+  |  ja >5				// Need to grow stack.
+  |  mov CARG1, L:RB
+  |  call aword CFUNC:RD->f		// (lua_State *L)
+  |  mov BASE, L:RB->base
+  |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+  |  test RDd, RDd; jg ->fff_res	// Returned nresults+1?
+  |1:
+  |  mov RA, L:RB->top
+  |  sub RA, BASE
+  |  shr RAd, 3				// (top - base) / 8 = number of slots.
+  |  test RDd, RDd			// Sets the flags for the jne below; the lea and mov in between leave them intact.
+  |  lea NARGS:RDd, [RAd+1]
+  |  mov LFUNC:RB, [BASE-16]
+  |  jne ->vm_call_tail			// Returned -1?
+  |  cleartp LFUNC:RB
+  |  ins_callt				// Returned 0: retry fast path.
+  |
+  |// Reconstruct previous base for vmeta_call during tailcall.
+  |->vm_call_tail:
+  |  mov RA, BASE
+  |  test PCd, FRAME_TYPE
+  |  jnz >3				// Frame type bits set in PC: base delta is encoded in PC itself.
+  |  movzx RBd, PC_RA
+  |  neg RB
+  |  lea BASE, [BASE+RB*8-16]		// base = base - (RB+2)*8
+  |  jmp ->vm_call_dispatch		// Resolve again for tailcall.
+  |3:
+  |  mov RB, PC
+  |  and RB, -8				// Clear the low 3 bits; the rest is the byte delta.
+  |  sub BASE, RB
+  |  jmp ->vm_call_dispatch		// Resolve again for tailcall.
+  |
+  |5:  // Grow stack for fallback handler.
+  |  mov CARG2d, LUA_MINSTACK
+  |  mov CARG1, L:RB
+  |  call extern lj_state_growstack	// (lua_State *L, int n)
+  |  mov BASE, L:RB->base		// Reload BASE from L after the call.
+  |  xor RDd, RDd			// Simulate a return 0.
+  |  jmp <1				// Dumb retry (goes through ff first).
+  |
+  |->fff_gcstep:			// Call GC step function.
+  |  // BASE = new base, RD = nargs+1
+  |  pop RB				// Must keep stack at same level.
+  |  mov TMP1, RB			// Save return address
+  |  mov L:RB, SAVE_L
+  |  mov SAVE_PC, PC			// Redundant (but a defined value).
+  |  mov L:RB->base, BASE
+  |  lea RD, [BASE+NARGS:RD*8-8]	// RD = BASE + nargs*8; stored as L->top below.
+  |  mov CARG1, L:RB
+  |  mov L:RB->top, RD
+  |  call extern lj_gc_step		// (lua_State *L)
+  |  mov BASE, L:RB->base		// Reload BASE and recompute RD = nargs+1 from L->top.
+  |  mov RD, L:RB->top
+  |  sub RD, BASE
+  |  shr RDd, 3
+  |  add NARGS:RDd, 1
+  |  mov RB, TMP1
+  |  push RB				// Restore return address.
+  |  ret
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Special dispatch targets -------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_record:				// Dispatch target for recording phase.
+  |.if JIT
+  |  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+  |  test RDL, HOOK_VMEVENT		// No recording while in vmevent.
+  |  jnz >5
+  |  // Decrement the hookcount for consistency, but always do the call.
+  |  test RDL, HOOK_ACTIVE
+  |  jnz >1
+  |  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
+  |  jz >1
+  |  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
+  |  jmp >1				// Local label 1 is in vm_inshook below: call lj_dispatch_ins.
+  |.endif
+  |
+  |->vm_rethook:			// Dispatch target for return hooks.
+  |  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+  |  test RDL, HOOK_ACTIVE		// Hook already active?
+  |  jnz >5
+  |  jmp >1				// Otherwise always call lj_dispatch_ins (label 1 below).
+  |
+  |->vm_inshook:			// Dispatch target for instr/line hooks.
+  |  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+  |  test RDL, HOOK_ACTIVE		// Hook already active?
+  |  jnz >5
+  |
+  |  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
+  |  jz >5				// Neither line nor count hook set: re-dispatch only.
+  |  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
+  |  jz >1				// Count reached zero: call the dispatcher.
+  |  test RDL, LUA_MASKLINE
+  |  jz >5				// No line hook either: re-dispatch only.
+  |1:
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE
+  |  mov CARG2, PC			// Caveat: CARG2 == BASE
+  |  mov CARG1, L:RB
+  |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+  |  call extern lj_dispatch_ins	// (lua_State *L, const BCIns *pc)
+  |3:
+  |  mov BASE, L:RB->base		// Reload BASE from L after the C call.
+  |4:
+  |  movzx RAd, PC_RA			// Re-decode the operands before re-dispatching.
+  |5:
+  |  movzx OP, PC_OP
+  |  movzx RDd, PC_RD
+  |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]	// Re-dispatch to static ins.
+  |
+  |->cont_hook:				// Continue from hook yield.
+  |  add PC, 4
+  |  mov RA, [RB-40]
+  |  mov MULTRES, RAd			// Restore MULTRES for *M ins.
+  |  jmp <4				// Re-decode RA and re-dispatch (label 4 above).
+  |
+  |->vm_hotloop:			// Hot loop counter underflow.
+  |.if JIT
+  |  mov LFUNC:RB, [BASE-16]		// Same as curr_topL(L).
+  |  cleartp LFUNC:RB
+  |  mov RB, LFUNC:RB->pc
+  |  movzx RDd, byte [RB+PC2PROTO(framesize)]
+  |  lea RD, [BASE+RD*8]		// RD = BASE + framesize*8; stored as L->top below.
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE
+  |  mov L:RB->top, RD
+  |  mov CARG2, PC
+  |  lea CARG1, [DISPATCH+GG_DISP2J]	// 1st C argument: the jit_State (see signature below).
+  |  mov aword [DISPATCH+DISPATCH_J(L)], L:RB
+  |  mov SAVE_PC, PC
+  |  call extern lj_trace_hot		// (jit_State *J, const BCIns *pc)
+  |  jmp <3				// Local label 3 in the hook code above: reload BASE and re-dispatch.
+  |.endif
+  |
+  |->vm_callhook:			// Dispatch target for call hooks.
+  |  mov SAVE_PC, PC
+  |.if JIT
+  |  jmp >1				// Skip the hot call marker set in vm_hotcall.
+  |.endif
+  |
+  |->vm_hotcall:			// Hot call counter underflow.
+  |.if JIT
+  |  mov SAVE_PC, PC
+  |  or PC, 1				// Marker for hot call.
+  |1:
+  |.endif
+  |  lea RD, [BASE+NARGS:RD*8-8]	// RD = BASE + nargs*8; stored as L->top below.
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE
+  |  mov L:RB->top, RD
+  |  mov CARG2, PC
+  |  mov CARG1, L:RB
+  |  call extern lj_dispatch_call	// (lua_State *L, const BCIns *pc)
+  |  // ASMFunction returned in eax/rax (RD).
+  |  mov SAVE_PC, 0			// Invalidate for subsequent line hook.
+  |.if JIT
+  |  and PC, -2				// Clear the hot call marker bit again.
+  |.endif
+  |  mov BASE, L:RB->base
+  |  mov RA, RD				// Keep the returned function address while RD is recomputed.
+  |  mov RD, L:RB->top
+  |  sub RD, BASE
+  |  mov RB, RA
+  |  movzx RAd, PC_RA
+  |  shr RDd, 3
+  |  add NARGS:RDd, 1			// RD = (top - base)/8 + 1.
+  |  jmp RB				// Continue at the address returned by lj_dispatch_call.
+  |
+  |->cont_stitch:			// Trace stitching.
+  |.if JIT
+  |  // BASE = base, RC = result, RB = mbase
+  |  mov ITYPEd, [RB-24]		// Save previous trace number. ITYPE must stay live until the stitch call below.
+  |  mov TMPRd, MULTRES
+  |  movzx RAd, PC_RA
+  |  lea RA, [BASE+RA*8]		// Call base.
+  |  sub TMPRd, 1
+  |  jz >2				// MULTRES == 1: nothing to move.
+  |1:  // Move results down.
+  |  mov RB, [RC]
+  |  mov [RA], RB
+  |  add RC, 8
+  |  add RA, 8
+  |  sub TMPRd, 1
+  |  jnz <1
+  |2:
+  |  movzx RCd, PC_RA
+  |  movzx RBd, PC_RB			// RB now holds the B operand, no longer mbase.
+  |  add RC, RB
+  |  lea RC, [BASE+RC*8-8]		// RC = BASE + (A + B - 1)*8: limit for the nil fill at 9.
+  |3:
+  |  cmp RC, RA
+  |  ja >9				// More results wanted?
+  |
+  |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+  |  mov TRACE:RD, [RA+ITYPE*8]		// Look up the previous trace by its number.
+  |  test TRACE:RD, TRACE:RD
+  |  jz ->cont_nop
+  |  movzx RDd, word TRACE:RD->link
+  |  cmp RDd, ITYPEd			// Compare link with the previous trace number (RB only holds PC_RB here).
+  |  je ->cont_nop			// Blacklisted.
+  |  test RDd, RDd
+  |  jne =>BC_JLOOP			// Jump to stitched trace.
+  |
+  |  // Stitch a new trace to the previous trace.
+  |  mov [DISPATCH+DISPATCH_J(exitno)], ITYPE	// Pass the previous trace number; store width unchanged (upper half is zero from the 32 bit load).
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE
+  |  mov CARG2, PC
+  |  lea CARG1, [DISPATCH+GG_DISP2J]
+  |  mov aword [DISPATCH+DISPATCH_J(L)], L:RB
+  |  call extern lj_dispatch_stitch	// (jit_State *J, const BCIns *pc)
+  |  mov BASE, L:RB->base
+  |  jmp ->cont_nop
+  |
+  |9:  // Fill up results with nil.
+  |  mov aword [RA], LJ_TNIL
+  |  add RA, 8
+  |  jmp <3
+  |.endif
+  |
+  |->vm_profhook:			// Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE
+  |  mov CARG2, PC			// Caveat: CARG2 == BASE
+  |  mov CARG1, L:RB
+  |  call extern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
+  |  mov BASE, L:RB->base		// Reload BASE from L after the C call.
+  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+  |  sub PC, 4				// Step PC back by one 4 byte instruction.
+  |  jmp ->cont_nop
+#endif
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Trace exit handler -------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// Called from an exit stub with the exit number on the stack.
+  |// The 16 bit exit number is stored with two (sign-extended) push imm8.
+  |->vm_exit_handler:
+  |.if JIT
+  |  push r13; push r12
+  |  push r11; push r10; push r9; push r8
+  |  push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp	// 9 pushes (72 bytes) so far: rbp = entry rsp + 16, just above the two exit number slots.
+  |  push rbx; push rdx; push rcx; push rax
+  |  movzx RCd, byte [rbp-8]		// Reconstruct exit number.
+  |  mov RCH, byte [rbp-16]
+  |  mov [rbp-8], r15; mov [rbp-16], r14	// Reuse the two exit number slots to save r15/r14.
+  |  // Caveat: DISPATCH is rbx.
+  |  mov DISPATCH, [ebp]		// NOTE(review): 32 bit address register in 64 bit code -- confirm this stack slot is always below 4GB.
+  |  mov RA, [DISPATCH+DISPATCH_GL(vmstate)]	// Get trace number.
+  |  set_vmstate EXIT
+  |  mov [DISPATCH+DISPATCH_J(exitno)], RC	// NOTE(review): full-width register stores here and below -- confirm the field widths of exitno/parent.
+  |  mov [DISPATCH+DISPATCH_J(parent)], RA
+  |.if X64WIN
+  |  sub rsp, 16*8+4*8			// Room for SSE regs + save area.
+  |.else
+  |  sub rsp, 16*8			// Room for SSE regs.
+  |.endif
+  |  add rbp, -128			// rbp = rsp before the sub above (14 pushes = 112 bytes below entry rsp).
+  |  movsd qword [rbp-8],   xmm15; movsd qword [rbp-16],  xmm14
+  |  movsd qword [rbp-24],  xmm13; movsd qword [rbp-32],  xmm12
+  |  movsd qword [rbp-40],  xmm11; movsd qword [rbp-48],  xmm10
+  |  movsd qword [rbp-56],  xmm9;  movsd qword [rbp-64],  xmm8
+  |  movsd qword [rbp-72],  xmm7;  movsd qword [rbp-80],  xmm6
+  |  movsd qword [rbp-88],  xmm5;  movsd qword [rbp-96],  xmm4
+  |  movsd qword [rbp-104], xmm3;  movsd qword [rbp-112], xmm2
+  |  movsd qword [rbp-120], xmm1;  movsd qword [rbp-128], xmm0
+  |  // Caveat: RB is rbp.
+  |  mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
+  |  mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
+  |  mov aword [DISPATCH+DISPATCH_J(L)], L:RB
+  |  mov L:RB->base, BASE
+  |.if X64WIN
+  |  lea CARG2, [rsp+4*8]		// 2nd C argument: saved state, located above the 4*8 byte save area.
+  |.else
+  |  mov CARG2, rsp			// 2nd C argument: saved state starts at rsp.
+  |.endif
+  |  lea CARG1, [DISPATCH+GG_DISP2J]
+  |  mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
+  |  call extern lj_trace_exit		// (jit_State *J, ExitState *ex)
+  |  // MULTRES or negated error code returned in eax (RD).
+  |  mov RA, L:RB->cframe
+  |  and RA, CFRAME_RAWMASK
+  |  mov [RA+CFRAME_OFS_L], L:RB	// Set SAVE_L (on-trace resume/yield).
+  |  mov BASE, L:RB->base
+  |  mov PC, [RA+CFRAME_OFS_PC]	// Get SAVE_PC.
+  |  jmp >1				// Enter vm_exit_interp at its local label 1 with RA = C frame.
+  |.endif
+  |->vm_exit_interp:
+  |  // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
+  |.if JIT
+  |  // Restore additional callee-save registers only used in compiled code.
+  |.if X64WIN
+  |  lea RA, [rsp+10*16+4*8]
+  |1:
+  |  movdqa xmm15, [RA-10*16]
+  |  movdqa xmm14, [RA-9*16]
+  |  movdqa xmm13, [RA-8*16]
+  |  movdqa xmm12, [RA-7*16]
+  |  movdqa xmm11, [RA-6*16]
+  |  movdqa xmm10, [RA-5*16]
+  |  movdqa xmm9, [RA-4*16]
+  |  movdqa xmm8, [RA-3*16]
+  |  movdqa xmm7, [RA-2*16]
+  |  mov rsp, RA			// Reposition stack to C frame.
+  |  movdqa xmm6, [RA-1*16]
+  |  mov r15, CSAVE_1
+  |  mov r14, CSAVE_2
+  |  mov r13, CSAVE_3
+  |  mov r12, CSAVE_4
+  |.else
+  |  lea RA, [rsp+16]
+  |1:
+  |  mov r13, [RA-8]			// r13/r12 are reloaded from the two slots at and just below RA.
+  |  mov r12, [RA]
+  |  mov rsp, RA			// Reposition stack to C frame.
+  |.endif
+  |  test RDd, RDd; js >9		// Check for error from exit.
+  |  mov L:RB, SAVE_L
+  |  mov MULTRES, RDd
+  |  mov LFUNC:KBASE, [BASE-16]
+  |  cleartp LFUNC:KBASE
+  |  mov KBASE, LFUNC:KBASE->pc
+  |  mov KBASE, [KBASE+PC2PROTO(k)]	// KBASE = the k field of the current function's prototype.
+  |  mov L:RB->base, BASE
+  |  mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
+  |  set_vmstate INTERP
+  |  // Modified copy of ins_next which handles function header dispatch, too.
+  |  mov RCd, [PC]
+  |  movzx RAd, RCH
+  |  movzx OP, RCL
+  |  add PC, 4
+  |  shr RCd, 16
+  |  cmp OP, BC_FUNCF			// Function header?
+  |  jb >3
+  |  cmp OP, BC_FUNCC+2			// Fast function?
+  |  jae >4
+  |2:
+  |  mov RCd, MULTRES			// RC/RD holds nres+1.
+  |3:
+  |  jmp aword [DISPATCH+OP*8]
+  |
+  |4:  // Check frame below fast function.
+  |  mov RC, [BASE-8]
+  |  test RCd, FRAME_TYPE
+  |  jnz <2				// Trace stitching continuation?
+  |  // Otherwise set KBASE for Lua function below fast function.
+  |  movzx RCd, byte [RC-3]		// One operand byte of the instruction preceding the PC loaded from [BASE-8].
+  |  neg RC
+  |  mov LFUNC:KBASE, [BASE+RC*8-24]
+  |  cleartp LFUNC:KBASE
+  |  mov KBASE, LFUNC:KBASE->pc
+  |  mov KBASE, [KBASE+PC2PROTO(k)]
+  |  jmp <2
+  |
+  |9:  // Rethrow error from the right C frame.
+  |  neg RD				// Undo the negation of the error code.
+  |  mov CARG1, L:RB			// RB has not been reloaded from SAVE_L on this path.
+  |  mov CARG2, RD
+  |  call extern lj_err_throw		// (lua_State *L, int errcode)
+  |.endif
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Math helper functions ----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// FP value rounding. Called by math.floor/math.ceil fast functions
+  |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
+  |.macro vm_round, name, mode, cond	// mode: 0 = floor, 1 = ceil, 2 = trunc. 'cond' is not referenced in the body.
+  |->name:
+  |->name .. _sse:
+  |  sseconst_abs xmm2, RD
+  |  sseconst_2p52 xmm3, RD
+  |  movaps xmm1, xmm0
+  |  andpd xmm1, xmm2			// |x|
+  |  ucomisd xmm3, xmm1			// No truncation if 2^52 <= |x|.
+  |  jbe >1				// Also taken for an unordered compare (CF and ZF set): x returned unchanged.
+  |  andnpd xmm2, xmm0			// Isolate sign bit.
+  |.if mode == 2		// trunc(x)?
+  |  movaps xmm0, xmm1
+  |  addsd xmm1, xmm3			// (|x| + 2^52) - 2^52
+  |  subsd xmm1, xmm3
+  |  sseconst_1 xmm3, RD
+  |  cmpsd xmm0, xmm1, 1		// |x| < result?
+  |  andpd xmm0, xmm3			// Compare mask selects 1.0 or 0.0.
+  |  subsd xmm1, xmm0			// If yes, subtract 1.
+  |  orpd xmm1, xmm2			// Merge sign bit back in.
+  |.else
+  |  addsd xmm1, xmm3			// (|x| + 2^52) - 2^52
+  |  subsd xmm1, xmm3
+  |  orpd xmm1, xmm2			// Merge sign bit back in.
+  |  .if mode == 1		// ceil(x)?
+  |    sseconst_m1 xmm2, RD		// Must subtract -1 to preserve -0.
+  |    cmpsd xmm0, xmm1, 6		// x > result?
+  |  .else			// floor(x)?
+  |    sseconst_1 xmm2, RD
+  |    cmpsd xmm0, xmm1, 1		// x < result?
+  |  .endif
+  |  andpd xmm0, xmm2			// Compare mask selects the correction or 0.0.
+  |  subsd xmm1, xmm0			// If yes, subtract +-1.
+  |.endif
+  |  movaps xmm0, xmm1
+  |1:
+  |  ret
+  |.endmacro
+  |
+  |  vm_round vm_floor, 0, 1
+  |  vm_round vm_ceil,  1, JIT
+  |  vm_round vm_trunc, 2, JIT
+  |
+  |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+  |->vm_mod:
+  |// Args in xmm0/xmm1, return value in xmm0.
+  |// Caveat: xmm0-xmm5 and RC (eax) modified!
+  |  movaps xmm5, xmm0			// Keep x for the final x - y*floor(x/y).
+  |  divsd xmm0, xmm1
+  |  sseconst_abs xmm2, RD
+  |  sseconst_2p52 xmm3, RD
+  |  movaps xmm4, xmm0
+  |  andpd xmm4, xmm2			// |x/y|
+  |  ucomisd xmm3, xmm4			// No truncation if 2^52 <= |x/y|.
+  |  jbe >1
+  |  andnpd xmm2, xmm0			// Isolate sign bit.
+  |  addsd xmm4, xmm3			// (|x/y| + 2^52) - 2^52
+  |  subsd xmm4, xmm3
+  |  orpd xmm4, xmm2			// Merge sign bit back in.
+  |  sseconst_1 xmm2, RD
+  |  cmpsd xmm0, xmm4, 1		// x/y < result?
+  |  andpd xmm0, xmm2
+  |  subsd xmm4, xmm0			// If yes, subtract 1.0.
+  |  movaps xmm0, xmm5
+  |  mulsd xmm1, xmm4			// y * floor(x/y)
+  |  subsd xmm0, xmm1			// x - y*floor(x/y)
+  |  ret
+  |1:
+  |  mulsd xmm1, xmm0			// Quotient used as is: y * (x/y)
+  |  movaps xmm0, xmm5
+  |  subsd xmm0, xmm1
+  |  ret
+  |
+  |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
+  |->vm_powi_sse:			// x^i by repeated squaring over the bits of i.
+  |  cmp eax, 1; jle >6			// i<=1?
+  |  // Now 1 < (unsigned)i <= 0x80000000.
+  |1:  // Handle leading zeros.
+  |  test eax, 1; jnz >2
+  |  mulsd xmm0, xmm0			// Low bit clear: square x and drop the bit.
+  |  shr eax, 1
+  |  jmp <1
+  |2:
+  |  shr eax, 1; jz >5			// That was the only set bit: xmm0 is the result.
+  |  movaps xmm1, xmm0			// xmm1 accumulates the product of the selected powers.
+  |3:  // Handle trailing bits.
+  |  mulsd xmm0, xmm0
+  |  shr eax, 1; jz >4
+  |  jnc <3				// Bit shifted out was 0: skip the multiply.
+  |  mulsd xmm1, xmm0
+  |  jmp <3
+  |4:
+  |  mulsd xmm0, xmm1
+  |5:
+  |  ret
+  |6:					// Flags still come from the cmp eax, 1 at entry.
+  |  je <5				// x^1 ==> x
+  |  jb >7				// x^0 ==> 1
+  |  neg eax				// Negative exponent: compute x^(-i), then take the reciprocal.
+  |  call <1
+  |  sseconst_1 xmm1, RD
+  |  divsd xmm1, xmm0
+  |  movaps xmm0, xmm1
+  |  ret
+  |7:
+  |  sseconst_1 xmm0, RD
+  |  ret
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Miscellaneous functions --------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
+  |->vm_cpuid:
+  |  mov eax, CARG1d			// cpuid takes its function number in eax.
+  |  .if X64WIN; push rsi; mov rsi, CARG2; .endif	// Save rsi and move the res pointer into it on X64WIN.
+  |  push rbx				// cpuid overwrites ebx; save and restore it.
+  |  cpuid
+  |  mov [rsi], eax			// res[0..3] = eax, ebx, ecx, edx.
+  |  mov [rsi+4], ebx
+  |  mov [rsi+8], ecx
+  |  mov [rsi+12], edx
+  |  pop rbx
+  |  .if X64WIN; pop rsi; .endif
+  |  ret				// eax still holds the value written by cpuid.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Assertions ---------------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->assert_bad_for_arg_type:		// Assertion target: traps with a breakpoint instruction.
+#ifdef LUA_USE_ASSERT
+  |  int3
+#endif
+  |  int3				// Emitted unconditionally, so the label is always followed by a trap.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- FFI helper functions -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// Handler for callback functions. Callback slot number in ah/al.
+  |->vm_ffi_callback:
+  |.if FFI
+  |.type CTSTATE, CTState, PC
+  |  saveregs_	// ebp/rbp already saved. ebp now holds global_State *.
+  |  lea DISPATCH, [ebp+GG_G2DISP]	// NOTE(review): 32 bit base register here and on the next line -- confirm global_State is always below 4GB.
+  |  mov CTSTATE, GL:ebp->ctype_state
+  |  movzx eax, ax			// Slot number = ah:al, zero-extended.
+  |  mov CTSTATE->cb.slot, eax
+  |  mov CTSTATE->cb.gpr[0], CARG1	// Capture the incoming register arguments of the callback.
+  |  mov CTSTATE->cb.gpr[1], CARG2
+  |  mov CTSTATE->cb.gpr[2], CARG3
+  |  mov CTSTATE->cb.gpr[3], CARG4
+  |  movsd qword CTSTATE->cb.fpr[0], xmm0
+  |  movsd qword CTSTATE->cb.fpr[1], xmm1
+  |  movsd qword CTSTATE->cb.fpr[2], xmm2
+  |  movsd qword CTSTATE->cb.fpr[3], xmm3
+  |.if X64WIN
+  |  lea rax, [rsp+CFRAME_SIZE+4*8]	// X64WIN: skip an extra 4*8 bytes above the C frame.
+  |.else
+  |  lea rax, [rsp+CFRAME_SIZE]
+  |  mov CTSTATE->cb.gpr[4], CARG5	// Non-Windows: two more GPR and four more FP argument registers.
+  |  mov CTSTATE->cb.gpr[5], CARG6
+  |  movsd qword CTSTATE->cb.fpr[4], xmm4
+  |  movsd qword CTSTATE->cb.fpr[5], xmm5
+  |  movsd qword CTSTATE->cb.fpr[6], xmm6
+  |  movsd qword CTSTATE->cb.fpr[7], xmm7
+  |.endif
+  |  mov CTSTATE->cb.stack, rax
+  |  mov CARG2, rsp
+  |  mov SAVE_PC, CTSTATE		// Any value outside of bytecode is ok.
+  |  mov CARG1, CTSTATE
+  |  call extern lj_ccallback_enter	// (CTState *cts, void *cf)
+  |  // lua_State * returned in eax (RD).
+  |  set_vmstate INTERP
+  |  mov BASE, L:RD->base
+  |  mov RD, L:RD->top
+  |  sub RD, BASE
+  |  mov LFUNC:RB, [BASE-16]
+  |  cleartp LFUNC:RB
+  |  shr RD, 3
+  |  add RD, 1				// RD = (top - base)/8 + 1.
+  |  ins_callt
+  |.endif
+  |
+  |->cont_ffi_callback:			// Return from FFI callback.
+  |.if FFI
+  |  mov L:RA, SAVE_L
+  |  mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
+  |  mov aword CTSTATE->L, L:RA
+  |  mov L:RA->base, BASE
+  |  mov L:RA->top, RB
+  |  mov CARG1, CTSTATE
+  |  mov CARG2, RC
+  |  call extern lj_ccallback_leave	// (CTState *cts, TValue *o)
+  |  mov rax, CTSTATE->cb.gpr[0]	// Load both possible return registers from the callback state.
+  |  movsd xmm0, qword CTSTATE->cb.fpr[0]
+  |  jmp ->vm_leave_unw
+  |.endif
+  |
+  |->vm_ffi_call:			// Call C function via FFI.
+  |  // Caveat: needs special frame unwinding, see below.
+  |.if FFI
+  |  .type CCSTATE, CCallState, rbx
+  |  push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1	// Frame pointer setup; the saved rbx ends up at [rbp-8].
+  |
+  |  // Readjust stack.
+  |  mov eax, CCSTATE->spadj
+  |  sub rsp, rax
+  |
+  |  // Copy stack slots.
+  |  movzx ecx, byte CCSTATE->nsp
+  |  sub ecx, 1
+  |  js >2				// nsp == 0: nothing to copy.
+  |1:
+  |  mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
+  |  mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
+  |  sub ecx, 1
+  |  jns <1				// Copies from the highest index down to 0.
+  |2:
+  |
+  |  movzx eax, byte CCSTATE->nfpr
+  |  mov CARG1, CCSTATE->gpr[0]
+  |  mov CARG2, CCSTATE->gpr[1]
+  |  mov CARG3, CCSTATE->gpr[2]
+  |  mov CARG4, CCSTATE->gpr[3]
+  |.if not X64WIN
+  |  mov CARG5, CCSTATE->gpr[4]
+  |  mov CARG6, CCSTATE->gpr[5]
+  |.endif
+  |  test eax, eax; jz >5		// nfpr == 0: skip loading the FP argument registers.
+  |  movaps xmm0, CCSTATE->fpr[0]
+  |  movaps xmm1, CCSTATE->fpr[1]
+  |  movaps xmm2, CCSTATE->fpr[2]
+  |  movaps xmm3, CCSTATE->fpr[3]
+  |.if not X64WIN
+  |  cmp eax, 4; jbe >5			// At most 4 FP registers used: skip xmm4-xmm7.
+  |  movaps xmm4, CCSTATE->fpr[4]
+  |  movaps xmm5, CCSTATE->fpr[5]
+  |  movaps xmm6, CCSTATE->fpr[6]
+  |  movaps xmm7, CCSTATE->fpr[7]
+  |.endif
+  |5:
+  |
+  |  call aword CCSTATE->func		// eax still holds nfpr at the call.
+  |
+  |  mov CCSTATE->gpr[0], rax		// Store the return registers back into the call state.
+  |  movaps CCSTATE->fpr[0], xmm0
+  |.if not X64WIN
+  |  mov CCSTATE->gpr[1], rdx
+  |  movaps CCSTATE->fpr[1], xmm1
+  |.endif
+  |
+  |  mov rbx, [rbp-8]; leave; ret	// Restore rbx from its save slot; leave undoes the stack adjustment.
+  |.endif
+  |// Note: vm_ffi_call must be the last function in this object file!
+  |
+  |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+  int vk = 0;
+  |// Note: aligning all instructions does not pay off.
+  |=>defop:
+
+  switch (op) {
+
+  /* -- Comparison ops ---------------------------------------------------- */
+
+  /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+  |.macro jmp_comp, lt, ge, le, gt, target	// Emits exactly one of the four given jumps to 'target', chosen by the C variable 'op' at build time.
+  ||switch (op) {
+  ||case BC_ISLT:
+  |   lt target
+  ||break;
+  ||case BC_ISGE:
+  |   ge target
+  ||break;
+  ||case BC_ISLE:
+  |   le target
+  ||break;
+  ||case BC_ISGT:
+  |   gt target
+  ||break;
+  ||default: break;  /* Shut up GCC. */
+  ||}
+  |.endmacro
+
+  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+    |  // RA = src1, RD = src2, JMP with RD = target
+    |  ins_AD
+    |  mov ITYPE, [BASE+RA*8]
+    |  mov RB, [BASE+RD*8]
+    |  mov RA, ITYPE			// RA/RD keep the full slot values ...
+    |  mov RD, RB
+    |  sar ITYPE, 47			// ... ITYPE/RB become their type tags (slot >> 47).
+    |  sar RB, 47
+    |.if DUALNUM
+    |  cmp ITYPEd, LJ_TISNUM; jne >7
+    |  cmp RBd, LJ_TISNUM; jne >8
+    |  add PC, 4
+    |  cmp RAd, RDd			// Both are integers: signed 32 bit compare.
+    |  jmp_comp jge, jl, jg, jle, >9	// Inverted condition skips the branch.
+    |6:
+    |  movzx RDd, PC_RD
+    |  branchPC RD
+    |9:
+    |  ins_next
+    |
+    |7:  // RA is not an integer.
+    |  ja ->vmeta_comp
+    |  // RA is a number.
+    |  cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
+    |  // RA is a number, RD is an integer.
+    |  cvtsi2sd xmm0, RDd
+    |  jmp >2
+    |
+    |8:  // RA is an integer, RD is not an integer.
+    |  ja ->vmeta_comp
+    |  // RA is an integer, RD is a number.
+    |  cvtsi2sd xmm1, RAd
+    |  movd xmm0, RD
+    |  jmp >3
+    |.else
+    |  cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
+    |  cmp RBd, LJ_TISNUM; jae ->vmeta_comp
+    |.endif
+    |1:
+    |  movd xmm0, RD			// xmm0 = src2
+    |2:
+    |  movd xmm1, RA			// xmm1 = src1
+    |3:
+    |  add PC, 4
+    |  ucomisd xmm0, xmm1		// Compares src2 with src1, hence the swapped jump conditions below.
+    |  // Unordered: all of ZF CF PF set, ordered: PF clear.
+    |  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+    |.if DUALNUM
+    |  jmp_comp jbe, ja, jb, jae, <9
+    |  jmp <6
+    |.else
+    |  jmp_comp jbe, ja, jb, jae, >1
+    |  movzx RDd, PC_RD
+    |  branchPC RD
+    |1:
+    |  ins_next
+    |.endif
+    break;
+
+  case BC_ISEQV: case BC_ISNEV:
+    vk = op == BC_ISEQV;  /* vk != 0: the EQ variant, which branches on equality. */
+    |  ins_AD	// RA = src1, RD = src2, JMP with RD = target
+    |  mov RB, [BASE+RD*8]
+    |  mov ITYPE, [BASE+RA*8]
+    |  add PC, 4
+    |  mov RD, RB			// RD/RA keep the full slot values ...
+    |  mov RA, ITYPE
+    |  sar RB, 47			// ... RB/ITYPE become their type tags (slot >> 47).
+    |  sar ITYPE, 47
+    |.if DUALNUM
+    |  cmp RBd, LJ_TISNUM; jne >7
+    |  cmp ITYPEd, LJ_TISNUM; jne >8
+    |  cmp RDd, RAd			// Both are integers.
+    if (vk) {
+      |  jne >9
+    } else {
+      |  je >9
+    }
+    |  movzx RDd, PC_RD
+    |  branchPC RD
+    |9:
+    |  ins_next
+    |
+    |7:  // RD is not an integer.
+    |  ja >5
+    |  // RD is a number.
+    |  movd xmm1, RD
+    |  cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
+    |  // RD is a number, RA is an integer.
+    |  cvtsi2sd xmm0, RAd
+    |  jmp >2
+    |
+    |8:  // RD is an integer, RA is not an integer.
+    |  ja >5
+    |  // RD is an integer, RA is a number.
+    |  cvtsi2sd xmm1, RDd
+    |  jmp >1
+    |
+    |.else
+    |  cmp RBd, LJ_TISNUM; jae >5
+    |  cmp ITYPEd, LJ_TISNUM; jae >5
+    |  movd xmm1, RD
+    |.endif
+    |1:
+    |  movd xmm0, RA
+    |2:
+    |  ucomisd xmm0, xmm1
+    |4:
+  iseqne_fp:  /* Shared tail: expects the flags of a ucomisd. Also entered via goto from BC_ISEQN. */
+    if (vk) {
+      |  jp >2				// Unordered means not equal.
+      |  jne >2
+    } else {
+      |  jp >2				// Unordered means not equal.
+      |  je >1
+    }
+  iseqne_end:  /* Shared tail: emits local labels 1 (EQ) and 2 (NE). Also entered via goto from BC_ISEQS. */
+    if (vk) {
+      |1:				// EQ: Branch to the target.
+      |  movzx RDd, PC_RD
+      |  branchPC RD
+      |2:				// NE: Fallthrough to next instruction.
+      |.if not FFI
+      |3:
+      |.endif
+    } else {
+      |.if not FFI
+      |3:
+      |.endif
+      |2:				// NE: Branch to the target.
+      |  movzx RDd, PC_RD
+      |  branchPC RD
+      |1:				// EQ: Fallthrough to next instruction.
+    }
+    if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
+		       op == BC_ISEQN || op == BC_ISNEN)) {
+      |  jmp <9				// Reuse the ins_next already emitted at label 9.
+    } else {
+      |  ins_next
+    }
+    |
+    if (op == BC_ISEQV || op == BC_ISNEV) {
+      |5:  // Either or both types are not numbers.
+      |.if FFI
+      |  cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
+      |  cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
+      |.endif
+      |  cmp RA, RD
+      |  je <1				// Same GCobjs or pvalues?
+      |  cmp RBd, ITYPEd
+      |  jne <2				// Not the same type?
+      |  cmp RBd, LJ_TISTABUD
+      |  ja <2				// Different objects and not table/ud?
+      |
+      |  // Different tables or userdatas. Need to check __eq metamethod.
+      |  // Field metatable must be at same offset for GCtab and GCudata!
+      |  cleartp TAB:RA
+      |  mov TAB:RB, TAB:RA->metatable
+      |  test TAB:RB, TAB:RB
+      |  jz <2				// No metatable?
+      |  test byte TAB:RB->nomm, 1<<MM_eq
+      |  jnz <2				// Or 'no __eq' flag set?
+      if (vk) {
+	|  xor RBd, RBd			// ne = 0
+      } else {
+	|  mov RBd, 1			// ne = 1
+      }
+      |  jmp ->vmeta_equal		// Handle __eq metamethod.
+    } else {
+      |.if FFI
+      |3:
+      |  cmp ITYPEd, LJ_TCDATA
+      if (LJ_DUALNUM && vk) {
+	|  jne <9
+      } else {
+	|  jne <2
+      }
+      |  jmp ->vmeta_equal_cd
+      |.endif
+    }
+    break;
+  case BC_ISEQS: case BC_ISNES:
+    vk = op == BC_ISEQS;  /* vk != 0: the EQ variant. */
+    |  ins_AND	// RA = src, RD = str const, JMP with RD = target
+    |  mov RB, [BASE+RA*8]
+    |  add PC, 4
+    |  checkstr RB, >3			// Non-string: label 3 is emitted by the shared tail code.
+    |  cmp RB, [KBASE+RD*8]		// Compares against the constant slot as a raw 64 bit value.
+  iseqne_test:  /* Shared: expects the flags of an integer compare. Also entered via goto from BC_ISEQP. */
+    if (vk) {
+      |  jne >2
+    } else {
+      |  je >1
+    }
+    goto iseqne_end;
+  case BC_ISEQN: case BC_ISNEN:
+    vk = op == BC_ISEQN;  /* vk != 0: the EQ variant. */
+    |  ins_AD	// RA = src, RD = num const, JMP with RD = target
+    |  mov RB, [BASE+RA*8]
+    |  add PC, 4
+    |.if DUALNUM
+    |  checkint RB, >7
+    |  mov RD, [KBASE+RD*8]
+    |  checkint RD, >8
+    |  cmp RBd, RDd			// Both are integers.
+    if (vk) {
+      |  jne >9
+    } else {
+      |  je >9
+    }
+    |  movzx RDd, PC_RD
+    |  branchPC RD
+    |9:
+    |  ins_next
+    |
+    |7:  // RA is not an integer.
+    |  ja >3
+    |  // RA is a number.
+    |  mov RD, [KBASE+RD*8]
+    |  checkint RD, >1
+    |  // RA is a number, RD is an integer.
+    |  cvtsi2sd xmm0, RDd
+    |  jmp >2
+    |
+    |8:  // RA is an integer, RD is a number.
+    |  cvtsi2sd xmm0, RBd
+    |  movd xmm1, RD
+    |  ucomisd xmm0, xmm1
+    |  jmp >4				// Flags are set: skip the compare at label 2.
+    |1:
+    |  movd xmm0, RD
+    |.else
+    |  checknum RB, >3
+    |1:
+    |  movsd xmm0, qword [KBASE+RD*8]
+    |.endif
+    |2:
+    |  ucomisd xmm0, qword [BASE+RA*8]	// Constant (xmm0) against the slot value.
+    |4:
+    goto iseqne_fp;
+  case BC_ISEQP: case BC_ISNEP:
+    vk = op == BC_ISEQP;  /* vk != 0: the EQ variant. */
+    |  ins_AND	// RA = src, RD = primitive type (~), JMP with RD = target
+    |  mov RB, [BASE+RA*8]
+    |  sar RB, 47			// RB = type tag of the slot.
+    |  add PC, 4
+    |  cmp RBd, RDd
+    if (!LJ_HASFFI) goto iseqne_test;  /* Without FFI the shared tail is enough. */
+    if (vk) {
+      |  jne >3
+      |  movzx RDd, PC_RD
+      |  branchPC RD
+      |2:
+      |  ins_next
+      |3:
+      |  cmp RBd, LJ_TCDATA; jne <2	// Tags differ and src is not cdata: fall through to the next instruction.
+      |  jmp ->vmeta_equal_cd
+    } else {
+      |  je >2
+      |  cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
+      |  movzx RDd, PC_RD
+      |  branchPC RD
+      |2:
+      |  ins_next
+    }
+    break;
+
+  /* -- Unary test and copy ops ------------------------------------------- */
+
+  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+    |  ins_AD	// RA = dst or unused, RD = src, JMP with RD = target
+    |  mov ITYPE, [BASE+RD*8]
+    |  add PC, 4
+    if (op == BC_ISTC || op == BC_ISFC) {
+      |  mov RB, ITYPE			// Copying variants: keep the full value for the store below.
+    }
+    |  sar ITYPE, 47			// ITYPE = type tag of the slot.
+    |  cmp ITYPEd, LJ_TISTRUECOND
+    if (op == BC_IST || op == BC_ISTC) {
+      |  jae >1				// Tag >= LJ_TISTRUECOND (unsigned): no branch for IST/ISTC.
+    } else {
+      |  jb >1				// Tag < LJ_TISTRUECOND (unsigned): no branch for ISF/ISFC.
+    }
+    if (op == BC_ISTC || op == BC_ISFC) {
+      |  mov [BASE+RA*8], RB		// The copy only happens when the branch is taken.
+    }
+    |  movzx RDd, PC_RD
+    |  branchPC RD
+    |1:					// Fallthrough to the next instruction.
+    |  ins_next
+    break;
+
+  case BC_ISTYPE:
+    |  ins_AD	// RA = src, RD = -type
+    |  mov RB, [BASE+RA*8]
+    |  sar RB, 47			// RB = type tag of the slot.
+    |  add RBd, RDd			// tag + (-type) is zero only when the types match.
+    |  jne ->vmeta_istype
+    |  ins_next
+    break;
+  case BC_ISNUM:
+    |  ins_AD	// RA = src, RD = -(TISNUM-1)
+    |  checknumtp [BASE+RA*8], ->vmeta_istype	// RD is not used on this inline path.
+    |  ins_next
+    break;
+
+  /* -- Unary ops --------------------------------------------------------- */
+
+  case BC_MOV:
+    |  ins_AD	// RA = dst, RD = src
+    |  mov RB, [BASE+RD*8]		// Copy the whole 64 bit slot.
+    |  mov [BASE+RA*8], RB
+    |  ins_next_
+    break;
+  case BC_NOT:
+    |  ins_AD	// RA = dst, RD = src
+    |  mov RB, [BASE+RD*8]
+    |  sar RB, 47			// RB = type tag of the slot.
+    |  mov RCd, 2
+    |  cmp RB, LJ_TISTRUECOND
+    |  sbb RCd, 0			// RC = 2 - CF, i.e. 1 when the compare borrows, else 2.
+    |  shl RC, 47
+    |  not RC				// Stored value is ~(RC << 47): a tag-only slot with one of two tags.
+    |  mov [BASE+RA*8], RC
+    |  ins_next
+    break;
+  case BC_UNM:
+    |  ins_AD	// RA = dst, RD = src
+    |  mov RB, [BASE+RD*8]
+    |.if DUALNUM
+    |  checkint RB, >5
+    |  neg RBd
+    |  jo >4				// Overflow: the negated value does not fit in 32 bits.
+    |  setint RB
+    |9:
+    |  mov [BASE+RA*8], RB
+    |  ins_next
+    |4:
+    |  mov64 RB, U64x(41e00000,00000000)  // 2^31.
+    |  jmp <9
+    |5:
+    |  ja ->vmeta_unm
+    |.else
+    |  checknum RB, ->vmeta_unm
+    |.endif
+    |  mov64 RD, U64x(80000000,00000000)	// Bit 63 only.
+    |  xor RB, RD			// Flip bit 63 of the slot value.
+    |.if DUALNUM
+    |  jmp <9
+    |.else
+    |  mov [BASE+RA*8], RB
+    |  ins_next
+    |.endif
+    break;
+  case BC_LEN:
+    |  ins_AD	// RA = dst, RD = src
+    |  mov RD, [BASE+RD*8]
+    |  checkstr RD, >2
+    |.if DUALNUM
+    |  mov RDd, dword STR:RD->len
+    |1:
+    |  setint RD
+    |  mov [BASE+RA*8], RD
+    |.else
+    |  xorps xmm0, xmm0			// Clear xmm0 before the partial write by cvtsi2sd.
+    |  cvtsi2sd xmm0, dword STR:RD->len
+    |1:
+    |  movsd qword [BASE+RA*8], xmm0
+    |.endif
+    |  ins_next
+    |2:
+    |  cmp ITYPEd, LJ_TTAB; jne ->vmeta_len	// Relies on ITYPE as left by checkstr above.
+    |  mov TAB:CARG1, TAB:RD
+#if LJ_52
+    |  mov TAB:RB, TAB:RD->metatable
+    |  cmp TAB:RB, 0
+    |  jnz >9
+    |3:
+#endif
+    |->BC_LEN_Z:
+    |  mov RB, BASE			// Save BASE.
+    |  call extern lj_tab_len		// (GCtab *t)
+    |  // Length of table returned in eax (RD).
+    |.if DUALNUM
+    |  // Nothing to do.
+    |.else
+    |  cvtsi2sd xmm0, RDd
+    |.endif
+    |  mov BASE, RB			// Restore BASE.
+    |  movzx RAd, PC_RA			// Re-decode the destination operand after the C call.
+    |  jmp <1				// Store the result through the common path at label 1.
+#if LJ_52
+    |9:  // Check for __len.
+    |  test byte TAB:RB->nomm, 1<<MM_len
+    |  jnz <3
+    |  jmp ->vmeta_len			// 'no __len' flag NOT set: check.
+#endif
+    break;
+
+  /* -- Binary ops -------------------------------------------------------- */
+
+    |.macro ins_arithpre, sseins, ssereg	// Type-check both operands, load the first into xmm0, then apply sseins ssereg, <second>.
+    |  ins_ABC
+    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+    ||switch (vk) {
+    ||case 0:
+    |   checknumtp [BASE+RB*8], ->vmeta_arith_vn	// vk == 0: slot (BASE) op constant (KBASE).
+    |   .if DUALNUM
+    |     checknumtp [KBASE+RC*8], ->vmeta_arith_vn
+    |   .endif
+    |   movsd xmm0, qword [BASE+RB*8]
+    |   sseins ssereg, qword [KBASE+RC*8]
+    ||  break;
+    ||case 1:
+    |   checknumtp [BASE+RB*8], ->vmeta_arith_nv	// vk == 1: constant (KBASE) op slot (BASE).
+    |   .if DUALNUM
+    |     checknumtp [KBASE+RC*8], ->vmeta_arith_nv
+    |   .endif
+    |   movsd xmm0, qword [KBASE+RC*8]
+    |   sseins ssereg, qword [BASE+RB*8]
+    ||  break;
+    ||default:
+    |   checknumtp [BASE+RB*8], ->vmeta_arith_vv	// Otherwise: slot op slot.
+    |   checknumtp [BASE+RC*8], ->vmeta_arith_vv
+    |   movsd xmm0, qword [BASE+RB*8]
+    |   sseins ssereg, qword [BASE+RC*8]
+    ||  break;
+    ||}
+    |.endmacro
+    |
+    |.macro ins_arithdn, intins	// 32-bit integer arithmetic; non-int operands or overflow go to vmeta_arith_*o.
+    |  ins_ABC
+    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+    ||switch (vk) {
+    ||case 0:
+    |   mov RB, [BASE+RB*8]		// vk == 0: RB = slot, RC = constant; result in RB.
+    |   mov RC, [KBASE+RC*8]
+    |   checkint RB, ->vmeta_arith_vno
+    |   checkint RC, ->vmeta_arith_vno
+    |   intins RBd, RCd; jo ->vmeta_arith_vno
+    ||  break;
+    ||case 1:
+    |   mov RB, [BASE+RB*8]		// vk == 1: operands swapped, result in RC.
+    |   mov RC, [KBASE+RC*8]
+    |   checkint RB, ->vmeta_arith_nvo
+    |   checkint RC, ->vmeta_arith_nvo
+    |   intins RCd, RBd; jo ->vmeta_arith_nvo
+    ||  break;
+    ||default:
+    |   mov RB, [BASE+RB*8]		// Otherwise: both operands are slots; result in RB.
+    |   mov RC, [BASE+RC*8]
+    |   checkint RB, ->vmeta_arith_vvo
+    |   checkint RC, ->vmeta_arith_vvo
+    |   intins RBd, RCd; jo ->vmeta_arith_vvo
+    ||  break;
+    ||}
+    ||if (vk == 1) {
+    |   setint RC			// Store from whichever register received the result.
+    |   mov [BASE+RA*8], RC
+    ||} else {
+    |   setint RB
+    |   mov [BASE+RA*8], RB
+    ||}
+    |  ins_next
+    |.endmacro
+    |
+    |.macro ins_arithpost		// Store the double in xmm0 to the destination slot BASE[RA].
+    |  movsd qword [BASE+RA*8], xmm0
+    |.endmacro
+    |
+    |.macro ins_arith, sseins		// One-argument form: FP-only op computed in xmm0, stored, then dispatch.
+    |  ins_arithpre sseins, xmm0
+    |  ins_arithpost
+    |  ins_next
+    |.endmacro
+    |
+    |.macro ins_arith, intins, sseins	// Two-argument form: integer variant under DUALNUM, else the FP one-argument form.
+    |.if DUALNUM
+    |  ins_arithdn intins
+    |.else
+    |  ins_arith, sseins
+    |.endif
+    |.endmacro
+
+    |  // RA = dst, RB = src1 or num const, RC = src2 or num const
+  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:  /* Addition: add (DUALNUM) or addsd. */
+    |  ins_arith add, addsd
+    break;
+  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:  /* Subtraction: sub (DUALNUM) or subsd. */
+    |  ins_arith sub, subsd
+    break;
+  case BC_MULVN: case BC_MULNV: case BC_MULVV:  /* Multiplication: imul (DUALNUM) or mulsd. */
+    |  ins_arith imul, mulsd
+    break;
+  case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:  /* Division: always the FP path (one-argument ins_arith). */
+    |  ins_arith divsd
+    break;
+  case BC_MODVN:  /* Modulo: operands in xmm0/xmm1, computed by vm_mod, xmm0 stored to BASE[RA]. */
+    |  ins_arithpre movsd, xmm1		// xmm0 = first operand, xmm1 = second operand.
+    |->BC_MODVN_Z:
+    |  call ->vm_mod
+    |  ins_arithpost
+    |  ins_next
+    break;
+  case BC_MODNV: case BC_MODVV:  /* Modulo, other operand layouts: load operands, then share the BC_MODVN tail. */
+    |  ins_arithpre movsd, xmm1
+    |  jmp ->BC_MODVN_Z			// Avoid 3 copies. It's slow anyway.
+    break;
+  case BC_POW:  /* Exponentiation via the external pow(); xmm0 is stored to BASE[RA] afterwards. */
+    |  ins_arithpre movsd, xmm1		// xmm0 = first operand, xmm1 = second operand.
+    |  mov RB, BASE			// Save BASE.
+    |  call extern pow
+    |  movzx RAd, PC_RA			// Restore RA.
+    |  mov BASE, RB			// Restore BASE.
+    |  ins_arithpost
+    |  ins_next
+    break;
+
+  case BC_CAT:  /* Concatenate slots RB..RC via lj_meta_cat and copy the result to BASE[RA]. */
+    |  ins_ABC	// RA = dst, RB = src_start, RC = src_end
+    |  mov L:CARG1, SAVE_L
+    |  mov L:CARG1->base, BASE
+    |  lea CARG2, [BASE+RC*8]		// top = address of the last source slot.
+    |  mov CARG3d, RCd
+    |  sub CARG3d, RBd			// left = src_end - src_start.
+    |->BC_CAT_Z:
+    |  mov L:RB, L:CARG1		// Keep L in RB across the call.
+    |  mov SAVE_PC, PC
+    |  call extern lj_meta_cat		// (lua_State *L, TValue *top, int left)
+    |  // NULL (finished) or TValue * (metamethod) returned in eax (RC).
+    |  mov BASE, L:RB->base
+    |  test RC, RC
+    |  jnz ->vmeta_binop
+    |  movzx RBd, PC_RB			// Copy result to Stk[RA] from Stk[RB].
+    |  movzx RAd, PC_RA
+    |  mov RC, [BASE+RB*8]
+    |  mov [BASE+RA*8], RC
+    |  ins_next
+    break;
+
+  /* -- Constant ops ------------------------------------------------------ */
+
+  case BC_KSTR:  /* Load a string constant, tag it LJ_TSTR and store it to BASE[RA]. */
+    |  ins_AND	// RA = dst, RD = str const (~)
+    |  mov RD, [KBASE+RD*8]
+    |  settp RD, LJ_TSTR
+    |  mov [BASE+RA*8], RD
+    |  ins_next
+    break;
+  case BC_KCDATA:  /* Load a cdata constant tagged LJ_TCDATA; emits nothing unless FFI is enabled. */
+    |.if FFI
+    |  ins_AND	// RA = dst, RD = cdata const (~)
+    |  mov RD, [KBASE+RD*8]
+    |  settp RD, LJ_TCDATA
+    |  mov [BASE+RA*8], RD
+    |  ins_next
+    |.endif
+    break;
+  case BC_KSHORT:  /* Store a signed 16-bit literal as an integer (DUALNUM) or as a double. */
+    |  ins_AD	// RA = dst, RD = signed int16 literal
+    |.if DUALNUM
+    |  movsx RDd, RDW			// Sign-extend literal.
+    |  setint RD
+    |  mov [BASE+RA*8], RD
+    |.else
+    |  movsx RDd, RDW			// Sign-extend literal.
+    |  cvtsi2sd xmm0, RDd
+    |  movsd qword [BASE+RA*8], xmm0
+    |.endif
+    |  ins_next
+    break;
+  case BC_KNUM:  /* Copy a number constant from KBASE[RD] to BASE[RA]. */
+    |  ins_AD	// RA = dst, RD = num const
+    |  movsd xmm0, qword [KBASE+RD*8]
+    |  movsd qword [BASE+RA*8], xmm0
+    |  ins_next
+    break;
+  case BC_KPRI:  /* Build a primitive value from the operand and store it to BASE[RA]. */
+    |  ins_AD	// RA = dst, RD = primitive type (~)
+    |  shl RD, 47			// Move the operand up to bit 47 and above ...
+    |  not RD				// ... then invert all 64 bits to form the stored value.
+    |  mov [BASE+RA*8], RD
+    |  ins_next
+    break;
+  case BC_KNIL:  /* Fill the slot range RA..RD (inclusive) with LJ_TNIL. */
+    |  ins_AD	// RA = dst_start, RD = dst_end
+    |  lea RA, [BASE+RA*8+8]		// RA = address of the second slot.
+    |  lea RD, [BASE+RD*8]		// RD = address of the last slot.
+    |  mov RB, LJ_TNIL
+    |  mov [RA-8], RB			// Sets minimum 2 slots.
+    |1:
+    |  mov [RA], RB
+    |  add RA, 8
+    |  cmp RA, RD
+    |  jbe <1				// Unsigned compare on addresses; loop until past the last slot.
+    |  ins_next
+    break;
+
+  /* -- Upvalue and function ops ------------------------------------------ */
+
+  case BC_UGET:  /* Load the value of upvalue #RD of the current function into BASE[RA]. */
+    |  ins_AD	// RA = dst, RD = upvalue #
+    |  mov LFUNC:RB, [BASE-16]		// Tagged function object of the current frame.
+    |  cleartp LFUNC:RB
+    |  mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)]
+    |  mov RB, UPVAL:RB->v		// RB = pointer to the upvalue's current value.
+    |  mov RD, [RB]
+    |  mov [BASE+RA*8], RD
+    |  ins_next
+    break;
+  case BC_USETV:  /* Store slot RD into upvalue #RA; run the GC write barrier for closed black upvalues. */
+#define TV2MARKOFS \
+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) /* Byte offset from uv->tv to uv->marked. */
+    |  ins_AD	// RA = upvalue #, RD = src
+    |  mov LFUNC:RB, [BASE-16]
+    |  cleartp LFUNC:RB
+    |  mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+    |  cmp byte UPVAL:RB->closed, 0	// Flags are consumed by the jz below; the movs in between keep them.
+    |  mov RB, UPVAL:RB->v
+    |  mov RA, [BASE+RD*8]
+    |  mov [RB], RA
+    |  jz >1				// Open upvalue: no barrier needed.
+    |  // Check barrier for closed upvalue.
+    |  test byte [RB+TV2MARKOFS], LJ_GC_BLACK		// isblack(uv)
+    |  jnz >2
+    |1:
+    |  ins_next
+    |
+    |2:  // Upvalue is black. Check if new value is collectable and white.
+    |  mov RD, RA
+    |  sar RD, 47
+    |  sub RDd, LJ_TISGCV
+    |  cmp RDd, LJ_TNUMX - LJ_TISGCV			// tvisgcv(v)
+    |  jbe <1
+    |  cleartp GCOBJ:RA
+    |  test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES	// iswhite(v)
+    |  jz <1
+    |  // Crossed a write barrier. Move the barrier forward.
+    |.if not X64WIN
+    |  mov CARG2, RB
+    |  mov RB, BASE			// Save BASE.
+    |.else
+    |  xchg CARG2, RB			// Save BASE (CARG2 == BASE).
+    |.endif
+    |  lea GL:CARG1, [DISPATCH+GG_DISP2G]
+    |  call extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
+    |  mov BASE, RB			// Restore BASE.
+    |  jmp <1
+    break;
+#undef TV2MARKOFS
+  case BC_USETS:  /* Store a string constant into upvalue #RA; barrier if the upvalue is black, closed and the string white. */
+    |  ins_AND	// RA = upvalue #, RD = str const (~)
+    |  mov LFUNC:RB, [BASE-16]
+    |  cleartp LFUNC:RB
+    |  mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+    |  mov STR:RA, [KBASE+RD*8]
+    |  mov RD, UPVAL:RB->v
+    |  settp STR:ITYPE, STR:RA, LJ_TSTR	// ITYPE = tagged copy; RA keeps the untagged string pointer.
+    |  mov [RD], STR:ITYPE
+    |  test byte UPVAL:RB->marked, LJ_GC_BLACK		// isblack(uv)
+    |  jnz >2
+    |1:
+    |  ins_next
+    |
+    |2:  // Check if string is white and ensure upvalue is closed.
+    |  test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES	// iswhite(str)
+    |  jz <1
+    |  cmp byte UPVAL:RB->closed, 0
+    |  jz <1
+    |  // Crossed a write barrier. Move the barrier forward.
+    |  mov RB, BASE			// Save BASE (CARG2 == BASE).
+    |  mov CARG2, RD
+    |  lea GL:CARG1, [DISPATCH+GG_DISP2G]
+    |  call extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
+    |  mov BASE, RB			// Restore BASE.
+    |  jmp <1
+    break;
+  case BC_USETN:  /* Store a number constant into upvalue #RA (no barrier is emitted here). */
+    |  ins_AD	// RA = upvalue #, RD = num const
+    |  mov LFUNC:RB, [BASE-16]
+    |  cleartp LFUNC:RB
+    |  movsd xmm0, qword [KBASE+RD*8]
+    |  mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+    |  mov RA, UPVAL:RB->v
+    |  movsd qword [RA], xmm0
+    |  ins_next
+    break;
+  case BC_USETP:  /* Store a primitive value into upvalue #RA (no barrier is emitted here). */
+    |  ins_AD	// RA = upvalue #, RD = primitive type (~)
+    |  mov LFUNC:RB, [BASE-16]
+    |  cleartp LFUNC:RB
+    |  mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+    |  shl RD, 47			// Same shl-47-then-not encoding as BC_KPRI uses.
+    |  not RD
+    |  mov RA, UPVAL:RB->v
+    |  mov [RA], RD
+    |  ins_next
+    break;
+  case BC_UCLO:  /* Branch to the target; first close upvalues from slot RA up if any are open. */
+    |  ins_AD	// RA = level, RD = target
+    |  branchPC RD			// Do this first to free RD.
+    |  mov L:RB, SAVE_L
+    |  cmp aword L:RB->openupval, 0	// Full pointer-width compare: a dword compare would miss a list head whose low 32 bits are zero.
+    |  je >1				// No open upvalues: skip the call.
+    |  mov L:RB->base, BASE
+    |  lea CARG2, [BASE+RA*8]		// Caveat: CARG2 == BASE
+    |  mov L:CARG1, L:RB		// Caveat: CARG1 == RA
+    |  call extern lj_func_closeuv	// (lua_State *L, TValue *level)
+    |  mov BASE, L:RB->base
+    |1:
+    |  ins_next
+    break;
+
+  case BC_FNEW:  /* Create a closure from a prototype constant via lj_func_newL_gc and store it to BASE[RA]. */
+    |  ins_AND	// RA = dst, RD = proto const (~) (holding function prototype)
+    |  mov L:RB, SAVE_L
+    |  mov L:RB->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
+    |  mov CARG3, [BASE-16]		// Parent = function of the current frame.
+    |  cleartp CARG3
+    |  mov CARG2, [KBASE+RD*8]		// Fetch GCproto *.
+    |  mov CARG1, L:RB
+    |  mov SAVE_PC, PC
+    |  // (lua_State *L, GCproto *pt, GCfuncL *parent)
+    |  call extern lj_func_newL_gc
+    |  // GCfuncL * returned in eax (RC).
+    |  mov BASE, L:RB->base
+    |  movzx RAd, PC_RA			// Restore RA.
+    |  settp LFUNC:RC, LJ_TFUNC
+    |  mov [BASE+RA*8], LFUNC:RC
+    |  ins_next
+    break;
+
+  /* -- Table ops --------------------------------------------------------- */
+
+  case BC_TNEW:  /* Create a new table via lj_tab_new (after a GC step if over threshold) and store it to BASE[RA]. */
+    |  ins_AD	// RA = dst, RD = hbits|asize
+    |  mov L:RB, SAVE_L
+    |  mov L:RB->base, BASE
+    |  mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
+    |  cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
+    |  mov SAVE_PC, PC			// mov keeps the flags of the cmp for the jae below.
+    |  jae >5				// gc.total >= gc.threshold: do a GC step first.
+    |1:
+    |  mov CARG3d, RDd
+    |  and RDd, 0x7ff			// asize = low 11 bits of the operand.
+    |  shr CARG3d, 11			// hbits = the remaining upper bits.
+    |  cmp RDd, 0x7ff
+    |  je >3
+    |2:
+    |  mov L:CARG1, L:RB
+    |  mov CARG2d, RDd
+    |  call extern lj_tab_new  // (lua_State *L, int32_t asize, uint32_t hbits)
+    |  // Table * returned in eax (RC).
+    |  mov BASE, L:RB->base
+    |  movzx RAd, PC_RA			// Restore RA.
+    |  settp TAB:RC, LJ_TTAB
+    |  mov [BASE+RA*8], TAB:RC
+    |  ins_next
+    |3:  // Turn 0x7ff into 0x801.
+    |  mov RDd, 0x801
+    |  jmp <2
+    |5:
+    |  mov L:CARG1, L:RB
+    |  call extern lj_gc_step_fixtop	// (lua_State *L)
+    |  movzx RDd, PC_RD			// Need to reload RD.
+    |  jmp <1
+    break;
+  case BC_TDUP:  /* Duplicate a template table via lj_tab_dup (after a GC step if over threshold) into BASE[RA]. */
+    |  ins_AND	// RA = dst, RD = table const (~) (holding template table)
+    |  mov L:RB, SAVE_L
+    |  mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
+    |  mov SAVE_PC, PC
+    |  cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
+    |  mov L:RB->base, BASE		// mov keeps the flags of the cmp for the jae below.
+    |  jae >3				// gc.total >= gc.threshold: do a GC step first.
+    |2:
+    |  mov TAB:CARG2, [KBASE+RD*8]	// Caveat: CARG2 == BASE
+    |  mov L:CARG1, L:RB		// Caveat: CARG1 == RA
+    |  call extern lj_tab_dup		// (lua_State *L, Table *kt)
+    |  // Table * returned in eax (RC).
+    |  mov BASE, L:RB->base
+    |  movzx RAd, PC_RA			// Restore RA.
+    |  settp TAB:RC, LJ_TTAB
+    |  mov [BASE+RA*8], TAB:RC
+    |  ins_next
+    |3:
+    |  mov L:CARG1, L:RB
+    |  call extern lj_gc_step_fixtop	// (lua_State *L)
+    |  movzx RDd, PC_RD			// Need to reload RD.
+    |  not RD				// Re-apply the complement to the reloaded operand (the '~' in the operand note).
+    |  jmp <2
+    break;
+
+  case BC_GGET:  /* Global get: look up a string constant in the current function's env table via BC_TGETS_Z. */
+    |  ins_AND	// RA = dst, RD = str const (~)
+    |  mov LFUNC:RB, [BASE-16]
+    |  cleartp LFUNC:RB
+    |  mov TAB:RB, LFUNC:RB->env
+    |  mov STR:RC, [KBASE+RD*8]
+    |  jmp ->BC_TGETS_Z			// Expects RB = GCtab *, RC = GCstr *.
+    break;
+  case BC_GSET:  /* Global set: store under a string constant in the current function's env table via BC_TSETS_Z. */
+    |  ins_AND	// RA = src, RD = str const (~)
+    |  mov LFUNC:RB, [BASE-16]
+    |  cleartp LFUNC:RB
+    |  mov TAB:RB, LFUNC:RB->env
+    |  mov STR:RC, [KBASE+RD*8]
+    |  jmp ->BC_TSETS_Z			// Expects RB = GCtab *, RC = GCstr *.
+    break;
+
+  case BC_TGETV:  /* Table get with a variable key: array fast path, string keys via BC_TGETS_Z, else vmeta_tgetv. */
+    |  ins_ABC	// RA = dst, RB = table, RC = key
+    |  mov TAB:RB, [BASE+RB*8]
+    |  mov RC, [BASE+RC*8]
+    |  checktab TAB:RB, ->vmeta_tgetv
+    |
+    |  // Integer key?
+    |.if DUALNUM
+    |  checkint RC, >5
+    |.else
+    |  // Convert number to int and back and compare.
+    |  checknum RC, >5
+    |  movd xmm0, RC
+    |  cvttsd2si RCd, xmm0
+    |  cvtsi2sd xmm1, RCd
+    |  ucomisd xmm0, xmm1
+    |  jne ->vmeta_tgetv		// Generic numeric key? Use fallback.
+    |.endif
+    |  cmp RCd, TAB:RB->asize		// Takes care of unordered, too.
+    |  jae ->vmeta_tgetv		// Not in array part? Use fallback.
+    |  shl RCd, 3			// Index -> byte offset (8 bytes per slot).
+    |  add RC, TAB:RB->array
+    |  // Get array slot.
+    |  mov ITYPE, [RC]
+    |  cmp ITYPE, LJ_TNIL		// Avoid overwriting RB in fastpath.
+    |  je >2
+    |1:
+    |  mov [BASE+RA*8], ITYPE
+    |  ins_next
+    |
+    |2:  // Check for __index if table value is nil.
+    |  mov TAB:TMPR, TAB:RB->metatable
+    |  test TAB:TMPR, TAB:TMPR
+    |  jz <1				// No metatable: store the nil.
+    |  test byte TAB:TMPR->nomm, 1<<MM_index
+    |  jz ->vmeta_tgetv			// 'no __index' flag NOT set: check.
+    |  jmp <1
+    |
+    |5:  // String key?
+    |  cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv	// Presumably ITYPE holds the key's tag from the check above.
+    |  cleartp STR:RC
+    |  jmp ->BC_TGETS_Z
+    break;
+  case BC_TGETS:  /* Table get with a string constant key: walk the hash chain, then check __index on nil. */
+    |  ins_ABC	// RA = dst, RB = table, RC = str const (~)
+    |  mov TAB:RB, [BASE+RB*8]
+    |  not RC
+    |  mov STR:RC, [KBASE+RC*8]
+    |  checktab TAB:RB, ->vmeta_tgets
+    |->BC_TGETS_Z:	// RB = GCtab *, RC = GCstr *
+    |  mov TMPRd, TAB:RB->hmask
+    |  and TMPRd, STR:RC->hash		// Node index = hash & hmask.
+    |  imul TMPRd, #NODE			// Scale the index by the node size.
+    |  add NODE:TMPR, TAB:RB->node
+    |  settp ITYPE, STR:RC, LJ_TSTR	// ITYPE = tagged key to compare against node keys.
+    |1:
+    |  cmp NODE:TMPR->key, ITYPE
+    |  jne >4
+    |  // Get node value.
+    |  mov ITYPE, NODE:TMPR->val
+    |  cmp ITYPE, LJ_TNIL
+    |  je >5				// Key found, but nil value?
+    |2:
+    |  mov [BASE+RA*8], ITYPE
+    |  ins_next
+    |
+    |4:  // Follow hash chain.
+    |  mov NODE:TMPR, NODE:TMPR->next
+    |  test NODE:TMPR, NODE:TMPR
+    |  jnz <1
+    |  // End of hash chain: key not found, nil result.
+    |  mov ITYPE, LJ_TNIL
+    |
+    |5:  // Check for __index if table value is nil.
+    |  mov TAB:TMPR, TAB:RB->metatable
+    |  test TAB:TMPR, TAB:TMPR
+    |  jz <2				// No metatable: done.
+    |  test byte TAB:TMPR->nomm, 1<<MM_index
+    |  jnz <2				// 'no __index' flag set: done.
+    |  jmp ->vmeta_tgets		// Caveat: preserve STR:RC.
+    break;
+  case BC_TGETB:  /* Table get with a byte-literal index: array part only, else vmeta_tgetb. */
+    |  ins_ABC	// RA = dst, RB = table, RC = byte literal
+    |  mov TAB:RB, [BASE+RB*8]
+    |  checktab TAB:RB, ->vmeta_tgetb
+    |  cmp RCd, TAB:RB->asize
+    |  jae ->vmeta_tgetb		// Not in array part? Use fallback.
+    |  shl RCd, 3			// Index -> byte offset (8 bytes per slot).
+    |  add RC, TAB:RB->array
+    |  // Get array slot.
+    |  mov ITYPE, [RC]
+    |  cmp ITYPE, LJ_TNIL
+    |  je >2
+    |1:
+    |  mov [BASE+RA*8], ITYPE
+    |  ins_next
+    |
+    |2:  // Check for __index if table value is nil.
+    |  mov TAB:TMPR, TAB:RB->metatable
+    |  test TAB:TMPR, TAB:TMPR
+    |  jz <1				// No metatable: store the nil.
+    |  test byte TAB:TMPR->nomm, 1<<MM_index
+    |  jz ->vmeta_tgetb			// 'no __index' flag NOT set: check.
+    |  jmp <1
+    break;
+  case BC_TGETR:  /* Table get from the array part with no type checks or __index lookup emitted here. */
+    |  ins_ABC	// RA = dst, RB = table, RC = key
+    |  mov TAB:RB, [BASE+RB*8]
+    |  cleartp TAB:RB			// No tag check: the operand is taken to be a table.
+    |.if DUALNUM
+    |  mov RCd, dword [BASE+RC*8]	// Low dword of the key slot is used as the index.
+    |.else
+    |  cvttsd2si RCd, qword [BASE+RC*8]
+    |.endif
+    |  cmp RCd, TAB:RB->asize
+    |  jae ->vmeta_tgetr		// Not in array part? Use fallback.
+    |  shl RCd, 3
+    |  add RC, TAB:RB->array
+    |  // Get array slot.
+    |->BC_TGETR_Z:
+    |  mov ITYPE, [RC]
+    |->BC_TGETR2_Z:
+    |  mov [BASE+RA*8], ITYPE
+    |  ins_next
+    break;
+
+  case BC_TSETV:  /* Table set with a variable key: array fast path, string keys via BC_TSETS_Z, else vmeta_tsetv. */
+    |  ins_ABC	// RA = src, RB = table, RC = key
+    |  mov TAB:RB, [BASE+RB*8]
+    |  mov RC, [BASE+RC*8]
+    |  checktab TAB:RB, ->vmeta_tsetv
+    |
+    |  // Integer key?
+    |.if DUALNUM
+    |  checkint RC, >5
+    |.else
+    |  // Convert number to int and back and compare.
+    |  checknum RC, >5
+    |  movd xmm0, RC
+    |  cvttsd2si RCd, xmm0
+    |  cvtsi2sd xmm1, RCd
+    |  ucomisd xmm0, xmm1
+    |  jne ->vmeta_tsetv		// Generic numeric key? Use fallback.
+    |.endif
+    |  cmp RCd, TAB:RB->asize		// Takes care of unordered, too.
+    |  jae ->vmeta_tsetv		// Not in array part? Use fallback.
+    |  shl RCd, 3			// Index -> byte offset (8 bytes per slot).
+    |  add RC, TAB:RB->array
+    |  cmp aword [RC], LJ_TNIL
+    |  je >3				// Previous value is nil?
+    |1:
+    |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
+    |  jnz >7
+    |2:  // Set array slot.
+    |  mov RB, [BASE+RA*8]		// RB (the table) is no longer needed past this point.
+    |  mov [RC], RB
+    |  ins_next
+    |
+    |3:  // Check for __newindex if previous value is nil.
+    |  mov TAB:TMPR, TAB:RB->metatable
+    |  test TAB:TMPR, TAB:TMPR
+    |  jz <1				// No metatable: plain store.
+    |  test byte TAB:TMPR->nomm, 1<<MM_newindex
+    |  jz ->vmeta_tsetv			// 'no __newindex' flag NOT set: check.
+    |  jmp <1
+    |
+    |5:  // String key?
+    |  cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv	// Presumably ITYPE holds the key's tag from the check above.
+    |  cleartp STR:RC
+    |  jmp ->BC_TSETS_Z
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMPR
+    |  jmp <2
+    break;
+  case BC_TSETS:  /* Table set with a string constant key: update an existing node or add one via lj_tab_newkey. */
+    |  ins_ABC	// RA = src, RB = table, RC = str const (~)
+    |  mov TAB:RB, [BASE+RB*8]
+    |  not RC
+    |  mov STR:RC, [KBASE+RC*8]
+    |  checktab TAB:RB, ->vmeta_tsets
+    |->BC_TSETS_Z:	// RB = GCtab *, RC = GCstr *
+    |  mov TMPRd, TAB:RB->hmask
+    |  and TMPRd, STR:RC->hash		// Node index = hash & hmask.
+    |  imul TMPRd, #NODE			// Scale the index by the node size.
+    |  mov byte TAB:RB->nomm, 0		// Clear metamethod cache.
+    |  add NODE:TMPR, TAB:RB->node
+    |  settp ITYPE, STR:RC, LJ_TSTR	// ITYPE = tagged key to compare against node keys.
+    |1:
+    |  cmp NODE:TMPR->key, ITYPE
+    |  jne >5
+    |  // Ok, key found. Assumes: offsetof(Node, val) == 0
+    |  cmp aword [TMPR], LJ_TNIL
+    |  je >4				// Previous value is nil?
+    |2:
+    |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
+    |  jnz >7
+    |3:  // Set node value.
+    |  mov ITYPE, [BASE+RA*8]		// ITYPE is reused as scratch; the key is no longer needed.
+    |  mov [TMPR], ITYPE
+    |  ins_next
+    |
+    |4:  // Check for __newindex if previous value is nil.
+    |  mov TAB:ITYPE, TAB:RB->metatable	// Clobbers the tagged key; every path from here reloads ITYPE or leaves.
+    |  test TAB:ITYPE, TAB:ITYPE
+    |  jz <2
+    |  test byte TAB:ITYPE->nomm, 1<<MM_newindex
+    |  jz ->vmeta_tsets			// 'no __newindex' flag NOT set: check.
+    |  jmp <2
+    |
+    |5:  // Follow hash chain.
+    |  mov NODE:TMPR, NODE:TMPR->next
+    |  test NODE:TMPR, NODE:TMPR
+    |  jnz <1
+    |  // End of hash chain: key not found, add a new one.
+    |
+    |  // But check for __newindex first.
+    |  mov TAB:TMPR, TAB:RB->metatable
+    |  test TAB:TMPR, TAB:TMPR
+    |  jz >6				// No metatable: continue.
+    |  test byte TAB:TMPR->nomm, 1<<MM_newindex
+    |  jz ->vmeta_tsets			// 'no __newindex' flag NOT set: check.
+    |6:
+    |  mov TMP1, ITYPE			// Spill the tagged key so its address can be passed as TValue *k.
+    |  mov L:CARG1, SAVE_L
+    |  mov L:CARG1->base, BASE
+    |  lea CARG3, TMP1
+    |  mov CARG2, TAB:RB
+    |  mov SAVE_PC, PC
+    |  call extern lj_tab_newkey	// (lua_State *L, GCtab *t, TValue *k)
+    |  // Handles write barrier for the new key. TValue * returned in eax (RC).
+    |  mov L:CARG1, SAVE_L
+    |  mov BASE, L:CARG1->base
+    |  mov TMPR, rax			// TMPR = address of the new value slot, as expected at 3.
+    |  movzx RAd, PC_RA			// Restore RA.
+    |  jmp <2				// Must check write barrier for value.
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, ITYPE
+    |  jmp <3
+    break;
+  case BC_TSETB:  /* Table set with a byte-literal index: array part only, else vmeta_tsetb. */
+    |  ins_ABC	// RA = src, RB = table, RC = byte literal
+    |  mov TAB:RB, [BASE+RB*8]
+    |  checktab TAB:RB, ->vmeta_tsetb
+    |  cmp RCd, TAB:RB->asize
+    |  jae ->vmeta_tsetb		// Not in array part? Use fallback.
+    |  shl RCd, 3			// Index -> byte offset (8 bytes per slot).
+    |  add RC, TAB:RB->array
+    |  cmp aword [RC], LJ_TNIL
+    |  je >3				// Previous value is nil?
+    |1:
+    |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
+    |  jnz >7
+    |2:	 // Set array slot.
+    |  mov ITYPE, [BASE+RA*8]
+    |  mov [RC], ITYPE
+    |  ins_next
+    |
+    |3:  // Check for __newindex if previous value is nil.
+    |  mov TAB:TMPR, TAB:RB->metatable
+    |  test TAB:TMPR, TAB:TMPR
+    |  jz <1				// No metatable: plain store.
+    |  test byte TAB:TMPR->nomm, 1<<MM_newindex
+    |  jz ->vmeta_tsetb			// 'no __newindex' flag NOT set: check.
+    |  jmp <1
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMPR
+    |  jmp <2
+    break;
+  case BC_TSETR:  /* Table set into the array part with no type checks or __newindex lookup emitted here. */
+    |  ins_ABC	// RA = src, RB = table, RC = key
+    |  mov TAB:RB, [BASE+RB*8]
+    |  cleartp TAB:RB			// No tag check: the operand is taken to be a table.
+    |.if DUALNUM
+    |  mov RC, [BASE+RC*8]		// Only the low dword (RCd) is used as the index below.
+    |.else
+    |  cvttsd2si RCd, qword [BASE+RC*8]
+    |.endif
+    |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
+    |  jnz >7
+    |2:
+    |  cmp RCd, TAB:RB->asize
+    |  jae ->vmeta_tsetr		// Not in array part? Use fallback.
+    |  shl RCd, 3
+    |  add RC, TAB:RB->array
+    |  // Set array slot.
+    |->BC_TSETR_Z:
+    |  mov ITYPE, [BASE+RA*8]
+    |  mov [RC], ITYPE
+    |  ins_next
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMPR
+    |  jmp <2
+    break;
+
+  case BC_TSETM:  /* Copy MULTRES-1 slots starting at RA into the array part of the table at RA-1, resizing if needed. */
+    |  ins_AD	// RA = base (table at base-1), RD = num const (start index)
+    |1:
+    |  mov TMPRd, dword [KBASE+RD*8]	// Integer constant is in lo-word.
+    |  lea RA, [BASE+RA*8]
+    |  mov TAB:RB, [RA-8]		// Guaranteed to be a table.
+    |  cleartp TAB:RB
+    |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
+    |  jnz >7
+    |2:
+    |  mov RDd, MULTRES
+    |  sub RDd, 1
+    |  jz >4				// Nothing to copy?
+    |  add RDd, TMPRd			// Compute needed size.
+    |  cmp RDd, TAB:RB->asize
+    |  ja >5				// Doesn't fit into array part?
+    |  sub RDd, TMPRd			// Back to the plain slot count.
+    |  shl TMPRd, 3			// Start index -> byte offset.
+    |  add TMPR, TAB:RB->array
+    |3:  // Copy result slots to table.
+    |  mov RB, [RA]
+    |  add RA, 8
+    |  mov [TMPR], RB
+    |  add TMPR, 8
+    |  sub RDd, 1
+    |  jnz <3
+    |4:
+    |  ins_next
+    |
+    |5:  // Need to resize array part.
+    |  mov L:CARG1, SAVE_L
+    |  mov L:CARG1->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
+    |  mov CARG2, TAB:RB
+    |  mov CARG3d, RDd
+    |  mov L:RB, L:CARG1		// Keep L in RB across the call.
+    |  mov SAVE_PC, PC
+    |  call extern lj_tab_reasize	// (lua_State *L, GCtab *t, int nasize)
+    |  mov BASE, L:RB->base
+    |  movzx RAd, PC_RA			// Restore RA.
+    |  movzx RDd, PC_RD			// Restore RD.
+    |  jmp <1				// Retry.
+    |
+    |7:  // Possible table write barrier for any value. Skip valiswhite check.
+    |  barrierback TAB:RB, RD		// RD is free as scratch: it is reloaded from MULTRES at 2.
+    |  jmp <2
+    break;
+
+  /* -- Calls and vararg handling ----------------------------------------- */
+
+  case BC_CALL: case BC_CALLM:  /* Call the function in slot RA; BC_CALLM first adds MULTRES to the argument count. */
+    |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+    if (op == BC_CALLM) {
+      |  add NARGS:RDd, MULTRES
+    }
+    |  mov LFUNC:RB, [BASE+RA*8]
+    |  checkfunc LFUNC:RB, ->vmeta_call_ra
+    |  lea BASE, [BASE+RA*8+16]		// New BASE = two slots above the function slot.
+    |  ins_call
+    break;
+
+  case BC_CALLMT:  /* Tail call with extra args: add MULTRES to the count, then continue into BC_CALLT. */
+    |  ins_AD	// RA = base, RD = extra_nargs
+    |  add NARGS:RDd, MULTRES
+    |  // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
+    break;
+  case BC_CALLT:  /* Tail call: move the function and its args down over the current frame, then call. */
+    |  ins_AD	// RA = base, RD = nargs+1
+    |  lea RA, [BASE+RA*8+16]		// RA = address of the first argument.
+    |  mov KBASE, BASE			// Use KBASE for move + vmeta_call hint.
+    |  mov LFUNC:RB, [RA-16]
+    |  checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
+    |->BC_CALLT_Z:
+    |  mov PC, [BASE-8]			// Frame link word of the current frame.
+    |  test PCd, FRAME_TYPE
+    |  jnz >7
+    |1:
+    |  mov [BASE-16], LFUNC:RB		// Copy func+tag down, reloaded below.
+    |  mov MULTRES, NARGS:RDd		// Save nargs+1; RD is the loop counter below.
+    |  sub NARGS:RDd, 1
+    |  jz >3
+    |2:  // Move args down.
+    |  mov RB, [RA]
+    |  add RA, 8
+    |  mov [KBASE], RB
+    |  add KBASE, 8
+    |  sub NARGS:RDd, 1
+    |  jnz <2
+    |
+    |  mov LFUNC:RB, [BASE-16]		// RB was used as copy scratch: reload the function.
+    |3:
+    |  cleartp LFUNC:RB
+    |  mov NARGS:RDd, MULTRES
+    |  cmp byte LFUNC:RB->ffid, 1	// (> FF_C) Calling a fast function?
+    |  ja >5
+    |4:
+    |  ins_callt
+    |
+    |5:  // Tailcall to a fast function.
+    |  test PCd, FRAME_TYPE		// Lua frame below?
+    |  jnz <4
+    |  movzx RAd, PC_RA
+    |  neg RA
+    |  mov LFUNC:KBASE, [BASE+RA*8-32]	// Need to prepare KBASE.
+    |  cleartp LFUNC:KBASE
+    |  mov KBASE, LFUNC:KBASE->pc
+    |  mov KBASE, [KBASE+PC2PROTO(k)]
+    |  jmp <4
+    |
+    |7:  // Tailcall from a vararg function.
+    |  sub PC, FRAME_VARG
+    |  test PCd, FRAME_TYPEP
+    |  jnz >8				// Vararg frame below?
+    |  sub BASE, PC			// Need to relocate BASE/KBASE down.
+    |  mov KBASE, BASE
+    |  mov PC, [BASE-8]
+    |  jmp <1
+    |8:
+    |  add PCd, FRAME_VARG		// Undo the subtraction above.
+    |  jmp <1
+    break;
+
+  case BC_ITERC:  /* Generic-for iterator call: copy callable, state and control var up, then do a 2-arg call. */
+    |  ins_A	// RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
+    |  lea RA, [BASE+RA*8+16]		// fb = base+2
+    |  mov RB, [RA-32]			// Copy state. fb[0] = fb[-4].
+    |  mov RC, [RA-24]			// Copy control var. fb[1] = fb[-3].
+    |  mov [RA], RB
+    |  mov [RA+8], RC
+    |  mov LFUNC:RB, [RA-40]		// Copy callable. fb[-1] = fb[-5]
+    |  mov [RA-16], LFUNC:RB
+    |  mov NARGS:RDd, 2+1		// Handle like a regular 2-arg call.
+    |  checkfunc LFUNC:RB, ->vmeta_call
+    |  mov BASE, RA
+    |  ins_call
+    break;
+
+  case BC_ITERN:  /* Inline table traversal: array part first, then hash part, driven by the index in the control var. */
+    |  ins_A	// RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+    |.if JIT
+    |  // NYI: add hotloop, record BC_ITERN.
+    |.endif
+    |  mov TAB:RB, [BASE+RA*8-16]
+    |  cleartp TAB:RB
+    |  mov RCd, [BASE+RA*8-8]		// Get index from control var.
+    |  mov TMPRd, TAB:RB->asize
+    |  add PC, 4			// Advance PC by one 4-byte instruction (to the ITERL, per the notes below).
+    |  mov ITYPE, TAB:RB->array
+    |1:  // Traverse array part.
+    |  cmp RCd, TMPRd; jae >5		// Index points after array part?
+    |  cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
+    |.if not DUALNUM
+    |  cvtsi2sd xmm0, RCd
+    |.endif
+    |  // Copy array slot to returned value.
+    |  mov RB, [ITYPE+RC*8]
+    |  mov [BASE+RA*8+8], RB
+    |  // Return array index as a numeric key.
+    |.if DUALNUM
+    |  setint ITYPE, RC
+    |  mov [BASE+RA*8], ITYPE
+    |.else
+    |  movsd qword [BASE+RA*8], xmm0
+    |.endif
+    |  add RCd, 1
+    |  mov [BASE+RA*8-8], RCd		// Update control var.
+    |2:
+    |  movzx RDd, PC_RD			// Get target from ITERL.
+    |  branchPC RD
+    |3:
+    |  ins_next
+    |
+    |4:  // Skip holes in array part.
+    |  add RCd, 1
+    |  jmp <1
+    |
+    |5:  // Traverse hash part.
+    |  sub RCd, TMPRd			// RC = index relative to the hash part.
+    |6:
+    |  cmp RCd, TAB:RB->hmask; ja <3	// End of iteration? Branch to ITERL+1.
+    |  imul ITYPEd, RCd, #NODE
+    |  add NODE:ITYPE, TAB:RB->node
+    |  cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
+    |  lea TMPRd, [RCd+TMPRd+1]		// Next control var = hash index + asize + 1.
+    |  // Copy key and value from hash slot.
+    |  mov RB, NODE:ITYPE->key
+    |  mov RC, NODE:ITYPE->val
+    |  mov [BASE+RA*8], RB
+    |  mov [BASE+RA*8+8], RC
+    |  mov [BASE+RA*8-8], TMPRd		// Update control var.
+    |  jmp <2
+    |
+    |7:  // Skip holes in hash part.
+    |  add RCd, 1
+    |  jmp <6
+    break;
+
+  case BC_ISNEXT:  /* Verify the (FF_next_N function, table, nil) triple for ITERN; otherwise patch the bytecode to JMP + ITERC. */
+    |  ins_AD	// RA = base, RD = target (points to ITERN)
+    |  mov CFUNC:RB, [BASE+RA*8-24]
+    |  checkfunc CFUNC:RB, >5
+    |  checktptp [BASE+RA*8-16], LJ_TTAB, >5
+    |  cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
+    |  cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
+    |  branchPC RD
+    |  mov64 TMPR, U64x(fffe7fff, 00000000)	// Low dword 0 = start index for ITERN; high dword presumably a tag.
+    |  mov [BASE+RA*8-8], TMPR		// Initialize control var.
+    |1:
+    |  ins_next
+    |5:  // Despecialize bytecode if any of the checks fail.
+    |  mov PC_OP, BC_JMP		// Rewrite this instruction's opcode.
+    |  branchPC RD
+    |  mov byte [PC], BC_ITERC		// Rewrite the opcode byte at the branch target.
+    |  jmp <1
+    break;
+
+  case BC_VARG:  /* Copy vararg slots to RA..: a fixed count padded with nil, or all of them when RB == 0. */
+    |  ins_ABC	// RA = base, RB = nresults+1, RC = numparams
+    |  lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
+    |  lea RA, [BASE+RA*8]
+    |  sub TMPR, [BASE-8]		// Subtract the frame link word; source slots are then read at [TMPR-16].
+    |  // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
+    |  test RB, RB
+    |  jz >5				// Copy all varargs?
+    |  lea RB, [RA+RB*8-8]		// RB = end of the destination range.
+    |  cmp TMPR, BASE			// No vararg slots?
+    |  jnb >2
+    |1:  // Copy vararg slots to destination slots.
+    |  mov RC, [TMPR-16]
+    |  add TMPR, 8
+    |  mov [RA], RC
+    |  add RA, 8
+    |  cmp RA, RB			// All destination slots filled?
+    |  jnb >3
+    |  cmp TMPR, BASE			// No more vararg slots?
+    |  jb <1
+    |2:  // Fill up remainder with nil.
+    |  mov aword [RA], LJ_TNIL
+    |  add RA, 8
+    |  cmp RA, RB
+    |  jb <2
+    |3:
+    |  ins_next
+    |
+    |5:  // Copy all varargs.
+    |  mov MULTRES, 1			// MULTRES = 0+1
+    |  mov RC, BASE
+    |  sub RC, TMPR			// RC = byte size of the vararg area.
+    |  jbe <3				// No vararg slots?
+    |  mov RBd, RCd
+    |  shr RBd, 3			// Bytes -> slot count.
+    |  add RBd, 1
+    |  mov MULTRES, RBd			// MULTRES = #varargs+1
+    |  mov L:RB, SAVE_L
+    |  add RC, RA			// RC = end of the destination range.
+    |  cmp RC, L:RB->maxstack
+    |  ja >7				// Need to grow stack?
+    |6:  // Copy all vararg slots.
+    |  mov RC, [TMPR-16]
+    |  add TMPR, 8
+    |  mov [RA], RC
+    |  add RA, 8
+    |  cmp TMPR, BASE			// No more vararg slots?
+    |  jb <6
+    |  jmp <3
+    |
+    |7:  // Grow stack for varargs.
+    |  mov L:RB->base, BASE
+    |  mov L:RB->top, RA
+    |  mov SAVE_PC, PC
+    |  sub TMPR, BASE			// Need delta, because BASE may change.
+    |  mov TMP1hi, TMPRd		// Spill the 32-bit delta across the call.
+    |  mov CARG2d, MULTRES
+    |  sub CARG2d, 1
+    |  mov CARG1, L:RB
+    |  call extern lj_state_growstack	// (lua_State *L, int n)
+    |  mov BASE, L:RB->base
+    |  movsxd TMPR, TMP1hi		// Reload the delta, sign-extended.
+    |  mov RA, L:RB->top
+    |  add TMPR, BASE
+    |  jmp <6
+    break;
+
+  /* -- Returns ----------------------------------------------------------- */
+
+  case BC_RETM:  /* Return with extra results: add MULTRES to the count, then continue into BC_RET. */
+    |  ins_AD	// RA = results, RD = extra_nresults
+    |  add RDd, MULTRES			// MULTRES >=1, so RD >=1.
+    |  // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
+    break;
+
+  case BC_RET: case BC_RET0: case BC_RET1:  /* Return results to the caller's frame, padding with nil if more are expected. */
+    |  ins_AD	// RA = results, RD = nresults+1
+    if (op != BC_RET0) {
+      |  shl RAd, 3			// RA = byte offset of the first result.
+    }
+    |1:
+    |  mov PC, [BASE-8]
+    |  mov MULTRES, RDd			// Save nresults+1.
+    |  test PCd, FRAME_TYPE		// Check frame type marker.
+    |  jnz >7				// Not returning to a fixarg Lua func?
+    switch (op) {
+    case BC_RET:
+      |->BC_RET_Z:
+      |  mov KBASE, BASE		// Use KBASE for result move.
+      |  sub RDd, 1
+      |  jz >3
+      |2:  // Move results down.
+      |  mov RB, [KBASE+RA]
+      |  mov [KBASE-16], RB
+      |  add KBASE, 8
+      |  sub RDd, 1
+      |  jnz <2
+      |3:
+      |  mov RDd, MULTRES		// Note: MULTRES may be >255.
+      |  movzx RBd, PC_RB		// So cannot compare with RDL!
+      |5:
+      |  cmp RBd, RDd			// More results expected?
+      |  ja >6
+      break;
+    case BC_RET1:
+      |  mov RB, [BASE+RA]
+      |  mov [BASE-16], RB
+      /* fallthrough */
+    case BC_RET0:
+      |5:
+      |  cmp PC_RB, RDL			// More results expected?
+      |  ja >6
+    default:
+      break;
+    }
+    |  movzx RAd, PC_RA
+    |  neg RA
+    |  lea BASE, [BASE+RA*8-16]		// base = base - (RA+2)*8
+    |  mov LFUNC:KBASE, [BASE-16]	// Function of the frame being returned to.
+    |  cleartp LFUNC:KBASE
+    |  mov KBASE, LFUNC:KBASE->pc
+    |  mov KBASE, [KBASE+PC2PROTO(k)]	// Reload KBASE from that function's prototype.
+    |  ins_next
+    |
+    |6:  // Fill up results with nil.
+    if (op == BC_RET) {
+      |  mov aword [KBASE-16], LJ_TNIL	// Note: relies on shifted base.
+      |  add KBASE, 8
+    } else {
+      |  mov aword [BASE+RD*8-24], LJ_TNIL
+    }
+    |  add RD, 1
+    |  jmp <5
+    |
+    |7:  // Non-standard return case.
+    |  lea RB, [PC-FRAME_VARG]
+    |  test RBd, FRAME_TYPEP
+    |  jnz ->vm_return
+    |  // Return from vararg function: relocate BASE down and RA up.
+    |  sub BASE, RB
+    if (op != BC_RET0) {
+      |  add RA, RB
+    }
+    |  jmp <1
+    break;
+
+  /* -- Loops and branches ------------------------------------------------ */
+
+  |.define FOR_IDX,  [RA]		// Four consecutive 8-byte slots addressed from RA, used by the FOR* ops.
+  |.define FOR_STOP, [RA+8]
+  |.define FOR_STEP, [RA+16]
+  |.define FOR_EXT,  [RA+24]		// Receives a copy of the index (written by the FOR* ops).
+
+  case BC_FORL:  /* Numeric for-loop back edge: hot-loop counting when JIT is enabled, then continue into BC_IFORL. */
+    |.if JIT
+    |  hotloop RBd
+    |.endif
+    | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
+    break;
+
+  case BC_JFORI:
+  case BC_JFORL:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_FORI:
+  case BC_IFORL:
+    vk = (op == BC_IFORL || op == BC_JFORL);
+    |  ins_AJ	// RA = base, RD = target (after end of loop or start of loop)
+    |  lea RA, [BASE+RA*8]
+    if (LJ_DUALNUM) {
+      |  mov RB, FOR_IDX
+      |  checkint RB, >9
+      |  mov TMPR, FOR_STOP
+      if (!vk) {
+	|  checkint TMPR, ->vmeta_for
+	|  mov ITYPE, FOR_STEP
+	|  test ITYPEd, ITYPEd; js >5
+	|  sar ITYPE, 47;
+	|  cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
+      } else {
+#ifdef LUA_USE_ASSERT
+	|  checkinttp FOR_STOP, ->assert_bad_for_arg_type
+	|  checkinttp FOR_STEP, ->assert_bad_for_arg_type
+#endif
+	|  mov ITYPE, FOR_STEP
+	|  test ITYPEd, ITYPEd; js >5
+	|  add RBd, ITYPEd; jo >1
+	|  setint RB
+	|  mov FOR_IDX, RB
+      }
+      |  cmp RBd, TMPRd
+      |  mov FOR_EXT, RB
+      if (op == BC_FORI) {
+	|  jle >7
+	|1:
+	|6:
+	|  branchPC RD
+      } else if (op == BC_JFORI) {
+	|  branchPC RD
+	|  movzx RDd, PC_RD
+	|  jle =>BC_JLOOP
+	|1:
+	|6:
+      } else if (op == BC_IFORL) {
+	|  jg >7
+	|6:
+	|  branchPC RD
+	|1:
+      } else {
+	|  jle =>BC_JLOOP
+	|1:
+	|6:
+      }
+      |7:
+      |  ins_next
+      |
+      |5:  // Invert check for negative step.
+      if (!vk) {
+	|  sar ITYPE, 47;
+	|  cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
+      } else {
+	|  add RBd, ITYPEd; jo <1
+	|  setint RB
+	|  mov FOR_IDX, RB
+      }
+      |  cmp RBd, TMPRd
+      |  mov FOR_EXT, RB
+      if (op == BC_FORI) {
+	|  jge <7
+      } else if (op == BC_JFORI) {
+	|  branchPC RD
+	|  movzx RDd, PC_RD
+	|  jge =>BC_JLOOP
+      } else if (op == BC_IFORL) {
+	|  jl <7
+      } else {
+	|  jge =>BC_JLOOP
+      }
+      |  jmp <6
+      |9:  // Fallback to FP variant.
+      if (!vk) {
+	|  jae ->vmeta_for
+      }
+    } else if (!vk) {
+      |  checknumtp FOR_IDX, ->vmeta_for
+    }
+    if (!vk) {
+      |  checknumtp FOR_STOP, ->vmeta_for
+    } else {
+#ifdef LUA_USE_ASSERT
+      |  checknumtp FOR_STOP, ->assert_bad_for_arg_type
+      |  checknumtp FOR_STEP, ->assert_bad_for_arg_type
+#endif
+    }
+    |  mov RB, FOR_STEP
+    if (!vk) {
+      |  checknum RB, ->vmeta_for
+    }
+    |  movsd xmm0, qword FOR_IDX
+    |  movsd xmm1, qword FOR_STOP
+    if (vk) {
+      |  addsd xmm0, qword FOR_STEP
+      |  movsd qword FOR_IDX, xmm0
+      |  test RB, RB; js >3
+    } else {
+      |  jl >3
+    }
+    |  ucomisd xmm1, xmm0
+    |1:
+    |  movsd qword FOR_EXT, xmm0
+    if (op == BC_FORI) {
+      |.if DUALNUM
+      |  jnb <7
+      |.else
+      |  jnb >2
+      |  branchPC RD
+      |.endif
+    } else if (op == BC_JFORI) {
+      |  branchPC RD
+      |  movzx RDd, PC_RD
+      |  jnb =>BC_JLOOP
+    } else if (op == BC_IFORL) {
+      |.if DUALNUM
+      |  jb <7
+      |.else
+      |  jb >2
+      |  branchPC RD
+      |.endif
+    } else {
+      |  jnb =>BC_JLOOP
+    }
+    |.if DUALNUM
+    |  jmp <6
+    |.else
+    |2:
+    |  ins_next
+    |.endif
+    |
+    |3:  // Invert comparison if step is negative.
+    |  ucomisd xmm0, xmm1
+    |  jmp <1
+    break;
+
+  case BC_ITERL:
+    |.if JIT
+    |  hotloop RBd
+    |.endif
+    | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
+    break;
+
+  case BC_JITERL:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_IITERL:
+    |  ins_AJ	// RA = base, RD = target
+    |  lea RA, [BASE+RA*8]
+    |  mov RB, [RA]
+    |  cmp RB, LJ_TNIL; je >1		// Stop if iterator returned nil.
+    if (op == BC_JITERL) {
+      |  mov [RA-8], RB
+      |  jmp =>BC_JLOOP
+    } else {
+      |  branchPC RD			// Otherwise save control var + branch.
+      |  mov [RA-8], RB
+    }
+    |1:
+    |  ins_next
+    break;
+
+  case BC_LOOP:
+    |  ins_A	// RA = base, RD = target (loop extent)
+    |  // Note: RA/RD is only used by trace recorder to determine scope/extent
+    |  // This opcode does NOT jump, its only purpose is to detect a hot loop.
+    |.if JIT
+    |  hotloop RBd
+    |.endif
+    | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
+    break;
+
+  case BC_ILOOP:
+    |  ins_A	// RA = base, RD = target (loop extent)
+    |  ins_next
+    break;
+
+  case BC_JLOOP:
+    |.if JIT
+    |  ins_AD	// RA = base (ignored), RD = traceno
+    |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+    |  mov TRACE:RD, [RA+RD*8]
+    |  mov RD, TRACE:RD->mcode
+    |  mov L:RB, SAVE_L
+    |  mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
+    |  mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
+    |  // Save additional callee-save registers only used in compiled code.
+    |.if X64WIN
+    |  mov CSAVE_4, r12
+    |  mov CSAVE_3, r13
+    |  mov CSAVE_2, r14
+    |  mov CSAVE_1, r15
+    |  mov RA, rsp
+    |  sub rsp, 10*16+4*8
+    |  movdqa [RA-1*16], xmm6
+    |  movdqa [RA-2*16], xmm7
+    |  movdqa [RA-3*16], xmm8
+    |  movdqa [RA-4*16], xmm9
+    |  movdqa [RA-5*16], xmm10
+    |  movdqa [RA-6*16], xmm11
+    |  movdqa [RA-7*16], xmm12
+    |  movdqa [RA-8*16], xmm13
+    |  movdqa [RA-9*16], xmm14
+    |  movdqa [RA-10*16], xmm15
+    |.else
+    |  sub rsp, 16
+    |  mov [rsp+16], r12
+    |  mov [rsp+8], r13
+    |.endif
+    |  jmp RD
+    |.endif
+    break;
+
+  case BC_JMP:
+    |  ins_AJ	// RA = unused, RD = target
+    |  branchPC RD
+    |  ins_next
+    break;
+
+  /* -- Function headers -------------------------------------------------- */
+
+   /*
+   ** Reminder: A function may be called with func/args above L->maxstack,
+   ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
+   ** too. This means all FUNC* ops (including fast functions) must check
+   ** for stack overflow _before_ adding more slots!
+   */
+
+  case BC_FUNCF:
+    |.if JIT
+    |  hotcall RBd
+    |.endif
+  case BC_FUNCV:  /* NYI: compiled vararg functions. */
+    | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
+    break;
+
+  case BC_JFUNCF:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_IFUNCF:
+    |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
+    |  mov KBASE, [PC-4+PC2PROTO(k)]
+    |  mov L:RB, SAVE_L
+    |  lea RA, [BASE+RA*8]		// Top of frame.
+    |  cmp RA, L:RB->maxstack
+    |  ja ->vm_growstack_f
+    |  movzx RAd, byte [PC-4+PC2PROTO(numparams)]
+    |  cmp NARGS:RDd, RAd		// Check for missing parameters.
+    |  jbe >3
+    |2:
+    if (op == BC_JFUNCF) {
+      |  movzx RDd, PC_RD
+      |  jmp =>BC_JLOOP
+    } else {
+      |  ins_next
+    }
+    |
+    |3:  // Clear missing parameters.
+    |  mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
+    |  add NARGS:RDd, 1
+    |  cmp NARGS:RDd, RAd
+    |  jbe <3
+    |  jmp <2
+    break;
+
+  case BC_JFUNCV:
+#if !LJ_HASJIT
+    break;
+#endif
+    | int3  // NYI: compiled vararg functions
+    break;  /* NYI: compiled vararg functions. */
+
+  case BC_IFUNCV:
+    |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
+    |  lea RBd, [NARGS:RD*8+FRAME_VARG+8]
+    |  lea RD, [BASE+NARGS:RD*8+8]
+    |  mov LFUNC:KBASE, [BASE-16]
+    |  mov [RD-8], RB			// Store delta + FRAME_VARG.
+    |  mov [RD-16], LFUNC:KBASE		// Store copy of LFUNC.
+    |  mov L:RB, SAVE_L
+    |  lea RA, [RD+RA*8]
+    |  cmp RA, L:RB->maxstack
+    |  ja ->vm_growstack_v		// Need to grow stack.
+    |  mov RA, BASE
+    |  mov BASE, RD
+    |  movzx RBd, byte [PC-4+PC2PROTO(numparams)]
+    |  test RBd, RBd
+    |  jz >2
+    |  add RA, 8
+    |1:  // Copy fixarg slots up to new frame.
+    |  add RA, 8
+    |  cmp RA, BASE
+    |  jnb >3				// Less args than parameters?
+    |  mov KBASE, [RA-16]
+    |  mov [RD], KBASE
+    |  add RD, 8
+    |  mov aword [RA-16], LJ_TNIL	// Clear old fixarg slot (help the GC).
+    |  sub RBd, 1
+    |  jnz <1
+    |2:
+    if (op == BC_JFUNCV) {
+      |  movzx RDd, PC_RD
+      |  jmp =>BC_JLOOP
+    } else {
+      |  mov KBASE, [PC-4+PC2PROTO(k)]
+      |  ins_next
+    }
+    |
+    |3:  // Clear missing parameters.
+    |  mov aword [RD], LJ_TNIL
+    |  add RD, 8
+    |  sub RBd, 1
+    |  jnz <3
+    |  jmp <2
+    break;
+
+  case BC_FUNCC:
+  case BC_FUNCCW:
+    |  ins_AD  // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
+    |  mov CFUNC:RB, [BASE-16]
+    |  cleartp CFUNC:RB
+    |  mov KBASE, CFUNC:RB->f
+    |  mov L:RB, SAVE_L
+    |  lea RD, [BASE+NARGS:RD*8-8]
+    |  mov L:RB->base, BASE
+    |  lea RA, [RD+8*LUA_MINSTACK]
+    |  cmp RA, L:RB->maxstack
+    |  mov L:RB->top, RD
+    if (op == BC_FUNCC) {
+      |  mov CARG1, L:RB		// Caveat: CARG1 may be RA.
+    } else {
+      |  mov CARG2, KBASE
+      |  mov CARG1, L:RB		// Caveat: CARG1 may be RA.
+    }
+    |  ja ->vm_growstack_c		// Need to grow stack.
+    |  set_vmstate C
+    if (op == BC_FUNCC) {
+      |  call KBASE			// (lua_State *L)
+    } else {
+      |  // (lua_State *L, lua_CFunction f)
+      |  call aword [DISPATCH+DISPATCH_GL(wrapf)]
+    }
+    |  // nresults returned in eax (RD).
+    |  mov BASE, L:RB->base
+    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+    |  set_vmstate INTERP
+    |  lea RA, [BASE+RD*8]
+    |  neg RA
+    |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
+    |  mov PC, [BASE-8]			// Fetch PC of caller.
+    |  jmp ->vm_returnc
+    break;
+
+  /* ---------------------------------------------------------------------- */
+
+  default:
+    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+    exit(2);
+    break;
+  }
+}
+
+static int build_backend(BuildCtx *ctx)  /* Emits subroutines, then every opcode; returns BC__MAX. */
+{
+  int opcode = 0;  /* Next bytecode opcode to emit; passed twice to build_ins. */
+  dasm_growpc(Dst, BC__MAX);  /* Called with BC__MAX before any code is emitted. */
+  build_subroutines(ctx);
+  |.code_op
+  while (opcode < BC__MAX)  /* Ascending order, 0 .. BC__MAX-1. */
+    { build_ins(ctx, (BCOp)opcode, opcode); opcode++; }
+  return BC__MAX;
+}
+
+/* Emit pseudo frame-info for all assembler functions: writes CIE/FDE records as assembler text to ctx->fp (BUILD_elfasm and BUILD_machasm modes only). */
+static void emit_asm_debug(BuildCtx *ctx)
+{
+  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);  /* Byte offset of the vm_ffi_call global from the start of ctx->code. */
+  switch (ctx->mode) {
+  case BUILD_elfasm:  /* Always emits .debug_frame; .eh_frame only if !LJ_NO_UNWIND. */
+    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+    fprintf(ctx->fp,  /* CIE .Lframe0, referenced by the .debug_frame FDEs below. */
+	".Lframe0:\n"
+	"\t.long .LECIE0-.LSCIE0\n"
+	".LSCIE0:\n"
+	"\t.long 0xffffffff\n"
+	"\t.byte 0x1\n"
+	"\t.string \"\"\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -8\n"
+	"\t.byte 0x10\n"
+	"\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
+	"\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
+	"\t.align 8\n"
+	".LECIE0:\n\n");
+    fprintf(ctx->fp,  /* FDE: starts at .Lbegin, length fcofs, CFA offset CFRAME_SIZE. */
+	".LSFDE0:\n"
+	"\t.long .LEFDE0-.LASFDE0\n"
+	".LASFDE0:\n"
+	"\t.long .Lframe0\n"
+	"\t.quad .Lbegin\n"
+	"\t.quad %d\n"
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
+	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
+	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
+#if LJ_NO_UNWIND
+	"\t.byte 0x8d\n\t.uleb128 0x6\n"	/* offset r13 */
+	"\t.byte 0x8c\n\t.uleb128 0x7\n"	/* offset r12 */
+#endif
+	"\t.align 8\n"
+	".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
+#if LJ_HASFFI
+    fprintf(ctx->fp,  /* FDE: starts at lj_vm_ffi_call, length codesz - fcofs. */
+	".LSFDE1:\n"
+	"\t.long .LEFDE1-.LASFDE1\n"
+	".LASFDE1:\n"
+	"\t.long .Lframe0\n"
+	"\t.quad lj_vm_ffi_call\n"
+	"\t.quad %d\n"
+	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
+	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
+	"\t.align 8\n"
+	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#if !LJ_NO_UNWIND
+#if (defined(__sun__) && defined(__svr4__))
+    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
+#else
+    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+#endif
+    fprintf(ctx->fp,  /* CIE .Lframe1: "zPR" augmentation, references lj_err_unwind_dwarf. */
+	".Lframe1:\n"
+	"\t.long .LECIE1-.LSCIE1\n"
+	".LSCIE1:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.string \"zPR\"\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -8\n"
+	"\t.byte 0x10\n"
+	"\t.uleb128 6\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.long lj_err_unwind_dwarf-.\n"
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
+	"\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
+	"\t.align 8\n"
+	".LECIE1:\n\n");
+    fprintf(ctx->fp,  /* FDE: starts at .Lbegin (pc-relative), length fcofs. */
+	".LSFDE2:\n"
+	"\t.long .LEFDE2-.LASFDE2\n"
+	".LASFDE2:\n"
+	"\t.long .LASFDE2-.Lframe1\n"
+	"\t.long .Lbegin-.\n"
+	"\t.long %d\n"
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
+	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
+	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
+	"\t.align 8\n"
+	".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
+#if LJ_HASFFI
+    fprintf(ctx->fp,  /* CIE .Lframe2: "zR" augmentation, no lj_err_unwind_dwarf reference. */
+	".Lframe2:\n"
+	"\t.long .LECIE2-.LSCIE2\n"
+	".LSCIE2:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.string \"zR\"\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -8\n"
+	"\t.byte 0x10\n"
+	"\t.uleb128 1\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
+	"\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
+	"\t.align 8\n"
+	".LECIE2:\n\n");
+    fprintf(ctx->fp,  /* FDE: starts at lj_vm_ffi_call (pc-relative), length codesz - fcofs. */
+	".LSFDE3:\n"
+	"\t.long .LEFDE3-.LASFDE3\n"
+	".LASFDE3:\n"
+	"\t.long .LASFDE3-.Lframe2\n"
+	"\t.long lj_vm_ffi_call-.\n"
+	"\t.long %d\n"
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
+	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
+	"\t.align 8\n"
+	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#endif
+    break;
+#if !LJ_NO_UNWIND
+  /* Mental note: never let Apple design an assembler.
+  ** Or a linker. Or a plastic case. But I digress.
+  */
+  case BUILD_machasm: {
+#if LJ_HASFFI
+    int fcsize = 0;  /* Size of _lj_vm_ffi_call; stays 0 if that symbol is not seen in the loop. */
+#endif
+    int i;
+    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
+    fprintf(ctx->fp,  /* CIE EH_frame1, referenced by every per-symbol FDE emitted in the loop. */
+	"EH_frame1:\n"
+	"\t.set L$set$x,LECIEX-LSCIEX\n"
+	"\t.long L$set$x\n"
+	"LSCIEX:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.ascii \"zPR\\0\"\n"
+	"\t.byte 0x1\n"
+	"\t.byte 128-8\n"
+	"\t.byte 0x10\n"
+	"\t.byte 6\n"				/* augmentation length */
+	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
+	"\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
+	"\t.byte 0x80+0x10\n\t.byte 0x1\n"
+	"\t.align 3\n"
+	"LECIEX:\n\n");
+    for (i = 0; i < ctx->nsym; i++) {  /* One FDE per symbol with non-zero size. */
+      const char *name = ctx->sym[i].name;
+      int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;  /* Reads sym[i+1], so sym[nsym] must hold a valid end offset -- NOTE(review): confirm the caller sets it. */
+      if (size == 0) continue;
+#if LJ_HASFFI
+      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }  /* Gets its own CIE/FDE after the loop. */
+#endif
+      fprintf(ctx->fp,  /* Labels are made unique by the symbol index i. */
+	  "%s.eh:\n"
+	  "LSFDE%d:\n"
+	  "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
+	  "\t.long L$set$%d\n"
+	  "LASFDE%d:\n"
+	  "\t.long LASFDE%d-EH_frame1\n"
+	  "\t.long %s-.\n"
+	  "\t.long %d\n"
+	  "\t.byte 0\n"				/* augmentation length */
+	  "\t.byte 0xe\n\t.byte %d\n"		/* def_cfa_offset */
+	  "\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
+	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
+	  "\t.byte 0x8f\n\t.byte 0x4\n"		/* offset r15 */
+	  "\t.byte 0x8e\n\t.byte 0x5\n"		/* offset r14 */
+	  "\t.align 3\n"
+	  "LEFDE%d:\n\n",
+	  name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
+    }
+#if LJ_HASFFI
+    if (fcsize) {  /* Separate CIE/FDE pair for _lj_vm_ffi_call, only if it was found above. */
+      fprintf(ctx->fp,
+	  "EH_frame2:\n"
+	  "\t.set L$set$y,LECIEY-LSCIEY\n"
+	  "\t.long L$set$y\n"
+	  "LSCIEY:\n"
+	  "\t.long 0\n"
+	  "\t.byte 0x1\n"
+	  "\t.ascii \"zR\\0\"\n"
+	  "\t.byte 0x1\n"
+	  "\t.byte 128-8\n"
+	  "\t.byte 0x10\n"
+	  "\t.byte 1\n"				/* augmentation length */
+	  "\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	  "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
+	  "\t.byte 0x80+0x10\n\t.byte 0x1\n"
+	  "\t.align 3\n"
+	  "LECIEY:\n\n");
+      fprintf(ctx->fp,
+	  "_lj_vm_ffi_call.eh:\n"
+	  "LSFDEY:\n"
+	  "\t.set L$set$yy,LEFDEY-LASFDEY\n"
+	  "\t.long L$set$yy\n"
+	  "LASFDEY:\n"
+	  "\t.long LASFDEY-EH_frame2\n"
+	  "\t.long _lj_vm_ffi_call-.\n"
+	  "\t.long %d\n"
+	  "\t.byte 0\n"				/* augmentation length */
+	  "\t.byte 0xe\n\t.byte 16\n"		/* def_cfa_offset */
+	  "\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
+	  "\t.byte 0xd\n\t.byte 0x6\n"		/* def_cfa_register rbp */
+	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
+	  "\t.align 3\n"
+	  "LEFDEY:\n\n", fcsize);
+    }
+#endif
+    fprintf(ctx->fp, ".subsections_via_symbols\n");
+    }
+    break;
+#endif
+  default:  /* Difficult for other modes. */
+    break;
+  }
+}
+
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/vm_x86.dasc b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_x86.dasc
similarity index 84%
rename from source/libs/luajit/LuaJIT-2.0.4/src/vm_x86.dasc
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_x86.dasc
index 6cdb8cbd2731812b7ddf817f3756125355d7d17f..f31e595b3f4294f4b24ee4c9f09cbdd14e733037 100644
--- a/source/libs/luajit/LuaJIT-2.0.4/src/vm_x86.dasc
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/vm_x86.dasc
@@ -18,7 +18,6 @@
 |
 |.if P64
 |.define X64, 1
-|.define SSE, 1
 |.if WIN
 |.define X64WIN, 1
 |.endif
@@ -116,6 +115,7 @@
 |.type NODE,		Node
 |.type NARGS,		int
 |.type TRACE,		GCtrace
+|.type SBUF,		SBuf
 |
 |// Stack layout while in interpreter. Must match with lj_frame.h.
 |//-----------------------------------------------------------------------
@@ -222,6 +222,9 @@
 |.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
 |.macro saveregs_
 |  push rbx; push r15; push r14
+|.if NO_UNWIND
+|  push r13; push r12
+|.endif
 |  sub rsp, CFRAME_SPACE
 |.endmacro
 |.macro saveregs
@@ -229,15 +232,28 @@
 |.endmacro
 |.macro restoreregs
 |  add rsp, CFRAME_SPACE
+|.if NO_UNWIND
+|  pop r12; pop r13
+|.endif
 |  pop r14; pop r15; pop rbx; pop rbp
 |.endmacro
 |
 |//----- 16 byte aligned,
+|.if NO_UNWIND
+|.define SAVE_RET,	aword [rsp+aword*11]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	aword [rsp+aword*10]
+|.define SAVE_R3,	aword [rsp+aword*9]
+|.define SAVE_R2,	aword [rsp+aword*8]
+|.define SAVE_R1,	aword [rsp+aword*7]
+|.define SAVE_RU2,	aword [rsp+aword*6]
+|.define SAVE_RU1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.else
 |.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
 |.define SAVE_R4,	aword [rsp+aword*8]
 |.define SAVE_R3,	aword [rsp+aword*7]
 |.define SAVE_R2,	aword [rsp+aword*6]
 |.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.endif
 |.define SAVE_CFRAME,	aword [rsp+aword*4]
 |.define SAVE_PC,	dword [rsp+dword*7]
 |.define SAVE_L,	dword [rsp+dword*6]
@@ -373,7 +389,6 @@
 |  fpop
 |.endmacro
 |
-|.macro fdup; fld st0; .endmacro
 |.macro fpop1; fstp st1; .endmacro
 |
 |// Synthesize SSE FP constants.
@@ -630,17 +645,18 @@ static void build_subroutines(BuildCtx *ctx)
   |  lea KBASEa, [esp+CFRAME_RESUME]
   |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  add DISPATCH, GG_G2DISP
-  |  mov L:RB->cframe, KBASEa
   |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
   |  mov SAVE_CFRAME, RDa
   |.if X64
   |  mov SAVE_NRES, RD
   |  mov SAVE_ERRF, RD
   |.endif
+  |  mov L:RB->cframe, KBASEa
   |  cmp byte L:RB->status, RDL
-  |  je >3				// Initial resume (like a call).
+  |  je >2				// Initial resume (like a call).
   |
   |  // Resume after yield (like a return).
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
   |  set_vmstate INTERP
   |  mov byte L:RB->status, RDL
   |  mov BASE, L:RB->base
@@ -680,20 +696,19 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov RA, INARG_BASE			// Caveat: overlaps SAVE_CFRAME!
   |.endif
   |
+  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  mov KBASEa, L:RB->cframe		// Add our C frame to cframe chain.
   |  mov SAVE_CFRAME, KBASEa
   |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
+  |  add DISPATCH, GG_G2DISP
   |.if X64
   |  mov L:RB->cframe, rsp
   |.else
   |  mov L:RB->cframe, esp
   |.endif
   |
-  |2:  // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype).
-  |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
-  |  add DISPATCH, GG_G2DISP
-  |
-  |3:  // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
+  |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
   |  set_vmstate INTERP
   |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
   |  add PC, RA
@@ -731,14 +746,17 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  mov KBASE, L:RB->stack		// Compute -savestack(L, L->top).
   |  sub KBASE, L:RB->top
+  |   mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  mov SAVE_ERRF, 0			// No error function.
   |  mov SAVE_NRES, KBASE		// Neg. delta means cframe w/o frame.
+  |   add DISPATCH, GG_G2DISP
   |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
   |
   |.if X64
   |  mov KBASEa, L:RB->cframe		// Add our C frame to cframe chain.
   |  mov SAVE_CFRAME, KBASEa
   |  mov L:RB->cframe, rsp
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
   |
   |  call CARG4			// (lua_State *L, lua_CFunction func, void *ud)
   |.else
@@ -749,6 +767,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
   |  mov SAVE_CFRAME, KBASE
   |  mov L:RB->cframe, esp
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
   |
   |  call BASE			// (lua_State *L, lua_CFunction func, void *ud)
   |.endif
@@ -856,13 +875,9 @@ static void build_subroutines(BuildCtx *ctx)
   |.if DUALNUM
   |  mov TMP2, LJ_TISNUM
   |  mov TMP1, RC
-  |.elif SSE
+  |.else
   |  cvtsi2sd xmm0, RC
   |  movsd TMPQ, xmm0
-  |.else
-  |  mov ARG4, RC
-  |  fild ARG4
-  |  fstp TMPQ
   |.endif
   |  lea RCa, TMPQ			// Store temp. TValue in TMPQ.
   |  jmp >1
@@ -916,6 +931,19 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov NARGS:RD, 2+1			// 2 args for func(t, k).
   |  jmp ->vm_call_dispatch_f
   |
+  |->vmeta_tgetr:
+  |  mov FCARG1, TAB:RB
+  |  mov RB, BASE			// Save BASE.
+  |  mov FCARG2, RC			// Caveat: FCARG2 == BASE
+  |  call extern lj_tab_getinth@8	// (GCtab *t, int32_t key)
+  |  // cTValue * or NULL returned in eax (RC).
+  |  movzx RA, PC_RA
+  |  mov BASE, RB			// Restore BASE.
+  |  test RC, RC
+  |  jnz ->BC_TGETR_Z
+  |  mov dword [BASE+RA*8+4], LJ_TNIL
+  |  jmp ->BC_TGETR2_Z
+  |
   |//-----------------------------------------------------------------------
   |
   |->vmeta_tsets:
@@ -935,13 +963,9 @@ static void build_subroutines(BuildCtx *ctx)
   |.if DUALNUM
   |  mov TMP2, LJ_TISNUM
   |  mov TMP1, RC
-  |.elif SSE
+  |.else
   |  cvtsi2sd xmm0, RC
   |  movsd TMPQ, xmm0
-  |.else
-  |  mov ARG4, RC
-  |  fild ARG4
-  |  fstp TMPQ
   |.endif
   |  lea RCa, TMPQ			// Store temp. TValue in TMPQ.
   |  jmp >1
@@ -1007,6 +1031,33 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov NARGS:RD, 3+1			// 3 args for func(t, k, v).
   |  jmp ->vm_call_dispatch_f
   |
+  |->vmeta_tsetr:
+  |.if X64WIN
+  |  mov L:CARG1d, SAVE_L
+  |  mov CARG3d, RC
+  |  mov L:CARG1d->base, BASE
+  |  xchg CARG2d, TAB:RB		// Caveat: CARG2d == BASE.
+  |.elif X64
+  |  mov L:CARG1d, SAVE_L
+  |  mov CARG2d, TAB:RB
+  |  mov L:CARG1d->base, BASE
+  |  mov RB, BASE			// Save BASE.
+  |  mov CARG3d, RC			// Caveat: CARG3d == BASE.
+  |.else
+  |  mov L:RA, SAVE_L
+  |  mov ARG2, TAB:RB
+  |  mov RB, BASE			// Save BASE.
+  |  mov ARG3, RC
+  |  mov ARG1, L:RA
+  |  mov L:RA->base, BASE
+  |.endif
+  |  mov SAVE_PC, PC
+  |  call extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
+  |  // TValue * returned in eax (RC).
+  |  movzx RA, PC_RA
+  |  mov BASE, RB			// Restore BASE.
+  |  jmp ->BC_TSETR_Z
+  |
   |//-- Comparison metamethods ---------------------------------------------
   |
   |->vmeta_comp:
@@ -1101,6 +1152,26 @@ static void build_subroutines(BuildCtx *ctx)
   |  jmp <3
   |.endif
   |
+  |->vmeta_istype:
+  |.if X64
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE		// Caveat: CARG2d/CARG3d may be BASE.
+  |  mov CARG2d, RA
+  |  movzx CARG3d, PC_RD
+  |  mov L:CARG1d, L:RB
+  |.else
+  |  movzx RD, PC_RD
+  |  mov ARG2, RA
+  |  mov L:RB, SAVE_L
+  |  mov ARG3, RD
+  |  mov ARG1, L:RB
+  |  mov L:RB->base, BASE
+  |.endif
+  |  mov SAVE_PC, PC
+  |  call extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
+  |  mov BASE, L:RB->base
+  |  jmp <6
+  |
   |//-- Arithmetic metamethods ---------------------------------------------
   |
   |->vmeta_arith_vno:
@@ -1273,19 +1344,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  cmp NARGS:RD, 2+1;  jb ->fff_fallback
   |.endmacro
   |
-  |.macro .ffunc_n, name
-  |  .ffunc_1 name
-  |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback
-  |  fld qword [BASE]
-  |.endmacro
-  |
-  |.macro .ffunc_n, name, op
-  |  .ffunc_1 name
-  |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback
-  |  op
-  |  fld qword [BASE]
-  |.endmacro
-  |
   |.macro .ffunc_nsse, name, op
   |  .ffunc_1 name
   |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback
@@ -1296,14 +1354,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  .ffunc_nsse name, movsd
   |.endmacro
   |
-  |.macro .ffunc_nn, name
-  |  .ffunc_2 name
-  |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback
-  |  cmp dword [BASE+12], LJ_TISNUM;  jae ->fff_fallback
-  |  fld qword [BASE]
-  |  fld qword [BASE+8]
-  |.endmacro
-  |
   |.macro .ffunc_nnsse, name
   |  .ffunc_2 name
   |  cmp dword [BASE+4], LJ_TISNUM;  jae ->fff_fallback
@@ -1509,11 +1559,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.else
   |  jae ->fff_fallback
   |.endif
-  |.if SSE
   |  movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
-  |.else
-  |  fld qword [BASE]; jmp ->fff_resn
-  |.endif
   |
   |.ffunc_1 tostring
   |  // Only handles the string or number case inline.
@@ -1538,9 +1584,9 @@ static void build_subroutines(BuildCtx *ctx)
   |.endif
   |  mov L:FCARG1, L:RB
   |.if DUALNUM
-  |  call extern lj_str_fromnumber@8	// (lua_State *L, cTValue *o)
+  |  call extern lj_strfmt_number@8	// (lua_State *L, cTValue *o)
   |.else
-  |  call extern lj_str_fromnum@8	// (lua_State *L, lua_Number *np)
+  |  call extern lj_strfmt_num@8	// (lua_State *L, lua_Number *np)
   |.endif
   |  // GCstr returned in eax (RD).
   |  mov BASE, L:RB->base
@@ -1631,19 +1677,12 @@ static void build_subroutines(BuildCtx *ctx)
   |  add RD, 1
   |  mov dword [BASE-4], LJ_TISNUM
   |  mov dword [BASE-8], RD
-  |.elif SSE
+  |.else
   |  movsd xmm0, qword [BASE+8]
   |  sseconst_1 xmm1, RBa
   |  addsd xmm0, xmm1
-  |  cvtsd2si RD, xmm0
+  |  cvttsd2si RD, xmm0
   |  movsd qword [BASE-8], xmm0
-  |.else
-  |  fld qword [BASE+8]
-  |  fld1
-  |  faddp st1
-  |  fist ARG1
-  |  fstp qword [BASE-8]
-  |  mov RD, ARG1
   |.endif
   |  mov TAB:RB, [BASE]
   |  cmp RD, TAB:RB->asize;  jae >2	// Not in array part?
@@ -1690,12 +1729,9 @@ static void build_subroutines(BuildCtx *ctx)
   |.if DUALNUM
   |  mov dword [BASE+12], LJ_TISNUM
   |  mov dword [BASE+8], 0
-  |.elif SSE
+  |.else
   |  xorps xmm0, xmm0
   |  movsd qword [BASE+8], xmm0
-  |.else
-  |  fldz
-  |  fstp qword [BASE+8]
   |.endif
   |  mov RD, 1+3
   |  jmp ->fff_res
@@ -1802,7 +1838,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov ARG3, RA
   |.endif
   |  call ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
-  |  set_vmstate INTERP
   |
   |  mov L:RB, SAVE_L
   |.if X64
@@ -1811,6 +1846,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L:PC, ARG1			// The callee doesn't modify SAVE_L.
   |.endif
   |  mov BASE, L:RB->base
+  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+  |  set_vmstate INTERP
+  |
   |  cmp eax, LUA_YIELD
   |  ja >8
   |4:
@@ -1925,12 +1963,10 @@ static void build_subroutines(BuildCtx *ctx)
   |->fff_resi:  // Dummy.
   |.endif
   |
-  |.if SSE
   |->fff_resn:
   |  mov PC, [BASE-4]
   |  fstp qword [BASE-8]
   |  jmp ->fff_res1
-  |.endif
   |
   |  .ffunc_1 math_abs
   |.if DUALNUM
@@ -1954,8 +1990,6 @@ static void build_subroutines(BuildCtx *ctx)
   |.else
   |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
   |.endif
-  |
-  |.if SSE
   |  movsd xmm0, qword [BASE]
   |  sseconst_abs xmm1, RDa
   |  andps xmm0, xmm1
@@ -1963,15 +1997,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov PC, [BASE-4]
   |  movsd qword [BASE-8], xmm0
   |  // fallthrough
-  |.else
-  |  fld qword [BASE]
-  |  fabs
-  |  // fallthrough
-  |->fff_resxmm0:  // Dummy.
-  |->fff_resn:
-  |  mov PC, [BASE-4]
-  |  fstp qword [BASE-8]
-  |.endif
   |
   |->fff_res1:
   |  mov RD, 1+1
@@ -1998,6 +2023,12 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov RAa, -8			// Results start at BASE+RA = BASE-8.
   |  jmp ->vm_return
   |
+  |.if X64
+  |.define fff_resfp, fff_resxmm0
+  |.else
+  |.define fff_resfp, fff_resn
+  |.endif
+  |
   |.macro math_round, func
   |  .ffunc math_ .. func
   |.if DUALNUM
@@ -2008,107 +2039,75 @@ static void build_subroutines(BuildCtx *ctx)
   |.else
   |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
   |.endif
-  |.if SSE
   |  movsd xmm0, qword [BASE]
-  |  call ->vm_ .. func
-  |  .if DUALNUM
-  |    cvtsd2si RB, xmm0
-  |    cmp RB, 0x80000000
-  |    jne ->fff_resi
-  |    cvtsi2sd xmm1, RB
-  |    ucomisd xmm0, xmm1
-  |    jp ->fff_resxmm0
-  |    je ->fff_resi
-  |  .endif
-  |  jmp ->fff_resxmm0
-  |.else
-  |  fld qword [BASE]
-  |  call ->vm_ .. func
-  |  .if DUALNUM
-  |    fist ARG1
-  |    mov RB, ARG1
-  |    cmp RB, 0x80000000; jne >2
-  |    fdup
-  |    fild ARG1
-  |    fcomparepp
-  |    jp ->fff_resn
-  |    jne ->fff_resn
-  |2:
-  |    fpop
-  |    jmp ->fff_resi
-  | .else
-  |    jmp ->fff_resn
-  | .endif
+  |  call ->vm_ .. func .. _sse
+  |.if DUALNUM
+  |  cvttsd2si RB, xmm0
+  |  cmp RB, 0x80000000
+  |  jne ->fff_resi
+  |  cvtsi2sd xmm1, RB
+  |  ucomisd xmm0, xmm1
+  |  jp ->fff_resxmm0
+  |  je ->fff_resi
   |.endif
+  |  jmp ->fff_resxmm0
   |.endmacro
   |
   |  math_round floor
   |  math_round ceil
   |
-  |.if SSE
   |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
-  |.else
-  |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
-  |.endif
   |
   |.ffunc math_log
   |  cmp NARGS:RD, 1+1; jne ->fff_fallback	// Exactly one argument.
   |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-  |  fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn
-  |
-  |.ffunc_n math_log10, fldlg2;	fyl2x;		jmp ->fff_resn
-  |.ffunc_n math_exp;	call ->vm_exp_x87;	jmp ->fff_resn
-  |
-  |.ffunc_n math_sin;	fsin;			jmp ->fff_resn
-  |.ffunc_n math_cos;	fcos;			jmp ->fff_resn
-  |.ffunc_n math_tan;	fptan; fpop;		jmp ->fff_resn
-  |
-  |.ffunc_n math_asin
-  |  fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
-  |  jmp ->fff_resn
-  |.ffunc_n math_acos
-  |  fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
-  |  jmp ->fff_resn
-  |.ffunc_n math_atan;	fld1; fpatan;		jmp ->fff_resn
+  |  movsd xmm0, qword [BASE]
+  |.if not X64
+  |  movsd FPARG1, xmm0
+  |.endif
+  |  mov RB, BASE
+  |  call extern log
+  |  mov BASE, RB
+  |  jmp ->fff_resfp
   |
   |.macro math_extern, func
-  |.if SSE
   |  .ffunc_nsse math_ .. func
-  |  .if not X64
-  |    movsd FPARG1, xmm0
-  |  .endif
-  |.else
-  |  .ffunc_n math_ .. func
-  |  fstp FPARG1
+  |.if not X64
+  |  movsd FPARG1, xmm0
   |.endif
   |  mov RB, BASE
-  |  call extern lj_vm_ .. func
+  |  call extern func
   |  mov BASE, RB
-  |  .if X64
-  |    jmp ->fff_resxmm0
-  |  .else
-  |    jmp ->fff_resn
-  |  .endif
+  |  jmp ->fff_resfp
   |.endmacro
   |
+  |.macro math_extern2, func
+  |  .ffunc_nnsse math_ .. func
+  |.if not X64
+  |  movsd FPARG1, xmm0
+  |  movsd FPARG3, xmm1
+  |.endif
+  |  mov RB, BASE
+  |  call extern func
+  |  mov BASE, RB
+  |  jmp ->fff_resfp
+  |.endmacro
+  |
+  |  math_extern log10
+  |  math_extern exp
+  |  math_extern sin
+  |  math_extern cos
+  |  math_extern tan
+  |  math_extern asin
+  |  math_extern acos
+  |  math_extern atan
   |  math_extern sinh
   |  math_extern cosh
   |  math_extern tanh
+  |  math_extern2 pow
+  |  math_extern2 atan2
+  |  math_extern2 fmod
   |
-  |->ff_math_deg:
-  |.if SSE
-  |.ffunc_nsse math_rad
-  |  mov CFUNC:RB, [BASE-8]
-  |  mulsd xmm0, qword CFUNC:RB->upvalue[0]
-  |  jmp ->fff_resxmm0
-  |.else
-  |.ffunc_n math_rad
-  |  mov CFUNC:RB, [BASE-8]
-  |  fmul qword CFUNC:RB->upvalue[0]
-  |  jmp ->fff_resn
-  |.endif
-  |
-  |.ffunc_nn math_atan2;	fpatan;		jmp ->fff_resn
   |.ffunc_nnr math_ldexp;	fscale; fpop1;	jmp ->fff_resn
   |
   |.ffunc_1 math_frexp
@@ -2123,65 +2122,34 @@ static void build_subroutines(BuildCtx *ctx)
   |  cmp RB, 0x00200000; jb >4
   |1:
   |  shr RB, 21; sub RB, RC		// Extract and unbias exponent.
-  |.if SSE
   |  cvtsi2sd xmm0, RB
-  |.else
-  |  mov TMP1, RB; fild TMP1
-  |.endif
   |  mov RB, [BASE-4]
   |  and RB, 0x800fffff			// Mask off exponent.
   |  or RB, 0x3fe00000			// Put mantissa in range [0.5,1) or 0.
   |  mov [BASE-4], RB
   |2:
-  |.if SSE
   |  movsd qword [BASE], xmm0
-  |.else
-  |  fstp qword [BASE]
-  |.endif
   |  mov RD, 1+2
   |  jmp ->fff_res
   |3:  // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
-  |.if SSE
   |  xorps xmm0, xmm0; jmp <2
-  |.else
-  |  fldz; jmp <2
-  |.endif
   |4:  // Handle denormals by multiplying with 2^54 and adjusting the bias.
-  |.if SSE
   |  movsd xmm0, qword [BASE]
   |  sseconst_hi xmm1, RBa, 43500000  // 2^54.
   |  mulsd xmm0, xmm1
   |  movsd qword [BASE-8], xmm0
-  |.else
-  |  fld qword [BASE]
-  |  mov TMP1, 0x5a800000; fmul TMP1	// x = x*2^54
-  |  fstp qword [BASE-8]
-  |.endif
   |  mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
   |
-  |.if SSE
   |.ffunc_nsse math_modf
-  |.else
-  |.ffunc_n math_modf
-  |.endif
   |  mov RB, [BASE+4]
   |  mov PC, [BASE-4]
   |  shl RB, 1; cmp RB, 0xffe00000; je >4	// +-Inf?
-  |.if SSE
   |  movaps xmm4, xmm0
-  |  call ->vm_trunc
+  |  call ->vm_trunc_sse
   |  subsd xmm4, xmm0
   |1:
   |  movsd qword [BASE-8], xmm0
   |  movsd qword [BASE], xmm4
-  |.else
-  |  fdup
-  |  call ->vm_trunc
-  |  fsub st1, st0
-  |1:
-  |  fstp qword [BASE-8]
-  |  fstp qword [BASE]
-  |.endif
   |  mov RC, [BASE-4]; mov RB, [BASE+4]
   |  xor RC, RB; js >3				// Need to adjust sign?
   |2:
@@ -2191,24 +2159,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  xor RB, 0x80000000; mov [BASE+4], RB	// Flip sign of fraction.
   |  jmp <2
   |4:
-  |.if SSE
   |  xorps xmm4, xmm4; jmp <1			// Return +-Inf and +-0.
-  |.else
-  |  fldz; fxch; jmp <1				// Return +-Inf and +-0.
-  |.endif
-  |
-  |.ffunc_nnr math_fmod
-  |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
-  |  fpop1
-  |  jmp ->fff_resn
-  |
-  |.if SSE
-  |.ffunc_nnsse math_pow;	call ->vm_pow;	jmp ->fff_resxmm0
-  |.else
-  |.ffunc_nn math_pow;		call ->vm_pow;	jmp ->fff_resn
-  |.endif
   |
-  |.macro math_minmax, name, cmovop, fcmovop, sseop
+  |.macro math_minmax, name, cmovop, sseop
   |  .ffunc name
   |  mov RA, 2
   |  cmp dword [BASE+4], LJ_TISNUM
@@ -2225,12 +2178,7 @@ static void build_subroutines(BuildCtx *ctx)
   |3:
   |  ja ->fff_fallback
   |  // Convert intermediate result to number and continue below.
-  |.if SSE
   |  cvtsi2sd xmm0, RB
-  |.else
-  |  mov TMP1, RB
-  |  fild TMP1
-  |.endif
   |  jmp >6
   |4:
   |  ja ->fff_fallback
@@ -2238,7 +2186,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  jae ->fff_fallback
   |.endif
   |
-  |.if SSE
   |  movsd xmm0, qword [BASE]
   |5:  // Handle numbers or integers.
   |  cmp RA, RD; jae ->fff_resxmm0
@@ -2257,48 +2204,13 @@ static void build_subroutines(BuildCtx *ctx)
   |  sseop xmm0, xmm1
   |  add RA, 1
   |  jmp <5
-  |.else
-  |  fld qword [BASE]
-  |5:  // Handle numbers or integers.
-  |  cmp RA, RD; jae ->fff_resn
-  |  cmp dword [BASE+RA*8-4], LJ_TISNUM
-  |.if DUALNUM
-  |  jb >6
-  |  ja >9
-  |  fild dword [BASE+RA*8-8]
-  |  jmp >7
-  |.else
-  |  jae >9
-  |.endif
-  |6:
-  |  fld qword [BASE+RA*8-8]
-  |7:
-  |  fucomi st1; fcmovop st1; fpop1
-  |  add RA, 1
-  |  jmp <5
-  |.endif
   |.endmacro
   |
-  |  math_minmax math_min, cmovg, fcmovnbe, minsd
-  |  math_minmax math_max, cmovl, fcmovbe, maxsd
-  |.if not SSE
-  |9:
-  |  fpop; jmp ->fff_fallback
-  |.endif
+  |  math_minmax math_min, cmovg, minsd
+  |  math_minmax math_max, cmovl, maxsd
   |
   |//-- String library -----------------------------------------------------
   |
-  |.ffunc_1 string_len
-  |  cmp dword [BASE+4], LJ_TSTR;  jne ->fff_fallback
-  |  mov STR:RB, [BASE]
-  |.if DUALNUM
-  |  mov RB, dword STR:RB->len; jmp ->fff_resi
-  |.elif SSE
-  |  cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
-  |.else
-  |  fild dword STR:RB->len; jmp ->fff_resn
-  |.endif
-  |
   |.ffunc string_byte			// Only handle the 1-arg case here.
   |  cmp NARGS:RD, 1+1;  jne ->fff_fallback
   |  cmp dword [BASE+4], LJ_TSTR;  jne ->fff_fallback
@@ -2309,10 +2221,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx RB, byte STR:RB[1]
   |.if DUALNUM
   |  jmp ->fff_resi
-  |.elif SSE
-  |  cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
   |.else
-  |  mov TMP1, RB; fild TMP1; jmp ->fff_resn
+  |  cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
   |.endif
   |
   |.ffunc string_char			// Only handle the 1-arg case here.
@@ -2324,16 +2234,11 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov RB, dword [BASE]
   |  cmp RB, 255;  ja ->fff_fallback
   |  mov TMP2, RB
-  |.elif SSE
+  |.else
   |  jae ->fff_fallback
   |  cvttsd2si RB, qword [BASE]
   |  cmp RB, 255;  ja ->fff_fallback
   |  mov TMP2, RB
-  |.else
-  |  jae ->fff_fallback
-  |  fld qword [BASE]
-  |  fistp TMP2
-  |  cmp TMP2, 255;  ja ->fff_fallback
   |.endif
   |.if X64
   |  mov TMP3, 1
@@ -2354,6 +2259,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.endif
   |  mov SAVE_PC, PC
   |  call extern lj_str_new		// (lua_State *L, char *str, size_t l)
+  |->fff_resstr:
   |  // GCstr * returned in eax (RD).
   |  mov BASE, L:RB->base
   |  mov PC, [BASE-4]
@@ -2371,14 +2277,10 @@ static void build_subroutines(BuildCtx *ctx)
   |  jne ->fff_fallback
   |  mov RB, dword [BASE+16]
   |  mov TMP2, RB
-  |.elif SSE
+  |.else
   |  jae ->fff_fallback
   |  cvttsd2si RB, qword [BASE+16]
   |  mov TMP2, RB
-  |.else
-  |  jae ->fff_fallback
-  |  fld qword [BASE+16]
-  |  fistp TMP2
   |.endif
   |1:
   |  cmp dword [BASE+4], LJ_TSTR;  jne ->fff_fallback
@@ -2393,12 +2295,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov RB, STR:RB->len
   |.if DUALNUM
   |  mov RA, dword [BASE+8]
-  |.elif SSE
-  |  cvttsd2si RA, qword [BASE+8]
   |.else
-  |  fld qword [BASE+8]
-  |  fistp ARG3
-  |  mov RA, ARG3
+  |  cvttsd2si RA, qword [BASE+8]
   |.endif
   |  mov RC, TMP2
   |  cmp RB, RC				// len < end? (unsigned compare)
@@ -2442,136 +2340,34 @@ static void build_subroutines(BuildCtx *ctx)
   |  xor RC, RC				// Zero length. Any ptr in RB is ok.
   |  jmp <4
   |
-  |.ffunc string_rep			// Only handle the 1-char case inline.
-  |  ffgccheck
-  |  cmp NARGS:RD, 2+1; jne ->fff_fallback	// Exactly 2 arguments.
-  |  cmp dword [BASE+4], LJ_TSTR;  jne ->fff_fallback
-  |  cmp dword [BASE+12], LJ_TISNUM
-  |  mov STR:RB, [BASE]
-  |.if DUALNUM
-  |  jne ->fff_fallback
-  |  mov RC, dword [BASE+8]
-  |.elif SSE
-  |  jae ->fff_fallback
-  |  cvttsd2si RC, qword [BASE+8]
-  |.else
-  |  jae ->fff_fallback
-  |  fld qword [BASE+8]
-  |  fistp TMP2
-  |  mov RC, TMP2
-  |.endif
-  |  test RC, RC
-  |  jle ->fff_emptystr			// Count <= 0? (or non-int)
-  |  cmp dword STR:RB->len, 1
-  |  jb ->fff_emptystr			// Zero length string?
-  |  jne ->fff_fallback_2		// Fallback for > 1-char strings.
-  |  cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC;  jb ->fff_fallback_2
-  |  movzx RA, byte STR:RB[1]
-  |  mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
-  |.if X64
-  |  mov TMP3, RC
-  |.else
-  |  mov ARG3, RC
-  |.endif
-  |1:  // Fill buffer with char. Yes, this is suboptimal code (do you care?).
-  |  mov [RB], RAL
-  |  add RB, 1
-  |  sub RC, 1
-  |  jnz <1
-  |  mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
-  |  jmp ->fff_newstr
-  |
-  |.ffunc_1 string_reverse
-  |  ffgccheck
-  |  cmp dword [BASE+4], LJ_TSTR;  jne ->fff_fallback
-  |  mov STR:RB, [BASE]
-  |  mov RC, STR:RB->len
-  |  test RC, RC
-  |  jz ->fff_emptystr			// Zero length string?
-  |  cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC;  jb ->fff_fallback_1
-  |  add RB, #STR
-  |  mov TMP2, PC			// Need another temp register.
-  |.if X64
-  |  mov TMP3, RC
-  |.else
-  |  mov ARG3, RC
-  |.endif
-  |  mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
-  |1:
-  |  movzx RA, byte [RB]
-  |  add RB, 1
-  |  sub RC, 1
-  |  mov [PC+RC], RAL
-  |  jnz <1
-  |  mov RD, PC
-  |  mov PC, TMP2
-  |  jmp ->fff_newstr
-  |
-  |.macro ffstring_case, name, lo, hi
-  |  .ffunc_1 name
+  |.macro ffstring_op, name
+  |  .ffunc_1 string_ .. name
   |  ffgccheck
   |  cmp dword [BASE+4], LJ_TSTR;  jne ->fff_fallback
-  |  mov STR:RB, [BASE]
-  |  mov RC, STR:RB->len
-  |  cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC;  jb ->fff_fallback_1
-  |  add RB, #STR
-  |  mov TMP2, PC			// Need another temp register.
-  |.if X64
-  |  mov TMP3, RC
-  |.else
-  |  mov ARG3, RC
-  |.endif
-  |  mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
-  |  jmp >3
-  |1:  // ASCII case conversion. Yes, this is suboptimal code (do you care?).
-  |  movzx RA, byte [RB+RC]
-  |  cmp RA, lo
-  |  jb >2
-  |  cmp RA, hi
-  |  ja >2
-  |  xor RA, 0x20
-  |2:
-  |  mov [PC+RC], RAL
-  |3:
-  |  sub RC, 1
-  |  jns <1
-  |  mov RD, PC
-  |  mov PC, TMP2
-  |  jmp ->fff_newstr
+  |  mov L:RB, SAVE_L
+  |   lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]	// FCARG1 = address of the global tmpbuf.
+  |  mov L:RB->base, BASE		// Store BASE to L->base before FCARG2 (== BASE) is overwritten.
+  |  mov STR:FCARG2, [BASE]		// Caveat: FCARG2 == BASE
+  |   mov RC, SBUF:FCARG1->b		// Set sbuf->p = sbuf->b (presumably resets the buffer -- confirm).
+  |   mov SBUF:FCARG1->L, L:RB		// sbuf->L = L.
+  |   mov SBUF:FCARG1->p, RC
+  |  mov SAVE_PC, PC
+  |  call extern lj_buf_putstr_ .. name .. @8
+  |  mov FCARG1, eax			// Pass the first call's return value to the second call.
+  |  call extern lj_buf_tostr@4
+  |  jmp ->fff_resstr			// Shared tail: reloads BASE from L:RB->base.
   |.endmacro
   |
-  |ffstring_case string_lower, 0x41, 0x5a
-  |ffstring_case string_upper, 0x61, 0x7a
-  |
-  |//-- Table library ------------------------------------------------------
-  |
-  |.ffunc_1 table_getn
-  |  cmp dword [BASE+4], LJ_TTAB;  jne ->fff_fallback
-  |  mov RB, BASE			// Save BASE.
-  |  mov TAB:FCARG1, [BASE]
-  |  call extern lj_tab_len@4		// LJ_FASTCALL (GCtab *t)
-  |  // Length of table returned in eax (RD).
-  |  mov BASE, RB			// Restore BASE.
-  |.if DUALNUM
-  |  mov RB, RD; jmp ->fff_resi
-  |.elif SSE
-  |  cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
-  |.else
-  |  mov ARG1, RD; fild ARG1; jmp ->fff_resn
-  |.endif
+  |ffstring_op reverse
+  |ffstring_op lower
+  |ffstring_op upper
   |
   |//-- Bit library --------------------------------------------------------
   |
-  |.define TOBIT_BIAS, 0x59c00000	// 2^52 + 2^51 (float, not double!).
-  |
   |.macro .ffunc_bit, name, kind, fdef
   |  fdef name
   |.if kind == 2
-  |.if SSE
   |  sseconst_tobit xmm1, RBa
-  |.else
-  |  mov TMP1, TOBIT_BIAS
-  |.endif
   |.endif
   |  cmp dword [BASE+4], LJ_TISNUM
   |.if DUALNUM
@@ -2587,24 +2383,12 @@ static void build_subroutines(BuildCtx *ctx)
   |.else
   |  jae ->fff_fallback
   |.endif
-  |.if SSE
   |  movsd xmm0, qword [BASE]
   |.if kind < 2
   |  sseconst_tobit xmm1, RBa
   |.endif
   |  addsd xmm0, xmm1
   |  movd RB, xmm0
-  |.else
-  |  fld qword [BASE]
-  |.if kind < 2
-  |  mov TMP1, TOBIT_BIAS
-  |.endif
-  |  fadd TMP1
-  |  fstp FPARG1
-  |.if kind > 0
-  |  mov RB, ARG1
-  |.endif
-  |.endif
   |2:
   |.endmacro
   |
@@ -2613,15 +2397,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.endmacro
   |
   |.ffunc_bit bit_tobit, 0
-  |.if DUALNUM or SSE
-  |.if not SSE
-  |  mov RB, ARG1
-  |.endif
   |  jmp ->fff_resbit
-  |.else
-  |  fild ARG1
-  |  jmp ->fff_resn
-  |.endif
   |
   |.macro .ffunc_bit_op, name, ins
   |  .ffunc_bit name, 2
@@ -2641,17 +2417,10 @@ static void build_subroutines(BuildCtx *ctx)
   |.else
   |  jae ->fff_fallback_bit_op
   |.endif
-  |.if SSE
   |  movsd xmm0, qword [RD]
   |  addsd xmm0, xmm1
   |  movd RA, xmm0
   |  ins RB, RA
-  |.else
-  |  fld qword [RD]
-  |  fadd TMP1
-  |  fstp FPARG1
-  |  ins RB, ARG1
-  |.endif
   |  sub RD, 8
   |  jmp <1
   |.endmacro
@@ -2668,15 +2437,10 @@ static void build_subroutines(BuildCtx *ctx)
   |  not RB
   |.if DUALNUM
   |  jmp ->fff_resbit
-  |.elif SSE
+  |.else
   |->fff_resbit:
   |  cvtsi2sd xmm0, RB
   |  jmp ->fff_resxmm0
-  |.else
-  |->fff_resbit:
-  |  mov ARG1, RB
-  |  fild ARG1
-  |  jmp ->fff_resn
   |.endif
   |
   |->fff_fallback_bit_op:
@@ -2689,22 +2453,13 @@ static void build_subroutines(BuildCtx *ctx)
   |  // Note: no inline conversion from number for 2nd argument!
   |  cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
   |  mov RA, dword [BASE+8]
-  |.elif SSE
+  |.else
   |  .ffunc_nnsse name
   |  sseconst_tobit xmm2, RBa
   |  addsd xmm0, xmm2
   |  addsd xmm1, xmm2
   |  movd RB, xmm0
   |  movd RA, xmm1
-  |.else
-  |  .ffunc_nn name
-  |  mov TMP1, TOBIT_BIAS
-  |  fadd TMP1
-  |  fstp FPARG3
-  |  fadd TMP1
-  |  fstp FPARG1
-  |  mov RA, ARG3
-  |  mov RB, ARG1
   |.endif
   |  ins RB, cl				// Assumes RA is ecx.
   |  jmp ->fff_resbit
@@ -2838,7 +2593,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov FCARG2, PC			// Caveat: FCARG2 == BASE
   |  mov FCARG1, L:RB
   |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
-  |  call extern lj_dispatch_ins@8	// (lua_State *L, BCIns *pc)
+  |  call extern lj_dispatch_ins@8	// (lua_State *L, const BCIns *pc)
   |3:
   |  mov BASE, L:RB->base
   |4:
@@ -2908,6 +2663,82 @@ static void build_subroutines(BuildCtx *ctx)
   |  shr RD, 3
   |  add NARGS:RD, 1
   |  jmp RBa
+  |
+  |->cont_stitch:			// Trace stitching.
+  |.if JIT
+  |  // BASE = base, RC = result, RB = mbase
+  |  mov RA, [RB-24]			// Save previous trace number.
+  |  mov TMP1, RA			// Spill it to TMP1; RA is reused below.
+  |  mov TMP3, DISPATCH			// Need one more register.
+  |  mov DISPATCH, MULTRES		// DISPATCH now serves as the copy counter.
+  |  movzx RA, PC_RA
+  |  lea RA, [BASE+RA*8]		// Call base.
+  |  sub DISPATCH, 1
+  |  jz >2				// MULTRES-1 == 0: nothing to copy.
+  |1:  // Move results down.
+  |.if X64
+  |  mov RBa, [RC]			// Copy one 8-byte slot from [RC] to [RA].
+  |  mov [RA], RBa
+  |.else
+  |  mov RB, [RC]			// Copy one 8-byte slot as two dwords.
+  |  mov [RA], RB
+  |  mov RB, [RC+4]
+  |  mov [RA+4], RB
+  |.endif
+  |  add RC, 8
+  |  add RA, 8
+  |  sub DISPATCH, 1
+  |  jnz <1
+  |2:
+  |  movzx RC, PC_RA
+  |  movzx RB, PC_RB
+  |  add RC, RB
+  |  lea RC, [BASE+RC*8-8]		// RC = BASE + (PC_RA+PC_RB)*8 - 8: limit for result slots.
+  |3:
+  |  cmp RC, RA
+  |  ja >9				// More results wanted?
+  |
+  |  mov DISPATCH, TMP3			// Restore DISPATCH.
+  |  mov RB, TMP1			// Get previous trace number.
+  |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+  |  mov TRACE:RD, [RA+RB*4]		// Index the trace table by that number.
+  |  test TRACE:RD, TRACE:RD
+  |  jz ->cont_nop			// Null entry: continue via cont_nop.
+  |  movzx RD, word TRACE:RD->link
+  |  cmp RD, RB
+  |  je ->cont_nop			// Blacklisted.
+  |  test RD, RD
+  |  jne =>BC_JLOOP			// Jump to stitched trace.
+  |
+  |  // Stitch a new trace to the previous trace.
+  |  mov [DISPATCH+DISPATCH_J(exitno)], RB	// exitno = previous trace number.
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE		// Store BASE to L->base before the call.
+  |  mov FCARG2, PC
+  |  lea FCARG1, [DISPATCH+GG_DISP2J]
+  |  mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
+  |  call extern lj_dispatch_stitch@8	// (jit_State *J, const BCIns *pc)
+  |  mov BASE, L:RB->base		// Reload BASE after the call.
+  |  jmp ->cont_nop
+  |
+  |9:  // Fill up results with nil.
+  |  mov dword [RA+4], LJ_TNIL		// Write the nil tag into the slot at RA.
+  |  add RA, 8
+  |  jmp <3
+  |.endif
+  |
+  |->vm_profhook:			// Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE		// Store BASE to L->base before FCARG2 (== BASE) is overwritten.
+  |  mov FCARG2, PC			// Caveat: FCARG2 == BASE
+  |  mov FCARG1, L:RB
+  |  call extern lj_dispatch_profile@8	// (lua_State *L, const BCIns *pc)
+  |  mov BASE, L:RB->base		// Reload BASE after the call.
+  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+  |  sub PC, 4				// Step PC back by one 4-byte instruction.
+  |  jmp ->cont_nop
+#endif
   |
   |//-----------------------------------------------------------------------
   |//-- Trace exit handler -------------------------------------------------
@@ -2961,10 +2792,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
   |.endif
   |  // Caveat: RB is ebp.
-  |  mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
+  |  mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
   |  mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
   |  mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
-  |  mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
   |  mov L:RB->base, BASE
   |.if X64WIN
   |  lea CARG2, [rsp+4*8]
@@ -2974,6 +2804,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  lea FCARG2, [esp+16]
   |.endif
   |  lea FCARG1, [DISPATCH+GG_DISP2J]
+  |  mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
   |  call extern lj_trace_exit@8	// (jit_State *J, ExitState *ex)
   |  // MULTRES or negated error code returned in eax (RD).
   |  mov RAa, L:RB->cframe
@@ -3020,12 +2851,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov r13, TMPa
   |  mov r12, TMPQ
   |.endif
-  |  test RD, RD; js >3			// Check for error from exit.
+  |  test RD, RD; js >9			// Check for error from exit.
+  |  mov L:RB, SAVE_L
   |  mov MULTRES, RD
   |  mov LFUNC:KBASE, [BASE-8]
   |  mov KBASE, LFUNC:KBASE->pc
   |  mov KBASE, [KBASE+PC2PROTO(k)]
-  |  mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
+  |  mov L:RB->base, BASE
+  |  mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
   |  set_vmstate INTERP
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  mov RC, [PC]
@@ -3034,16 +2867,31 @@ static void build_subroutines(BuildCtx *ctx)
   |  add PC, 4
   |  shr RC, 16
   |  cmp OP, BC_FUNCF			// Function header?
-  |  jb >2
-  |  mov RC, MULTRES			// RC/RD holds nres+1.
+  |  jb >3
+  |  cmp OP, BC_FUNCC+2			// Fast function?
+  |  jae >4
   |2:
+  |  mov RC, MULTRES			// RC/RD holds nres+1.
+  |3:
   |.if X64
   |  jmp aword [DISPATCH+OP*8]
   |.else
   |  jmp aword [DISPATCH+OP*4]
   |.endif
   |
-  |3:  // Rethrow error from the right C frame.
+  |4:  // Check frame below fast function.
+  |  mov RC, [BASE-4]			// Word at [BASE-4] (the slot PC is loaded from elsewhere).
+  |  test RC, FRAME_TYPE
+  |  jnz <2				// Trace stitching continuation?
+  |  // Otherwise set KBASE for Lua function below fast function.
+  |  movzx RC, byte [RC-3]		// x = byte at [RC-3] (presumably the caller's RA operand -- confirm).
+  |  not RCa				// ~x == -x-1.
+  |  mov LFUNC:KBASE, [BASE+RC*8-8]	// Function loaded from [BASE-(x+2)*8].
+  |  mov KBASE, LFUNC:KBASE->pc
+  |  mov KBASE, [KBASE+PC2PROTO(k)]	// KBASE = k field reached through func->pc.
+  |  jmp <2
+  |
+  |9:  // Rethrow error from the right C frame.
   |  neg RD
   |  mov FCARG1, L:RB
   |  mov FCARG2, RD
@@ -3055,27 +2903,18 @@ static void build_subroutines(BuildCtx *ctx)
   |//-----------------------------------------------------------------------
   |
   |// FP value rounding. Called by math.floor/math.ceil fast functions
-  |// and from JIT code.
-  |
-  |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified.
-  |.macro vm_round_x87, mode1, mode2
-  |  fnstcw word [esp+4]		// Caveat: overwrites ARG1 and ARG2.
-  |  mov [esp+8], eax
-  |  mov ax, mode1
-  |  or ax, [esp+4]
-  |.if mode2 ~= 0xffff
-  |  and ax, mode2
-  |.endif
-  |  mov [esp+6], ax
-  |  fldcw word [esp+6]
-  |  frndint
-  |  fldcw word [esp+4]
-  |  mov eax, [esp+8]
+  |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
+  |.macro vm_round, name, mode, cond	// Emits ->name and ->name.._sse for rounding mode `mode`.
+  |->name:
+  |.if not X64 and cond			// Non-X64 builds with cond set get a stack-argument wrapper.
+  |  movsd xmm0, qword [esp+4]		// Load the double argument from the stack.
+  |  call ->name .. _sse
+  |  movsd qword [esp+4], xmm0  // Overwrite callee-owned arg.
+  |  fld qword [esp+4]			// Return the result on the x87 stack.
   |  ret
-  |.endmacro
+  |.endif
   |
-  |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
-  |.macro vm_round_sse, mode
+  |->name .. _sse:
   |  sseconst_abs xmm2, RDa
   |  sseconst_2p52 xmm3, RDa
   |  movaps xmm1, xmm0
@@ -3094,232 +2933,56 @@ static void build_subroutines(BuildCtx *ctx)
   |  orpd xmm1, xmm2			// Merge sign bit back in.
   |.else
   |  addsd xmm1, xmm3			// (|x| + 2^52) - 2^52
-  |  subsd xmm1, xmm3
-  |  orpd xmm1, xmm2			// Merge sign bit back in.
-  |  .if mode == 1		// ceil(x)?
-  |    sseconst_m1 xmm2, RDa		// Must subtract -1 to preserve -0.
-  |    cmpsd xmm0, xmm1, 6		// x > result?
-  |  .else			// floor(x)?
-  |    sseconst_1 xmm2, RDa
-  |    cmpsd xmm0, xmm1, 1		// x < result?
-  |  .endif
-  |  andpd xmm0, xmm2
-  |  subsd xmm1, xmm0			// If yes, subtract +-1.
-  |.endif
-  |  movaps xmm0, xmm1
-  |1:
-  |  ret
-  |.endmacro
-  |
-  |.macro vm_round, name, ssemode, mode1, mode2
-  |->name:
-  |.if not SSE
-  |  vm_round_x87 mode1, mode2
-  |.endif
-  |->name .. _sse:
-  |  vm_round_sse ssemode
-  |.endmacro
-  |
-  |  vm_round vm_floor, 0, 0x0400, 0xf7ff
-  |  vm_round vm_ceil,  1, 0x0800, 0xfbff
-  |  vm_round vm_trunc, 2, 0x0c00, 0xffff
-  |
-  |// FP modulo x%y. Called by BC_MOD* and vm_arith.
-  |->vm_mod:
-  |.if SSE
-  |// Args in xmm0/xmm1, return value in xmm0.
-  |// Caveat: xmm0-xmm5 and RC (eax) modified!
-  |  movaps xmm5, xmm0
-  |  divsd xmm0, xmm1
-  |  sseconst_abs xmm2, RDa
-  |  sseconst_2p52 xmm3, RDa
-  |  movaps xmm4, xmm0
-  |  andpd xmm4, xmm2			// |x/y|
-  |  ucomisd xmm3, xmm4			// No truncation if 2^52 <= |x/y|.
-  |  jbe >1
-  |  andnpd xmm2, xmm0			// Isolate sign bit.
-  |  addsd xmm4, xmm3			// (|x/y| + 2^52) - 2^52
-  |  subsd xmm4, xmm3
-  |  orpd xmm4, xmm2			// Merge sign bit back in.
-  |  sseconst_1 xmm2, RDa
-  |  cmpsd xmm0, xmm4, 1		// x/y < result?
-  |  andpd xmm0, xmm2
-  |  subsd xmm4, xmm0			// If yes, subtract 1.0.
-  |  movaps xmm0, xmm5
-  |  mulsd xmm1, xmm4
-  |  subsd xmm0, xmm1
-  |  ret
-  |1:
-  |  mulsd xmm1, xmm0
-  |  movaps xmm0, xmm5
-  |  subsd xmm0, xmm1
-  |  ret
-  |.else
-  |// Args/ret on x87 stack (y on top). No xmm registers modified.
-  |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
-  |  fld st1
-  |  fdiv st1
-  |  fnstcw word [esp+4]
-  |  mov ax, 0x0400
-  |  or ax, [esp+4]
-  |  and ax, 0xf7ff
-  |  mov [esp+6], ax
-  |  fldcw word [esp+6]
-  |  frndint
-  |  fldcw word [esp+4]
-  |  fmulp st1
-  |  fsubp st1
-  |  ret
-  |.endif
-  |
-  |// FP log2(x). Called by math.log(x, base).
-  |->vm_log2:
-  |.if X64WIN
-  |  movsd qword [rsp+8], xmm0		// Use scratch area.
-  |  fld1
-  |  fld qword [rsp+8]
-  |  fyl2x
-  |  fstp qword [rsp+8]
-  |  movsd xmm0, qword [rsp+8]
-  |.elif X64
-  |  movsd qword [rsp-8], xmm0		// Use red zone.
-  |  fld1
-  |  fld qword [rsp-8]
-  |  fyl2x
-  |  fstp qword [rsp-8]
-  |  movsd xmm0, qword [rsp-8]
-  |.else
-  |  fld1
-  |  fld qword [esp+4]
-  |  fyl2x
-  |.endif
-  |  ret
-  |
-  |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
-  |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
-  |// Caveat: needs 3 slots on x87 stack!
-  |->vm_exp_x87:
-  |  fldl2e; fmulp st1				// e^x ==> 2^(x*log2(e))
-  |->vm_exp2_x87:
-  |  .if X64WIN
-  |    .define expscratch, dword [rsp+8]	// Use scratch area.
-  |  .elif X64
-  |    .define expscratch, dword [rsp-8]	// Use red zone.
-  |  .else
-  |    .define expscratch, dword [esp+4]	// Needs 4 byte scratch area.
-  |  .endif
-  |  fst expscratch				// Caveat: overwrites ARG1.
-  |  cmp expscratch, 0x7f800000; je >1		// Special case: e^+Inf = +Inf
-  |  cmp expscratch, 0xff800000; je >2		// Special case: e^-Inf = 0
-  |->vm_exp2raw:  // Entry point for vm_pow. Without +-Inf check.
-  |  fdup; frndint; fsub st1, st0; fxch		// Split into frac/int part.
-  |  f2xm1; fld1; faddp st1; fscale; fpop1	// ==> (2^frac-1 +1) << int
-  |1:
-  |  ret
-  |2:
-  |  fpop; fldz; ret
-  |
-  |// Generic power function x^y. Called by BC_POW, math.pow fast function,
-  |// and vm_arith.
-  |// Args/ret on x87 stack (y on top). RC (eax) modified.
-  |// Caveat: needs 3 slots on x87 stack!
-  |->vm_pow:
-  |.if not SSE
-  |  fist dword [esp+4]			// Store/reload int before comparison.
-  |  fild dword [esp+4]			// Integral exponent used in vm_powi.
-  |  fucomip st1
-  |  jnz >8				// Branch for FP exponents.
-  |  jp >9				// Branch for NaN exponent.
-  |  fpop				// Pop y and fallthrough to vm_powi.
-  |
-  |// FP/int power function x^i. Arg1/ret on x87 stack.
-  |// Arg2 (int) on C stack. RC (eax) modified.
-  |// Caveat: needs 2 slots on x87 stack!
-  |  mov eax, [esp+4]
-  |  cmp eax, 1; jle >6			// i<=1?
-  |  // Now 1 < (unsigned)i <= 0x80000000.
-  |1:  // Handle leading zeros.
-  |  test eax, 1; jnz >2
-  |  fmul st0
-  |  shr eax, 1
-  |  jmp <1
-  |2:
-  |  shr eax, 1; jz >5
-  |  fdup
-  |3:  // Handle trailing bits.
-  |  fmul st0
-  |  shr eax, 1; jz >4
-  |  jnc <3
-  |  fmul st1, st0
-  |  jmp <3
-  |4:
-  |  fmulp st1
-  |5:
-  |  ret
-  |6:
-  |  je <5				// x^1 ==> x
-  |  jb >7
-  |  fld1; fdivrp st1
-  |  neg eax
-  |  cmp eax, 1; je <5			// x^-1 ==> 1/x
-  |  jmp <1				// x^-i ==> (1/x)^i
-  |7:
-  |  fpop; fld1				// x^0 ==> 1
-  |  ret
-  |
-  |8:  // FP/FP power function x^y.
-  |  fst dword [esp+4]
-  |  fxch
-  |  fst dword [esp+8]
-  |  mov eax, [esp+4]; shl eax, 1
-  |  cmp eax, 0xff000000; je >2			// x^+-Inf?
-  |  mov eax, [esp+8]; shl eax, 1; je >4	// +-0^y?
-  |  cmp eax, 0xff000000; je >4			// +-Inf^y?
-  |  fyl2x
-  |  jmp ->vm_exp2raw
-  |
-  |9:  // Handle x^NaN.
-  |  fld1
-  |  fucomip st2
-  |  je >1				// 1^NaN ==> 1
-  |  fxch				// x^NaN ==> NaN
+  |  subsd xmm1, xmm3
+  |  orpd xmm1, xmm2			// Merge sign bit back in.
+  |  .if mode == 1		// ceil(x)?
+  |    sseconst_m1 xmm2, RDa		// Must subtract -1 to preserve -0.
+  |    cmpsd xmm0, xmm1, 6		// x > result?
+  |  .else			// floor(x)?
+  |    sseconst_1 xmm2, RDa
+  |    cmpsd xmm0, xmm1, 1		// x < result?
+  |  .endif
+  |  andpd xmm0, xmm2
+  |  subsd xmm1, xmm0			// If yes, subtract +-1.
+  |.endif
+  |  movaps xmm0, xmm1
   |1:
-  |  fpop
   |  ret
+  |.endmacro
   |
-  |2:  // Handle x^+-Inf.
-  |  fabs
-  |  fld1
-  |  fucomip st1
-  |  je >3					// +-1^+-Inf ==> 1
-  |  fpop; fabs; fldz; mov eax, 0; setc al
-  |  ror eax, 1; xor eax, [esp+4]; jns >3	// |x|<>1, x^+-Inf ==> +Inf/0
-  |  fxch
-  |3:
-  |  fpop1; fabs
-  |  ret
+  |  vm_round vm_floor, 0, 1
+  |  vm_round vm_ceil,  1, JIT
+  |  vm_round vm_trunc, 2, JIT
   |
-  |4:  // Handle +-0^y or +-Inf^y.
-  |  cmp dword [esp+4], 0; jge <3		// y >= 0, x^y ==> |x|
-  |  fpop; fpop
-  |  test eax, eax; jz >5			// y < 0, +-0^y ==> +Inf
-  |  fldz					// y < 0, +-Inf^y ==> 0
+  |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+  |->vm_mod:
+  |// Args in xmm0/xmm1, return value in xmm0.
+  |// Caveat: xmm0-xmm5 and RC (eax) modified!
+  |  movaps xmm5, xmm0
+  |  divsd xmm0, xmm1
+  |  sseconst_abs xmm2, RDa
+  |  sseconst_2p52 xmm3, RDa
+  |  movaps xmm4, xmm0
+  |  andpd xmm4, xmm2			// |x/y|
+  |  ucomisd xmm3, xmm4			// No truncation if 2^52 <= |x/y|.
+  |  jbe >1
+  |  andnpd xmm2, xmm0			// Isolate sign bit.
+  |  addsd xmm4, xmm3			// (|x/y| + 2^52) - 2^52
+  |  subsd xmm4, xmm3
+  |  orpd xmm4, xmm2			// Merge sign bit back in.
+  |  sseconst_1 xmm2, RDa
+  |  cmpsd xmm0, xmm4, 1		// x/y < result?
+  |  andpd xmm0, xmm2
+  |  subsd xmm4, xmm0			// If yes, subtract 1.0.
+  |  movaps xmm0, xmm5
+  |  mulsd xmm1, xmm4
+  |  subsd xmm0, xmm1
   |  ret
-  |5:
-  |  mov dword [esp+4], 0x7f800000		// Return +Inf.
-  |  fld dword [esp+4]
+  |1:
+  |  mulsd xmm1, xmm0
+  |  movaps xmm0, xmm5
+  |  subsd xmm0, xmm1
   |  ret
-  |.endif
-  |
-  |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
-  |// Needs 16 byte scratch area for x86. Also called from JIT code.
-  |->vm_pow_sse:
-  |  cvtsd2si eax, xmm1
-  |  cvtsi2sd xmm2, eax
-  |  ucomisd xmm1, xmm2
-  |  jnz >8				// Branch for FP exponents.
-  |  jp >9				// Branch for NaN exponent.
-  |  // Fallthrough to vm_powi_sse.
   |
   |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
   |->vm_powi_sse:
@@ -3356,287 +3019,6 @@ static void build_subroutines(BuildCtx *ctx)
   |  sseconst_1 xmm0, RDa
   |  ret
   |
-  |8:  // FP/FP power function x^y.
-  |.if X64
-  |  movd rax, xmm1; shl rax, 1
-  |  rol rax, 12; cmp rax, 0xffe; je >2		// x^+-Inf?
-  |  movd rax, xmm0; shl rax, 1; je >4		// +-0^y?
-  |  rol rax, 12; cmp rax, 0xffe; je >5		// +-Inf^y?
-  |  .if X64WIN
-  |    movsd qword [rsp+16], xmm1		// Use scratch area.
-  |    movsd qword [rsp+8], xmm0
-  |    fld qword [rsp+16]
-  |    fld qword [rsp+8]
-  |  .else
-  |    movsd qword [rsp-16], xmm1		// Use red zone.
-  |    movsd qword [rsp-8], xmm0
-  |    fld qword [rsp-16]
-  |    fld qword [rsp-8]
-  |  .endif
-  |.else
-  |  movsd qword [esp+12], xmm1			// Needs 16 byte scratch area.
-  |  movsd qword [esp+4], xmm0
-  |  cmp dword [esp+12], 0; jne >1
-  |  mov eax, [esp+16]; shl eax, 1
-  |  cmp eax, 0xffe00000; je >2			// x^+-Inf?
-  |1:
-  |  cmp dword [esp+4], 0; jne >1
-  |  mov eax, [esp+8]; shl eax, 1; je >4	// +-0^y?
-  |  cmp eax, 0xffe00000; je >5			// +-Inf^y?
-  |1:
-  |  fld qword [esp+12]
-  |  fld qword [esp+4]
-  |.endif
-  |  fyl2x					// y*log2(x)
-  |  fdup; frndint; fsub st1, st0; fxch		// Split into frac/int part.
-  |  f2xm1; fld1; faddp st1; fscale; fpop1	// ==> (2^frac-1 +1) << int
-  |.if X64WIN
-  |  fstp qword [rsp+8]				// Use scratch area.
-  |  movsd xmm0, qword [rsp+8]
-  |.elif X64
-  |  fstp qword [rsp-8]				// Use red zone.
-  |  movsd xmm0, qword [rsp-8]
-  |.else
-  |  fstp qword [esp+4]				// Needs 8 byte scratch area.
-  |  movsd xmm0, qword [esp+4]
-  |.endif
-  |  ret
-  |
-  |9:  // Handle x^NaN.
-  |  sseconst_1 xmm2, RDa
-  |  ucomisd xmm0, xmm2; je >1			// 1^NaN ==> 1
-  |  movaps xmm0, xmm1				// x^NaN ==> NaN
-  |1:
-  |  ret
-  |
-  |2:  // Handle x^+-Inf.
-  |  sseconst_abs xmm2, RDa
-  |  andpd xmm0, xmm2				// |x|
-  |  sseconst_1 xmm2, RDa
-  |  ucomisd xmm0, xmm2; je <1			// +-1^+-Inf ==> 1
-  |  movmskpd eax, xmm1
-  |  xorps xmm0, xmm0
-  |  mov ah, al; setc al; xor al, ah; jne <1	// |x|<>1, x^+-Inf ==> +Inf/0
-  |3:
-  |  sseconst_hi xmm0, RDa, 7ff00000  // +Inf
-  |  ret
-  |
-  |4:  // Handle +-0^y.
-  |  movmskpd eax, xmm1; test eax, eax; jnz <3	// y < 0, +-0^y ==> +Inf
-  |  xorps xmm0, xmm0				// y >= 0, +-0^y ==> 0
-  |  ret
-  |
-  |5:  // Handle +-Inf^y.
-  |  movmskpd eax, xmm1; test eax, eax; jz <3	// y >= 0, +-Inf^y ==> +Inf
-  |  xorps xmm0, xmm0				// y < 0, +-Inf^y ==> 0
-  |  ret
-  |
-  |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
-  |// Computes fpm(x) for extended math functions. ORDER FPM.
-  |->vm_foldfpm:
-  |.if JIT
-  |.if X64
-  |  .if X64WIN
-  |    .define fpmop, CARG2d
-  |  .else
-  |    .define fpmop, CARG1d
-  |  .endif
-  |  cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
-  |  cmp fpmop, 3; jb ->vm_trunc; ja >2
-  |  sqrtsd xmm0, xmm0; ret
-  |2:
-  |  .if X64WIN
-  |    movsd qword [rsp+8], xmm0	// Use scratch area.
-  |    fld qword [rsp+8]
-  |  .else
-  |    movsd qword [rsp-8], xmm0	// Use red zone.
-  |    fld qword [rsp-8]
-  |  .endif
-  |  cmp fpmop, 5; ja >2
-  |  .if X64WIN; pop rax; .endif
-  |  je >1
-  |  call ->vm_exp_x87
-  |  .if X64WIN; push rax; .endif
-  |  jmp >7
-  |1:
-  |  call ->vm_exp2_x87
-  |  .if X64WIN; push rax; .endif
-  |  jmp >7
-  |2: ; cmp fpmop, 7; je >1; ja >2
-  |  fldln2; fxch; fyl2x; jmp >7
-  |1: ; fld1; fxch; fyl2x; jmp >7
-  |2: ; cmp fpmop, 9; je >1; ja >2
-  |  fldlg2; fxch; fyl2x; jmp >7
-  |1: ; fsin; jmp >7
-  |2: ; cmp fpmop, 11; je >1; ja >9
-  |   fcos; jmp >7
-  |1: ; fptan; fpop
-  |7:
-  |  .if X64WIN
-  |    fstp qword [rsp+8]		// Use scratch area.
-  |    movsd xmm0, qword [rsp+8]
-  |  .else
-  |    fstp qword [rsp-8]		// Use red zone.
-  |    movsd xmm0, qword [rsp-8]
-  |  .endif
-  |  ret
-  |.else  // x86 calling convention.
-  |  .define fpmop, eax
-  |.if SSE
-  |  mov fpmop, [esp+12]
-  |  movsd xmm0, qword [esp+4]
-  |  cmp fpmop, 1; je >1; ja >2
-  |  call ->vm_floor; jmp >7
-  |1: ; call ->vm_ceil; jmp >7
-  |2: ; cmp fpmop, 3; je >1; ja >2
-  |  call ->vm_trunc; jmp >7
-  |1:
-  |  sqrtsd xmm0, xmm0
-  |7:
-  |  movsd qword [esp+4], xmm0	// Overwrite callee-owned args.
-  |  fld qword [esp+4]
-  |  ret
-  |2: ; fld qword [esp+4]
-  |  cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
-  |2: ; cmp fpmop, 7; je >1; ja >2
-  |  fldln2; fxch; fyl2x; ret
-  |1: ; fld1; fxch; fyl2x; ret
-  |2: ; cmp fpmop, 9; je >1; ja >2
-  |  fldlg2; fxch; fyl2x; ret
-  |1: ; fsin; ret
-  |2: ; cmp fpmop, 11; je >1; ja >9
-  |   fcos; ret
-  |1: ; fptan; fpop; ret
-  |.else
-  |  mov fpmop, [esp+12]
-  |  fld qword [esp+4]
-  |  cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
-  |  cmp fpmop, 3; jb ->vm_trunc; ja >2
-  |  fsqrt; ret
-  |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
-  |  cmp fpmop, 7; je >1; ja >2
-  |  fldln2; fxch; fyl2x; ret
-  |1: ; fld1; fxch; fyl2x; ret
-  |2: ; cmp fpmop, 9; je >1; ja >2
-  |  fldlg2; fxch; fyl2x; ret
-  |1: ; fsin; ret
-  |2: ; cmp fpmop, 11; je >1; ja >9
-  |   fcos; ret
-  |1: ; fptan; fpop; ret
-  |.endif
-  |.endif
-  |9: ; int3					// Bad fpm.
-  |.endif
-  |
-  |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
-  |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
-  |// and basic math functions. ORDER ARITH
-  |->vm_foldarith:
-  |.if X64
-  |
-  |  .if X64WIN
-  |    .define foldop, CARG3d
-  |  .else
-  |    .define foldop, CARG1d
-  |  .endif
-  |  cmp foldop, 1; je >1; ja >2
-  |  addsd xmm0, xmm1; ret
-  |1: ; subsd xmm0, xmm1; ret
-  |2: ; cmp foldop, 3; je >1; ja >2
-  |  mulsd xmm0, xmm1; ret
-  |1: ; divsd xmm0, xmm1; ret
-  |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow
-  |  cmp foldop, 7; je >1; ja >2
-  |  sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
-  |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
-  |2: ; cmp foldop, 9; ja >2
-  |.if X64WIN
-  |  movsd qword [rsp+8], xmm0	// Use scratch area.
-  |  movsd qword [rsp+16], xmm1
-  |  fld qword [rsp+8]
-  |  fld qword [rsp+16]
-  |.else
-  |  movsd qword [rsp-8], xmm0	// Use red zone.
-  |  movsd qword [rsp-16], xmm1
-  |  fld qword [rsp-8]
-  |  fld qword [rsp-16]
-  |.endif
-  |  je >1
-  |  fpatan
-  |7:
-  |.if X64WIN
-  |  fstp qword [rsp+8]		// Use scratch area.
-  |  movsd xmm0, qword [rsp+8]
-  |.else
-  |  fstp qword [rsp-8]		// Use red zone.
-  |  movsd xmm0, qword [rsp-8]
-  |.endif
-  |  ret
-  |1: ; fxch; fscale; fpop1; jmp <7
-  |2: ; cmp foldop, 11; je >1; ja >9
-  |  minsd xmm0, xmm1; ret
-  |1: ; maxsd xmm0, xmm1; ret
-  |9: ; int3				// Bad op.
-  |
-  |.elif SSE  // x86 calling convention with SSE ops.
-  |
-  |  .define foldop, eax
-  |  mov foldop, [esp+20]
-  |  movsd xmm0, qword [esp+4]
-  |  movsd xmm1, qword [esp+12]
-  |  cmp foldop, 1; je >1; ja >2
-  |  addsd xmm0, xmm1
-  |7:
-  |  movsd qword [esp+4], xmm0	// Overwrite callee-owned args.
-  |  fld qword [esp+4]
-  |  ret
-  |1: ; subsd xmm0, xmm1; jmp <7
-  |2: ; cmp foldop, 3; je >1; ja >2
-  |  mulsd xmm0, xmm1; jmp <7
-  |1: ; divsd xmm0, xmm1; jmp <7
-  |2: ; cmp foldop, 5
-  |  je >1; ja >2
-  |  call ->vm_mod; jmp <7
-  |1: ; pop edx; call ->vm_pow; push edx; jmp <7  // Writes to scratch area.
-  |2: ; cmp foldop, 7; je >1; ja >2
-  |  sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
-  |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
-  |2: ; cmp foldop, 9; ja >2
-  |  fld qword [esp+4]		// Reload from stack
-  |  fld qword [esp+12]
-  |  je >1
-  |  fpatan; ret
-  |1: ; fxch; fscale; fpop1; ret
-  |2: ; cmp foldop, 11; je >1; ja >9
-  |  minsd xmm0, xmm1; jmp <7
-  |1: ; maxsd xmm0, xmm1; jmp <7
-  |9: ; int3				// Bad op.
-  |
-  |.else  // x86 calling convention with x87 ops.
-  |
-  |  mov eax, [esp+20]
-  |  fld qword [esp+4]
-  |  fld qword [esp+12]
-  |  cmp eax, 1; je >1; ja >2
-  |  faddp st1; ret
-  |1: ; fsubp st1; ret
-  |2: ; cmp eax, 3; je >1; ja >2
-  |  fmulp st1; ret
-  |1: ; fdivp st1; ret
-  |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
-  |  cmp eax, 7; je >1; ja >2
-  |  fpop; fchs; ret
-  |1: ; fpop; fabs; ret
-  |2: ; cmp eax, 9; je >1; ja >2
-  |  fpatan; ret
-  |1: ; fxch; fscale; fpop1; ret
-  |2: ; cmp eax, 11; je >1; ja >9
-  |  fucomi st1; fcmovnbe st1; fpop1; ret
-  |1: ; fucomi st1; fcmovbe st1; fpop1; ret
-  |9: ; int3				// Bad op.
-  |
-  |.endif
-  |
   |//-----------------------------------------------------------------------
   |//-- Miscellaneous functions --------------------------------------------
   |//-----------------------------------------------------------------------
@@ -3947,19 +3329,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // RA is a number.
     |  cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
     |  // RA is a number, RD is an integer.
-    |.if SSE
     |  cvtsi2sd xmm0, dword [BASE+RD*8]
     |  jmp >2
-    |.else
-    |  fld qword [BASE+RA*8]
-    |  fild dword [BASE+RD*8]
-    |  jmp >3
-    |.endif
     |
     |8:  // RA is an integer, RD is not an integer.
     |  ja ->vmeta_comp
     |  // RA is an integer, RD is a number.
-    |.if SSE
     |  cvtsi2sd xmm1, dword [BASE+RA*8]
     |  movsd xmm0, qword [BASE+RD*8]
     |  add PC, 4
@@ -3967,29 +3342,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  jmp_comp jbe, ja, jb, jae, <9
     |  jmp <6
     |.else
-    |  fild dword [BASE+RA*8]
-    |  jmp >2
-    |.endif
-    |.else
     |  checknum RA, ->vmeta_comp
     |  checknum RD, ->vmeta_comp
     |.endif
-    |.if SSE
     |1:
     |  movsd xmm0, qword [BASE+RD*8]
     |2:
     |  add PC, 4
     |  ucomisd xmm0, qword [BASE+RA*8]
     |3:
-    |.else
-    |1:
-    |  fld qword [BASE+RA*8]		// Reverse order, i.e like cmp D, A.
-    |2:
-    |  fld qword [BASE+RD*8]
-    |3:
-    |  add PC, 4
-    |  fcomparepp
-    |.endif
     |  // Unordered: all of ZF CF PF set, ordered: PF clear.
     |  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
     |.if DUALNUM
@@ -4029,43 +3390,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // RD is a number.
     |  cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
     |  // RD is a number, RA is an integer.
-    |.if SSE
     |  cvtsi2sd xmm0, dword [BASE+RA*8]
-    |.else
-    |  fild dword [BASE+RA*8]
-    |.endif
     |  jmp >2
     |
     |8:  // RD is an integer, RA is not an integer.
     |  ja >5
     |  // RD is an integer, RA is a number.
-    |.if SSE
     |  cvtsi2sd xmm0, dword [BASE+RD*8]
     |  ucomisd xmm0, qword [BASE+RA*8]
-    |.else
-    |  fild dword [BASE+RD*8]
-    |  fld qword [BASE+RA*8]
-    |.endif
     |  jmp >4
     |
     |.else
     |  cmp RB, LJ_TISNUM; jae >5
     |  checknum RA, >5
     |.endif
-    |.if SSE
     |1:
     |  movsd xmm0, qword [BASE+RA*8]
     |2:
     |  ucomisd xmm0, qword [BASE+RD*8]
     |4:
-    |.else
-    |1:
-    |  fld qword [BASE+RA*8]
-    |2:
-    |  fld qword [BASE+RD*8]
-    |4:
-    |  fcomparepp
-    |.endif
   iseqne_fp:
     if (vk) {
       |  jp >2				// Unordered means not equal.
@@ -4188,39 +3531,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // RA is a number.
     |  cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
     |  // RA is a number, RD is an integer.
-    |.if SSE
     |  cvtsi2sd xmm0, dword [KBASE+RD*8]
-    |.else
-    |  fild dword [KBASE+RD*8]
-    |.endif
     |  jmp >2
     |
     |8:  // RA is an integer, RD is a number.
-    |.if SSE
     |  cvtsi2sd xmm0, dword [BASE+RA*8]
     |  ucomisd xmm0, qword [KBASE+RD*8]
-    |.else
-    |  fild dword [BASE+RA*8]
-    |  fld qword [KBASE+RD*8]
-    |.endif
     |  jmp >4
     |.else
     |  cmp RB, LJ_TISNUM; jae >3
     |.endif
-    |.if SSE
     |1:
     |  movsd xmm0, qword [KBASE+RD*8]
     |2:
     |  ucomisd xmm0, qword [BASE+RA*8]
     |4:
-    |.else
-    |1:
-    |  fld qword [KBASE+RD*8]
-    |2:
-    |  fld qword [BASE+RA*8]
-    |4:
-    |  fcomparepp
-    |.endif
     goto iseqne_fp;
   case BC_ISEQP: case BC_ISNEP:
     vk = op == BC_ISEQP;
@@ -4271,6 +3596,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_next
     break;
 
+  case BC_ISTYPE:
+    |  ins_AD	// RA = src, RD = -type
+    |  add RD, [BASE+RA*8+4]
+    |  jne ->vmeta_istype
+    |  ins_next
+    break;
+  case BC_ISNUM:
+    |  ins_AD	// RA = src, RD = -(TISNUM-1)
+    |  checknum RA, ->vmeta_istype
+    |  ins_next
+    break;
+
   /* -- Unary ops --------------------------------------------------------- */
 
   case BC_MOV:
@@ -4314,16 +3651,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |.else
     |  checknum RD, ->vmeta_unm
     |.endif
-    |.if SSE
     |  movsd xmm0, qword [BASE+RD*8]
     |  sseconst_sign xmm1, RDa
     |  xorps xmm0, xmm1
     |  movsd qword [BASE+RA*8], xmm0
-    |.else
-    |  fld qword [BASE+RD*8]
-    |  fchs
-    |  fstp qword [BASE+RA*8]
-    |.endif
     |.if DUALNUM
     |  jmp <9
     |.else
@@ -4339,15 +3670,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |1:
     |  mov dword [BASE+RA*8+4], LJ_TISNUM
     |  mov dword [BASE+RA*8], RD
-    |.elif SSE
+    |.else
     |  xorps xmm0, xmm0
     |  cvtsi2sd xmm0, dword STR:RD->len
     |1:
     |  movsd qword [BASE+RA*8], xmm0
-    |.else
-    |  fild dword STR:RD->len
-    |1:
-    |  fstp qword [BASE+RA*8]
     |.endif
     |  ins_next
     |2:
@@ -4365,11 +3692,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // Length of table returned in eax (RD).
     |.if DUALNUM
     |  // Nothing to do.
-    |.elif SSE
-    |  cvtsi2sd xmm0, RD
     |.else
-    |  mov ARG1, RD
-    |  fild ARG1
+    |  cvtsi2sd xmm0, RD
     |.endif
     |  mov BASE, RB			// Restore BASE.
     |  movzx RA, PC_RA
@@ -4384,7 +3708,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
 
   /* -- Binary ops -------------------------------------------------------- */
 
-    |.macro ins_arithpre, x87ins, sseins, ssereg
+    |.macro ins_arithpre, sseins, ssereg
     |  ins_ABC
     ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
     ||switch (vk) {
@@ -4393,37 +3717,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   .if DUALNUM
     |     cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
     |   .endif
-    |   .if SSE
-    |     movsd xmm0, qword [BASE+RB*8]
-    |     sseins ssereg, qword [KBASE+RC*8]
-    |   .else
-    |     fld qword [BASE+RB*8]
-    |     x87ins qword [KBASE+RC*8]
-    |   .endif
+    |   movsd xmm0, qword [BASE+RB*8]
+    |   sseins ssereg, qword [KBASE+RC*8]
     ||  break;
     ||case 1:
     |   checknum RB, ->vmeta_arith_nv
     |   .if DUALNUM
     |     cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
     |   .endif
-    |   .if SSE
-    |     movsd xmm0, qword [KBASE+RC*8]
-    |     sseins ssereg, qword [BASE+RB*8]
-    |   .else
-    |     fld qword [KBASE+RC*8]
-    |     x87ins qword [BASE+RB*8]
-    |   .endif
+    |   movsd xmm0, qword [KBASE+RC*8]
+    |   sseins ssereg, qword [BASE+RB*8]
     ||  break;
     ||default:
     |   checknum RB, ->vmeta_arith_vv
     |   checknum RC, ->vmeta_arith_vv
-    |   .if SSE
-    |     movsd xmm0, qword [BASE+RB*8]
-    |     sseins ssereg, qword [BASE+RC*8]
-    |   .else
-    |     fld qword [BASE+RB*8]
-    |     x87ins qword [BASE+RC*8]
-    |   .endif
+    |   movsd xmm0, qword [BASE+RB*8]
+    |   sseins ssereg, qword [BASE+RC*8]
     ||  break;
     ||}
     |.endmacro
@@ -4461,55 +3770,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |.endmacro
     |
     |.macro ins_arithpost
-    |.if SSE
     |  movsd qword [BASE+RA*8], xmm0
-    |.else
-    |  fstp qword [BASE+RA*8]
-    |.endif
     |.endmacro
     |
-    |.macro ins_arith, x87ins, sseins
-    |  ins_arithpre x87ins, sseins, xmm0
+    |.macro ins_arith, sseins
+    |  ins_arithpre sseins, xmm0
     |  ins_arithpost
     |  ins_next
     |.endmacro
     |
-    |.macro ins_arith, intins, x87ins, sseins
+    |.macro ins_arith, intins, sseins
     |.if DUALNUM
     |  ins_arithdn intins
     |.else
-    |  ins_arith, x87ins, sseins
+    |  ins_arith, sseins
     |.endif
     |.endmacro
 
     |  // RA = dst, RB = src1 or num const, RC = src2 or num const
   case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
-    |  ins_arith add, fadd, addsd
+    |  ins_arith add, addsd
     break;
   case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-    |  ins_arith sub, fsub, subsd
+    |  ins_arith sub, subsd
     break;
   case BC_MULVN: case BC_MULNV: case BC_MULVV:
-    |  ins_arith imul, fmul, mulsd
+    |  ins_arith imul, mulsd
     break;
   case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
-    |  ins_arith fdiv, divsd
+    |  ins_arith divsd
     break;
   case BC_MODVN:
-    |  ins_arithpre fld, movsd, xmm1
+    |  ins_arithpre movsd, xmm1
     |->BC_MODVN_Z:
     |  call ->vm_mod
     |  ins_arithpost
     |  ins_next
     break;
   case BC_MODNV: case BC_MODVV:
-    |  ins_arithpre fld, movsd, xmm1
+    |  ins_arithpre movsd, xmm1
     |  jmp ->BC_MODVN_Z			// Avoid 3 copies. It's slow anyway.
     break;
   case BC_POW:
-    |  ins_arithpre fld, movsd, xmm1
-    |  call ->vm_pow
+    |  ins_arithpre movsd, xmm1
+    |  mov RB, BASE
+    |.if not X64
+    |  movsd FPARG1, xmm0
+    |  movsd FPARG3, xmm1
+    |.endif
+    |  call extern pow
+    |  movzx RA, PC_RA
+    |  mov BASE, RB
+    |.if X64
     |  ins_arithpost
+    |.else
+    |  fstp qword [BASE+RA*8]
+    |.endif
     |  ins_next
     break;
 
@@ -4577,25 +3893,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  movsx RD, RDW
     |  mov dword [BASE+RA*8+4], LJ_TISNUM
     |  mov dword [BASE+RA*8], RD
-    |.elif SSE
+    |.else
     |  movsx RD, RDW			// Sign-extend literal.
     |  cvtsi2sd xmm0, RD
     |  movsd qword [BASE+RA*8], xmm0
-    |.else
-    |  fild PC_RD			// Refetch signed RD from instruction.
-    |  fstp qword [BASE+RA*8]
     |.endif
     |  ins_next
     break;
   case BC_KNUM:
     |  ins_AD	// RA = dst, RD = num const
-    |.if SSE
     |  movsd xmm0, qword [KBASE+RD*8]
     |  movsd qword [BASE+RA*8], xmm0
-    |.else
-    |  fld qword [KBASE+RD*8]
-    |  fstp qword [BASE+RA*8]
-    |.endif
     |  ins_next
     break;
   case BC_KPRI:
@@ -4702,18 +4010,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
   case BC_USETN:
     |  ins_AD	// RA = upvalue #, RD = num const
     |  mov LFUNC:RB, [BASE-8]
-    |.if SSE
     |  movsd xmm0, qword [KBASE+RD*8]
-    |.else
-    |  fld qword [KBASE+RD*8]
-    |.endif
     |  mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
     |  mov RA, UPVAL:RB->v
-    |.if SSE
     |  movsd qword [RA], xmm0
-    |.else
-    |  fstp qword [RA]
-    |.endif
     |  ins_next
     break;
   case BC_USETP:
@@ -4867,18 +4167,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |.else
     |  // Convert number to int and back and compare.
     |  checknum RC, >5
-    |.if SSE
     |  movsd xmm0, qword [BASE+RC*8]
-    |  cvtsd2si RC, xmm0
+    |  cvttsd2si RC, xmm0
     |  cvtsi2sd xmm1, RC
     |  ucomisd xmm0, xmm1
-    |.else
-    |  fld qword [BASE+RC*8]
-    |  fist ARG1
-    |  fild ARG1
-    |  fcomparepp
-    |  mov RC, ARG1
-    |.endif
     |  jne ->vmeta_tgetv		// Generic numeric key? Use fallback.
     |.endif
     |  cmp RC, TAB:RB->asize	// Takes care of unordered, too.
@@ -5002,6 +4294,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov dword [BASE+RA*8+4], LJ_TNIL
     |  jmp <1
     break;
+  case BC_TGETR:
+    |  ins_ABC	// RA = dst, RB = table, RC = key
+    |  mov TAB:RB, [BASE+RB*8]
+    |.if DUALNUM
+    |  mov RC, dword [BASE+RC*8]
+    |.else
+    |  cvttsd2si RC, qword [BASE+RC*8]
+    |.endif
+    |  cmp RC, TAB:RB->asize
+    |  jae ->vmeta_tgetr		// Not in array part? Use fallback.
+    |  shl RC, 3
+    |  add RC, TAB:RB->array
+    |  // Get array slot.
+    |->BC_TGETR_Z:
+    |.if X64
+    |  mov RBa, [RC]
+    |  mov [BASE+RA*8], RBa
+    |.else
+    |  mov RB, [RC]
+    |  mov RC, [RC+4]
+    |  mov [BASE+RA*8], RB
+    |  mov [BASE+RA*8+4], RC
+    |.endif
+    |->BC_TGETR2_Z:
+    |  ins_next
+    break;
 
   case BC_TSETV:
     |  ins_ABC	// RA = src, RB = table, RC = key
@@ -5015,18 +4333,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |.else
     |  // Convert number to int and back and compare.
     |  checknum RC, >5
-    |.if SSE
     |  movsd xmm0, qword [BASE+RC*8]
-    |  cvtsd2si RC, xmm0
+    |  cvttsd2si RC, xmm0
     |  cvtsi2sd xmm1, RC
     |  ucomisd xmm0, xmm1
-    |.else
-    |  fld qword [BASE+RC*8]
-    |  fist ARG1
-    |  fild ARG1
-    |  fcomparepp
-    |  mov RC, ARG1
-    |.endif
     |  jne ->vmeta_tsetv		// Generic numeric key? Use fallback.
     |.endif
     |  cmp RC, TAB:RB->asize		// Takes care of unordered, too.
@@ -5196,6 +4506,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  movzx RA, PC_RA			// Restore RA.
     |  jmp <2
     break;
+  case BC_TSETR:
+    |  ins_ABC	// RA = src, RB = table, RC = key
+    |  mov TAB:RB, [BASE+RB*8]
+    |.if DUALNUM
+    |  mov RC, dword [BASE+RC*8]
+    |.else
+    |  cvttsd2si RC, qword [BASE+RC*8]
+    |.endif
+    |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
+    |  jnz >7
+    |2:
+    |  cmp RC, TAB:RB->asize
+    |  jae ->vmeta_tsetr
+    |  shl RC, 3
+    |  add RC, TAB:RB->array
+    |  // Set array slot.
+    |->BC_TSETR_Z:
+    |.if X64
+    |  mov RBa, [BASE+RA*8]
+    |  mov [RC], RBa
+    |.else
+    |  mov RB, [BASE+RA*8+4]
+    |  mov RA, [BASE+RA*8]
+    |  mov [RC+4], RB
+    |  mov [RC], RA
+    |.endif
+    |  ins_next
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, RA
+    |  movzx RA, PC_RA			// Restore RA.
+    |  jmp <2
+    break;
 
   case BC_TSETM:
     |  ins_AD	// RA = base (table at base-1), RD = num const (start index)
@@ -5389,10 +4732,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |.if DUALNUM
     |  mov dword [BASE+RA*8+4], LJ_TISNUM
     |  mov dword [BASE+RA*8], RC
-    |.elif SSE
-    |  cvtsi2sd xmm0, RC
     |.else
-    |  fild dword [BASE+RA*8-8]
+    |  cvtsi2sd xmm0, RC
     |.endif
     |  // Copy array slot to returned value.
     |.if X64
@@ -5408,10 +4749,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // Return array index as a numeric key.
     |.if DUALNUM
     |  // See above.
-    |.elif SSE
-    |  movsd qword [BASE+RA*8], xmm0
     |.else
-    |  fstp qword [BASE+RA*8]
+    |  movsd qword [BASE+RA*8], xmm0
     |.endif
     |  mov [BASE+RA*8-8], RC		// Update control var.
     |2:
@@ -5424,9 +4763,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |
     |4:  // Skip holes in array part.
     |  add RC, 1
-    |.if not (DUALNUM or SSE)
-    |  mov [BASE+RA*8-8], RC
-    |.endif
     |  jmp <1
     |
     |5:  // Traverse hash part.
@@ -5760,7 +5096,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     if (!vk) {
       |  cmp RB, LJ_TISNUM; jae ->vmeta_for
     }
-    |.if SSE
     |  movsd xmm0, qword FOR_IDX
     |  movsd xmm1, qword FOR_STOP
     if (vk) {
@@ -5773,22 +5108,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ucomisd xmm1, xmm0
     |1:
     |  movsd qword FOR_EXT, xmm0
-    |.else
-    |  fld qword FOR_STOP
-    |  fld qword FOR_IDX
-    if (vk) {
-      |  fadd qword FOR_STEP		// nidx = idx + step
-      |  fst qword FOR_IDX
-      |  fst qword FOR_EXT
-      |  test RB, RB; js >1
-    } else {
-      |  fst qword FOR_EXT
-      |  jl >1
-    }
-    |  fxch				// Swap lim/(n)idx if step non-negative.
-    |1:
-    |  fcomparepp
-    |.endif
     if (op == BC_FORI) {
       |.if DUALNUM
       |  jnb <7
@@ -5816,11 +5135,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |2:
     |  ins_next
     |.endif
-    |.if SSE
+    |
     |3:  // Invert comparison if step is negative.
     |  ucomisd xmm0, xmm1
     |  jmp <1
-    |.endif
     break;
 
   case BC_ITERL:
@@ -5858,7 +5176,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_A	// RA = base, RD = target (loop extent)
     |  // Note: RA/RD is only used by trace recorder to determine scope/extent
     |  // This opcode does NOT jump, it's only purpose is to detect a hot loop.
-  |.if JIT
+    |.if JIT
     |  hotloop RB
     |.endif
     | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
@@ -5877,7 +5195,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov RDa, TRACE:RD->mcode
     |  mov L:RB, SAVE_L
     |  mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
-    |  mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
+    |  mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
     |  // Save additional callee-save registers only used in compiled code.
     |.if X64WIN
     |  mov TMPQ, r12
@@ -6044,9 +5362,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  // (lua_State *L, lua_CFunction f)
       |  call aword [DISPATCH+DISPATCH_GL(wrapf)]
     }
-    |  set_vmstate INTERP
     |  // nresults returned in eax (RD).
     |  mov BASE, L:RB->base
+    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+    |  set_vmstate INTERP
     |  lea RA, [BASE+RD*8]
     |  neg RA
     |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
@@ -6119,6 +5438,10 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
 	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
 	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
+#if LJ_NO_UNWIND
+	"\t.byte 0x8d\n\t.uleb128 0x6\n"	/* offset r13 */
+	"\t.byte 0x8c\n\t.uleb128 0x7\n"	/* offset r12 */
+#endif
 #else
 	"\t.long .Lbegin\n"
 	"\t.long %d\n"
@@ -6154,6 +5477,7 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.align " SZPTR "\n"
 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
 #endif
+#if !LJ_NO_UNWIND
 #if (defined(__sun__) && defined(__svr4__))
 #if LJ_64
     fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
@@ -6241,8 +5565,10 @@ static void emit_asm_debug(BuildCtx *ctx)
 #endif
 	"\t.align " SZPTR "\n"
 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
 #endif
     break;
+#if !LJ_NO_UNWIND
   /* Mental note: never let Apple design an assembler.
   ** Or a linker. Or a plastic case. But I digress.
   */
@@ -6359,17 +5685,24 @@ static void emit_asm_debug(BuildCtx *ctx)
 	  "LEFDEY:\n\n", fcsize);
     }
 #endif
-#if LJ_64
-    fprintf(ctx->fp, "\t.subsections_via_symbols\n");
-#else
+#if !LJ_64
     fprintf(ctx->fp,
       "\t.non_lazy_symbol_pointer\n"
       "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
       ".indirect_symbol _lj_err_unwind_dwarf\n"
-      ".long 0\n");
+      ".long 0\n\n");
+    fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
+    {
+      const char *const *xn;
+      for (xn = ctx->extnames; *xn; xn++)
+	if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
+	  fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
+    }
 #endif
+    fprintf(ctx->fp, ".subsections_via_symbols\n");
     }
     break;
+#endif
   default:  /* Difficult for other modes. */
     break;
   }
diff --git a/source/libs/luajit/LuaJIT-2.1.0-beta1/src/xb1build.bat b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/xb1build.bat
new file mode 100644
index 0000000000000000000000000000000000000000..847e84a555778ad59c4ec6156c5b0f2a5c20a79d
--- /dev/null
+++ b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/xb1build.bat
@@ -0,0 +1,101 @@
+@rem Script to build LuaJIT with the Xbox One SDK.
+@rem Donated to the public domain.
+@rem
+@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
+@rem Then cd to this directory and run this script.
+
+@if not defined INCLUDE goto :FAIL
+@if not defined DurangoXDK goto :FAIL
+
+@setlocal
+@echo ---- Host compiler ----
+@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /DLUAJIT_ENABLE_GC64
+@set LJLINK=link /nologo
+@set LJMT=mt /nologo
+@set DASMDIR=..\dynasm
+@set DASM=%DASMDIR%\dynasm.lua
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+
+%LJCOMPILE% host\minilua.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:minilua.exe minilua.obj
+@if errorlevel 1 goto :BAD
+if exist minilua.exe.manifest^
+  %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
+
+@rem Error out unless the host compiler is 64 bit (minilua must exit with a code of at least 8)
+@minilua
+@if not errorlevel 8 goto :FAIL
+
+@set DASMFLAGS=-D WIN -D FFI -D P64
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x64.dasc
+@if errorlevel 1 goto :BAD
+
+%LJCOMPILE% /I "." /I %DASMDIR% /D_DURANGO host\buildvm*.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:buildvm.exe buildvm*.obj
+@if errorlevel 1 goto :BAD
+if exist buildvm.exe.manifest^
+  %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+
+buildvm -m peobj -o lj_vm.obj
+@if errorlevel 1 goto :BAD
+buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m libdef -o lj_libdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m recdef -o lj_recdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
+@if errorlevel 1 goto :BAD
+
+@echo ---- Cross compiler ----
+
+@set CWD=%cd%
+@call "%DurangoXDK%\xdk\DurangoVars.cmd" XDK
+@cd /D "%CWD%"
+@shift
+
+@set LJCOMPILE="cl" /nologo /c /W3 /GF /Gm- /GR- /GS- /Gy /openmp- /D_CRT_SECURE_NO_DEPRECATE /D_LIB /D_UNICODE /D_DURANGO
+@set LJLIB="lib" /nologo
+
+@if "%1"=="debug" (
+  @shift
+  @set LJCOMPILE=%LJCOMPILE% /Zi /MDd /Od
+  @set LJLINK=%LJLINK% /debug 
+) else (
+  @set LJCOMPILE=%LJCOMPILE% /MD /O2 /DNDEBUG
+)
+
+@if "%1"=="amalg" goto :AMALG
+%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c
+@if errorlevel 1 goto :BAD
+%LJLIB% /OUT:luajit.lib lj_*.obj lib_*.obj
+@if errorlevel 1 goto :BAD
+@goto :NOAMALG
+:AMALG
+%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c
+@if errorlevel 1 goto :BAD
+%LJLIB% /OUT:luajit.lib ljamalg.obj lj_vm.obj
+@if errorlevel 1 goto :BAD
+:NOAMALG
+
+@del *.obj *.manifest minilua.exe buildvm.exe
+@echo.
+@echo === Successfully built LuaJIT for Xbox One ===
+
+@goto :END
+:BAD
+@echo.
+@echo *******************************************************
+@echo *** Build FAILED -- Please check the error messages ***
+@echo *******************************************************
+@goto :END
+:FAIL
+@echo To run this script you must open a "Visual Studio .NET Command Prompt"
+@echo (64 bit host compiler). The Xbox One SDK must be installed, too.
+:END
diff --git a/source/libs/luajit/LuaJIT-2.0.4/src/xedkbuild.bat b/source/libs/luajit/LuaJIT-2.1.0-beta1/src/xedkbuild.bat
similarity index 100%
rename from source/libs/luajit/LuaJIT-2.0.4/src/xedkbuild.bat
rename to source/libs/luajit/LuaJIT-2.1.0-beta1/src/xedkbuild.bat
diff --git a/source/libs/luajit/Makefile.am b/source/libs/luajit/Makefile.am
index f3c2889764bd6f34af86afec850328c576a86329..febddb72941abe434242c4a8b2273d76fb9dc0a5 100644
--- a/source/libs/luajit/Makefile.am
+++ b/source/libs/luajit/Makefile.am
@@ -17,13 +17,14 @@ include $(srcdir)/../../am/dist_hook.am
 NEVER_NAMES += $(NEVER_NAMES_SUB) $(NEVER_NAMES_LT)
 
 AM_CPPFLAGS = -I$(srcdir)/$(LUAJIT_TREE)/src $(LUAJIT_DEFINES)
-AM_CFLAGS = -Wall
+AM_CFLAGS = $(LUAJIT_CFLAGS) -Wall
+AM_CCASFLAGS = $(LUAJIT_CFLAGS)
 
 SUBDIRS = . native include
 
 lib_LTLIBRARIES = libtexluajit.la
 
-libtexluajit_la_LDFLAGS = -bindir @bindir@ -no-undefined -version-info $(LUAJIT_LT_VERSINFO)
+libtexluajit_la_LDFLAGS = -bindir @bindir@ -no-undefined -version-info $(LUAJIT_LT_VERSINFO) $(LIBLUAJIT_LDEXTRA)
 
 dist_libtexluajit_la_SOURCES = \
 	lbitlib.c
@@ -38,6 +39,7 @@ nodist_libtexluajit_la_SOURCES = \
 	@LUAJIT_TREE@/src/lj_bc.c \
 	@LUAJIT_TREE@/src/lj_bcread.c \
 	@LUAJIT_TREE@/src/lj_bcwrite.c \
+	@LUAJIT_TREE@/src/lj_buf.c \
 	@LUAJIT_TREE@/src/lj_carith.c \
 	@LUAJIT_TREE@/src/lj_ccall.c \
 	@LUAJIT_TREE@/src/lj_ccallback.c \
@@ -70,10 +72,12 @@ nodist_libtexluajit_la_SOURCES = \
 	@LUAJIT_TREE@/src/lj_opt_sink.c \
 	@LUAJIT_TREE@/src/lj_opt_split.c \
 	@LUAJIT_TREE@/src/lj_parse.c \
+	@LUAJIT_TREE@/src/lj_profile.c \
 	@LUAJIT_TREE@/src/lj_record.c \
 	@LUAJIT_TREE@/src/lj_snap.c \
 	@LUAJIT_TREE@/src/lj_state.c \
 	@LUAJIT_TREE@/src/lj_str.c \
+	@LUAJIT_TREE@/src/lj_strfmt.c \
 	@LUAJIT_TREE@/src/lj_strscan.c \
 	@LUAJIT_TREE@/src/lj_tab.c \
 	@LUAJIT_TREE@/src/lj_trace.c \
@@ -84,7 +88,7 @@ nodist_libtexluajit_la_SOURCES = \
 if PEOBJ
 libtexluajit_la_LIBADD = lj_vm_obj.lo
 else !PEOBJ
-nodist_libtexluajit_la_SOURCES += lj_vm_asm.s
+nodist_libtexluajit_la_SOURCES += lj_vm_asm.S
 endif !PEOBJ
 
 ljlib_sources = \
@@ -104,7 +108,7 @@ $(libtexluajit_la_OBJECTS): $(HDRGEN)
 
 # When cross-compiling, EXEEXT for the build and host systems may differ.
 # Thus we depend on the auxiliary file native/buildvm-stamp.
-native/buildvm-stamp: $(ljlib_sources)
+native/buildvm-stamp: $(ljlib_sources) lj_opt_fold.c
 	cd native && $(MAKE) $(AM_MAKEFLAGS) buildvm-stamp
 
 HDRGEN = lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h
@@ -121,10 +125,10 @@ lj_libdef.h: native/buildvm-stamp
 lj_recdef.h: native/buildvm-stamp
 	$(AM_V_GEN)native/buildvm -m recdef -o $@ $(ljlib_sources)
 
-lj_folddef.h: native/buildvm-stamp $(LUAJIT_TREE)/src/lj_opt_fold.c
-	$(AM_V_GEN)native/buildvm -m folddef -o $@ $(srcdir)/$(LUAJIT_TREE)/src/lj_opt_fold.c
+lj_folddef.h: native/buildvm-stamp
+	$(AM_V_GEN)native/buildvm -m folddef -o $@ lj_opt_fold.c
 
-lj_vm_obj.o lj_vm_asm.s: native/buildvm-stamp
+lj_vm_obj.o lj_vm_asm.S: native/buildvm-stamp
 	$(AM_V_GEN)native/buildvm -m $(LJVM_MODE) -o $@
 
 lj_vm_obj.lo: lj_vm_obj.o lib_base.lo
@@ -132,13 +136,13 @@ lj_vm_obj.lo: lj_vm_obj.o lib_base.lo
 	  && . ./$@ && test x"$$pic_object" == xnone \
 	  || cp lj_vm_obj.o .libs/lj_vm_obj.o
 
-$(ljlib_sources): config.force
+$(ljlib_sources) lj_opt_fold.c: config.force
 	@test -f $@ || { rm -f $@; \
 	  if $(AM_V_P); then echo "$(LN_S) $(srcdir)/$(LUAJIT_TREE)/src/$@ $@"; \
 	    else echo "  LINK     $@"; fi; \
 	  $(LN_S) $(srcdir)/$(LUAJIT_TREE)/src/$@ $@; } || exit 1
 
-CLEANFILES = $(HDRGEN) $(ljlib_sources) lj_vm_asm.s
+CLEANFILES = $(HDRGEN) $(ljlib_sources) lj_opt_fold.c lj_vm_asm.S
 
 CONFIG_CLEAN_FILES = dynasm_flags native_flags
 
@@ -166,11 +170,11 @@ luajiterr.log: luajittry$(EXEEXT)
 
 jittest_SOURCES = jittest.c
 
-jittest_CPPFLAGS = -Iinclude
+jittest_CPPFLAGS = -Iinclude $(LUAJIT_DEFINES)
 
 nodist_luajittry_SOURCES = @LUAJIT_TREE@/src/luajit.c
 
-luajittry_CPPFLAGS = -Iinclude
+luajittry_CPPFLAGS = -Iinclude $(LUAJIT_DEFINES)
 luajittry_LDFLAGS = $(LUAJIT_LDEXTRA)
 
 LDADD = libtexluajit.la
diff --git a/source/libs/luajit/Makefile.in b/source/libs/luajit/Makefile.in
index c2a75bc5d902d8b290b9c5c067fdd909679f030b..cc7665b58121c2299c41805b1aea15c2dfe4b81c 100644
--- a/source/libs/luajit/Makefile.in
+++ b/source/libs/luajit/Makefile.in
@@ -90,7 +90,7 @@ PRE_UNINSTALL = :
 POST_UNINSTALL = :
 build_triplet = @build@
 host_triplet = @host@
-@PEOBJ_FALSE@am__append_1 = lj_vm_asm.s
+@PEOBJ_FALSE@am__append_1 = lj_vm_asm.S
 @build_TRUE@check_PROGRAMS = jittest$(EXEEXT) luajittry$(EXEEXT)
 subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@@ -159,7 +159,8 @@ nodist_libtexluajit_la_OBJECTS = $(am__objects_1) \
 	@LUAJIT_TREE@/src/lj_alloc.lo @LUAJIT_TREE@/src/lj_api.lo \
 	@LUAJIT_TREE@/src/lj_asm.lo @LUAJIT_TREE@/src/lj_bc.lo \
 	@LUAJIT_TREE@/src/lj_bcread.lo @LUAJIT_TREE@/src/lj_bcwrite.lo \
-	@LUAJIT_TREE@/src/lj_carith.lo @LUAJIT_TREE@/src/lj_ccall.lo \
+	@LUAJIT_TREE@/src/lj_buf.lo @LUAJIT_TREE@/src/lj_carith.lo \
+	@LUAJIT_TREE@/src/lj_ccall.lo \
 	@LUAJIT_TREE@/src/lj_ccallback.lo \
 	@LUAJIT_TREE@/src/lj_cconv.lo @LUAJIT_TREE@/src/lj_cdata.lo \
 	@LUAJIT_TREE@/src/lj_char.lo @LUAJIT_TREE@/src/lj_clib.lo \
@@ -178,9 +179,10 @@ nodist_libtexluajit_la_OBJECTS = $(am__objects_1) \
 	@LUAJIT_TREE@/src/lj_opt_narrow.lo \
 	@LUAJIT_TREE@/src/lj_opt_sink.lo \
 	@LUAJIT_TREE@/src/lj_opt_split.lo \
-	@LUAJIT_TREE@/src/lj_parse.lo @LUAJIT_TREE@/src/lj_record.lo \
-	@LUAJIT_TREE@/src/lj_snap.lo @LUAJIT_TREE@/src/lj_state.lo \
-	@LUAJIT_TREE@/src/lj_str.lo @LUAJIT_TREE@/src/lj_strscan.lo \
+	@LUAJIT_TREE@/src/lj_parse.lo @LUAJIT_TREE@/src/lj_profile.lo \
+	@LUAJIT_TREE@/src/lj_record.lo @LUAJIT_TREE@/src/lj_snap.lo \
+	@LUAJIT_TREE@/src/lj_state.lo @LUAJIT_TREE@/src/lj_str.lo \
+	@LUAJIT_TREE@/src/lj_strfmt.lo @LUAJIT_TREE@/src/lj_strscan.lo \
 	@LUAJIT_TREE@/src/lj_tab.lo @LUAJIT_TREE@/src/lj_trace.lo \
 	@LUAJIT_TREE@/src/lj_udata.lo @LUAJIT_TREE@/src/lj_vmevent.lo \
 	@LUAJIT_TREE@/src/lj_vmmath.lo $(am__objects_2)
@@ -223,6 +225,16 @@ DEFAULT_INCLUDES = -I.@am__isrc@
 depcomp = $(SHELL) $(top_srcdir)/../../build-aux/depcomp
 am__depfiles_maybe = depfiles
 am__mv = mv -f
+CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS)
+LTCPPASCOMPILE = $(LIBTOOL) $(AM_V_lt) $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CCASFLAGS) $(CCASFLAGS)
+AM_V_CPPAS = $(am__v_CPPAS_@AM_V@)
+am__v_CPPAS_ = $(am__v_CPPAS_@AM_DEFAULT_V@)
+am__v_CPPAS_0 = @echo "  CPPAS   " $@;
+am__v_CPPAS_1 = 
 COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
 	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
 LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
@@ -241,14 +253,6 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@)
 am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
 am__v_CCLD_0 = @echo "  CCLD    " $@;
 am__v_CCLD_1 = 
-CCASCOMPILE = $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS)
-LTCCASCOMPILE = $(LIBTOOL) $(AM_V_lt) $(AM_LIBTOOLFLAGS) \
-	$(LIBTOOLFLAGS) --mode=compile $(CCAS) $(AM_CCASFLAGS) \
-	$(CCASFLAGS)
-AM_V_CCAS = $(am__v_CCAS_@AM_V@)
-am__v_CCAS_ = $(am__v_CCAS_@AM_DEFAULT_V@)
-am__v_CCAS_0 = @echo "  CCAS    " $@;
-am__v_CCAS_1 = 
 SOURCES = $(dist_libtexluajit_la_SOURCES) \
 	$(nodist_libtexluajit_la_SOURCES) $(jittest_SOURCES) \
 	$(nodist_luajittry_SOURCES)
@@ -569,6 +573,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
+LIBLUAJIT_LDEXTRA = @LIBLUAJIT_LDEXTRA@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
@@ -580,6 +585,7 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
 LUAJITVERSION = @LUAJITVERSION@
+LUAJIT_CFLAGS = @LUAJIT_CFLAGS@
 LUAJIT_DEFINES = @LUAJIT_DEFINES@
 LUAJIT_LDEXTRA = @LUAJIT_LDEXTRA@
 LUAJIT_LT_VERSINFO = @LUAJIT_LT_VERSINFO@
@@ -670,10 +676,11 @@ NEVER_NAMES = -name .svn $(NEVER_NAMES_SUB) $(NEVER_NAMES_LT)
 NEVER_NAMES_SUB = -o -name .deps -o -name .dirstamp -o -name '*.$(OBJEXT)'
 NEVER_NAMES_LT = -o -name .libs -o -name '*.lo'
 AM_CPPFLAGS = -I$(srcdir)/$(LUAJIT_TREE)/src $(LUAJIT_DEFINES)
-AM_CFLAGS = -Wall
+AM_CFLAGS = $(LUAJIT_CFLAGS) -Wall
+AM_CCASFLAGS = $(LUAJIT_CFLAGS)
 SUBDIRS = . native include
 lib_LTLIBRARIES = libtexluajit.la
-libtexluajit_la_LDFLAGS = -bindir @bindir@ -no-undefined -version-info $(LUAJIT_LT_VERSINFO)
+libtexluajit_la_LDFLAGS = -bindir @bindir@ -no-undefined -version-info $(LUAJIT_LT_VERSINFO) $(LIBLUAJIT_LDEXTRA)
 dist_libtexluajit_la_SOURCES = \
 	lbitlib.c
 
@@ -682,25 +689,27 @@ nodist_libtexluajit_la_SOURCES = $(ljlib_sources) \
 	@LUAJIT_TREE@/src/lj_alloc.c @LUAJIT_TREE@/src/lj_api.c \
 	@LUAJIT_TREE@/src/lj_asm.c @LUAJIT_TREE@/src/lj_bc.c \
 	@LUAJIT_TREE@/src/lj_bcread.c @LUAJIT_TREE@/src/lj_bcwrite.c \
-	@LUAJIT_TREE@/src/lj_carith.c @LUAJIT_TREE@/src/lj_ccall.c \
-	@LUAJIT_TREE@/src/lj_ccallback.c @LUAJIT_TREE@/src/lj_cconv.c \
-	@LUAJIT_TREE@/src/lj_cdata.c @LUAJIT_TREE@/src/lj_char.c \
-	@LUAJIT_TREE@/src/lj_clib.c @LUAJIT_TREE@/src/lj_cparse.c \
-	@LUAJIT_TREE@/src/lj_crecord.c @LUAJIT_TREE@/src/lj_ctype.c \
-	@LUAJIT_TREE@/src/lj_debug.c @LUAJIT_TREE@/src/lj_dispatch.c \
-	@LUAJIT_TREE@/src/lj_err.c @LUAJIT_TREE@/src/lj_ffrecord.c \
-	@LUAJIT_TREE@/src/lj_func.c @LUAJIT_TREE@/src/lj_gc.c \
-	@LUAJIT_TREE@/src/lj_gdbjit.c @LUAJIT_TREE@/src/lj_ir.c \
-	@LUAJIT_TREE@/src/lj_lex.c @LUAJIT_TREE@/src/lj_lib.c \
-	@LUAJIT_TREE@/src/lj_load.c @LUAJIT_TREE@/src/lj_mcode.c \
-	@LUAJIT_TREE@/src/lj_meta.c @LUAJIT_TREE@/src/lj_obj.c \
-	@LUAJIT_TREE@/src/lj_opt_dce.c @LUAJIT_TREE@/src/lj_opt_fold.c \
+	@LUAJIT_TREE@/src/lj_buf.c @LUAJIT_TREE@/src/lj_carith.c \
+	@LUAJIT_TREE@/src/lj_ccall.c @LUAJIT_TREE@/src/lj_ccallback.c \
+	@LUAJIT_TREE@/src/lj_cconv.c @LUAJIT_TREE@/src/lj_cdata.c \
+	@LUAJIT_TREE@/src/lj_char.c @LUAJIT_TREE@/src/lj_clib.c \
+	@LUAJIT_TREE@/src/lj_cparse.c @LUAJIT_TREE@/src/lj_crecord.c \
+	@LUAJIT_TREE@/src/lj_ctype.c @LUAJIT_TREE@/src/lj_debug.c \
+	@LUAJIT_TREE@/src/lj_dispatch.c @LUAJIT_TREE@/src/lj_err.c \
+	@LUAJIT_TREE@/src/lj_ffrecord.c @LUAJIT_TREE@/src/lj_func.c \
+	@LUAJIT_TREE@/src/lj_gc.c @LUAJIT_TREE@/src/lj_gdbjit.c \
+	@LUAJIT_TREE@/src/lj_ir.c @LUAJIT_TREE@/src/lj_lex.c \
+	@LUAJIT_TREE@/src/lj_lib.c @LUAJIT_TREE@/src/lj_load.c \
+	@LUAJIT_TREE@/src/lj_mcode.c @LUAJIT_TREE@/src/lj_meta.c \
+	@LUAJIT_TREE@/src/lj_obj.c @LUAJIT_TREE@/src/lj_opt_dce.c \
+	@LUAJIT_TREE@/src/lj_opt_fold.c \
 	@LUAJIT_TREE@/src/lj_opt_loop.c @LUAJIT_TREE@/src/lj_opt_mem.c \
 	@LUAJIT_TREE@/src/lj_opt_narrow.c \
 	@LUAJIT_TREE@/src/lj_opt_sink.c \
 	@LUAJIT_TREE@/src/lj_opt_split.c @LUAJIT_TREE@/src/lj_parse.c \
-	@LUAJIT_TREE@/src/lj_record.c @LUAJIT_TREE@/src/lj_snap.c \
-	@LUAJIT_TREE@/src/lj_state.c @LUAJIT_TREE@/src/lj_str.c \
+	@LUAJIT_TREE@/src/lj_profile.c @LUAJIT_TREE@/src/lj_record.c \
+	@LUAJIT_TREE@/src/lj_snap.c @LUAJIT_TREE@/src/lj_state.c \
+	@LUAJIT_TREE@/src/lj_str.c @LUAJIT_TREE@/src/lj_strfmt.c \
 	@LUAJIT_TREE@/src/lj_strscan.c @LUAJIT_TREE@/src/lj_tab.c \
 	@LUAJIT_TREE@/src/lj_trace.c @LUAJIT_TREE@/src/lj_udata.c \
 	@LUAJIT_TREE@/src/lj_vmevent.c @LUAJIT_TREE@/src/lj_vmmath.c \
@@ -720,8 +729,8 @@ ljlib_sources = \
 	lib_ffi.c
 
 HDRGEN = lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h
-CLEANFILES = $(HDRGEN) $(ljlib_sources) lj_vm_asm.s jiterr.out \
-	jiterr.tmp rebuild.stamp
+CLEANFILES = $(HDRGEN) $(ljlib_sources) lj_opt_fold.c lj_vm_asm.S \
+	jiterr.out jiterr.tmp rebuild.stamp
 CONFIG_CLEAN_FILES = dynasm_flags native_flags
 luajitincludedir = ${includedir}/texluajit
 luajitinclude_HEADERS = \
@@ -737,9 +746,9 @@ pkgconfig_DATA = texluajit.pc
 @build_TRUE@dist_check_SCRIPTS = luajit.test luajiterr.test
 @build_TRUE@TESTS = luajit.test luajiterr.test
 jittest_SOURCES = jittest.c
-jittest_CPPFLAGS = -Iinclude
+jittest_CPPFLAGS = -Iinclude $(LUAJIT_DEFINES)
 nodist_luajittry_SOURCES = @LUAJIT_TREE@/src/luajit.c
-luajittry_CPPFLAGS = -Iinclude
+luajittry_CPPFLAGS = -Iinclude $(LUAJIT_DEFINES)
 luajittry_LDFLAGS = $(LUAJIT_LDEXTRA)
 LDADD = libtexluajit.la
 
@@ -754,7 +763,7 @@ all: config.h
 	$(MAKE) $(AM_MAKEFLAGS) all-recursive
 
 .SUFFIXES:
-.SUFFIXES: .c .lo .log .o .obj .s .test .test$(EXEEXT) .trs
+.SUFFIXES: .S .c .lo .log .o .obj .test .test$(EXEEXT) .trs
 am--refresh: Makefile
 	@:
 $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/../../am/dist_hook.am $(srcdir)/../../am/reconfig.am $(srcdir)/../../am/rebuild.am $(am__configure_deps)
@@ -863,6 +872,8 @@ clean-libLTLIBRARIES:
 	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
 @LUAJIT_TREE@/src/lj_bcwrite.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
 	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
+@LUAJIT_TREE@/src/lj_buf.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
+	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
 @LUAJIT_TREE@/src/lj_carith.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
 	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
 @LUAJIT_TREE@/src/lj_ccall.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
@@ -927,6 +938,8 @@ clean-libLTLIBRARIES:
 	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
 @LUAJIT_TREE@/src/lj_parse.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
 	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
+@LUAJIT_TREE@/src/lj_profile.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
+	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
 @LUAJIT_TREE@/src/lj_record.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
 	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
 @LUAJIT_TREE@/src/lj_snap.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
@@ -935,6 +948,8 @@ clean-libLTLIBRARIES:
 	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
 @LUAJIT_TREE@/src/lj_str.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
 	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
+@LUAJIT_TREE@/src/lj_strfmt.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
+	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
 @LUAJIT_TREE@/src/lj_strscan.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
 	@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp)
 @LUAJIT_TREE@/src/lj_tab.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \
@@ -992,6 +1007,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_package.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_string.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_table.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lj_vm_asm.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_aux.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_init.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_alloc.Plo@am__quote@
@@ -1000,6 +1016,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_bc.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_bcread.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_bcwrite.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_buf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_carith.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_ccall.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_ccallback.Plo@am__quote@
@@ -1032,10 +1049,12 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_opt_sink.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_opt_split.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_parse.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_profile.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_record.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_snap.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_state.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_str.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_strfmt.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_strscan.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_tab.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_trace.Plo@am__quote@
@@ -1044,6 +1063,30 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_vmmath.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/luajittry-luajit.Po@am__quote@
 
+.S.o:
+@am__fastdepCCAS_TRUE@	$(AM_V_CPPAS)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCCAS_TRUE@	$(CPPASCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCCAS_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ $<
+
+.S.obj:
+@am__fastdepCCAS_TRUE@	$(AM_V_CPPAS)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCCAS_TRUE@	$(CPPASCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCCAS_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.S.lo:
+@am__fastdepCCAS_TRUE@	$(AM_V_CPPAS)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCCAS_TRUE@	$(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCCAS_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS@am__nodep@)$(LTCPPASCOMPILE) -c -o $@ $<
+
 .c.o:
 @am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
 @am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
@@ -1096,15 +1139,6 @@ jittest-jittest.obj: jittest.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(luajittry_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o @LUAJIT_TREE@/src/luajittry-luajit.obj `if test -f '@LUAJIT_TREE@/src/luajit.c'; then $(CYGPATH_W) '@LUAJIT_TREE@/src/luajit.c'; else $(CYGPATH_W) '$(srcdir)/@LUAJIT_TREE@/src/luajit.c'; fi`
 
-.s.o:
-	$(AM_V_CCAS)$(CCASCOMPILE) -c -o $@ $<
-
-.s.obj:
-	$(AM_V_CCAS)$(CCASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-
-.s.lo:
-	$(AM_V_CCAS)$(LTCCASCOMPILE) -c -o $@ $<
-
 mostlyclean-libtool:
 	-rm -f *.lo
 
@@ -1766,7 +1800,7 @@ $(libtexluajit_la_OBJECTS): $(HDRGEN)
 
 # When cross-compiling, EXEEXT for the build and host systems may differ.
 # Thus we depend on the auxiliary file native/buildvm-stamp.
-native/buildvm-stamp: $(ljlib_sources)
+native/buildvm-stamp: $(ljlib_sources) lj_opt_fold.c
 	cd native && $(MAKE) $(AM_MAKEFLAGS) buildvm-stamp
 
 lj_bcdef.h: native/buildvm-stamp
@@ -1781,10 +1815,10 @@ lj_libdef.h: native/buildvm-stamp
 lj_recdef.h: native/buildvm-stamp
 	$(AM_V_GEN)native/buildvm -m recdef -o $@ $(ljlib_sources)
 
-lj_folddef.h: native/buildvm-stamp $(LUAJIT_TREE)/src/lj_opt_fold.c
-	$(AM_V_GEN)native/buildvm -m folddef -o $@ $(srcdir)/$(LUAJIT_TREE)/src/lj_opt_fold.c
+lj_folddef.h: native/buildvm-stamp
+	$(AM_V_GEN)native/buildvm -m folddef -o $@ lj_opt_fold.c
 
-lj_vm_obj.o lj_vm_asm.s: native/buildvm-stamp
+lj_vm_obj.o lj_vm_asm.S: native/buildvm-stamp
 	$(AM_V_GEN)native/buildvm -m $(LJVM_MODE) -o $@
 
 lj_vm_obj.lo: lj_vm_obj.o lib_base.lo
@@ -1792,7 +1826,7 @@ lj_vm_obj.lo: lj_vm_obj.o lib_base.lo
 	  && . ./$@ && test x"$$pic_object" == xnone \
 	  || cp lj_vm_obj.o .libs/lj_vm_obj.o
 
-$(ljlib_sources): config.force
+$(ljlib_sources) lj_opt_fold.c: config.force
 	@test -f $@ || { rm -f $@; \
 	  if $(AM_V_P); then echo "$(LN_S) $(srcdir)/$(LUAJIT_TREE)/src/$@ $@"; \
 	    else echo "  LINK     $@"; fi; \
diff --git a/source/libs/luajit/configure b/source/libs/luajit/configure
index 039ddf2e84c2357626aaaa706357fabc7ef07a9b..ecf9dcba9457247d5433f11189ce20c72928446a 100755
--- a/source/libs/luajit/configure
+++ b/source/libs/luajit/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for luajit for TeX Live 2.0.4.
+# Generated by GNU Autoconf 2.69 for luajit for TeX Live 2.1.0-beta1.
 #
 # Report bugs to <tex-k@tug.org>.
 #
@@ -590,12 +590,12 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='luajit for TeX Live'
 PACKAGE_TARNAME='luajit-for-tex-live'
-PACKAGE_VERSION='2.0.4'
-PACKAGE_STRING='luajit for TeX Live 2.0.4'
+PACKAGE_VERSION='2.1.0-beta1'
+PACKAGE_STRING='luajit for TeX Live 2.1.0-beta1'
 PACKAGE_BUGREPORT='tex-k@tug.org'
 PACKAGE_URL=''
 
-ac_unique_file="LuaJIT-2.0.4/src/luajit.h"
+ac_unique_file="LuaJIT-2.1.0-beta1/src/luajit.h"
 # Factoring default headers for most tests.
 ac_includes_default="\
 #include <stdio.h>
@@ -641,8 +641,10 @@ subdirs
 LUAJIT_TREE
 build_FALSE
 build_TRUE
+LIBLUAJIT_LDEXTRA
 LJVM_MODE
 LJHOST
+LUAJIT_CFLAGS
 PEOBJ_FALSE
 PEOBJ_TRUE
 DASM_ARCH
@@ -1343,7 +1345,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures luajit for TeX Live 2.0.4 to adapt to many kinds of systems.
+\`configure' configures luajit for TeX Live 2.1.0-beta1 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1414,7 +1416,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of luajit for TeX Live 2.0.4:";;
+     short | recursive ) echo "Configuration of luajit for TeX Live 2.1.0-beta1:";;
    esac
   cat <<\_ACEOF
 
@@ -1533,7 +1535,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-luajit for TeX Live configure 2.0.4
+luajit for TeX Live configure 2.1.0-beta1
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1994,7 +1996,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by luajit for TeX Live $as_me 2.0.4, which was
+It was created by luajit for TeX Live $as_me 2.1.0-beta1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -3917,7 +3919,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='luajit-for-tex-live'
- VERSION='2.0.4'
+ VERSION='2.1.0-beta1'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -4921,6 +4923,183 @@ else
 fi
 
 
+   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C99" >&5
+$as_echo_n "checking for $CC option to accept ISO C99... " >&6; }
+if ${ac_cv_prog_cc_c99+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_prog_cc_c99=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <stdio.h>
+
+// Check varargs macros.  These examples are taken from C99 6.10.3.5.
+#define debug(...) fprintf (stderr, __VA_ARGS__)
+#define showlist(...) puts (#__VA_ARGS__)
+#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__))
+static void
+test_varargs_macros (void)
+{
+  int x = 1234;
+  int y = 5678;
+  debug ("Flag");
+  debug ("X = %d\n", x);
+  showlist (The first, second, and third items.);
+  report (x>y, "x is %d but y is %d", x, y);
+}
+
+// Check long long types.
+#define BIG64 18446744073709551615ull
+#define BIG32 4294967295ul
+#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0)
+#if !BIG_OK
+  your preprocessor is broken;
+#endif
+#if BIG_OK
+#else
+  your preprocessor is broken;
+#endif
+static long long int bignum = -9223372036854775807LL;
+static unsigned long long int ubignum = BIG64;
+
+struct incomplete_array
+{
+  int datasize;
+  double data[];
+};
+
+struct named_init {
+  int number;
+  const wchar_t *name;
+  double average;
+};
+
+typedef const char *ccp;
+
+static inline int
+test_restrict (ccp restrict text)
+{
+  // See if C++-style comments work.
+  // Iterate through items via the restricted pointer.
+  // Also check for declarations in for loops.
+  for (unsigned int i = 0; *(text+i) != '\0'; ++i)
+    continue;
+  return 0;
+}
+
+// Check varargs and va_copy.
+static void
+test_varargs (const char *format, ...)
+{
+  va_list args;
+  va_start (args, format);
+  va_list args_copy;
+  va_copy (args_copy, args);
+
+  const char *str;
+  int number;
+  float fnumber;
+
+  while (*format)
+    {
+      switch (*format++)
+	{
+	case 's': // string
+	  str = va_arg (args_copy, const char *);
+	  break;
+	case 'd': // int
+	  number = va_arg (args_copy, int);
+	  break;
+	case 'f': // float
+	  fnumber = va_arg (args_copy, double);
+	  break;
+	default:
+	  break;
+	}
+    }
+  va_end (args_copy);
+  va_end (args);
+}
+
+int
+main ()
+{
+
+  // Check bool.
+  _Bool success = false;
+
+  // Check restrict.
+  if (test_restrict ("String literal") == 0)
+    success = true;
+  char *restrict newvar = "Another string";
+
+  // Check varargs.
+  test_varargs ("s, d' f .", "string", 65, 34.234);
+  test_varargs_macros ();
+
+  // Check flexible array members.
+  struct incomplete_array *ia =
+    malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10));
+  ia->datasize = 10;
+  for (int i = 0; i < ia->datasize; ++i)
+    ia->data[i] = i * 1.234;
+
+  // Check named initializers.
+  struct named_init ni = {
+    .number = 34,
+    .name = L"Test wide string",
+    .average = 543.34343,
+  };
+
+  ni.number = 58;
+
+  int dynamic_array[ni.number];
+  dynamic_array[ni.number - 1] = 543;
+
+  // work around unused variable warnings
+  return (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == 'x'
+	  || dynamic_array[ni.number - 1] != 543);
+
+  ;
+  return 0;
+}
+_ACEOF
+for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -D_STDC_C99= -qlanglvl=extc99
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_prog_cc_c99=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+  test "x$ac_cv_prog_cc_c99" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c99" in
+  x)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+  xno)
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_c99"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5
+$as_echo "$ac_cv_prog_cc_c99" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c99" != xno; then :
+
+fi
+
+
 # By default we simply use the C compiler to build assembly code.
 
 test "${CCAS+set}" = set || CCAS=$CC
@@ -5158,10 +5337,10 @@ $as_echo "no, using $LN_S" >&6; }
 fi
 
 
-LUAJITVERSION=2.0.4
+LUAJITVERSION=2.1.0-beta1
 
 
-LUAJIT_LT_VERSINFO=2:4:0
+LUAJIT_LT_VERSINFO=3:0:1
 
 
 case `pwd` in
@@ -13955,7 +14134,7 @@ case $host_os in #(
     LJHOST='Other' ;;
 esac
 lj_save_CPPFLAGS=$CPPFLAGS
-CPPFLAGS="$CPPFLAGS -I$srcdir/LuaJIT-2.0.4/src $LUAJIT_DEFINES -dM"
+CPPFLAGS="$CPPFLAGS -I$srcdir/LuaJIT-2.1.0-beta1/src $LUAJIT_DEFINES -dM"
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #include <lj_arch.h>
@@ -13968,17 +14147,22 @@ if grep 'LJ_TARGET_X64 ' conftest.i >/dev/null 2>&1; then :
   LJARCH=x64
 elif grep 'LJ_TARGET_X86 ' conftest.i >/dev/null 2>&1; then :
   LJARCH=x86
-         LUAJIT_DEFINES="$LUAJIT_DEFINES -march=i686"
+         LUAJIT_CFLAGS='-march=i686 -msse -msse2 -mfpmath=sse'
 elif grep 'LJ_TARGET_ARM ' conftest.i >/dev/null 2>&1; then :
   LJARCH=arm
+elif grep 'LJ_TARGET_ARM64 ' conftest.i >/dev/null 2>&1; then :
+  LJARCH=arm64
 elif grep 'LJ_TARGET_PPC ' conftest.i >/dev/null 2>&1; then :
   LJARCH=ppc
-elif grep 'LJ_TARGET_PPCSPE ' conftest.i >/dev/null 2>&1; then :
-  LJARCH=ppcspe
+         if grep 'LJ_LE 1' conftest.i >/dev/null 2>&1; then :
+  echo '-DLJ_ARCH_ENDIAN=LUAJIT_LE' >>native_flags
+else
+  echo '-DLJ_ARCH_ENDIAN=LUAJIT_BE' >>native_flags
+fi
 elif grep 'LJ_TARGET_MIPS ' conftest.i >/dev/null 2>&1; then :
   LJARCH=mips
          if grep 'MIPSEL ' conftest.i >/dev/null 2>&1; then :
-  echo '-D__MPISEL__=1' >>native_flags
+  echo '-D__MIPSEL__=1' >>native_flags
 fi
 else
   as_fn_error $? "Sorry, unsupported architecture" "$LINENO" 5
@@ -13988,8 +14172,9 @@ if grep 'LJ_TARGET_PS3 1'conftest.i >/dev/null 2>&1; then :
          echo '-D__CELLOS_LV2__' >>native_flags
          LUAJIT_DEFINES="$LUAJIT_DEFINES -DLUAJIT_USE_SYSMALLOC"
 fi
-if grep 'LJ_NO_UNWIND 1'conftest.i >/dev/null 2>&1; then :
-  echo '-DLUAJIT_NO_UNWIND' >>native_flags
+if grep 'LJ_NO_UNWIND 1' conftest.i >/dev/null 2>&1; then :
+  echo '-D NO_UNWIND' >>dynasm_flags
+         echo '-DLUAJIT_NO_UNWIND' >>native_flags
 fi
 echo "-DLUAJIT_TARGET=LUAJIT_ARCH_$LJARCH" >>native_flags
 if grep 'LJ_ARCH_BITS 64' conftest.i >/dev/null 2>&1; then :
@@ -14031,7 +14216,11 @@ case $LJARCH in #(
   echo '-D SSE' >>dynasm_flags
 fi ;; #(
   x64) :
-    DASM_ARCH=x86 ;; #(
+    if grep 'LJ_FR2 1' conftest.i >/dev/null 2>&1; then :
+
+else
+  DASM_ARCH=x86
+fi ;; #(
   arm) :
     if test "x$LJHOST" = xiOS; then :
   echo '-D IOS' >>dynasm_flags
@@ -14039,15 +14228,15 @@ fi ;; #(
   ppc) :
     if grep 'LJ_ARCH_SQRT 1' conftest.i >/dev/null 2>&1; then :
   echo '-D SQRT' >>dynasm_flags
-fi
-                if grep 'LJ_ARCH_SQRT 1' conftest.i >/dev/null 2>&1; then :
-  echo '-D SQRT' >>dynasm_flags
 fi
                 if grep 'LJ_ARCH_ROUND 1' conftest.i >/dev/null 2>&1; then :
   echo '-D ROUND' >>dynasm_flags
 fi
-                if grep 'LJ_ARCH_PPC64 1' conftest.i >/dev/null 2>&1; then :
+                if grep 'LJ_ARCH_PPC32ON64 1' conftest.i >/dev/null 2>&1; then :
   echo '-D GPR64' >>dynasm_flags
+fi
+                if grep 'LJ_ARCH_PPC64 ' conftest.i >/dev/null 2>&1; then :
+  DASM_ARCH=ppc64
 fi
                 if test "x$LJHOST" = xPS3; then :
   echo '-D PPE -D TOC' >>dynasm_flags
@@ -14077,7 +14266,7 @@ CPPFLAGS=$lj_save_CPPFLAGS
 case $LJHOST in #(
   Windows) :
     LJVM_MODE=peobj
-                    LUAJIT_DEFINES="$LUAJIT_DEFINES -malign-double" ;; #(
+                    LUAJIT_CFLAGS=-malign-double ;; #(
   Darwin | iOS) :
     LJVM_MODE=machasm ;; #(
   *) :
@@ -14095,6 +14284,15 @@ fi
 
 
 
+
+case $host_os:$host_cpu in #(
+  *darwin*:x86_64) :
+    LIBLUAJIT_LDEXTRA='-image_base 7fff04c4a000' ;; #(
+  *) :
+     ;;
+esac
+
+
  if test "x$enable_build" != xno; then
   build_TRUE=
   build_FALSE='#'
@@ -14113,7 +14311,7 @@ fi
 echo timestamp >config.force
 fi
 
-LUAJIT_TREE=LuaJIT-2.0.4
+LUAJIT_TREE=LuaJIT-2.1.0-beta1
 
 
 ac_config_headers="$ac_config_headers config.h"
@@ -14680,7 +14878,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by luajit for TeX Live $as_me 2.0.4, which was
+This file was extended by luajit for TeX Live $as_me 2.1.0-beta1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -14746,7 +14944,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-luajit for TeX Live config.status 2.0.4
+luajit for TeX Live config.status 2.1.0-beta1
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/source/libs/luajit/configure.ac b/source/libs/luajit/configure.ac
index 230570809650f84e5917169e7ec5c121a0e29a33..bec139f35d4b3ad187d617861c79f9a5db99d544 100644
--- a/source/libs/luajit/configure.ac
+++ b/source/libs/luajit/configure.ac
@@ -1,6 +1,6 @@
 dnl Process this file with autoconf to produce a configure script.
 dnl
-dnl   Copyright (C) 2014 Peter Breitenlohner <tex-live@tug.org>
+dnl   Copyright (C) 2014, 2015 Peter Breitenlohner <tex-live@tug.org>
 dnl
 dnl   This file is free software; the copyright holder
 dnl   gives unlimited permission to copy and/or distribute it,
@@ -18,6 +18,7 @@ KPSE_BASIC([luajit])
 KPSE_CANONICAL_HOST
 
 AC_PROG_CC
+AC_PROG_CC_C99
 AM_PROG_AS
 AC_PROG_RANLIB
 AC_PROG_LN_S
@@ -47,14 +48,19 @@ AS_CASE([$host_os],
 LJ_ARCH()
 AS_CASE([$LJHOST],
         [Windows], [LJVM_MODE=peobj
-                    LUAJIT_DEFINES="$LUAJIT_DEFINES -malign-double"],
+                    LUAJIT_CFLAGS=-malign-double],
         [Darwin | iOS], [LJVM_MODE=machasm],
                         [LJVM_MODE=elfasm])
 AM_CONDITIONAL([PEOBJ], [test "x$LJVM_MODE" = xpeobj])
 
+AC_SUBST([LUAJIT_CFLAGS])
 AC_SUBST([LJHOST])
 AC_SUBST([LJVM_MODE])
 
+AS_CASE([$host_os:$host_cpu],
+        [*darwin*:x86_64], [LIBLUAJIT_LDEXTRA='-image_base 7fff04c4a000'])
+AC_SUBST([LIBLUAJIT_LDEXTRA])
+
 AM_CONDITIONAL([build], [test "x$enable_build" != xno])
 
 if test "x$enable_build" != xno || test -f config.force; then
diff --git a/source/libs/luajit/include/Makefile.in b/source/libs/luajit/include/Makefile.in
index d6eb81b10930c0b77eef352df6a545626433bbc6..654a48df59b9cc21a386a7109229375697d8322a 100644
--- a/source/libs/luajit/include/Makefile.in
+++ b/source/libs/luajit/include/Makefile.in
@@ -170,6 +170,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
+LIBLUAJIT_LDEXTRA = @LIBLUAJIT_LDEXTRA@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
@@ -181,6 +182,7 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
 LUAJITVERSION = @LUAJITVERSION@
+LUAJIT_CFLAGS = @LUAJIT_CFLAGS@
 LUAJIT_DEFINES = @LUAJIT_DEFINES@
 LUAJIT_LDEXTRA = @LUAJIT_LDEXTRA@
 LUAJIT_LT_VERSINFO = @LUAJIT_LT_VERSINFO@
diff --git a/source/libs/luajit/m4/lj-system.m4 b/source/libs/luajit/m4/lj-system.m4
index 623180c33356deca5480696642bd7d9b83f3534d..768458c88a12a03131e338a1530471c4a073a5a2 100644
--- a/source/libs/luajit/m4/lj-system.m4
+++ b/source/libs/luajit/m4/lj-system.m4
@@ -1,5 +1,5 @@
 # Autoconf macros for luajit.
-# Copyright (C) 2014 Peter Breitenlohner <tex-live@tug.org>
+# Copyright (C) 2014, 2015 Peter Breitenlohner <tex-live@tug.org>
 #
 # This file is free software; the copyright holder
 # gives unlimited permission to copy and/or distribute it,
@@ -16,24 +16,28 @@ AS_IF([grep 'LJ_TARGET_X64 ' conftest.i >/dev/null 2>&1],
         [LJARCH=x64],
       [grep 'LJ_TARGET_X86 ' conftest.i >/dev/null 2>&1],
         [LJARCH=x86
-         LUAJIT_DEFINES="$LUAJIT_DEFINES -march=i686"],
+         LUAJIT_CFLAGS='-march=i686 -msse -msse2 -mfpmath=sse'],
       [grep 'LJ_TARGET_ARM ' conftest.i >/dev/null 2>&1],
         [LJARCH=arm],
+      [grep 'LJ_TARGET_ARM64 ' conftest.i >/dev/null 2>&1],
+        [LJARCH=arm64],
       [grep 'LJ_TARGET_PPC ' conftest.i >/dev/null 2>&1],
-        [LJARCH=ppc],
-      [grep 'LJ_TARGET_PPCSPE ' conftest.i >/dev/null 2>&1],
-        [LJARCH=ppcspe],
+        [LJARCH=ppc
+         AS_IF([grep 'LJ_LE 1' conftest.i >/dev/null 2>&1],
+                 [echo '-DLJ_ARCH_ENDIAN=LUAJIT_LE' >>native_flags],
+                 [echo '-DLJ_ARCH_ENDIAN=LUAJIT_BE' >>native_flags])],
       [grep 'LJ_TARGET_MIPS ' conftest.i >/dev/null 2>&1],
         [LJARCH=mips
          AS_IF([grep 'MIPSEL ' conftest.i >/dev/null 2>&1],
-                 [echo '-D__MPISEL__=1' >>native_flags])],
+                 [echo '-D__MIPSEL__=1' >>native_flags])],
         [AC_MSG_ERROR([Sorry, unsupported architecture])])
 AS_IF([grep 'LJ_TARGET_PS3 1'conftest.i >/dev/null 2>&1],
         [LJHOST='PS3'
          echo '-D__CELLOS_LV2__' >>native_flags
          LUAJIT_DEFINES="$LUAJIT_DEFINES -DLUAJIT_USE_SYSMALLOC"])
-AS_IF([grep 'LJ_NO_UNWIND 1'conftest.i >/dev/null 2>&1],
-        [echo '-DLUAJIT_NO_UNWIND' >>native_flags])
+AS_IF([grep 'LJ_NO_UNWIND 1' conftest.i >/dev/null 2>&1],
+        [echo '-D NO_UNWIND' >>dynasm_flags
+         echo '-DLUAJIT_NO_UNWIND' >>native_flags])
 echo "-DLUAJIT_TARGET=LUAJIT_ARCH_$LJARCH" >>native_flags
 AS_IF([grep 'LJ_ARCH_BITS 64' conftest.i >/dev/null 2>&1],
         [echo '-D P64' >>dynasm_flags
@@ -61,17 +65,18 @@ DASM_ARCH=$LJARCH
 AS_CASE([$LJARCH],
         [x86], [AS_IF([grep '__SSE2__ 1' conftest.i >/dev/null 2>&1],
                       [echo '-D SSE' >>dynasm_flags])],
-        [x64], [DASM_ARCH=x86],
+        [x64], [AS_IF([grep 'LJ_FR2 1' conftest.i >/dev/null 2>&1],
+                      [], [DASM_ARCH=x86])],
         [arm], [AS_IF([test "x$LJHOST" = xiOS],
                       [echo '-D IOS' >>dynasm_flags])],
         [ppc], [AS_IF([grep 'LJ_ARCH_SQRT 1' conftest.i >/dev/null 2>&1],
                       [echo '-D SQRT' >>dynasm_flags])
-                AS_IF([grep 'LJ_ARCH_SQRT 1' conftest.i >/dev/null 2>&1],
-                      [echo '-D SQRT' >>dynasm_flags])
                 AS_IF([grep 'LJ_ARCH_ROUND 1' conftest.i >/dev/null 2>&1],
                       [echo '-D ROUND' >>dynasm_flags])
-                AS_IF([grep 'LJ_ARCH_PPC64 1' conftest.i >/dev/null 2>&1],
+                AS_IF([grep 'LJ_ARCH_PPC32ON64 1' conftest.i >/dev/null 2>&1],
                       [echo '-D GPR64' >>dynasm_flags])
+                AS_IF([grep 'LJ_ARCH_PPC64 ' conftest.i >/dev/null 2>&1],
+                      [DASM_ARCH=ppc64])
                 AS_IF([test "x$LJHOST" = xPS3],
                       [echo '-D PPE -D TOC' >>dynasm_flags])])
 AS_CASE([$LJHOST],
diff --git a/source/libs/luajit/native/configure b/source/libs/luajit/native/configure
index 794770bc8f87f6f5f766ea1012e9244b7e971b4b..6c8da53c2abaa6f9d6b57b8e54f0f6ee50cdbd5f 100755
--- a/source/libs/luajit/native/configure
+++ b/source/libs/luajit/native/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for luajit native 2.0.4.
+# Generated by GNU Autoconf 2.69 for luajit native 2.1.0-beta1.
 #
 # Report bugs to <tex-k@tug.org>.
 #
@@ -580,12 +580,12 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='luajit native'
 PACKAGE_TARNAME='luajit-native'
-PACKAGE_VERSION='2.0.4'
-PACKAGE_STRING='luajit native 2.0.4'
+PACKAGE_VERSION='2.1.0-beta1'
+PACKAGE_STRING='luajit native 2.1.0-beta1'
 PACKAGE_BUGREPORT='tex-k@tug.org'
 PACKAGE_URL=''
 
-ac_unique_file="../LuaJIT-2.0.4/src/luajit.h"
+ac_unique_file="../LuaJIT-2.1.0-beta1/src/luajit.h"
 # Factoring default headers for most tests.
 ac_includes_default="\
 #include <stdio.h>
@@ -1286,7 +1286,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures luajit native 2.0.4 to adapt to many kinds of systems.
+\`configure' configures luajit native 2.1.0-beta1 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1356,7 +1356,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of luajit native 2.0.4:";;
+     short | recursive ) echo "Configuration of luajit native 2.1.0-beta1:";;
    esac
   cat <<\_ACEOF
 
@@ -1453,7 +1453,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-luajit native configure 2.0.4
+luajit native configure 2.1.0-beta1
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1847,7 +1847,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by luajit native $as_me 2.0.4, which was
+It was created by luajit native $as_me 2.1.0-beta1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -3770,7 +3770,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='luajit-native'
- VERSION='2.0.4'
+ VERSION='2.1.0-beta1'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -5304,7 +5304,7 @@ esac
 
 
 
-LUAJIT_TREE=../LuaJIT-2.0.4
+LUAJIT_TREE=../LuaJIT-2.1.0-beta1
 
 
 ac_config_headers="$ac_config_headers config.h"
@@ -5851,7 +5851,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by luajit native $as_me 2.0.4, which was
+This file was extended by luajit native $as_me 2.1.0-beta1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -5917,7 +5917,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-luajit native config.status 2.0.4
+luajit native config.status 2.1.0-beta1
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/source/libs/luajit/version.ac b/source/libs/luajit/version.ac
index 562fb72ce96391f545a0cfb3207e7a3ccea42f86..03e99a5d2d7f44ab810ce70ce4c57661ae25ce78 100644
--- a/source/libs/luajit/version.ac
+++ b/source/libs/luajit/version.ac
@@ -8,4 +8,4 @@ dnl
 dnl --------------------------------------------------------
 dnl
 dnl  m4-include this file to define the current luajit version
-m4_define([luajit_version], [2.0.4])
+m4_define([luajit_version], [2.1.0-beta1])
diff --git a/source/m4/ChangeLog b/source/m4/ChangeLog
index 028e4931ce258aead803c2eac06a693202979245..474e8d465b5e4c5200c378820607a2c1477f40cd 100644
--- a/source/m4/ChangeLog
+++ b/source/m4/ChangeLog
@@ -1,3 +1,12 @@
+2015-09-04  Peter Breitenlohner  <peb@mppmu.mpg.de>
+
+	* kpse-lib-version.m4 (KPSE_LIB_VERSION): Allow more general
+	version number suffix strings, e.g., "-beta1".
+
+2015-08-14  Peter Breitenlohner  <peb@mppmu.mpg.de>
+
+	* kpse-pkgs.m4 (kpse_texk_pkgs): Add gregorio.
+
 2015-08-13  Peter Breitenlohner  <peb@mppmu.mpg.de>
 
 	* kpse-setup.m4: Allow shared libraries (DLLs) in native TeXLive
diff --git a/source/m4/kpse-lib-version.m4 b/source/m4/kpse-lib-version.m4
index b6fa4ce9908610e32f3721d964f3cb3cedfc0236..12fab2cb610478a79f27fa3f4dc14905473cd031 100644
--- a/source/m4/kpse-lib-version.m4
+++ b/source/m4/kpse-lib-version.m4
@@ -1,5 +1,5 @@
 # Private macros for the TeX Live (TL) tree.
-# Copyright (C) 2011 Peter Breitenlohner <tex-live@tug.org>
+# Copyright (C) 2011-2015 Peter Breitenlohner <tex-live@tug.org>
 #
 # This file is free software; the copyright holder
 # gives unlimited permission to copy and/or distribute it,
@@ -8,15 +8,15 @@
 # KPSE_LIB_VERSION(NAME)
 # ----------------------
 # Split NAME_version into NAME_major, NAME_minor, and NAME_micro
-# (ignoring lowercase letters forming an optional suffix).
+# (ignoring an optional suffix starting with a non-digit).
 AC_DEFUN([KPSE_LIB_VERSION],
-[m4_bpatsubst($1_version, [^\([0-9]+\).\([0-9]+\).\([0-9]+\)[a-z]*$],
+[m4_bpatsubst($1_version, [^\([0-9]+\).\([0-9]+\).\([0-9]+\).*$],
               [m4_define([$1_major], [\1])m4_define([$1_minor], [\2])m4_define([$1_micro], [\3])])[]dnl
 ]) # KPSE_LIB_VERSION
 
 # KPSE_LT_VERSION(NAME)
 # ---------------------
-# Split NAME_version as above and define _LT_VERSINFO.
+# Split NAME_version as above and define NAME_LT_VERSINFO.
 AC_DEFUN([KPSE_LT_VERSION],
 [KPSE_LIB_VERSION([$1])
 AC_SUBST(AS_TR_CPP($1)[_LT_VERSINFO],
diff --git a/source/m4/kpse-pkgs.m4 b/source/m4/kpse-pkgs.m4
index 744e0c873c352d5a803d3ff2c87ee1c3979757e1..eb0c0160b0831a7624b66048192fc84ad7c91106 100644
--- a/source/m4/kpse-pkgs.m4
+++ b/source/m4/kpse-pkgs.m4
@@ -114,6 +114,7 @@ dvipng
 dvipos
 dvipsk
 dvisvgm
+gregorio
 gsftopk
 lcdf-typetools
 makeindexk
diff --git a/source/texk/README b/source/texk/README
index c847489f406543ccd85ab50f70b99fbf738207ff..27b078715b2af432e329fb1209a7636393ac8009 100644
--- a/source/texk/README
+++ b/source/texk/README
@@ -1,4 +1,4 @@
-$Id: README 37981 2015-07-28 12:22:49Z peter $
+$Id: README 38285 2015-09-03 06:54:44Z peter $
 Copyright 2006-2015 TeX Users Group.
 You may freely use, modify and/or distribute this file.
 
@@ -62,6 +62,9 @@ dvipsk - maintained here, by us
 dvisvgm 1.10 - checked 28jul15
   http://dvisvgm.2ix.de/Downloads
 
+gregorio 4.0.0-beta2
+  https://github.com/gregorio-project/gregorio/releases/
+
 gsftopk - from Paul Vojta's xdvi?
 
 kpathsea - maintained here, by us
diff --git a/source/texk/texlive/linked_scripts/Makefile.am b/source/texk/texlive/linked_scripts/Makefile.am
index b12aa912f75f1100259f2d723ed9ac711e8665c2..42a2071877fdc5ef6844fee886cbd404d54b1ab3 100644
--- a/source/texk/texlive/linked_scripts/Makefile.am
+++ b/source/texk/texlive/linked_scripts/Makefile.am
@@ -54,6 +54,7 @@ texmf_scriptsdir = $(datarootdir)/texmf-dist/scripts
 texmf_shell_scripts = \
 	adhocfilelist/adhocfilelist.sh \
 	bibexport/bibexport.sh \
+	checklistings/checklistings.sh \
 	dtxgen/dtxgen \
 	installfont/installfont-tl \
 	latexfileversion/latexfileversion \
diff --git a/source/texk/texlive/linked_scripts/Makefile.in b/source/texk/texlive/linked_scripts/Makefile.in
index 18c6c4d0040fb1a3adcecc67d07b257c0d471b97..c061fa62fb772db2e2bd534797bff80199814f30 100644
--- a/source/texk/texlive/linked_scripts/Makefile.in
+++ b/source/texk/texlive/linked_scripts/Makefile.in
@@ -269,6 +269,7 @@ texmf_scriptsdir = $(datarootdir)/texmf-dist/scripts
 texmf_shell_scripts = \
 	adhocfilelist/adhocfilelist.sh \
 	bibexport/bibexport.sh \
+	checklistings/checklistings.sh \
 	dtxgen/dtxgen \
 	installfont/installfont-tl \
 	latexfileversion/latexfileversion \