diff --git a/source/libs/luajit/LuaJIT-src/.relver b/source/libs/luajit/LuaJIT-src/.relver new file mode 100644 index 0000000000000000000000000000000000000000..c0a29354a7ae87f71da74d1fd070d0c3a6b1e692 --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/.relver @@ -0,0 +1 @@ +1736781742 diff --git a/source/libs/luajit/LuaJIT-src/COPYRIGHT b/source/libs/luajit/LuaJIT-src/COPYRIGHT index 6ed40025ae076fa48318e747d74b15991613b137..a192ae49daf91c0162d022f98bb9b7122fb89ec2 100644 --- a/source/libs/luajit/LuaJIT-src/COPYRIGHT +++ b/source/libs/luajit/LuaJIT-src/COPYRIGHT @@ -1,7 +1,7 @@ =============================================================================== -LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ +LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/ -Copyright (C) 2005-2017 Mike Pall. All rights reserved. +Copyright (C) 2005-2025 Mike Pall. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -[ MIT license: http://www.opensource.org/licenses/mit-license.php ] +[ MIT license: https://www.opensource.org/licenses/mit-license.php ] =============================================================================== [ LuaJIT includes code from Lua 5.1/5.2, which has this license statement: ] @@ -51,6 +51,6 @@ THE SOFTWARE. 
This is a version (aka dlmalloc) of malloc/free/realloc written by Doug Lea and released to the public domain, as explained at -http://creativecommons.org/licenses/publicdomain +https://creativecommons.org/licenses/publicdomain =============================================================================== diff --git a/source/libs/luajit/LuaJIT-src/Makefile b/source/libs/luajit/LuaJIT-src/Makefile index 0f9330893fdea4de73d0da82c49aa336f4e556d5..d789e9f37456d5b28d5715cbc633218b22158aed 100644 --- a/source/libs/luajit/LuaJIT-src/Makefile +++ b/source/libs/luajit/LuaJIT-src/Makefile @@ -10,16 +10,21 @@ # For MSVC, please follow the instructions given in src/msvcbuild.bat. # For MinGW and Cygwin, cd to src and run make with the Makefile there. # -# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +# Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ############################################################################## MAJVER= 2 MINVER= 1 -RELVER= 0 -PREREL= -beta3 -VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL) ABIVER= 5.1 +# LuaJIT uses rolling releases. The release version is based on the time of +# the latest git commit. The 'git' command must be available during the build. +RELVER= $(shell cat src/luajit_relver.txt 2>/dev/null || : ) +# Note: setting it with := doesn't work, since it will change during the build. + +MMVERSION= $(MAJVER).$(MINVER) +VERSION= $(MMVERSION).$(RELVER) + ############################################################################## # # Change the installation path as needed. 
This automatically adjusts @@ -32,11 +37,13 @@ export MULTILIB= lib DPREFIX= $(DESTDIR)$(PREFIX) INSTALL_BIN= $(DPREFIX)/bin INSTALL_LIB= $(DPREFIX)/$(MULTILIB) -INSTALL_SHARE= $(DPREFIX)/share -INSTALL_INC= $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER) +INSTALL_SHARE_= $(PREFIX)/share +INSTALL_SHARE= $(DESTDIR)$(INSTALL_SHARE_) +INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION) +INSTALL_INC= $(INSTALL_DEFINC) -INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(VERSION) -INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit +export INSTALL_LJLIBD= $(INSTALL_SHARE_)/luajit-$(MMVERSION) +INSTALL_JITLIB= $(DESTDIR)$(INSTALL_LJLIBD)/jit INSTALL_LMODD= $(INSTALL_SHARE)/lua INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER) INSTALL_CMODD= $(INSTALL_LIB)/lua @@ -49,10 +56,10 @@ INSTALL_TSYMNAME= luajit INSTALL_ANAME= libluajit-$(ABIVER).a INSTALL_SOSHORT1= libluajit-$(ABIVER).so INSTALL_SOSHORT2= libluajit-$(ABIVER).so.$(MAJVER) -INSTALL_SONAME= $(INSTALL_SOSHORT2).$(MINVER).$(RELVER) +INSTALL_SONAME= libluajit-$(ABIVER).so.$(VERSION) INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib -INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib +INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(VERSION).dylib INSTALL_PCNAME= luajit.pc INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME) @@ -65,7 +72,7 @@ INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME) INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \ $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD) -UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) \ +UNINSTALL_DIRS= $(INSTALL_JITLIB) $(DESTDIR)$(INSTALL_LJLIBD) $(INSTALL_INC) \ $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD) RM= rm -f @@ -75,9 +82,13 @@ SYMLINK= ln -sf INSTALL_X= install -m 0755 INSTALL_F= install -m 0644 UNINSTALL= $(RM) -LDCONFIG= ldconfig -n +LDCONFIG= ldconfig -n 2>/dev/null SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \ - -e 
"s|^multilib=.*|multilib=$(MULTILIB)|" + -e "s|^multilib=.*|multilib=$(MULTILIB)|" \ + -e "s|^relver=.*|relver=$(RELVER)|" +ifneq ($(INSTALL_DEFINC),$(INSTALL_INC)) + SED_PC+= -e "s|^includedir=.*|includedir=$(INSTALL_INC)|" +endif FILE_T= luajit FILE_A= libluajit.a @@ -88,7 +99,9 @@ FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ - dis_mips64.lua dis_mips64el.lua vmdef.lua + dis_mips64.lua dis_mips64el.lua \ + dis_mips64r6.lua dis_mips64r6el.lua \ + vmdef.lua ifeq (,$(findstring Windows,$(OS))) HOST_SYS:= $(shell uname -s) @@ -102,6 +115,7 @@ ifeq (Darwin,$(TARGET_SYS)) INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1) INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2) LDCONFIG= : + SED_PC+= -e "s| -Wl,-E||" endif ############################################################################## @@ -109,9 +123,9 @@ endif INSTALL_DEP= src/luajit default all $(INSTALL_DEP): - @echo "==== Building LuaJIT $(VERSION) ====" + @echo "==== Building LuaJIT $(MMVERSION) ====" $(MAKE) -C src - @echo "==== Successfully built LuaJIT $(VERSION) ====" + @echo "==== Successfully built LuaJIT $(MMVERSION) ====" install: $(INSTALL_DEP) @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ====" @@ -121,7 +135,7 @@ install: $(INSTALL_DEP) $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) cd src && test -f $(FILE_SO) && \ $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ - $(LDCONFIG) $(INSTALL_LIB) && \ + ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \ $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || : cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN) @@ -130,18 +144,12 @@ install: $(INSTALL_DEP) $(RM) $(FILE_PC).tmp cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) + $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM) 
@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" - @echo "" - @echo "Note: the development releases deliberately do NOT install a symlink for luajit" - @echo "You can do this now by running this command (with sudo):" - @echo "" - @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)" - @echo "" - uninstall: @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" - $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) + $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) for file in $(FILES_JITLIB); do \ $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ done @@ -155,8 +163,9 @@ uninstall: ############################################################################## amalg: - @echo "Building LuaJIT $(VERSION)" + @echo "==== Building LuaJIT $(MMVERSION) (amalgamation) ====" $(MAKE) -C src amalg + @echo "==== Successfully built LuaJIT $(MMVERSION) (amalgamation) ====" clean: $(MAKE) -C src clean diff --git a/source/libs/luajit/LuaJIT-src/README b/source/libs/luajit/LuaJIT-src/README index 2b9ae9d2231602713c536af69b899359d8cbb905..201f1b7272b2cd1059582f2c0987b7162ecababf 100644 --- a/source/libs/luajit/LuaJIT-src/README +++ b/source/libs/luajit/LuaJIT-src/README @@ -1,11 +1,11 @@ -README for LuaJIT 2.1.0-beta3 ------------------------------ +README for LuaJIT 2.1 +--------------------- LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. -Project Homepage: http://luajit.org/ +Project Homepage: https://luajit.org/ -LuaJIT is Copyright (C) 2005-2017 Mike Pall. +LuaJIT is Copyright (C) 2005-2025 Mike Pall. LuaJIT is free software, released under the MIT license. See full Copyright Notice in the COPYRIGHT file or in luajit.h. 
diff --git a/source/libs/luajit/LuaJIT-src/doc/bluequad-print.css b/source/libs/luajit/LuaJIT-src/doc/bluequad-print.css index 62e1c165909bd2656630258189e5c6d07233cfbb..5bfda5d323fd10a09b0b5aa7903d938bacddb7d0 100644 --- a/source/libs/luajit/LuaJIT-src/doc/bluequad-print.css +++ b/source/libs/luajit/LuaJIT-src/doc/bluequad-print.css @@ -1,4 +1,4 @@ -/* Copyright (C) 2004-2017 Mike Pall. +/* Copyright (C) 2004-2025 Mike Pall. * * You are welcome to use the general ideas of this design for your own sites. * But please do not steal the stylesheet, the layout or the color scheme. diff --git a/source/libs/luajit/LuaJIT-src/doc/bluequad.css b/source/libs/luajit/LuaJIT-src/doc/bluequad.css index be2c4bf2d51d8a99641012f02eb6bb56c7de220e..5334a7596b7a67965bd496129a1f288f9c67c341 100644 --- a/source/libs/luajit/LuaJIT-src/doc/bluequad.css +++ b/source/libs/luajit/LuaJIT-src/doc/bluequad.css @@ -1,4 +1,4 @@ -/* Copyright (C) 2004-2017 Mike Pall. +/* Copyright (C) 2004-2025 Mike Pall. * * You are welcome to use the general ideas of this design for your own sites. * But please do not steal the stylesheet, the layout or the color scheme. 
@@ -206,11 +206,9 @@ img.right { .ext { color: #ff8000; } -.new { - font-size: 6pt; - vertical-align: middle; - background: #ff8000; - color: #ffffff; +.note { + padding: 0.5em 1em; + border-left: 3px solid #bfcfff; } #site { clear: both; diff --git a/source/libs/luajit/LuaJIT-src/doc/changes.html b/source/libs/luajit/LuaJIT-src/doc/changes.html deleted file mode 100644 index a66a8d95049ee8ccfcd69733e0b24f41c76ab25f..0000000000000000000000000000000000000000 --- a/source/libs/luajit/LuaJIT-src/doc/changes.html +++ /dev/null @@ -1,883 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>LuaJIT Change History</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -<meta name="Language" content="en"> -<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> -<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -<style type="text/css"> -div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; } -</style> -</head> -<body> -<div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> -</div> -<div id="head"> -<h1>LuaJIT Change History</h1> -</div> -<div id="nav"> -<ul><li> -<a href="luajit.html">LuaJIT</a> -<ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> -</li><li> -<a href="install.html">Installation</a> -</li><li> -<a href="running.html">Running</a> -</li></ul> -</li><li> -<a href="extensions.html">Extensions</a> -<ul><li> -<a href="ext_ffi.html">FFI Library</a> -<ul><li> -<a href="ext_ffi_tutorial.html">FFI Tutorial</a> -</li><li> -<a href="ext_ffi_api.html">ffi.* API</a> -</li><li> -<a href="ext_ffi_semantics.html">FFI Semantics</a> -</li></ul> -</li><li> -<a href="ext_jit.html">jit.* Library</a> -</li><li> -<a 
href="ext_c_api.html">Lua/C API</a> -</li><li> -<a href="ext_profiler.html">Profiler</a> -</li></ul> -</li><li> -<a href="status.html">Status</a> -<ul><li> -<a class="current" href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> -</li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> -</li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> -</li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> -</li></ul> -</div> -<div id="main"> -<p> -This is a list of changes between the released versions of LuaJIT.<br> -The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT 2.0.5</strong>.<br> -</p> -<p> -Please check the -<a href="http://luajit.org/changes.html"><span class="ext">»</span> Online Change History</a> -to see whether newer versions are available. -</p> - -<div class="major" style="background: #d0d0ff;"> -<h2 id="LuaJIT-2.1.0-beta3">LuaJIT 2.1.0-beta3 — 2017-05-01</h2> -<ul> -<li>Rewrite memory block allocator.</li> -<li>Add various extension from Lua 5.2/5.3.</li> -<li>Remove old Lua 5.0 compatibility defines.</li> -<li>Set arg table before evaluating <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li> -<li>Fix FOLD rules for <tt>math.abs()</tt> and FP negation.</li> -<li>Fix soft-float <tt>math.abs()</tt> and negation.</li> -<li>Fix formatting of some small denormals at low precision.</li> -<li>LJ_GC64: Add JIT compiler support.</li> -<li>x64/LJ_GC64: Add JIT compiler backend.</li> -<li>x86/x64: Generate BMI2 shifts and rotates, if available.</li> -<li>Windows/x86: Add full exception interoperability.</li> -<li>ARM64: Add big-endian support.</li> -<li>ARM64: Add JIT compiler backend.</li> -<li>MIPS: Fix <tt>TSETR</tt> barrier.</li> -<li>MIPS: Support MIPS16 interlinking.</li> -<li>MIPS soft-float: Fix code generation for <tt>HREF</tt>.</li> -<li>MIPS64: Add MIPS64 hard-float JIT compiler backend.</li> 
-<li>MIPS64: Add MIPS64 hard-float/soft-float support to interpreter.</li> -<li>FFI: Compile bitfield loads/stores.</li> -<li>Various fixes common with the 2.0 branch.</li> -</ul> - -<h2 id="LuaJIT-2.1.0-beta2">LuaJIT 2.1.0-beta2 — 2016-03-03</h2> -<ul> -<li>Enable trace stitching.</li> -<li>Use internal implementation for converting FP numbers to strings.</li> -<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li> -<li>Add MIPS soft-float support.</li> -<li>Switch MIPS port to dual-number mode.</li> -<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li> -<li>FFI: Add <tt>ssize_t</tt> declaration.</li> -<li>FFI: Parse <tt>#line NN</tt> and <tt>#NN</tt>.</li> -<li>Various minor fixes.</li> -</ul> - -<h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 — 2015-08-25</h2> -<p> -This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0. -Please take a look at the commit history for more details. -</p> -<ul> -<li>Changes to the VM core: -<ul> -<li>Add low-overhead profiler (<tt>-jp</tt>).</li> -<li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li> -<li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li> -<li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li> -<li>Parse binary number literals (<tt>0bxxx</tt>).</li> -</ul></li> -<li>Improvements to the JIT compiler: -<ul> -<li>Add trace stitching (disabled for now).</li> -<li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li> -<li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li> -<li>Compile <tt>BC_TSETM</tt>, e.g. 
<tt>{1,2,3,f()}</tt>.</li> -<li>Compile string concatenations (<tt>BC_CAT</tt>).</li> -<li>Compile <tt>__concat</tt> metamethod.</li> -<li>Various minor optimizations.</li> -</ul></li> -<li>Internal Changes: -<ul> -<li>Add support for embedding LuaJIT bytecode for builtins.</li> -<li>Replace various builtins with embedded bytecode.</li> -<li>Refactor string buffers and string formatting.</li> -<li>Remove obsolete non-truncating number to integer conversions.</li> -</ul></li> -<li>Ports: -<ul> -<li>Add Xbox One port (<tt>LJ_GC64</tt> mode).</li> -<li>ARM64: Add port of the interpreter (<tt>LJ_GC64</tt> mode).</li> -<li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt> mode.</li> -<li>x86/x64: Drop internal x87 math functions. Use libm functions.</li> -<li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li> -<li>PPC/e500: Drop support for this architecture.</li> -</ul></li> -<li>FFI library: -<ul> -<li>FFI: Add 64 bit bitwise operations.</li> -<li>FFI: Compile VLA/VLS and large cdata allocations with default initialization.</li> -<li>FFI: Compile conversions from functions to function pointers.</li> -<li>FFI: Compile lightuserdata to <tt>void *</tt> conversion.</li> -<li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li> -<li>FFI: Add <tt>ffi.typeinfo()</tt>.</li> -</ul></li> -</ul> -</div> - -<div class="major" style="background: #ffffd0;"> -<h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 — 2017-05-01</h2> -<ul> -<li>Add workaround for MSVC 2015 stdio changes.</li> -<li>Limit mcode alloc probing, depending on the available pool size.</li> -<li>Fix overly restrictive range calculation in mcode allocation.</li> -<li>Fix out-of-scope goto handling in parser.</li> -<li>Remove internal <tt>__mode = "K"</tt> and replace with safe check.</li> -<li>Add "proto" field to <tt>jit.util.funcinfo()</tt>.</li> -<li>Fix GC step size calculation.</li> -<li>Initialize <tt>uv->immutable</tt> for upvalues of loaded chunks.</li> -<li>Fix for cdata vs. 
non-cdata arithmetics/comparisons.</li> -<li>Drop leftover regs in 'for' iterator assignment, too.</li> -<li>Fix PHI remarking in SINK pass.</li> -<li>Don't try to record outermost <tt>pcall()</tt> return to lower frame.</li> -<li>Add guard for obscure aliasing between open upvalues and SSA slots.</li> -<li>Remove assumption that <tt>lj_math_random_step()</tt> doesn't clobber FPRs.</li> -<li>Fix handling of non-numeric strings in arithmetic coercions.</li> -<li>Fix recording of <tt>select(n, ...)</tt> with off-trace varargs</li> -<li>Fix install for cross-builds.</li> -<li>Don't allocate unused 2nd result register in JIT compiler backend.</li> -<li>Drop marks from replayed instructions when sinking.</li> -<li>Fix unsinking check.</li> -<li>Properly handle OOM in <tt>trace_save()</tt>.</li> -<li>Limit number of arguments given to <tt>io.lines()</tt> and <tt>fp:lines()</tt>.</li> -<li>Fix narrowing of <tt>TOBIT</tt>.</li> -<li>OSX: Fix build with recent XCode.</li> -<li>x86/x64: Don't spill an explicit <tt>REF_BASE</tt> in the IR.</li> -<li>x86/x64: Fix instruction length decoder.</li> -<li>x86/x64: Search for exit jumps with instruction length decoder.</li> -<li>ARM: Fix <tt>BLX</tt> encoding for Thumb interworking calls.</li> -<li>MIPS: Don't use <tt>RID_GP</tt> as a scratch register.</li> -<li>MIPS: Fix emitted code for U32 to float conversion.</li> -<li>MIPS: Backport workaround for compact unwind tables.</li> -<li>MIPS: Fix cross-endian jit.bcsave.</li> -<li>MIPS: Fix <tt>BC_ISNEXT</tt> fallback path.</li> -<li>MIPS: Fix use of ffgccheck delay slots in interpreter.</li> -<li>FFI: Fix FOLD rules for <tt>int64_t</tt> comparisons.</li> -<li>FFI: Fix SPLIT pass for <tt>CONV i64.u64</tt>.</li> -<li>FFI: Fix <tt>ipairs()</tt> recording.</li> -<li>FFI: Don't propagate qualifiers into subtypes of complex.</li> -</ul> - -<h2 id="LuaJIT-2.0.4">LuaJIT 2.0.4 — 2015-05-14</h2> -<ul> -<li>Fix stack check in narrowing optimization.</li> -<li>Fix Lua/C API typecheck error for 
special indexes.</li> -<li>Fix string to number conversion.</li> -<li>Fix lexer error for chunks without tokens.</li> -<li>Don't compile <tt>IR_RETF</tt> after <tt>CALLT</tt> to ff with-side effects.</li> -<li>Fix <tt>BC_UCLO</tt>/<tt>BC_JMP</tt> join optimization in Lua parser.</li> -<li>Fix corner case in string to number conversion.</li> -<li>Gracefully handle <tt>lua_error()</tt> for a suspended coroutine.</li> -<li>Avoid error messages when building with Clang.</li> -<li>Fix snapshot #0 handling for traces with a stack check on entry.</li> -<li>Fix fused constant loads under high register pressure.</li> -<li>Invalidate backpropagation cache after DCE.</li> -<li>Fix ABC elimination.</li> -<li>Fix debug info for main chunk of stripped bytecode.</li> -<li>Fix FOLD rule for <tt>string.sub(s, ...) == k</tt>.</li> -<li>Fix FOLD rule for <tt>STRREF</tt> of <tt>SNEW</tt>.</li> -<li>Fix frame traversal while searching for error function.</li> -<li>Prevent GC estimate miscalculation due to buffer growth.</li> -<li>Prevent adding side traces for stack checks.</li> -<li>Fix top slot calculation for snapshots with continuations.</li> -<li>Fix check for reuse of SCEV results in <tt>FORL</tt>.</li> -<li>Add PS Vita port.</li> -<li>Fix compatibility issues with Illumos.</li> -<li>Fix DragonFly build (unsupported).</li> -<li>OpenBSD/x86: Better executable memory allocation for W^X mode.</li> -<li>x86: Fix argument checks for <tt>ipairs()</tt> iterator.</li> -<li>x86: <tt>lj_math_random_step()</tt> clobbers XMM regs on OSX Clang.</li> -<li>x86: Fix code generation for unused result of <tt>math.random()</tt>.</li> -<li>x64: Allow building with <tt>LUAJIT_USE_SYSMALLOC</tt> and <tt>LUAJIT_USE_VALGRIND</tt>.</li> -<li>x86/x64: Fix argument check for bit shifts.</li> -<li>x86/x64: Fix code generation for fused test/arith ops.</li> -<li>ARM: Fix write barrier check in <tt>BC_USETS</tt>.</li> -<li>PPC: Fix red zone overflow in machine code generation.</li> -<li>PPC: Don't use 
<tt>mcrxr</tt> on PPE.</li> -<li>Various archs: Fix excess stack growth in interpreter.</li> -<li>FFI: Fix FOLD rule for <tt>TOBIT</tt> + <tt>CONV num.u32</tt>.</li> -<li>FFI: Prevent DSE across <tt>ffi.string()</tt>.</li> -<li>FFI: No meta fallback when indexing pointer to incomplete struct.</li> -<li>FFI: Fix initialization of unions of subtypes.</li> -<li>FFI: Fix cdata vs. non-cdata arithmetic and comparisons.</li> -<li>FFI: Fix <tt>__index</tt>/<tt>__newindex</tt> metamethod resolution for ctypes.</li> -<li>FFI: Fix compilation of reference field access.</li> -<li>FFI: Fix frame traversal for backtraces with FFI callbacks.</li> -<li>FFI: Fix recording of indexing a struct pointer ctype object itself.</li> -<li>FFI: Allow non-scalar cdata to be compared for equality by address.</li> -<li>FFI: Fix pseudo type conversions for type punning.</li> -</ul> - -<h2 id="LuaJIT-2.0.3">LuaJIT 2.0.3 — 2014-03-12</h2> -<ul> -<li>Add PS4 port.</li> -<li>Add support for multilib distro builds.</li> -<li>Fix OSX build.</li> -<li>Fix MinGW build.</li> -<li>Fix Xbox 360 build.</li> -<li>Improve ULOAD forwarding for open upvalues.</li> -<li>Fix GC steps threshold handling when called by JIT-compiled code.</li> -<li>Fix argument checks for <tt>math.deg()</tt> and <tt>math.rad()</tt>.</li> -<li>Fix <tt>jit.flush(func|true)</tt>.</li> -<li>Respect <tt>jit.off(func)</tt> when returning to a function, too.</li> -<li>Fix compilation of <tt>string.byte(s, nil, n)</tt>.</li> -<li>Fix line number for relocated bytecode after closure fixup</li> -<li>Fix frame traversal for backtraces.</li> -<li>Fix ABC elimination.</li> -<li>Fix handling of redundant PHIs.</li> -<li>Fix snapshot restore for exit to function header.</li> -<li>Fix type punning alias analysis for constified pointers</li> -<li>Fix call unroll checks in the presence of metamethod frames.</li> -<li>Fix initial maxslot for down-recursive traces.</li> -<li>Prevent BASE register coalescing if parent uses <tt>IR_RETF</tt>.</li> 
-<li>Don't purge modified function from stack slots in <tt>BC_RET</tt>.</li> -<li>Fix recording of <tt>BC_VARG</tt>.</li> -<li>Don't access dangling reference to reallocated IR.</li> -<li>Fix frame depth display for bytecode dump in <tt>-jdump</tt>.</li> -<li>ARM: Fix register allocation when rematerializing FPRs.</li> -<li>x64: Fix store to upvalue for lightuserdata values.</li> -<li>FFI: Add missing GC steps for callback argument conversions.</li> -<li>FFI: Properly unload loaded DLLs.</li> -<li>FFI: Fix argument checks for <tt>ffi.string()</tt>.</li> -<li>FFI/x64: Fix passing of vector arguments to calls.</li> -<li>FFI: Rehash finalizer table after GC cycle, if needed.</li> -<li>FFI: Fix <tt>cts->L</tt> for cdata unsinking in snapshot restore.</li> -</ul> - -<h2 id="LuaJIT-2.0.2">LuaJIT 2.0.2 — 2013-06-03</h2> -<ul> -<li>Fix memory access check for fast string interning.</li> -<li>Fix MSVC intrinsics for older versions.</li> -<li>Add missing GC steps for <tt>io.*</tt> functions.</li> -<li>Fix spurious red zone overflows in machine code generation.</li> -<li>Fix jump-range constrained mcode allocation.</li> -<li>Inhibit DSE for implicit loads via calls.</li> -<li>Fix builtin string to number conversion for overflow digits.</li> -<li>Fix optional argument handling while recording builtins.</li> -<li>Fix optional argument handling in <tt>table.concat()</tt>.</li> -<li>Add partial support for building with MingW64 GCC 4.8-SEH.</li> -<li>Add missing PHI barrier to <tt>string.sub(str, a, b) == kstr</tt> FOLD rule.</li> -<li>Fix compatibility issues with Illumos.</li> -<li>ARM: Fix cache flush/sync for exit stubs of JIT-compiled code.</li> -<li>MIPS: Fix cache flush/sync for JIT-compiled code jump area.</li> -<li>PPC: Add <tt>plt</tt> suffix for external calls from assembler code.</li> -<li>FFI: Fix snapshot substitution in SPLIT pass.</li> -<li>FFI/x86: Fix register allocation for 64 bit comparisons.</li> -<li>FFI: Fix tailcall in lowest frame to C function with bool 
result.</li> -<li>FFI: Ignore <tt>long</tt> type specifier in <tt>ffi.istype()</tt>.</li> -<li>FFI: Fix calling conventions for 32 bit OSX and iOS simulator (struct returns).</li> -<li>FFI: Fix calling conventions for ARM hard-float EABI (nested structs).</li> -<li>FFI: Improve error messages for arithmetic and comparison operators.</li> -<li>FFI: Insert no-op type conversion for pointer to integer cast.</li> -<li>FFI: Fix unroll limit for <tt>ffi.fill()</tt>.</li> -<li>FFI: Must sink <tt>XBAR</tt> together with <tt>XSTORE</tt>s.</li> -<li>FFI: Preserve intermediate string for <tt>const char *</tt> conversion.</li> -</ul> - -<h2 id="LuaJIT-2.0.1">LuaJIT 2.0.1 — 2013-02-19</h2> -<ul> -<li>Don't clear frame for out-of-memory error.</li> -<li>Leave hook when resume catches error thrown from hook.</li> -<li>Add missing GC steps for template table creation.</li> -<li>Fix discharge order of comparisons in Lua parser.</li> -<li>Improve buffer handling for <tt>io.read()</tt>.</li> -<li>OSX: Add support for Mach-O object files to <tt>-b</tt> option.</li> -<li>Fix PS3 port.</li> -<li>Fix/enable Xbox 360 port.</li> -<li>x86/x64: Always mark ref for shift count as non-weak.</li> -<li>x64: Don't fuse implicitly 32-to-64 extended operands.</li> -<li>ARM: Fix armhf call argument handling.</li> -<li>ARM: Fix code generation for integer math.min/math.max.</li> -<li>PPC/e500: Fix <tt>lj_vm_floor()</tt> for Inf/NaN.</li> -<li>FFI: Change priority of table initializer variants for structs.</li> -<li>FFI: Fix code generation for bool call result check on x86/x64.</li> -<li>FFI: Load FFI library on-demand for bytecode with cdata literals.</li> -<li>FFI: Fix handling of qualified transparent structs/unions.</li> -</ul> - -<h2 id="LuaJIT-2.0.0">LuaJIT 2.0.0 — 2012-11-08</h2> -<ul> -<li>Correctness and completeness: -<ul> - <li>Fix Android/x86 build.</li> - <li>Fix recording of equality comparisons with <tt>__eq</tt> metamethods.</li> - <li>Fix detection of immutable upvalues.</li> - 
<li>Replace error with PANIC for callbacks from JIT-compiled code.</li> - <li>Fix builtin string to number conversion for <tt>INT_MIN</tt>.</li> - <li>Don't create unneeded array part for template tables.</li> - <li>Fix <tt>CONV.num.int</tt> sinking.</li> - <li>Don't propagate implicitly widened number to index metamethods.</li> - <li>ARM: Fix ordered comparisons of number vs. non-number.</li> - <li>FFI: Fix code generation for replay of sunk float fields.</li> - <li>FFI: Fix signedness of bool.</li> - <li>FFI: Fix recording of bool call result check on x86/x64.</li> - <li>FFI: Fix stack-adjustment for <tt>__thiscall</tt> callbacks.</li> -</ul></li> -</ul> - -<h2 id="LuaJIT-2.0.0-beta11">LuaJIT 2.0.0-beta11 — 2012-10-16</h2> -<ul> -<li>New features: -<ul> - <li>Use ARM VFP instructions, if available (build-time detection).</li> - <li>Add support for ARM hard-float EABI (<tt>armhf</tt>).</li> - <li>Add PS3 port.</li> - <li>Add many features from Lua 5.2, e.g. <tt>goto</tt>/labels. - Refer to <a href="extensions.html#lua52">this list</a>.</li> - <li>FFI: Add parameterized C types.</li> - <li>FFI: Add support for copy constructors.</li> - <li>FFI: Equality comparisons never raise an error (treat as unequal instead).</li> - <li>FFI: Box all accessed or returned enums.</li> - <li>FFI: Check for <tt>__new</tt> metamethod when calling a constructor.</li> - <li>FFI: Handle <tt>__pairs</tt>/<tt>__ipairs</tt> metamethods for cdata objects.</li> - <li>FFI: Convert <tt>io.*</tt> file handle to <tt>FILE *</tt> pointer (but as a <tt>void *</tt>).</li> - <li>FFI: Detect and support type punning through unions.</li> - <li>FFI: Improve various error messages.</li> -</ul></li> -<li>Build-system reorganization: -<ul> - <li>Reorganize directory layout:<br> - <tt>lib/*</tt> → <tt>src/jit/*</tt><br> - <tt>src/buildvm_*.dasc</tt> → <tt>src/vm_*.dasc</tt><br> - <tt>src/buildvm_*.h</tt> → removed<br> - <tt>src/buildvm*</tt> → <tt>src/host/*</tt></li> - <li>Add minified Lua interpreter plus 
Lua BitOp (<tt>minilua</tt>) to run DynASM.</li> - <li>Change DynASM bit operations to use Lua BitOp</li> - <li>Translate only <tt>vm_*.dasc</tt> for detected target architecture.</li> - <li>Improve target detection for <tt>msvcbuild.bat</tt>.</li> - <li>Fix build issues on Cygwin and MinGW with optional MSys.</li> - <li>Handle cross-compiles with FPU/no-FPU or hard-fp/soft-fp ABI mismatch.</li> - <li>Remove some library functions for no-JIT/no-FFI builds.</li> - <li>Add uninstall target to top-level Makefile.</li> -</ul></li> -<li>Correctness and completeness: -<ul> - <li>Preserve snapshot #0 PC for all traces.</li> - <li>Fix argument checks for <tt>coroutine.create()</tt>.</li> - <li>Command line prints version and JIT status to <tt>stdout</tt>, not <tt>stderr</tt>.</li> - <li>Fix userdata <tt>__gc</tt> separations at Lua state close.</li> - <li>Fix <tt>TDUP</tt> to <tt>HLOAD</tt> forwarding for <tt>LJ_DUALNUM</tt> builds.</li> - <li>Fix buffer check in bytecode writer.</li> - <li>Make <tt>os.date()</tt> thread-safe.</li> - <li>Add missing declarations for MSVC intrinsics.</li> - <li>Fix dispatch table modifications for return hooks.</li> - <li>Workaround for MSVC conversion bug (<tt>double</tt> → <tt>uint32_t</tt> → <tt>int32_t</tt>).</li> - <li>Fix FOLD rule <tt>(i-j)-i => 0-j</tt>.</li> - <li>Never use DWARF unwinder on Windows.</li> - <li>Fix shrinking of direct mapped blocks in builtin allocator.</li> - <li>Limit recursion depth in <tt>string.match()</tt> et al.</li> - <li>Fix late despecialization of <tt>ITERN</tt> after loop has been entered.</li> - <li>Fix <tt>'f'</tt> and <tt>'L'</tt> options for <tt>debug.getinfo()</tt> and <tt>lua_getinfo()</tt>.</li> - <li>Fix <tt>package.searchpath()</tt>.</li> - <li>OSX: Change dylib names to be consistent with other platforms.</li> - <li>Android: Workaround for broken <tt>sprintf("%g", -0.0)</tt>.</li> - <li>x86: Remove support for ancient CPUs without <tt>CMOV</tt> (before Pentium Pro).</li> - <li>x86: Fix 
register allocation for calls returning register pair.</li> - <li>x86/x64: Fix fusion of unsigned byte comparisons with swapped operands.</li> - <li>ARM: Fix <tt>tonumber()</tt> argument check.</li> - <li>ARM: Fix modulo operator and <tt>math.floor()</tt>/<tt>math.ceil()</tt> for <tt>inf</tt>/<tt>nan</tt>.</li> - <li>ARM: Invoke SPLIT pass for leftover <tt>IR_TOBIT</tt>.</li> - <li>ARM: Fix BASE register coalescing.</li> - <li>PPC: Fix interpreter state setup in callbacks.</li> - <li>PPC: Fix <tt>string.sub()</tt> range check.</li> - <li>MIPS: Support generation of MIPS/MIPSEL bytecode object files.</li> - <li>MIPS: Fix calls to <tt>floor()</tt>/<tt>ceil()</tt><tt>/trunc()</tt>.</li> - <li>ARM/PPC: Detect more target architecture variants.</li> - <li>ARM/PPC/e500/MIPS: Fix tailcalls from fast functions, esp. <tt>tostring()</tt>.</li> - <li>ARM/PPC/MIPS: Fix rematerialization of FP constants.</li> - <li>FFI: Don't call <tt>FreeLibrary()</tt> on our own EXE/DLL.</li> - <li>FFI: Resolve metamethods for constructors, too.</li> - <li>FFI: Properly disable callbacks on iOS (would require executable memory).</li> - <li>FFI: Fix cdecl string parsing during recording.</li> - <li>FFI: Show address pointed to for <tt>tostring(ref)</tt>, too.</li> - <li>FFI: Fix alignment of C call argument/return structure.</li> - <li>FFI: Initialize all fields of standard types.</li> - <li>FFI: Fix callback handling when new C types are declared in callback.</li> - <li>FFI: Fix recording of constructors for pointers.</li> - <li>FFI: Always resolve metamethods for pointers to structs.</li> - <li>FFI: Correctly propagate alignment when interning nested types.</li> -</ul></li> -<li>Structural and performance enhancements: -<ul> - <li>Add allocation sinking and store sinking optimization.</li> - <li>Constify immutable upvalues.</li> - <li>Add builtin string to integer or FP number conversion. 
Improves cross-platform consistency and correctness.</li> - <li>Create string hash slots in template tables for non-const values, too. Avoids later table resizes.</li> - <li>Eliminate <tt>HREFK</tt> guard for template table references.</li> - <li>Add various new FOLD rules.</li> - <li>Don't use stack unwinding for <tt>lua_yield()</tt> (slow on x64).</li> - <li>ARM, PPC, MIPS: Improve <tt>XLOAD</tt> operand fusion and register hinting.</li> - <li>PPC, MIPS: Compile <tt>math.sqrt()</tt> to sqrt instruction, if available.</li> - <li>FFI: Fold <tt>KPTR</tt> + constant offset in SPLIT pass.</li> - <li>FFI: Optimize/inline <tt>ffi.copy()</tt> and <tt>ffi.fill()</tt>.</li> - <li>FFI: Compile and optimize array/struct copies.</li> - <li>FFI: Compile <tt>ffi.typeof(cdata|ctype)</tt>, <tt>ffi.sizeof()</tt>, <tt>ffi.alignof()</tt>, <tt>ffi.offsetof()</tt> and <tt>ffi.gc()</tt>.</li> -</ul></li> -</ul> - -<h2 id="LuaJIT-2.0.0-beta10">LuaJIT 2.0.0-beta10 — 2012-05-09</h2> -<ul> -<li>New features: -<ul> -<li>The MIPS of LuaJIT is complete. It requires a CPU conforming to the -MIPS32 R1 architecture with hardware FPU. O32 hard-fp ABI, -little-endian or big-endian.</li> -<li>Auto-detect target arch via cross-compiler. No need for -<tt>TARGET=arch</tt> anymore.</li> -<li>Make DynASM compatible with Lua 5.2.</li> -<li>From Lua 5.2: Try <tt>__tostring</tt> metamethod on non-string error -messages..</li> -</ul></li> -<li>Correctness and completeness: -<ul> -<li>Fix parsing of hex literals with exponents.</li> -<li>Fix bytecode dump for certain number constants.</li> -<li>Fix argument type in error message for relative arguments.</li> -<li>Fix argument error handling on Lua stacks without a frame.</li> -<li>Add missing mcode limit check in assembler backend.</li> -<li>Fix compilation on OpenBSD.</li> -<li>Avoid recursive GC steps after GC-triggered trace exit.</li> -<li>Replace <tt><unwind.h></tt> definitions with our own.</li> -<li>Fix OSX build issues. 
Bump minimum required OSX version to 10.4.</li> -<li>Fix discharge order of comparisons in Lua parser.</li> -<li>Ensure running <tt>__gc</tt> of userdata created in <tt>__gc</tt> -at state close.</li> -<li>Limit number of userdata <tt>__gc</tt> separations at state close.</li> -<li>Fix bytecode <tt>JMP</tt> slot range when optimizing -<tt>and</tt>/<tt>or</tt> with constant LHS.</li> -<li>Fix DSE of <tt>USTORE</tt>.</li> -<li>Make <tt>lua_concat()</tt> work from C hook with partial frame.</li> -<li>Add required PHIs for implicit conversions, e.g. via <tt>XREF</tt> -forwarding.</li> -<li>Add more comparison variants to Valgrind suppressions file.</li> -<li>Disable loading bytecode with an extra header (BOM or <tt>#!</tt>).</li> -<li>Fix PHI stack slot syncing.</li> -<li>ARM: Reorder type/value tests to silence Valgrind.</li> -<li>ARM: Fix register allocation for <tt>ldrd</tt>-optimized -<tt>HREFK</tt>.</li> -<li>ARM: Fix conditional branch fixup for <tt>OBAR</tt>.</li> -<li>ARM: Invoke SPLIT pass for <tt>double</tt> args in FFI call.</li> -<li>ARM: Handle all <tt>CALL*</tt> ops with <tt>double</tt> results in -SPLIT pass.</li> -<li>ARM: Fix rejoin of <tt>POW</tt> in SPLIT pass.</li> -<li>ARM: Fix compilation of <tt>math.sinh</tt>, <tt>math.cosh</tt>, -<tt>math.tanh</tt>.</li> -<li>ARM, PPC: Avoid pointless arg clearing in <tt>BC_IFUNCF</tt>.</li> -<li>PPC: Fix resume after yield from hook.</li> -<li>PPC: Fix argument checking for <tt>rawget()</tt>.</li> -<li>PPC: Fix fusion of floating-point <tt>XLOAD</tt>/<tt>XSTORE</tt>.</li> -<li>PPC: Fix <tt>HREFK</tt> code generation for huge tables.</li> -<li>PPC: Use builtin D-Cache/I-Cache sync code.</li> -</ul></li> -<li>FFI library: -<ul> -<li>Ignore empty statements in <tt>ffi.cdef()</tt>.</li> -<li>Ignore number parsing errors while skipping definitions.</li> -<li>Don't touch frame in callbacks with tailcalls to fast functions.</li> -<li>Fix library unloading on POSIX systems.</li> -<li>Finalize cdata before userdata when 
closing the state.</li> -<li>Change <tt>ffi.load()</tt> library name resolution for Cygwin.</li> -<li>Fix resolving of function name redirects on Windows/x86.</li> -<li>Fix symbol resolving error messages on Windows.</li> -<li>Fix blacklisting of C functions calling callbacks.</li> -<li>Fix result type of pointer difference.</li> -<li>Use correct PC in FFI metamethod error message.</li> -<li>Allow <tt>'typedef _Bool int BOOL;'</tt> for the Windows API.</li> -<li>Don't record test for bool result of call, if ignored.</li> -</ul></li> -</ul> - -<h2 id="LuaJIT-2.0.0-beta9">LuaJIT 2.0.0-beta9 — 2011-12-14</h2> -<ul> -<li>New features: -<ul> -<li>PPC port of LuaJIT is complete. Default is the dual-number port -(usually faster). Single-number port selectable via <tt>src/Makefile</tt> -at build time.</li> -<li>Add FFI callback support.</li> -<li>Extend <tt>-b</tt> to generate <tt>.c</tt>, <tt>.h</tt> or <tt>.obj/.o</tt> -files with embedded bytecode.</li> -<li>Allow loading embedded bytecode with <tt>require()</tt>.</li> -<li>From Lua 5.2: Change to <tt>'\z'</tt> escape. Reject undefined escape -sequences.</li> -</ul></li> -<li>Correctness and completeness: -<ul> -<li>Fix OSX 10.7 build. 
Fix <tt>install_name</tt> and versioning on OSX.</li> -<li>Fix iOS build.</li> -<li>Install <tt>dis_arm.lua</tt>, too.</li> -<li>Mark installed shared library as executable.</li> -<li>Add debug option to <tt>msvcbuild.bat</tt> and improve error handling.</li> -<li>Fix data-flow analysis for iterators.</li> -<li>Fix forced unwinding triggered by external unwinder.</li> -<li>Record missing <tt>for</tt> loop slot loads (return to lower frame).</li> -<li>Always use ANSI variants of Windows system functions.</li> -<li>Fix GC barrier for multi-result table constructor (<tt>TSETM</tt>).</li> -<li>Fix/add various FOLD rules.</li> -<li>Add potential PHI for number conversions due to type instability.</li> -<li>Do not eliminate PHIs only referenced from other PHIs.</li> -<li>Correctly anchor implicit number to string conversions in Lua/C API.</li> -<li>Fix various stack limit checks.</li> -<li>x64: Use thread-safe exceptions for external unwinding (GCC platforms).</li> -<li>x64: Fix result type of cdata index conversions.</li> -<li>x64: Fix <tt>math.random()</tt> and <tt>bit.bswap()</tt> code generation.</li> -<li>x64: Fix <tt>lightuserdata</tt> comparisons.</li> -<li>x64: Always extend stack-passed arguments to pointer size.</li> -<li>ARM: Many fixes to code generation backend.</li> -<li>PPC/e500: Fix dispatch for binop metamethods.</li> -<li>PPC/e500: Save/restore condition registers when entering/leaving the VM.</li> -<li>PPC/e500: Fix write barrier in stores of strings to upvalues.</li> -</ul></li> -<li>FFI library: -<ul> -<li>Fix C comment parsing.</li> -<li>Fix snapshot optimization for cdata comparisons.</li> -<li>Fix recording of const/enum lookups in namespaces.</li> -<li>Fix call argument and return handling for <tt>I8/U8/I16/U16</tt> types.</li> -<li>Fix unfused loads of float fields.</li> -<li>Fix <tt>ffi.string()</tt> recording.</li> -<li>Save <tt>GetLastError()</tt> around <tt>ffi.load()</tt> and symbol -resolving, too.</li> -<li>Improve ld script detection in 
<tt>ffi.load()</tt>.</li> -<li>Record loads/stores to external variables in namespaces.</li> -<li>Compile calls to stdcall, fastcall and vararg functions.</li> -<li>Treat function ctypes like pointers in comparisons.</li> -<li>Resolve <tt>__call</tt> metamethod for pointers, too.</li> -<li>Record C function calls with bool return values.</li> -<li>Record <tt>ffi.errno()</tt>.</li> -<li>x86: Fix number to <tt>uint32_t</tt> conversion rounding.</li> -<li>x86: Fix 64 bit arithmetic in assembler backend.</li> -<li>x64: Fix struct-by-value calling conventions.</li> -<li>ARM: Ensure invocation of SPLIT pass for float conversions.</li> -</ul></li> -<li>Structural and performance enhancements: -<ul> -<li>Display trace types with <tt>-jv</tt> and <tt>-jdump</tt>.</li> -<li>Record isolated calls. But prefer recording loops over calls.</li> -<li>Specialize to prototype for non-monomorphic functions. Solves the -trace-explosion problem for closure-heavy programming styles.</li> -<li>Always generate a portable <tt>vmdef.lua</tt>. Easier for distros.</li> -</ul></li> -</ul> - -<h2 id="LuaJIT-2.0.0-beta8">LuaJIT 2.0.0-beta8 — 2011-06-23</h2> -<ul> -<li>New features: -<ul> -<li>Soft-float ARM port of LuaJIT is complete.</li> -<li>Add support for bytecode loading/saving and <tt>-b</tt> command line -option.</li> -<li>From Lua 5.2: <tt>__len</tt> metamethod for tables -(disabled by default).</li> -</ul></li> -<li>Correctness and completeness: -<ul> -<li>ARM: Misc. 
fixes for interpreter.</li> -<li>x86/x64: Fix <tt>bit.*</tt> argument checking in interpreter.</li> -<li>Catch early out-of-memory in memory allocator initialization.</li> -<li>Fix data-flow analysis for paths leading to an upvalue close.</li> -<li>Fix check for missing arguments in <tt>string.format()</tt>.</li> -<li>Fix Solaris/x86 build (note: not a supported target).</li> -<li>Fix recording of loops with instable directions in side traces.</li> -<li>x86/x64: Fix fusion of comparisons with <tt>u8</tt>/<tt>u16</tt> -<tt>XLOAD</tt>.</li> -<li>x86/x64: Fix register allocation for variable shifts.</li> -</ul></li> -<li>FFI library: -<ul> -<li>Add <tt>ffi.errno()</tt>. Save <tt>errno</tt>/<tt>GetLastError()</tt> -around allocations etc.</li> -<li>Fix <tt>__gc</tt> for VLA/VLS cdata objects.</li> -<li>Fix recording of casts from 32 bit cdata pointers to integers.</li> -<li><tt>tonumber(cdata)</tt> returns <tt>nil</tt> for non-numbers.</li> -<li>Show address pointed to for <tt>tostring(pointer)</tt>.</li> -<li>Print <tt>NULL</tt> pointers as <tt>"cdata<... 
*>: NULL"</tt>.</li> -<li>Support <tt>__tostring</tt> metamethod for pointers to structs, too.</li> -</ul></li> -<li>Structural and performance enhancements: -<ul> -<li>More tuning for loop unrolling heuristics.</li> -<li>Flatten and compress in-memory debug info (saves ~70%).</li> -</ul></li> -</ul> - -<h2 id="LuaJIT-2.0.0-beta7">LuaJIT 2.0.0-beta7 — 2011-05-05</h2> -<ul> -<li>New features: -<ul> -<li>ARM port of the LuaJIT interpreter is complete.</li> -<li>FFI library: Add <tt>ffi.gc()</tt>, <tt>ffi.metatype()</tt>, -<tt>ffi.istype()</tt>.</li> -<li>FFI library: Resolve ld script redirection in <tt>ffi.load()</tt>.</li> -<li>From Lua 5.2: <tt>package.searchpath()</tt>, <tt>fp:read("*L")</tt>, -<tt>load(string)</tt>.</li> -<li>From Lua 5.2, disabled by default: empty statement, -<tt>table.unpack()</tt>, modified <tt>coroutine.running()</tt>.</li> -</ul></li> -<li>Correctness and completeness: -<ul> -<li>FFI library: numerous fixes.</li> -<li>Fix type mismatches in store-to-load forwarding.</li> -<li>Fix error handling within metamethods.</li> -<li>Fix <tt>table.maxn()</tt>.</li> -<li>Improve accuracy of <tt>x^-k</tt> on x64.</li> -<li>Fix code generation for Intel Atom in x64 mode.</li> -<li>Fix narrowing of POW.</li> -<li>Fix recording of retried fast functions.</li> -<li>Fix code generation for <tt>bit.bnot()</tt> and multiplies.</li> -<li>Fix error location within cpcall frames.</li> -<li>Add workaround for old libgcc unwind bug.</li> -<li>Fix <tt>lua_yield()</tt> and <tt>getmetatable(lightuserdata)</tt> on x64.</li> -<li>Misc. fixes for PPC/e500 interpreter.</li> -<li>Fix stack slot updates for down-recursion.</li> -</ul></li> -<li>Structural and performance enhancements: -<ul> -<li>Add dual-number mode (int/double) for the VM. 
Enabled for ARM.</li> -<li>Improve narrowing of arithmetic operators and <tt>for</tt> loops.</li> -<li>Tune loop unrolling heuristics and increase trace recorder limits.</li> -<li>Eliminate dead slots in snapshots using bytecode data-flow analysis.</li> -<li>Avoid phantom stores to proxy tables.</li> -<li>Optimize lookups in empty proxy tables.</li> -<li>Improve bytecode optimization of <tt>and</tt>/<tt>or</tt> operators.</li> -</ul></li> -</ul> - -<h2 id="LuaJIT-2.0.0-beta6">LuaJIT 2.0.0-beta6 — 2011-02-11</h2> -<ul> -<li>New features: -<ul> -<li>PowerPC/e500v2 port of the LuaJIT interpreter is complete.</li> -<li>Various minor features from Lua 5.2: Hex escapes in literals, -<tt>'\*'</tt> escape, reversible <tt>string.format("%q",s)</tt>, -<tt>"%g"</tt> pattern, <tt>table.sort</tt> checks callbacks, -<tt>os.exit(status|true|false[,close])</tt>.</li> -<li>Lua 5.2 <tt>__pairs</tt> and <tt>__ipairs</tt> metamethods -(disabled by default).</li> -<li>Initial release of the FFI library.</li> -</ul></li> -<li>Correctness and completeness: -<ul> -<li>Fix <tt>string.format()</tt> for non-finite numbers.</li> -<li>Fix memory leak when compiled to use the built-in allocator.</li> -<li>x86/x64: Fix unnecessary resize in <tt>TSETM</tt> bytecode.</li> -<li>Fix various GC issues with traces and <tt>jit.flush()</tt>.</li> -<li>x64: Fix fusion of indexes for array references.</li> -<li>x86/x64: Fix stack overflow handling for coroutine results.</li> -<li>Enable low-2GB memory allocation on FreeBSD/x64.</li> -<li>Fix <tt>collectgarbage("count")</tt> result if more than 2GB is in use.</li> -<li>Fix parsing of hex floats.</li> -<li>x86/x64: Fix loop branch inversion with trailing -<tt>HREF+NE/EQ</tt>.</li> -<li>Add <tt>jit.os</tt> string.</li> -<li><tt>coroutine.create()</tt> permits running C functions, too.</li> -<li>Fix OSX build to work with newer ld64 versions.</li> -<li>Fix bytecode optimization of <tt>and</tt>/<tt>or</tt> operators.</li> -</ul></li> -<li>Structural and 
performance enhancements: -<ul> -<li>Emit specialized bytecode for <tt>pairs()</tt>/<tt>next()</tt>.</li> -<li>Improve bytecode coalescing of <tt>nil</tt> constants.</li> -<li>Compile calls to vararg functions.</li> -<li>Compile <tt>select()</tt>.</li> -<li>Improve alias analysis, esp. for loads from allocations.</li> -<li>Tuning of various compiler heuristics.</li> -<li>Refactor and extend IR conversion instructions.</li> -<li>x86/x64: Various backend enhancements related to the FFI.</li> -<li>Add SPLIT pass to split 64 bit IR instructions for 32 bit CPUs.</li> -</ul></li> -</ul> - -<h2 id="LuaJIT-2.0.0-beta5">LuaJIT 2.0.0-beta5 — 2010-08-24</h2> -<ul> -<li>Correctness and completeness: -<ul> -<li>Fix trace exit dispatch to function headers.</li> -<li>Fix Windows and OSX builds with LUAJIT_DISABLE_JIT.</li> -<li>Reorganize and fix placement of generated machine code on x64.</li> -<li>Fix TNEW in x64 interpreter.</li> -<li>Do not eliminate PHIs for values only referenced from side exits.</li> -<li>OS-independent canonicalization of strings for non-finite numbers.</li> -<li>Fix <tt>string.char()</tt> range check on x64.</li> -<li>Fix <tt>tostring()</tt> resolving within <tt>print()</tt>.</li> -<li>Fix error handling for <tt>next()</tt>.</li> -<li>Fix passing of constant arguments to external calls on x64.</li> -<li>Fix interpreter argument check for two-argument SSE math functions.</li> -<li>Fix C frame chain corruption caused by <tt>lua_cpcall()</tt>.</li> -<li>Fix return from <tt>pcall()</tt> within active hook.</li> -</ul></li> -<li>Structural and performance enhancements: -<ul> -<li>Replace on-trace GC frame syncing with interpreter exit.</li> -<li>Improve hash lookup specialization by not removing dead keys during GC.</li> -<li>Turn traces into true GC objects.</li> -<li>Avoid starting a GC cycle immediately after library init.</li> -<li>Add weak guards to improve dead-code elimination.</li> -<li>Speed up string interning.</li> -</ul></li> -</ul> - -<h2 
id="LuaJIT-2.0.0-beta4">LuaJIT 2.0.0-beta4 — 2010-03-28</h2> -<ul> -<li>Correctness and completeness: -<ul> -<li>Fix precondition for on-trace creation of table keys.</li> -<li>Fix <tt>{f()}</tt> on x64 when table is resized.</li> -<li>Fix folding of ordered comparisons with same references.</li> -<li>Fix snapshot restores for multi-result bytecodes.</li> -<li>Fix potential hang when recording bytecode with nested closures.</li> -<li>Fix recording of <tt>getmetatable()</tt>, <tt>tonumber()</tt> and bad argument types.</li> -<li>Fix SLOAD fusion across returns to lower frames.</li> -</ul></li> -<li>Structural and performance enhancements: -<ul> -<li>Add array bounds check elimination. <tt>-Oabc</tt> is enabled by default.</li> -<li>More tuning for x64, e.g. smaller table objects.</li> -</ul></li> -</ul> - -<h2 id="LuaJIT-2.0.0-beta3">LuaJIT 2.0.0-beta3 — 2010-03-07</h2> -<ul> -<li>LuaJIT x64 port: -<ul> -<li>Port integrated memory allocator to Linux/x64, Windows/x64 and OSX/x64.</li> -<li>Port interpreter and JIT compiler to x64.</li> -<li>Port DynASM to x64.</li> -<li>Many 32/64 bit cleanups in the VM.</li> -<li>Allow building the interpreter with either x87 or SSE2 arithmetics.</li> -<li>Add external unwinding and C++ exception interop (default on x64).</li> -</ul></li> -<li>Correctness and completeness: -<ul> -<li>Fix constructor bytecode generation for certain conditional values.</li> -<li>Fix some cases of ordered string comparisons.</li> -<li>Fix <tt>lua_tocfunction()</tt>.</li> -<li>Fix cutoff register in JMP bytecode for some conditional expressions.</li> -<li>Fix PHI marking algorithm for references from variant slots.</li> -<li>Fix <tt>package.cpath</tt> for non-default PREFIX.</li> -<li>Fix DWARF2 frame unwind information for interpreter on OSX.</li> -<li>Drive the GC forward on string allocations in the parser.</li> -<li>Implement call/return hooks (zero-cost if disabled).</li> -<li>Implement yield from C hooks.</li> -<li>Disable JIT compiler on older 
non-SSE2 CPUs instead of aborting.</li> -</ul></li> -<li>Structural and performance enhancements: -<ul> -<li>Compile recursive code (tail-, up- and down-recursion).</li> -<li>Improve heuristics for bytecode penalties and blacklisting.</li> -<li>Split CALL/FUNC recording and clean up fast function call semantics.</li> -<li>Major redesign of internal function call handling.</li> -<li>Improve FOR loop const specialization and integerness checks.</li> -<li>Switch to pre-initialized stacks. Avoid frame-clearing.</li> -<li>Colocation of prototypes and related data: bytecode, constants, debug info.</li> -<li>Cleanup parser and streamline bytecode generation.</li> -<li>Add support for weak IR references to register allocator.</li> -<li>Switch to compressed, extensible snapshots.</li> -<li>Compile returns to frames below the start frame.</li> -<li>Improve alias analysis of upvalues using a disambiguation hash value.</li> -<li>Compile floor/ceil/trunc to SSE2 helper calls or SSE4.1 instructions.</li> -<li>Add generic C call handling to IR and backend.</li> -<li>Improve KNUM fuse vs. load heuristics.</li> -<li>Compile various <tt>io.*()</tt> functions.</li> -<li>Compile <tt>math.sinh()</tt>, <tt>math.cosh()</tt>, <tt>math.tanh()</tt> -and <tt>math.random()</tt>.</li> -</ul></li> -</ul> - -<h2 id="LuaJIT-2.0.0-beta2">LuaJIT 2.0.0-beta2 — 2009-11-09</h2> -<ul> -<li>Reorganize build system. 
Build static+shared library on POSIX.</li> -<li>Allow C++ exception conversion on all platforms -using a wrapper function.</li> -<li>Automatically catch C++ exceptions and rethrow Lua error -(DWARF2 only).</li> -<li>Check for the correct x87 FPU precision at strategic points.</li> -<li>Always use wrappers for libm functions.</li> -<li>Resurrect metamethod name strings before copying them.</li> -<li>Mark current trace, even if compiler is idle.</li> -<li>Ensure FILE metatable is created only once.</li> -<li>Fix type comparisons when different integer types are involved.</li> -<li>Fix <tt>getmetatable()</tt> recording.</li> -<li>Fix TDUP with dead keys in template table.</li> -<li><tt>jit.flush(tr)</tt> returns status. -Prevent manual flush of a trace that's still linked.</li> -<li>Improve register allocation heuristics for invariant references.</li> -<li>Compile the push/pop variants of <tt>table.insert()</tt> and -<tt>table.remove()</tt>.</li> -<li>Compatibility with MSVC <tt>link /debug</tt>.</li> -<li>Fix <tt>lua_iscfunction()</tt>.</li> -<li>Fix <tt>math.random()</tt> when compiled with <tt>-fpic</tt> (OSX).</li> -<li>Fix <tt>table.maxn()</tt>.</li> -<li>Bump <tt>MACOSX_DEPLOYMENT_TARGET</tt> to <tt>10.4</tt></li> -<li><tt>luaL_check*()</tt> and <tt>luaL_opt*()</tt> now support -negative arguments, too.<br> -This matches the behavior of Lua 5.1, but not the specification.</li> -</ul> - -<h2 id="LuaJIT-2.0.0-beta1">LuaJIT 2.0.0-beta1 — 2009-10-31</h2> -<ul> -<li>This is the first public release of LuaJIT 2.0.</li> -<li>The whole VM has been rewritten from the ground up, so there's -no point in listing differences over earlier versions.</li> -</ul> -</div> -<br class="flush"> -</div> -<div id="foot"> -<hr class="hide"> -Copyright © 2005-2017 Mike Pall -<span class="noprint"> -· -<a href="contact.html">Contact</a> -</span> -</div> -</body> -</html> diff --git a/source/libs/luajit/LuaJIT-src/doc/contact.html b/source/libs/luajit/LuaJIT-src/doc/contact.html index 
fe4751c0ead3a38e57625127e86f5d46193bc23c..d8d34a69028ebc75de6593b1a101c8161a405019 100644 --- a/source/libs/luajit/LuaJIT-src/doc/contact.html +++ b/source/libs/luajit/LuaJIT-src/doc/contact.html @@ -1,17 +1,16 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Contact</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1>Contact</h1> @@ -20,7 +19,7 @@ <ul><li> <a href="luajit.html">LuaJIT</a> <ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a href="install.html">Installation</a> </li><li> @@ -38,6 +37,8 @@ <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> @@ -45,29 +46,22 @@ <a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> -</li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a 
href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> <p> If you want to report bugs, propose fixes or suggest enhancements, please use the -<a href="https://github.com/LuaJIT/LuaJIT/issues">GitHub issue tracker</a>. +<a href="https://github.com/LuaJIT/LuaJIT/issues"><span class="ext">»</span> GitHub issue tracker</a>. </p> <p> Please send general questions to the -<a href="http://luajit.org/list.html"><span class="ext">»</span> LuaJIT mailing list</a>. +<a href="https://luajit.org/list.html"><span class="ext">»</span> LuaJIT mailing list</a>. </p> <p> You can also send any questions you have directly to me: @@ -90,10 +84,17 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D") </p> </noscript> +<p><i> +Note: I cannot reply to GMail, Google Workspace, Outlook or Office365 +mail addresses, since they prefer to mindlessly filter out mails sent +from small domains using independent mail servers, such as mine. If you +don't like that, please complain to Google or Microsoft, not me. +</i></p> + <h2>Copyright</h2> <p> All documentation is -Copyright © 2005-2017 Mike Pall. +Copyright © 2005-2025 Mike Pall. </p> @@ -101,7 +102,7 @@ Copyright © 2005-2017 Mike Pall. 
</div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/ext_buffer.html b/source/libs/luajit/LuaJIT-src/doc/ext_buffer.html new file mode 100644 index 0000000000000000000000000000000000000000..1ab392f07d6c6ba8ef10a17c7c008384459a456a --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/doc/ext_buffer.html @@ -0,0 +1,689 @@ +<!DOCTYPE html> +<html> +<head> +<title>String Buffer Library</title> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +<style type="text/css"> +.lib { + vertical-align: middle; + margin-left: 5px; + padding: 0 5px; + font-size: 60%; + border-radius: 5px; + background: #c5d5ff; + color: #000; +} +</style> +</head> +<body> +<div id="site"> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>String Buffer Library</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a class="current" href="ext_buffer.html">String Buffers</a> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a 
href="https://luajit.org/status.html">Status <span class="ext">»</span></a> +</li><li> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> +</li><li> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +The string buffer library allows <b>high-performance manipulation of +string-like data</b>. +</p> +<p> +Unlike Lua strings, which are constants, string buffers are +<b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data +can be stored, formatted and encoded into a string buffer and later +converted, extracted or decoded. +</p> +<p> +The convenient string buffer API simplifies common string manipulation +tasks, that would otherwise require creating many intermediate strings. +String buffers improve performance by eliminating redundant memory +copies, object creation, string interning and garbage collection +overhead. In conjunction with the FFI library, they allow zero-copy +operations. +</p> +<p> +The string buffer library also includes a high-performance +<a href="#serialize">serializer</a> for Lua objects. +</p> + +<h2 id="use">Using the String Buffer Library</h2> +<p> +The string buffer library is built into LuaJIT by default, but it's not +loaded by default. Add this to the start of every Lua file that needs +one of its functions: +</p> +<pre class="code"> +local buffer = require("string.buffer") +</pre> +<p> +The convention for the syntax shown on this page is that <tt>buffer</tt> +refers to the buffer library and <tt>buf</tt> refers to an individual +buffer object. +</p> +<p> +Please note the difference between a Lua function call, e.g. +<tt>buffer.new()</tt> (with a dot) and a Lua method call, e.g. +<tt>buf:reset()</tt> (with a colon). +</p> + +<h3 id="buffer_object">Buffer Objects</h3> +<p> +A buffer object is a garbage-collected Lua object. After creation with +<tt>buffer.new()</tt>, it can (and should) be reused for many operations. 
+When the last reference to a buffer object is gone, it will eventually +be freed by the garbage collector, along with the allocated buffer +space. +</p> +<p> +Buffers operate like a FIFO (first-in first-out) data structure. Data +can be appended (written) to the end of the buffer and consumed (read) +from the front of the buffer. These operations may be freely mixed. +</p> +<p> +The buffer space that holds the characters is managed automatically +— it grows as needed and already consumed space is recycled. Use +<tt>buffer.new(size)</tt> and <tt>buf:free()</tt>, if you need more +control. +</p> +<p> +The maximum size of a single buffer is the same as the maximum size of a +Lua string, which is slightly below two gigabytes. For huge data sizes, +neither strings nor buffers are the right data structure — use the +FFI library to directly map memory or files up to the virtual memory +limit of your OS. +</p> + +<h3 id="buffer_overview">Buffer Method Overview</h3> +<ul> +<li> +The <tt>buf:put*()</tt>-like methods append (write) characters to the +end of the buffer. +</li> +<li> +The <tt>buf:get*()</tt>-like methods consume (read) characters from the +front of the buffer. +</li> +<li> +Other methods, like <tt>buf:tostring()</tt> only read the buffer +contents, but don't change the buffer. +</li> +<li> +The <tt>buf:set()</tt> method allows zero-copy consumption of a string +or an FFI cdata object as a buffer. +</li> +<li> +The FFI-specific methods allow zero-copy read/write-style operations or +modifying the buffer contents in-place. Please check the +<a href="#ffi_caveats">FFI caveats</a> below, too. +</li> +<li> +Methods that don't need to return anything specific, return the buffer +object itself as a convenience. 
This allows method chaining, e.g.: +<tt>buf:reset():encode(obj)</tt> or <tt>buf:skip(len):get()</tt> +</li> +</ul> + +<h2 id="create">Buffer Creation and Management</h2> + +<h3 id="buffer_new"><tt>local buf = buffer.new([size [,options]])<br> +local buf = buffer.new([options])</tt></h3> +<p> +Creates a new buffer object. +</p> +<p> +The optional <tt>size</tt> argument ensures a minimum initial buffer +size. This is strictly an optimization when the required buffer size is +known beforehand. The buffer space will grow as needed, in any case. +</p> +<p> +The optional table <tt>options</tt> sets various +<a href="#serialize_options">serialization options</a>. +</p> + +<h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3> +<p> +Reset (empty) the buffer. The allocated buffer space is not freed and +may be reused. +</p> + +<h3 id="buffer_free"><tt>buf = buf:free()</tt></h3> +<p> +The buffer space of the buffer object is freed. The object itself +remains intact, empty and may be reused. +</p> +<p> +Note: you normally don't need to use this method. The garbage collector +automatically frees the buffer space, when the buffer object is +collected. Use this method, if you need to free the associated memory +immediately. +</p> + +<h2 id="write">Buffer Writers</h2> + +<h3 id="buffer_put"><tt>buf = buf:put([str|num|obj] [,…])</tt></h3> +<p> +Appends a string <tt>str</tt>, a number <tt>num</tt> or any object +<tt>obj</tt> with a <tt>__tostring</tt> metamethod to the buffer. +Multiple arguments are appended in the given order. +</p> +<p> +Appending a buffer to a buffer is possible and short-circuited +internally. But it still involves a copy. Better combine the buffer +writes to use a single buffer. +</p> + +<h3 id="buffer_putf"><tt>buf = buf:putf(format, …)</tt></h3> +<p> +Appends the formatted arguments to the buffer. The <tt>format</tt> +string supports the same options as <tt>string.format()</tt>. 
+</p> + +<h3 id="buffer_putcdata"><tt>buf = buf:putcdata(cdata, len)</tt><span class="lib">FFI</span></h3> +<p> +Appends the given <tt>len</tt> number of bytes from the memory pointed +to by the FFI <tt>cdata</tt> object to the buffer. The object needs to +be convertible to a (constant) pointer. +</p> + +<h3 id="buffer_set"><tt>buf = buf:set(str)<br> +buf = buf:set(cdata, len)</tt><span class="lib">FFI</span></h3> +<p> +This method allows zero-copy consumption of a string or an FFI cdata +object as a buffer. It stores a reference to the passed string +<tt>str</tt> or the FFI <tt>cdata</tt> object in the buffer. Any buffer +space originally allocated is freed. This is <i>not</i> an append +operation, unlike the <tt>buf:put*()</tt> methods. +</p> +<p> +After calling this method, the buffer behaves as if +<tt>buf:free():put(str)</tt> or <tt>buf:free():put(cdata, len)</tt> +had been called. However, the data is only referenced and not copied, as +long as the buffer is only consumed. +</p> +<p> +In case the buffer is written to later on, the referenced data is copied +and the object reference is removed (copy-on-write semantics). +</p> +<p> +The stored reference is an anchor for the garbage collector and keeps the +originally passed string or FFI cdata object alive. +</p> + +<h3 id="buffer_reserve"><tt>ptr, len = buf:reserve(size)</tt><span class="lib">FFI</span><br> +<tt>buf = buf:commit(used)</tt><span class="lib">FFI</span></h3> +<p> +The <tt>reserve</tt> method reserves at least <tt>size</tt> bytes of +write space in the buffer. It returns an <tt>uint8_t *</tt> FFI +cdata pointer <tt>ptr</tt> that points to this space. +</p> +<p> +The available length in bytes is returned in <tt>len</tt>. This is at +least <tt>size</tt> bytes, but may be more to facilitate efficient +buffer growth. You can either make use of the additional space or ignore +<tt>len</tt> and only use <tt>size</tt> bytes. 
+</p> +<p> +The <tt>commit</tt> method appends the <tt>used</tt> bytes of the +previously returned write space to the buffer data. +</p> +<p> +This pair of methods allows zero-copy use of C read-style APIs: +</p> +<pre class="code"> +local MIN_SIZE = 65536 +repeat + local ptr, len = buf:reserve(MIN_SIZE) + local n = C.read(fd, ptr, len) + if n == 0 then break end -- EOF. + if n &lt; 0 then error("read error") end + buf:commit(n) +until false +</pre> +<p> +The reserved write space is <i>not</i> initialized. At least the +<tt>used</tt> bytes <b>must</b> be written to before calling the +<tt>commit</tt> method. There's no need to call the <tt>commit</tt> +method, if nothing is added to the buffer (e.g. on error). +</p> + +<h2 id="read">Buffer Readers</h2> + +<h3 id="buffer_length"><tt>len = #buf</tt></h3> +<p> +Returns the current length of the buffer data in bytes. +</p> + +<h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf […]</tt></h3> +<p> +The Lua concatenation operator <tt>..</tt> also accepts buffers, just +like strings or numbers. It always returns a string and not a buffer. +</p> +<p> +Note that although this is supported for convenience, this thwarts one +of the main reasons to use buffers, which is to avoid string +allocations. Rewrite it with <tt>buf:put()</tt> and <tt>buf:get()</tt>. +</p> +<p> +Mixing this with unrelated objects that have a <tt>__concat</tt> +metamethod may not work, since these probably only expect strings. +</p> + +<h3 id="buffer_skip"><tt>buf = buf:skip(len)</tt></h3> +<p> +Skips (consumes) <tt>len</tt> bytes from the buffer up to the current +length of the buffer data. +</p> + +<h3 id="buffer_get"><tt>str, … = buf:get([len|nil] [,…])</tt></h3> +<p> +Consumes the buffer data and returns one or more strings. If called +without arguments, the whole buffer data is consumed. If called with a +number, up to <tt>len</tt> bytes are consumed.
A <tt>nil</tt> argument +consumes the remaining buffer data (this only makes sense as the last +argument). Multiple arguments consume the buffer data in the given +order. +</p> +<p> +Note: a zero length or no remaining buffer data returns an empty string +and not <tt>nil</tt>. +</p> + +<h3 id="buffer_tostring"><tt>str = buf:tostring()<br> +str = tostring(buf)</tt></h3> +<p> +Creates a string from the buffer data, but doesn't consume it. The +buffer remains unchanged. +</p> +<p> +Buffer objects also define a <tt>__tostring</tt> metamethod. This means +buffers can be passed to the global <tt>tostring()</tt> function and +many other functions that accept this in place of strings. The important +internal uses in functions like <tt>io.write()</tt> are short-circuited +to avoid the creation of an intermediate string object. +</p> + +<h3 id="buffer_ref"><tt>ptr, len = buf:ref()</tt><span class="lib">FFI</span></h3> +<p> +Returns an <tt>uint8_t *</tt> FFI cdata pointer <tt>ptr</tt> that +points to the buffer data. The length of the buffer data in bytes is +returned in <tt>len</tt>. +</p> +<p> +The returned pointer can be directly passed to C functions that expect a +buffer and a length. You can also do bytewise reads +(<tt>local x = ptr[i]</tt>) or writes +(<tt>ptr[i] = 0x40</tt>) of the buffer data. +</p> +<p> +In conjunction with the <tt>skip</tt> method, this allows zero-copy use +of C write-style APIs: +</p> +<pre class="code"> +repeat + local ptr, len = buf:ref() + if len == 0 then break end + local n = C.write(fd, ptr, len) + if n &lt; 0 then error("write error") end + buf:skip(n) +until n >= len +</pre> +<p> +Unlike Lua strings, buffer data is <i>not</i> implicitly +zero-terminated. It's not safe to pass <tt>ptr</tt> to C functions that +expect zero-terminated strings. If you're not using <tt>len</tt>, then +you're doing something wrong.
+</p> + +<h2 id="serialize">Serialization of Lua Objects</h2> +<p> +The following functions and methods allow <b>high-speed serialization</b> +(encoding) of a Lua object into a string and decoding it back to a Lua +object. This allows convenient storage and transport of <b>structured +data</b>. +</p> +<p> +The encoded data is in an <a href="#serialize_format">internal binary +format</a>. The data can be stored in files, binary-transparent +databases or transmitted to other LuaJIT instances across threads, +processes or networks. +</p> +<p> +Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or +server-class system, even when serializing many small objects. Decoding +speed is mostly constrained by object creation cost. +</p> +<p> +The serializer handles most Lua types, common FFI number types and +nested structures. Functions, thread objects, other FFI cdata and full +userdata cannot be serialized (yet). +</p> +<p> +The encoder serializes nested structures as trees. Multiple references +to a single object will be stored separately and create distinct objects +after decoding. Circular references cause an error. +</p> + +<h3 id="serialize_methods">Serialization Functions and Methods</h3> + +<h3 id="buffer_encode"><tt>str = buffer.encode(obj)<br> +buf = buf:encode(obj)</tt></h3> +<p> +Serializes (encodes) the Lua object <tt>obj</tt>. The stand-alone +function returns a string <tt>str</tt>. The buffer method appends the +encoding to the buffer. +</p> +<p> +<tt>obj</tt> can be any of the supported Lua types — it doesn't +need to be a Lua table. +</p> +<p> +This function may throw an error when attempting to serialize +unsupported object types, circular references or deeply nested tables. +</p> + +<h3 id="buffer_decode"><tt>obj = buffer.decode(str)<br> +obj = buf:decode()</tt></h3> +<p> +The stand-alone function deserializes (decodes) the string +<tt>str</tt>, the buffer method deserializes one object from the +buffer. 
Both return a Lua object <tt>obj</tt>. +</p> +<p> +The returned object may be any of the supported Lua types — +even <tt>nil</tt>. +</p> +<p> +This function may throw an error when fed with malformed or incomplete +encoded data. The stand-alone function throws when there's left-over +data after decoding a single top-level object. The buffer method leaves +any left-over data in the buffer. +</p> +<p> +Attempting to deserialize an FFI type will throw an error, if the FFI +library is not built-in or has not been loaded, yet. +</p> + +<h3 id="serialize_options">Serialization Options</h3> +<p> +The <tt>options</tt> table passed to <tt>buffer.new()</tt> may contain +the following members (all optional): +</p> +<ul> +<li> +<tt>dict</tt> is a Lua table holding a <b>dictionary of strings</b> that +commonly occur as table keys of objects you are serializing. These keys +are compactly encoded as indexes during serialization. A well-chosen +dictionary saves space and improves serialization performance. +</li> +<li> +<tt>metatable</tt> is a Lua table holding a <b>dictionary of metatables</b> +for the table objects you are serializing. +</li> +</ul> +<p> +<tt>dict</tt> needs to be an array of strings and <tt>metatable</tt> needs +to be an array of tables. Both must start at index 1 and have no holes (no +<tt>nil</tt> in between). The tables are anchored in the buffer object and +internally modified into a two-way index (don't do this yourself, just pass +a plain array). The tables must not be modified after they have been passed +to <tt>buffer.new()</tt>. +</p> +<p> +The <tt>dict</tt> and <tt>metatable</tt> tables used by the encoder and +decoder must be the same. Put the most common entries at the front. Extend +at the end to ensure backwards-compatibility — older encodings can +then still be read. You may also set some indexes to <tt>false</tt> to +explicitly drop backwards-compatibility. Old encodings that use these +indexes will throw an error when decoded.
+</p> +<p> +Metatables that are not found in the <tt>metatable</tt> dictionary are +ignored when encoding. Decoding returns a table with a <tt>nil</tt> +metatable. +</p> +<p> +Note: parsing and preparation of the options table is somewhat +expensive. Create a buffer object only once and recycle it for multiple +uses. Avoid mixing encoder and decoder buffers, since the +<tt>buf:set()</tt> method frees the already allocated buffer space: +</p> +<pre class="code"> +local options = { + dict = { "commonly", "used", "string", "keys" }, +} +local buf_enc = buffer.new(options) +local buf_dec = buffer.new(options) + +local function encode(obj) + return buf_enc:reset():encode(obj):get() +end + +local function decode(str) + return buf_dec:set(str):decode() +end +</pre> + +<h3 id="serialize_stream">Streaming Serialization</h3> +<p> +In some contexts, it's desirable to do piecewise serialization of large +datasets, also known as <i>streaming</i>. +</p> +<p> +This serialization format can be safely concatenated and supports streaming. +Multiple encodings can simply be appended to a buffer and later decoded +individually: +</p> +<pre class="code"> +local buf = buffer.new() +buf:encode(obj1) +buf:encode(obj2) +local copy1 = buf:decode() +local copy2 = buf:decode() +</pre> +<p> +Here's how to iterate over a stream: +</p> +<pre class="code"> +while #buf ~= 0 do + local obj = buf:decode() + -- Do something with obj. +end +</pre> +<p> +Since the serialization format doesn't prepend a length to its encoding, +network applications may need to transmit the length, too. +</p> + +<h3 id="serialize_format">Serialization Format Specification</h3> +<p> +This serialization format is designed for <b>internal use</b> by LuaJIT +applications. Serialized data is upwards-compatible and portable across +all supported LuaJIT platforms. +</p> +<p> +It's an <b>8-bit binary format</b> and not human-readable. It uses e.g. 
+embedded zeroes and stores embedded Lua string objects unmodified, which +are 8-bit-clean, too. Encoded data can be safely concatenated for +streaming and later decoded one top-level object at a time. +</p> +<p> +The encoding is reasonably compact, but tuned for maximum performance, +not for minimum space usage. It compresses well with any of the common +byte-oriented data compression algorithms. +</p> +<p> +Although documented here for reference, this format is explicitly +<b>not</b> intended to be a 'public standard' for structured data +interchange across computer languages (like JSON or MessagePack). Please +do not use it as such. +</p> +<p> +The specification is given below as a context-free grammar with a +top-level <tt>object</tt> as the starting point. Alternatives are +separated by the <tt>|</tt> symbol and <tt>*</tt> indicates repeats. +Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are +either plain hex numbers, encoded as bytes, or have a <tt>.format</tt> +suffix. 
+</p> +<pre> +object → nil | false | true + | null | lightud32 | lightud64 + | int | num | tab | tab_mt + | int64 | uint64 | complex + | string + +nil → 0x00 +false → 0x01 +true → 0x02 + +null → 0x03 // NULL lightuserdata +lightud32 → 0x04 data.I // 32 bit lightuserdata +lightud64 → 0x05 data.L // 64 bit lightuserdata + +int → 0x06 int.I // int32_t +num → 0x07 double.L + +tab → 0x08 // Empty table + | 0x09 h.U h*{object object} // Key/value hash + | 0x0a a.U a*object // 0-based array + | 0x0b a.U h.U a*object h*{object object} // Mixed + | 0x0c a.U (a-1)*object // 1-based array + | 0x0d a.U h.U (a-1)*object h*{object object} // Mixed +tab_mt → 0x0e (index-1).U tab // Metatable dict entry + +int64 → 0x10 int.L // FFI int64_t +uint64 → 0x11 uint.L // FFI uint64_t +complex → 0x12 re.L im.L // FFI complex + +string → (0x20+len).U len*char.B + | 0x0f (index-1).U // String dict entry + +.B = 8 bit +.I = 32 bit little-endian +.L = 64 bit little-endian +.U = prefix-encoded 32 bit unsigned number n: + 0x00..0xdf → n.B + 0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B + 0x1fe0.. → 0xff n.I +</pre> + +<h2 id="error">Error handling</h2> +<p> +Many of the buffer methods can throw an error. Out-of-memory or usage +errors are best caught with an outer wrapper for larger parts of code. +There's not much one can do after that, anyway. +</p> +<p> +OTOH, you may want to catch some errors individually. Buffer methods need +to receive the buffer object as the first argument. The Lua colon-syntax +<tt>obj:method()</tt> does that implicitly. But to wrap a method with +<tt>pcall()</tt>, the arguments need to be passed like this: +</p> +<pre class="code"> +local ok, err = pcall(buf.encode, buf, obj) +if not ok then + -- Handle error in err. +end +</pre> + +<h2 id="ffi_caveats">FFI caveats</h2> +<p> +The string buffer library has been designed to work well together with +the FFI library. 
But due to the low-level nature of the FFI library, +some care needs to be taken: +</p> +<p> +First, please remember that FFI pointers are zero-indexed. The space +returned by <tt>buf:reserve()</tt> and <tt>buf:ref()</tt> starts at the +returned pointer and ends before <tt>len</tt> bytes after that. +</p> +<p> +I.e. the first valid index is <tt>ptr[0]</tt> and the last valid index +is <tt>ptr[len-1]</tt>. If the returned length is zero, there's no valid +index at all. The returned pointer may even be <tt>NULL</tt>. +</p> +<p> +The space pointed to by the returned pointer is only valid as long as +the buffer is not modified in any way (neither append, nor consume, nor +reset, etc.). The pointer is also not a GC anchor for the buffer object +itself. +</p> +<p> +Buffer data is only guaranteed to be byte-aligned. Casting the returned +pointer to a data type with higher alignment may cause unaligned +accesses. It depends on the CPU architecture whether this is allowed or +not (it's always OK on x86/x64 and mostly OK on other modern +architectures). +</p> +<p> +FFI pointers or references do not count as GC anchors for an underlying +object. E.g. an <tt>array</tt> allocated with <tt>ffi.new()</tt> is +anchored by <tt>buf:set(array, len)</tt>, but not by +<tt>buf:set(array+offset, len)</tt>. The addition of the offset +creates a new pointer, even when the offset is zero. In this case, you +need to make sure there's still a reference to the original array as +long as its contents are in use by the buffer. +</p> +<p> +Even though each LuaJIT VM instance is single-threaded (but you can +create multiple VMs), FFI data structures can be accessed concurrently. +Be careful when reading/writing FFI cdata from/to buffers to avoid +concurrent accesses or modifications. In particular, the memory +referenced by <tt>buf:set(cdata, len)</tt> must not be modified +while buffer readers are working on it. 
Shared, but read-only memory +mappings of files are OK, but only if the file does not change. +</p> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2025 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/source/libs/luajit/LuaJIT-src/doc/ext_c_api.html b/source/libs/luajit/LuaJIT-src/doc/ext_c_api.html index ad462c63e517c0dd48be4ed2944b517ce814ee5d..673a9b9a14782bb9e347c9d4fac602d3f0506f1c 100644 --- a/source/libs/luajit/LuaJIT-src/doc/ext_c_api.html +++ b/source/libs/luajit/LuaJIT-src/doc/ext_c_api.html @@ -1,17 +1,16 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Lua/C API Extensions</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1>Lua/C API Extensions</h1> @@ -20,7 +19,7 @@ <ul><li> <a href="luajit.html">LuaJIT</a> <ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a href="install.html">Installation</a> </li><li> @@ -38,6 +37,8 @@ <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a class="current" 
href="ext_c_api.html">Lua/C API</a> @@ -45,18 +46,11 @@ <a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> -</li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> @@ -91,8 +85,8 @@ other Lua/C API functions). </p> <p> The third argument specifies the mode, which is 'or'ed with a flag. -The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature on, -<tt>LUAJIT_MODE_ON</tt> to turn a feature off, or +The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature off, +<tt>LUAJIT_MODE_ON</tt> to turn a feature on, or <tt>LUAJIT_MODE_FLUSH</tt> to flush cached code. </p> <p> @@ -111,7 +105,7 @@ Turn the whole JIT compiler on or off or flush the whole cache of compiled code. This sets the mode for the function at the stack index <tt>idx</tt> or the parent of the calling function (<tt>idx = 0</tt>). It either enables JIT compilation for a function, disables it and flushes any -already compiled code or only flushes already compiled code. This +already compiled code, or only flushes already compiled code. This applies recursively to all sub-functions of the function with <tt>LUAJIT_MODE_ALLFUNC</tt> or only to the sub-functions with <tt>LUAJIT_MODE_ALLSUBFUNC</tt>. @@ -130,7 +124,7 @@ traces which link to it. This mode defines a wrapper function for calls to C functions. 
If called with <tt>LUAJIT_MODE_ON</tt>, the stack index at <tt>idx</tt> must be a <tt>lightuserdata</tt> object holding a pointer to the wrapper -function. From now on all C functions are called through the wrapper +function. From now on, all C functions are called through the wrapper function. If called with <tt>LUAJIT_MODE_OFF</tt> this mode is turned off and all C functions are directly called. </p> @@ -179,7 +173,7 @@ Also note that this mechanism is not without overhead. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/ext_ffi.html b/source/libs/luajit/LuaJIT-src/doc/ext_ffi.html index 5e1daaf54b7670f322d61951503d0289ec0e2d04..aa6d8363f670b2fded0a678c673563dc1e3da7cd 100644 --- a/source/libs/luajit/LuaJIT-src/doc/ext_ffi.html +++ b/source/libs/luajit/LuaJIT-src/doc/ext_ffi.html @@ -1,17 +1,16 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>FFI Library</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1>FFI Library</h1> @@ -20,7 +19,7 @@ <ul><li> <a href="luajit.html">LuaJIT</a> <ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a 
href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a href="install.html">Installation</a> </li><li> @@ -38,6 +37,8 @@ <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> @@ -45,18 +46,11 @@ <a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> -</li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> @@ -161,7 +155,7 @@ call the binding function. Phew! <h2 id="cdata">Motivating Example: Using C Data Structures</h2> <p> The FFI library allows you to create and access C data -structures. Of course the main use for this is for interfacing with +structures. Of course, the main use for this is for interfacing with C functions. But they can be used stand-alone, too. </p> <p> @@ -173,7 +167,7 @@ implemented with a big table holding lots of tiny tables. This imposes both a substantial memory overhead as well as a performance overhead. </p> <p> -Here's a sketch of a library that operates on color images plus a +Here's a sketch of a library that operates on color images, plus a simple benchmark. 
First, the plain Lua version: </p> <pre class="code"> @@ -188,7 +182,7 @@ local function image_ramp_green(n) return img end -local function image_to_grey(img, n) +local function image_to_gray(img, n) for i=1,n do local y = floor(0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue) img[i].red = y; img[i].green = y; img[i].blue = y @@ -198,14 +192,14 @@ end local N = 400*400 local img = image_ramp_green(N) for i=1,1000 do - image_to_grey(img, N) + image_to_gray(img, N) end </pre> <p> This creates a table with 160.000 pixels, each of which is a table -holding four number values in the range of 0-255. First an image with +holding four number values in the range of 0-255. First, an image with a green ramp is created (1D for simplicity), then the image is -converted to greyscale 1000 times. Yes, that's silly, but I was in +converted to grayscale 1000 times. Yes, that's silly, but I was in need of a simple example ... </p> <p> @@ -312,7 +306,7 @@ be more compact and faster. This is certainly true (by a factor of ~1.7x). Switching to a struct-of-arrays would help, too. </p> <p style="font-size: 8pt;"> -However the resulting code would be less idiomatic and rather +However, the resulting code would be less idiomatic and rather error-prone. And it still doesn't get even close to the performance of the FFI version of the code. Also, high-level data structures cannot be easily passed to other C functions, especially I/O functions, @@ -322,7 +316,7 @@ without undue conversion penalties. 
</div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/ext_ffi_api.html b/source/libs/luajit/LuaJIT-src/doc/ext_ffi_api.html index 91af2e1d43d075ce95ecba27e68643973709625a..360dd521d6614d260b2cf77dd1036bb175065554 100644 --- a/source/libs/luajit/LuaJIT-src/doc/ext_ffi_api.html +++ b/source/libs/luajit/LuaJIT-src/doc/ext_ffi_api.html @@ -1,10 +1,9 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>ffi.* API Functions</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -16,7 +15,7 @@ td.abiparam { font-weight: bold; width: 6em; } </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1><tt>ffi.*</tt> API Functions</h1> @@ -25,7 +24,7 @@ td.abiparam { font-weight: bold; width: 6em; } <ul><li> <a href="luajit.html">LuaJIT</a> <ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a href="install.html">Installation</a> </li><li> @@ -43,6 +42,8 @@ td.abiparam { font-weight: bold; width: 6em; } <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* 
Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> @@ -50,18 +51,11 @@ td.abiparam { font-weight: bold; width: 6em; } <a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> -</li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> @@ -125,7 +119,7 @@ separated by semicolons. The trailing semicolon for a single declaration may be omitted. </p> <p> -Please note that external symbols are only <em>declared</em>, but they +Please note, that external symbols are only <em>declared</em>, but they are <em>not bound</em> to any specific address, yet. Binding is achieved with C library namespaces (see below). </p> @@ -213,7 +207,7 @@ parse the cdecl only once and get its ctype with <tt>ffi.typeof()</tt>. Then use the ctype as a constructor repeatedly. </p> <p style="font-size: 8pt;"> -Please note that an anonymous <tt>struct</tt> declaration implicitly +Please note, that an anonymous <tt>struct</tt> declaration implicitly creates a new and distinguished ctype every time you use it for <tt>ffi.new()</tt>. This is probably <b>not</b> what you want, especially if you create more than one cdata object. Different anonymous @@ -260,12 +254,12 @@ afterwards. Neither the contents of the <tt>metatable</tt> nor the contents of an <tt>__index</tt> table (if any) may be modified afterwards. 
The associated metatable automatically applies to all uses of this type, no matter how the objects are created or where they -originate from. Note that pre-defined operations on types have -precedence (e.g. declared field names cannot be overriden). +originate from. Note that predefined operations on types have +precedence (e.g. declared field names cannot be overridden). </p> <p> All standard Lua metamethods are implemented. These are called directly, -without shortcuts and on any mix of types. For binary operations, the +without shortcuts, and on any mix of types. For binary operations, the left operand is checked first for a valid ctype metamethod. The <tt>__gc</tt> metamethod only applies to <tt>struct</tt>/<tt>union</tt> types and performs an implicit <a href="#ffi_gc"><tt>ffi.gc()</tt></a> @@ -469,6 +463,10 @@ otherwise. The following parameters are currently defined: <tr class="odd"> <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr> <tr class="even"> +<td class="abiparam">pauth</td><td class="abidesc">Pointer authentication ABI</td></tr> +<tr class="odd"> +<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr> +<tr class="even"> <td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr> </table> @@ -494,7 +492,7 @@ have some extra methods: <p> Free the resources associated with a callback. The associated Lua function is unanchored and may be garbage collected. The callback -function pointer is no longer valid and must not be called anymore +function pointer is no longer valid and must not be called again (it may be reused by a subsequently created callback). </p> @@ -560,7 +558,7 @@ named <tt>i</tt>. 
</div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/ext_ffi_semantics.html b/source/libs/luajit/LuaJIT-src/doc/ext_ffi_semantics.html index 800b6b18a0e193a485c0e06f2a1d905d331061ed..cd533e8c88a9f8b160de1c2bee4395ccb1ef15eb 100644 --- a/source/libs/luajit/LuaJIT-src/doc/ext_ffi_semantics.html +++ b/source/libs/luajit/LuaJIT-src/doc/ext_ffi_semantics.html @@ -1,10 +1,9 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>FFI Semantics</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -16,7 +15,7 @@ td.convop { font-style: italic; width: 40%; } </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1>FFI Semantics</h1> @@ -25,7 +24,7 @@ td.convop { font-style: italic; width: 40%; } <ul><li> <a href="luajit.html">LuaJIT</a> <ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a href="install.html">Installation</a> </li><li> @@ -43,6 +42,8 @@ td.convop { font-style: italic; width: 40%; } <a class="current" href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a 
href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> @@ -50,18 +51,11 @@ td.convop { font-style: italic; width: 40%; } <a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> -</li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> @@ -92,7 +86,7 @@ footprint. It's used by the <a href="ext_ffi_api.html">ffi.* library functions</a> to declare C types or external symbols. </p> <p> -It's only purpose is to parse C declarations, as found e.g. in +Its only purpose is to parse C declarations, as found e.g. in C header files. Although it does evaluate constant expressions, it's <em>not</em> a C compiler. The body of <tt>inline</tt> C function definitions is simply ignored. @@ -169,7 +163,7 @@ function declarations.</li> </ul> <p> -The following C types are pre-defined by the C parser (like +The following C types are predefined by the C parser (like a <tt>typedef</tt>, except re-declarations will be ignored): </p> <ul> @@ -446,6 +440,19 @@ If you don't do this, the default Lua number → <tt>double</tt> conversion rule applies. A vararg C function expecting an integer will see a garbled or uninitialized value. </p> +<p> +Note: this is the only place where creating a boxed scalar number type is +actually useful. <b>Never use <tt>ffi.new("int")</tt>, <tt>ffi.new("float")</tt> +etc. 
anywhere else!</b> +</p> +<p style="font-size: 8pt;"> +Ditto for <tt>ffi.cast()</tt>. Explicitly boxing scalars <b>does not</b> +improve performance or force <tt>int</tt> or <tt>float</tt> arithmetic! It +just adds costly boxing, unboxing and conversions steps. And it may lead +to surprise results, because +<a href="#cdata_arith">cdata arithmetic on scalar numbers</a> +is always performed on 64 bit integers. +</p> <h2 id="init">Initializers</h2> <p> @@ -587,9 +594,9 @@ ffi.new("struct nested", {x=1,y={2,3}}) --> x = 1, y.a = 2, y.b = 3 <h2 id="cdata_ops">Operations on cdata Objects</h2> <p> -All of the standard Lua operators can be applied to cdata objects or a +All standard Lua operators can be applied to cdata objects or a mix of a cdata object and another Lua object. The following list shows -the pre-defined operations. +the predefined operations. </p> <p> Reference types are dereferenced <em>before</em> performing each of @@ -597,7 +604,7 @@ the operations below — the operation is applied to the C type pointed to by the reference. </p> <p> -The pre-defined operations are always tried first before deferring to a +The predefined operations are always tried first before deferring to a metamethod or index table (if any) for the corresponding ctype (except for <tt>__new</tt>). An error is raised if the metamethod lookup or index table lookup fails. @@ -647,7 +654,7 @@ assigning to an index of a vector raises an error.</li> </ul> <p> A ctype object can be indexed with a string key, too. The only -pre-defined operation is reading scoped constants of +predefined operation is reading scoped constants of <tt>struct</tt>/<tt>union</tt> types. All other accesses defer to the corresponding metamethods or index tables (if any). </p> @@ -660,7 +667,7 @@ certain optimizations. <p> As a consequence, the <em>elements</em> of complex numbers and vectors are immutable. But the elements of an aggregate holding these -types <em>may</em> be modified of course. I.e. 
you cannot assign to +types <em>may</em> be modified, of course. I.e. you cannot assign to <tt>foo.c.im</tt>, but you can assign a (newly created) complex number to <tt>foo.c</tt>. </p> @@ -678,9 +685,9 @@ through unions is explicitly detected and allowed. <a href="ext_ffi_api.html#ffi_new">constructor</a>. This is equivalent to <tt>ffi.new(ct, ...)</tt>, unless a <tt>__new</tt> metamethod is defined. The <tt>__new</tt> metamethod is called with the ctype object -plus any other arguments passed to the contructor. Note that you have to -use <tt>ffi.new</tt> inside of it, since calling <tt>ct(...)</tt> would -cause infinite recursion.</li> +plus any other arguments passed to the constructor. Note that you have to +use <tt>ffi.new</tt> inside the metamethod, since calling <tt>ct(...)</tt> +would cause infinite recursion.</li> <li><b>C function call</b>: a cdata function or cdata function pointer can be called. The passed arguments are @@ -691,7 +698,7 @@ variable argument part of vararg C function use C function is called and the return value (if any) is <a href="#convert_tolua">converted to a Lua object</a>.<br> On Windows/x86 systems, <tt>__stdcall</tt> functions are automatically -detected and a function declared as <tt>__cdecl</tt> (the default) is +detected, and a function declared as <tt>__cdecl</tt> (the default) is silently fixed up after the first call.</li> </ul> @@ -701,7 +708,7 @@ silently fixed up after the first call.</li> <li><b>Pointer arithmetic</b>: a cdata pointer/array and a cdata number or a Lua number can be added or subtracted. The number must be -on the right hand side for a subtraction. The result is a pointer of +on the right-hand side for a subtraction. The result is a pointer of the same type with an address plus or minus the number value multiplied by the element size in bytes. 
An error is raised if the element size is undefined.</li> @@ -716,7 +723,7 @@ operators (<tt>+ - * / % ^</tt> and unary minus) can be applied to two cdata numbers, or a cdata number and a Lua number. If one of them is an <tt>uint64_t</tt>, the other side is converted to an <tt>uint64_t</tt> and an unsigned arithmetic operation -is performed. Otherwise both sides are converted to an +is performed. Otherwise, both sides are converted to an <tt>int64_t</tt> and a signed arithmetic operation is performed. The result is a boxed 64 bit cdata object.<br> @@ -763,7 +770,7 @@ which is compatible with any other pointer type.</li> <li><b>64 bit integer comparison</b>: two cdata numbers, or a cdata number and a Lua number can be compared with each other. If one of them is an <tt>uint64_t</tt>, the other side is converted to an -<tt>uint64_t</tt> and an unsigned comparison is performed. Otherwise +<tt>uint64_t</tt> and an unsigned comparison is performed. Otherwise, both sides are converted to an <tt>int64_t</tt> and a signed comparison is performed.<br> @@ -788,9 +795,9 @@ keys!</b> A cdata object is treated like any other garbage-collected object and is hashed and compared by its address for table indexing. Since there's no interning for cdata value types, the same value may be -boxed in different cdata objects with different addresses. Thus +boxed in different cdata objects with different addresses. Thus, <tt>t[1LL+1LL]</tt> and <tt>t[2LL]</tt> usually <b>do not</b> point to -the same hash slot and they certainly <b>do not</b> point to the same +the same hash slot, and they certainly <b>do not</b> point to the same hash slot as <tt>t[2]</tt>. 
</p> <p> @@ -812,7 +819,7 @@ the resulting Lua number as a key when indexing tables.<br> One obvious benefit: <tt>t[tonumber(2LL)]</tt> <b>does</b> point to the same slot as <tt>t[2]</tt>.</li> -<li>Otherwise use either <tt>tostring()</tt> on 64 bit integers +<li>Otherwise, use either <tt>tostring()</tt> on 64 bit integers or complex numbers or combine multiple fields of a cdata aggregate to a Lua string (e.g. with <a href="ext_ffi_api.html#ffi_string"><tt>ffi.string()</tt></a>). Then @@ -820,7 +827,7 @@ use the resulting Lua string as a key when indexing tables.</li> <li>Create your own specialized hash table implementation using the C types provided by the FFI library, just like you would in -C code. Ultimately this may give much better performance than the +C code. Ultimately, this may give much better performance than the other alternatives or what a generic by-value hash table could possibly provide.</li> @@ -864,7 +871,7 @@ place of a type, you'd need to use <tt>ffi.typeof("int")</tt> instead. <p> The main use for parameterized types are libraries implementing abstract data types -(<a href="http://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8"><span class="ext">»</span> example</a>), +(<a href="https://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8"><span class="ext">»</span> example</a>), similar to what can be achieved with C++ template metaprogramming. Another use case are derived types of anonymous structs, which avoids pollution of the global struct namespace. @@ -886,7 +893,7 @@ garbage collector will automatically free the memory used by it (at the end of the next GC cycle). </p> <p> -Please note that pointers themselves are cdata objects, however they +Please note, that pointers themselves are cdata objects, however they are <b>not</b> followed by the garbage collector. So e.g. 
if you assign a cdata array to a pointer, you must keep the cdata object holding the array alive as long as the pointer is still in use: @@ -935,18 +942,18 @@ of the function pointer and the Lua function object (closure). </p> <p> This can happen implicitly due to the usual conversions, e.g. when -passing a Lua function to a function pointer argument. Or you can use +passing a Lua function to a function pointer argument. Or, you can use <tt>ffi.cast()</tt> to explicitly cast a Lua function to a C function pointer. </p> <p> -Currently only certain C function types can be used as callback +Currently, only certain C function types can be used as callback functions. Neither C vararg functions nor functions with pass-by-value aggregate argument or result types are supported. There -are no restrictions for the kind of Lua functions that can be called +are no restrictions on the kind of Lua functions that can be called from the callback — no checks for the proper number of arguments are made. The return value of the Lua function will be converted to the -result type and an error will be thrown for invalid conversions. +result type, and an error will be thrown for invalid conversions. </p> <p> It's allowed to throw errors across a callback invocation, but it's not @@ -1007,7 +1014,7 @@ convention cannot be automatically detected, unlike for <tt>__stdcall</tt> calls <em>to</em> Windows functions. </p> <p> -For some use cases it's necessary to free up the resources or to +For some use cases, it's necessary to free up the resources or to dynamically redirect callbacks. Use an explicit cast to a C function pointer and keep the resulting cdata object. Then use the <a href="ext_ffi_api.html#callback_free"><tt>cb:free()</tt></a> @@ -1060,7 +1067,7 @@ GUI application, which waits for user input most of the time, anyway. </p> <p> For new designs <b>avoid push-style APIs</b>: a C function repeatedly -calling a callback for each result. 
Instead <b>use pull-style APIs</b>: +calling a callback for each result. Instead, <b>use pull-style APIs</b>: call a C function repeatedly to get a new result. Calls from Lua to C via the FFI are much faster than the other way round. Most well-designed libraries already use pull-style APIs (read/write, get/put). @@ -1079,7 +1086,7 @@ function. </p> <p> Indexing a C library namespace object with a symbol name (a Lua -string) automatically binds it to the library. First the symbol type +string) automatically binds it to the library. First, the symbol type is resolved — it must have been declared with <a href="ext_ffi_api.html#ffi_cdef"><tt>ffi.cdef</tt></a>. Then the symbol address is resolved by searching for the symbol name in the @@ -1134,7 +1141,7 @@ Performance notice: the JIT compiler specializes to the identity of namespace objects and to the strings used to index it. This effectively turns function cdata objects into constants. It's not useful and actually counter-productive to explicitly cache these -function objects, e.g. <tt>local strlen = ffi.C.strlen</tt>. OTOH it +function objects, e.g. <tt>local strlen = ffi.C.strlen</tt>. OTOH, it <em>is</em> useful to cache the namespace itself, e.g. <tt>local C = ffi.C</tt>. </p> @@ -1159,14 +1166,14 @@ This behavior is inevitable, since the goal is to provide full interoperability with C code. Adding extra safety measures, like bounds checks, would be futile. There's no way to detect misdeclarations of C functions, since shared libraries only -provide symbol names, but no type information. Likewise there's no way +provide symbol names, but no type information. Likewise, there's no way to infer the valid range of indexes for a returned pointer. </p> <p> Again: the FFI library is a low-level library. This implies it needs to be used with care, but it's flexibility and performance often outweigh this concern. If you're a C or C++ developer, it'll be easy -to apply your existing knowledge. 
OTOH writing code for the FFI +to apply your existing knowledge. OTOH, writing code for the FFI library is not for the faint of heart and probably shouldn't be the first exercise for someone with little experience in Lua, C or C++. </p> @@ -1194,7 +1201,7 @@ currently incomplete: <li>C declarations are not passed through a C pre-processor, yet.</li> <li>The C parser is able to evaluate most constant expressions -commonly found in C header files. However it doesn't handle the +commonly found in C header files. However, it doesn't handle the full range of C expression semantics and may fail for some obscure constructs.</li> <li><tt>static const</tt> declarations only work for integer types @@ -1225,7 +1232,7 @@ suboptimal performance, especially when used in inner loops: <li>Table initializers.</li> <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li> <li>Non-default initialization of VLA/VLS or large C types -(> 128 bytes or > 16 array elements.</li> +(> 128 bytes or > 16 array elements).</li> <li>Bitfield initializations.</li> <li>Pointer differences for element sizes that are not a power of two.</li> @@ -1252,7 +1259,7 @@ compiled.</li> </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/ext_ffi_tutorial.html b/source/libs/luajit/LuaJIT-src/doc/ext_ffi_tutorial.html index 3650066440a431d6aebdf8831690e6f56a28b6e5..5ea55a7452f4a2d6cebc6652f63a0a12f5df09f3 100644 --- a/source/libs/luajit/LuaJIT-src/doc/ext_ffi_tutorial.html +++ b/source/libs/luajit/LuaJIT-src/doc/ext_ffi_tutorial.html @@ -1,10 +1,9 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>FFI Tutorial</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" 
content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -18,7 +17,7 @@ td.idiomlua b { font-weight: normal; color: #2142bf; } </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1>FFI Tutorial</h1> @@ -27,7 +26,7 @@ td.idiomlua b { font-weight: normal; color: #2142bf; } <ul><li> <a href="luajit.html">LuaJIT</a> <ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a href="install.html">Installation</a> </li><li> @@ -45,6 +44,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; } <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> @@ -52,18 +53,11 @@ td.idiomlua b { font-weight: normal; color: #2142bf; } <a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> -</li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a 
href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> @@ -89,7 +83,7 @@ of its functions: local ffi = require("ffi") </pre> <p> -Please note this doesn't define an <tt>ffi</tt> variable in the table +Please note, this doesn't define an <tt>ffi</tt> variable in the table of globals — you really need to use the local variable. The <tt>require</tt> function ensures the library is only loaded once. </p> @@ -198,7 +192,7 @@ don't need to declare them as such. <span class="mark">⑤</span> The <tt>poll()</tt> function takes a couple more arguments we're not going to use. You can simply use <tt>nil</tt> to pass a <tt>NULL</tt> pointer and <tt>0</tt> -for the <tt>nfds</tt> parameter. Please note that the +for the <tt>nfds</tt> parameter. Please note, that the number <tt>0</tt> <em>does not convert to a pointer value</em>, unlike in C++. You really have to pass pointers to pointer arguments and numbers to number arguments. @@ -222,7 +216,7 @@ a fascinating best-selling game is left as an exercise for the reader. <h2 id="zlib">Accessing the zlib Compression Library</h2> <p> The following code shows how to access the <a -href="http://zlib.net/">zlib</a> compression library from Lua code. +href="https://zlib.net/"><span class="ext">»</span> zlib</a> compression library from Lua code. We'll define two convenience wrapper functions that take a string and compress or uncompress it to another string: </p> @@ -295,17 +289,17 @@ Here's the step-by-step explanation: <p> <span class="mark">①</span> This defines some of the C functions provided by zlib. For the sake of this example, some -type indirections have been reduced and it uses the pre-defined +type indirections have been reduced and it uses the predefined fixed-size integer types, while still adhering to the zlib API/ABI. </p> <p> <span class="mark">②</span> This loads the zlib shared -library. On POSIX systems it's named <tt>libz.so</tt> and usually +library. 
On POSIX systems, it's named <tt>libz.so</tt> and usually comes pre-installed. Since <tt>ffi.load()</tt> automatically adds any missing standard prefixes/suffixes, we can simply load the <tt>"z"</tt> library. On Windows it's named <tt>zlib1.dll</tt> and you'll have to download it first from the -<a href="http://zlib.net/"><span class="ext">»</span> zlib site</a>. The check for +<a href="https://zlib.net/"><span class="ext">»</span> zlib site</a>. The check for <tt>ffi.os</tt> makes sure we pass the right name to <tt>ffi.load()</tt>. </p> @@ -328,7 +322,7 @@ actual length that was used. <p> In C you'd pass in the address of a local variable (<tt>&buflen</tt>). But since there's no address-of operator in -Lua, we'll just pass in a one-element array. Conveniently it can be +Lua, we'll just pass in a one-element array. Conveniently, it can be initialized with the maximum buffer size in one step. Calling the actual <tt>zlib.compress2</tt> function is then straightforward. </p> @@ -352,7 +346,7 @@ for garbage collection and string interning. <span class="mark">⑥</span> The <tt>uncompress</tt> functions does the exact opposite of the <tt>compress</tt> function. The compressed data doesn't include the size of the original string, -so this needs to be passed in. Otherwise no surprises here. +so this needs to be passed in. Otherwise, no surprises here. </p> <p> <span class="mark">⑦</span> The code, that makes use @@ -386,7 +380,7 @@ Ok, so the <tt>ffi.*</tt> functions generally accept cdata objects wherever you'd want to use a number. That's why we get a away with passing <tt>n</tt> to <tt>ffi.string()</tt> above. But other Lua library functions or modules don't know how to deal with this. So for -maximum portability one needs to use <tt>tonumber()</tt> on returned +maximum portability, one needs to use <tt>tonumber()</tt> on returned <tt>long</tt> results before passing them on. Otherwise the application might work on some systems, but would fail in a POSIX/x64 environment. 
@@ -458,7 +452,7 @@ the origin. </p> <p> <span class="mark">④</span> If we run out of operators, we can -define named methods, too. Here the <tt>__index</tt> table defines an +define named methods, too. Here, the <tt>__index</tt> table defines an <tt>area</tt> function. For custom indexing needs, one might want to define <tt>__index</tt> and <tt>__newindex</tt> <em>functions</em> instead. </p> @@ -472,13 +466,13 @@ be used e.g. to create an array of points. The metamethods automatically apply to any and all uses of this type. </p> <p> -Please note that the association with a metatable is permanent and +Please note, that the association with a metatable is permanent and <b>the metatable must not be modified afterwards!</b> Ditto for the <tt>__index</tt> table. </p> <p> <span class="mark">⑥</span> Here are some simple usage examples -for the point type and their expected results. The pre-defined +for the point type and their expected results. The predefined operations (such as <tt>a.x</tt>) can be freely mixed with the newly defined metamethods. Note that <tt>area</tt> is a method and must be called with the Lua syntax for methods: <tt>a:area()</tt>, not @@ -487,7 +481,7 @@ called with the Lua syntax for methods: <tt>a:area()</tt>, not <p> The C type metamethod mechanism is most useful when used in conjunction with C libraries that are written in an object-oriented -style. Creators return a pointer to a new instance and methods take an +style. Creators return a pointer to a new instance, and methods take an instance pointer as the first argument. Sometimes you can just point <tt>__index</tt> to the library namespace and <tt>__gc</tt> to the destructor and you're done. But often enough you'll want to add @@ -573,7 +567,7 @@ end </pre> <p> This turns them into indirect calls and generates bigger and slower -machine code. Instead you'll want to cache the namespace itself and +machine code. 
Instead, you'll want to cache the namespace itself and rely on the JIT compiler to eliminate the lookups: </p> <pre class="code"> @@ -593,7 +587,7 @@ it to a local variable in the function scope is unnecessary. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/ext_jit.html b/source/libs/luajit/LuaJIT-src/doc/ext_jit.html index e4088bcbd20371f2af53f7a7c5cf40f5b4d6179e..15c75af1853f63bc93ee0d8cb494a0fc0ccc57fa 100644 --- a/source/libs/luajit/LuaJIT-src/doc/ext_jit.html +++ b/source/libs/luajit/LuaJIT-src/doc/ext_jit.html @@ -1,17 +1,16 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>jit.* Library</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1><tt>jit.*</tt> Library</h1> @@ -20,7 +19,7 @@ <ul><li> <a href="luajit.html">LuaJIT</a> <ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a href="install.html">Installation</a> </li><li> @@ -38,6 +37,8 @@ <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a 
class="current" href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> @@ -45,18 +46,11 @@ <a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> -</li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> @@ -141,7 +135,9 @@ Contains the LuaJIT version string. <h3 id="jit_version_num"><tt>jit.version_num</tt></h3> <p> Contains the version number of the LuaJIT core. Version xx.yy.zz -is represented by the decimal number xxyyzz. +is represented by the decimal number xxyyzz.<br> +<b>DEPRECATED after the switch to +<a href="https://luajit.org/status.html#release"><span class="ext">»</span> rolling releases</a>. zz is frozen at 99.</b> </p> <h3 id="jit_os"><tt>jit.os</tt></h3> @@ -153,12 +149,12 @@ Contains the target OS name: <h3 id="jit_arch"><tt>jit.arch</tt></h3> <p> Contains the target architecture name: -"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64". +"x86", "x64", "arm", "arm64", "arm64be", "ppc", "mips", "mipsel", "mips64", "mips64el", "mips64r6", "mips64r6el". </p> <h2 id="jit_opt"><tt>jit.opt.*</tt> — JIT compiler optimization control</h2> <p> -This sub-module provides the backend for the <tt>-O</tt> command line +This submodule provides the backend for the <tt>-O</tt> command line option. </p> <p> @@ -178,7 +174,7 @@ which was one of the ways to enable optimization. 
<h2 id="jit_util"><tt>jit.util.*</tt> — JIT compiler introspection</h2> <p> -This sub-module holds functions to introspect the bytecode, generated +This submodule holds functions to introspect the bytecode, generated traces, the IR and the generated machine code. The functionality provided by this module is still in flux and therefore undocumented. </p> @@ -191,7 +187,7 @@ if you want to know more. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/ext_profiler.html b/source/libs/luajit/LuaJIT-src/doc/ext_profiler.html index 71b8c03339e3f6043cfc247799e125845bf6fc90..c24ca97b7bede94c6299213c40ebeceb0039a7c4 100644 --- a/source/libs/luajit/LuaJIT-src/doc/ext_profiler.html +++ b/source/libs/luajit/LuaJIT-src/doc/ext_profiler.html @@ -1,17 +1,16 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Profiler</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1>Profiler</h1> @@ -20,7 +19,7 @@ <ul><li> <a href="luajit.html">LuaJIT</a> <ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a 
href="install.html">Installation</a> </li><li> @@ -38,6 +37,8 @@ <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> @@ -45,18 +46,11 @@ <a class="current" href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> -</li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> @@ -162,7 +156,7 @@ To see how much time is spent in different VM states or Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time spent in a VM state or zone vs. hotspots. This can be used to answer -questions like "Which time consuming functions are only interpreted?" or +questions like "Which time-consuming functions are only interpreted?" or "What's the garbage collector overhead for a specific function?". </p> <p> @@ -221,7 +215,7 @@ local profile = require("jit.profile") This module can be used to implement your own higher-level profiler. A typical profiling run starts the profiler, captures stack dumps in the profiler callback, adds them to a hash table to aggregate the number -of samples, stops the profiler and then analyzes all of the captured +of samples, stops the profiler and then analyzes all captured stack dumps. 
Other parameters can be sampled in the profiler callback, too. But it's important not to spend too much time in the callback, since this may skew the statistics. @@ -275,9 +269,9 @@ returns a string with a stack dump for the <tt>thread</tt> (coroutine), formatted according to the <tt>fmt</tt> argument: </p> <ul> -<li><tt>p</tt> — Preserve the full path for module names. Otherwise +<li><tt>p</tt> — Preserve the full path for module names. Otherwise, only the file name is used.</li> -<li><tt>f</tt> — Dump the function name if it can be derived. Otherwise +<li><tt>f</tt> — Dump the function name if it can be derived. Otherwise, use module:line.</li> <li><tt>F</tt> — Ditto, but dump module:name.</li> <li><tt>l</tt> — Dump module:line.</li> @@ -355,7 +349,7 @@ use. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/extensions.html b/source/libs/luajit/LuaJIT-src/doc/extensions.html index d7cc9693bebfec04c415bd4a83495aa81f5883cf..75b466db5e816e45730f0b0d50e08c9c56bbd85a 100644 --- a/source/libs/luajit/LuaJIT-src/doc/extensions.html +++ b/source/libs/luajit/LuaJIT-src/doc/extensions.html @@ -1,10 +1,9 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Extensions</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -28,7 +27,7 @@ td.excinterop { </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span 
id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1>Extensions</h1> @@ -37,7 +36,7 @@ td.excinterop { <ul><li> <a href="luajit.html">LuaJIT</a> <ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a href="install.html">Installation</a> </li><li> @@ -55,6 +54,8 @@ td.excinterop { <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> @@ -62,26 +63,20 @@ td.excinterop { <a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> -</li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> + <p> LuaJIT is fully upwards-compatible with Lua 5.1. It supports all -<a href="http://www.lua.org/manual/5.1/manual.html#5"><span class="ext">»</span> standard Lua +<a href="https://www.lua.org/manual/5.1/manual.html#5"><span class="ext">»</span> standard Lua library functions</a> and the full set of -<a href="http://www.lua.org/manual/5.1/manual.html#3"><span class="ext">»</span> Lua/C API +<a href="https://www.lua.org/manual/5.1/manual.html#3"><span class="ext">»</span> Lua/C API functions</a>. 
</p> <p> @@ -92,7 +87,7 @@ or LuaJIT. </p> <p> LuaJIT extends the standard Lua VM with new functionality and adds -several extension modules. Please note this page is only about +several extension modules. Please note, this page is only about <em>functional</em> enhancements and not about performance enhancements, such as the optimized VM, the faster interpreter or the JIT compiler. </p> @@ -105,7 +100,7 @@ LuaJIT comes with several built-in extension modules: <h3 id="bit"><tt>bit.*</tt> — Bitwise operations</h3> <p> LuaJIT supports all bitwise operations as defined by -<a href="http://bitop.luajit.org"><span class="ext">»</span> Lua BitOp</a>: +<a href="https://bitop.luajit.org"><span class="ext">»</span> Lua BitOp</a>: </p> <pre class="code"> bit.tobit bit.tohex bit.bnot bit.band bit.bor bit.bxor @@ -114,7 +109,7 @@ bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap <p> This module is a LuaJIT built-in — you don't need to download or install Lua BitOp. The Lua BitOp site has full documentation for all -<a href="http://bitop.luajit.org/api.html"><span class="ext">»</span> Lua BitOp API functions</a>. +<a href="https://bitop.luajit.org/api.html"><span class="ext">»</span> Lua BitOp API functions</a>. The FFI adds support for <a href="ext_ffi_semantics.html#cdata_arith">64 bit bitwise operations</a>, using the same API functions. @@ -165,13 +160,33 @@ passes any arguments after the error function to the function which is called in a protected context. </p> -<h3 id="load"><tt>loadfile()</tt> etc. handle UTF-8 source code</h3> +<h3 id="load"><tt>load*()</tt> handle UTF-8 source code</h3> <p> Non-ASCII characters are handled transparently by the Lua source code parser. This allows the use of UTF-8 characters in identifiers and strings. A UTF-8 BOM is skipped at the start of the source code. 
</p> +<h3 id="load_mode"><tt>load*()</tt> add a mode parameter</h3> +<p> +As an extension from Lua 5.2, the functions <tt>loadstring()</tt>, +<tt>loadfile()</tt> and (new) <tt>load()</tt> add an optional +<tt>mode</tt> parameter. +</p> +<p> +The default mode string is <tt>"bt"</tt>, which allows loading of both +source code and bytecode. Use <tt>"t"</tt> to allow only source code +or <tt>"b"</tt> to allow only bytecode to be loaded. +</p> +<p> +By default, the <tt>load*</tt> functions generate the native bytecode format. +For cross-compilation purposes, add <tt>W</tt> to the mode string to +force the 32 bit format and <tt>X</tt> to force the 64 bit format. +Add both to force the opposite format. Note that non-native bytecode +generated by <tt>load*</tt> cannot be run, but can still be passed +to <tt>string.dump</tt>. +</p> + <h3 id="tostring"><tt>tostring()</tt> etc. canonicalize NaN and ±Inf</h3> <p> All number-to-string conversions consistently convert non-finite numbers @@ -191,27 +206,33 @@ works independently of the current locale and it supports hex floating-point numbers (e.g. <tt>0x1.5p-3</tt>). </p> -<h3 id="string_dump"><tt>string.dump(f [,strip])</tt> generates portable bytecode</h3> +<h3 id="string_dump"><tt>string.dump(f [,mode])</tt> generates portable bytecode</h3> <p> An extra argument has been added to <tt>string.dump()</tt>. If set to -<tt>true</tt>, 'stripped' bytecode without debug information is -generated. This speeds up later bytecode loading and reduces memory -usage. See also the +<tt>true</tt> or to a string which contains the character <tt>s</tt>, +'stripped' bytecode without debug information is generated. This speeds +up later bytecode loading and reduces memory usage. See also the <a href="running.html#opt_b"><tt>-b</tt> command line option</a>. </p> <p> The generated bytecode is portable and can be loaded on any architecture -that LuaJIT supports, independent of word size or endianess. 
However the -bytecode compatibility versions must match. Bytecode stays compatible -for dot releases (x.y.0 → x.y.1), but may change with major or -minor releases (2.0 → 2.1) or between any beta release. Foreign -bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded. +that LuaJIT supports. However, the bytecode compatibility versions must +match. Bytecode only stays compatible within a major+minor version +(x.y.aaa → x.y.bbb), except for development branches. Foreign bytecode +(e.g. from Lua 5.1) is incompatible and cannot be loaded. </p> <p> Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies -a different, incompatible bytecode format for ports that use this mode (e.g. -ARM64 or MIPS64) or when explicitly enabled for x64. This may be rectified -in the future. +a different, incompatible bytecode format between 32 bit and 64 bit ports. +This may be rectified in the future. In the meantime, use the <tt>W</tt> +and <tt>X</tt> <a href="#load_mode">modes of the <tt>load*</tt> functions</a> +for cross-compilation purposes. +</p> +<p> +Due to VM hardening, bytecode is not deterministic. Add <tt>d</tt> to the +mode string to dump it in a deterministic manner: identical source code +always gives a byte-for-byte identical bytecode dump. This feature is +mainly useful for reproducible builds. </p> <h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3> @@ -234,7 +255,7 @@ avoids managing backlinks, saves an allocation and the overhead of incremental array/hash part growth. </p> <p> -Please note this function is meant for very specific situations. In most +Please note, this function is meant for very specific situations. In most cases it's better to replace the (usually single) link with a new table and let the GC do its work. </p> @@ -244,7 +265,7 @@ and let the GC do its work. LuaJIT uses a Tausworthe PRNG with period 2^223 to implement <tt>math.random()</tt> and <tt>math.randomseed()</tt>.
The quality of the PRNG results is much superior compared to the standard Lua -implementation which uses the platform-specific ANSI rand(). +implementation, which uses the platform-specific ANSI <tt>rand()</tt>. </p> <p> The PRNG generates the same sequences from the same seeds on all @@ -254,11 +275,19 @@ for every call. The result is uniformly distributed between 0.0 and 1.0. It's correctly scaled up and rounded for <tt>math.random(n [,m])</tt> to preserve uniformity. </p> +<p> +Call <tt>math.randomseed()</tt> without any arguments to seed it from +system entropy. +</p> +<p> +Important: Neither this nor any other PRNG based on the simplistic +<tt>math.random()</tt> API is suitable for cryptographic use. +</p> <h3 id="io"><tt>io.*</tt> functions handle 64 bit file offsets</h3> <p> The file I/O functions in the standard <tt>io.*</tt> library handle -64 bit file offsets. In particular this means it's possible +64 bit file offsets. In particular, this means it's possible to open files larger than 2 Gigabytes and to reposition or obtain the current file position for offsets beyond 2 GB (<tt>fp:seek()</tt> method). 
@@ -288,7 +317,7 @@ enabled: </p> <ul> <li><tt>goto</tt> and <tt>::labels::</tt>.</li> -<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\*'</tt> escape in strings.</li> +<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\z'</tt> escape in strings.</li> <li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li> <li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li> <li><tt>loadfile(filename [,mode [,env]])</tt>.</li> @@ -374,6 +403,7 @@ LuaJIT supports some extensions from Lua 5.3: <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li> <li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li> <li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li> +<li><tt>assert()</tt> accepts any type of error object.</li> <li><tt>table.move(a1, f, e, t [,a2])</tt>.</li> <li><tt>coroutine.isyieldable()</tt>.</li> <li>Lua/C API extensions: @@ -394,29 +424,19 @@ the toolchain used to compile LuaJIT: <td class="excinterop">Interoperability</td> </tr> <tr class="odd separate"> -<td class="excplatform">POSIX/x64, DWARF2 unwinding</td> -<td class="exccompiler">GCC 4.3+, Clang</td> +<td class="excplatform">External frame unwinding</td> +<td class="exccompiler">GCC, Clang, MSVC</td> <td class="excinterop"><b style="color: #00a000;">Full</b></td> </tr> <tr class="even"> -<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td> -<td class="exccompiler">GCC, Clang</td> -<td class="excinterop"><b style="color: #00a000;">Full</b></td> -</tr> -<tr class="odd"> -<td class="excplatform">Other platforms, DWARF2 unwinding</td> +<td class="excplatform">Internal frame unwinding + DWARF2</td> <td class="exccompiler">GCC, Clang</td> <td class="excinterop"><b style="color: #c06000;">Limited</b></td> </tr> -<tr class="even"> -<td class="excplatform">Windows/x64</td> -<td class="exccompiler">MSVC or WinSDK</td> -<td class="excinterop"><b style="color: 
#00a000;">Full</b></td> -</tr> <tr class="odd"> -<td class="excplatform">Windows/x86</td> -<td class="exccompiler">Any</td> -<td class="excinterop"><b style="color: #00a000;">Full</b></td> +<td class="excplatform">Windows 64 bit</td> +<td class="exccompiler">non-MSVC</td> +<td class="excinterop"><b style="color: #c06000;">Limited</b></td> </tr> <tr class="even"> <td class="excplatform">Other platforms</td> @@ -472,7 +492,7 @@ C++ destructors.</li> </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/faq.html b/source/libs/luajit/LuaJIT-src/doc/faq.html deleted file mode 100644 index 2c930743ddf9f506743980766b9c9226c6ec5147..0000000000000000000000000000000000000000 --- a/source/libs/luajit/LuaJIT-src/doc/faq.html +++ /dev/null @@ -1,186 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Frequently Asked Questions (FAQ)</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -<meta name="Language" content="en"> -<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> -<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -<style type="text/css"> -dd { margin-left: 1.5em; } -</style> -</head> -<body> -<div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> -</div> -<div id="head"> -<h1>Frequently Asked Questions (FAQ)</h1> -</div> -<div id="nav"> -<ul><li> -<a href="luajit.html">LuaJIT</a> -<ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> -</li><li> -<a href="install.html">Installation</a> -</li><li> -<a href="running.html">Running</a> -</li></ul> -</li><li> -<a 
href="extensions.html">Extensions</a> -<ul><li> -<a href="ext_ffi.html">FFI Library</a> -<ul><li> -<a href="ext_ffi_tutorial.html">FFI Tutorial</a> -</li><li> -<a href="ext_ffi_api.html">ffi.* API</a> -</li><li> -<a href="ext_ffi_semantics.html">FFI Semantics</a> -</li></ul> -</li><li> -<a href="ext_jit.html">jit.* Library</a> -</li><li> -<a href="ext_c_api.html">Lua/C API</a> -</li><li> -<a href="ext_profiler.html">Profiler</a> -</li></ul> -</li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a class="current" href="faq.html">FAQ</a> -</li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> -</li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> -</li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> -</li></ul> -</div> -<div id="main"> -<dl> -<dt>Q: Where can I learn more about LuaJIT and Lua?</dt> -<dd> -<ul style="padding: 0;"> -<li>The <a href="http://luajit.org/list.html"><span class="ext">»</span> LuaJIT mailing list</a> focuses on topics -related to LuaJIT.</li> -<li>The <a href="http://wiki.luajit.org/"><span class="ext">»</span> LuaJIT wiki</a> gathers community -resources about LuaJIT.</li> -<li>News about Lua itself can be found at the -<a href="http://www.lua.org/lua-l.html"><span class="ext">»</span> Lua mailing list</a>. 
-The mailing list archives are worth checking out for older postings -about LuaJIT.</li> -<li>The <a href="http://lua.org"><span class="ext">»</span> main Lua.org site</a> has complete -<a href="http://www.lua.org/docs.html"><span class="ext">»</span> documentation</a> of the language -and links to books and papers about Lua.</li> -<li>The community-managed <a href="http://lua-users.org/wiki/"><span class="ext">»</span> Lua Wiki</a> -has information about diverse topics.</li> -</ul> -</dl> - -<dl> -<dt>Q: Where can I learn more about the compiler technology used by LuaJIT?</dt> -<dd> -I'm planning to write more documentation about the internals of LuaJIT. -In the meantime, please use the following Google Scholar searches -to find relevant papers:<br> -Search for: <a href="http://scholar.google.com/scholar?q=Trace+Compiler"><span class="ext">»</span> Trace Compiler</a><br> -Search for: <a href="http://scholar.google.com/scholar?q=JIT+Compiler"><span class="ext">»</span> JIT Compiler</a><br> -Search for: <a href="http://scholar.google.com/scholar?q=Dynamic+Language+Optimizations"><span class="ext">»</span> Dynamic Language Optimizations</a><br> -Search for: <a href="http://scholar.google.com/scholar?q=SSA+Form"><span class="ext">»</span> SSA Form</a><br> -Search for: <a href="http://scholar.google.com/scholar?q=Linear+Scan+Register+Allocation"><span class="ext">»</span> Linear Scan Register Allocation</a><br> -Here is a list of the <a href="http://article.gmane.org/gmane.comp.lang.lua.general/58908"><span class="ext">»</span> innovative features in LuaJIT</a>.<br> -And, you know, reading the source is of course the only way to enlightenment. :-) -</dd> -</dl> - -<dl> -<dt>Q: Why do I get this error: "attempt to index global 'arg' (a nil value)"?<br> -Q: My vararg functions fail after switching to LuaJIT!</dt> -<dd>LuaJIT is compatible to the Lua 5.1 language standard. 
It doesn't -support the implicit <tt>arg</tt> parameter for old-style vararg -functions from Lua 5.0.<br>Please convert your code to the -<a href="http://www.lua.org/manual/5.1/manual.html#2.5.9"><span class="ext">»</span> Lua 5.1 -vararg syntax</a>.</dd> -</dl> - -<dl> -<dt>Q: Why do I get this error: "bad FPU precision"?<br> -<dt>Q: I get weird behavior after initializing Direct3D.<br> -<dt>Q: Some FPU operations crash after I load a Delphi DLL.<br> -</dt> -<dd> - -DirectX/Direct3D (up to version 9) sets the x87 FPU to single-precision -mode by default. This violates the Windows ABI and interferes with the -operation of many programs — LuaJIT is affected, too. Please make -sure you always use the <tt>D3DCREATE_FPU_PRESERVE</tt> flag when -initializing Direct3D.<br> - -Direct3D version 10 or higher do not show this behavior anymore. -Consider testing your application with older versions, too.<br> - -Similarly, the Borland/Delphi runtime modifies the FPU control word and -enables FP exceptions. Of course this violates the Windows ABI, too. -Please check the Delphi docs for the Set8087CW method. - -</dl> - -<dl> -<dt>Q: Sometimes Ctrl-C fails to stop my Lua program. Why?</dt> -<dd>The interrupt signal handler sets a Lua debug hook. But this is -currently ignored by compiled code (this will eventually be fixed). If -your program is running in a tight loop and never falls back to the -interpreter, the debug hook never runs and can't throw the -"interrupted!" error.<br> In the meantime you have to press Ctrl-C -twice to get stop your program. That's similar to when it's stuck -running inside a C function under the Lua interpreter.</dd> -</dl> - -<dl> -<dt>Q: Why doesn't my favorite power-patch for Lua apply against LuaJIT?</dt> -<dd>Because it's a completely redesigned VM and has very little code -in common with Lua anymore. 
Also, if the patch introduces changes to -the Lua semantics, these would need to be reflected everywhere in the -VM, from the interpreter up to all stages of the compiler.<br> Please -use only standard Lua language constructs. For many common needs you -can use source transformations or use wrapper or proxy functions. -The compiler will happily optimize away such indirections.</dd> -</dl> - -<dl> -<dt>Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?</dt> -<dd>Because it's a compiler — it needs to generate native -machine code. This means the code generator must be ported to each -architecture. And the fast interpreter is written in assembler and -must be ported, too. This is quite an undertaking.<br> -The <a href="install.html">install documentation</a> shows the supported -architectures. Other architectures will follow based on sufficient user -demand and/or sponsoring.</dd> -</dl> - -<dl> -<dt>Q: When will feature X be added? When will the next version be released?</dt> -<dd>When it's ready.<br> -C'mon, it's open source — I'm doing it on my own time and you're -getting it for free. You can either contribute a patch or sponsor -the development of certain features, if they are important to you. 
-</dd> -</dl> -<br class="flush"> -</div> -<div id="foot"> -<hr class="hide"> -Copyright © 2005-2017 Mike Pall -<span class="noprint"> -· -<a href="contact.html">Contact</a> -</span> -</div> -</body> -</html> diff --git a/source/libs/luajit/LuaJIT-src/doc/install.html b/source/libs/luajit/LuaJIT-src/doc/install.html index c491c601432e52510cd3a6ead244bfd87a95fb69..d1dbc10acc861fc43c45fd3f14bb75904dcc64f8 100644 --- a/source/libs/luajit/LuaJIT-src/doc/install.html +++ b/source/libs/luajit/LuaJIT-src/doc/install.html @@ -1,10 +1,9 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Installation</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -15,31 +14,26 @@ table.compat { } table.compat td { border: 1px solid #bfcfff; - height: 2.5em; + height: 1.5em; } table.compat tr.compathead td { font-weight: bold; border-bottom: 2px solid #bfcfff; } -tr.compathead td.compatos { - vertical-align: top; +td.compatname { + width: 10%; } -table.compat td.compatcpu { - width: 18%; - border-right: 2px solid #bfcfff; +td.compatbits { + width: 5%; } -td.compatos { +td.compatx { width: 21%; - vertical-align: middle; -} -td.compatno { - background-color: #d0d0d0; } </style> </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1>Installation</h1> @@ -48,7 +42,7 @@ td.compatno { <ul><li> <a href="luajit.html">LuaJIT</a> <ul><li> -<a 
href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a class="current" href="install.html">Installation</a> </li><li> @@ -66,6 +60,8 @@ td.compatno { <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> @@ -73,25 +69,18 @@ td.compatno { <a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a href="faq.html">FAQ</a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> -</li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> -</li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> <p> -LuaJIT is only distributed as a source package. This page explains -how to build and install LuaJIT with different operating systems -and C compilers. +LuaJIT is only distributed as source code — get it from the +<a href="https://luajit.org/download.html"><span class="ext">»</span> git repository</a>. This page explains how to build +and install the LuaJIT binary and library for different operating systems. </p> <p> For the impatient (on POSIX systems): @@ -99,62 +88,24 @@ For the impatient (on POSIX systems): <pre class="code"> make && sudo make install </pre> + +<h2 id="req">Requirements</h2> <p> -LuaJIT currently builds out-of-the box on most systems. 
-Here's the compatibility matrix for the supported combinations of -operating systems, CPUs and compilers: +LuaJIT currently builds out-of-the box on most systems. Please check the +supported operating systems and CPU architectures on the +<a href="https://luajit.org/status.html"><span class="ext">»</span> status page</a>. +</p> +<p> +Building LuaJIT requires a recent toolchain based on GCC, Clang/LLVM or +MSVC++. +</p> +<p> +The Makefile-based build system requires GNU Make and supports +cross-builds. +</p> +<p> +Batch files are provided for MSVC++ builds and console cross-builds. </p> -<table class="compat"> -<tr class="compathead"> -<td class="compatcpu">CPU / OS</td> -<td class="compatos"><a href="#posix">Linux</a> or<br><a href="#android">Android</a></td> -<td class="compatos"><a href="#posix">*BSD, Other</a></td> -<td class="compatos"><a href="#posix">OSX 10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td> -<td class="compatos"><a href="#windows">Windows<br>XP/Vista/7</a></td> -</tr> -<tr class="odd separate"> -<td class="compatcpu">x86 (32 bit)</td> -<td class="compatos">GCC 4.2+</td> -<td class="compatos">GCC 4.2+</td> -<td class="compatos">XCode 5.0+<br>Clang</td> -<td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW, Cygwin</td> -</tr> -<tr class="even"> -<td class="compatcpu">x64 (64 bit)</td> -<td class="compatos">GCC 4.2+</td> -<td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td> -<td class="compatos">XCode 5.0+<br>Clang</td> -<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td> -</tr> -<tr class="odd"> -<td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td> -<td class="compatos">GCC 4.2+</td> -<td class="compatos">GCC 4.2+<br>PSP2 (<a href="#psvita">PS VITA</a>)</td> -<td class="compatos">XCode 5.0+<br>Clang</td> -<td class="compatos compatno"> </td> -</tr> -<tr class="even"> -<td class="compatcpu"><a href="#cross2">ARM64</a></td> -<td class="compatos">GCC 4.8+</td> -<td 
class="compatos compatno"> </td> -<td class="compatos">XCode 6.0+<br>Clang 3.5+</td> -<td class="compatos compatno"> </td> -</tr> -<tr class="odd"> -<td class="compatcpu"><a href="#cross2">PPC</a></td> -<td class="compatos">GCC 4.3+</td> -<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td> -<td class="compatos compatno"> </td> -<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td> -</tr> -<tr class="even"> -<td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64</a></td> -<td class="compatos">GCC 4.3+</td> -<td class="compatos">GCC 4.3+</td> -<td class="compatos compatno"> </td> -<td class="compatos compatno"> </td> -</tr> -</table> <h2>Configuring LuaJIT</h2> <p> @@ -163,43 +114,34 @@ Usually there is no need to tweak the settings. The following files hold all user-configurable settings: </p> <ul> -<li><tt>src/luaconf.h</tt> sets some configuration variables.</li> <li><tt>Makefile</tt> has settings for <b>installing</b> LuaJIT (POSIX only).</li> <li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT under POSIX, MinGW or Cygwin.</li> <li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with -MSVC or WinSDK.</li> +MSVC (Visual Studio).</li> </ul> <p> Please read the instructions given in these files, before changing any settings. </p> <p> -LuaJIT on x64 currently uses 32 bit GC objects by default. -<tt>LJ_GC64</tt> mode may be explicitly enabled: -add <tt>XCFLAGS=-DLUAJIT_ENABLE_GC64</tt> to the make command or run -<tt>msvcbuild gc64</tt> for MSVC/WinSDK. Please check the note -about the <a href="extensions.html#string_dump">bytecode format</a> -differences, too. +All LuaJIT 64 bit ports use 64 bit GC objects by default (<tt>LJ_GC64</tt>). +For x64, you can select the old 32-on-64 bit mode by adding +<tt>XCFLAGS=-DLUAJIT_DISABLE_GC64</tt> to the make command. +Please check the note about the +<a href="extensions.html#string_dump">bytecode format</a> differences, too. 
</p> -<h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2> +<h2 id="posix">POSIX Systems (Linux, macOS, *BSD etc.)</h2> <h3>Prerequisites</h3> <p> -Depending on your distribution, you may need to install a package for -GCC, the development headers and/or a complete SDK. E.g. on a current -Debian/Ubuntu, install <tt>libc6-dev</tt> with the package manager. +Depending on your distribution, you may need to install a package for a +compiler (GCC or Clang/LLVM), the development headers and/or a complete SDK. +E.g. on a current Debian/Ubuntu, install <tt>build-essential</tt> with the +package manager. </p> -<p> -Download the current source package of LuaJIT (pick the .tar.gz), -if you haven't already done so. Move it to a directory of your choice, -open a terminal window and change to this directory. Now unpack the archive -and change to the newly created directory: -</p> -<pre class="code"> -tar zxf LuaJIT-2.0.5.tar.gz -cd LuaJIT-2.0.5</pre> +</pre> <h3>Building LuaJIT</h3> <p> The supplied Makefiles try to auto-detect the settings needed for your @@ -223,9 +165,12 @@ You can add an extra prefix to the search paths by appending the make PREFIX=/home/myself/lj2 </pre> <p> -Note for OSX: if the <tt>MACOSX_DEPLOYMENT_TARGET</tt> environment -variable is not set, then it's forced to <tt>10.4</tt>. +Note for macOS: you <b>must</b> set the <tt>MACOSX_DEPLOYMENT_TARGET</tt> +environment variable to a value supported by your toolchain: </p> +<pre class="code"> +MACOSX_DEPLOYMENT_TARGET=XX.YY make +</pre> <h3>Installing LuaJIT</h3> <p> The top-level Makefile installs LuaJIT by default under @@ -252,65 +197,31 @@ Obviously the prefixes given during build and installation need to be the same. 
<p> Either install one of the open source SDKs (<a href="http://mingw.org/"><span class="ext">»</span> MinGW</a> or -<a href="http://www.cygwin.com/"><span class="ext">»</span> Cygwin</a>), which come with a modified +<a href="https://www.cygwin.com/"><span class="ext">»</span> Cygwin</a>), which come with a modified GCC plus the required development headers. -</p> -<p> -Or install Microsoft's Visual C++ (MSVC). The freely downloadable -<a href="http://www.microsoft.com/Express/VC/"><span class="ext">»</span> Express Edition</a> -works just fine, but only contains an x86 compiler. -</p> -<p> -The freely downloadable -<a href="http://msdn.microsoft.com/en-us/windowsserver/bb980924.aspx"><span class="ext">»</span> Windows SDK</a> -only comes with command line tools, but this is all you need to build LuaJIT. -It contains x86 and x64 compilers. -</p> -<p> -Next, download the source package and unpack it using an archive manager -(e.g. the Windows Explorer) to a directory of your choice. +Or install Microsoft's Visual Studio (MSVC). </p> <h3>Building with MSVC</h3> <p> -Open a "Visual Studio .NET Command Prompt", <tt>cd</tt> to the -directory where you've unpacked the sources and run these commands: +Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), <tt>cd</tt> to the +directory with the source code and run these commands: </p> <pre class="code"> cd src msvcbuild </pre> <p> +Check the <tt>msvcbuild.bat</tt> file for more options. Then follow the installation instructions below. </p> -<h3>Building with the Windows SDK</h3> -<p> -Open a "Windows SDK Command Shell" and select the x86 compiler: -</p> -<pre class="code"> -setenv /release /x86 -</pre> -<p> -Or select the x64 compiler: -</p> -<pre class="code"> -setenv /release /x64 -</pre> -<p> -Then <tt>cd</tt> to the directory where you've unpacked the sources -and run these commands: -</p> -<pre class="code"> -cd src -msvcbuild -</pre> <p> -Then follow the installation instructions below. 
+For an x64 to ARM64 cross-build run this first: <tt>vcvarsall.bat x64_arm64</tt> </p> <h3>Building with MinGW or Cygwin</h3> <p> Open a command prompt window and make sure the MinGW or Cygwin programs -are in your path. Then <tt>cd</tt> to the directory where -you've unpacked the sources and run this command for MinGW: +are in your path. Then <tt>cd</tt> to the directory of the git repository. +Then run this command for MinGW: </p> <pre class="code"> mingw32-make @@ -358,6 +269,7 @@ for any supported target: <li>Yes, you need a toolchain for both your host <em>and</em> your target!</li> <li>Both host and target architectures must have the same pointer size.</li> <li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li> +<li>On some distro versions, multilib conflicts with cross-compilers. The workaround is to install the x86 cross-compiler package <tt>gcc-i686-linux-gnu</tt> and use it to build the host part (<tt>HOST_CC=i686-linux-gnu-gcc</tt>).</li> <li>64 bit targets always require compilation on a 64 bit host.</li> </ul> <p> @@ -365,7 +277,7 @@ You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the target OS differ, or you'll get assembler or linker errors: </p> <ul> -<li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li> +<li>E.g. 
if you're compiling on a Windows or macOS host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li> <li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li> <li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li> </ul> @@ -412,15 +324,15 @@ make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf- # ARM64 -make CROSS=aarch64-linux- +make CROSS=aarch64-linux-gnu- # PPC make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- # MIPS32 big-endian -make HOST_CC="gcc -m32" CROSS=mips-linux- +make HOST_CC="gcc -m32" CROSS=mips-linux-gnu- # MIPS32 little-endian -make HOST_CC="gcc -m32" CROSS=mipsel-linux- +make HOST_CC="gcc -m32" CROSS=mipsel-linux-gnu- # MIPS64 big-endian make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" @@ -428,52 +340,33 @@ make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" </pre> <p> -You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/index.html">Android NDK</a>. -The environment variables need to match the install locations and the -desired target platform. E.g. Android 4.0 corresponds to ABI level 14. -For details check the folder <tt>docs</tt> in the NDK directory. -</p> -<p> -Only a few common variations for the different CPUs, ABIs and platforms -are listed. Please use your own judgement for which combination you want -to build/deploy or which lowest common denominator you want to pick: +You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/"><span class="ext">»</span> Android NDK</a>. +Please adapt the environment variables to match the install locations and the +desired target platform. E.g. Android 4.1 corresponds to ABI level 16. 
</p> <pre class="code"> -# Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo) -NDK=/opt/android/ndk -NDKABI=8 -NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9 -NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- -NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" -make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" - -# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS) -NDK=/opt/android/ndk -NDKABI=14 -NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9 -NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- -NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" -NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8" -make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH" - -# Android/MIPS, mipsel (MIPS32R1 hard-float), Android 4.0+ (ICS) -NDK=/opt/android/ndk -NDKABI=14 -NDKVER=$NDK/toolchains/mipsel-linux-android-4.9 -NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android- -NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips" -make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" +# Android/ARM64, aarch64, Android 5.0+ (L) +NDKDIR=/opt/android/ndk +NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin +NDKCROSS=$NDKBIN/aarch64-linux-android- +NDKCC=$NDKBIN/aarch64-linux-android21-clang +make CROSS=$NDKCROSS \ + STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \ + TARGET_LD=$NDKCC TARGET_AR="$NDKBIN/llvm-ar rcus" \ + TARGET_STRIP=$NDKBIN/llvm-strip -# Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS) -NDK=/opt/android/ndk -NDKABI=14 -NDKVER=$NDK/toolchains/x86-4.9 -NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android- -NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86" -make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" +# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB) +NDKDIR=/opt/android/ndk +NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin +NDKCROSS=$NDKBIN/arm-linux-androideabi- +NDKCC=$NDKBIN/armv7a-linux-androideabi16-clang +make HOST_CC="gcc -m32" 
CROSS=$NDKCROSS \ + STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \ + TARGET_LD=$NDKCC TARGET_AR="$NDKBIN/llvm-ar rcus" \ + TARGET_STRIP=$NDKBIN/llvm-strip </pre> <p> -You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/devcenter/ios/index.action"><span class="ext">»</span> iOS SDK</a>: +You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="https://developer.apple.com/ios/"><span class="ext">»</span> iOS SDK</a>: </p> <p style="font-size: 8pt;"> Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps @@ -483,13 +376,6 @@ much slower than the JIT compiler. Please complain to Apple, not me. Or use Android. :-p </p> <pre class="code"> -# iOS/ARM (32 bit) -ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) -ICC=$(xcrun --sdk iphoneos --find clang) -ISDKF="-arch armv7 -isysroot $ISDKP" -make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \ - CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS - # iOS/ARM64 ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) ICC=$(xcrun --sdk iphoneos --find clang) @@ -501,8 +387,7 @@ make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \ <h3 id="consoles">Cross-compiling for consoles</h3> <p> Building LuaJIT for consoles requires both a supported host compiler -(x86 or x64) and a cross-compiler (to PPC or ARM) from the official -console SDK. +(x86 or x64) and a cross-compiler from the official console SDK. 
</p> <p> Due to restrictions on consoles, the JIT compiler is disabled and only @@ -523,45 +408,58 @@ To cross-compile for <b id="ps3">PS3</b> from a Linux host (requires make HOST_CC="gcc -m32" CROSS=ppu-lv2- </pre> <p> -To cross-compile for <b id="ps4">PS4</b> from a Windows host, -open a "Visual Studio .NET Command Prompt" (64 bit host compiler), -<tt>cd</tt> to the directory where you've unpacked the sources and -run the following commands: +To cross-compile for the other consoles from a Windows host, open a +"Native Tools Command Prompt for VS". You need to choose either the 32 +or the 64 bit version of the host compiler to match the target. +Then <tt>cd</tt> to the <tt>src</tt> directory below the source code +and run the build command given in the table: </p> -<pre class="code"> -cd src -ps4build -</pre> -<p> -To cross-compile for <b id="psvita">PS Vita</b> from a Windows host, -open a "Visual Studio .NET Command Prompt" (32 bit host compiler), -<tt>cd</tt> to the directory where you've unpacked the sources and -run the following commands: -</p> -<pre class="code"> -cd src -psvitabuild -</pre> -<p> -To cross-compile for <b id="xbox360">Xbox 360</b> from a Windows host, -open a "Visual Studio .NET Command Prompt" (32 bit host compiler), -<tt>cd</tt> to the directory where you've unpacked the sources and run -the following commands: -</p> -<pre class="code"> -cd src -xedkbuild -</pre> +<table class="compat"> +<tr class="compathead"> +<td class="compatname">Console</td> +<td class="compatbits">Bits</td> +<td class="compatx">Build Command</td> +</tr> +<tr class="odd separate"> +<td class="compatname"><b id="ps4">PS4</b></td> +<td class="compatbits">64</td> +<td class="compatx"><tt>ps4build</tt></td> +</tr> +<tr class="even"> +<td class="compatname"><b id="ps5">PS5</b></td> +<td class="compatbits">64</td> +<td class="compatx"><tt>ps5build</tt></td> +</tr> +<tr class="odd"> +<td class="compatname"><b id="psvita">PS Vita</b></td> +<td class="compatbits">32</td> 
+<td class="compatx"><tt>psvitabuild</tt></td> +</tr> +<tr class="even"> +<td class="compatname"><b id="xbox360">Xbox 360</b></td> +<td class="compatbits">32</td> +<td class="compatx"><tt>xedkbuild</tt></td> +</tr> +<tr class="odd"> +<td class="compatname"><b id="xboxone">Xbox One</b></td> +<td class="compatbits">64</td> +<td class="compatx"><tt>xb1build</tt></td> +</tr> +<tr class="even"> +<td class="compatname"><b id="nx32">Nintendo Switch NX32</b></td> +<td class="compatbits">32</td> +<td class="compatx"><tt>nxbuild</tt></td> +</tr> +<tr class="odd"> +<td class="compatname"><b id="nx64">Nintendo Switch NX64</b></td> +<td class="compatbits">64</td> +<td class="compatx"><tt>nxbuild</tt></td> +</tr> +</table> <p> -To cross-compile for <b id="xboxone">Xbox One</b> from a Windows host, -open a "Visual Studio .NET Command Prompt" (64 bit host compiler), -<tt>cd</tt> to the directory where you've unpacked the sources and run -the following commands: +Please check out the comments in the corresponding <tt>*.bat</tt> +file for more options. </p> -<pre class="code"> -cd src -xb1build -</pre> <h2 id="embed">Embedding LuaJIT</h2> <p> @@ -590,14 +488,6 @@ the DLL). You may link LuaJIT statically on Windows only if you don't intend to load Lua/C modules at runtime. </li></ul> </li> -<li> -If you're building a 64 bit application on OSX which links directly or -indirectly against LuaJIT which is not built for <tt>LJ_GC64</tt> mode, -you need to link your main executable with these flags: -<pre class="code"> --pagezero_size 10000 -image_base 100000000 -</pre> -</li> </ul> <p>Additional hints for initializing LuaJIT using the C API functions:</p> <ul> @@ -606,16 +496,16 @@ you need to link your main executable with these flags: for embedding Lua or LuaJIT into your application.</li> <li>Make sure you use <tt>luaL_newstate</tt>. 
Avoid using <tt>lua_newstate</tt>, since this uses the (slower) default memory -allocator from your system (no support for this on x64).</li> +allocator from your system (no support for this on 64 bit architectures).</li> <li>Make sure you use <tt>luaL_openlibs</tt> and not the old Lua 5.0 style of calling <tt>luaopen_base</tt> etc. directly.</li> <li>To change or extend the list of standard libraries to load, copy <tt>src/lib_init.c</tt> to your project and modify it accordingly. -Make sure the <tt>jit</tt> library is loaded or the JIT compiler +Make sure the <tt>jit</tt> library is loaded, or the JIT compiler will not be activated.</li> <li>The <tt>bit.*</tt> module for bitwise operations is already built-in. There's no need to statically link -<a href="http://bitop.luajit.org/"><span class="ext">»</span> Lua BitOp</a> to your application.</li> +<a href="https://bitop.luajit.org/"><span class="ext">»</span> Lua BitOp</a> to your application.</li> </ul> <h2 id="distro">Hints for Distribution Maintainers</h2> @@ -630,7 +520,7 @@ in unspeakable ways. There should be absolutely no need to patch <tt>luaconf.h</tt> or any of the Makefiles. And please do not hand-pick files for your packages — simply use whatever <tt>make install</tt> creates. There's a reason -for all of the files <em>and</em> directories it creates. +for all the files <em>and</em> directories it creates. </p> <p> The build system uses GNU make and auto-detects most settings based on @@ -682,7 +572,7 @@ to me (the upstream) and not you (the package maintainer), anyway. 
</div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/luajit.html b/source/libs/luajit/LuaJIT-src/doc/luajit.html index ef5b824c1b318124f50ac4e8343b6cf49d3a5c96..73a1c17f06e6b06a4c1333489cac6ebe0e432e0b 100644 --- a/source/libs/luajit/LuaJIT-src/doc/luajit.html +++ b/source/libs/luajit/LuaJIT-src/doc/luajit.html @@ -1,10 +1,9 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>LuaJIT</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -68,35 +67,11 @@ table.fcompat td { background-image: -o-linear-gradient(#41bfbf 10%, #b0ffff 95%); background-image: -ms-linear-gradient(#41bfbf 10%, #b0ffff 95%); } -table.stats td { - color: #ffffff; - background: #a0a0a0; - background-image: linear-gradient(#808080 10%, #d0d0d0 95%); - background-image: -moz-linear-gradient(#808080 10%, #d0d0d0 95%); - background-image: -webkit-linear-gradient(#808080 10%, #d0d0d0 95%); - background-image: -o-linear-gradient(#808080 10%, #d0d0d0 95%); - background-image: -ms-linear-gradient(#808080 10%, #d0d0d0 95%); -} -table.stats td.speed { - color: #ff4020; -} -table.stats td.kb { - color: #ffff80; - background: #808080; - background-image: linear-gradient(#606060 10%, #c0c0c0 95%); - background-image: -moz-linear-gradient(#606060 10%, #c0c0c0 95%); - background-image: -webkit-linear-gradient(#606060 10%, #c0c0c0 95%); - background-image: 
-o-linear-gradient(#606060 10%, #c0c0c0 95%); - background-image: -ms-linear-gradient(#606060 10%, #c0c0c0 95%); -} -table.feature small { - font-size: 50%; -} </style> </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1>LuaJIT</h1> @@ -105,7 +80,7 @@ table.feature small { <ul><li> <a class="current" href="luajit.html">LuaJIT</a> <ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a href="install.html">Installation</a> </li><li> @@ -123,6 +98,8 @@ table.feature small { <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> @@ -130,43 +107,36 @@ table.feature small { <a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a href="faq.html">FAQ</a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> -</li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> -</li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> <p> LuaJIT is a <b>Just-In-Time Compiler</b> (JIT) for the -<a href="http://www.lua.org/"><span class="ext">»</span> Lua</a> programming language. 
+<a href="https://www.lua.org/"><span class="ext">»</span> Lua</a> programming language. Lua is a powerful, dynamic and light-weight programming language. It may be embedded or used as a general-purpose, stand-alone language. </p> <p> -LuaJIT is Copyright © 2005-2017 Mike Pall, released under the -<a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">»</span> MIT open source license</a>. +LuaJIT is Copyright © 2005-2025 Mike Pall, released under the +<a href="https://www.opensource.org/licenses/mit-license.php"><span class="ext">»</span> MIT open source license</a>. </p> <p> </p> <h2>Compatibility</h2> <table class="feature os os1"> -<tr><td>Windows</td><td>Linux</td><td>BSD</td><td>OSX</td><td>POSIX</td></tr> +<tr><td>Windows</td><td>Linux</td><td>BSD</td><td>macOS</td><td>POSIX</td></tr> </table> <table class="feature os os2"> <tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr> </table> <table class="feature os os3"> -<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr> +<tr><td>PS3</td><td>PS4<br>PS5</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td><td>Nintendo<br>Switch</td></tr> </table> <table class="feature compiler"> <tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr> @@ -179,23 +149,20 @@ LuaJIT is Copyright © 2005-2017 Mike Pall, released under the </table> <h2>Overview</h2> -<table class="feature stats"> -<tr> -<td class="speed">3x<br>- 100x</td> -<td class="kb">115 <small>KB</small><br>VM</td> -<td class="kb">90 <small>KB</small><br>JIT</td> -<td class="kloc">63 <small>KLOC</small><br>C</td> -<td class="kloc">24 <small>KLOC</small><br>ASM</td> -<td class="kloc">11 <small>KLOC</small><br>Lua</td> -</tr> -</table> <p style="margin-top: 1em;"> LuaJIT has been successfully used as a <b>scripting middleware</b> in games, appliances, network and graphics apps, numerical simulations, -trading platforms and many other specialty applications. 
It scales from -embedded devices, smartphones, desktops up to server farms. It combines -high flexibility with <a href="http://luajit.org/performance.html"><span class="ext">»</span> high performance</a> -and an unmatched <b>low memory footprint</b>. +trading platforms and many other specialty applications. +</p> +<p> +LuaJIT is part of a hundred million web sites, huge SaaS installations, +network switches, set-top boxes and other embedded devices. You've probably +already used LuaJIT without knowing about it. +</p> +<p> +LuaJIT scales from embedded devices, smartphones, desktops up to server +farms. It combines high flexibility with high performance and an unmatched +<b>low memory footprint</b>. </p> <p> LuaJIT has been in continuous development since 2005. It's widely @@ -226,7 +193,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT. </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/running.html b/source/libs/luajit/LuaJIT-src/doc/running.html index 666b0abca8139c68f8f20103430323f03c1a2be6..f71eee42f6af50a1f5c60d5cdaa6cd3a6fc30ce3 100644 --- a/source/libs/luajit/LuaJIT-src/doc/running.html +++ b/source/libs/luajit/LuaJIT-src/doc/running.html @@ -1,10 +1,9 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Running LuaJIT</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2025"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> @@ -33,7 +32,7 @@ 
td.param_default { </head> <body> <div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> </div> <div id="head"> <h1>Running LuaJIT</h1> @@ -42,7 +41,7 @@ td.param_default { <ul><li> <a href="luajit.html">LuaJIT</a> <ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> </li><li> <a href="install.html">Installation</a> </li><li> @@ -60,6 +59,8 @@ td.param_default { <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> @@ -67,21 +68,15 @@ td.param_default { <a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> -<a href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> -</li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +<a href="https://luajit.org/status.html">Status <span class="ext">»</span></a> </li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +<a href="https://luajit.org/faq.html">FAQ <span class="ext">»</span></a> </li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> </li></ul> </div> <div id="main"> + <p> LuaJIT has only a single stand-alone executable, called <tt>luajit</tt> on POSIX systems or <tt>luajit.exe</tt> on Windows. It can be used to run simple @@ -95,7 +90,7 @@ The <tt>luajit</tt> stand-alone executable is just a slightly modified version of the regular <tt>lua</tt> stand-alone executable. It supports the same basic options, too. 
<tt>luajit -h</tt> prints a short list of the available options. Please have a look at the -<a href="http://www.lua.org/manual/5.1/manual.html#6"><span class="ext">»</span> Lua manual</a> +<a href="https://www.lua.org/manual/5.1/manual.html#6"><span class="ext">»</span> Lua manual</a> for details. </p> <p> @@ -111,10 +106,14 @@ are accepted: <li><tt>-l</tt> — Only list bytecode.</li> <li><tt>-s</tt> — Strip debug info (this is the default).</li> <li><tt>-g</tt> — Keep debug info.</li> +<li><tt>-W</tt> — Generate 32 bit (non-GC64) bytecode.</li> +<li><tt>-X</tt> — Generate 64 bit (GC64) bytecode.</li> +<li><tt>-d</tt> — Generate bytecode in deterministic manner.</li> <li><tt>-n name</tt> — Set module name (default: auto-detect from input name)</li> <li><tt>-t type</tt> — Set output file type (default: auto-detect from output name).</li> <li><tt>-a arch</tt> — Override architecture for object files (default: native).</li> <li><tt>-o os</tt> — Override OS for object files (default: native).</li> +<li><tt>-F name</tt> — Override filename (default: input filename).</li> <li><tt>-e chunk</tt> — Use chunk string as input.</li> <li><tt>-</tt> (a single minus sign) — Use stdin as input and/or stdout as output.</li> </ul> @@ -124,7 +123,8 @@ file name: </p> <ul> <li><tt>c</tt> — C source file, exported bytecode data.</li> -<li><tt>h</tt> — C header file, static bytecode data.</li> +<li><tt>cc</tt> — C++ source file, exported bytecode data.</li> +<li><tt>h</tt> — C/C++ header file, static bytecode data.</li> <li><tt>obj</tt> or <tt>o</tt> — Object file, exported bytecode data (OS- and architecture-specific).</li> <li><tt>raw</tt> or any other extension — Raw bytecode file (portable). @@ -188,9 +188,9 @@ written in Lua. They are mainly used for debugging the JIT compiler itself. For a description of their options and output format, please read the comment block at the start of their source. 
They can be found in the <tt>lib</tt> directory of the source -distribution or installed under the <tt>jit</tt> directory. By default -this is <tt>/usr/local/share/luajit-2.0.5/jit</tt> on POSIX -systems. +distribution or installed under the <tt>jit</tt> directory. By default, +this is <tt>/usr/local/share/luajit-XX.YY.ZZ/jit</tt> on POSIX +systems (replace XX.YY.ZZ by the installed version). </p> <h3 id="opt_O"><tt>-O[level]</tt><br> @@ -220,11 +220,17 @@ to a specific value. You can either use this option multiple times (like <tt>-Ocse -O-dce -Ohotloop=10</tt>) or separate several settings with a comma (like <tt>-O+cse,-dce,hotloop=10</tt>). The settings are applied from -left to right and later settings override earlier ones. You can freely +left to right, and later settings override earlier ones. You can freely mix the three forms, but note that setting an optimization level overrides all earlier flags. </p> <p> +Note that <tt>-Ofma</tt> is not enabled by default at any level, +because it affects floating-point result accuracy. Only enable this +if you fully understand the trade-offs of FMA for performance (higher), +determinism (lower) and numerical accuracy (higher). 
+</p> +<p> Here are the available flags and at what optimization levels they are enabled: </p> @@ -256,6 +262,8 @@ are enabled: <td class="flag_name">sink</td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_desc">Allocation/Store Sinking</td></tr> <tr class="even"> <td class="flag_name">fuse</td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_desc">Fusion of operands into instructions</td></tr> +<tr class="odd"> +<td class="flag_name">fma </td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_desc">Fused multiply-add</td></tr> </table> <p> Here are the parameters and their default settings: @@ -299,7 +307,7 @@ Here are the parameters and their default settings: </div> <div id="foot"> <hr class="hide"> -Copyright © 2005-2017 Mike Pall +Copyright © 2005-2025 <span class="noprint"> · <a href="contact.html">Contact</a> diff --git a/source/libs/luajit/LuaJIT-src/doc/status.html b/source/libs/luajit/LuaJIT-src/doc/status.html deleted file mode 100644 index cad6ca654688ddbb43d3b7fdaa98a94f6e7e1898..0000000000000000000000000000000000000000 --- a/source/libs/luajit/LuaJIT-src/doc/status.html +++ /dev/null @@ -1,123 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> -<title>Status</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> -<meta name="Author" content="Mike Pall"> -<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -<meta name="Language" content="en"> -<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> -<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -<style type="text/css"> -ul li { padding-bottom: 0.3em; } -</style> -</head> -<body> -<div id="site"> -<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> -</div> -<div 
id="head"> -<h1>Status</h1> -</div> -<div id="nav"> -<ul><li> -<a href="luajit.html">LuaJIT</a> -<ul><li> -<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> -</li><li> -<a href="install.html">Installation</a> -</li><li> -<a href="running.html">Running</a> -</li></ul> -</li><li> -<a href="extensions.html">Extensions</a> -<ul><li> -<a href="ext_ffi.html">FFI Library</a> -<ul><li> -<a href="ext_ffi_tutorial.html">FFI Tutorial</a> -</li><li> -<a href="ext_ffi_api.html">ffi.* API</a> -</li><li> -<a href="ext_ffi_semantics.html">FFI Semantics</a> -</li></ul> -</li><li> -<a href="ext_jit.html">jit.* Library</a> -</li><li> -<a href="ext_c_api.html">Lua/C API</a> -</li><li> -<a href="ext_profiler.html">Profiler</a> -</li></ul> -</li><li> -<a class="current" href="status.html">Status</a> -<ul><li> -<a href="changes.html">Changes</a> -</li></ul> -</li><li> -<a href="faq.html">FAQ</a> -</li><li> -<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> -</li><li> -<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> -</li><li> -<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> -</li></ul> -</div> -<div id="main"> -<p> -<span style="color: #0000c0;">LuaJIT 2.0</span> is the current -<span style="color: #0000c0;">stable branch</span>. This branch is in -feature-freeze — new features will only be added to LuaJIT 2.1. -</p> - -<h2>Current Status</h2> -<p> -LuaJIT ought to run all Lua 5.1-compatible source code just fine. -It's considered a serious bug if the VM crashes or produces unexpected -results — please report this. -</p> -<p> -Known incompatibilities and issues in LuaJIT 2.0: -</p> -<ul> -<li> -There are some differences in <b>implementation-defined</b> behavior. -These either have a good reason, are arbitrary design choices -or are due to quirks in the VM. The latter cases may get fixed if a -demonstrable need is shown. 
-</li> -<li> -The Lua <b>debug API</b> is missing a couple of features (return -hooks for non-Lua functions) and shows slightly different behavior -in LuaJIT (no per-coroutine hooks, no tail call counting). -</li> -<li> -Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not -handled correctly. The error may fall through an on-trace -<tt>pcall</tt> or it may be passed on to the function set with -<tt>lua_atpanic</tt> on x64. This issue will be fixed with the new -garbage collector. -</li> -<li> -LuaJIT on 64 bit systems provides a <b>limited range</b> of 47 bits for the -<b>legacy <tt>lightuserdata</tt></b> data type. -This is only relevant on x64 systems which use the negative part of the -virtual address space in user mode, e.g. Solaris/x64, and on ARM64 systems -configured with a 48 bit or 52 bit VA. -Avoid using <tt>lightuserdata</tt> to hold pointers that may point outside -of that range, e.g. variables on the stack. In general, avoid this data -type for new code and replace it with (much more performant) FFI bindings. -FFI cdata pointers can address the full 64 bit range. -</li> -</ul> -<br class="flush"> -</div> -<div id="foot"> -<hr class="hide"> -Copyright © 2005-2017 Mike Pall -<span class="noprint"> -· -<a href="contact.html">Contact</a> -</span> -</div> -</body> -</html> diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm.h b/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm.h index a43f7c6645c8f3203272240459889ea26a3a3d5e..824e8be035a25dd9f977f733e3c7e07b8e395297 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm.h +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm.h @@ -1,6 +1,6 @@ /* ** DynASM ARM encoding engine. -** Copyright (C) 2005-2017 Mike Pall. All rights reserved. +** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ @@ -70,7 +70,7 @@ struct dasm_State { size_t lgsize; int *pclabels; /* PC label chains/pos ptrs. 
*/ size_t pcsize; - void **globals; /* Array of globals (bias -10). */ + void **globals; /* Array of globals. */ dasm_Section *section; /* Pointer to active section. */ size_t codesize; /* Total size of all code sections. */ int maxsection; /* 0 <= sectionidx < maxsection. */ @@ -87,7 +87,6 @@ void dasm_init(Dst_DECL, int maxsection) { dasm_State *D; size_t psz = 0; - int i; Dst_REF = NULL; DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); D = Dst_REF; @@ -98,12 +97,7 @@ void dasm_init(Dst_DECL, int maxsection) D->pcsize = 0; D->globals = NULL; D->maxsection = maxsection; - for (i = 0; i < maxsection; i++) { - D->sections[i].buf = NULL; /* Need this for pass3. */ - D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); - D->sections[i].bsize = 0; - D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ - } + memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); } /* Free DynASM state. */ @@ -123,7 +117,7 @@ void dasm_free(Dst_DECL) void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) { dasm_State *D = Dst_REF; - D->globals = gl - 10; /* Negative bias to compensate for locals. */ + D->globals = gl; DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); } @@ -148,6 +142,7 @@ void dasm_setup(Dst_DECL, const void *actionlist) if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); for (i = 0; i < D->maxsection; i++) { D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; D->sections[i].ofs = 0; } } @@ -254,6 +249,7 @@ void dasm_put(Dst_DECL, int start, ...) case DASM_IMMV8: CK((n & 3) == 0, RANGE_I); n >>= 2; + /* fallthrough */ case DASM_IMML8: case DASM_IMML12: CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) : @@ -293,7 +289,7 @@ int dasm_link(Dst_DECL, size_t *szp) { /* Handle globals not defined in this translation unit. 
*/ int idx; - for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { int n = D->lglabels[idx]; /* Undefined label: Collapse rel chain and replace with marker (< 0). */ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } @@ -370,7 +366,11 @@ int dasm_encode(Dst_DECL, void *buffer) ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; break; case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp - 4); + goto patchrel; + } + /* fallthrough */ case DASM_REL_PC: CK(n >= 0, UNDEF_PC); n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4; @@ -391,7 +391,7 @@ int dasm_encode(Dst_DECL, void *buffer) } break; case DASM_LABEL_LG: - ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n); break; case DASM_LABEL_PC: break; case DASM_IMM: diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm.lua b/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm.lua index 32f595afff235e28e5b4e6d3ca64f275aadc7bd1..12540d170f5023ec481f57ba3d9c8a6c54b785ca 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm.lua +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM ARM module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. 
------------------------------------------------------------------------------ @@ -9,9 +9,9 @@ local _info = { arch = "arm", description = "DynASM ARM module", - version = "1.4.0", - vernum = 10400, - release = "2015-10-18", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", author = "Mike Pall", license = "MIT", } diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm64.h b/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm64.h index 47e1e074939ae75c4f1c50635b1d1d29f89acf13..a922a4aeea7f7c8a57dfacd4774b50122c6a09f0 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm64.h +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm64.h @@ -1,6 +1,6 @@ /* ** DynASM ARM64 encoding engine. -** Copyright (C) 2005-2017 Mike Pall. All rights reserved. +** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ @@ -21,8 +21,9 @@ enum { /* The following actions need a buffer position. */ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, /* The following actions also have an argument. */ - DASM_REL_PC, DASM_LABEL_PC, + DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A, DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML, + DASM_IMMV, DASM_VREG, DASM__MAX }; @@ -39,6 +40,7 @@ enum { #define DASM_S_RANGE_LG 0x13000000 #define DASM_S_RANGE_PC 0x14000000 #define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_RANGE_VREG 0x16000000 #define DASM_S_UNDEF_LG 0x21000000 #define DASM_S_UNDEF_PC 0x22000000 @@ -70,7 +72,7 @@ struct dasm_State { size_t lgsize; int *pclabels; /* PC label chains/pos ptrs. */ size_t pcsize; - void **globals; /* Array of globals (bias -10). */ + void **globals; /* Array of globals. */ dasm_Section *section; /* Pointer to active section. */ size_t codesize; /* Total size of all code sections. */ int maxsection; /* 0 <= sectionidx < maxsection. 
*/ @@ -87,7 +89,6 @@ void dasm_init(Dst_DECL, int maxsection) { dasm_State *D; size_t psz = 0; - int i; Dst_REF = NULL; DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); D = Dst_REF; @@ -98,12 +99,7 @@ void dasm_init(Dst_DECL, int maxsection) D->pcsize = 0; D->globals = NULL; D->maxsection = maxsection; - for (i = 0; i < maxsection; i++) { - D->sections[i].buf = NULL; /* Need this for pass3. */ - D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); - D->sections[i].bsize = 0; - D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ - } + memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); } /* Free DynASM state. */ @@ -123,7 +119,7 @@ void dasm_free(Dst_DECL) void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) { dasm_State *D = Dst_REF; - D->globals = gl - 10; /* Negative bias to compensate for locals. */ + D->globals = gl; DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); } @@ -148,6 +144,7 @@ void dasm_setup(Dst_DECL, const void *actionlist) if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); for (i = 0; i < D->maxsection; i++) { D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; D->sections[i].ofs = 0; } } @@ -156,10 +153,10 @@ void dasm_setup(Dst_DECL, const void *actionlist) #ifdef DASM_CHECKS #define CK(x, st) \ do { if (!(x)) { \ - D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) + D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) #define CKPL(kind, st) \ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ - D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) + D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) #else #define CK(x, st) ((void)0) #define CKPL(kind, st) ((void)0) @@ -188,7 +185,9 @@ static int dasm_imm13(int lo, int hi) unsigned long long n = (((unsigned long long)hi) << 32) | 
(unsigned int)lo; unsigned long long m = 1ULL, a, b, c; if (n & 1) { n = ~n; inv = 1; } - a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b); + a = n & (unsigned long long)-(long long)n; + b = (n+a)&(unsigned long long)-(long long)(n+a); + c = (n+a-b)&(unsigned long long)-(long long)(n+a-b); xa = dasm_ffs(a); xb = dasm_ffs(b); if (c) { w = dasm_ffs(c) - xa; @@ -247,7 +246,7 @@ void dasm_put(Dst_DECL, int start, ...) n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n]; goto stop; case DASM_ESC: p++; ofs += 4; break; - case DASM_REL_EXT: break; + case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break; case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; case DASM_REL_LG: n = (ins & 2047) - 10; pl = D->lglabels + n; @@ -268,6 +267,11 @@ void dasm_put(Dst_DECL, int start, ...) *pl = pos; } pos++; + if ((ins & 0x8000)) ofs += 8; + break; + case DASM_REL_A: + b[pos++] = n; + b[pos++] = va_arg(ap, int); break; case DASM_LABEL_LG: pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; @@ -312,13 +316,21 @@ void dasm_put(Dst_DECL, int start, ...) } case DASM_IMML: { #ifdef DASM_CHECKS - int scale = (p[-2] >> 30); + int scale = (ins & 3); CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) || (unsigned int)(n+256) < 512, RANGE_I); #endif b[pos++] = n; break; } + case DASM_IMMV: + ofs += 4; + b[pos++] = n; + break; + case DASM_VREG: + CK(n < 32, RANGE_VREG); + b[pos++] = n; + break; } } } @@ -348,7 +360,7 @@ int dasm_link(Dst_DECL, size_t *szp) { /* Handle globals not defined in this translation unit. */ int idx; - for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { int n = D->lglabels[idx]; /* Undefined label: Collapse rel chain and replace with marker (< 0). 
*/ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } @@ -375,8 +387,8 @@ int dasm_link(Dst_DECL, size_t *szp) case DASM_REL_LG: case DASM_REL_PC: pos++; break; case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W: - case DASM_IMML: pos++; break; - case DASM_IMM13X: pos += 2; break; + case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break; + case DASM_IMM13X: case DASM_REL_A: pos += 2; break; } } stop: (void)0; @@ -391,7 +403,7 @@ int dasm_link(Dst_DECL, size_t *szp) #ifdef DASM_CHECKS #define CK(x, st) \ - do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) + do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0) #else #define CK(x, st) ((void)0) #endif @@ -423,10 +435,16 @@ int dasm_encode(Dst_DECL, void *buffer) n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048)); goto patchrel; case DASM_ALIGN: - ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f; break; case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); + if (n < 0) { + ptrdiff_t na = (ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4; + n = (int)na; + CK((ptrdiff_t)n == na, RANGE_REL); + goto patchrel; + } + /* fallthrough */ case DASM_REL_PC: CK(n >= 0, UNDEF_PC); n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4; @@ -445,10 +463,26 @@ int dasm_encode(Dst_DECL, void *buffer) } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL); cp[-1] |= ((n << 3) & 0x0007ffe0); + } else if ((ins & 0x8000)) { /* absolute */ + cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n); + cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32); + cp += 2; } break; + case DASM_REL_A: { + ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n); + if ((ins & 0x3000) == 0x3000) { /* ADRP */ + ins &= ~0x1000; + na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12); + } else { 
+ na = na - (ptrdiff_t)cp + 4; + } + n = (int)na; + CK((ptrdiff_t)n == na, RANGE_REL); + goto patchrel; + } case DASM_LABEL_LG: - ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n); break; case DASM_LABEL_PC: break; case DASM_IMM: @@ -467,11 +501,17 @@ int dasm_encode(Dst_DECL, void *buffer) cp[-1] |= (dasm_imm13(n, *b++) << 10); break; case DASM_IMML: { - int scale = (p[-2] >> 30); + int scale = (ins & 3); cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ? ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12); break; } + case DASM_IMMV: + *cp++ = n; + break; + case DASM_VREG: + cp[-1] |= (n & 0x1f) << (ins & 0x1f); + break; default: *cp++ = ins; break; } } @@ -511,7 +551,7 @@ int dasm_checkstep(Dst_DECL, int secmatch) } if (D->status == DASM_S_OK && secmatch >= 0 && D->section != &D->sections[secmatch]) - D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); return D->status; } #endif diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm64.lua b/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm64.lua index 8a5f735d7c315e4c2bc577e69787dbc2ed1af589..8b27e9625c57a4745a7418ac434b7c593fd0cc9c 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm64.lua +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_arm64.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM ARM64 module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. 
------------------------------------------------------------------------------ @@ -9,9 +9,9 @@ local _info = { arch = "arm", description = "DynASM ARM64 module", - version = "1.4.0", - vernum = 10400, - release = "2015-10-18", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", author = "Mike Pall", license = "MIT", } @@ -23,12 +23,12 @@ local _M = { _info = _info } local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs local assert, setmetatable, rawget = assert, setmetatable, rawget local _s = string -local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local format, byte, char = _s.format, _s.byte, _s.char local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub local concat, sort, insert = table.concat, table.sort, table.insert local bit = bit or require("bit") local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift -local ror, tohex = bit.ror, bit.tohex +local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit -- Inherited tables and callbacks. local g_opt, g_arch @@ -39,7 +39,9 @@ local wline, werror, wfatal, wwarn local action_names = { "STOP", "SECTION", "ESC", "REL_EXT", "ALIGN", "REL_LG", "LABEL_LG", - "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", + "REL_PC", "LABEL_PC", "REL_A", + "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV", + "VREG", } -- Maximum number of section buffer positions for dasm_put(). 
@@ -246,9 +248,12 @@ local map_cond = { local parse_reg_type -local function parse_reg(expr) +local function parse_reg(expr, shift, no_vreg) if not expr then werror("expected register name") end local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$") + if not tname then + tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$") + end local tp = map_type[tname or expr] if tp then local reg = ovreg or tp.reg @@ -266,18 +271,28 @@ local function parse_reg(expr) elseif parse_reg_type ~= rt then werror("register size mismatch") end - return r, tp + return shl(r, shift), tp end end + local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$") + if vreg then + if not parse_reg_type then + parse_reg_type = vrt + elseif parse_reg_type ~= vrt then + werror("register size mismatch") + end + if not no_vreg then waction("VREG", shift, vreg) end + return 0 + end werror("bad register name `"..expr.."'") end local function parse_reg_base(expr) if expr == "sp" then return 0x3e0 end - local base, tp = parse_reg(expr) + local base, tp = parse_reg(expr, 5) if parse_reg_type ~= "x" then werror("bad register type") end parse_reg_type = false - return shl(base, 5), tp + return base, tp end local parse_ctx = {} @@ -297,7 +312,7 @@ local function parse_number(n) local code = loadenv("return "..n) if code then local ok, y = pcall(code) - if ok then return y end + if ok and type(y) == "number" then return y end end return nil end @@ -403,7 +418,7 @@ local function parse_imm_load(imm, scale) end werror("out of range immediate `"..imm.."'") else - waction("IMML", 0, imm) + waction("IMML", scale, imm) return 0 end end @@ -462,6 +477,7 @@ end local function parse_load(params, nparams, n, op) if params[n+2] then werror("too many operands") end + local scale = shr(op, 30) local pn, p2 = params[n], params[n+1] local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") if not p1 then @@ -470,14 +486,13 @@ local function parse_load(params, nparams, n, op) if reg and tailr ~= "" then local base, tp = 
parse_reg_base(reg) if tp then - waction("IMML", 0, format(tp.ctypefmt, tailr)) + waction("IMML", scale, format(tp.ctypefmt, tailr)) return op + base end end end werror("expected address operand") end - local scale = shr(op, 30) if p2 then if wb == "!" then werror("bad use of '!'") end op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400 @@ -494,7 +509,7 @@ local function parse_load(params, nparams, n, op) op = op + parse_imm_load(imm, scale) else local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$") - op = op + shl(parse_reg(p2b), 16) + 0x00200800 + op = op + parse_reg(p2b, 16) + 0x00200800 if parse_reg_type ~= "x" and parse_reg_type ~= "w" then werror("bad index register type") end @@ -534,7 +549,7 @@ end local function parse_load_pair(params, nparams, n, op) if params[n+2] then werror("too many operands") end local pn, p2 = params[n], params[n+1] - local scale = shr(op, 30) == 0 and 2 or 3 + local scale = 2 + shr(op, 31 - band(shr(op, 26), 1)) local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") if not p1 then if not p2 then @@ -561,14 +576,14 @@ local function parse_load_pair(params, nparams, n, op) end local function parse_label(label, def) - local prefix = sub(label, 1, 2) + local prefix = label:sub(1, 2) -- =>label (pc label reference) if prefix == "=>" then - return "PC", 0, sub(label, 3) + return "PC", 0, label:sub(3) end -- ->name (global label reference) if prefix == "->" then - return "LG", map_global[sub(label, 3)] + return "LG", map_global[label:sub(3)] end if def then -- [1-9] (local label definition) @@ -586,8 +601,11 @@ local function parse_label(label, def) if extname then return "EXT", map_extern[extname] end + -- &expr (pointer) + if label:sub(1, 1) == "&" then + return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2)) + end end - werror("bad label `"..label.."'") end local function branch_type(op) @@ -620,24 +638,24 @@ local function alias_bfx(p) end local function alias_bfiz(p) - parse_reg(p[1]) + parse_reg(p[1], 0, 
true) if parse_reg_type == "w" then - p[3] = "#-("..p[3]:sub(2)..")%32" + p[3] = "#(32-("..p[3]:sub(2).."))%32" p[4] = "#("..p[4]:sub(2)..")-1" else - p[3] = "#-("..p[3]:sub(2)..")%64" + p[3] = "#(64-("..p[3]:sub(2).."))%64" p[4] = "#("..p[4]:sub(2)..")-1" end end local alias_lslimm = op_alias("ubfm_4", function(p) - parse_reg(p[1]) + parse_reg(p[1], 0, true) local sh = p[3]:sub(2) if parse_reg_type == "w" then - p[3] = "#-("..sh..")%32" + p[3] = "#(32-("..sh.."))%32" p[4] = "#31-("..sh..")" else - p[3] = "#-("..sh..")%64" + p[3] = "#(64-("..sh.."))%64" p[4] = "#63-("..sh..")" end end) @@ -788,8 +806,8 @@ map_op = { ["ldrsw_*"] = "98000000DxB|b8800000DxL", -- NOTE: ldur etc. are handled by ldr et al. - ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP", - ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP", + ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP", + ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP", ["ldpsw_*"] = "68400000DAxP", -- Branches. @@ -805,6 +823,13 @@ map_op = { tbz_3 = "36000000DTBw|36000000DTBx", tbnz_3 = "37000000DTBw|37000000DTBx", + -- ARM64e: Pointer authentication codes (PAC). + blraaz_1 = "d63f081fNx", + braa_2 = "d71f0800NDx", + braaz_1 = "d61f081fNx", + pacibsp_0 = "d503237f", + retab_0 = "d65f0fff", + -- Miscellaneous instructions. -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr -- TODO: sys, sysl, ic, dc, at, tlbi @@ -881,25 +906,25 @@ end -- Handle opcodes defined with template strings. local function parse_template(params, template, nparams, pos) - local op = tonumber(sub(template, 1, 8), 16) + local op = tonumber(template:sub(1, 8), 16) local n = 1 local rtt = {} parse_reg_type = false -- Process each character. 
- for p in gmatch(sub(template, 9), ".") do + for p in gmatch(template:sub(9), ".") do local q = params[n] if p == "D" then - op = op + parse_reg(q); n = n + 1 + op = op + parse_reg(q, 0); n = n + 1 elseif p == "N" then - op = op + shl(parse_reg(q), 5); n = n + 1 + op = op + parse_reg(q, 5); n = n + 1 elseif p == "M" then - op = op + shl(parse_reg(q), 16); n = n + 1 + op = op + parse_reg(q, 16); n = n + 1 elseif p == "A" then - op = op + shl(parse_reg(q), 10); n = n + 1 + op = op + parse_reg(q, 10); n = n + 1 elseif p == "m" then - op = op + shl(parse_reg(params[n-1]), 16) + op = op + parse_reg(params[n-1], 16) elseif p == "p" then if q == "sp" then params[n] = "@x31" end @@ -917,7 +942,7 @@ local function parse_template(params, template, nparams, pos) werror("bad register type") end parse_reg_type = false - elseif p == "x" or p == "w" or p == "d" or p == "s" then + elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then if parse_reg_type ~= p then werror("register size mismatch") end @@ -930,8 +955,14 @@ local function parse_template(params, template, nparams, pos) elseif p == "B" then local mode, v, s = parse_label(q, false); n = n + 1 + if not mode then werror("bad label `"..q.."'") end local m = branch_type(op) - waction("REL_"..mode, v+m, s, 1) + if mode == "A" then + waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s)) + actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s) + else + waction("REL_"..mode, v+m, s, 1) + end elseif p == "I" then op = op + parse_imm12(q); n = n + 1 @@ -977,8 +1008,8 @@ function op_template(params, template, nparams) if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end -- Limit number of section buffer positions used by a single dasm_put(). - -- A single opcode needs a maximum of 3 positions. - if secpos+3 > maxsecpos then wflush() end + -- A single opcode needs a maximum of 4 positions. 
+ if secpos+4 > maxsecpos then wflush() end local pos = wpos() local lpos, apos, spos = #actlist, #actargs, secpos @@ -990,9 +1021,11 @@ function op_template(params, template, nparams) actlist[lpos+1] = nil actlist[lpos+2] = nil actlist[lpos+3] = nil + actlist[lpos+4] = nil actargs[apos+1] = nil actargs[apos+2] = nil actargs[apos+3] = nil + actargs[apos+4] = nil end error(err, 0) end @@ -1036,23 +1069,50 @@ map_op[".label_1"] = function(params) if not params then return "[1-9] | ->global | =>pcexpr" end if secpos+1 > maxsecpos then wflush() end local mode, n, s = parse_label(params[1], true) - if mode == "EXT" then werror("bad label definition") end + if not mode or mode == "EXT" then werror("bad label definition") end waction("LABEL_"..mode, n, s, 1) end ------------------------------------------------------------------------------ -- Pseudo-opcodes for data storage. -map_op[".long_*"] = function(params) +local function op_data(params) if not params then return "imm..." end + local sz = params.op == ".long" and 4 or 8 for _,p in ipairs(params) do - local n = tonumber(p) - if not n then werror("bad immediate `"..p.."'") end - if n < 0 then n = n + 2^32 end - wputw(n) + local imm = parse_number(p) + if imm then + local n = tobit(imm) + if n == imm or (n < 0 and n + 2^32 == imm) then + wputw(n < 0 and n + 2^32 or n) + if sz == 8 then + wputw(imm < 0 and 0xffffffff or 0) + end + elseif sz == 4 then + werror("bad immediate `"..p.."'") + else + imm = nil + end + end + if not imm then + local mode, v, s = parse_label(p, false) + if sz == 4 then + if mode then werror("label does not fit into .long") end + waction("IMMV", 0, p) + elseif mode and mode ~= "A" then + waction("REL_"..mode, v+0x8000, s, 1) + else + if mode == "A" then p = s end + waction("IMMV", 0, format("(unsigned int)(%s)", p)) + waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", p)) + end + end if secpos+2 > maxsecpos then wflush() end end end +map_op[".long_*"] = op_data 
+map_op[".quad_*"] = op_data +map_op[".addr_*"] = op_data -- Alignment pseudo-opcode. map_op[".align_1"] = function(params) diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips.h b/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips.h index 4b49fd8c7c62ec86a00d0cd9340b1ca27206bde4..3fa2ef42c857f41f0c1c9b9da9de5986beabdf20 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips.h +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips.h @@ -1,6 +1,6 @@ /* ** DynASM MIPS encoding engine. -** Copyright (C) 2005-2017 Mike Pall. All rights reserved. +** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ @@ -69,7 +69,7 @@ struct dasm_State { size_t lgsize; int *pclabels; /* PC label chains/pos ptrs. */ size_t pcsize; - void **globals; /* Array of globals (bias -10). */ + void **globals; /* Array of globals. */ dasm_Section *section; /* Pointer to active section. */ size_t codesize; /* Total size of all code sections. */ int maxsection; /* 0 <= sectionidx < maxsection. */ @@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection) { dasm_State *D; size_t psz = 0; - int i; Dst_REF = NULL; DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); D = Dst_REF; @@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection) D->pcsize = 0; D->globals = NULL; D->maxsection = maxsection; - for (i = 0; i < maxsection; i++) { - D->sections[i].buf = NULL; /* Need this for pass3. */ - D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); - D->sections[i].bsize = 0; - D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ - } + memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); } /* Free DynASM state. */ @@ -122,7 +116,7 @@ void dasm_free(Dst_DECL) void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) { dasm_State *D = Dst_REF; - D->globals = gl - 10; /* Negative bias to compensate for locals. 
*/ + D->globals = gl; DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); } @@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist) if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); for (i = 0; i < D->maxsection; i++) { D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; D->sections[i].ofs = 0; } } @@ -155,10 +150,10 @@ void dasm_setup(Dst_DECL, const void *actionlist) #ifdef DASM_CHECKS #define CK(x, st) \ do { if (!(x)) { \ - D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) + D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) #define CKPL(kind, st) \ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ - D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) + D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) #else #define CK(x, st) ((void)0) #define CKPL(kind, st) ((void)0) @@ -273,7 +268,7 @@ int dasm_link(Dst_DECL, size_t *szp) { /* Handle globals not defined in this translation unit. */ int idx; - for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { int n = D->lglabels[idx]; /* Undefined label: Collapse rel chain and replace with marker (< 0). 
*/ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } @@ -314,7 +309,7 @@ int dasm_link(Dst_DECL, size_t *szp) #ifdef DASM_CHECKS #define CK(x, st) \ - do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) + do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0) #else #define CK(x, st) ((void)0) #endif @@ -349,22 +344,27 @@ int dasm_encode(Dst_DECL, void *buffer) ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; break; case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp); + goto patchrel; + } + /* fallthrough */ case DASM_REL_PC: CK(n >= 0, UNDEF_PC); n = *DASM_POS2PTR(D, n); if (ins & 2048) - n = n - (int)((char *)cp - base); - else n = (n + (int)(size_t)base) & 0x0fffffff; - patchrel: + else + n = n - (int)((char *)cp - base); + patchrel: { + unsigned int e = 16 + ((ins >> 12) & 15); CK((n & 3) == 0 && - ((n + ((ins & 2048) ? 0x00020000 : 0)) >> - ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL); - cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff)); + ((n + ((ins & 2048) ? 
0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL); + cp[-1] |= ((n>>2) & ((1<<e)-1)); + } break; case DASM_LABEL_LG: - ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n); break; case DASM_LABEL_PC: break; case DASM_IMMS: @@ -412,7 +412,7 @@ int dasm_checkstep(Dst_DECL, int secmatch) } if (D->status == DASM_S_OK && secmatch >= 0 && D->section != &D->sections[secmatch]) - D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); return D->status; } #endif diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips.lua b/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips.lua index 78a4e34a09032946ef0136e67ae540c4a823f452..2ab3f3ce8f6104cc1b3f16b3bbe5d98407ddce88 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips.lua +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips.lua @@ -1,19 +1,20 @@ ------------------------------------------------------------------------------ -- DynASM MIPS32/MIPS64 module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. 
------------------------------------------------------------------------------ local mips64 = mips64 +local mipsr6 = _map_def.MIPSR6 -- Module information: local _info = { arch = mips64 and "mips64" or "mips", description = "DynASM MIPS32/MIPS64 module", - version = "1.4.0", - vernum = 10400, - release = "2016-05-24", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", author = "Mike Pall", license = "MIT", } @@ -238,7 +239,6 @@ local map_op = { bne_3 = "14000000STB", blez_2 = "18000000SB", bgtz_2 = "1c000000SB", - addi_3 = "20000000TSI", li_2 = "24000000TI", addiu_3 = "24000000TSI", slti_3 = "28000000TSI", @@ -248,40 +248,22 @@ local map_op = { ori_3 = "34000000TSU", xori_3 = "38000000TSU", lui_2 = "3c000000TU", - beqzl_2 = "50000000SB", - beql_3 = "50000000STB", - bnezl_2 = "54000000SB", - bnel_3 = "54000000STB", - blezl_2 = "58000000SB", - bgtzl_2 = "5c000000SB", - daddi_3 = mips64 and "60000000TSI", daddiu_3 = mips64 and "64000000TSI", ldl_2 = mips64 and "68000000TO", ldr_2 = mips64 and "6c000000TO", lb_2 = "80000000TO", lh_2 = "84000000TO", - lwl_2 = "88000000TO", lw_2 = "8c000000TO", lbu_2 = "90000000TO", lhu_2 = "94000000TO", - lwr_2 = "98000000TO", lwu_2 = mips64 and "9c000000TO", sb_2 = "a0000000TO", sh_2 = "a4000000TO", - swl_2 = "a8000000TO", sw_2 = "ac000000TO", - sdl_2 = mips64 and "b0000000TO", - sdr_2 = mips64 and "b1000000TO", - swr_2 = "b8000000TO", - cache_2 = "bc000000NO", - ll_2 = "c0000000TO", lwc1_2 = "c4000000HO", - pref_2 = "cc000000NO", ldc1_2 = "d4000000HO", ld_2 = mips64 and "dc000000TO", - sc_2 = "e0000000TO", swc1_2 = "e4000000HO", - scd_2 = mips64 and "f0000000TO", sdc1_2 = "f4000000HO", sd_2 = mips64 and "fc000000TO", @@ -289,10 +271,6 @@ local map_op = { nop_0 = "00000000", sll_3 = "00000000DTA", sextw_2 = "00000000DT", - movf_2 = "00000001DS", - movf_3 = "00000001DSC", - movt_2 = "00010001DS", - movt_3 = "00010001DSC", srl_3 = "00000002DTA", rotr_3 = "00200002DTA", sra_3 = "00000003DTA", @@ -301,31 +279,16 @@ local 
map_op = { rotrv_3 = "00000046DTS", drotrv_3 = mips64 and "00000056DTS", srav_3 = "00000007DTS", - jr_1 = "00000008S", jalr_1 = "0000f809S", jalr_2 = "00000009DS", - movz_3 = "0000000aDST", - movn_3 = "0000000bDST", syscall_0 = "0000000c", syscall_1 = "0000000cY", break_0 = "0000000d", break_1 = "0000000dY", sync_0 = "0000000f", - mfhi_1 = "00000010D", - mthi_1 = "00000011S", - mflo_1 = "00000012D", - mtlo_1 = "00000013S", dsllv_3 = mips64 and "00000014DTS", dsrlv_3 = mips64 and "00000016DTS", dsrav_3 = mips64 and "00000017DTS", - mult_2 = "00000018ST", - multu_2 = "00000019ST", - div_2 = "0000001aST", - divu_2 = "0000001bST", - dmult_2 = mips64 and "0000001cST", - dmultu_2 = mips64 and "0000001dST", - ddiv_2 = mips64 and "0000001eST", - ddivu_2 = mips64 and "0000001fST", add_3 = "00000020DST", move_2 = mips64 and "00000025DS" or "00000021DS", addu_3 = "00000021DST", @@ -369,32 +332,9 @@ local map_op = { bgez_2 = "04010000SB", bltzl_2 = "04020000SB", bgezl_2 = "04030000SB", - tgei_2 = "04080000SI", - tgeiu_2 = "04090000SI", - tlti_2 = "040a0000SI", - tltiu_2 = "040b0000SI", - teqi_2 = "040c0000SI", - tnei_2 = "040e0000SI", - bltzal_2 = "04100000SB", bal_1 = "04110000B", - bgezal_2 = "04110000SB", - bltzall_2 = "04120000SB", - bgezall_2 = "04130000SB", synci_1 = "041f0000O", - -- Opcode SPECIAL2. - madd_2 = "70000000ST", - maddu_2 = "70000001ST", - mul_3 = "70000002DST", - msub_2 = "70000004ST", - msubu_2 = "70000005ST", - clz_2 = "70000020DS=", - clo_2 = "70000021DS=", - dclz_2 = mips64 and "70000024DS=", - dclo_2 = mips64 and "70000025DS=", - sdbbp_0 = "7000003f", - sdbbp_1 = "7000003fY", - -- Opcode SPECIAL3. 
ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32 @@ -445,15 +385,6 @@ local map_op = { ctc1_2 = "44c00000TG", mthc1_2 = "44e00000TG", - bc1f_1 = "45000000B", - bc1f_2 = "45000000CB", - bc1t_1 = "45010000B", - bc1t_2 = "45010000CB", - bc1fl_1 = "45020000B", - bc1fl_2 = "45020000CB", - bc1tl_1 = "45030000B", - bc1tl_2 = "45030000CB", - ["add.s_3"] = "46000000FGH", ["sub.s_3"] = "46000001FGH", ["mul.s_3"] = "46000002FGH", @@ -470,51 +401,11 @@ local map_op = { ["trunc.w.s_2"] = "4600000dFG", ["ceil.w.s_2"] = "4600000eFG", ["floor.w.s_2"] = "4600000fFG", - ["movf.s_2"] = "46000011FG", - ["movf.s_3"] = "46000011FGC", - ["movt.s_2"] = "46010011FG", - ["movt.s_3"] = "46010011FGC", - ["movz.s_3"] = "46000012FGT", - ["movn.s_3"] = "46000013FGT", ["recip.s_2"] = "46000015FG", ["rsqrt.s_2"] = "46000016FG", ["cvt.d.s_2"] = "46000021FG", ["cvt.w.s_2"] = "46000024FG", ["cvt.l.s_2"] = "46000025FG", - ["cvt.ps.s_3"] = "46000026FGH", - ["c.f.s_2"] = "46000030GH", - ["c.f.s_3"] = "46000030VGH", - ["c.un.s_2"] = "46000031GH", - ["c.un.s_3"] = "46000031VGH", - ["c.eq.s_2"] = "46000032GH", - ["c.eq.s_3"] = "46000032VGH", - ["c.ueq.s_2"] = "46000033GH", - ["c.ueq.s_3"] = "46000033VGH", - ["c.olt.s_2"] = "46000034GH", - ["c.olt.s_3"] = "46000034VGH", - ["c.ult.s_2"] = "46000035GH", - ["c.ult.s_3"] = "46000035VGH", - ["c.ole.s_2"] = "46000036GH", - ["c.ole.s_3"] = "46000036VGH", - ["c.ule.s_2"] = "46000037GH", - ["c.ule.s_3"] = "46000037VGH", - ["c.sf.s_2"] = "46000038GH", - ["c.sf.s_3"] = "46000038VGH", - ["c.ngle.s_2"] = "46000039GH", - ["c.ngle.s_3"] = "46000039VGH", - ["c.seq.s_2"] = "4600003aGH", - ["c.seq.s_3"] = "4600003aVGH", - ["c.ngl.s_2"] = "4600003bGH", - ["c.ngl.s_3"] = "4600003bVGH", - ["c.lt.s_2"] = "4600003cGH", - ["c.lt.s_3"] = "4600003cVGH", - ["c.nge.s_2"] = "4600003dGH", - ["c.nge.s_3"] = "4600003dVGH", - ["c.le.s_2"] = "4600003eGH", - ["c.le.s_3"] = "4600003eVGH", - ["c.ngt.s_2"] = "4600003fGH", - 
["c.ngt.s_3"] = "4600003fVGH", - ["add.d_3"] = "46200000FGH", ["sub.d_3"] = "46200001FGH", ["mul.d_3"] = "46200002FGH", @@ -531,130 +422,410 @@ local map_op = { ["trunc.w.d_2"] = "4620000dFG", ["ceil.w.d_2"] = "4620000eFG", ["floor.w.d_2"] = "4620000fFG", - ["movf.d_2"] = "46200011FG", - ["movf.d_3"] = "46200011FGC", - ["movt.d_2"] = "46210011FG", - ["movt.d_3"] = "46210011FGC", - ["movz.d_3"] = "46200012FGT", - ["movn.d_3"] = "46200013FGT", ["recip.d_2"] = "46200015FG", ["rsqrt.d_2"] = "46200016FG", ["cvt.s.d_2"] = "46200020FG", ["cvt.w.d_2"] = "46200024FG", ["cvt.l.d_2"] = "46200025FG", - ["c.f.d_2"] = "46200030GH", - ["c.f.d_3"] = "46200030VGH", - ["c.un.d_2"] = "46200031GH", - ["c.un.d_3"] = "46200031VGH", - ["c.eq.d_2"] = "46200032GH", - ["c.eq.d_3"] = "46200032VGH", - ["c.ueq.d_2"] = "46200033GH", - ["c.ueq.d_3"] = "46200033VGH", - ["c.olt.d_2"] = "46200034GH", - ["c.olt.d_3"] = "46200034VGH", - ["c.ult.d_2"] = "46200035GH", - ["c.ult.d_3"] = "46200035VGH", - ["c.ole.d_2"] = "46200036GH", - ["c.ole.d_3"] = "46200036VGH", - ["c.ule.d_2"] = "46200037GH", - ["c.ule.d_3"] = "46200037VGH", - ["c.sf.d_2"] = "46200038GH", - ["c.sf.d_3"] = "46200038VGH", - ["c.ngle.d_2"] = "46200039GH", - ["c.ngle.d_3"] = "46200039VGH", - ["c.seq.d_2"] = "4620003aGH", - ["c.seq.d_3"] = "4620003aVGH", - ["c.ngl.d_2"] = "4620003bGH", - ["c.ngl.d_3"] = "4620003bVGH", - ["c.lt.d_2"] = "4620003cGH", - ["c.lt.d_3"] = "4620003cVGH", - ["c.nge.d_2"] = "4620003dGH", - ["c.nge.d_3"] = "4620003dVGH", - ["c.le.d_2"] = "4620003eGH", - ["c.le.d_3"] = "4620003eVGH", - ["c.ngt.d_2"] = "4620003fGH", - ["c.ngt.d_3"] = "4620003fVGH", - - ["add.ps_3"] = "46c00000FGH", - ["sub.ps_3"] = "46c00001FGH", - ["mul.ps_3"] = "46c00002FGH", - ["abs.ps_2"] = "46c00005FG", - ["mov.ps_2"] = "46c00006FG", - ["neg.ps_2"] = "46c00007FG", - ["movf.ps_2"] = "46c00011FG", - ["movf.ps_3"] = "46c00011FGC", - ["movt.ps_2"] = "46c10011FG", - ["movt.ps_3"] = "46c10011FGC", - ["movz.ps_3"] = "46c00012FGT", - ["movn.ps_3"] = 
"46c00013FGT", - ["cvt.s.pu_2"] = "46c00020FG", - ["cvt.s.pl_2"] = "46c00028FG", - ["pll.ps_3"] = "46c0002cFGH", - ["plu.ps_3"] = "46c0002dFGH", - ["pul.ps_3"] = "46c0002eFGH", - ["puu.ps_3"] = "46c0002fFGH", - ["c.f.ps_2"] = "46c00030GH", - ["c.f.ps_3"] = "46c00030VGH", - ["c.un.ps_2"] = "46c00031GH", - ["c.un.ps_3"] = "46c00031VGH", - ["c.eq.ps_2"] = "46c00032GH", - ["c.eq.ps_3"] = "46c00032VGH", - ["c.ueq.ps_2"] = "46c00033GH", - ["c.ueq.ps_3"] = "46c00033VGH", - ["c.olt.ps_2"] = "46c00034GH", - ["c.olt.ps_3"] = "46c00034VGH", - ["c.ult.ps_2"] = "46c00035GH", - ["c.ult.ps_3"] = "46c00035VGH", - ["c.ole.ps_2"] = "46c00036GH", - ["c.ole.ps_3"] = "46c00036VGH", - ["c.ule.ps_2"] = "46c00037GH", - ["c.ule.ps_3"] = "46c00037VGH", - ["c.sf.ps_2"] = "46c00038GH", - ["c.sf.ps_3"] = "46c00038VGH", - ["c.ngle.ps_2"] = "46c00039GH", - ["c.ngle.ps_3"] = "46c00039VGH", - ["c.seq.ps_2"] = "46c0003aGH", - ["c.seq.ps_3"] = "46c0003aVGH", - ["c.ngl.ps_2"] = "46c0003bGH", - ["c.ngl.ps_3"] = "46c0003bVGH", - ["c.lt.ps_2"] = "46c0003cGH", - ["c.lt.ps_3"] = "46c0003cVGH", - ["c.nge.ps_2"] = "46c0003dGH", - ["c.nge.ps_3"] = "46c0003dVGH", - ["c.le.ps_2"] = "46c0003eGH", - ["c.le.ps_3"] = "46c0003eVGH", - ["c.ngt.ps_2"] = "46c0003fGH", - ["c.ngt.ps_3"] = "46c0003fVGH", - ["cvt.s.w_2"] = "46800020FG", ["cvt.d.w_2"] = "46800021FG", - ["cvt.s.l_2"] = "46a00020FG", ["cvt.d.l_2"] = "46a00021FG", - - -- Opcode COP1X. 
- lwxc1_2 = "4c000000FX", - ldxc1_2 = "4c000001FX", - luxc1_2 = "4c000005FX", - swxc1_2 = "4c000008FX", - sdxc1_2 = "4c000009FX", - suxc1_2 = "4c00000dFX", - prefx_2 = "4c00000fMX", - ["alnv.ps_4"] = "4c00001eFGHS", - ["madd.s_4"] = "4c000020FRGH", - ["madd.d_4"] = "4c000021FRGH", - ["madd.ps_4"] = "4c000026FRGH", - ["msub.s_4"] = "4c000028FRGH", - ["msub.d_4"] = "4c000029FRGH", - ["msub.ps_4"] = "4c00002eFRGH", - ["nmadd.s_4"] = "4c000030FRGH", - ["nmadd.d_4"] = "4c000031FRGH", - ["nmadd.ps_4"] = "4c000036FRGH", - ["nmsub.s_4"] = "4c000038FRGH", - ["nmsub.d_4"] = "4c000039FRGH", - ["nmsub.ps_4"] = "4c00003eFRGH", } +if mipsr6 then -- Instructions added with MIPSR6. + + for k,v in pairs({ + + -- Add immediate to upper bits. + aui_3 = "3c000000TSI", + daui_3 = mips64 and "74000000TSI", + dahi_2 = mips64 and "04060000SI", + dati_2 = mips64 and "041e0000SI", + + -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc. + + -- Compact branches. + blezalc_2 = "18000000TB", -- rt != 0. + bgezalc_2 = "18000000T=SB", -- rt != 0. + bgtzalc_2 = "1c000000TB", -- rt != 0. + bltzalc_2 = "1c000000T=SB", -- rt != 0. + + blezc_2 = "58000000TB", -- rt != 0. + bgezc_2 = "58000000T=SB", -- rt != 0. + bgec_3 = "58000000STB", -- rs != rt. + blec_3 = "58000000TSB", -- rt != rs. + + bgtzc_2 = "5c000000TB", -- rt != 0. + bltzc_2 = "5c000000T=SB", -- rt != 0. + bltc_3 = "5c000000STB", -- rs != rt. + bgtc_3 = "5c000000TSB", -- rt != rs. + + bgeuc_3 = "18000000STB", -- rs != rt. + bleuc_3 = "18000000TSB", -- rt != rs. + bltuc_3 = "1c000000STB", -- rs != rt. + bgtuc_3 = "1c000000TSB", -- rt != rs. + + beqzalc_2 = "20000000TB", -- rt != 0. + bnezalc_2 = "60000000TB", -- rt != 0. + beqc_3 = "20000000STB", -- rs < rt. + bnec_3 = "60000000STB", -- rs < rt. + bovc_3 = "20000000STB", -- rs >= rt. + bnvc_3 = "60000000STB", -- rs >= rt. + + beqzc_2 = "d8000000SK", -- rs != 0. + bnezc_2 = "f8000000SK", -- rs != 0. 
+ jic_2 = "d8000000TI", + jialc_2 = "f8000000TI", + bc_1 = "c8000000L", + balc_1 = "e8000000L", + + -- Opcode SPECIAL. + jr_1 = "00000009S", + sdbbp_0 = "0000000e", + sdbbp_1 = "0000000eY", + lsa_4 = "00000005DSTA", + dlsa_4 = mips64 and "00000015DSTA", + seleqz_3 = "00000035DST", + selnez_3 = "00000037DST", + clz_2 = "00000050DS", + clo_2 = "00000051DS", + dclz_2 = mips64 and "00000052DS", + dclo_2 = mips64 and "00000053DS", + mul_3 = "00000098DST", + muh_3 = "000000d8DST", + mulu_3 = "00000099DST", + muhu_3 = "000000d9DST", + div_3 = "0000009aDST", + mod_3 = "000000daDST", + divu_3 = "0000009bDST", + modu_3 = "000000dbDST", + dmul_3 = mips64 and "0000009cDST", + dmuh_3 = mips64 and "000000dcDST", + dmulu_3 = mips64 and "0000009dDST", + dmuhu_3 = mips64 and "000000ddDST", + ddiv_3 = mips64 and "0000009eDST", + dmod_3 = mips64 and "000000deDST", + ddivu_3 = mips64 and "0000009fDST", + dmodu_3 = mips64 and "000000dfDST", + + -- Opcode SPECIAL3. + align_4 = "7c000220DSTA", + dalign_4 = mips64 and "7c000224DSTA", + bitswap_2 = "7c000020DT", + dbitswap_2 = mips64 and "7c000024DT", + + -- Opcode COP1. 
+ bc1eqz_2 = "45200000HB", + bc1nez_2 = "45a00000HB", + + ["sel.s_3"] = "46000010FGH", + ["seleqz.s_3"] = "46000014FGH", + ["selnez.s_3"] = "46000017FGH", + ["maddf.s_3"] = "46000018FGH", + ["msubf.s_3"] = "46000019FGH", + ["rint.s_2"] = "4600001aFG", + ["class.s_2"] = "4600001bFG", + ["min.s_3"] = "4600001cFGH", + ["mina.s_3"] = "4600001dFGH", + ["max.s_3"] = "4600001eFGH", + ["maxa.s_3"] = "4600001fFGH", + ["cmp.af.s_3"] = "46800000FGH", + ["cmp.un.s_3"] = "46800001FGH", + ["cmp.or.s_3"] = "46800011FGH", + ["cmp.eq.s_3"] = "46800002FGH", + ["cmp.une.s_3"] = "46800012FGH", + ["cmp.ueq.s_3"] = "46800003FGH", + ["cmp.ne.s_3"] = "46800013FGH", + ["cmp.lt.s_3"] = "46800004FGH", + ["cmp.ult.s_3"] = "46800005FGH", + ["cmp.le.s_3"] = "46800006FGH", + ["cmp.ule.s_3"] = "46800007FGH", + ["cmp.saf.s_3"] = "46800008FGH", + ["cmp.sun.s_3"] = "46800009FGH", + ["cmp.sor.s_3"] = "46800019FGH", + ["cmp.seq.s_3"] = "4680000aFGH", + ["cmp.sune.s_3"] = "4680001aFGH", + ["cmp.sueq.s_3"] = "4680000bFGH", + ["cmp.sne.s_3"] = "4680001bFGH", + ["cmp.slt.s_3"] = "4680000cFGH", + ["cmp.sult.s_3"] = "4680000dFGH", + ["cmp.sle.s_3"] = "4680000eFGH", + ["cmp.sule.s_3"] = "4680000fFGH", + + ["sel.d_3"] = "46200010FGH", + ["seleqz.d_3"] = "46200014FGH", + ["selnez.d_3"] = "46200017FGH", + ["maddf.d_3"] = "46200018FGH", + ["msubf.d_3"] = "46200019FGH", + ["rint.d_2"] = "4620001aFG", + ["class.d_2"] = "4620001bFG", + ["min.d_3"] = "4620001cFGH", + ["mina.d_3"] = "4620001dFGH", + ["max.d_3"] = "4620001eFGH", + ["maxa.d_3"] = "4620001fFGH", + ["cmp.af.d_3"] = "46a00000FGH", + ["cmp.un.d_3"] = "46a00001FGH", + ["cmp.or.d_3"] = "46a00011FGH", + ["cmp.eq.d_3"] = "46a00002FGH", + ["cmp.une.d_3"] = "46a00012FGH", + ["cmp.ueq.d_3"] = "46a00003FGH", + ["cmp.ne.d_3"] = "46a00013FGH", + ["cmp.lt.d_3"] = "46a00004FGH", + ["cmp.ult.d_3"] = "46a00005FGH", + ["cmp.le.d_3"] = "46a00006FGH", + ["cmp.ule.d_3"] = "46a00007FGH", + ["cmp.saf.d_3"] = "46a00008FGH", + ["cmp.sun.d_3"] = "46a00009FGH", + ["cmp.sor.d_3"] 
= "46a00019FGH", + ["cmp.seq.d_3"] = "46a0000aFGH", + ["cmp.sune.d_3"] = "46a0001aFGH", + ["cmp.sueq.d_3"] = "46a0000bFGH", + ["cmp.sne.d_3"] = "46a0001bFGH", + ["cmp.slt.d_3"] = "46a0000cFGH", + ["cmp.sult.d_3"] = "46a0000dFGH", + ["cmp.sle.d_3"] = "46a0000eFGH", + ["cmp.sule.d_3"] = "46a0000fFGH", + + }) do map_op[k] = v end + +else -- Instructions removed by MIPSR6. + + for k,v in pairs({ + -- Traps, don't use. + addi_3 = "20000000TSI", + daddi_3 = mips64 and "60000000TSI", + + -- Branch on likely, don't use. + beqzl_2 = "50000000SB", + beql_3 = "50000000STB", + bnezl_2 = "54000000SB", + bnel_3 = "54000000STB", + blezl_2 = "58000000SB", + bgtzl_2 = "5c000000SB", + + lwl_2 = "88000000TO", + lwr_2 = "98000000TO", + swl_2 = "a8000000TO", + sdl_2 = mips64 and "b0000000TO", + sdr_2 = mips64 and "b1000000TO", + swr_2 = "b8000000TO", + cache_2 = "bc000000NO", + ll_2 = "c0000000TO", + pref_2 = "cc000000NO", + sc_2 = "e0000000TO", + scd_2 = mips64 and "f0000000TO", + + -- Opcode SPECIAL. + movf_2 = "00000001DS", + movf_3 = "00000001DSC", + movt_2 = "00010001DS", + movt_3 = "00010001DSC", + jr_1 = "00000008S", + movz_3 = "0000000aDST", + movn_3 = "0000000bDST", + mfhi_1 = "00000010D", + mthi_1 = "00000011S", + mflo_1 = "00000012D", + mtlo_1 = "00000013S", + mult_2 = "00000018ST", + multu_2 = "00000019ST", + div_3 = "0000001aST", + divu_3 = "0000001bST", + ddiv_3 = mips64 and "0000001eST", + ddivu_3 = mips64 and "0000001fST", + dmult_2 = mips64 and "0000001cST", + dmultu_2 = mips64 and "0000001dST", + + -- Opcode REGIMM. + tgei_2 = "04080000SI", + tgeiu_2 = "04090000SI", + tlti_2 = "040a0000SI", + tltiu_2 = "040b0000SI", + teqi_2 = "040c0000SI", + tnei_2 = "040e0000SI", + bltzal_2 = "04100000SB", + bgezal_2 = "04110000SB", + bltzall_2 = "04120000SB", + bgezall_2 = "04130000SB", + + -- Opcode SPECIAL2. 
+ madd_2 = "70000000ST", + maddu_2 = "70000001ST", + mul_3 = "70000002DST", + msub_2 = "70000004ST", + msubu_2 = "70000005ST", + clz_2 = "70000020D=TS", + clo_2 = "70000021D=TS", + dclz_2 = mips64 and "70000024D=TS", + dclo_2 = mips64 and "70000025D=TS", + sdbbp_0 = "7000003f", + sdbbp_1 = "7000003fY", + + -- Opcode COP1. + bc1f_1 = "45000000B", + bc1f_2 = "45000000CB", + bc1t_1 = "45010000B", + bc1t_2 = "45010000CB", + bc1fl_1 = "45020000B", + bc1fl_2 = "45020000CB", + bc1tl_1 = "45030000B", + bc1tl_2 = "45030000CB", + + ["movf.s_2"] = "46000011FG", + ["movf.s_3"] = "46000011FGC", + ["movt.s_2"] = "46010011FG", + ["movt.s_3"] = "46010011FGC", + ["movz.s_3"] = "46000012FGT", + ["movn.s_3"] = "46000013FGT", + ["cvt.ps.s_3"] = "46000026FGH", + ["c.f.s_2"] = "46000030GH", + ["c.f.s_3"] = "46000030VGH", + ["c.un.s_2"] = "46000031GH", + ["c.un.s_3"] = "46000031VGH", + ["c.eq.s_2"] = "46000032GH", + ["c.eq.s_3"] = "46000032VGH", + ["c.ueq.s_2"] = "46000033GH", + ["c.ueq.s_3"] = "46000033VGH", + ["c.olt.s_2"] = "46000034GH", + ["c.olt.s_3"] = "46000034VGH", + ["c.ult.s_2"] = "46000035GH", + ["c.ult.s_3"] = "46000035VGH", + ["c.ole.s_2"] = "46000036GH", + ["c.ole.s_3"] = "46000036VGH", + ["c.ule.s_2"] = "46000037GH", + ["c.ule.s_3"] = "46000037VGH", + ["c.sf.s_2"] = "46000038GH", + ["c.sf.s_3"] = "46000038VGH", + ["c.ngle.s_2"] = "46000039GH", + ["c.ngle.s_3"] = "46000039VGH", + ["c.seq.s_2"] = "4600003aGH", + ["c.seq.s_3"] = "4600003aVGH", + ["c.ngl.s_2"] = "4600003bGH", + ["c.ngl.s_3"] = "4600003bVGH", + ["c.lt.s_2"] = "4600003cGH", + ["c.lt.s_3"] = "4600003cVGH", + ["c.nge.s_2"] = "4600003dGH", + ["c.nge.s_3"] = "4600003dVGH", + ["c.le.s_2"] = "4600003eGH", + ["c.le.s_3"] = "4600003eVGH", + ["c.ngt.s_2"] = "4600003fGH", + ["c.ngt.s_3"] = "4600003fVGH", + ["movf.d_2"] = "46200011FG", + ["movf.d_3"] = "46200011FGC", + ["movt.d_2"] = "46210011FG", + ["movt.d_3"] = "46210011FGC", + ["movz.d_3"] = "46200012FGT", + ["movn.d_3"] = "46200013FGT", + ["c.f.d_2"] = "46200030GH", + 
["c.f.d_3"] = "46200030VGH", + ["c.un.d_2"] = "46200031GH", + ["c.un.d_3"] = "46200031VGH", + ["c.eq.d_2"] = "46200032GH", + ["c.eq.d_3"] = "46200032VGH", + ["c.ueq.d_2"] = "46200033GH", + ["c.ueq.d_3"] = "46200033VGH", + ["c.olt.d_2"] = "46200034GH", + ["c.olt.d_3"] = "46200034VGH", + ["c.ult.d_2"] = "46200035GH", + ["c.ult.d_3"] = "46200035VGH", + ["c.ole.d_2"] = "46200036GH", + ["c.ole.d_3"] = "46200036VGH", + ["c.ule.d_2"] = "46200037GH", + ["c.ule.d_3"] = "46200037VGH", + ["c.sf.d_2"] = "46200038GH", + ["c.sf.d_3"] = "46200038VGH", + ["c.ngle.d_2"] = "46200039GH", + ["c.ngle.d_3"] = "46200039VGH", + ["c.seq.d_2"] = "4620003aGH", + ["c.seq.d_3"] = "4620003aVGH", + ["c.ngl.d_2"] = "4620003bGH", + ["c.ngl.d_3"] = "4620003bVGH", + ["c.lt.d_2"] = "4620003cGH", + ["c.lt.d_3"] = "4620003cVGH", + ["c.nge.d_2"] = "4620003dGH", + ["c.nge.d_3"] = "4620003dVGH", + ["c.le.d_2"] = "4620003eGH", + ["c.le.d_3"] = "4620003eVGH", + ["c.ngt.d_2"] = "4620003fGH", + ["c.ngt.d_3"] = "4620003fVGH", + ["add.ps_3"] = "46c00000FGH", + ["sub.ps_3"] = "46c00001FGH", + ["mul.ps_3"] = "46c00002FGH", + ["abs.ps_2"] = "46c00005FG", + ["mov.ps_2"] = "46c00006FG", + ["neg.ps_2"] = "46c00007FG", + ["movf.ps_2"] = "46c00011FG", + ["movf.ps_3"] = "46c00011FGC", + ["movt.ps_2"] = "46c10011FG", + ["movt.ps_3"] = "46c10011FGC", + ["movz.ps_3"] = "46c00012FGT", + ["movn.ps_3"] = "46c00013FGT", + ["cvt.s.pu_2"] = "46c00020FG", + ["cvt.s.pl_2"] = "46c00028FG", + ["pll.ps_3"] = "46c0002cFGH", + ["plu.ps_3"] = "46c0002dFGH", + ["pul.ps_3"] = "46c0002eFGH", + ["puu.ps_3"] = "46c0002fFGH", + ["c.f.ps_2"] = "46c00030GH", + ["c.f.ps_3"] = "46c00030VGH", + ["c.un.ps_2"] = "46c00031GH", + ["c.un.ps_3"] = "46c00031VGH", + ["c.eq.ps_2"] = "46c00032GH", + ["c.eq.ps_3"] = "46c00032VGH", + ["c.ueq.ps_2"] = "46c00033GH", + ["c.ueq.ps_3"] = "46c00033VGH", + ["c.olt.ps_2"] = "46c00034GH", + ["c.olt.ps_3"] = "46c00034VGH", + ["c.ult.ps_2"] = "46c00035GH", + ["c.ult.ps_3"] = "46c00035VGH", + ["c.ole.ps_2"] = 
"46c00036GH", + ["c.ole.ps_3"] = "46c00036VGH", + ["c.ule.ps_2"] = "46c00037GH", + ["c.ule.ps_3"] = "46c00037VGH", + ["c.sf.ps_2"] = "46c00038GH", + ["c.sf.ps_3"] = "46c00038VGH", + ["c.ngle.ps_2"] = "46c00039GH", + ["c.ngle.ps_3"] = "46c00039VGH", + ["c.seq.ps_2"] = "46c0003aGH", + ["c.seq.ps_3"] = "46c0003aVGH", + ["c.ngl.ps_2"] = "46c0003bGH", + ["c.ngl.ps_3"] = "46c0003bVGH", + ["c.lt.ps_2"] = "46c0003cGH", + ["c.lt.ps_3"] = "46c0003cVGH", + ["c.nge.ps_2"] = "46c0003dGH", + ["c.nge.ps_3"] = "46c0003dVGH", + ["c.le.ps_2"] = "46c0003eGH", + ["c.le.ps_3"] = "46c0003eVGH", + ["c.ngt.ps_2"] = "46c0003fGH", + ["c.ngt.ps_3"] = "46c0003fVGH", + + -- Opcode COP1X. + lwxc1_2 = "4c000000FX", + ldxc1_2 = "4c000001FX", + luxc1_2 = "4c000005FX", + swxc1_2 = "4c000008FX", + sdxc1_2 = "4c000009FX", + suxc1_2 = "4c00000dFX", + prefx_2 = "4c00000fMX", + ["alnv.ps_4"] = "4c00001eFGHS", + ["madd.s_4"] = "4c000020FRGH", + ["madd.d_4"] = "4c000021FRGH", + ["madd.ps_4"] = "4c000026FRGH", + ["msub.s_4"] = "4c000028FRGH", + ["msub.d_4"] = "4c000029FRGH", + ["msub.ps_4"] = "4c00002eFRGH", + ["nmadd.s_4"] = "4c000030FRGH", + ["nmadd.d_4"] = "4c000031FRGH", + ["nmadd.ps_4"] = "4c000036FRGH", + ["nmsub.s_4"] = "4c000038FRGH", + ["nmsub.d_4"] = "4c000039FRGH", + ["nmsub.ps_4"] = "4c00003eFRGH", + + }) do map_op[k] = v end + +end + ------------------------------------------------------------------------------ local function parse_gpr(expr) @@ -808,10 +979,12 @@ map_op[".template__"] = function(params, template, nparams) op = op + parse_disp(params[n]); n = n + 1 elseif p == "X" then op = op + parse_index(params[n]); n = n + 1 - elseif p == "B" or p == "J" then - local mode, n, s = parse_label(params[n], false) - if p == "B" then n = n + 2048 end - waction("REL_"..mode, n, s, 1) + elseif p == "B" or p == "J" or p == "K" or p == "L" then + local mode, m, s = parse_label(params[n], false) + if p == "J" then m = m + 0xa800 + elseif p == "K" then m = m + 0x5000 + elseif p == "L" then m = m + 
0xa000 end + waction("REL_"..mode, m, s, 1) n = n + 1 elseif p == "A" then op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 @@ -833,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams) elseif p == "Z" then op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 elseif p == "=" then - op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo. + n = n - 1 -- Re-use previous parameter for next template char. else assert(false) end diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips64.lua b/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips64.lua index 5636b23a6ec5bebd03c8d7cb79a8cbd04ea2edf7..cfcf20e5e59c57ecc91d83640b9229bf1da7c37b 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips64.lua +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_mips64.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM MIPS64 module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ -- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_ppc.h b/source/libs/luajit/LuaJIT-src/dynasm/dasm_ppc.h index 3a7ee9b0e9a8af26800c689eec21c983d6f517bb..e97b4efa6d49623294f01c8400825e1e5f0161ea 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_ppc.h +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_ppc.h @@ -1,6 +1,6 @@ /* ** DynASM PPC/PPC64 encoding engine. -** Copyright (C) 2005-2017 Mike Pall. All rights reserved. +** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ @@ -69,7 +69,7 @@ struct dasm_State { size_t lgsize; int *pclabels; /* PC label chains/pos ptrs. */ size_t pcsize; - void **globals; /* Array of globals (bias -10). 
*/ + void **globals; /* Array of globals. */ dasm_Section *section; /* Pointer to active section. */ size_t codesize; /* Total size of all code sections. */ int maxsection; /* 0 <= sectionidx < maxsection. */ @@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection) { dasm_State *D; size_t psz = 0; - int i; Dst_REF = NULL; DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); D = Dst_REF; @@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection) D->pcsize = 0; D->globals = NULL; D->maxsection = maxsection; - for (i = 0; i < maxsection; i++) { - D->sections[i].buf = NULL; /* Need this for pass3. */ - D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); - D->sections[i].bsize = 0; - D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ - } + memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); } /* Free DynASM state. */ @@ -122,7 +116,7 @@ void dasm_free(Dst_DECL) void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) { dasm_State *D = Dst_REF; - D->globals = gl - 10; /* Negative bias to compensate for locals. */ + D->globals = gl; DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); } @@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist) if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); for (i = 0; i < D->maxsection; i++) { D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; D->sections[i].ofs = 0; } } @@ -277,7 +272,7 @@ int dasm_link(Dst_DECL, size_t *szp) { /* Handle globals not defined in this translation unit. */ int idx; - for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { int n = D->lglabels[idx]; /* Undefined label: Collapse rel chain and replace with marker (< 0). 
*/ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } @@ -353,7 +348,11 @@ int dasm_encode(Dst_DECL, void *buffer) ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; break; case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp); + goto patchrel; + } + /* fallthrough */ case DASM_REL_PC: CK(n >= 0, UNDEF_PC); n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); @@ -364,7 +363,7 @@ int dasm_encode(Dst_DECL, void *buffer) cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc)); break; case DASM_LABEL_LG: - ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n); break; case DASM_LABEL_PC: break; case DASM_IMM: diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_ppc.lua b/source/libs/luajit/LuaJIT-src/dynasm/dasm_ppc.lua index f73974dd7f9e0d6b4c010a431631c35080d6273c..67662330d1bc1a217eb4a361dd903a97f7ddefcc 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_ppc.lua +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_ppc.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM PPC/PPC64 module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. -- -- Support for various extensions contributed by Caio Souza Oliveira. 
@@ -11,9 +11,9 @@ local _info = { arch = "ppc", description = "DynASM PPC module", - version = "1.4.0", - vernum = 10400, - release = "2015-10-18", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", author = "Mike Pall", license = "MIT", } @@ -1722,9 +1722,9 @@ op_template = function(params, template, nparams) elseif p == "M" then op = op + parse_shiftmask(params[n], false); n = n + 1 elseif p == "J" or p == "K" then - local mode, n, s = parse_label(params[n], false) - if p == "K" then n = n + 2048 end - waction("REL_"..mode, n, s, 1) + local mode, m, s = parse_label(params[n], false) + if p == "K" then m = m + 2048 end + waction("REL_"..mode, m, s, 1) n = n + 1 elseif p == "0" then if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_proto.h b/source/libs/luajit/LuaJIT-src/dynasm/dasm_proto.h index 59d9e2b2dd7869cdd025968aec17d84d8da7ffcc..38f5e71ae6578e5495e2680be8ee9b0c116e4bb3 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_proto.h +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_proto.h @@ -1,6 +1,6 @@ /* ** DynASM encoding engine prototypes. -** Copyright (C) 2005-2017 Mike Pall. All rights reserved. +** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. 
*/ @@ -10,8 +10,8 @@ #include <stddef.h> #include <stdarg.h> -#define DASM_IDENT "DynASM 1.4.0" -#define DASM_VERSION 10400 /* 1.4.0 */ +#define DASM_IDENT "DynASM 1.5.0" +#define DASM_VERSION 10500 /* 1.5.0 */ #ifndef Dst_DECL #define Dst_DECL dasm_State **Dst diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_x64.lua b/source/libs/luajit/LuaJIT-src/dynasm/dasm_x64.lua index e8bdeb37d4def31c2833f0da2f20ffc81f95ff59..d23fcd99f927b3ec5bd081d52d86c32e4dfe6a70 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_x64.lua +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_x64.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM x64 module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ -- This module just sets 64 bit mode for the combined x86/x64 module. diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_x86.h b/source/libs/luajit/LuaJIT-src/dynasm/dasm_x86.h index bc636357a65cc2954777256594f4e9bbc4ac73ff..ff54c9f11edee150e5b2cc248260e7e2570fdacc 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_x86.h +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_x86.h @@ -1,6 +1,6 @@ /* ** DynASM x86 encoding engine. -** Copyright (C) 2005-2017 Mike Pall. All rights reserved. +** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ @@ -68,7 +68,7 @@ struct dasm_State { size_t lgsize; int *pclabels; /* PC label chains/pos ptrs. */ size_t pcsize; - void **globals; /* Array of globals (bias -10). */ + void **globals; /* Array of globals. */ dasm_Section *section; /* Pointer to active section. */ size_t codesize; /* Total size of all code sections. */ int maxsection; /* 0 <= sectionidx < maxsection. 
*/ @@ -85,7 +85,6 @@ void dasm_init(Dst_DECL, int maxsection) { dasm_State *D; size_t psz = 0; - int i; Dst_REF = NULL; DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); D = Dst_REF; @@ -96,12 +95,7 @@ void dasm_init(Dst_DECL, int maxsection) D->pcsize = 0; D->globals = NULL; D->maxsection = maxsection; - for (i = 0; i < maxsection; i++) { - D->sections[i].buf = NULL; /* Need this for pass3. */ - D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); - D->sections[i].bsize = 0; - D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ - } + memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); } /* Free DynASM state. */ @@ -121,7 +115,7 @@ void dasm_free(Dst_DECL) void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) { dasm_State *D = Dst_REF; - D->globals = gl - 10; /* Negative bias to compensate for locals. */ + D->globals = gl; DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); } @@ -146,6 +140,7 @@ void dasm_setup(Dst_DECL, const void *actionlist) if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); for (i = 0; i < D->maxsection; i++) { D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; D->sections[i].ofs = 0; } } @@ -194,12 +189,13 @@ void dasm_put(Dst_DECL, int start, ...) switch (action) { case DASM_DISP: if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } - case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; + /* fallthrough */ + case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ case DASM_REL_A: /* Assumes ptrdiff_t is int. 
!x64 */ case DASM_IMM_D: ofs += 4; break; case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; - case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; + case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; case DASM_SPACE: p++; ofs += n; break; case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ @@ -207,8 +203,8 @@ void dasm_put(Dst_DECL, int start, ...) if (*p < 0x40 && p[1] == DASM_DISP) mrm = n; if (*p < 0x20 && (n&7) == 4) ofs++; switch ((*p++ >> 3) & 3) { - case 3: n |= b[pos-3]; - case 2: n |= b[pos-2]; + case 3: n |= b[pos-3]; /* fallthrough */ + case 2: n |= b[pos-2]; /* fallthrough */ case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; } } continue; @@ -238,8 +234,11 @@ void dasm_put(Dst_DECL, int start, ...) } pos++; ofs += 4; /* Maximum offset needed. */ - if (action == DASM_REL_LG || action == DASM_REL_PC) + if (action == DASM_REL_LG || action == DASM_REL_PC) { b[pos++] = ofs; /* Store pass1 offset estimate. */ + } else if (sizeof(ptrdiff_t) == 8) { + ofs += 4; + } break; case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); @@ -304,11 +303,13 @@ int dasm_link(Dst_DECL, size_t *szp) while (pos != lastpos) { dasm_ActList p = D->actionlist + b[pos++]; + int op = 0; while (1) { - int op, action = *p++; + int action = *p++; switch (action) { - case DASM_REL_LG: p++; op = p[-3]; goto rel_pc; - case DASM_REL_PC: op = p[-2]; rel_pc: { + case DASM_REL_LG: p++; + /* fallthrough */ + case DASM_REL_PC: { int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0); if (shrink) { /* Shrinkable branch opcode? 
*/ int lofs, lpos = b[pos]; @@ -329,17 +330,21 @@ int dasm_link(Dst_DECL, size_t *szp) pos += 2; break; } + /* fallthrough */ case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; + /* fallthrough */ case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; case DASM_LABEL_LG: p++; + /* fallthrough */ case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */ case DASM_EXTERN: p += 2; break; - case DASM_ESC: p++; break; + case DASM_ESC: op = *p++; break; case DASM_MARK: break; case DASM_SECTION: case DASM_STOP: goto stop; + default: op = action; break; } } stop: (void)0; @@ -358,10 +363,22 @@ int dasm_link(Dst_DECL, size_t *szp) do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0) #define dasmd(x) \ do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0) +#define dasmq(x) \ + do { *((unsigned long long *)cp) = (unsigned long long)(x); cp+=8; } while (0) #else #define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) #define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) +#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0) #endif +static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x) +{ + if (sizeof(ptrdiff_t) == 8) + dasmq((unsigned long long)x); + else + dasmd((unsigned int)x); + return cp; +} +#define dasma(x) (cp = dasma_(cp, (x))) /* Pass 3: Encode sections. 
*/ int dasm_encode(Dst_DECL, void *buffer) @@ -391,12 +408,15 @@ int dasm_encode(Dst_DECL, void *buffer) if (mrm != 5) { mm[-1] -= 0x80; break; } } if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; } + /* fallthrough */ case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; case DASM_IMM_DB: if (((n+128)&-256) == 0) { db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; } else mark = NULL; + /* fallthrough */ case DASM_IMM_D: wd: dasmd(n); break; case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; + /* fallthrough */ case DASM_IMM_W: dasmw(n); break; case DASM_VREG: { int t = *p++; @@ -420,8 +440,10 @@ int dasm_encode(Dst_DECL, void *buffer) break; } case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; - b++; n = (int)(ptrdiff_t)D->globals[-n]; - case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ + b++; n = (int)(ptrdiff_t)D->globals[-n-10]; + /* fallthrough */ + case DASM_REL_A: rel_a: + n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ case DASM_REL_PC: rel_pc: { int shrink = *b++; int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } @@ -431,16 +453,18 @@ int dasm_encode(Dst_DECL, void *buffer) goto wb; } case DASM_IMM_LG: - p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } + p++; + if (n < 0) { dasma((ptrdiff_t)D->globals[-n-10]); break; } + /* fallthrough */ case DASM_IMM_PC: { int *pb = DASM_POS2PTR(D, n); - n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); - goto wd; + dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base)); + break; } case DASM_LABEL_LG: { int idx = *p++; if (idx >= 10) - D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n)); + D->globals[idx-10] = (void *)(base + (*p == DASM_SETLABEL ? 
*b : n)); break; } case DASM_LABEL_PC: case DASM_SETLABEL: break; @@ -452,6 +476,7 @@ int dasm_encode(Dst_DECL, void *buffer) case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; case DASM_MARK: mark = cp; break; case DASM_ESC: action = *p++; + /* fallthrough */ default: *cp++ = action; break; case DASM_SECTION: case DASM_STOP: goto stop; } diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dasm_x86.lua b/source/libs/luajit/LuaJIT-src/dynasm/dasm_x86.lua index 4c031e2cd67e42451cdd6918ee151fae66dc817c..c767cdeba1208880d07533081356419e873b0db4 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dasm_x86.lua +++ b/source/libs/luajit/LuaJIT-src/dynasm/dasm_x86.lua @@ -1,7 +1,7 @@ ------------------------------------------------------------------------------ -- DynASM x86/x64 module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ @@ -11,9 +11,9 @@ local x64 = x64 local _info = { arch = x64 and "x64" or "x86", description = "DynASM x86/x64 module", - version = "1.4.0", - vernum = 10400, - release = "2015-10-18", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", author = "Mike Pall", license = "MIT", } @@ -484,6 +484,22 @@ local function wputdarg(n) end end +-- Put signed or unsigned qword or arg. +local function wputqarg(n) + local tn = type(n) + if tn == "number" then -- This is only used for numbers from -2^31..2^32-1. + wputb(band(n, 255)) + wputb(band(shr(n, 8), 255)) + wputb(band(shr(n, 16), 255)) + wputb(shr(n, 24)) + local sign = n < 0 and 255 or 0 + wputb(sign); wputb(sign); wputb(sign); wputb(sign) + else + waction("IMM_D", format("(unsigned int)(%s)", n)) + waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n)) + end +end + -- Put operand-size dependent number or arg (defaults to dword). 
local function wputszarg(sz, n) if not sz or sz == "d" or sz == "q" then wputdarg(n) @@ -611,7 +627,11 @@ local function wputmrmsib(t, imark, s, vsreg, psz, sk) werror("NYI: rip-relative displacement followed by immediate") end -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f. - wputlabel("REL_", disp[1], 2) + if disp[2] == "iPJ" then + waction("REL_A", disp[1]) + else + wputlabel("REL_", disp[1], 2) + end else wputdarg(disp) end @@ -663,10 +683,16 @@ local function opmodestr(op, args) end -- Convert number to valid integer or nil. -local function toint(expr) +local function toint(expr, isqword) local n = tonumber(expr) if n then - if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then + if n % 1 ~= 0 then + werror("not an integer number `"..expr.."'") + elseif isqword then + if n < -2147483648 or n > 2147483647 then + n = nil -- Handle it as an expression to avoid precision loss. + end + elseif n < -2147483648 or n > 4294967295 then werror("bad integer number `"..expr.."'") end return n @@ -722,9 +748,9 @@ local function dispexpr(expr) return imm*map_opsizenum[ops] end local mode, iexpr = immexpr(dispt) - if mode == "iJ" then + if mode == "iJ" or mode == "iPJ" then if c == "-" then werror("cannot invert label reference") end - return { iexpr } + return { iexpr, mode } end return expr -- Need to return original signed expression. end @@ -749,7 +775,7 @@ local function rtexpr(expr) end -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. -local function parseoperand(param) +local function parseoperand(param, isqword) local t = {} local expr = param @@ -810,7 +836,7 @@ local function parseoperand(param) if t.disp then break end -- [reg+xreg...] 
- local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$") + local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$") xreg, t.xreg, tp = rtexpr(xreg) if not t.xreg then -- [reg+-expr] @@ -837,7 +863,7 @@ local function parseoperand(param) t.disp = dispexpr(tailx) else -- imm or opsize*imm - local imm = toint(expr) + local imm = toint(expr, isqword) if not imm and sub(expr, 1, 1) == "*" and t.opsize then imm = toint(sub(expr, 2)) if imm then @@ -955,6 +981,7 @@ end -- "u" Use VEX encoding, vvvv unused. -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is -- removed from the list used by future characters). +-- "w" Use VEX encoding, vvvv from 3rd operand. -- "L" Force VEX.L -- -- All of the following characters force a flush of the opcode: @@ -1124,6 +1151,8 @@ local map_op = { rep_0 = "F3", repe_0 = "F3", repz_0 = "F3", + endbr32_0 = "F30F1EFB", + endbr64_0 = "F30F1EFA", -- F4: *hlt cmc_0 = "F5", -- F6: test... mb,i; div... mb @@ -1536,8 +1565,8 @@ local map_op = { vrcpss_3 = "rrro:F30FV53rM|rrx/ood:", vrsqrtps_2 = "rmoy:0Fu52rM", vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:", - vroundpd_3 = "rmioy:660F3AV09rMU", - vroundps_3 = "rmioy:660F3AV08rMU", + vroundpd_3 = "rmioy:660F3Au09rMU", + vroundps_3 = "rmioy:660F3Au08rMU", vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:", vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:", vshufpd_4 = "rrmioy:660FVC6rMU", @@ -1677,6 +1706,91 @@ local map_op = { -- Intel ADX adcx_2 = "rmqd:660F38F6rM", adox_2 = "rmqd:F30F38F6rM", + + -- BMI1 + andn_3 = "rrmqd:0F38VF2rM", + bextr_3 = "rmrqd:0F38wF7rM", + blsi_2 = "rmqd:0F38vF33m", + blsmsk_2 = "rmqd:0F38vF32m", + blsr_2 = "rmqd:0F38vF31m", + tzcnt_2 = "rmqdw:F30FBCrM", + + -- BMI2 + bzhi_3 = "rmrqd:0F38wF5rM", + mulx_3 = "rrmqd:F20F38VF6rM", + pdep_3 = "rrmqd:F20F38VF5rM", + pext_3 = "rrmqd:F30F38VF5rM", + rorx_3 = "rmSqd:F20F3AuF0rMS", + sarx_3 = "rmrqd:F30F38wF7rM", + shrx_3 = "rmrqd:F20F38wF7rM", + shlx_3 = "rmrqd:660F38wF7rM", + + -- FMA3 + vfmaddsub132pd_3 = 
"rrmoy:660F38VX96rM", + vfmaddsub132ps_3 = "rrmoy:660F38V96rM", + vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM", + vfmaddsub213ps_3 = "rrmoy:660F38VA6rM", + vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM", + vfmaddsub231ps_3 = "rrmoy:660F38VB6rM", + + vfmsubadd132pd_3 = "rrmoy:660F38VX97rM", + vfmsubadd132ps_3 = "rrmoy:660F38V97rM", + vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM", + vfmsubadd213ps_3 = "rrmoy:660F38VA7rM", + vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM", + vfmsubadd231ps_3 = "rrmoy:660F38VB7rM", + + vfmadd132pd_3 = "rrmoy:660F38VX98rM", + vfmadd132ps_3 = "rrmoy:660F38V98rM", + vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:", + vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:", + vfmadd213pd_3 = "rrmoy:660F38VXA8rM", + vfmadd213ps_3 = "rrmoy:660F38VA8rM", + vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:", + vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:", + vfmadd231pd_3 = "rrmoy:660F38VXB8rM", + vfmadd231ps_3 = "rrmoy:660F38VB8rM", + vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:", + vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:", + + vfmsub132pd_3 = "rrmoy:660F38VX9ArM", + vfmsub132ps_3 = "rrmoy:660F38V9ArM", + vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:", + vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:", + vfmsub213pd_3 = "rrmoy:660F38VXAArM", + vfmsub213ps_3 = "rrmoy:660F38VAArM", + vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:", + vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:", + vfmsub231pd_3 = "rrmoy:660F38VXBArM", + vfmsub231ps_3 = "rrmoy:660F38VBArM", + vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:", + vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:", + + vfnmadd132pd_3 = "rrmoy:660F38VX9CrM", + vfnmadd132ps_3 = "rrmoy:660F38V9CrM", + vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:", + vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:", + vfnmadd213pd_3 = "rrmoy:660F38VXACrM", + vfnmadd213ps_3 = "rrmoy:660F38VACrM", + vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:", + vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:", + vfnmadd231pd_3 = "rrmoy:660F38VXBCrM", + vfnmadd231ps_3 = "rrmoy:660F38VBCrM", + vfnmadd231sd_3 = 
"rrro:660F38VXBDrM|rrx/ooq:", + vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:", + + vfnmsub132pd_3 = "rrmoy:660F38VX9ErM", + vfnmsub132ps_3 = "rrmoy:660F38V9ErM", + vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:", + vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:", + vfnmsub213pd_3 = "rrmoy:660F38VXAErM", + vfnmsub213ps_3 = "rrmoy:660F38VAErM", + vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:", + vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:", + vfnmsub231pd_3 = "rrmoy:660F38VXBErM", + vfnmsub231ps_3 = "rrmoy:660F38VBErM", + vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:", + vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:", } ------------------------------------------------------------------------------ @@ -1766,7 +1880,7 @@ end ------------------------------------------------------------------------------ -local map_vexarg = { u = false, v = 1, V = 2 } +local map_vexarg = { u = false, v = 1, V = 2, w = 3 } -- Process pattern string. local function dopattern(pat, args, sz, op, needrex) @@ -1866,7 +1980,7 @@ local function dopattern(pat, args, sz, op, needrex) local a = args[narg] narg = narg + 1 local mode, imm = a.mode, a.imm - if mode == "iJ" and not match("iIJ", c) then + if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then werror("bad operand size for label") end if c == "S" then @@ -2058,14 +2172,16 @@ end local function op_data(params) if not params then return "imm..." 
end local sz = sub(params.op, 2, 2) - if sz == "a" then sz = addrsize end + if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end for _,p in ipairs(params) do - local a = parseoperand(p) + local a = parseoperand(p, sz == "q") if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then werror("bad mode or size in `"..p.."'") end if a.mode == "iJ" then wputlabel("IMM_", a.imm, 1) + elseif sz == "q" then + wputqarg(a.imm) else wputszarg(sz, a.imm) end @@ -2077,7 +2193,11 @@ map_op[".byte_*"] = op_data map_op[".sbyte_*"] = op_data map_op[".word_*"] = op_data map_op[".dword_*"] = op_data +map_op[".qword_*"] = op_data map_op[".aword_*"] = op_data +map_op[".long_*"] = op_data +map_op[".quad_*"] = op_data +map_op[".addr_*"] = op_data ------------------------------------------------------------------------------ diff --git a/source/libs/luajit/LuaJIT-src/dynasm/dynasm.lua b/source/libs/luajit/LuaJIT-src/dynasm/dynasm.lua index 5ec21a7979af14d09170af2f669b4654c01665a2..2a4d649ec820da37735fd733cfe77e8360fcb1c8 100644 --- a/source/libs/luajit/LuaJIT-src/dynasm/dynasm.lua +++ b/source/libs/luajit/LuaJIT-src/dynasm/dynasm.lua @@ -2,7 +2,7 @@ -- DynASM. A dynamic assembler for code generation engines. -- Originally designed and implemented for LuaJIT. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- See below for full copyright notice. ------------------------------------------------------------------------------ @@ -10,14 +10,14 @@ local _info = { name = "DynASM", description = "A dynamic assembler for code generation engines", - version = "1.4.0", - vernum = 10400, - release = "2015-10-18", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", author = "Mike Pall", - url = "http://luajit.org/dynasm.html", + url = "https://luajit.org/dynasm.html", license = "MIT", copyright = [[ -Copyright (C) 2005-2017 Mike Pall. All rights reserved. +Copyright (C) 2005-2025 Mike Pall. 
All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -38,7 +38,7 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -[ MIT license: http://www.opensource.org/licenses/mit-license.php ] +[ MIT license: https://www.opensource.org/licenses/mit-license.php ] ]], } @@ -75,7 +75,7 @@ local function wline(line, needindent) g_synclineno = g_synclineno + 1 end --- Write assembler line as a comment, if requestd. +-- Write assembler line as a comment, if requested. local function wcomment(aline) if g_opt.comment then wline(g_opt.comment..aline..g_opt.endcomment, true) @@ -630,6 +630,7 @@ end -- Load architecture-specific module. local function loadarch(arch) if not match(arch, "^[%w_]+$") then return "bad arch name" end + _G._map_def = map_def local ok, m_arch = pcall(require, "dasm_"..arch) if not ok then return "cannot load module: "..m_arch end g_arch = m_arch diff --git a/source/libs/luajit/LuaJIT-src/etc/luajit.1 b/source/libs/luajit/LuaJIT-src/etc/luajit.1 index 0d263db79fdd78626360b93f1025d1169627b637..c16b4172f4a3723e0c01e2ac0f2d2d580de894ef 100644 --- a/source/libs/luajit/LuaJIT-src/etc/luajit.1 +++ b/source/libs/luajit/LuaJIT-src/etc/luajit.1 @@ -6,7 +6,7 @@ luajit \- Just-In-Time Compiler for the Lua Language .B luajit [\fIoptions\fR]... [\fIscript\fR [\fIargs\fR]...] .SH "WEB SITE" -.IR http://luajit.org +.IR https://luajit.org .SH DESCRIPTION .PP This is the command-line program to run Lua programs with \fBLuaJIT\fR. @@ -74,15 +74,15 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end" Runs some nested loops and shows the resulting traces. .SH COPYRIGHT .PP -\fBLuaJIT\fR is Copyright \(co 2005-2017 Mike Pall. +\fBLuaJIT\fR is Copyright \(co 2005-2025 Mike Pall. 
.br \fBLuaJIT\fR is open source software, released under the MIT license. .SH SEE ALSO .PP More details in the provided HTML docs or at: -.IR http://luajit.org +.IR https://luajit.org .br More about the Lua language can be found at: -.IR http://lua.org/docs.html +.IR https://lua.org/docs.html .PP lua(1) diff --git a/source/libs/luajit/LuaJIT-src/etc/luajit.pc b/source/libs/luajit/LuaJIT-src/etc/luajit.pc index a78f1746e8f4086443b827e498f869903bc48963..96433008c7fe46d993b509ac9c5825b34c8ca2b8 100644 --- a/source/libs/luajit/LuaJIT-src/etc/luajit.pc +++ b/source/libs/luajit/LuaJIT-src/etc/luajit.pc @@ -1,8 +1,8 @@ # Package information for LuaJIT to be used by pkg-config. majver=2 minver=1 -relver=0 -version=${majver}.${minver}.${relver}-beta3 +relver=ROLLING +version=${majver}.${minver}.${relver} abiver=5.1 prefix=/usr/local @@ -17,7 +17,7 @@ INSTALL_CMOD=${prefix}/${multilib}/lua/${abiver} Name: LuaJIT Description: Just-in-time compiler for Lua -URL: http://luajit.org +URL: https://luajit.org Version: ${version} Requires: Libs: -L${libdir} -l${libname} diff --git a/source/libs/luajit/LuaJIT-src/src/Makefile b/source/libs/luajit/LuaJIT-src/src/Makefile index 34c5e974a4318a90fe4714c0f10ca1d8c194c09f..c82c841e608b4ec6e48facf0849e4fe3b34c899d 100644 --- a/source/libs/luajit/LuaJIT-src/src/Makefile +++ b/source/libs/luajit/LuaJIT-src/src/Makefile @@ -7,12 +7,11 @@ # Also works with MinGW and Cygwin on Windows. # Please check msvcbuild.bat for building with MSVC on Windows. # -# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +# Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ############################################################################## MAJVER= 2 MINVER= 1 -RELVER= 0 ABIVER= 5.1 NODOTABIVER= 51 @@ -110,8 +109,8 @@ XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT #XCFLAGS+= -DLUAJIT_NUMMODE=1 #XCFLAGS+= -DLUAJIT_NUMMODE=2 # -# Enable GC64 mode for x64. -#XCFLAGS+= -DLUAJIT_ENABLE_GC64 +# Disable LJ_GC64 mode for x64. 
+#XCFLAGS+= -DLUAJIT_DISABLE_GC64 # ############################################################################## @@ -132,7 +131,6 @@ XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT # # This define is required to run LuaJIT under Valgrind. The Valgrind # header files must be installed. You should enable debug information, too. -# Use --suppressions=lj.supp to avoid some false positives. #XCFLAGS+= -DLUAJIT_USE_VALGRIND # # This is the client for the GDB JIT API. GDB 7.0 or higher is required @@ -158,13 +156,16 @@ XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM)) HOST_SYS= Windows - HOST_RM= del else HOST_SYS:= $(shell uname -s) ifneq (,$(findstring MINGW,$(HOST_SYS))) HOST_SYS= Windows HOST_MSYS= mingw endif + ifneq (,$(findstring MSYS,$(HOST_SYS))) + HOST_SYS= Windows + HOST_MSYS= mingw + endif ifneq (,$(findstring CYGWIN,$(HOST_SYS))) HOST_SYS= Windows HOST_MSYS= cygwin @@ -191,7 +192,7 @@ CCOPTIONS= $(CCDEBUG) $(ASOPTIONS) LDOPTIONS= $(CCDEBUG) $(LDFLAGS) HOST_CC= $(CC) -HOST_RM= rm -f +HOST_RM?= rm -f # If left blank, minilua is built and used. You can supply an installed # copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. 
with: HOST_LUA=lua HOST_LUA= @@ -209,7 +210,7 @@ TARGET_CC= $(STATIC_CC) TARGET_STCC= $(STATIC_CC) TARGET_DYNCC= $(DYNAMIC_CC) TARGET_LD= $(CROSS)$(CC) -TARGET_AR= $(CROSS)ar rcus 2>/dev/null +TARGET_AR= $(CROSS)ar rcus TARGET_STRIP= $(CROSS)strip TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) @@ -217,6 +218,7 @@ TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER) TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME) TARGET_DLLNAME= lua$(NODOTABIVER).dll +TARGET_DLLDOTANAME= libluajit-$(ABIVER).dll.a TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME) TARGET_DYNXLDOPTS= @@ -231,7 +233,7 @@ TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAG TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) -TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) +TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) TARGET_LJARCH= x64 else @@ -297,6 +299,9 @@ endif ifneq (,$(LMULTILIB)) TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\" endif +ifneq (,$(INSTALL_LJLIBD)) + TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\" +endif ############################################################################## # Target system detection. 
@@ -305,34 +310,39 @@ endif TARGET_SYS?= $(HOST_SYS) ifeq (Windows,$(TARGET_SYS)) TARGET_STRIP+= --strip-unneeded - TARGET_XSHLDFLAGS= -shared + TARGET_XSHLDFLAGS= -shared -Wl,--out-implib,$(TARGET_DLLDOTANAME) TARGET_DYNXLDOPTS= else + TARGET_AR+= 2>/dev/null ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1)) TARGET_XCFLAGS+= -fno-stack-protector endif ifeq (Darwin,$(TARGET_SYS)) ifeq (,$(MACOSX_DEPLOYMENT_TARGET)) - export MACOSX_DEPLOYMENT_TARGET=10.4 + $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY) endif TARGET_STRIP+= -x - TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC + TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL + TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC TARGET_DYNXLDOPTS= - TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) - ifeq (x64,$(TARGET_LJARCH)) - TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000 - TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000 - endif + TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255 else ifeq (iOS,$(TARGET_SYS)) TARGET_STRIP+= -x - TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC + TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC TARGET_DYNXLDOPTS= - TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) + TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255 ifeq (arm64,$(TARGET_LJARCH)) TARGET_XCFLAGS+= -fno-omit-frame-pointer endif else + ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) + # Find out whether the target toolchain always generates unwind tables. 
+ TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o) + ifneq (,$(findstring E,$(TARGET_TESTUNWIND))) + TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL + endif + endif ifneq (SunOS,$(TARGET_SYS)) ifneq (PS3,$(TARGET_SYS)) TARGET_XLDFLAGS+= -Wl,-E @@ -359,7 +369,7 @@ ifneq ($(HOST_SYS),$(TARGET_SYS)) HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX else ifeq (iOS,$(TARGET_SYS)) - HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1 else HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER endif @@ -380,10 +390,11 @@ MINILUA_O= host/minilua.o MINILUA_LIBS= -lm MINILUA_T= host/minilua MINILUA_X= $(MINILUA_T) +MINILUA_DEP= ifeq (,$(HOST_LUA)) HOST_LUA= $(MINILUA_X) - DASM_DEP= $(MINILUA_T) + MINILUA_DEP= $(MINILUA_T) endif DASM_DIR= ../dynasm @@ -425,6 +436,10 @@ ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D NO_UNWIND TARGET_ARCH+= -DLUAJIT_NO_UNWIND endif +ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D PAUTH + TARGET_ARCH+= -DLJ_ABI_PAUTH=1 +endif DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) ifeq (Windows,$(TARGET_SYS)) DASM_AFLAGS+= -D WIN @@ -439,6 +454,9 @@ ifeq (arm,$(TARGET_LJARCH)) DASM_AFLAGS+= -D IOS endif else +ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D MIPSR6 +endif ifeq (ppc,$(TARGET_LJARCH)) ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D SQRT @@ -452,9 +470,6 @@ ifeq (ppc,$(TARGET_LJARCH)) ifeq (PS3,$(TARGET_SYS)) DASM_AFLAGS+= -D PPE -D TOC endif - ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH))) - DASM_ARCH= ppc64 - endif endif endif endif @@ -462,6 +477,14 @@ endif DASM_FLAGS= 
$(DASM_XFLAGS) $(DASM_AFLAGS) DASM_DASC= vm_$(DASM_ARCH).dasc +GIT= git +ifeq (Windows,$(HOST_SYS)$(HOST_MSYS)) + GIT_RELVER= if exist ..\.git ( $(GIT) show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) +else + GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || : +endif +GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*) + BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ host/buildvm_lib.o host/buildvm_fold.o BUILDVM_T= host/buildvm @@ -476,13 +499,15 @@ LJVM_BOUT= $(LJVM_S) LJVM_MODE= elfasm LJLIB_O= lib_base.o lib_math.o lbitlib.o lib_bit.o lib_string.o lib_table.o \ - lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o + lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \ + lib_buffer.o LJLIB_C= $(LJLIB_O:.o=.c) -LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ +LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ - lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ - lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ + lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \ + lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \ + lj_api.o lj_profile.o \ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ @@ -506,8 +531,8 @@ LUAJIT_T= luajit ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T) ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \ - host/buildvm_arch.h -ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP) + host/buildvm_arch.h luajit.h +ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) luajit_relver.txt $(LIB_VMDEFP) WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM) @@ -557,6 
+582,7 @@ ifeq (Windows,$(HOST_SYS)) MINILUA_X= host\minilua BUILDVM_X= host\buildvm ALL_RM:= $(subst /,\,$(ALL_RM)) + HOST_RM= del endif endif @@ -597,7 +623,6 @@ E= @echo default all: $(TARGET_T) amalg: - @grep "^[+|]" ljamalg.c $(MAKE) all "LJCORE_O=ljamalg.o" clean: @@ -631,7 +656,12 @@ $(MINILUA_T): $(MINILUA_O) $(E) "HOSTLINK $@" $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) -host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua +luajit.h: $(MINILUA_DEP) $(GIT_DEP) luajit_rolling.h + $(E) "VERSION $@" + $(Q)$(GIT_RELVER) + $(Q)$(HOST_LUA) host/genversion.lua + +host/buildvm_arch.h: $(DASM_DASC) $(MINILUA_DEP) lj_arch.h lua.h luaconf.h $(E) "DYNASM $@" $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) diff --git a/source/libs/luajit/LuaJIT-src/src/Makefile.dep b/source/libs/luajit/LuaJIT-src/src/Makefile.dep index 44d66f198f68209c203ab8eb2e26476c3ab07aba..237315102dc5ce371fbb4212f78b94731ea4c57a 100644 --- a/source/libs/luajit/LuaJIT-src/src/Makefile.dep +++ b/source/libs/luajit/LuaJIT-src/src/Makefile.dep @@ -1,16 +1,20 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ - lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h + lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_vmevent.h lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ - lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \ - lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ - lj_strfmt.h lj_lib.h lj_libdef.h + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \ + lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \ + lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \ + lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h lbitlib.o: lbitlib.c lua.h luaconf.h lauxlib.h lualib.h 
lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \ lj_ffdef.h lj_lib.h lj_libdef.h +lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ + lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \ + lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ lj_libdef.h @@ -29,7 +33,8 @@ lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \ lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h + lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_vm.h lj_prng.h \ + lj_libdef.h lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \ lj_libdef.h @@ -42,16 +47,18 @@ lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h -lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h +lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h \ + lj_prng.h lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h 
lj_arch.h lj_gc.h \ - lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ - lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ - lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \ - lj_asm_*.h + lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \ + lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \ + lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \ + lj_prng.h lj_emit_*.h lj_asm_*.h +lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ lj_bcdef.h lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ @@ -76,8 +83,8 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \ lj_traceerr.h lj_vm.h lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ - lj_ccallback.h + lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ctype.h \ + lj_cdata.h lj_cconv.h lj_ccallback.h lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h @@ -92,7 +99,7 @@ lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \ lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ - lj_crecord.h lj_strfmt.h + lj_crecord.h lj_strfmt.h lj_strscan.h lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \ lj_ccallback.h lj_buf.h @@ -109,38 +116,38 @@ lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ lj_ff.h 
lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ lj_traceerr.h lj_vm.h lj_strfmt.h lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ - lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ - lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ - lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h + lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \ + lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ + lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \ + lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ lj_traceerr.h lj_vm.h lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \ - lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h + lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_vmevent.h lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \ lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \ - lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h + lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \ lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \ lj_strfmt.h lj_lib.o: lj_lib.c lauxlib.h lua.h 
luaconf.h lj_obj.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ - lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \ - lj_bcdump.h lj_lib.h + lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \ + lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \ lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ - lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h + lj_dispatch.h lj_bc.h lj_traceerr.h lj_prng.h lj_vm.h lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \ lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h @@ -156,7 +163,7 @@ lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \ lj_vm.h lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h + lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h lj_dispatch.h lj_bc.h lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ lj_traceerr.h lj_vm.h lj_strscan.h @@ -169,6 +176,7 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ lj_vm.h lj_vmevent.h +lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ lj_jit.h lj_ir.h lj_trace.h 
lj_traceerr.h lj_profile.h luajit.h @@ -176,7 +184,10 @@ lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \ lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \ - lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h + lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h +lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \ + lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ + lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ @@ -184,11 +195,13 @@ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \ - lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h + lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \ + lj_alloc.h luajit.h lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_str.h lj_char.h + lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h + lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \ + lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ @@ -199,24 +212,25 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h 
\ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \ lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \ - lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h + lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_udata.h + lj_gc.h lj_err.h lj_errmsg.h lj_udata.h lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \ lj_vm.h lj_vmevent.h lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.h lj_vm.h -ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \ - lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \ - lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \ - lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \ - lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \ - lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ - lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \ - lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \ - lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \ +ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \ + lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \ + lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \ + lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h lj_vmevent.h lj_err.c lj_debug.h lj_ff.h \ + lj_ffdef.h lj_strfmt.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c \ + lj_buf.c lj_str.c lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c \ + lj_strscan.h lj_lib.h lj_debug.c lj_prng.c lj_state.c lj_lex.h \ + lj_alloc.h 
luajit.h lj_dispatch.c lj_ccallback.h lj_profile.h \ + lj_vmevent.c lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c \ + lj_serialize.c lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h \ lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \ lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \ lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \ @@ -228,7 +242,7 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \ lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \ lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \ - lib_ffi.c lib_init.c + lib_ffi.c lib_buffer.c lib_init.c luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ diff --git a/source/libs/luajit/LuaJIT-src/src/Makefile.std b/source/libs/luajit/LuaJIT-src/src/Makefile.std new file mode 100644 index 0000000000000000000000000000000000000000..4a56d1e8e50202145713015e00ac3b9140a63e25 --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/Makefile.std @@ -0,0 +1,746 @@ +############################################################################## +# LuaJIT Makefile. Requires GNU Make. +# +# Please read doc/install.html before changing any variables! +# +# Suitable for POSIX platforms (Linux, *BSD, OSX etc.). +# Also works with MinGW and Cygwin on Windows. +# Please check msvcbuild.bat for building with MSVC on Windows. +# +# Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h +############################################################################## + +MAJVER= 2 +MINVER= 1 +ABIVER= 5.1 +NODOTABIVER= 51 + +############################################################################## +############################# COMPILER OPTIONS ############################# +############################################################################## +# These options mainly affect the speed of the JIT compiler itself, not the +# speed of the JIT-compiled code. Turn any of the optional settings on by +# removing the '#' in front of them. Make sure you force a full recompile +# with "make clean", followed by "make" if you change any options. +# +DEFAULT_CC = gcc +# +# LuaJIT builds as a native 32 or 64 bit binary by default. +CC= $(DEFAULT_CC) +# +# Use this if you want to force a 32 bit build on a 64 bit multilib OS. +#CC= $(DEFAULT_CC) -m32 +# +# Since the assembler part does NOT maintain a frame pointer, it's pointless +# to slow down the C part by not omitting it. Debugging, tracebacks and +# unwinding are not affected -- the assembler part has frame unwind +# information and GCC emits it where needed (x64) or with -g (see CCDEBUG). +CCOPT= -O2 -fomit-frame-pointer +# Use this if you want to generate a smaller binary (but it's slower): +#CCOPT= -Os -fomit-frame-pointer +# Note: it's no longer recommended to use -O3 with GCC 4.x. +# The I-Cache bloat usually outweighs the benefits from aggressive inlining. 
+# +# Target-specific compiler options: +# +# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute +# the binaries to a different machine you could also use: -march=native +# +CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse +CCOPT_x64= +CCOPT_arm= +CCOPT_arm64= +CCOPT_ppc= +CCOPT_mips= +# +CCDEBUG= +# Uncomment the next line to generate debug information: +#CCDEBUG= -g +# +CCWARN= -Wall +# Uncomment the next line to enable more warnings: +#CCWARN+= -Wextra -Wdeclaration-after-statement -Wredundant-decls -Wshadow -Wpointer-arith +# +############################################################################## + +############################################################################## +################################ BUILD MODE ################################ +############################################################################## +# The default build mode is mixed mode on POSIX. On Windows this is the same +# as dynamic mode. +# +# Mixed mode creates a static + dynamic library and a statically linked luajit. +BUILDMODE= mixed +# +# Static mode creates a static library and a statically linked luajit. +#BUILDMODE= static +# +# Dynamic mode creates a dynamic library and a dynamically linked luajit. +# Note: this executable will only run when the library is installed! +#BUILDMODE= dynamic +# +############################################################################## + +############################################################################## +################################# FEATURES ################################# +############################################################################## +# Enable/disable these features as needed, but make sure you force a full +# recompile with "make clean", followed by "make". +XCFLAGS= +# +# Permanently disable the FFI extension to reduce the size of the LuaJIT +# executable. But please consider that the FFI library is compiled-in, +# but NOT loaded by default. 
It only allocates any memory, if you actually +# make use of it. +#XCFLAGS+= -DLUAJIT_DISABLE_FFI +# +# Features from Lua 5.2 that are unlikely to break existing code are +# enabled by default. Some other features that *might* break some existing +# code (e.g. __pairs or os.execute() return values) can be enabled here. +# Note: this does not provide full compatibility with Lua 5.2 at this time. +#XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT +# +# Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter. +#XCFLAGS+= -DLUAJIT_DISABLE_JIT +# +# Some architectures (e.g. PPC) can use either single-number (1) or +# dual-number (2) mode. Uncomment one of these lines to override the +# default mode. Please see LJ_ARCH_NUMMODE in lj_arch.h for details. +#XCFLAGS+= -DLUAJIT_NUMMODE=1 +#XCFLAGS+= -DLUAJIT_NUMMODE=2 +# +# Disable LJ_GC64 mode for x64. +#XCFLAGS+= -DLUAJIT_DISABLE_GC64 +# +############################################################################## + +############################################################################## +############################ DEBUGGING SUPPORT ############################# +############################################################################## +# Enable these options as needed, but make sure you force a full recompile +# with "make clean", followed by "make". +# Note that most of these are NOT suitable for benchmarking or release mode! +# +# Use the system provided memory allocator (realloc) instead of the +# bundled memory allocator. This is slower, but sometimes helpful for +# debugging. This option cannot be enabled on x64 without GC64, since +# realloc usually doesn't return addresses in the right address range. +# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and +# the only way to get useful results from it for all other architectures. +#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC +# +# This define is required to run LuaJIT under Valgrind. The Valgrind +# header files must be installed. 
You should enable debug information, too. +#XCFLAGS+= -DLUAJIT_USE_VALGRIND +# +# This is the client for the GDB JIT API. GDB 7.0 or higher is required +# to make use of it. See lj_gdbjit.c for details. Enabling this causes +# a non-negligible overhead, even when not running under GDB. +#XCFLAGS+= -DLUAJIT_USE_GDBJIT +# +# Turn on assertions for the Lua/C API to debug problems with lua_* calls. +# This is rather slow -- use only while developing C libraries/embeddings. +#XCFLAGS+= -DLUA_USE_APICHECK +# +# Turn on assertions for the whole LuaJIT VM. This significantly slows down +# everything. Use only if you suspect a problem with LuaJIT itself. +#XCFLAGS+= -DLUA_USE_ASSERT +# +############################################################################## +# You probably don't need to change anything below this line! +############################################################################## + +############################################################################## +# Host system detection. +############################################################################## + +ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM)) + HOST_SYS= Windows +else + HOST_SYS:= $(shell uname -s) + ifneq (,$(findstring MINGW,$(HOST_SYS))) + HOST_SYS= Windows + HOST_MSYS= mingw + endif + ifneq (,$(findstring MSYS,$(HOST_SYS))) + HOST_SYS= Windows + HOST_MSYS= mingw + endif + ifneq (,$(findstring CYGWIN,$(HOST_SYS))) + HOST_SYS= Windows + HOST_MSYS= cygwin + endif +endif + +############################################################################## +# Flags and options for host and target. 
+############################################################################## + +# You can override the following variables at the make command line: +# CC HOST_CC STATIC_CC DYNAMIC_CC +# CFLAGS HOST_CFLAGS TARGET_CFLAGS +# LDFLAGS HOST_LDFLAGS TARGET_LDFLAGS TARGET_SHLDFLAGS +# LIBS HOST_LIBS TARGET_LIBS +# CROSS HOST_SYS TARGET_SYS TARGET_FLAGS +# +# Cross-compilation examples: +# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows +# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- + +ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) +CCOPTIONS= $(CCDEBUG) $(ASOPTIONS) +LDOPTIONS= $(CCDEBUG) $(LDFLAGS) + +HOST_CC= $(CC) +HOST_RM?= rm -f +# If left blank, minilua is built and used. You can supply an installed +# copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua +HOST_LUA= + +HOST_XCFLAGS= -I. +HOST_XLDFLAGS= +HOST_XLIBS= +HOST_ACFLAGS= $(CCOPTIONS) $(HOST_XCFLAGS) $(TARGET_ARCH) $(HOST_CFLAGS) +HOST_ALDFLAGS= $(LDOPTIONS) $(HOST_XLDFLAGS) $(HOST_LDFLAGS) +HOST_ALIBS= $(HOST_XLIBS) $(LIBS) $(HOST_LIBS) + +STATIC_CC = $(CROSS)$(CC) +DYNAMIC_CC = $(CROSS)$(CC) -fPIC +TARGET_CC= $(STATIC_CC) +TARGET_STCC= $(STATIC_CC) +TARGET_DYNCC= $(DYNAMIC_CC) +TARGET_LD= $(CROSS)$(CC) +TARGET_AR= $(CROSS)ar rcus +TARGET_STRIP= $(CROSS)strip + +TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) +TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER) +TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib +TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME) +TARGET_DLLNAME= lua$(NODOTABIVER).dll +TARGET_DLLDOTANAME= libluajit-$(ABIVER).dll.a +TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME) +TARGET_DYNXLDOPTS= + +TARGET_LFSFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE +TARGET_XCFLAGS= $(TARGET_LFSFLAGS) -U_FORTIFY_SOURCE +TARGET_XLDFLAGS= +TARGET_XLIBS= -lm +TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) +TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) 
+TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) +TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) +TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) +TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) + +TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) +ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= x64 +else +ifneq (,$(findstring LJ_TARGET_X86 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= x86 +else +ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) + TARGET_LJARCH= arm +else +ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) + ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__AARCH64EB__=1 + endif + TARGET_LJARCH= arm64 +else +ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) + ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) + TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE + else + TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE + endif + TARGET_LJARCH= ppc +else +ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) + ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__MIPSEL__=1 + endif + ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= mips64 + else + TARGET_LJARCH= mips + endif +else + $(error Unsupported target architecture) +endif +endif +endif +endif +endif +endif + +ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) + TARGET_SYS= PS3 + TARGET_ARCH+= -D__CELLOS_LV2__ + TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC + TARGET_XLIBS+= -lpthread +endif + +TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) +TARGET_ARCH+= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET_LJARCH)) + +ifneq (,$(PREFIX)) +ifneq (/usr/local,$(PREFIX)) + TARGET_XCFLAGS+= -DLUA_ROOT=\"$(PREFIX)\" + ifneq (/usr,$(PREFIX)) + TARGET_DYNXLDOPTS= -Wl,-rpath,$(TARGET_LIBPATH) + endif +endif +endif +ifneq (,$(MULTILIB)) + TARGET_XCFLAGS+= 
-DLUA_MULTILIB=\"$(MULTILIB)\" +endif +ifneq (,$(LMULTILIB)) + TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\" +endif +ifneq (,$(INSTALL_LJLIBD)) + TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\" +endif + +############################################################################## +# Target system detection. +############################################################################## + +TARGET_SYS?= $(HOST_SYS) +ifeq (Windows,$(TARGET_SYS)) + TARGET_STRIP+= --strip-unneeded + TARGET_XSHLDFLAGS= -shared -Wl,--out-implib,$(TARGET_DLLDOTANAME) + TARGET_DYNXLDOPTS= +else + TARGET_AR+= 2>/dev/null +ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1)) + TARGET_XCFLAGS+= -fno-stack-protector +endif +ifeq (Darwin,$(TARGET_SYS)) + ifeq (,$(MACOSX_DEPLOYMENT_TARGET)) + $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY) + endif + TARGET_STRIP+= -x + TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL + TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC + TARGET_DYNXLDOPTS= + TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255 +else +ifeq (iOS,$(TARGET_SYS)) + TARGET_STRIP+= -x + TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC + TARGET_DYNXLDOPTS= + TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255 + ifeq (arm64,$(TARGET_LJARCH)) + TARGET_XCFLAGS+= -fno-omit-frame-pointer + endif +else + ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) + # Find out whether the target toolchain always generates unwind tables. 
+ TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o) + ifneq (,$(findstring E,$(TARGET_TESTUNWIND))) + TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL + endif + endif + ifneq (SunOS,$(TARGET_SYS)) + ifneq (PS3,$(TARGET_SYS)) + TARGET_XLDFLAGS+= -Wl,-E + endif + endif + ifeq (Linux,$(TARGET_SYS)) + TARGET_XLIBS+= -ldl + endif + ifeq (GNU/kFreeBSD,$(TARGET_SYS)) + TARGET_XLIBS+= -ldl + endif +endif +endif +endif + +ifneq ($(HOST_SYS),$(TARGET_SYS)) + ifeq (Windows,$(TARGET_SYS)) + HOST_XCFLAGS+= -malign-double -DLUAJIT_OS=LUAJIT_OS_WINDOWS + else + ifeq (Linux,$(TARGET_SYS)) + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_LINUX + else + ifeq (Darwin,$(TARGET_SYS)) + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX + else + ifeq (iOS,$(TARGET_SYS)) + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1 + else + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER + endif + endif + endif + endif +endif + +ifneq (,$(CCDEBUG)) + TARGET_STRIP= @: +endif + +############################################################################## +# Files and pathnames. 
+############################################################################## + +MINILUA_O= host/minilua.o +MINILUA_LIBS= -lm +MINILUA_T= host/minilua +MINILUA_X= $(MINILUA_T) +MINILUA_DEP= + +ifeq (,$(HOST_LUA)) + HOST_LUA= $(MINILUA_X) + MINILUA_DEP= $(MINILUA_T) +endif + +DASM_DIR= ../dynasm +DASM= $(HOST_LUA) $(DASM_DIR)/dynasm.lua +DASM_XFLAGS= +DASM_AFLAGS= +DASM_ARCH= $(TARGET_LJARCH) + +ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D ENDIAN_LE +else + DASM_AFLAGS+= -D ENDIAN_BE +endif +ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D P64 +endif +ifneq (,$(findstring LJ_HASJIT 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D JIT +endif +ifneq (,$(findstring LJ_HASFFI 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D FFI +endif +ifneq (,$(findstring LJ_DUALNUM 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D DUALNUM +endif +ifneq (,$(findstring LJ_ARCH_HASFPU 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D FPU + TARGET_ARCH+= -DLJ_ARCH_HASFPU=1 +else + TARGET_ARCH+= -DLJ_ARCH_HASFPU=0 +endif +ifeq (,$(findstring LJ_ABI_SOFTFP 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D HFABI + TARGET_ARCH+= -DLJ_ABI_SOFTFP=0 +else + TARGET_ARCH+= -DLJ_ABI_SOFTFP=1 +endif +ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D NO_UNWIND + TARGET_ARCH+= -DLUAJIT_NO_UNWIND +endif +ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D PAUTH + TARGET_ARCH+= -DLJ_ABI_PAUTH=1 +endif +DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) +ifeq (Windows,$(TARGET_SYS)) + DASM_AFLAGS+= -D WIN +endif +ifeq (x64,$(TARGET_LJARCH)) + ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH))) + DASM_ARCH= x86 + endif +else +ifeq (arm,$(TARGET_LJARCH)) + ifeq (iOS,$(TARGET_SYS)) + DASM_AFLAGS+= -D IOS + endif +else +ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D MIPSR6 +endif +ifeq (ppc,$(TARGET_LJARCH)) + ifneq 
(,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D SQRT + endif + ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D ROUND + endif + ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D GPR64 + endif + ifeq (PS3,$(TARGET_SYS)) + DASM_AFLAGS+= -D PPE -D TOC + endif +endif +endif +endif + +DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) +DASM_DASC= vm_$(DASM_ARCH).dasc + +GIT= git +ifeq (Windows,$(HOST_SYS)$(HOST_MSYS)) + GIT_RELVER= if exist ..\.git ( $(GIT) show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) +else + GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || : +endif +GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*) + +BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ + host/buildvm_lib.o host/buildvm_fold.o +BUILDVM_T= host/buildvm +BUILDVM_X= $(BUILDVM_T) + +HOST_O= $(MINILUA_O) $(BUILDVM_O) +HOST_T= $(MINILUA_T) $(BUILDVM_T) + +LJVM_S= lj_vm.S +LJVM_O= lj_vm.o +LJVM_BOUT= $(LJVM_S) +LJVM_MODE= elfasm + +LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ + lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \ + lib_buffer.o +LJLIB_C= $(LJLIB_O:.o=.c) + +LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ + lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ + lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \ + lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \ + lj_api.o lj_profile.o \ + lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ + lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ + lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ + lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ + lj_asm.o lj_trace.o lj_gdbjit.o \ + lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ + lj_carith.o lj_clib.o lj_cparse.o \ + 
lj_lib.o lj_alloc.o lib_aux.o \ + $(LJLIB_O) lib_init.o + +LJVMCORE_O= $(LJVM_O) $(LJCORE_O) +LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) + +LIB_VMDEF= jit/vmdef.lua +LIB_VMDEFP= $(LIB_VMDEF) + +LUAJIT_O= luajit.o +LUAJIT_A= libluajit.a +LUAJIT_SO= libluajit.so +LUAJIT_T= luajit + +ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T) +ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \ + host/buildvm_arch.h luajit.h +ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) luajit_relver.txt $(LIB_VMDEFP) +WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk +ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM) + +############################################################################## +# Build mode handling. +############################################################################## + +# Mixed mode defaults. +TARGET_O= $(LUAJIT_A) +TARGET_T= $(LUAJIT_T) $(LUAJIT_SO) +TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO) + +ifeq (Windows,$(TARGET_SYS)) + TARGET_DYNCC= $(STATIC_CC) + LJVM_MODE= peobj + LJVM_BOUT= $(LJVM_O) + LUAJIT_T= luajit.exe + ifeq (cygwin,$(HOST_MSYS)) + LUAJIT_SO= cyg$(TARGET_DLLNAME) + else + LUAJIT_SO= $(TARGET_DLLNAME) + endif + # Mixed mode is not supported on Windows. And static mode doesn't work well. + # C modules cannot be loaded, because they bind to lua51.dll. 
+ ifneq (static,$(BUILDMODE)) + BUILDMODE= dynamic + TARGET_XCFLAGS+= -DLUA_BUILD_AS_DLL + endif +endif +ifeq (Darwin,$(TARGET_SYS)) + LJVM_MODE= machasm +endif +ifeq (iOS,$(TARGET_SYS)) + LJVM_MODE= machasm +endif +ifeq (SunOS,$(TARGET_SYS)) + BUILDMODE= static +endif +ifeq (PS3,$(TARGET_SYS)) + BUILDMODE= static +endif + +ifeq (Windows,$(HOST_SYS)) + MINILUA_T= host/minilua.exe + BUILDVM_T= host/buildvm.exe + ifeq (,$(HOST_MSYS)) + MINILUA_X= host\minilua + BUILDVM_X= host\buildvm + ALL_RM:= $(subst /,\,$(ALL_RM)) + HOST_RM= del + endif +endif + +ifeq (static,$(BUILDMODE)) + TARGET_DYNCC= @: + TARGET_T= $(LUAJIT_T) + TARGET_DEP= $(LIB_VMDEF) +else +ifeq (dynamic,$(BUILDMODE)) + ifneq (Windows,$(TARGET_SYS)) + TARGET_CC= $(DYNAMIC_CC) + endif + TARGET_DYNCC= @: + LJVMCORE_DYNO= $(LJVMCORE_O) + TARGET_O= $(LUAJIT_SO) + TARGET_XLDFLAGS+= $(TARGET_DYNXLDOPTS) +else +ifeq (Darwin,$(TARGET_SYS)) + TARGET_DYNCC= @: + LJVMCORE_DYNO= $(LJVMCORE_O) +endif +ifeq (iOS,$(TARGET_SYS)) + TARGET_DYNCC= @: + LJVMCORE_DYNO= $(LJVMCORE_O) +endif +endif +endif + +Q= @ +E= @echo +#Q= +#E= @: + +############################################################################## +# Make targets. 
+############################################################################## + +default all: $(TARGET_T) + +amalg: + $(MAKE) all "LJCORE_O=ljamalg.o" + +clean: + $(HOST_RM) $(ALL_RM) + +libbc: + ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C) + $(MAKE) all + +depend: + @for file in $(ALL_HDRGEN); do \ + test -f $$file || touch $$file; \ + done + @$(HOST_CC) $(HOST_ACFLAGS) -MM *.c host/*.c | \ + sed -e "s| [^ ]*/dasm_\S*\.h||g" \ + -e "s|^\([^l ]\)|host/\1|" \ + -e "s| lj_target_\S*\.h| lj_target_*.h|g" \ + -e "s| lj_emit_\S*\.h| lj_emit_*.h|g" \ + -e "s| lj_asm_\S*\.h| lj_asm_*.h|g" >Makefile.dep + @for file in $(ALL_HDRGEN); do \ + test -s $$file || $(HOST_RM) $$file; \ + done + +.PHONY: default all amalg clean libbc depend + +############################################################################## +# Rules for generated files. +############################################################################## + +$(MINILUA_T): $(MINILUA_O) + $(E) "HOSTLINK $@" + $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) + +luajit.h: $(MINILUA_DEP) $(GIT_DEP) luajit_rolling.h + $(E) "VERSION $@" + $(Q)$(GIT_RELVER) + $(Q)$(HOST_LUA) host/genversion.lua + +host/buildvm_arch.h: $(DASM_DASC) $(MINILUA_DEP) lj_arch.h lua.h luaconf.h + $(E) "DYNASM $@" + $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) + +host/buildvm.o: $(DASM_DIR)/dasm_*.h + +$(BUILDVM_T): $(BUILDVM_O) + $(E) "HOSTLINK $@" + $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(BUILDVM_O) $(HOST_ALIBS) + +$(LJVM_BOUT): $(BUILDVM_T) + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m $(LJVM_MODE) -o $@ + +lj_bcdef.h: $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m bcdef -o $@ $(LJLIB_C) + +lj_ffdef.h: $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m ffdef -o $@ $(LJLIB_C) + +lj_libdef.h: $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m libdef -o $@ $(LJLIB_C) + +lj_recdef.h: $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + 
$(Q)$(BUILDVM_X) -m recdef -o $@ $(LJLIB_C) + +$(LIB_VMDEF): $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m vmdef -o $(LIB_VMDEFP) $(LJLIB_C) + +lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m folddef -o $@ lj_opt_fold.c + +############################################################################## +# Object file rules. +############################################################################## + +%.o: %.c + $(E) "CC $@" + $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< + $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< + +%.o: %.S + $(E) "ASM $@" + $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $< + $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $< + +$(LUAJIT_O): + $(E) "CC $@" + $(Q)$(TARGET_STCC) $(TARGET_ACFLAGS) -c -o $@ $< + +$(HOST_O): %.o: %.c + $(E) "HOSTCC $@" + $(Q)$(HOST_CC) $(HOST_ACFLAGS) -c -o $@ $< + +include Makefile.dep + +############################################################################## +# Target file rules. +############################################################################## + +$(LUAJIT_A): $(LJVMCORE_O) + $(E) "AR $@" + $(Q)$(TARGET_AR) $@ $(LJVMCORE_O) + +# The dependency on _O, but linking with _DYNO is intentional. 
+$(LUAJIT_SO): $(LJVMCORE_O) + $(E) "DYNLINK $@" + $(Q)$(TARGET_LD) $(TARGET_ASHLDFLAGS) -o $@ $(LJVMCORE_DYNO) $(TARGET_ALIBS) + $(Q)$(TARGET_STRIP) $@ + +$(LUAJIT_T): $(TARGET_O) $(LUAJIT_O) $(TARGET_DEP) + $(E) "LINK $@" + $(Q)$(TARGET_LD) $(TARGET_ALDFLAGS) -o $@ $(LUAJIT_O) $(TARGET_O) $(TARGET_ALIBS) + $(Q)$(TARGET_STRIP) $@ + $(E) "OK Successfully built LuaJIT" + +############################################################################## diff --git a/source/libs/luajit/LuaJIT-src/src/_lj_bcdef.h_ b/source/libs/luajit/LuaJIT-src/src/_lj_bcdef.h_ deleted file mode 100644 index 231ed3a4e44c7123abb4498c870b7308906e6926..0000000000000000000000000000000000000000 --- a/source/libs/luajit/LuaJIT-src/src/_lj_bcdef.h_ +++ /dev/null @@ -1,226 +0,0 @@ -/* This is a generated file. DO NOT EDIT! */ - -LJ_DATADEF const uint16_t lj_bc_ofs[] = { -0, -72, -144, -216, -288, -427, -569, -632, -695, -764, -833, -886, -938, -989, -1040, -1081, -1122, -1148, -1180, -1240, -1314, -1368, -1422, -1476, -1530, -1589, -1643, -1697, -1751, -1805, -1841, -1908, -1975, -2042, -2109, -2158, -2230, -2306, -2342, -2378, -2408, -2437, -2462, -2505, -2541, -2628, -2710, -2748, -2782, -2833, -2897, -3006, -3099, -3117, -3135, -3283, -3407, -3506, -3679, -3908, -4032, -4174, -4220, -4262, -4266, -4414, -4482, -4647, -4838, -4926, -4930, -5066, -5158, -5263, -5360, -5465, -5485, -5555, -5622, -5642, -5686, -5725, -5745, -5763, -5810, -5835, -5855, -5918, -5972, -5972, -6097, -6098, -6177, -7841, -7908, -8419, -8522, -8579, -8710, -7974, -8136, -8228, -8280, -8311, -8768, -8809, -9417, -8864, -9167, -9469, -9596, -9620, -9647, -9711, -9744, -9778, -9809, -9840, -9873, -9914, -9957, -9990, -10030, -10070, -10245, -10393, -10110, -10110, -9678, -10149, -10549, -10492, -10196, -10603, -10662, -11596, -11994, -11941, -12063, -12142, -12224, -12306, -12388, -11650, -11747, -11844, -10721, -10752, -10799, -10921, -11090, -11217, -11327, -11442, -11557 -}; - -LJ_DATADEF const uint16_t lj_bc_mode[] = 
{ -BCDEF(BCMODE) -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF -}; - diff --git a/source/libs/luajit/LuaJIT-src/src/_lj_ffdef.h_ b/source/libs/luajit/LuaJIT-src/src/_lj_ffdef.h_ deleted file mode 100644 index 29dd4d3edceaf5df9be18397ee9bef079f1fbef2..0000000000000000000000000000000000000000 --- a/source/libs/luajit/LuaJIT-src/src/_lj_ffdef.h_ +++ /dev/null @@ -1,210 +0,0 @@ -/* This is a generated file. DO NOT EDIT! 
*/ - -FFDEF(assert) -FFDEF(type) -FFDEF(next) -FFDEF(pairs) -FFDEF(ipairs_aux) -FFDEF(ipairs) -FFDEF(getmetatable) -FFDEF(setmetatable) -FFDEF(getfenv) -FFDEF(setfenv) -FFDEF(rawget) -FFDEF(rawset) -FFDEF(rawequal) -FFDEF(unpack) -FFDEF(select) -FFDEF(tonumber) -FFDEF(tostring) -FFDEF(error) -FFDEF(pcall) -FFDEF(xpcall) -FFDEF(loadfile) -FFDEF(load) -FFDEF(loadstring) -FFDEF(dofile) -FFDEF(gcinfo) -FFDEF(collectgarbage) -FFDEF(newproxy) -FFDEF(print) -FFDEF(coroutine_status) -FFDEF(coroutine_running) -FFDEF(coroutine_create) -FFDEF(coroutine_yield) -FFDEF(coroutine_resume) -FFDEF(coroutine_wrap_aux) -FFDEF(coroutine_wrap) -FFDEF(math_abs) -FFDEF(math_floor) -FFDEF(math_ceil) -FFDEF(math_sqrt) -FFDEF(math_log10) -FFDEF(math_exp) -FFDEF(math_sin) -FFDEF(math_cos) -FFDEF(math_tan) -FFDEF(math_asin) -FFDEF(math_acos) -FFDEF(math_atan) -FFDEF(math_sinh) -FFDEF(math_cosh) -FFDEF(math_tanh) -FFDEF(math_frexp) -FFDEF(math_modf) -FFDEF(math_deg) -FFDEF(math_rad) -FFDEF(math_log) -FFDEF(math_atan2) -FFDEF(math_pow) -FFDEF(math_fmod) -FFDEF(math_ldexp) -FFDEF(math_min) -FFDEF(math_max) -FFDEF(math_random) -FFDEF(math_randomseed) -FFDEF(bit_tobit) -FFDEF(bit_bnot) -FFDEF(bit_bswap) -FFDEF(bit_lshift) -FFDEF(bit_rshift) -FFDEF(bit_arshift) -FFDEF(bit_rol) -FFDEF(bit_ror) -FFDEF(bit_band) -FFDEF(bit_bor) -FFDEF(bit_bxor) -FFDEF(bit_tohex) -FFDEF(string_len) -FFDEF(string_byte) -FFDEF(string_char) -FFDEF(string_sub) -FFDEF(string_rep) -FFDEF(string_reverse) -FFDEF(string_lower) -FFDEF(string_upper) -FFDEF(string_dump) -FFDEF(string_find) -FFDEF(string_match) -FFDEF(string_gmatch_aux) -FFDEF(string_gmatch) -FFDEF(string_gsub) -FFDEF(string_format) -FFDEF(table_foreachi) -FFDEF(table_foreach) -FFDEF(table_getn) -FFDEF(table_maxn) -FFDEF(table_insert) -FFDEF(table_remove) -FFDEF(table_concat) -FFDEF(table_sort) -FFDEF(io_method_close) -FFDEF(io_method_read) -FFDEF(io_method_write) -FFDEF(io_method_flush) -FFDEF(io_method_seek) -FFDEF(io_method_setvbuf) -FFDEF(io_method_lines) 
-FFDEF(io_method___gc) -FFDEF(io_method___tostring) -FFDEF(io_open) -FFDEF(io_popen) -FFDEF(io_tmpfile) -FFDEF(io_close) -FFDEF(io_read) -FFDEF(io_write) -FFDEF(io_flush) -FFDEF(io_input) -FFDEF(io_output) -FFDEF(io_lines) -FFDEF(io_type) -FFDEF(os_execute) -FFDEF(os_remove) -FFDEF(os_rename) -FFDEF(os_tmpname) -FFDEF(os_getenv) -FFDEF(os_exit) -FFDEF(os_clock) -FFDEF(os_date) -FFDEF(os_time) -FFDEF(os_difftime) -FFDEF(os_setlocale) -FFDEF(debug_getregistry) -FFDEF(debug_getmetatable) -FFDEF(debug_setmetatable) -FFDEF(debug_getfenv) -FFDEF(debug_setfenv) -FFDEF(debug_getinfo) -FFDEF(debug_getlocal) -FFDEF(debug_setlocal) -FFDEF(debug_getupvalue) -FFDEF(debug_setupvalue) -FFDEF(debug_upvalueid) -FFDEF(debug_upvaluejoin) -FFDEF(debug_sethook) -FFDEF(debug_gethook) -FFDEF(debug_debug) -FFDEF(debug_traceback) -FFDEF(jit_on) -FFDEF(jit_off) -FFDEF(jit_flush) -FFDEF(jit_status) -FFDEF(jit_attach) -FFDEF(jit_util_funcinfo) -FFDEF(jit_util_funcbc) -FFDEF(jit_util_funck) -FFDEF(jit_util_funcuvname) -FFDEF(jit_util_traceinfo) -FFDEF(jit_util_traceir) -FFDEF(jit_util_tracek) -FFDEF(jit_util_tracesnap) -FFDEF(jit_util_tracemc) -FFDEF(jit_util_traceexitstub) -FFDEF(jit_util_ircalladdr) -FFDEF(jit_opt_start) -FFDEF(ffi_meta___index) -FFDEF(ffi_meta___newindex) -FFDEF(ffi_meta___eq) -FFDEF(ffi_meta___len) -FFDEF(ffi_meta___lt) -FFDEF(ffi_meta___le) -FFDEF(ffi_meta___concat) -FFDEF(ffi_meta___call) -FFDEF(ffi_meta___add) -FFDEF(ffi_meta___sub) -FFDEF(ffi_meta___mul) -FFDEF(ffi_meta___div) -FFDEF(ffi_meta___mod) -FFDEF(ffi_meta___pow) -FFDEF(ffi_meta___unm) -FFDEF(ffi_meta___tostring) -FFDEF(ffi_meta___pairs) -FFDEF(ffi_meta___ipairs) -FFDEF(ffi_clib___index) -FFDEF(ffi_clib___newindex) -FFDEF(ffi_clib___gc) -FFDEF(ffi_callback_free) -FFDEF(ffi_callback_set) -FFDEF(ffi_cdef) -FFDEF(ffi_new) -FFDEF(ffi_cast) -FFDEF(ffi_typeof) -FFDEF(ffi_istype) -FFDEF(ffi_sizeof) -FFDEF(ffi_alignof) -FFDEF(ffi_offsetof) -FFDEF(ffi_errno) -FFDEF(ffi_string) -FFDEF(ffi_copy) -FFDEF(ffi_fill) 
-FFDEF(ffi_abi) -FFDEF(ffi_metatype) -FFDEF(ffi_gc) -FFDEF(ffi_load) - -#undef FFDEF - -#ifndef FF_NUM_ASMFUNC -#define FF_NUM_ASMFUNC 62 -#endif - diff --git a/source/libs/luajit/LuaJIT-src/src/_lj_folddef.h_ b/source/libs/luajit/LuaJIT-src/src/_lj_folddef.h_ deleted file mode 100644 index c4ec4efd54e435842f0079e9b5eca89d12f59c28..0000000000000000000000000000000000000000 --- a/source/libs/luajit/LuaJIT-src/src/_lj_folddef.h_ +++ /dev/null @@ -1,1068 +0,0 @@ -/* This is a generated file. DO NOT EDIT! */ - -static const FoldFunc fold_func[] = { - fold_kfold_numarith, - fold_kfold_ldexp, - fold_kfold_fpmath, - fold_kfold_numpow, - fold_kfold_numcomp, - fold_kfold_intarith, - fold_kfold_intovarith, - fold_kfold_bnot, - fold_kfold_bswap, - fold_kfold_intcomp, - fold_kfold_intcomp0, - fold_kfold_int64arith, - fold_kfold_int64arith2, - fold_kfold_int64shift, - fold_kfold_bnot64, - fold_kfold_bswap64, - fold_kfold_int64comp, - fold_kfold_int64comp0, - fold_kfold_snew_kptr, - fold_kfold_snew_empty, - fold_kfold_strref, - fold_kfold_strref_snew, - fold_kfold_strcmp, - fold_kfold_add_kgc, - fold_kfold_add_kptr, - fold_kfold_add_kright, - fold_kfold_tobit, - fold_kfold_conv_kint_num, - fold_kfold_conv_kintu32_num, - fold_kfold_conv_kint_ext, - fold_kfold_conv_kint_i64, - fold_kfold_conv_kint64_num_i64, - fold_kfold_conv_kint64_num_u64, - fold_kfold_conv_kint64_int_i64, - fold_kfold_conv_knum_int_num, - fold_kfold_conv_knum_u32_num, - fold_kfold_conv_knum_i64_num, - fold_kfold_conv_knum_u64_num, - fold_kfold_tostr_knum, - fold_kfold_tostr_kint, - fold_kfold_strto, - lj_opt_cse, - fold_kfold_kref, - fold_shortcut_round, - fold_shortcut_left, - fold_shortcut_dropleft, - fold_shortcut_leftleft, - fold_simplify_numadd_negx, - fold_simplify_numadd_xneg, - fold_simplify_numsub_k, - fold_simplify_numsub_negk, - fold_simplify_numsub_xneg, - fold_simplify_nummuldiv_k, - fold_simplify_nummuldiv_negk, - fold_simplify_nummuldiv_negneg, - fold_simplify_numpow_xk, - fold_simplify_numpow_kx, 
- fold_shortcut_conv_num_int, - fold_simplify_conv_int_num, - fold_simplify_conv_i64_num, - fold_simplify_conv_int_i64, - fold_simplify_conv_flt_num, - fold_simplify_tobit_conv, - fold_simplify_floor_conv, - fold_simplify_conv_sext, - fold_simplify_conv_narrow, - fold_cse_conv, - fold_narrow_convert, - fold_simplify_intadd_k, - fold_simplify_intmul_k, - fold_simplify_intsub_k, - fold_simplify_intsub_kleft, - fold_simplify_intadd_k64, - fold_simplify_intsub_k64, - fold_simplify_intmul_k32, - fold_simplify_intmul_k64, - fold_simplify_intmod_k, - fold_simplify_intmod_kleft, - fold_simplify_intsub, - fold_simplify_intsubadd_leftcancel, - fold_simplify_intsubsub_leftcancel, - fold_simplify_intsubsub_rightcancel, - fold_simplify_intsubadd_rightcancel, - fold_simplify_intsubaddadd_cancel, - fold_simplify_band_k, - fold_simplify_bor_k, - fold_simplify_bxor_k, - fold_simplify_shift_ik, - fold_simplify_shift_andk, - fold_simplify_shift1_ki, - fold_simplify_shift2_ki, - fold_simplify_shiftk_andk, - fold_simplify_andk_shiftk, - fold_reassoc_intarith_k, - fold_reassoc_intarith_k64, - fold_reassoc_dup, - fold_reassoc_bxor, - fold_reassoc_shift, - fold_reassoc_minmax_k, - fold_reassoc_minmax_left, - fold_reassoc_minmax_right, - fold_abc_fwd, - fold_abc_k, - fold_abc_invar, - fold_comm_swap, - fold_comm_equal, - fold_comm_comp, - fold_comm_dup, - fold_comm_bxor, - fold_merge_eqne_snew_kgc, - lj_opt_fwd_aload, - fold_kfold_hload_kkptr, - lj_opt_fwd_hload, - lj_opt_fwd_uload, - lj_opt_fwd_tab_len, - fold_cse_uref, - lj_opt_fwd_hrefk, - fold_fwd_href_tnew, - fold_fwd_href_tdup, - fold_fload_tab_tnew_asize, - fold_fload_tab_tnew_hmask, - fold_fload_tab_tdup_asize, - fold_fload_tab_tdup_hmask, - fold_fload_tab_ah, - fold_fload_str_len_kgc, - fold_fload_str_len_snew, - fold_fload_cdata_typeid_kgc, - fold_fload_cdata_int64_kgc, - fold_fload_cdata_typeid_cnew, - fold_fload_cdata_ptr_int64_cnew, - lj_opt_cse, - lj_opt_fwd_fload, - fold_fwd_sload, - fold_xload_kptr, - lj_opt_fwd_xload, - 
fold_barrier_tab, - fold_barrier_tnew_tdup, - lj_opt_dse_ahstore, - lj_opt_dse_ustore, - lj_opt_dse_fstore, - lj_opt_dse_xstore, - lj_ir_emit -}; - -static const uint32_t fold_hash[916] = { -0xffffffff, -0xffffffff, -0x5b4c8016, -0x0d4e7016, -0xffffffff, -0x1000701c, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x29110c1a, -0xffffffff, -0xffffffff, -0x5b488016, -0x0d4a7016, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x7b87fc07, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x0d467016, -0xffffffff, -0x5a4c73ff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x5153fc29, -0xffffffff, -0xffffffff, -0xffffffff, -0x5d408016, -0xffffffff, -0x594873ff, -0x8187440f, -0xffffffff, -0xffffffff, -0xffffffff, -0x8287fc0f, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6715ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a11fc1a, -0xffffffff, -0x1daa5a70, -0xffffffff, -0xffffffff, -0x0a0bfc16, -0x5c408c16, -0x6911ffff, -0x8db7ffff, -0xffffffff, -0xffffffff, -0x1caa59d4, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6a0dffff, -0x2b68d002, -0xffffffff, -0x3cab5695, -0xffffffff, -0x41aaa675, -0xffffffff, -0xffffffff, -0xffffffff, -0x27ae5800, -0xffffffff, -0x6a09ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x7f865c0f, -0xffffffff, -0xffffffff, -0xffffffff, -0x6a05ffff, -0x42abffff, -0x5e44881c, -0x5d50a016, -0x066c5816, -0x00646c1b, -0x75753bff, -0x1951fc18, -0x6264c81b, -0x1850641c, -0xffffffff, -0x6a01ffff, -0x87a7ffff, -0x4953fc1c, -0x8da80000, -0x4f52a3ff, -0x00606c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0x5d428416, -0x88a53800, -0xffffffff, -0xffffffff, -0xffffffff, -0x05645816, -0xffffffff, -0x005c6c1b, -0x20aa71d6, -0xffffffff, -0xffffffff, -0xffffffff, -0x1399fc16, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x157f33ff, -0xffffffff, -0xffffffff, -0x584dfc20, -0xffffffff, -0xffffffff, 
-0xffffffff, -0x8d9bffff, -0xffffffff, -0x055c5816, -0xffffffff, -0x00546c1b, -0xffffffff, -0xffffffff, -0x5849fc20, -0xffffffff, -0xffffffff, -0xffffffff, -0x8c97ffff, -0x5543fc1c, -0x05585816, -0xffffffff, -0x00506c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x8a93ffff, -0x26ae6c00, -0x05545816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x76753c17, -0x41aaa695, -0xffffffff, -0x898fffff, -0xffffffff, -0x05505816, -0xffffffff, -0xffffffff, -0xffffffff, -0x858867ff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x848bffff, -0xffffffff, -0x054c5816, -0x79873c06, -0x47525bff, -0xffffffff, -0x3f695401, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x8387ffff, -0xffffffff, -0x05485816, -0xffffffff, -0x5a4e5bff, -0xffffffff, -0xffffffff, -0x6264c816, -0x43aaa26e, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x05445816, -0xffffffff, -0x5a4a5bff, -0xffffffff, -0xffffffff, -0xffffffff, -0x3455fc1b, -0x0c5a701c, -0x6366cbff, -0x0e3c7000, -0xffffffff, -0x05405816, -0xffffffff, -0x59465bff, -0xffffffff, -0xffffffff, -0xffffffff, -0x41aaa276, -0x0c56701c, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x21aa7275, -0x0b52701c, -0x61489016, -0x6465fc33, -0x8d77ffff, -0xffffffff, -0x7b87fc05, -0xffffffff, -0xffffffff, -0x2a126bff, -0x385a6fff, -0xffffffff, -0x446dfc16, -0xffffffff, -0x7473ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x7d873000, -0xffffffff, -0x5c409016, -0x686fffff, -0x8187440d, -0xffffffff, -0xffffffff, -0x3554b81b, -0x8287fc0d, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x686bffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x8d9ffc00, -0x737a5fff, -0x41aaaa75, -0xffffffff, -0xffffffff, -0x5e40801c, -0x0b42701c, -0x6b67ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, 
-0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2b68d000, -0xffffffff, -0xffffffff, -0x6d133017, -0xffffffff, -0xffffffff, -0x4c59fc16, -0xffffffff, -0xffffffff, -0x110bfc1c, -0x3aab566e, -0xffffffff, -0x5052a7ff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6515fc28, -0x4a55fc16, -0x7f865c0d, -0x88a53c00, -0x41aaa296, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x4451fc16, -0xffffffff, -0x60448bff, -0x21aa7295, -0xffffffff, -0x3cab5676, -0x04106c1b, -0xffffffff, -0x78873807, -0xffffffff, -0xffffffff, -0x574dfc16, -0xffffffff, -0x4e53ffff, -0xffffffff, -0x09145816, -0xffffffff, -0x040c6c1b, -0x8287fc00, -0x5e50a01c, -0x6467fc32, -0xffffffff, -0x5749fc16, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a105816, -0x2e3e7c00, -0x04086c1b, -0x7083fc00, -0xffffffff, -0xffffffff, -0xffffffff, -0x5645fc16, -0xffffffff, -0x22aa6e6e, -0x5e42841c, -0x614e9c16, -0x090c5816, -0x04046c1b, -0x1eaa5ab3, -0xffffffff, -0xffffffff, -0xffffffff, -0x5441fc16, -0x41aaaa95, -0xffffffff, -0x5352a028, -0x09085816, -0x17505c16, -0x04006c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6b43ffff, -0xffffffff, -0x09045816, -0xffffffff, -0x43aaa2ae, -0xffffffff, -0xffffffff, -0xffffffff, -0x083e5800, -0x7c865c00, -0xffffffff, -0x76753c15, -0x3051fc2e, -0x09005816, -0xffffffff, -0xffffffff, -0xffffffff, -0x3aab568e, -0xffffffff, -0x43aaa66e, -0xffffffff, -0x1daa5a71, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a12701c, -0x5f66cfff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x3cab5696, -0xffffffff, -0x100e701c, -0x41aaa676, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a125c17, -0x3654b82e, -0x100a701c, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, 
-0xffffffff, -0x1006701c, -0xffffffff, -0x1951fc19, -0xffffffff, -0xffffffff, -0xffffffff, -0x23aa6e8e, -0xffffffff, -0x5b4e8016, -0xffffffff, -0x1eaa5ad3, -0x1002701c, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x29130c1a, -0xffffffff, -0xffffffff, -0x0d4c7016, -0xffffffff, -0x475273ff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x5b468016, -0x0d487016, -0x5a4e73ff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x5d54a816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x5a4a73ff, -0x6615fc16, -0x3bab56ae, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x594673ff, -0xffffffff, -0x61468c16, -0x8d17ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a13fc1a, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6913ffff, -0x40abfeb3, -0x8db9ffff, -0x41aaa696, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6a0fffff, -0x8db5ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x7a873c07, -0xffffffff, -0xffffffff, -0xffffffff, -0x6a0bffff, -0x3f695402, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x24aa6eae, -0xffffffff, -0xffffffff, -0x6a07ffff, -0xffffffff, -0xffffffff, -0x066e5816, -0xffffffff, -0x00666c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6a03ffff, -0xffffffff, -0x4b55fc1c, -0x066a5816, -0xffffffff, -0x00626c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x4851fc1c, -0x05665816, -0x18506016, -0x005e6c1b, -0x12986416, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x8da1ffff, -0xffffffff, -0x3bab56ce, -0xffffffff, -0x43aaa6ae, -0xffffffff, -0xffffffff, -0x584ffc20, -0x7b87fc06, -0xffffffff, -0x5f4287ff, -0x8d9dffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x00566c1b, -0xffffffff, 
-0xffffffff, -0x584bfc20, -0x5253fc28, -0xffffffff, -0xffffffff, -0xffffffff, -0x5645fc1c, -0xffffffff, -0x40abfed3, -0x00526c1b, -0x8187440e, -0xffffffff, -0x5847fc20, -0x8287fc0e, -0xffffffff, -0xffffffff, -0x8b95ffff, -0x2e3c7800, -0x5441fc1c, -0xffffffff, -0xffffffff, -0xffffffff, -0x17505c1c, -0xffffffff, -0xffffffff, -0x41aaaa76, -0xffffffff, -0x614c9816, -0x8991ffff, -0x1daa5a6f, -0x05525816, -0x4d585bff, -0xffffffff, -0x8087400c, -0xffffffff, -0xffffffff, -0xffffffff, -0x1baa59d3, -0x828dffff, -0x25aa6ece, -0x054e5816, -0x76753c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0x2b68d001, -0xffffffff, -0xffffffff, -0xffffffff, -0x8689ffff, -0xffffffff, -0x054a5816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x43aca01b, -0x05465816, -0x7f865c0e, -0x5a4c5bff, -0x39ab55d3, -0x01626c16, -0x02686fff, -0x3457fc1b, -0xffffffff, -0xffffffff, -0x0f3e7000, -0x3dab55ae, -0x05425816, -0x1951fc17, -0x59485bff, -0xffffffff, -0xffffffff, -0xffffffff, -0x3153fc1b, -0x0c58701c, -0x5f64cbff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x035a6c16, -0xffffffff, -0xffffffff, -0xffffffff, -0x0b54701c, -0xffffffff, -0x8779ffff, -0x1faa71d5, -0xffffffff, -0xffffffff, -0x2d5eb81b, -0x72b5fc08, -0xffffffff, -0xffffffff, -0xffffffff, -0x0b50701c, -0x456ffc16, -0x7b75ffff, -0xffffffff, -0xffffffff, -0x147e5c16, -0xffffffff, -0xffffffff, -0x2a106bff, -0xffffffff, -0x1eaa5ab4, -0x446bfc16, -0xffffffff, -0xffffffff, -0xffffffff, -0x41aaaa96, -0xffffffff, -0x3556b81b, -0x87a5fc00, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x4e6dffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x3252b81b, -0xffffffff, -0x5e54a81c, -0xffffffff, -0xffffffff, -0x0b44701c, -0x28b05c00, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x73785fff, -0xffffffff, -0xffffffff, -0xffffffff, -0x0b40701c, -0xffffffff, -0x6b65ffff, -0xffffffff, -0xffffffff, -0x1daa5a72, -0xffffffff, 
-0xffffffff, -0xffffffff, -0x6266cc1b, -0xffffffff, -0x375bfc16, -0xffffffff, -0xffffffff, -0xffffffff, -0x3f695400, -0xffffffff, -0xffffffff, -0xffffffff, -0x6d113017, -0x3ead541b, -0xffffffff, -0x5d448816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x18506416, -0xffffffff, -0xffffffff, -0x16b37400, -0xffffffff, -0x4653fc16, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x04126c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x574ffc16, -0xffffffff, -0x6855ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x040e6c1b, -0x41aaa275, -0xffffffff, -0xffffffff, -0xffffffff, -0x574bfc16, -0x6f826400, -0x6851ffff, -0x1eaa5ad4, -0x2a125816, -0xffffffff, -0x040a6c1b, -0x7185fc00, -0xffffffff, -0xffffffff, -0xffffffff, -0x5747fc16, -0x7b87fc04, -0xffffffff, -0xffffffff, -0x090e5816, -0xffffffff, -0x04066c1b, -0x6e81fc00, -0x1aac6c1b, -0x1850601c, -0x2e5cbbff, -0x5543fc16, -0xffffffff, -0xffffffff, -0xffffffff, -0x090a5816, -0xffffffff, -0x04026c1b, -0xffffffff, -0xffffffff, -0x8087440c, -0xffffffff, -0xffffffff, -0xffffffff, -0x6c45ffff, -0x8287fc0c, -0x09065816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6b41ffff, -0x3353fc2e, -0x09025816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2f50bbff, -0x073c5800, -0x6266cc16, -0x5f4083ff, -0xffffffff, -0xffffffff, -0x43aca41b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a10701c, -0x6364cfff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x7e865c0c, -0xffffffff, -0xffffffff, -0x3656b82e, -0x41aaa295, -0x100c701c, -0x614a9416, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2c5ebc1b, -0xffffffff, -0x2a105c17, -0xffffffff, -0x1008701c, -0x3cab5675, -0xffffffff, -0xffffffff, -0x77873806, -0xffffffff, 
-0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x1004701c, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff -}; - -#define fold_hashkey(k) (lj_rol(lj_rol((k),17)-(k),16)%915) - diff --git a/source/libs/luajit/LuaJIT-src/src/_lj_libdef.h_ b/source/libs/luajit/LuaJIT-src/src/_lj_libdef.h_ deleted file mode 100644 index 5693a5813912d3c37cd02736b265ad85ca2f1846..0000000000000000000000000000000000000000 --- a/source/libs/luajit/LuaJIT-src/src/_lj_libdef.h_ +++ /dev/null @@ -1,393 +0,0 @@ -/* This is a generated file. DO NOT EDIT! */ - -#ifdef LJLIB_MODULE_base -#undef LJLIB_MODULE_base -static const lua_CFunction lj_lib_cf_base[] = { - lj_ffh_assert, - lj_ffh_next, - lj_ffh_pairs, - lj_ffh_ipairs_aux, - lj_ffh_ipairs, - lj_ffh_setmetatable, - lj_cf_getfenv, - lj_cf_setfenv, - lj_ffh_rawget, - lj_cf_rawset, - lj_cf_rawequal, - lj_cf_unpack, - lj_cf_select, - lj_ffh_tonumber, - lj_ffh_tostring, - lj_cf_error, - lj_ffh_pcall, - lj_cf_loadfile, - lj_cf_load, - lj_cf_loadstring, - lj_cf_dofile, - lj_cf_gcinfo, - lj_cf_collectgarbage, - lj_cf_newproxy, - lj_cf_print -}; -static const uint8_t lj_lib_init_base[] = { -2,0,28,70,97,115,115,101,114,116,195,110,105,108,199,98,111,111,108,101,97, -110,252,1,200,117,115,101,114,100,97,116,97,198,115,116,114,105,110,103,197, -117,112,118,97,108,198,116,104,114,101,97,100,197,112,114,111,116,111,200,102, -117,110,99,116,105,111,110,197,116,114,97,99,101,197,99,100,97,116,97,197,116, -97,98,108,101,252,9,198,110,117,109,98,101,114,132,116,121,112,101,68,110,101, -120,116,253,69,112,97,105,114,115,64,253,70,105,112,97,105,114,115,140,103, -101,116,109,101,116,97,116,97,98,108,101,76,115,101,116,109,101,116,97,116, -97,98,108,101,7,103,101,116,102,101,110,118,7,115,101,116,102,101,110,118,70, -114,97,119,103,101,116,6,114,97,119,115,101,116,8,114,97,119,101,113,117,97, -108,6,117,110,112,97,99,107,6,115,101,108,101,99,116,72,116,111,110,117,109, 
-98,101,114,195,110,105,108,197,102,97,108,115,101,196,116,114,117,101,72,116, -111,115,116,114,105,110,103,5,101,114,114,111,114,69,112,99,97,108,108,134, -120,112,99,97,108,108,8,108,111,97,100,102,105,108,101,4,108,111,97,100,10, -108,111,97,100,115,116,114,105,110,103,6,100,111,102,105,108,101,6,103,99,105, -110,102,111,14,99,111,108,108,101,99,116,103,97,114,98,97,103,101,252,2,8,110, -101,119,112,114,111,120,121,200,116,111,115,116,114,105,110,103,5,112,114,105, -110,116,252,3,200,95,86,69,82,83,73,79,78,250,255 -}; -#endif - -#ifdef LJLIB_MODULE_coroutine -#undef LJLIB_MODULE_coroutine -static const lua_CFunction lj_lib_cf_coroutine[] = { - lj_cf_coroutine_status, - lj_cf_coroutine_running, - lj_cf_coroutine_create, - lj_ffh_coroutine_yield, - lj_ffh_coroutine_resume, - lj_cf_coroutine_wrap -}; -static const uint8_t lj_lib_init_coroutine[] = { -30,13,6,6,115,116,97,116,117,115,7,114,117,110,110,105,110,103,6,99,114,101, -97,116,101,69,121,105,101,108,100,70,114,101,115,117,109,101,254,4,119,114, -97,112,255 -}; -#endif - -#ifdef LJLIB_MODULE_math -#undef LJLIB_MODULE_math -static const lua_CFunction lj_lib_cf_math[] = { - lj_ffh_math_abs, - lj_ffh_math_sqrt, - lj_ffh_math_log, - lj_ffh_math_atan2, - lj_ffh_math_ldexp, - lj_ffh_math_min, - lj_cf_math_random, - lj_cf_math_randomseed -}; -static const uint8_t lj_lib_init_math[] = { -37,16,30,67,97,98,115,133,102,108,111,111,114,132,99,101,105,108,68,115,113, -114,116,133,108,111,103,49,48,131,101,120,112,131,115,105,110,131,99,111,115, -131,116,97,110,132,97,115,105,110,132,97,99,111,115,132,97,116,97,110,132,115, -105,110,104,132,99,111,115,104,132,116,97,110,104,133,102,114,101,120,112,132, -109,111,100,102,251,248,193,99,26,220,165,76,64,131,100,101,103,251,57,157, -82,162,70,223,145,63,131,114,97,100,67,108,111,103,69,97,116,97,110,50,131, -112,111,119,132,102,109,111,100,69,108,100,101,120,112,67,109,105,110,131,109, -97,120,251,24,45,68,84,251,33,9,64,194,112,105,250,251,0,0,0,0,0,0,240,127, 
-196,104,117,103,101,250,252,2,6,114,97,110,100,111,109,252,2,10,114,97,110, -100,111,109,115,101,101,100,255 -}; -#endif - -#ifdef LJLIB_MODULE_bit -#undef LJLIB_MODULE_bit -static const lua_CFunction lj_lib_cf_bit[] = { - lj_ffh_bit_tobit, - lj_ffh_bit_lshift, - lj_ffh_bit_band, - lj_cf_bit_tohex -}; -static const uint8_t lj_lib_init_bit[] = { -65,42,12,69,116,111,98,105,116,132,98,110,111,116,133,98,115,119,97,112,70, -108,115,104,105,102,116,134,114,115,104,105,102,116,135,97,114,115,104,105, -102,116,131,114,111,108,131,114,111,114,68,98,97,110,100,131,98,111,114,132, -98,120,111,114,5,116,111,104,101,120,255 -}; -#endif - -#ifdef LJLIB_MODULE_string -#undef LJLIB_MODULE_string -static const lua_CFunction lj_lib_cf_string[] = { - lj_ffh_string_len, - lj_ffh_string_byte, - lj_ffh_string_char, - lj_ffh_string_sub, - lj_ffh_string_rep, - lj_ffh_string_reverse, - lj_cf_string_dump, - lj_cf_string_find, - lj_cf_string_match, - lj_cf_string_gmatch, - lj_cf_string_gsub, - lj_cf_string_format -}; -static const uint8_t lj_lib_init_string[] = { -77,53,14,67,108,101,110,68,98,121,116,101,68,99,104,97,114,67,115,117,98,67, -114,101,112,71,114,101,118,101,114,115,101,133,108,111,119,101,114,133,117, -112,112,101,114,4,100,117,109,112,4,102,105,110,100,5,109,97,116,99,104,254, -6,103,109,97,116,99,104,4,103,115,117,98,6,102,111,114,109,97,116,255 -}; -#endif - -#ifdef LJLIB_MODULE_table -#undef LJLIB_MODULE_table -static const lua_CFunction lj_lib_cf_table[] = { - lj_cf_table_foreachi, - lj_cf_table_foreach, - lj_ffh_table_getn, - lj_cf_table_maxn, - lj_cf_table_insert, - lj_cf_table_remove, - lj_cf_table_concat, - lj_cf_table_sort -}; -static const uint8_t lj_lib_init_table[] = { -92,61,8,8,102,111,114,101,97,99,104,105,7,102,111,114,101,97,99,104,68,103, -101,116,110,4,109,97,120,110,6,105,110,115,101,114,116,6,114,101,109,111,118, -101,6,99,111,110,99,97,116,4,115,111,114,116,255 -}; -#endif - -#ifdef LJLIB_MODULE_io_method -#undef LJLIB_MODULE_io_method -static const 
lua_CFunction lj_lib_cf_io_method[] = { - lj_cf_io_method_close, - lj_cf_io_method_read, - lj_cf_io_method_write, - lj_cf_io_method_flush, - lj_cf_io_method_seek, - lj_cf_io_method_setvbuf, - lj_cf_io_method_lines, - lj_cf_io_method___gc, - lj_cf_io_method___tostring -}; -static const uint8_t lj_lib_init_io_method[] = { -100,62,10,5,99,108,111,115,101,4,114,101,97,100,5,119,114,105,116,101,5,102, -108,117,115,104,4,115,101,101,107,7,115,101,116,118,98,117,102,5,108,105,110, -101,115,4,95,95,103,99,10,95,95,116,111,115,116,114,105,110,103,252,1,199,95, -95,105,110,100,101,120,250,255 -}; -#endif - -#ifdef LJLIB_MODULE_io -#undef LJLIB_MODULE_io -static const lua_CFunction lj_lib_cf_io[] = { - lj_cf_io_open, - lj_cf_io_popen, - lj_cf_io_tmpfile, - lj_cf_io_close, - lj_cf_io_read, - lj_cf_io_write, - lj_cf_io_flush, - lj_cf_io_input, - lj_cf_io_output, - lj_cf_io_lines, - lj_cf_io_type -}; -static const uint8_t lj_lib_init_io[] = { -109,62,12,252,2,192,250,4,111,112,101,110,5,112,111,112,101,110,7,116,109,112, -102,105,108,101,5,99,108,111,115,101,4,114,101,97,100,5,119,114,105,116,101, -5,102,108,117,115,104,5,105,110,112,117,116,6,111,117,116,112,117,116,5,108, -105,110,101,115,4,116,121,112,101,255 -}; -#endif - -#ifdef LJLIB_MODULE_os -#undef LJLIB_MODULE_os -static const lua_CFunction lj_lib_cf_os[] = { - lj_cf_os_execute, - lj_cf_os_remove, - lj_cf_os_rename, - lj_cf_os_tmpname, - lj_cf_os_getenv, - lj_cf_os_exit, - lj_cf_os_clock, - lj_cf_os_date, - lj_cf_os_time, - lj_cf_os_difftime, - lj_cf_os_setlocale -}; -static const uint8_t lj_lib_init_os[] = { -120,62,11,7,101,120,101,99,117,116,101,6,114,101,109,111,118,101,6,114,101, -110,97,109,101,7,116,109,112,110,97,109,101,6,103,101,116,101,110,118,4,101, -120,105,116,5,99,108,111,99,107,4,100,97,116,101,4,116,105,109,101,8,100,105, -102,102,116,105,109,101,9,115,101,116,108,111,99,97,108,101,255 -}; -#endif - -#ifdef LJLIB_MODULE_debug -#undef LJLIB_MODULE_debug -static const lua_CFunction lj_lib_cf_debug[] = { 
- lj_cf_debug_getregistry, - lj_cf_debug_getmetatable, - lj_cf_debug_setmetatable, - lj_cf_debug_getfenv, - lj_cf_debug_setfenv, - lj_cf_debug_getinfo, - lj_cf_debug_getlocal, - lj_cf_debug_setlocal, - lj_cf_debug_getupvalue, - lj_cf_debug_setupvalue, - lj_cf_debug_upvalueid, - lj_cf_debug_upvaluejoin, - lj_cf_debug_sethook, - lj_cf_debug_gethook, - lj_cf_debug_debug, - lj_cf_debug_traceback -}; -static const uint8_t lj_lib_init_debug[] = { -131,62,16,11,103,101,116,114,101,103,105,115,116,114,121,12,103,101,116,109, -101,116,97,116,97,98,108,101,12,115,101,116,109,101,116,97,116,97,98,108,101, -7,103,101,116,102,101,110,118,7,115,101,116,102,101,110,118,7,103,101,116,105, -110,102,111,8,103,101,116,108,111,99,97,108,8,115,101,116,108,111,99,97,108, -10,103,101,116,117,112,118,97,108,117,101,10,115,101,116,117,112,118,97,108, -117,101,9,117,112,118,97,108,117,101,105,100,11,117,112,118,97,108,117,101, -106,111,105,110,7,115,101,116,104,111,111,107,7,103,101,116,104,111,111,107, -5,100,101,98,117,103,9,116,114,97,99,101,98,97,99,107,255 -}; -#endif - -#ifdef LJLIB_MODULE_jit -#undef LJLIB_MODULE_jit -static const lua_CFunction lj_lib_cf_jit[] = { - lj_cf_jit_on, - lj_cf_jit_off, - lj_cf_jit_flush, - lj_cf_jit_status, - lj_cf_jit_attach -}; -static const uint8_t lj_lib_init_jit[] = { -147,62,9,2,111,110,3,111,102,102,5,102,108,117,115,104,6,115,116,97,116,117, -115,6,97,116,116,97,99,104,252,5,194,111,115,250,252,4,196,97,114,99,104,250, -252,3,203,118,101,114,115,105,111,110,95,110,117,109,250,252,2,199,118,101, -114,115,105,111,110,250,255 -}; -#endif - -#ifdef LJLIB_MODULE_jit_util -#undef LJLIB_MODULE_jit_util -static const lua_CFunction lj_lib_cf_jit_util[] = { - lj_cf_jit_util_funcinfo, - lj_cf_jit_util_funcbc, - lj_cf_jit_util_funck, - lj_cf_jit_util_funcuvname, - lj_cf_jit_util_traceinfo, - lj_cf_jit_util_traceir, - lj_cf_jit_util_tracek, - lj_cf_jit_util_tracesnap, - lj_cf_jit_util_tracemc, - lj_cf_jit_util_traceexitstub, - lj_cf_jit_util_ircalladdr -}; 
-static const uint8_t lj_lib_init_jit_util[] = { -152,62,11,8,102,117,110,99,105,110,102,111,6,102,117,110,99,98,99,5,102,117, -110,99,107,10,102,117,110,99,117,118,110,97,109,101,9,116,114,97,99,101,105, -110,102,111,7,116,114,97,99,101,105,114,6,116,114,97,99,101,107,9,116,114,97, -99,101,115,110,97,112,7,116,114,97,99,101,109,99,13,116,114,97,99,101,101,120, -105,116,115,116,117,98,10,105,114,99,97,108,108,97,100,100,114,255 -}; -#endif - -#ifdef LJLIB_MODULE_jit_opt -#undef LJLIB_MODULE_jit_opt -static const lua_CFunction lj_lib_cf_jit_opt[] = { - lj_cf_jit_opt_start -}; -static const uint8_t lj_lib_init_jit_opt[] = { -163,62,1,5,115,116,97,114,116,255 -}; -#endif - -#ifdef LJLIB_MODULE_ffi_meta -#undef LJLIB_MODULE_ffi_meta -static const lua_CFunction lj_lib_cf_ffi_meta[] = { - lj_cf_ffi_meta___index, - lj_cf_ffi_meta___newindex, - lj_cf_ffi_meta___eq, - lj_cf_ffi_meta___len, - lj_cf_ffi_meta___lt, - lj_cf_ffi_meta___le, - lj_cf_ffi_meta___concat, - lj_cf_ffi_meta___call, - lj_cf_ffi_meta___add, - lj_cf_ffi_meta___sub, - lj_cf_ffi_meta___mul, - lj_cf_ffi_meta___div, - lj_cf_ffi_meta___mod, - lj_cf_ffi_meta___pow, - lj_cf_ffi_meta___unm, - lj_cf_ffi_meta___tostring, - lj_cf_ffi_meta___pairs, - lj_cf_ffi_meta___ipairs -}; -static const uint8_t lj_lib_init_ffi_meta[] = { -164,62,19,7,95,95,105,110,100,101,120,10,95,95,110,101,119,105,110,100,101, -120,4,95,95,101,113,5,95,95,108,101,110,4,95,95,108,116,4,95,95,108,101,8,95, -95,99,111,110,99,97,116,6,95,95,99,97,108,108,5,95,95,97,100,100,5,95,95,115, -117,98,5,95,95,109,117,108,5,95,95,100,105,118,5,95,95,109,111,100,5,95,95, -112,111,119,5,95,95,117,110,109,10,95,95,116,111,115,116,114,105,110,103,7, -95,95,112,97,105,114,115,8,95,95,105,112,97,105,114,115,195,102,102,105,203, -95,95,109,101,116,97,116,97,98,108,101,250,255 -}; -#endif - -#ifdef LJLIB_MODULE_ffi_clib -#undef LJLIB_MODULE_ffi_clib -static const lua_CFunction lj_lib_cf_ffi_clib[] = { - lj_cf_ffi_clib___index, - lj_cf_ffi_clib___newindex, - 
lj_cf_ffi_clib___gc -}; -static const uint8_t lj_lib_init_ffi_clib[] = { -182,62,3,7,95,95,105,110,100,101,120,10,95,95,110,101,119,105,110,100,101,120, -4,95,95,103,99,255 -}; -#endif - -#ifdef LJLIB_MODULE_ffi_callback -#undef LJLIB_MODULE_ffi_callback -static const lua_CFunction lj_lib_cf_ffi_callback[] = { - lj_cf_ffi_callback_free, - lj_cf_ffi_callback_set -}; -static const uint8_t lj_lib_init_ffi_callback[] = { -185,62,3,4,102,114,101,101,3,115,101,116,252,1,199,95,95,105,110,100,101,120, -250,255 -}; -#endif - -#ifdef LJLIB_MODULE_ffi -#undef LJLIB_MODULE_ffi -static const lua_CFunction lj_lib_cf_ffi[] = { - lj_cf_ffi_cdef, - lj_cf_ffi_new, - lj_cf_ffi_cast, - lj_cf_ffi_typeof, - lj_cf_ffi_istype, - lj_cf_ffi_sizeof, - lj_cf_ffi_alignof, - lj_cf_ffi_offsetof, - lj_cf_ffi_errno, - lj_cf_ffi_string, - lj_cf_ffi_copy, - lj_cf_ffi_fill, - lj_cf_ffi_abi, - lj_cf_ffi_metatype, - lj_cf_ffi_gc, - lj_cf_ffi_load -}; -static const uint8_t lj_lib_init_ffi[] = { -187,62,22,4,99,100,101,102,3,110,101,119,4,99,97,115,116,6,116,121,112,101, -111,102,6,105,115,116,121,112,101,6,115,105,122,101,111,102,7,97,108,105,103, -110,111,102,8,111,102,102,115,101,116,111,102,5,101,114,114,110,111,6,115,116, -114,105,110,103,4,99,111,112,121,4,102,105,108,108,3,97,98,105,252,8,192,250, -8,109,101,116,97,116,121,112,101,252,7,192,250,2,103,99,252,5,192,250,4,108, -111,97,100,252,4,193,67,250,252,3,194,111,115,250,252,2,196,97,114,99,104,250, -255 -}; -#endif - diff --git a/source/libs/luajit/LuaJIT-src/src/_lj_recdef.h_ b/source/libs/luajit/LuaJIT-src/src/_lj_recdef.h_ deleted file mode 100644 index 2c3c9be63caa521873e612659d28e1853274fbae..0000000000000000000000000000000000000000 --- a/source/libs/luajit/LuaJIT-src/src/_lj_recdef.h_ +++ /dev/null @@ -1,263 +0,0 @@ -/* This is a generated file. DO NOT EDIT! 
*/ - -static const uint16_t recff_idmap[] = { -0, -0x0100, -0x0200, -0x0300, -0, -0, -0x0400, -0x0500, -0x0600, -0x0700, -0, -0, -0x0800, -0x0900, -0x0a00, -0, -0x0b00, -0x0c00, -0x0d00, -0, -0x0e00, -0x0f00, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0x1000, -0x1100+(IRFPM_FLOOR), -0x1100+(IRFPM_CEIL), -0x1200+(IRFPM_SQRT), -0x1200+(IRFPM_LOG10), -0x1200+(IRFPM_EXP), -0x1200+(IRFPM_SIN), -0x1200+(IRFPM_COS), -0x1200+(IRFPM_TAN), -0x1300+(FF_math_asin), -0x1300+(FF_math_acos), -0x1300+(FF_math_atan), -0x1400+(IRCALL_sinh), -0x1400+(IRCALL_cosh), -0x1400+(IRCALL_tanh), -0, -0x1500, -0x1600, -0x1600, -0x1700, -0x1800, -0x1900, -0, -0x1a00, -0x1b00+(IR_MIN), -0x1b00+(IR_MAX), -0x1c00, -0, -0x1d00+(IR_TOBIT), -0x1d00+(IR_BNOT), -0x1d00+(IR_BSWAP), -0x1e00+(IR_BSHL), -0x1e00+(IR_BSHR), -0x1e00+(IR_BSAR), -0x1e00+(IR_BROL), -0x1e00+(IR_BROR), -0x1f00+(IR_BAND), -0x1f00+(IR_BOR), -0x1f00+(IR_BXOR), -0, -0x2000, -0x2100+(0), -0, -0x2100+(1), -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0x2200, -0, -0x2300, -0x2400, -0, -0, -0, -0, -0x2500+(0), -0x2600+(0), -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0x2500+(GCROOT_IO_OUTPUT), -0x2600+(GCROOT_IO_OUTPUT), -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0x2700+(0), -0x2700+(1), -0x2800+(MM_eq), -0x2800+(MM_len), -0x2800+(MM_lt), -0x2800+(MM_le), -0x2800+(MM_concat), -0x2900, -0x2800+(MM_add), -0x2800+(MM_sub), -0x2800+(MM_mul), -0x2800+(MM_div), -0x2800+(MM_mod), -0x2800+(MM_pow), -0x2800+(MM_unm), -0, -0, -0, -0x2a00+(1), -0x2a00+(0), -0, -0, -0, -0, -0x2b00, -0x2b00, -0x2c00, -0x2d00, -0x2e00+(FF_ffi_sizeof), -0x2e00+(FF_ffi_alignof), -0x2e00+(FF_ffi_offsetof), -0x2f00, -0x3000, -0x3100, -0x3200, -0x3300, -0, -0x3400 -}; - -static const RecordFunc recff_func[] = { -recff_nyi, -recff_c, -recff_assert, -recff_type, -recff_ipairs_aux, -recff_ipairs, 
-recff_getmetatable, -recff_setmetatable, -recff_rawget, -recff_rawset, -recff_rawequal, -recff_select, -recff_tonumber, -recff_tostring, -recff_pcall, -recff_xpcall, -recff_math_abs, -recff_math_round, -recff_math_unary, -recff_math_atrig, -recff_math_htrig, -recff_math_modf, -recff_math_degrad, -recff_math_log, -recff_math_atan2, -recff_math_pow, -recff_math_ldexp, -recff_math_minmax, -recff_math_random, -recff_bit_unary, -recff_bit_shift, -recff_bit_nary, -recff_string_len, -recff_string_range, -recff_table_getn, -recff_table_insert, -recff_table_remove, -recff_io_write, -recff_io_flush, -recff_cdata_index, -recff_cdata_arith, -recff_cdata_call, -recff_clib_index, -recff_ffi_new, -recff_ffi_typeof, -recff_ffi_istype, -recff_ffi_xof, -recff_ffi_errno, -recff_ffi_string, -recff_ffi_copy, -recff_ffi_fill, -recff_ffi_abi, -recff_ffi_gc -}; - diff --git a/source/libs/luajit/LuaJIT-src/src/host/.gitignore b/source/libs/luajit/LuaJIT-src/src/host/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..762ac2a0c6e67d5dd75702a5473beeb978f03ef0 --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/host/.gitignore @@ -0,0 +1,3 @@ +minilua +buildvm +buildvm_arch.h diff --git a/source/libs/luajit/LuaJIT-src/src/host/buildvm.c b/source/libs/luajit/LuaJIT-src/src/host/buildvm.c index 98a7a57a40cab92f097167cedc9fea65178123cc..24db75f40ba0ef452c95bb6660d97bf1c4b1b2bd 100644 --- a/source/libs/luajit/LuaJIT-src/src/host/buildvm.c +++ b/source/libs/luajit/LuaJIT-src/src/host/buildvm.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** This is a tool to build the hand-tuned assembler code required for ** LuaJIT's bytecode interpreter. 
It supports a variety of output formats @@ -18,8 +18,10 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_bc.h" +#if LJ_HASJIT #include "lj_ir.h" #include "lj_ircall.h" +#endif #include "lj_frame.h" #include "lj_dispatch.h" #if LJ_HASFFI @@ -113,7 +115,7 @@ static const char *sym_decorate(BuildCtx *ctx, name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */ else *p = '\0'; -#elif LJ_TARGET_PPC && !LJ_TARGET_OSX && !LJ_TARGET_CONSOLE +#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE /* Keep @plt etc. */ #else *p = '\0'; @@ -250,6 +252,7 @@ BCDEF(BCNAME) NULL }; +#if LJ_HASJIT const char *const ir_names[] = { #define IRNAME(name, m, m1, m2) #name, IRDEF(IRNAME) @@ -290,7 +293,9 @@ static const char *const trace_errors[] = { #include "lj_traceerr.h" NULL }; +#endif +#if LJ_HASJIT static const char *lower(char *buf, const char *s) { char *p = buf; @@ -301,6 +306,7 @@ static const char *lower(char *buf, const char *s) *p = '\0'; return buf; } +#endif /* Emit C source code for bytecode-related definitions. */ static void emit_bcdef(BuildCtx *ctx) @@ -318,15 +324,19 @@ static void emit_bcdef(BuildCtx *ctx) /* Emit VM definitions as Lua code for debug modules. */ static void emit_vmdef(BuildCtx *ctx) { +#if LJ_HASJIT char buf[80]; +#endif int i; fprintf(ctx->fp, "-- This is a generated file. 
DO NOT EDIT!\n\n"); + fprintf(ctx->fp, "assert(require(\"jit\").version == \"%s\", \"LuaJIT core/library version mismatch\")\n\n", LUAJIT_VERSION); fprintf(ctx->fp, "return {\n\n"); fprintf(ctx->fp, "bcnames = \""); for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); fprintf(ctx->fp, "\",\n\n"); +#if LJ_HASJIT fprintf(ctx->fp, "irnames = \""); for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); fprintf(ctx->fp, "\",\n\n"); @@ -355,6 +365,7 @@ static void emit_vmdef(BuildCtx *ctx) for (i = 0; trace_errors[i]; i++) fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); fprintf(ctx->fp, "},\n\n"); +#endif } /* -- Argument parsing ---------------------------------------------------- */ diff --git a/source/libs/luajit/LuaJIT-src/src/host/buildvm.h b/source/libs/luajit/LuaJIT-src/src/host/buildvm.h index b90428dc48f5e82a33eaaa96c642291546877d73..a56eefd204a89076272b578123b38d8f43ab2e82 100644 --- a/source/libs/luajit/LuaJIT-src/src/host/buildvm.h +++ b/source/libs/luajit/LuaJIT-src/src/host/buildvm.h @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _BUILDVM_H diff --git a/source/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c b/source/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c index ffd14903c64c3326c60597315330094b223e99e9..3870b8fe5e504fd833e4951bc6d11a71eb9772ef 100644 --- a/source/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c +++ b/source/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder: Assembler source code emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include "buildvm.h" @@ -144,14 +144,6 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n", (ins & 1) ? 
"bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym); } else if ((ins >> 26) == 18) { -#if LJ_ARCH_PPC64 - const char *suffix = strchr(sym, '@'); - if (suffix && suffix[1] == 'h') { - fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym); - } else if (suffix && suffix[1] == 'l') { - fprintf(ctx->fp, "\tld 12, %s\n", sym); - } else -#endif fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym); } else { fprintf(stderr, @@ -250,10 +242,13 @@ void emit_asm(BuildCtx *ctx) int i, rel; fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); -#if LJ_ARCH_PPC64 - fprintf(ctx->fp, "\t.abiversion 2\n"); -#endif fprintf(ctx->fp, "\t.text\n"); +#if LJ_TARGET_MIPS32 && !LJ_ABI_SOFTFP + fprintf(ctx->fp, "\t.module fp=32\n"); +#endif +#if LJ_TARGET_MIPS + fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n"); +#endif emit_asm_align(ctx, 4); #if LJ_TARGET_PS3 @@ -279,9 +274,6 @@ void emit_asm(BuildCtx *ctx) ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" ".pad #28\n"); #endif -#endif -#if LJ_TARGET_MIPS - fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); #endif for (i = rel = 0; i < ctx->nsym; i++) { @@ -338,7 +330,7 @@ void emit_asm(BuildCtx *ctx) #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); #endif -#if LJ_TARGET_PPC && !LJ_TARGET_PS3 +#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP /* Hard-float ABI. */ fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); #endif @@ -347,6 +339,10 @@ void emit_asm(BuildCtx *ctx) fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident); break; case BUILD_machasm: +#if defined(__apple_build_version__) && __apple_build_version__ >= 15000000 && __apple_build_version__ < 15000300 + /* Workaround for XCode 15.0 - 15.2. 
*/ + fprintf(ctx->fp, "\t.subsections_via_symbols\n"); +#endif fprintf(ctx->fp, "\t.cstring\n" "\t.ascii \"%s\\0\"\n", ctx->dasm_ident); diff --git a/source/libs/luajit/LuaJIT-src/src/host/buildvm_fold.c b/source/libs/luajit/LuaJIT-src/src/host/buildvm_fold.c index d579f4d416a8f6f039d66b8e0f699de24f1aa35d..7017ab81c0dfff275dbd3e7dc42ffee8273c9deb 100644 --- a/source/libs/luajit/LuaJIT-src/src/host/buildvm_fold.c +++ b/source/libs/luajit/LuaJIT-src/src/host/buildvm_fold.c @@ -1,10 +1,11 @@ /* ** LuaJIT VM builder: IR folding hash table generator. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include "buildvm.h" #include "lj_obj.h" +#if LJ_HASJIT #include "lj_ir.h" /* Context for the folding hash table generator. */ @@ -226,4 +227,10 @@ void emit_fold(BuildCtx *ctx) makehash(ctx); } +#else +void emit_fold(BuildCtx *ctx) +{ + UNUSED(ctx); +} +#endif diff --git a/source/libs/luajit/LuaJIT-src/src/host/buildvm_lib.c b/source/libs/luajit/LuaJIT-src/src/host/buildvm_lib.c index 2956fdb6cd04681e3dc53ef12292aa32d35fe764..478847e142e05530aa4780b150be1f5a321c2e68 100644 --- a/source/libs/luajit/LuaJIT-src/src/host/buildvm_lib.c +++ b/source/libs/luajit/LuaJIT-src/src/host/buildvm_lib.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder: library definition compiler. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include "buildvm.h" @@ -379,12 +379,21 @@ void emit_lib(BuildCtx *ctx) /* Simplistic pre-processor. Only handles top-level #if/#endif. 
*/ if (buf[0] == '#' && buf[1] == 'i' && buf[2] == 'f') { int ok = 1; - if (!strcmp(buf, "#if LJ_52\n")) + size_t len = strlen(buf); + if (buf[len-1] == '\n') { + buf[len-1] = 0; + if (buf[len-2] == '\r') { + buf[len-2] = 0; + } + } + if (!strcmp(buf, "#if LJ_52")) ok = LJ_52; - else if (!strcmp(buf, "#if LJ_HASJIT\n")) + else if (!strcmp(buf, "#if LJ_HASJIT")) ok = LJ_HASJIT; - else if (!strcmp(buf, "#if LJ_HASFFI\n")) + else if (!strcmp(buf, "#if LJ_HASFFI")) ok = LJ_HASFFI; + else if (!strcmp(buf, "#if LJ_HASBUFFER")) + ok = LJ_HASBUFFER; if (!ok) { int lvl = 1; while (fgets(buf, sizeof(buf), fp) != NULL) { diff --git a/source/libs/luajit/LuaJIT-src/src/host/buildvm_libbc.h b/source/libs/luajit/LuaJIT-src/src/host/buildvm_libbc.h index b2600bd5905e5e2ab4f8708c9f8440b94d03e03f..276463b25bb5e22afa14ff5de43cd7e042700dde 100644 --- a/source/libs/luajit/LuaJIT-src/src/host/buildvm_libbc.h +++ b/source/libs/luajit/LuaJIT-src/src/host/buildvm_libbc.h @@ -4,42 +4,67 @@ static const int libbc_endian = 0; static const uint8_t libbc_code[] = { #if LJ_FR2 -0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, -0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, -16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, -0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1, -128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2, -0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7, -0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12, -0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128, -8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, -0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, -0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, -2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, 
-3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, -0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, -41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, -18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, -6,252,127,76,4,2,0,0 +/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3, +220,203,178,130,4, +/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20, +198,190,199,252,3, +/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0, +/* table.foreachi */ 0,2,10,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0, +BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0, +BC_MOV,8,5,0,BC_TGETR,9,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128, +BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0, +/* table.foreach */ 0,2,11,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI, +2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,9,5,0, +BC_MOV,10,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0, +BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15, +/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0, +/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0, +0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0, +BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0, +BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0, +BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7, +BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0, +BC_RET1,3,2,0,BC_RET0,0,1,0,0,2, +/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE, +2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE, +4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4, 
+128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0, +BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0, +BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0, +BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR, +11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0, #else -0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, -0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, -16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, -0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1, -128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2, -0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0, -0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12, -0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128, -8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, -0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, -0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, -2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, -3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, -0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, -41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, -18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, -6,252,127,76,4,2,0,0 +/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3, +220,203,178,130,4, +/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20, +198,190,199,252,3, +/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0, +/* table.foreachi */ 0,2,9,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0, 
+BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0, +BC_MOV,7,5,0,BC_TGETR,8,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128, +BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0, +/* table.foreach */ 0,2,10,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI, +2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,8,5,0, +BC_MOV,9,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0, +BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15, +/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0, +/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0, +0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0, +BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0, +BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0, +BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7, +BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0, +BC_RET1,3,2,0,BC_RET0,0,1,0,0,2, +/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE, +2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE, +4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4, +128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0, +BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0, +BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0, +BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR, +11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0, #endif +0 }; static const struct { const char *name; int ofs; } libbc_map[] = { @@ -48,9 +73,9 @@ static const struct { const char *name; int ofs; } libbc_map[] = { {"string_len",50}, {"table_foreachi",69}, {"table_foreach",136}, -{"table_getn",207}, -{"table_remove",226}, -{"table_move",355}, -{NULL,502} 
+{"table_getn",213}, +{"table_remove",232}, +{"table_move",361}, +{NULL,508} }; diff --git a/source/libs/luajit/LuaJIT-src/src/host/buildvm_peobj.c b/source/libs/luajit/LuaJIT-src/src/host/buildvm_peobj.c index 2eb2bb7bb334fe2d95d17f05ef601aed7b65ab6a..b662f60fc9345a7ee755a487a84388c44236a0a5 100644 --- a/source/libs/luajit/LuaJIT-src/src/host/buildvm_peobj.c +++ b/source/libs/luajit/LuaJIT-src/src/host/buildvm_peobj.c @@ -1,6 +1,6 @@ /* ** LuaJIT VM builder: PE object emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Only used for building on Windows, since we cannot assume the presence ** of a suitable assembler. The host and target byte order must match. @@ -9,7 +9,7 @@ #include "buildvm.h" #include "lj_bc.h" -#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC +#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN /* Context for PE object emitter. */ static char *strtab; @@ -93,12 +93,17 @@ typedef struct PEsymaux { #define PEOBJ_RELOC_ADDR32NB 0x03 #define PEOBJ_RELOC_OFS 0 #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ -#elif LJ_TARGET_PPC -#define PEOBJ_ARCH_TARGET 0x01f2 -#define PEOBJ_RELOC_REL32 0x06 -#define PEOBJ_RELOC_DIR32 0x02 +#define PEOBJ_PDATA_NRELOC 6 +#define PEOBJ_XDATA_SIZE (8*2+4+6*2) +#elif LJ_TARGET_ARM64 +#define PEOBJ_ARCH_TARGET 0xaa64 +#define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */ +#define PEOBJ_RELOC_DIR32 0x01 +#define PEOBJ_RELOC_ADDR32NB 0x02 #define PEOBJ_RELOC_OFS (-4) -#define PEOBJ_TEXT_FLAGS 0x60400020 /* 60=r+x, 40=align8, 20=code. */ +#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ +#define PEOBJ_PDATA_NRELOC 4 +#define PEOBJ_XDATA_SIZE (4+24+4 +4+8) #endif /* Section numbers (0-based). 
*/ @@ -106,7 +111,7 @@ enum { PEOBJ_SECT_ABS = -2, PEOBJ_SECT_UNDEF = -1, PEOBJ_SECT_TEXT, -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC PEOBJ_SECT_PDATA, PEOBJ_SECT_XDATA, #elif LJ_TARGET_X86 @@ -181,6 +186,9 @@ void emit_peobj(BuildCtx *ctx) uint32_t sofs; int i, nrsym; union { uint8_t b; uint32_t u; } host_endian; +#ifdef PEOBJ_PDATA_NRELOC + uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs; +#endif sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection); @@ -194,18 +202,18 @@ void emit_peobj(BuildCtx *ctx) /* Flags: 60 = read+execute, 50 = align16, 20 = code. */ pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS; -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1); pesect[PEOBJ_SECT_PDATA].ofs = sofs; - sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4); + sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4); pesect[PEOBJ_SECT_PDATA].relocofs = sofs; - sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE; + sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE; /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ pesect[PEOBJ_SECT_PDATA].flags = 0x40300040; memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1); pesect[PEOBJ_SECT_XDATA].ofs = sofs; - sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */ + sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */ pesect[PEOBJ_SECT_XDATA].relocofs = sofs; sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ @@ -240,7 +248,7 @@ void emit_peobj(BuildCtx *ctx) */ nrsym = ctx->nrelocsym; pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ #endif @@ -251,15 +259,8 @@ void emit_peobj(BuildCtx *ctx) /* Write .text section. 
*/ host_endian.u = 1; if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { -#if LJ_TARGET_PPC - uint32_t *p = (uint32_t *)ctx->code; - int n = (int)(ctx->codesz >> 2); - for (i = 0; i < n; i++, p++) - *p = lj_bswap(*p); /* Byteswap .text section. */ -#else fprintf(stderr, "Error: different byte order for host and target\n"); exit(1); -#endif } owrite(ctx, ctx->code, ctx->codesz); for (i = 0; i < ctx->nreloc; i++) { @@ -272,7 +273,6 @@ void emit_peobj(BuildCtx *ctx) #if LJ_TARGET_X64 { /* Write .pdata section. */ - uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs; uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */ PEreloc reloc; pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0; @@ -321,6 +321,87 @@ void emit_peobj(BuildCtx *ctx) reloc.type = PEOBJ_RELOC_ADDR32NB; owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); } +#elif LJ_TARGET_ARM64 + /* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */ + { /* Write .pdata section. */ + uint32_t pdata[4]; + PEreloc reloc; + pdata[0] = 0; + pdata[1] = 0; + pdata[2] = fcofs; + pdata[3] = 4+24+4; + owrite(ctx, &pdata, sizeof(pdata)); + /* Start of .text and start of .xdata. */ + reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + /* Start of vm_ffi_call and start of second part of .xdata. */ + reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + } + { /* Write .xdata section. 
*/ + uint32_t u32; + uint8_t *p, uwc[24]; + PEreloc reloc; + +#define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2) +#define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */ +#define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */ +#define CSAVE_REGP(r,o) CBE16(0xc800 | (((r) - 19) << 6) | ((o) >> 3)) +#define CSAVE_REGS(r1,r2,o1) do { \ + int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \ +} while (0) +#define CSAVE_REGPX(r,o) CBE16(0xcc00 | (((r) - 19) << 6) | (~(o) >> 3)) +#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3)) +#define CSAVE_FREGS(r1,r2,o1) do { \ + int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \ +} while (0) +#define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */ +#define CODE_NOP 0xe3 +#define CODE_END 0xe4 +#define CEND_ALIGN do { \ + *p++ = CODE_END; \ + while ((p - uwc) & 3) *p++ = CODE_NOP; \ +} while (0) + + /* Unwind codes for .text section with handler. */ + p = uwc; + CADD_FP(192); /* +2 */ + CSAVE_REGS(19, 28, 176); /* +5*2 */ + CSAVE_FREGS(8, 15, 96); /* +4*2 */ + CSAVE_FPLR(192); /* +1 */ + CALLOC_S(208); /* +1 */ + CEND_ALIGN; /* +1 +1 -> 24 */ + + u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2); + owrite(ctx, &u32, 4); + owrite(ctx, &uwc, 24); + + u32 = 0; /* Handler RVA to be relocated at 4 + 24. */ + owrite(ctx, &u32, 4); + + /* Unwind codes for vm_ffi_call without handler. */ + p = uwc; + CADD_FP(16); /* +2 */ + CSAVE_FPLR(16); /* +1 */ + CSAVE_REGPX(19, -32); /* +2 */ + CEND_ALIGN; /* +1 +2 -> 8 */ + + u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2); + owrite(ctx, &u32, 4); + owrite(ctx, &uwc, 8); + + reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + } #elif LJ_TARGET_X86 /* Write .sxdata section. 
*/ for (i = 0; i < nrsym; i++) { @@ -352,7 +433,7 @@ void emit_peobj(BuildCtx *ctx) emit_peobj_sym(ctx, ctx->relocsym[i], 0, PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); emit_peobj_sym(ctx, "lj_err_unwind_win", 0, diff --git a/source/libs/luajit/LuaJIT-src/src/host/genlibbc.lua b/source/libs/luajit/LuaJIT-src/src/host/genlibbc.lua index 6f5a05cce5169e8d9ccd912369304c4c9608a609..1bc119bf5d7bdb3c2942089be03d7c33373aa3fc 100644 --- a/source/libs/luajit/LuaJIT-src/src/host/genlibbc.lua +++ b/source/libs/luajit/LuaJIT-src/src/host/genlibbc.lua @@ -2,7 +2,7 @@ -- Lua script to dump the bytecode of the library functions written in Lua. -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT. ---------------------------------------------------------------------------- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. 
See Copyright Notice in luajit.h ---------------------------------------------------------------------------- @@ -55,7 +55,7 @@ local function transform_lua(code) end) code = string.gsub(code, "PAIRS%((.-)%)", function(var) fixup.PAIRS = true - return format("nil, %s, 0", var) + return format("nil, %s, 0x4dp80", var) end) return "return "..code, fixup end @@ -79,9 +79,11 @@ local name2itype = { str = 5, func = 9, tab = 12, int = 14, num = 15 } -local BC = {} +local BC, BCN = {}, {} for i=0,#bcnames/6-1 do - BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i + local name = bcnames:sub(i*6+1, i*6+6):gsub(" ", "") + BC[name] = i + BCN[i] = name end local xop, xra = isbe and 3 or 0, isbe and 2 or 1 local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3 @@ -96,6 +98,7 @@ local function fixup_dump(dump, fixup) p = read_uleb128(p) p = read_uleb128(p) p, sizebc = read_uleb128(p) + local startbc = tonumber(p - start) local rawtab = {} for i=0,sizebc-1 do local op = p[xop] @@ -129,46 +132,79 @@ local function fixup_dump(dump, fixup) end p = p + 4 end - return ffi.string(start, n) + local ndump = ffi.string(start, n) + -- Fixup hi-part of 0x4dp80 to LJ_KEYINDEX. + ndump = ndump:gsub("\x80\x80\xcd\xaa\x04", "\xff\xff\xf9\xff\x0f") + return { dump = ndump, startbc = startbc, sizebc = sizebc } end -local function find_defs(src) +local function find_defs(src, mode) local defs = {} for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do - local env = {} local tcode, fixup = transform_lua(code) - local func = assert(load(tcode, "", nil, env))() - defs[name] = fixup_dump(string.dump(func, true), fixup) + local func = assert(load(tcode, "", mode)) + defs[name] = fixup_dump(string.dump(func, mode), fixup) defs[#defs+1] = name end return defs end -local function gen_header(defs) +local function gen_header(defs32, defs64) local t = {} local function w(x) t[#t+1] = x end w("/* This is a generated file. DO NOT EDIT! 
*/\n\n") w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n") - local s = "" - for _,name in ipairs(defs) do - s = s .. defs[name] - end - w("static const uint8_t libbc_code[] = {\n") - local n = 0 - for i=1,#s do - local x = string.byte(s, i) - w(x); w(",") - n = n + (x < 10 and 2 or (x < 100 and 3 or 4)) - if n >= 75 then n = 0; w("\n") end + for j,defs in ipairs{defs64, defs32} do + local s, sb = "", "" + for i,name in ipairs(defs) do + local d = defs[name] + s = s .. d.dump + sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1) + .. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc) + .. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4) + end + if j == 1 then + w("static const uint8_t libbc_code[] = {\n#if LJ_FR2\n") + else + w("\n#else\n") + end + local n = 0 + for i=1,#s do + local x = string.byte(s, i) + local xb = string.byte(sb, i) + if xb == 255 then + local name = BCN[x] + local m = #name + 4 + if n + m > 78 then n = 0; w("\n") end + n = n + m + w("BC_"); w(name) + else + local m = x < 10 and 2 or (x < 100 and 3 or 4) + if xb == 0 then + if n + m > 78 then n = 0; w("\n") end + else + local name = defs[xb]:gsub("_", ".") + if n ~= 0 then w("\n") end + w("/* "); w(name); w(" */ ") + n = #name + 7 + end + n = n + m + w(x) + end + w(",") + end end - w("0\n};\n\n") + w("\n#endif\n0\n};\n\n") w("static const struct { const char *name; int ofs; } libbc_map[] = {\n") - local m = 0 - for _,name in ipairs(defs) do - w('{"'); w(name); w('",'); w(m) w('},\n') - m = m + #defs[name] + local m32, m64 = 0, 0 + for i,name in ipairs(defs32) do + assert(name == defs64[i]) + w('{"'); w(name); w('",'); w(m32) w('},\n') + m32 = m32 + #defs32[name].dump + m64 = m64 + #defs64[name].dump + assert(m32 == m64) end - w("{NULL,"); w(m); w("}\n};\n\n") + w("{NULL,"); w(m32); w("}\n};\n\n") return table.concat(t) end @@ -191,7 +227,8 @@ end local outfile = parse_arg(arg) local src = read_files(arg) -local defs = find_defs(src) -local hdr = gen_header(defs) +local defs32 
= find_defs(src, "Wdts") +local defs64 = find_defs(src, "Xdts") +local hdr = gen_header(defs32, defs64) write_file(outfile, hdr) diff --git a/source/libs/luajit/LuaJIT-src/src/host/genminilua.lua b/source/libs/luajit/LuaJIT-src/src/host/genminilua.lua index 50feff014d046b29e61e73e3344e90d377290186..11167f9a947d7bf32193d4d914f0199f6f98850f 100644 --- a/source/libs/luajit/LuaJIT-src/src/host/genminilua.lua +++ b/source/libs/luajit/LuaJIT-src/src/host/genminilua.lua @@ -2,7 +2,7 @@ -- Lua script to generate a customized, minified version of Lua. -- The resulting 'minilua' is used for the build process of LuaJIT. ---------------------------------------------------------------------------- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- @@ -327,6 +327,12 @@ local function rename_tokens2(src) return gsub(src, "ZY([%w_]+)", "union %1") end +local function fix_bugs_and_warnings(src) + src = gsub(src, "(luaD_checkstack%(L,p%->maxstacksize)%)", "%1+p->numparams)") + src = gsub(src, "if%(sep==%-1%)(return'%[';)\nelse (luaX_lexerror%b();)", "if (sep!=-1)%2\n%1") + return gsub(src, "(default:{\nNode%*n=mainposition)", "/*fallthrough*/\n%1") +end + local function func_gather(src) local nodes, list = {}, {} local pos, len = 1, #src @@ -425,5 +431,6 @@ src = rename_tokens1(src) src = func_collect(src) src = rename_tokens2(src) src = restore_strings(src) +src = fix_bugs_and_warnings(src) src = merge_header(src, license) io.write(src) diff --git a/source/libs/luajit/LuaJIT-src/src/host/genversion.lua b/source/libs/luajit/LuaJIT-src/src/host/genversion.lua new file mode 100644 index 0000000000000000000000000000000000000000..043c9a46da35d5b84c0d43d42a26a518c25ca7bc --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/host/genversion.lua @@ -0,0 +1,45 @@ 
+---------------------------------------------------------------------------- +-- Lua script to embed the rolling release version in luajit.h. +---------------------------------------------------------------------------- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- + +local arg = {...} +local FILE_ROLLING_H = arg[1] or "luajit_rolling.h" +local FILE_RELVER_TXT = arg[2] or "luajit_relver.txt" +local FILE_LUAJIT_H = arg[3] or "luajit.h" + +local function file_read(file) + local fp = assert(io.open(file, "rb"), "run from the wrong directory") + local data = assert(fp:read("*a")) + fp:close() + return data +end + +local function file_write_mod(file, data) + local fp = io.open(file, "rb") + if fp then + local odata = assert(fp:read("*a")) + fp:close() + if odata == data then return end + end + fp = assert(io.open(file, "wb")) + assert(fp:write(data)) + assert(fp:close()) +end + +local text = file_read(FILE_ROLLING_H):gsub("#error.-\n", "") +local relver = file_read(FILE_RELVER_TXT):match("(%d+)") + +if relver then + text = text:gsub("ROLLING", relver) +else + io.stderr:write([[ +**** WARNING Cannot determine rolling release version from git log. +**** WARNING The 'git' command must be available during the build. +]]) + file_write_mod(FILE_RELVER_TXT, "ROLLING\n") -- Fallback for install target. 
+end + +file_write_mod(FILE_LUAJIT_H, text) diff --git a/source/libs/luajit/LuaJIT-src/src/host/minilua.c b/source/libs/luajit/LuaJIT-src/src/host/minilua.c index 79150286cfc3d29cb1d6c65858c2f36af0708a23..76f32aed16549456ff8092fa922d7ffd3eef8535 100644 --- a/source/libs/luajit/LuaJIT-src/src/host/minilua.c +++ b/source/libs/luajit/LuaJIT-src/src/host/minilua.c @@ -1134,7 +1134,7 @@ if(!cl->isC){ CallInfo*ci; StkId st,base; Proto*p=cl->p; -luaD_checkstack(L,p->maxstacksize); +luaD_checkstack(L,p->maxstacksize+p->numparams); func=restorestack(L,funcr); if(!p->is_vararg){ base=func+1; @@ -1639,6 +1639,7 @@ lua_number2int(k,n); if(luai_numeq(cast_num(k),nvalue(key))) return luaH_getnum(t,k); } +/*fallthrough*/ default:{ Node*n=mainposition(t,key); do{ @@ -2905,8 +2906,8 @@ if(sep>=0){ read_long_string(ls,seminfo,sep); return TK_STRING; } -else if(sep==-1)return'['; -else luaX_lexerror(ls,"invalid long string delimiter",TK_STRING); +else if (sep!=-1)luaX_lexerror(ls,"invalid long string delimiter",TK_STRING); +return'['; } case'=':{ next(ls); diff --git a/source/libs/luajit/LuaJIT-src/src/jit/.gitignore b/source/libs/luajit/LuaJIT-src/src/jit/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..500e2855af97fc6f4d96b74ab5a1c481c10aba5a --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/jit/.gitignore @@ -0,0 +1 @@ +vmdef.lua diff --git a/source/libs/luajit/LuaJIT-src/src/jit/bc.lua b/source/libs/luajit/LuaJIT-src/src/jit/bc.lua index 193cf01f939df93a211dd4f5e7496c3f49c899af..71ba52be0c61963bdb19a4a7c1186a9b1e935a4b 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/bc.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/bc.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT bytecode listing module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. 
See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -41,7 +41,6 @@ -- Cache some library functions and objects. local jit = require("jit") -assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") local jutil = require("jit.util") local vmdef = require("jit.vmdef") local bit = require("bit") diff --git a/source/libs/luajit/LuaJIT-src/src/jit/bcsave.lua b/source/libs/luajit/LuaJIT-src/src/jit/bcsave.lua index c17c88e0ff1c4dd8823f81a2d39c0d0dab076a39..a30a34b6be0d19b46b03248e2c1297a1f4b19afa 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/bcsave.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/bcsave.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT module to save/list bytecode. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -11,12 +11,16 @@ ------------------------------------------------------------------------------ local jit = require("jit") -assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") +assert(jit.version_num == 20199, "LuaJIT core/library version mismatch") local bit = require("bit") -- Symbol name prefix for LuaJIT bytecode. local LJBC_PREFIX = "luaJIT_BC_" +local type, assert = type, assert +local format = string.format +local tremove, tconcat = table.remove, table.concat + ------------------------------------------------------------------------------ local function usage() @@ -25,15 +29,19 @@ Save LuaJIT bytecode: luajit -b[options] input output -l Only list bytecode. -s Strip debug info (default). -g Keep debug info. + -W Generate 32 bit (non-GC64) bytecode. + -X Generate 64 bit (GC64) bytecode. + -d Generate bytecode in deterministic manner. 
-n name Set module name (default: auto-detect from input name). -t type Set output file type (default: auto-detect from output name). -a arch Override architecture for object files (default: native). -o os Override OS for object files (default: native). + -F name Override filename (default: input filename). -e chunk Use chunk string as input. -- Stop handling options. - Use stdin as input and/or stdout as output. -File types: c h obj o raw (default) +File types: c cc h obj o raw (default) ]] os.exit(1) end @@ -45,10 +53,23 @@ local function check(ok, ...) os.exit(1) end -local function readfile(input) - if type(input) == "function" then return input end - if input == "-" then input = nil end - return check(loadfile(input)) +local function readfile(ctx, input) + if ctx.string then + return check(loadstring(input, nil, ctx.mode)) + elseif ctx.filename then + local data + if input == "-" then + data = io.stdin:read("*a") + else + local fp = assert(io.open(input, "rb")) + data = assert(fp:read("*a")) + assert(fp:close()) + end + return check(load(data, ctx.filename, ctx.mode)) + else + if input == "-" then input = nil end + return check(loadfile(input, ctx.mode)) + end end local function savefile(name, mode) @@ -56,15 +77,30 @@ local function savefile(name, mode) return check(io.open(name, mode)) end +local function set_stdout_binary(ffi) + ffi.cdef[[int _setmode(int fd, int mode);]] + ffi.C._setmode(1, 0x8000) +end + ------------------------------------------------------------------------------ local map_type = { - raw = "raw", c = "c", h = "h", o = "obj", obj = "obj", + raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj", } local map_arch = { - x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true, - ppc = true, mips = true, mipsel = true, + x86 = { e = "le", b = 32, m = 3, p = 0x14c, }, + x64 = { e = "le", b = 64, m = 62, p = 0x8664, }, + arm = { e = "le", b = 32, m = 40, p = 0x1c0, }, + arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, }, + 
arm64be = { e = "be", b = 64, m = 183, }, + ppc = { e = "be", b = 32, m = 20, }, + mips = { e = "be", b = 32, m = 8, f = 0x50001006, }, + mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, }, + mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, }, + mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, + mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, + mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, } local map_os = { @@ -73,33 +109,33 @@ local map_os = { } local function checkarg(str, map, err) - str = string.lower(str) + str = str:lower() local s = check(map[str], "unknown ", err) - return s == true and str or s + return type(s) == "string" and s or str end local function detecttype(str) - local ext = string.match(string.lower(str), "%.(%a+)$") + local ext = str:lower():match("%.(%a+)$") return map_type[ext] or "raw" end local function checkmodname(str) - check(string.match(str, "^[%w_.%-]+$"), "bad module name") - return string.gsub(str, "[%.%-]", "_") + check(str:match("^[%w_.%-]+$"), "bad module name") + return str:gsub("[%.%-]", "_") end local function detectmodname(str) if type(str) == "string" then - local tail = string.match(str, "[^/\\]+$") + local tail = str:match("[^/\\]+$") if tail then str = tail end - local head = string.match(str, "^(.*)%.[^.]*$") + local head = str:match("^(.*)%.[^.]*$") if head then str = head end - str = string.match(str, "^[%w_.%-]+") + str = str:match("^[%w_.%-]+") else str = nil end check(str, "cannot derive module name, use -n name") - return string.gsub(str, "[%.%-]", "_") + return str:gsub("[%.%-]", "_") end ------------------------------------------------------------------------------ @@ -111,6 +147,11 @@ local function bcsave_tail(fp, output, s) end local function bcsave_raw(output, s) + if output == "-" and jit.os == "Windows" then + local ok, ffi = pcall(require, "ffi") + check(ok, "FFI library required to write binary file to stdout") + set_stdout_binary(ffi) + end local fp = savefile(output, 
"wb") bcsave_tail(fp, output, s) end @@ -118,8 +159,8 @@ end local function bcsave_c(ctx, output, s) local fp = savefile(output, "w") if ctx.type == "c" then - fp:write(string.format([[ -#ifdef _cplusplus + fp:write(format([[ +#ifdef __cplusplus extern "C" #endif #ifdef _WIN32 @@ -128,7 +169,7 @@ __declspec(dllexport) const unsigned char %s%s[] = { ]], LJBC_PREFIX, ctx.modname)) else - fp:write(string.format([[ + fp:write(format([[ #define %s%s_SIZE %d static const unsigned char %s%s[] = { ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) @@ -138,13 +179,13 @@ static const unsigned char %s%s[] = { local b = tostring(string.byte(s, i)) m = m + #b + 1 if m > 78 then - fp:write(table.concat(t, ",", 1, n), ",\n") + fp:write(tconcat(t, ",", 1, n), ",\n") n, m = 0, #b + 1 end n = n + 1 t[n] = b end - bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n") + bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n") end local function bcsave_elfobj(ctx, output, s, ffi) @@ -199,12 +240,8 @@ typedef struct { } ELF64obj; ]] local symname = LJBC_PREFIX..ctx.modname - local is64, isbe = false, false - if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then - is64 = true - elseif ctx.arch == "ppc" or ctx.arch == "mips" then - isbe = true - end + local ai = assert(map_arch[ctx.arch]) + local is64, isbe = ai.b == 64, ai.e == "be" -- Handle different host/target endianess. 
local function f32(x) return x end @@ -237,10 +274,8 @@ typedef struct { hdr.eendian = isbe and 2 or 1 hdr.eversion = 1 hdr.type = f16(1) - hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) - if ctx.arch == "mips" or ctx.arch == "mipsel" then - hdr.flags = f32(0x50001006) - end + hdr.machine = f16(ai.m) + hdr.flags = f32(ai.f or 0) hdr.version = f32(1) hdr.shofs = fofs(ffi.offsetof(o, "sect")) hdr.ehsize = f16(ffi.sizeof(hdr)) @@ -275,7 +310,7 @@ typedef struct { o.sect[2].size = fofs(ofs) o.sect[3].type = f32(3) -- .strtab o.sect[3].ofs = fofs(sofs + ofs) - o.sect[3].size = fofs(#symname+1) + o.sect[3].size = fofs(#symname+2) ffi.copy(o.space+ofs+1, symname) ofs = ofs + #symname + 2 o.sect[4].type = f32(1) -- .rodata @@ -336,12 +371,8 @@ typedef struct { } PEobj; ]] local symname = LJBC_PREFIX..ctx.modname - local is64 = false - if ctx.arch == "x86" then - symname = "_"..symname - elseif ctx.arch == "x64" then - is64 = true - end + local ai = assert(map_arch[ctx.arch]) + local is64 = ai.b == 64 local symexport = " /EXPORT:"..symname..",DATA " -- The file format is always little-endian. Swap if the host is big-endian. @@ -355,7 +386,7 @@ typedef struct { -- Create PE object and fill in header. 
local o = ffi.new("PEobj") local hdr = o.hdr - hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) + hdr.arch = f16(assert(ai.p)) hdr.nsects = f16(2) hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) hdr.nsyms = f32(6) @@ -408,24 +439,12 @@ typedef struct { mach_header; uint32_t reserved; } mach_header_64; -typedef struct { - uint32_t cmd, cmdsize; - char segname[16]; - uint32_t vmaddr, vmsize, fileoff, filesize; - uint32_t maxprot, initprot, nsects, flags; -} mach_segment_command; typedef struct { uint32_t cmd, cmdsize; char segname[16]; uint64_t vmaddr, vmsize, fileoff, filesize; uint32_t maxprot, initprot, nsects, flags; } mach_segment_command_64; -typedef struct { - char sectname[16], segname[16]; - uint32_t addr, size; - uint32_t offset, align, reloff, nreloc, flags; - uint32_t reserved1, reserved2; -} mach_section; typedef struct { char sectname[16], segname[16]; uint64_t addr, size; @@ -438,124 +457,61 @@ typedef struct { typedef struct { int32_t strx; uint8_t type, sect; - int16_t desc; - uint32_t value; -} mach_nlist; -typedef struct { - uint32_t strx; - uint8_t type, sect; uint16_t desc; uint64_t value; } mach_nlist_64; -typedef struct -{ - uint32_t magic, nfat_arch; -} mach_fat_header; -typedef struct -{ - uint32_t cputype, cpusubtype, offset, size, align; -} mach_fat_arch; typedef struct { - struct { - mach_header hdr; - mach_segment_command seg; - mach_section sec; - mach_symtab_command sym; - } arch[1]; - mach_nlist sym_entry; - uint8_t space[4096]; -} mach_obj; -typedef struct { - struct { - mach_header_64 hdr; - mach_segment_command_64 seg; - mach_section_64 sec; - mach_symtab_command sym; - } arch[1]; + mach_header_64 hdr; + mach_segment_command_64 seg; + mach_section_64 sec; + mach_symtab_command sym; mach_nlist_64 sym_entry; uint8_t space[4096]; } mach_obj_64; -typedef struct { - mach_fat_header fat; - mach_fat_arch fat_arch[2]; - struct { - mach_header hdr; - mach_segment_command seg; - mach_section 
sec; - mach_symtab_command sym; - } arch[2]; - mach_nlist sym_entry; - uint8_t space[4096]; -} mach_fat_obj; ]] local symname = '_'..LJBC_PREFIX..ctx.modname - local isfat, is64, align, mobj = false, false, 4, "mach_obj" - if ctx.arch == "x64" then - is64, align, mobj = true, 8, "mach_obj_64" - elseif ctx.arch == "arm" then - isfat, mobj = true, "mach_fat_obj" - elseif ctx.arch == "arm64" then - is64, align, isfat, mobj = true, 8, true, "mach_fat_obj" - else - check(ctx.arch == "x86", "unsupported architecture for OSX") + local cputype, cpusubtype = 0x01000007, 3 + if ctx.arch ~= "x64" then + check(ctx.arch == "arm64", "unsupported architecture for OSX") + cputype, cpusubtype = 0x0100000c, 0 end local function aligned(v, a) return bit.band(v+a-1, -a) end - local be32 = bit.bswap -- Mach-O FAT is BE, supported archs are LE. -- Create Mach-O object and fill in header. - local o = ffi.new(mobj) - local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) - local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch] - local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch] - if isfat then - o.fat.magic = be32(0xcafebabe) - o.fat.nfat_arch = be32(#cpusubtype) - end + local o = ffi.new("mach_obj_64") + local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, 8) -- Fill in sections and symbols. - for i=0,#cpusubtype-1 do - local ofs = 0 - if isfat then - local a = o.fat_arch[i] - a.cputype = be32(cputype[i+1]) - a.cpusubtype = be32(cpusubtype[i+1]) - -- Subsequent slices overlap each other to share data. 
- ofs = ffi.offsetof(o, "arch") + i*ffi.sizeof(o.arch[0]) - a.offset = be32(ofs) - a.size = be32(mach_size-ofs+#s) - end - local a = o.arch[i] - a.hdr.magic = is64 and 0xfeedfacf or 0xfeedface - a.hdr.cputype = cputype[i+1] - a.hdr.cpusubtype = cpusubtype[i+1] - a.hdr.filetype = 1 - a.hdr.ncmds = 2 - a.hdr.sizeofcmds = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)+ffi.sizeof(a.sym) - a.seg.cmd = is64 and 0x19 or 0x1 - a.seg.cmdsize = ffi.sizeof(a.seg)+ffi.sizeof(a.sec) - a.seg.vmsize = #s - a.seg.fileoff = mach_size-ofs - a.seg.filesize = #s - a.seg.maxprot = 1 - a.seg.initprot = 1 - a.seg.nsects = 1 - ffi.copy(a.sec.sectname, "__data") - ffi.copy(a.sec.segname, "__DATA") - a.sec.size = #s - a.sec.offset = mach_size-ofs - a.sym.cmd = 2 - a.sym.cmdsize = ffi.sizeof(a.sym) - a.sym.symoff = ffi.offsetof(o, "sym_entry")-ofs - a.sym.nsyms = 1 - a.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)-ofs - a.sym.strsize = aligned(#symname+2, align) - end + o.hdr.magic = 0xfeedfacf + o.hdr.cputype = cputype + o.hdr.cpusubtype = cpusubtype + o.hdr.filetype = 1 + o.hdr.ncmds = 2 + o.hdr.sizeofcmds = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)+ffi.sizeof(o.sym) + o.seg.cmd = 0x19 + o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec) + o.seg.vmsize = #s + o.seg.fileoff = mach_size + o.seg.filesize = #s + o.seg.maxprot = 1 + o.seg.initprot = 1 + o.seg.nsects = 1 + ffi.copy(o.sec.sectname, "__data") + ffi.copy(o.sec.segname, "__DATA") + o.sec.size = #s + o.sec.offset = mach_size + o.sym.cmd = 2 + o.sym.cmdsize = ffi.sizeof(o.sym) + o.sym.symoff = ffi.offsetof(o, "sym_entry") + o.sym.nsyms = 1 + o.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry) + o.sym.strsize = aligned(#symname+2, 8) o.sym_entry.type = 0xf o.sym_entry.sect = 1 o.sym_entry.strx = 1 ffi.copy(o.space+1, symname) - -- Write Macho-O object file. + -- Write Mach-O object file. 
local fp = savefile(output, "wb") fp:write(ffi.string(o, mach_size)) bcsave_tail(fp, output, s) @@ -564,6 +520,9 @@ end local function bcsave_obj(ctx, output, s) local ok, ffi = pcall(require, "ffi") check(ok, "FFI library required to write this file type") + if output == "-" and jit.os == "Windows" then + set_stdout_binary(ffi) + end if ctx.os == "windows" then return bcsave_peobj(ctx, output, s, ffi) elseif ctx.os == "osx" then @@ -575,14 +534,14 @@ end ------------------------------------------------------------------------------ -local function bclist(input, output) - local f = readfile(input) +local function bclist(ctx, input, output) + local f = readfile(ctx, input) require("jit.bc").dump(f, savefile(output, "w"), true) end local function bcsave(ctx, input, output) - local f = readfile(input) - local s = string.dump(f, ctx.strip) + local f = readfile(ctx, input) + local s = string.dump(f, ctx.mode) local t = ctx.type if not t then t = detecttype(output) @@ -605,35 +564,43 @@ local function docmd(...) local n = 1 local list = false local ctx = { - strip = true, arch = jit.arch, os = string.lower(jit.os), - type = false, modname = false, + mode = "bt", arch = jit.arch, os = jit.os:lower(), + type = false, modname = false, string = false, } + local strip = "s" + local gc64 = "" while n <= #arg do local a = arg[n] - if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then - table.remove(arg, n) + if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then + tremove(arg, n) if a == "--" then break end for m=2,#a do - local opt = string.sub(a, m, m) + local opt = a:sub(m, m) if opt == "l" then list = true elseif opt == "s" then - ctx.strip = true + strip = "s" elseif opt == "g" then - ctx.strip = false + strip = "" + elseif opt == "W" or opt == "X" then + gc64 = opt + elseif opt == "d" then + ctx.mode = ctx.mode .. 
opt else if arg[n] == nil or m ~= #a then usage() end if opt == "e" then if n ~= 1 then usage() end - arg[1] = check(loadstring(arg[1])) + ctx.string = true elseif opt == "n" then - ctx.modname = checkmodname(table.remove(arg, n)) + ctx.modname = checkmodname(tremove(arg, n)) elseif opt == "t" then - ctx.type = checkarg(table.remove(arg, n), map_type, "file type") + ctx.type = checkarg(tremove(arg, n), map_type, "file type") elseif opt == "a" then - ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") + ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture") elseif opt == "o" then - ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") + ctx.os = checkarg(tremove(arg, n), map_os, "OS name") + elseif opt == "F" then + ctx.filename = "@"..tremove(arg, n) else usage() end @@ -643,9 +610,10 @@ local function docmd(...) n = n + 1 end end + ctx.mode = ctx.mode .. strip .. gc64 if list then if #arg == 0 or #arg > 2 then usage() end - bclist(arg[1], arg[2] or "-") + bclist(ctx, arg[1], arg[2] or "-") else if #arg ~= 2 then usage() end bcsave(ctx, arg[1], arg[2]) diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_arm.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_arm.lua index c2dd776991fe3ade39a9c464e566a4132ca558e5..494b9502feb45f208843d31f2a69a823cbc2a2c5 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/dis_arm.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_arm.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT ARM disassembler module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This is a helper module used by the LuaJIT machine code dumper module. 
diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_arm64.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_arm64.lua index a7173326acb77e341bf6b9e2f8d214f899e07f03..4457aac080ba0dbe8a4c619e22f466792e71a915 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/dis_arm64.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_arm64.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT ARM64 disassembler module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h -- -- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. @@ -107,24 +107,20 @@ local map_logsr = { -- Logical, shifted register. [0] = { shift = 29, mask = 3, [0] = { - shift = 21, mask = 7, - [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", - "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + shift = 21, mask = 1, + [0] = "andDNMSg", "bicDNMSg" }, { - shift = 21, mask = 7, - [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", - "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + shift = 21, mask = 1, + [0] = "orr|movDN0MSg", "orn|mvnDN0MSg" }, { - shift = 21, mask = 7, - [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", - "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + shift = 21, mask = 1, + [0] = "eorDNMSg", "eonDNMSg" }, { - shift = 21, mask = 7, - [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", - "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + shift = 21, mask = 1, + [0] = "ands|tstD0NMSg", "bicsDNMSg" } }, false -- unallocated @@ -132,24 +128,20 @@ local map_logsr = { -- Logical, shifted register. 
{ shift = 29, mask = 3, [0] = { - shift = 21, mask = 7, - [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", - "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + shift = 21, mask = 1, + [0] = "andDNMSg", "bicDNMSg" }, { - shift = 21, mask = 7, - [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", - "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + shift = 21, mask = 1, + [0] = "orr|movDN0MSg", "orn|mvnDN0MSg" }, { - shift = 21, mask = 7, - [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", - "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + shift = 21, mask = 1, + [0] = "eorDNMSg", "eonDNMSg" }, { - shift = 21, mask = 7, - [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", - "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + shift = 21, mask = 1, + [0] = "ands|tstD0NMSg", "bicsDNMSg" } } } @@ -666,6 +658,10 @@ local map_datafp = { -- Data processing, SIMD and FP. } } } + }, + { -- 010 + shift = 0, mask = 0x81f8fc00, + [0x100e400] = "moviDdG" } } @@ -735,7 +731,7 @@ local map_cond = { "hi", "ls", "ge", "lt", "gt", "le", "al", } -local map_shift = { [0] = "lsl", "lsr", "asr", } +local map_shift = { [0] = "lsl", "lsr", "asr", "ror"} local map_extend = { [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx", @@ -840,6 +836,20 @@ local function parse_fpimm8(op) return sign * frac * 2^exp end +local function decode_fpmovi(op) + local lo = rshift(op, 5) + local hi = rshift(op, 9) + lo = bor(band(lo, 1) * 0xff, band(lo, 2) * 0x7f80, band(lo, 4) * 0x3fc000, + band(lo, 8) * 0x1fe00000) + hi = bor(band(hi, 1) * 0xff, band(hi, 0x80) * 0x1fe, + band(hi, 0x100) * 0xff00, band(hi, 0x200) * 0x7f8000) + if hi ~= 0 then + return fmt_hex32(hi)..tohex(lo) + else + return fmt_hex32(lo) + end +end + local function prefer_bfx(sf, uns, imms, immr) if imms < immr or imms == 31 or imms == 63 then return false @@ -956,7 +966,7 @@ local function disass_ins(ctx) elseif p == "U" then local rn = 
map_regs.x[band(rshift(op, 5), 31)] local sz = band(rshift(op, 30), 3) - local imm12 = lshift(arshift(lshift(op, 10), 20), sz) + local imm12 = lshift(rshift(lshift(op, 10), 20), sz) if imm12 ~= 0 then x = "["..rn..", #"..imm12.."]" else @@ -993,8 +1003,7 @@ local function disass_ins(ctx) x = x.."]" end elseif p == "P" then - local opcv, sh = rshift(op, 26), 2 - if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end + local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1)) local imm7 = lshift(arshift(lshift(op, 10), 25), sh) local rn = map_regs.x[band(rshift(op, 5), 31)] local ind = band(rshift(op, 23), 3) @@ -1089,7 +1098,7 @@ local function disass_ins(ctx) last = "#"..(sf+32 - immr) operands[#operands] = last x = x + 1 - elseif x >= immr then + else name = a2 x = x - immr + 1 end @@ -1140,6 +1149,8 @@ local function disass_ins(ctx) x = 0 elseif p == "F" then x = parse_fpimm8(op) + elseif p == "G" then + x = "#0x"..decode_fpmovi(op) elseif p == "g" or p == "f" or p == "x" or p == "w" or p == "d" or p == "s" then -- These are handled in D/N/M/A. diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_arm64be.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_arm64be.lua index 7eb389e2fa597224c9b66b5029de41705c725ad4..47f191dcc305499b9439822eda56e7b82dc98cbe 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/dis_arm64be.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_arm64be.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT ARM64BE disassembler wrapper module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- ARM64 instructions are always little-endian. 
So just forward to the diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_mips.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_mips.lua index a12b8e62f3b532cf3e0a462ed69af9b5a1a3e044..fe1928e0b18012a87bd9089c7bcddd735fd2dfc2 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/dis_mips.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_mips.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT MIPS disassembler module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT/X license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This is a helper module used by the LuaJIT machine code dumper module. @@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift ------------------------------------------------------------------------------ --- Primary and extended opcode maps +-- Extended opcode maps common to all MIPS releases ------------------------------------------------------------------------------ -local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } +local map_cop0 = { + shift = 25, mask = 1, + [0] = { + shift = 21, mask = 15, + [0] = "mfc0TDW", [4] = "mtc0TDW", + [10] = "rdpgprDT", + [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, + [14] = "wrpgprDT", + }, { + shift = 0, mask = 63, + [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", + [24] = "eret", [31] = "deret", + [32] = "wait", + }, +} + +------------------------------------------------------------------------------ +-- Primary and extended opcode maps for MIPS R1-R5 
+------------------------------------------------------------------------------ + +local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } + local map_special = { shift = 0, mask = 63, [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, @@ -87,22 +108,6 @@ local map_regimm = { false, false, false, "synciSO", } -local map_cop0 = { - shift = 25, mask = 1, - [0] = { - shift = 21, mask = 15, - [0] = "mfc0TDW", [4] = "mtc0TDW", - [10] = "rdpgprDT", - [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, - [14] = "wrpgprDT", - }, { - shift = 0, mask = 63, - [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", - [24] = "eret", [31] = "deret", - [32] = "wait", - }, -} - local map_cop1s = { shift = 0, mask = 63, [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", @@ -233,6 +238,208 @@ local map_pri = { false, "sdc1HSO", "sdc2TSO", "sdTSO", } +------------------------------------------------------------------------------ +-- Primary and extended opcode maps for MIPS R6 +------------------------------------------------------------------------------ + +local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" } +local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" } +local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" } +local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" } +local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" } +local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" } +local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" } +local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" } + +local map_special_r6 = { + shift = 0, mask = 63, + [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, + false, map_srl, "sraDTA", + "sllvDTS", false, map_srlv, "sravDTS", + "jrS", "jalrD1S", false, false, + "syscallY", "breakY", false, "sync", + "clzDS", "cloDS", 
"dclzDS", "dcloDS", + "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST", + map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6, + map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6, + "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", + "andDST", "or|moveDST0", "xorDST", "nor|notDST0", + false, false, "sltDST", "sltuDST", + "daddDST", "dadduDST", "dsubDST", "dsubuDST", + "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", + "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST", + "dsllDTA", false, "dsrlDTA", "dsraDTA", + "dsll32DTA", false, "dsrl32DTA", "dsra32DTA", +} + +local map_bshfl_r6 = { + shift = 9, mask = 3, + [1] = "alignDSTa", + _ = { + shift = 6, mask = 31, + [0] = "bitswapDT", + [2] = "wsbhDT", + [16] = "sebDT", + [24] = "sehDT", + } +} + +local map_dbshfl_r6 = { + shift = 9, mask = 3, + [1] = "dalignDSTa", + _ = { + shift = 6, mask = 31, + [0] = "dbitswapDT", + [2] = "dsbhDT", + [5] = "dshdDT", + } +} + +local map_special3_r6 = { + shift = 0, mask = 63, + [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK", + [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL", + [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD", +} + +local map_regimm_r6 = { + shift = 16, mask = 31, + [0] = "bltzSB", [1] = "bgezSB", + [6] = "dahiSI", [30] = "datiSI", + [23] = "sigrieI", [31] = "synciSO", +} + +local map_pcrel_r6 = { + shift = 19, mask = 3, + [0] = "addiupcS2", "lwpcS2", "lwupcS2", { + shift = 18, mask = 1, + [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" } + } +} + +local map_cop1s_r6 = { + shift = 0, mask = 63, + [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", + "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG", + "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG", + "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG", + "sel.sFGH", false, false, false, + "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH", + "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG", + "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH", + false, 
"cvt.d.sFG", false, false, + "cvt.w.sFG", "cvt.l.sFG", +} + +local map_cop1d_r6 = { + shift = 0, mask = 63, + [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH", + "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG", + "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG", + "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG", + "sel.dFGH", false, false, false, + "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH", + "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG", + "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH", + "cvt.s.dFG", false, false, false, + "cvt.w.dFG", "cvt.l.dFG", +} + +local map_cop1w_r6 = { + shift = 0, mask = 63, + [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH", + "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH", + "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH", + "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH", + false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH", + false, false, false, false, + false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH", + false, false, false, false, + "cvt.s.wFG", "cvt.d.wFG", +} + +local map_cop1l_r6 = { + shift = 0, mask = 63, + [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH", + "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH", + "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH", + "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH", + false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH", + false, false, false, false, + false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH", + false, false, false, false, + "cvt.s.lFG", "cvt.d.lFG", +} + +local map_cop1_r6 = { + shift = 21, mask = 31, + [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG", + "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG", + false, "bc1eqzHB", false, false, + false, "bc1nezHB", false, false, + map_cop1s_r6, map_cop1d_r6, false, false, + map_cop1w_r6, map_cop1l_r6, +} + +local function maprs_popTS(rs, rt) + if rt == 0 then return 0 
elseif rs == 0 then return 1 + elseif rs == rt then return 2 else return 3 end +end + +local map_pop06_r6 = { + maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB" +} +local map_pop07_r6 = { + maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB" +} +local map_pop26_r6 = { + maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB" +} +local map_pop27_r6 = { + maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB" +} + +local function maprs_popS(rs, rt) + if rs == 0 then return 0 else return 1 end +end + +local map_pop66_r6 = { + maprs = maprs_popS, [0] = "jicTI", "beqzcSb" +} +local map_pop76_r6 = { + maprs = maprs_popS, [0] = "jialcTI", "bnezcSb" +} + +local function maprs_popST(rs, rt) + if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end +end + +local map_pop10_r6 = { + maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB" +} +local map_pop30_r6 = { + maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB" +} + +local map_pri_r6 = { + [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ", + "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6, + map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI", + "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U", + map_cop0, map_cop1_r6, false, false, + false, false, map_pop26_r6, map_pop27_r6, + map_pop30_r6, "daddiuTSI", false, false, + false, "dauiTSI", false, map_special3_r6, + "lbTSO", "lhTSO", false, "lwTSO", + "lbuTSO", "lhuTSO", false, false, + "sbTSO", "shTSO", false, "swTSO", + false, false, false, false, + false, "lwc1HSO", "bc#", false, + false, "ldc1HSO", map_pop66_r6, "ldTSO", + false, "swc1HSO", "balc#", map_pcrel_r6, + false, "sdc1HSO", map_pop76_r6, "sdTSO", +} + ------------------------------------------------------------------------------ local map_gpr = { @@ -287,10 +494,14 @@ local function disass_ins(ctx) ctx.op = op ctx.rel = nil - local opat = map_pri[rshift(op, 26)] + local opat = ctx.map_pri[rshift(op, 26)] while type(opat) ~= 
"string" do if not opat then return unknown(ctx) end - opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ + if opat.maprs then + opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))] + else + opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ + end end local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") @@ -314,6 +525,8 @@ local function disass_ins(ctx) x = "f"..band(rshift(op, 21), 31) elseif p == "A" then x = band(rshift(op, 6), 31) + elseif p == "a" then + x = band(rshift(op, 6), 7) elseif p == "E" then x = band(rshift(op, 6), 31) + 32 elseif p == "M" then @@ -333,6 +546,10 @@ local function disass_ins(ctx) x = band(rshift(op, 11), 31) - last + 33 elseif p == "I" then x = arshift(lshift(op, 16), 16) + elseif p == "2" then + x = arshift(lshift(op, 13), 11) + elseif p == "3" then + x = arshift(lshift(op, 14), 11) elseif p == "U" then x = band(op, 0xffff) elseif p == "O" then @@ -342,7 +559,15 @@ local function disass_ins(ctx) local index = map_gpr[band(rshift(op, 16), 31)] operands[#operands] = format("%s(%s)", index, last) elseif p == "B" then - x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 + x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4 + ctx.rel = x + x = format("0x%08x", x) + elseif p == "b" then + x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4 + ctx.rel = x + x = format("0x%08x", x) + elseif p == "#" then + x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4 ctx.rel = x x = format("0x%08x", x) elseif p == "J" then @@ -408,6 +633,7 @@ local function create(code, addr, out) ctx.disass = disass_block ctx.hexdump = 8 ctx.get = get_be + ctx.map_pri = map_pri return ctx end @@ -417,6 +643,19 @@ local function create_el(code, addr, out) return ctx end +local function create_r6(code, addr, out) + local ctx = create(code, addr, out) + ctx.map_pri = map_pri_r6 + return ctx +end + +local function create_r6_el(code, addr, out) + local ctx 
= create(code, addr, out) + ctx.get = get_le + ctx.map_pri = map_pri_r6 + return ctx +end + -- Simple API: disassemble code (a string) at address and output via out. local function disass(code, addr, out) create(code, addr, out):disass() @@ -426,6 +665,14 @@ local function disass_el(code, addr, out) create_el(code, addr, out):disass() end +local function disass_r6(code, addr, out) + create_r6(code, addr, out):disass() +end + +local function disass_r6_el(code, addr, out) + create_r6_el(code, addr, out):disass() +end + -- Return register name for RID. local function regname(r) if r < 32 then return map_gpr[r] end @@ -436,8 +683,12 @@ end return { create = create, create_el = create_el, + create_r6 = create_r6, + create_r6_el = create_r6_el, disass = disass, disass_el = disass_el, + disass_r6 = disass_r6, + disass_r6_el = disass_r6_el, regname = regname } diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64.lua index c4374928ab5f06b6758c35c8f37527e938463e31..447ab80eb403f0db38e4e1bf1514f2e64a31eee4 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT MIPS64 disassembler wrapper module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. 
See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This module just exports the big-endian functions from the diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64el.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64el.lua index 2b1470af505f0b29ac28c33b56ed02bab698b968..d4ead0291703bc79172fa54aa31c3614ad808d4b 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64el.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64el.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT MIPS64EL disassembler wrapper module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This module just exports the little-endian functions from the diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64r6.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64r6.lua new file mode 100644 index 0000000000000000000000000000000000000000..e19d3983ecc7500efc15c7da368a73a4b39fdffb --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64r6.lua @@ -0,0 +1,17 @@ +---------------------------------------------------------------------------- +-- LuaJIT MIPS64R6 disassembler wrapper module. +-- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the r6 big-endian functions from the +-- MIPS disassembler module. All the interesting stuff is there. 
+------------------------------------------------------------------------------ + +local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") +return { + create = dis_mips.create_r6, + disass = dis_mips.disass_r6, + regname = dis_mips.regname +} + diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64r6el.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64r6el.lua new file mode 100644 index 0000000000000000000000000000000000000000..c0842fec4c1848b12e44fbb2791c6ea3cea12677 --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_mips64r6el.lua @@ -0,0 +1,17 @@ +---------------------------------------------------------------------------- +-- LuaJIT MIPS64R6EL disassembler wrapper module. +-- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the r6 little-endian functions from the +-- MIPS disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") +return { + create = dis_mips.create_r6_el, + disass = dis_mips.disass_r6_el, + regname = dis_mips.regname +} + diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_mipsel.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_mipsel.lua index f69b11f01fdd474a7ceb1688b3ca54dd99ce6f49..870d1d0febf767b9b3bae7e4a81dd37298561821 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/dis_mipsel.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_mipsel.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT MIPSEL disassembler wrapper module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. 
See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This module just exports the little-endian functions from the diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_ppc.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_ppc.lua index 2aeb1b29244c99a3964c394ede8e6d6696813c13..830d76b638ddcae8d56c372ac7ddb6ff9b03a62e 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/dis_ppc.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_ppc.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT PPC disassembler module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT/X license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This is a helper module used by the LuaJIT machine code dumper module. diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_x64.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_x64.lua index d5714ee1f70937727b09c4b7ac630b7499a18b20..dc43cdb33bc85fcd0d93f68f475a40102822e21c 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/dis_x64.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_x64.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT x64 disassembler wrapper module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. 
See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This module just exports the 64 bit functions from the combined diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dis_x86.lua b/source/libs/luajit/LuaJIT-src/src/jit/dis_x86.lua index 4371233d2b47accba02fb5e4e8c4df63f549eb5c..b1de0eeae1dd77b2b7fe947b20c37032199d9e6f 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/dis_x86.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/dis_x86.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT x86/x64 disassembler module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- This is a helper module used by the LuaJIT machine code dumper module. @@ -239,6 +239,24 @@ nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm", --8x [0x8c] = "||pmaskmovXrvVSm", [0x8e] = "||pmaskmovVSmXvr", +--9x +[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm", +[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm", +[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm", +[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm", +[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm", +--Ax +[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm", +[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm", +[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm", +[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm", +[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm", +--Bx +[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm", +[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm", +[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm", +[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm", +[0xbe] = 
"||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm", --Dx [0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm", [0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", @@ -483,7 +501,7 @@ local function putpat(ctx, name, pat) local operands, regs, sz, mode, sp, rm, sc, rx, sdisp local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl - -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz + -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz for p in gmatch(pat, ".") do local x = nil if p == "V" or p == "U" then @@ -506,6 +524,9 @@ local function putpat(ctx, name, pat) sz = ctx.o16 and "X" or "M"; ctx.o16 = false if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end regs = map_regs[sz] + elseif p == "H" then + name = name..(ctx.rexw and "d" or "s") + ctx.rexw = false elseif p == "S" then name = name..lower(sz) elseif p == "s" then @@ -735,6 +756,7 @@ map_act = { V = putpat, U = putpat, T = putpat, M = putpat, X = putpat, P = putpat, F = putpat, G = putpat, Y = putpat, + H = putpat, -- Collect prefixes. [":"] = function(ctx, name, pat) diff --git a/source/libs/luajit/LuaJIT-src/src/jit/dump.lua b/source/libs/luajit/LuaJIT-src/src/jit/dump.lua index 2bea652bf81740c129029fcdcdad85e143fcd9f7..c855fa09b30bbb1d1290d1782f7bce8b8e627822 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/dump.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/dump.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT compiler dump module. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -55,7 +55,6 @@ -- Cache some library functions and objects. 
local jit = require("jit") -assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") local jutil = require("jit.util") local vmdef = require("jit.vmdef") local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc @@ -102,10 +101,12 @@ end local function fillsymtab(tr, nexit) local t = symtab if nexitsym == 0 then + local maskaddr = jit.arch == "arm" and -2 local ircall = vmdef.ircall for i=0,#ircall do local addr = ircalladdr(i) if addr ~= 0 then + if maskaddr then addr = band(addr, maskaddr) end if addr < 0 then addr = addr + 2^32 end t[addr] = ircall[i] end @@ -217,8 +218,10 @@ local function colorize_text(s) return s end -local function colorize_ansi(s, t) - return format(colortype_ansi[t], s) +local function colorize_ansi(s, t, extra) + local out = format(colortype_ansi[t], s) + if extra then out = "\027[3m"..out end + return out end local irtype_ansi = setmetatable({}, @@ -227,9 +230,10 @@ local irtype_ansi = setmetatable({}, local html_escape = { ["<"] = "<", [">"] = ">", ["&"] = "&", } -local function colorize_html(s, t) +local function colorize_html(s, t, extra) s = gsub(s, "[<>&]", html_escape) - return format('<span class="irt_%s">%s</span>', irtype_text[t], s) + return format('<span class="irt_%s%s">%s</span>', + irtype_text[t], extra and " irt_extra" or "", s) end local irtype_html = setmetatable({}, @@ -254,6 +258,7 @@ span.irt_tab { color: #c00000; } span.irt_udt, span.irt_lud { color: #00c0c0; } span.irt_num { color: #4040c0; } span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; } +span.irt_extra { font-style: italic; } </style> ]] @@ -269,6 +274,7 @@ local litname = { if band(mode, 8) ~= 0 then s = s.."C" end if band(mode, 16) ~= 0 then s = s.."R" end if band(mode, 32) ~= 0 then s = s.."I" end + if band(mode, 64) ~= 0 then s = s.."K" end t[mode] = s return s end}), @@ -277,15 +283,18 @@ local litname = { local s = irtype[band(mode, 31)] s = irtype[band(shr(mode, 5), 31)].."."..s if band(mode, 0x800) ~= 0 
then s = s.." sext" end - local c = shr(mode, 14) - if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end + local c = shr(mode, 12) + if c == 1 then s = s.." none" + elseif c == 2 then s = s.." index" + elseif c == 3 then s = s.." check" end t[mode] = s return s end}), ["FLOAD "] = vmdef.irfield, ["FREF "] = vmdef.irfield, ["FPMATH"] = vmdef.irfpm, - ["BUFHDR"] = { [0] = "RESET", "APPEND" }, + ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" }, + ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" }, ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" }, } @@ -315,7 +324,9 @@ local function formatk(tr, idx, sn) local tn = type(k) local s if tn == "number" then - if band(sn or 0, 0x30000) ~= 0 then + if t < 12 then + s = k == 0 and "NULL" or format("[0x%08x]", k) + elseif band(sn or 0, 0x30000) ~= 0 then s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz" elseif k == 2^52+2^51 then s = "bias" @@ -343,7 +354,7 @@ local function formatk(tr, idx, sn) else s = tostring(k) -- For primitives. 
end - s = colorize(format("%-4s", s), t) + s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0) if slot then s = format("%s @%d", s, slot) end @@ -363,7 +374,7 @@ local function printsnap(tr, snap) out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) else local m, ot, op1, op2 = traceir(tr, ref) - out:write(colorize(format("%04d", ref), band(ot, 31))) + out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0)) end out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME else @@ -541,7 +552,12 @@ local recdepth = 0 local function fmterr(err, info) if type(err) == "number" then if type(info) == "function" then info = fmtfunc(info) end - err = format(vmdef.traceerr[err], info) + local fmt = vmdef.traceerr[err] + if fmt == "NYI: bytecode %s" then + local oidx = 6 * info + info = sub(vmdef.bcnames, oidx+1, oidx+6) + end + err = format(fmt, info) end return err end @@ -582,7 +598,7 @@ local function dump_trace(what, tr, func, pc, otr, oex) end -- Dump recorded bytecode. -local function dump_record(tr, func, pc, depth, callee) +local function dump_record(tr, func, pc, depth) if depth ~= recdepth then recdepth = depth recprefix = rep(" .", depth) @@ -593,7 +609,6 @@ local function dump_record(tr, func, pc, depth, callee) if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end else line = "0000 "..recprefix.." FUNCC \n" - callee = func end if pc <= 0 then out:write(sub(line, 1, -2), " ; ", fmtfunc(func), "\n") @@ -607,12 +622,15 @@ end ------------------------------------------------------------------------------ +local gpr64 = jit.arch:match("64") +local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel" + -- Dump taken trace exits. local function dump_texit(tr, ex, ngpr, nfpr, ...) 
out:write("---- TRACE ", tr, " exit ", ex, "\n") if dumpmode.X then local regs = {...} - if jit.arch == "x64" then + if gpr64 then for i=1,ngpr do out:write(format(" %016x", regs[i])) if i % 4 == 0 then out:write("\n") end @@ -623,7 +641,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...) if i % 8 == 0 then out:write("\n") end end end - if jit.arch == "mips" or jit.arch == "mipsel" then + if fprmips32 then for i=1,nfpr,2 do out:write(format(" %+17.14g", regs[ngpr+i])) if i % 8 == 7 then out:write("\n") end diff --git a/source/libs/luajit/LuaJIT-src/src/jit/p.lua b/source/libs/luajit/LuaJIT-src/src/jit/p.lua index 7be105863d3bdacb778ee804abd07417b8dec4f7..3002c19f41fadc8057bbf0edb068ca413d62df4c 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/p.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/p.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT profiler. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -41,7 +41,6 @@ -- Cache some library functions and objects. 
local jit = require("jit") -assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") local profile = require("jit.profile") local vmdef = require("jit.vmdef") local math = math @@ -228,9 +227,7 @@ local function prof_finish() local samples = prof_samples if samples == 0 then if prof_raw ~= true then out:write("[No samples collected]\n") end - return - end - if prof_ann then + elseif prof_ann then prof_annotate(prof_count1, samples) else prof_top(prof_count1, prof_count2, samples, "") @@ -238,6 +235,7 @@ local function prof_finish() prof_count1 = nil prof_count2 = nil prof_ud = nil + if out ~= stdout then out:close() end end end diff --git a/source/libs/luajit/LuaJIT-src/src/jit/v.lua b/source/libs/luajit/LuaJIT-src/src/jit/v.lua index 934de9859c730656ec28f7e66ae96f9aaa80aa08..0d4ec277bc9cbd31d5c7851d6bef2da36050553f 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/v.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/v.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- Verbose mode of the LuaJIT compiler. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- @@ -59,11 +59,10 @@ -- Cache some library functions and objects. local jit = require("jit") -assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") local jutil = require("jit.util") local vmdef = require("jit.vmdef") local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo -local type, format = type, string.format +local type, sub, format = type, string.sub, string.format local stdout, stderr = io.stdout, io.stderr -- Active flag and output file handle. 
@@ -90,7 +89,12 @@ end local function fmterr(err, info) if type(err) == "number" then if type(info) == "function" then info = fmtfunc(info) end - err = format(vmdef.traceerr[err], info) + local fmt = vmdef.traceerr[err] + if fmt == "NYI: bytecode %s" then + local oidx = 6 * info + info = sub(vmdef.bcnames, oidx+1, oidx+6) + end + err = format(fmt, info) end return err end diff --git a/source/libs/luajit/LuaJIT-src/src/jit/zone.lua b/source/libs/luajit/LuaJIT-src/src/jit/zone.lua index fa702c4e989089f9ef5ae078463b09c0a74091de..7f210730a7b88dde0b528246efce2607cd81c2f5 100644 --- a/source/libs/luajit/LuaJIT-src/src/jit/zone.lua +++ b/source/libs/luajit/LuaJIT-src/src/jit/zone.lua @@ -1,7 +1,7 @@ ---------------------------------------------------------------------------- -- LuaJIT profiler zones. -- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -- diff --git a/source/libs/luajit/LuaJIT-src/src/lauxlib.h b/source/libs/luajit/LuaJIT-src/src/lauxlib.h index 29bde38862b9d8d85f18310d5a661dc648a6b083..405c527636350f93581b00ad7a6e3f86fdfe2ac8 100644 --- a/source/libs/luajit/LuaJIT-src/src/lauxlib.h +++ b/source/libs/luajit/LuaJIT-src/src/lauxlib.h @@ -92,7 +92,6 @@ LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname); LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname); - /* ** {====================================================== ** File handles for IO library @@ -117,6 +116,7 @@ typedef struct luaL_Stream { + /* ** =============================================================== ** some useful macros @@ -180,7 +180,7 @@ LUALIB_API void (luaL_addstring) (luaL_Buffer *B, const char *s); LUALIB_API void (luaL_addvalue) (luaL_Buffer *B); LUALIB_API void (luaL_pushresult) (luaL_Buffer *B); -/* -- Luajittex needs this one 
because it's faster than make it Lua -- */ +/* -- Luajittex needs this one because it's faster than make it with Lua -- */ LUA_API int (RESERVED_lua_dump) (lua_State *L, lua_Writer writer, void *data, int strip); /* Luajittex needs this one because it overloads loadfile */ LUALIB_API int (RESERVED_load_aux_JIT) (lua_State *L, int status, int envarg); diff --git a/source/libs/luajit/LuaJIT-src/src/lib_aux.c b/source/libs/luajit/LuaJIT-src/src/lib_aux.c index c40565c340d16f58e0a997f3cdd26684b5d30c39..4b4664a79daa9a16489720728fb99bcb8e05933a 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_aux.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_aux.c @@ -1,6 +1,6 @@ /* ** Auxiliary library for the Lua/C API. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major parts taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -21,6 +21,7 @@ #include "lj_state.h" #include "lj_trace.h" #include "lj_lib.h" +#include "lj_vmevent.h" #if LJ_TARGET_POSIX #include <sys/wait.h> @@ -218,8 +219,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B) LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) { - while (l--) - luaL_addchar(B, *s++); + if (l <= bufffree(B)) { + memcpy(B->p, s, l); + B->p += l; + } else { + emptybuffer(B); + lua_pushlstring(B->L, s, l); + B->lvl++; + adjuststack(B); + } } LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) @@ -311,6 +319,18 @@ static int panic(lua_State *L) return 0; } +#ifndef LUAJIT_DISABLE_VMEVENT +static int error_finalizer(lua_State *L) +{ + const char *s = lua_tostring(L, -1); + fputs("ERROR in finalizer: ", stderr); + fputs(s ? 
s : "?", stderr); + fputc('\n', stderr); + fflush(stderr); + return 0; +} +#endif + #ifdef LUAJIT_USE_SYSMALLOC #if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND) @@ -332,25 +352,39 @@ static void *mem_alloc(void *ud, void *ptr, size_t osize, size_t nsize) LUALIB_API lua_State *luaL_newstate(void) { lua_State *L = lua_newstate(mem_alloc, NULL); - if (L) G(L)->panic = panic; + if (L) { + G(L)->panic = panic; +#ifndef LUAJIT_DISABLE_VMEVENT + luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE); + lua_pushcfunction(L, error_finalizer); + lua_rawseti(L, -2, VMEVENT_HASH(LJ_VMEVENT_ERRFIN)); + G(L)->vmevmask = VMEVENT_MASK(LJ_VMEVENT_ERRFIN); + L->top--; +#endif + } return L; } #else -#include "lj_alloc.h" - LUALIB_API lua_State *luaL_newstate(void) { lua_State *L; - void *ud = lj_alloc_create(); - if (ud == NULL) return NULL; #if LJ_64 && !LJ_GC64 - L = lj_state_newstate(lj_alloc_f, ud); + L = lj_state_newstate(LJ_ALLOCF_INTERNAL, NULL); #else - L = lua_newstate(lj_alloc_f, ud); + L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL); #endif - if (L) G(L)->panic = panic; + if (L) { + G(L)->panic = panic; +#ifndef LUAJIT_DISABLE_VMEVENT + luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE); + lua_pushcfunction(L, error_finalizer); + lua_rawseti(L, -2, VMEVENT_HASH(LJ_VMEVENT_ERRFIN)); + G(L)->vmevmask = VMEVENT_MASK(LJ_VMEVENT_ERRFIN); + L->top--; +#endif + } return L; } diff --git a/source/libs/luajit/LuaJIT-src/src/lib_base.c b/source/libs/luajit/LuaJIT-src/src/lib_base.c index 3a757870aa34d05980894479ee75a9a3994f3f1f..5d1b88a9ad07144e9fdf97ceb04f41a52297774c 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_base.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_base.c @@ -1,6 +1,6 @@ /* ** Base and coroutine library. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. 
** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -19,6 +19,7 @@ #include "lj_gc.h" #include "lj_err.h" #include "lj_debug.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_meta.h" @@ -42,13 +43,13 @@ LJLIB_ASM(assert) LJLIB_REC(.) { - GCstr *s; lj_lib_checkany(L, 1); - s = lj_lib_optstr(L, 2); - if (s) - lj_err_callermsg(L, strdata(s)); - else + if (L->top == L->base+1) lj_err_caller(L, LJ_ERR_ASSERT); + else if (tvisstr(L->base+1) || tvisnumber(L->base+1)) + lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2))); + else + lj_err_run(L); return FFH_UNREACHABLE; } @@ -75,9 +76,10 @@ LJLIB_ASM_(type) LJLIB_REC(.) /* This solves a circular dependency problem -- change FF_next_N as needed. */ LJ_STATIC_ASSERT((int)FF_next == FF_next_N); -LJLIB_ASM(next) +LJLIB_ASM(next) LJLIB_REC(.) { lj_lib_checktab(L, 1); + lj_err_msg(L, LJ_ERR_NEXTIDX); return FFH_UNREACHABLE; } @@ -144,6 +146,8 @@ LJLIB_CF(getfenv) LJLIB_REC(.) cTValue *o = L->base; if (!(o < L->top && tvisfunc(o))) { int level = lj_lib_optint(L, 1, 1); + if (level < 0) + lj_err_arg(L, 1, LJ_ERR_INVLVL); o = lj_debug_frame(L, level, &level); if (o == NULL) lj_err_arg(L, 1, LJ_ERR_INVLVL); @@ -166,6 +170,8 @@ LJLIB_CF(setfenv) setgcref(L->env, obj2gco(t)); return 0; } + if (level < 0) + lj_err_arg(L, 1, LJ_ERR_INVLVL); o = lj_debug_frame(L, level, &level); if (o == NULL) lj_err_arg(L, 1, LJ_ERR_INVLVL); @@ -224,9 +230,11 @@ LJLIB_CF(unpack) int32_t n, i = lj_lib_optint(L, 2, 1); int32_t e = (L->base+3-1 < L->top && !tvisnil(L->base+3-1)) ? lj_lib_checkint(L, 3) : (int32_t)lj_tab_len(t); + uint32_t nu; if (i > e) return 0; - n = e - i + 1; - if (n <= 0 || !lua_checkstack(L, n)) + nu = (uint32_t)e - (uint32_t)i; + n = (int32_t)(nu+1); + if (nu >= LUAI_MAXCSTACK || !lua_checkstack(L, n)) lj_err_caller(L, LJ_ERR_UNPACK); do { cTValue *tv = lj_tab_getint(t, i); @@ -287,18 +295,27 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) 
} else { const char *p = strdata(lj_lib_checkstr(L, 1)); char *ep; + unsigned int neg = 0; unsigned long ul; if (base < 2 || base > 36) lj_err_arg(L, 2, LJ_ERR_BASERNG); - ul = strtoul(p, &ep, base); - if (p != ep) { - while (lj_char_isspace((unsigned char)(*ep))) ep++; - if (*ep == '\0') { - if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) - setintV(L->base-1-LJ_FR2, (int32_t)ul); - else - setnumV(L->base-1-LJ_FR2, (lua_Number)ul); - return FFH_RES(1); + while (lj_char_isspace((unsigned char)(*p))) p++; + if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; } + if (lj_char_isalnum((unsigned char)(*p))) { + ul = strtoul(p, &ep, base); + if (p != ep) { + while (lj_char_isspace((unsigned char)(*ep))) ep++; + if (*ep == '\0') { + if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) { + if (neg) ul = ~ul+1u; + setintV(L->base-1-LJ_FR2, (int32_t)ul); + } else { + lua_Number n = (lua_Number)ul; + if (neg) n = -n; + setnumV(L->base-1-LJ_FR2, n); + } + return FFH_RES(1); + } } } } @@ -347,7 +364,11 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.) static int load_aux(lua_State *L, int status, int envarg) { if (status == LUA_OK) { - if (tvistab(L->base+envarg-1)) { + /* + ** Set environment table for top-level function. + ** Don't do this for non-native bytecode, which returns a prototype. + */ + if (tvistab(L->base+envarg-1) && tvisfunc(L->top-1)) { GCfunc *fn = funcV(L->top-1); GCtab *t = tabV(L->base+envarg-1); setgcref(fn->c.env, obj2gco(t)); @@ -395,10 +416,22 @@ LJLIB_CF(load) GCstr *name = lj_lib_optstr(L, 2); GCstr *mode = lj_lib_optstr(L, 3); int status; - if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) { - GCstr *s = lj_lib_checkstr(L, 1); + if (L->base < L->top && + (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) { + const char *s; + MSize len; + if (tvisbuf(L->base)) { + SBufExt *sbx = bufV(L->base); + s = sbx->r; + len = sbufxlen(sbx); + if (!name) name = &G(L)->strempty; /* Buffers are not NUL-terminated. 
*/ + } else { + GCstr *str = lj_lib_checkstr(L, 1); + s = strdata(str); + len = str->len; + } lua_settop(L, 4); /* Ensure env arg exists. */ - status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s), + status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s, mode ? strdata(mode) : NULL); } else { lj_lib_checkfunc(L, 1); @@ -493,7 +526,8 @@ LJLIB_CF(print) lua_gettable(L, LUA_GLOBALSINDEX); tv = L->top-1; } - shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring); + shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) && + !gcrefu(basemt_it(G(L), LJ_TNUMX)); for (i = 0; i < nargs; i++) { cTValue *o = &L->base[i]; const char *str; @@ -590,7 +624,10 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap) setstrV(L, L->base-LJ_FR2, lj_err_str(L, em)); return FFH_RES(2); } - lj_state_growstack(co, (MSize)(L->top - L->base)); + if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) { + cTValue *msg = --co->top; + lj_err_callermsg(L, strVdata(msg)); + } return FFH_RETRY; } diff --git a/source/libs/luajit/LuaJIT-src/src/lib_bit.c b/source/libs/luajit/LuaJIT-src/src/lib_bit.c index c979a44839c949306c81e805e084f721d5e6b398..022f41b933600d1e46b8dc96098f13091e4332e9 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_bit.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_bit.c @@ -1,6 +1,6 @@ /* ** Bit manipulation library. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lib_bit_c @@ -98,7 +98,7 @@ LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift); return bit_result64(L, id, x); } - if (id2) setintV(L->base+1, sh); + setintV(L->base+1, sh); return FFH_RETRY; #else lj_lib_checknumber(L, 1); @@ -155,7 +155,8 @@ LJLIB_CF(bit_tohex) LJLIB_REC(.) 
#endif SBuf *sb = lj_buf_tmp_(L); SFormat sf = (STRFMT_UINT|STRFMT_T_HEX); - if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; } + if (n < 0) { n = (int32_t)(~(uint32_t)n+1u); sf |= STRFMT_F_UPPER; } + if ((uint32_t)n > 254) n = 254; sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC); #if LJ_HASFFI if (n < 16) b &= ((uint64_t)1 << 4*n)-1; diff --git a/source/libs/luajit/LuaJIT-src/src/lib_buffer.c b/source/libs/luajit/LuaJIT-src/src/lib_buffer.c new file mode 100644 index 0000000000000000000000000000000000000000..77d030bff7d556bf90f8fcec2fa588ab4f70ed12 --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/lib_buffer.c @@ -0,0 +1,360 @@ +/* +** Buffer library. +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lib_buffer_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_obj.h" + +#if LJ_HASBUFFER +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_buf.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_udata.h" +#include "lj_meta.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#include "lj_cdata.h" +#include "lj_cconv.h" +#endif +#include "lj_strfmt.h" +#include "lj_serialize.h" +#include "lj_lib.h" + +/* -- Helper functions ---------------------------------------------------- */ + +/* Check that the first argument is a string buffer. */ +static SBufExt *buffer_tobuf(lua_State *L) +{ + if (!(L->base < L->top && tvisbuf(L->base))) + lj_err_argtype(L, 1, "buffer"); + return bufV(L->base); +} + +/* Ditto, but for writers. */ +static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L) +{ + SBufExt *sbx = buffer_tobuf(L); + setsbufXL_(sbx, L); + return sbx; +} + +#define buffer_toudata(sbx) ((GCudata *)(sbx)-1) + +/* -- Buffer methods ------------------------------------------------------ */ + +#define LJLIB_MODULE_buffer_method + +LJLIB_CF(buffer_method_free) +{ + SBufExt *sbx = buffer_tobuf(L); + lj_bufx_free(L, sbx); + L->top = L->base+1; /* Chain buffer object. 
*/ + return 1; +} + +LJLIB_CF(buffer_method_reset) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + lj_bufx_reset(sbx); + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_skip) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); + MSize len = sbufxlen(sbx); + if (n < len) { + sbx->r += n; + } else if (sbufiscow(sbx)) { + sbx->r = sbx->w; + } else { + sbx->r = sbx->w = sbx->b; + } + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_set) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + GCobj *ref; + const char *p; + MSize len; +#if LJ_HASFFI + if (tviscdata(L->base+1)) { + CTState *cts = ctype_cts(L); + lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, + L->base+1, CCF_ARG(2)); + len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF); + } else +#endif + { + GCstr *str = lj_lib_checkstrx(L, 2); + p = strdata(str); + len = str->len; + } + lj_bufx_free(L, sbx); + lj_bufx_set_cow(L, sbx, p, len); + ref = gcV(L->base+1); + setgcref(sbx->cowref, ref); + lj_gc_objbarrier(L, buffer_toudata(sbx), ref); + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_put) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobufw(L); + ptrdiff_t arg, narg = L->top - L->base; + for (arg = 1; arg < narg; arg++) { + cTValue *o = &L->base[arg], *mo = NULL; + retry: + if (tvisstr(o)) { + lj_buf_putstr((SBuf *)sbx, strV(o)); + } else if (tvisint(o)) { + lj_strfmt_putint((SBuf *)sbx, intV(o)); + } else if (tvisnum(o)) { + lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o)); + } else if (tvisbuf(o)) { + SBufExt *sbx2 = bufV(o); + if (sbx2 == sbx) lj_err_arg(L, (int)(arg+1), LJ_ERR_BUFFER_SELF); + lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2)); + } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { + /* Call __tostring metamethod inline. 
*/ + copyTV(L, L->top++, mo); + copyTV(L, L->top++, o); + lua_call(L, 1, 1); + o = &L->base[arg]; /* The stack may have been reallocated. */ + copyTV(L, &L->base[arg], L->top-1); + L->top = L->base + narg; + goto retry; /* Retry with the result. */ + } else { + lj_err_argtype(L, (int)(arg+1), "string/number/__tostring"); + } + /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */ + } + L->top = L->base+1; /* Chain buffer object. */ + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_method_putf) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobufw(L); + lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2); + L->top = L->base+1; /* Chain buffer object. */ + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_method_get) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + ptrdiff_t arg, narg = L->top - L->base; + if (narg == 1) { + narg++; + setnilV(L->top++); /* get() is the same as get(nil). */ + } + for (arg = 1; arg < narg; arg++) { + TValue *o = &L->base[arg]; + MSize n = tvisnil(o) ? LJ_MAX_BUF : + (MSize) lj_lib_checkintrange(L, (int)(arg+1), 0, LJ_MAX_BUF); + MSize len = sbufxlen(sbx); + if (n > len) n = len; + setstrV(L, o, lj_str_new(L, sbx->r, n)); + sbx->r += n; + } + if (sbx->r == sbx->w && !sbufiscow(sbx)) sbx->r = sbx->w = sbx->b; + lj_gc_check(L); + return (int)(narg-1); +} + +#if LJ_HASFFI +LJLIB_CF(buffer_method_putcdata) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobufw(L); + const char *p; + MSize len; + if (tviscdata(L->base+1)) { + CTState *cts = ctype_cts(L); + lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, + L->base+1, CCF_ARG(2)); + } else { + lj_err_argtype(L, 2, "cdata"); + } + len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF); + lj_buf_putmem((SBuf *)sbx, p, len); + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_reserve) LJLIB_REC(.) 
+{ + SBufExt *sbx = buffer_tobufw(L); + MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); + GCcdata *cd; + lj_buf_more((SBuf *)sbx, sz); + ctype_loadffi(L); + cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR); + *(void **)cdataptr(cd) = sbx->w; + setcdataV(L, L->top++, cd); + setintV(L->top++, sbufleft(sbx)); + return 2; +} + +LJLIB_CF(buffer_method_commit) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); + if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG); + sbx->w += len; + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_ref) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + GCcdata *cd; + ctype_loadffi(L); + cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR); + *(void **)cdataptr(cd) = sbx->r; + setcdataV(L, L->top++, cd); + setintV(L->top++, sbufxlen(sbx)); + return 2; +} +#endif + +LJLIB_CF(buffer_method_encode) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobufw(L); + cTValue *o = lj_lib_checkany(L, 2); + lj_serialize_put(sbx, o); + lj_gc_check(L); + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_decode) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobufw(L); + setnilV(L->top++); + sbx->r = lj_serialize_get(sbx, L->top-1); + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_method___gc) +{ + SBufExt *sbx = buffer_tobuf(L); + lj_bufx_free(L, sbx); + return 0; +} + +LJLIB_CF(buffer_method___tostring) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx))); + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_method___len) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + setintV(L->top-1, (int32_t)sbufxlen(sbx)); + return 1; +} + +LJLIB_PUSH("buffer") LJLIB_SET(__metatable) +LJLIB_PUSH(top-1) LJLIB_SET(__index) + +/* -- Buffer library functions -------------------------------------------- */ + +#define LJLIB_MODULE_buffer + +LJLIB_PUSH(top-2) LJLIB_SET(!) 
/* Set environment. */ + +LJLIB_CF(buffer_new) +{ + MSize sz = 0; + int targ = 1; + GCtab *env, *dict_str = NULL, *dict_mt = NULL; + GCudata *ud; + SBufExt *sbx; + if (L->base < L->top && !tvistab(L->base)) { + targ = 2; + if (!tvisnil(L->base)) + sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF); + } + if (L->base+targ-1 < L->top) { + GCtab *options = lj_lib_checktab(L, targ); + cTValue *opt_dict, *opt_mt; + opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict")); + if (opt_dict && tvistab(opt_dict)) { + dict_str = tabV(opt_dict); + lj_serialize_dict_prep_str(L, dict_str); + } + opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable")); + if (opt_mt && tvistab(opt_mt)) { + dict_mt = tabV(opt_mt); + lj_serialize_dict_prep_mt(L, dict_mt); + } + } + env = tabref(curr_func(L)->c.env); + ud = lj_udata_new(L, sizeof(SBufExt), env); + ud->udtype = UDTYPE_BUFFER; + /* NOBARRIER: The GCudata is new (marked white). */ + setgcref(ud->metatable, obj2gco(env)); + setudataV(L, L->top++, ud); + sbx = (SBufExt *)uddata(ud); + lj_bufx_init(L, sbx); + setgcref(sbx->dict_str, obj2gco(dict_str)); + setgcref(sbx->dict_mt, obj2gco(dict_mt)); + if (sz > 0) lj_buf_need2((SBuf *)sbx, sz); + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_encode) LJLIB_REC(.) +{ + cTValue *o = lj_lib_checkany(L, 1); + setstrV(L, L->top++, lj_serialize_encode(L, o)); + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_decode) LJLIB_REC(.) 
+{ + GCstr *str = lj_lib_checkstrx(L, 1); + setnilV(L->top++); + lj_serialize_decode(L, L->top-1, str); + lj_gc_check(L); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +#include "lj_libdef.h" + +int luaopen_string_buffer(lua_State *L) +{ + LJ_LIB_REG(L, NULL, buffer_method); + lua_getfield(L, -1, "__tostring"); + lua_setfield(L, -2, "tostring"); + LJ_LIB_REG(L, NULL, buffer); + return 1; +} + +#endif diff --git a/source/libs/luajit/LuaJIT-src/src/lib_debug.c b/source/libs/luajit/LuaJIT-src/src/lib_debug.c index f112b5bc87051408110412932cb28abc1d9a0e52..5d2c3e8efbed9f7099eb8405e562ce8e9c54dcf8 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_debug.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_debug.c @@ -1,6 +1,6 @@ /* ** Debug library. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -231,8 +231,8 @@ LJLIB_CF(debug_upvalueid) int32_t n = lj_lib_checkint(L, 2) - 1; if ((uint32_t)n >= fn->l.nupvalues) lj_err_arg(L, 2, LJ_ERR_IDXRNG); - setlightudV(L->top-1, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : - (void *)&fn->c.upvalue[n]); + lua_pushlightuserdata(L, isluafunc(fn) ? 
(void *)gcref(fn->l.uvptr[n]) : + (void *)&fn->c.upvalue[n]); return 1; } @@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue) /* ------------------------------------------------------------------------ */ -#define KEY_HOOK ((void *)0x3004) +#define KEY_HOOK (U64x(80000000,00000000)|'h') static void hookf(lua_State *L, lua_Debug *ar) { static const char *const hooknames[] = {"call", "return", "line", "count", "tail return"}; - lua_pushlightuserdata(L, KEY_HOOK); + (L->top++)->u64 = KEY_HOOK; lua_rawget(L, LUA_REGISTRYINDEX); if (lua_isfunction(L, -1)) { lua_pushstring(L, hooknames[(int)ar->event]); @@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook) count = luaL_optint(L, arg+3, 0); func = hookf; mask = makemask(smask, count); } - lua_pushlightuserdata(L, KEY_HOOK); + (L->top++)->u64 = KEY_HOOK; lua_pushvalue(L, arg+1); lua_rawset(L, LUA_REGISTRYINDEX); lua_sethook(L, func, mask, count); @@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook) if (hook != NULL && hook != hookf) { /* external hook? */ lua_pushliteral(L, "external hook"); } else { - lua_pushlightuserdata(L, KEY_HOOK); + (L->top++)->u64 = KEY_HOOK; lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ } lua_pushstring(L, unmakemask(mask, buff)); @@ -369,7 +369,8 @@ LJLIB_CF(debug_debug) return 0; if (luaL_loadbuffer(L, buffer, strlen(buffer), "=(debug command)") || lua_pcall(L, 0, 0, 0)) { - fputs(lua_tostring(L, -1), stderr); + const char *s = lua_tostring(L, -1); + fputs(s ? s : "(error object is not a string)", stderr); fputs("\n", stderr); } lua_settop(L, 0); /* remove eventual returns */ diff --git a/source/libs/luajit/LuaJIT-src/src/lib_ffi.c b/source/libs/luajit/LuaJIT-src/src/lib_ffi.c index 136e98e8963ef198f2d8a4cd08b325794570feaf..a83e13a0008e1fb6cbe0493feac2062a4ec30c6e 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_ffi.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_ffi.c @@ -1,6 +1,6 @@ /* ** FFI library. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #define lib_ffi_c @@ -305,7 +305,7 @@ LJLIB_CF(ffi_meta___tostring) p = *(void **)p; } else if (ctype_isenum(ct->info)) { msg = "cdata<%s>: %d"; - p = (void *)(uintptr_t)*(uint32_t **)p; + p = (void *)(uintptr_t)*(uint32_t *)p; } else { if (ctype_isptr(ct->info)) { p = cdata_getptr(p, ct->size); @@ -513,7 +513,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.) /* Handle ctype __gc metamethod. Use the fast lookup here. */ cTValue *tv = lj_tab_getinth(cts->miscmap, -(int32_t)id); if (tv && tvistab(tv) && (tv = lj_meta_fast(L, tabV(tv), MM_gc))) { - GCtab *t = cts->finalizer; + GCtab *t = tabref(G(L)->gcroot[GCROOT_FFI_FIN]); if (gcref(t->metatable)) { /* Add to finalizer table, if still enabled. */ copyTV(L, lj_tab_set(L, t, o-1), tv); @@ -573,6 +573,7 @@ LJLIB_CF(ffi_typeinfo) setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib); if (gcref(ct->name)) { GCstr *s = gco2str(gcref(ct->name)); + if (isdead(G(L), obj2gco(s))) flipwhite(obj2gco(s)); setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s); } lj_gc_check(L); @@ -638,7 +639,7 @@ LJLIB_CF(ffi_alignof) LJLIB_REC(ffi_xof FF_ffi_alignof) CTState *cts = ctype_cts(L); CTypeID id = ffi_checkctype(L, cts, NULL); CTSize sz = 0; - CTInfo info = lj_ctype_info(cts, id, &sz); + CTInfo info = lj_ctype_info_raw(cts, id, &sz); setintV(L->top-1, 1 << ctype_align(info)); return 1; } @@ -720,48 +721,51 @@ LJLIB_CF(ffi_fill) LJLIB_REC(.) return 0; } -#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be) - /* Test ABI string. */ LJLIB_CF(ffi_abi) LJLIB_REC(.) 
{ GCstr *s = lj_lib_checkstr(L, 1); - int b = 0; - switch (s->hash) { + int b = lj_cparse_case(s, #if LJ_64 - case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */ + "\00564bit" #else - case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */ + "\00532bit" #endif #if LJ_ARCH_HASFPU - case H_(e33ee463,e33ee463): b = 1; break; /* fpu */ + "\003fpu" #endif #if LJ_ABI_SOFTFP - case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */ + "\006softfp" #else - case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */ + "\006hardfp" #endif #if LJ_ABI_EABI - case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */ + "\004eabi" #endif #if LJ_ABI_WIN - case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ + "\003win" +#endif +#if LJ_ABI_PAUTH + "\005pauth" +#endif +#if LJ_TARGET_UWP + "\003uwp" +#endif +#if LJ_LE + "\002le" +#else + "\002be" #endif - case H_(3af93066,1f001464): b = 1; break; /* le/be */ #if LJ_GC64 - case H_(9e89d2c9,13c83c92): b = 1; break; /* gc64 */ + "\004gc64" #endif - default: - break; - } + ) >= 0; setboolV(L->top-1, b); setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */ return 1; } -#undef H_ - -LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */ +LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to miscmap table. */ LJLIB_CF(ffi_metatype) { @@ -769,13 +773,13 @@ LJLIB_CF(ffi_metatype) CTypeID id = ffi_checkctype(L, cts, NULL); GCtab *mt = lj_lib_checktab(L, 2); GCtab *t = cts->miscmap; - CType *ct = ctype_get(cts, id); /* Only allow raw types. */ + CType *ct = ctype_raw(cts, id); TValue *tv; GCcdata *cd; if (!(ctype_isstruct(ct->info) || ctype_iscomplex(ct->info) || ctype_isvector(ct->info))) lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); - tv = lj_tab_setinth(L, t, -(int32_t)id); + tv = lj_tab_setinth(L, t, -(int32_t)ctype_typeid(cts, ct)); if (!tvisnil(tv)) lj_err_caller(L, LJ_ERR_PROTMT); settabV(L, tv, mt); @@ -787,8 +791,6 @@ LJLIB_CF(ffi_metatype) return 1; } -LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to finalizer table. 
*/ - LJLIB_CF(ffi_gc) LJLIB_REC(.) { GCcdata *cd = ffi_checkcdata(L, 1); @@ -821,19 +823,6 @@ LJLIB_PUSH(top-2) LJLIB_SET(arch) /* ------------------------------------------------------------------------ */ -/* Create special weak-keyed finalizer table. */ -static GCtab *ffi_finalizer(lua_State *L) -{ - /* NOBARRIER: The table is new (marked white). */ - GCtab *t = lj_tab_new(L, 0, 1); - settabV(L, L->top++, t); - setgcref(t->metatable, obj2gco(t)); - setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")), - lj_str_newlit(L, "k")); - t->nomm = (uint8_t)(~(1u<<MM_mode)); - return t; -} - /* Register FFI module as loaded. */ static void ffi_register_module(lua_State *L) { @@ -849,7 +838,6 @@ LUALIB_API int luaopen_ffi(lua_State *L) { CTState *cts = lj_ctype_init(L); settabV(L, L->top++, (cts->miscmap = lj_tab_new(L, 0, 1))); - cts->finalizer = ffi_finalizer(L); LJ_LIB_REG(L, NULL, ffi_meta); /* NOBARRIER: basemt is a GC root. */ setgcref(basemt_it(G(L), LJ_TCDATA), obj2gco(tabV(L->top-1))); diff --git a/source/libs/luajit/LuaJIT-src/src/lib_init.c b/source/libs/luajit/LuaJIT-src/src/lib_init.c index 405471dac0e3c322c150f41b40af1ffcb9342db1..5f1c312f12efe4b0fe80eea3a6fb4b04113c2ce6 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_init.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_init.c @@ -1,6 +1,6 @@ /* ** Library initialization. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major parts taken verbatim from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h diff --git a/source/libs/luajit/LuaJIT-src/src/lib_io.c b/source/libs/luajit/LuaJIT-src/src/lib_io.c index 9763ed466fe451d0abed8fc1688cb3ef697fe99f..4e0397d4e562b3d499582d4e59f4c021ed10f1d3 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_io.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_io.c @@ -1,6 +1,6 @@ /* ** I/O library. 
-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -60,12 +60,12 @@ static IOFileUD *io_tofile(lua_State *L) return iof; } -static FILE *io_stdfile(lua_State *L, ptrdiff_t id) +static IOFileUD *io_stdfile(lua_State *L, ptrdiff_t id) { IOFileUD *iof = IOSTDF_IOF(L, id); if (iof->fp == NULL) lj_err_caller(L, LJ_ERR_IOSTDCL); - return iof->fp; + return iof; } static IOFileUD *io_file_new(lua_State *L) @@ -99,11 +99,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof) int stat = -1; #if LJ_TARGET_POSIX stat = pclose(iof->fp); -#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE +#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP stat = _pclose(iof->fp); -#else - lua_assert(0); - return 0; #endif #if LJ_52 iof->fp = NULL; @@ -112,7 +109,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof) ok = (stat != -1); #endif } else { - lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF); + lj_assertL((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF, + "close of unknown FILE* type"); setnilV(L->top++); lua_pushliteral(L, "cannot close standard file"); return 2; @@ -180,7 +178,7 @@ static int io_file_readlen(lua_State *L, FILE *fp, MSize m) MSize n = (MSize)fread(buf, 1, m, fp); setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); lj_gc_check(L); - return (n > 0 || m == 0); + return n > 0; } else { int c = getc(fp); ungetc(c, fp); @@ -189,8 +187,9 @@ static int io_file_readlen(lua_State *L, FILE *fp, MSize m) } } -static int io_file_read(lua_State *L, FILE *fp, int start) +static int io_file_read(lua_State *L, IOFileUD *iof, int start) { + FILE *fp = iof->fp; int ok, n, nargs = (int)(L->top - L->base) - start; clearerr(fp); if (nargs == 0) { @@ -226,8 +225,9 @@ static int io_file_read(lua_State *L, FILE *fp, int 
start) return n - start; } -static int io_file_write(lua_State *L, FILE *fp, int start) +static int io_file_write(lua_State *L, IOFileUD *iof, int start) { + FILE *fp = iof->fp; cTValue *tv; int status = 1; for (tv = L->base+start; tv < L->top; tv++) { @@ -255,13 +255,11 @@ static int io_file_iter(lua_State *L) lj_err_caller(L, LJ_ERR_IOCLFL); L->top = L->base; if (n) { /* Copy upvalues with options to stack. */ - if (n > LUAI_MAXCSTACK) - lj_err_caller(L, LJ_ERR_STKOV); lj_state_checkstack(L, (MSize)n); memcpy(L->top, &fn->c.upvalue[1], n*sizeof(TValue)); L->top += n; } - n = io_file_read(L, iof->fp, 0); + n = io_file_read(L, iof, 0); if (ferror(iof->fp)) lj_err_callermsg(L, strVdata(L->top-2)); if (tvisnil(L->base) && (iof->type & IOFILE_FLAG_CLOSE)) { @@ -286,19 +284,25 @@ static int io_file_lines(lua_State *L) LJLIB_CF(io_method_close) { - IOFileUD *iof = L->base < L->top ? io_tofile(L) : - IOSTDF_IOF(L, GCROOT_IO_OUTPUT); + IOFileUD *iof; + if (L->base < L->top) { + iof = io_tofile(L); + } else { + iof = IOSTDF_IOF(L, GCROOT_IO_OUTPUT); + if (iof->fp == NULL) + lj_err_caller(L, LJ_ERR_IOCLFL); + } return io_file_close(L, iof); } LJLIB_CF(io_method_read) { - return io_file_read(L, io_tofile(L)->fp, 1); + return io_file_read(L, io_tofile(L), 1); } LJLIB_CF(io_method_write) LJLIB_REC(io_write 0) { - return io_file_write(L, io_tofile(L)->fp, 1); + return io_file_write(L, io_tofile(L), 1); } LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0) @@ -306,6 +310,14 @@ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0) return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL); } +#if LJ_32 && defined(__ANDROID__) && __ANDROID_API__ < 24 +/* The Android NDK is such an unmatched marvel of engineering. 
*/ +extern int fseeko32(FILE *, long int, int) __asm__("fseeko"); +extern long int ftello32(FILE *) __asm__("ftello"); +#define fseeko(fp, pos, whence) (fseeko32((fp), (pos), (whence))) +#define ftello(fp) (ftello32((fp))) +#endif + LJLIB_CF(io_method_seek) { FILE *fp = io_tofile(L)->fp; @@ -406,7 +418,7 @@ LJLIB_CF(io_open) LJLIB_CF(io_popen) { -#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE) +#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP) const char *fname = strdata(lj_lib_checkstr(L, 1)); GCstr *s = lj_lib_optstr(L, 2); const char *mode = s ? strdata(s) : "r"; @@ -427,7 +439,7 @@ LJLIB_CF(io_popen) LJLIB_CF(io_tmpfile) { IOFileUD *iof = io_file_new(L); -#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA +#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX iof->fp = NULL; errno = ENOSYS; #else iof->fp = tmpfile(); @@ -452,7 +464,7 @@ LJLIB_CF(io_write) LJLIB_REC(io_write GCROOT_IO_OUTPUT) LJLIB_CF(io_flush) LJLIB_REC(io_flush GCROOT_IO_OUTPUT) { - return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL); + return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)->fp) == 0, NULL); } static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode) diff --git a/source/libs/luajit/LuaJIT-src/src/lib_jit.c b/source/libs/luajit/LuaJIT-src/src/lib_jit.c index 22ca0a1a249173e149ed2eb34ccb20ff0dac0ce1..fd8e585b83683b832168364233df9012ea846d37 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_jit.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_jit.c @@ -1,6 +1,6 @@ /* ** JIT library. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lib_jit_c @@ -104,8 +104,8 @@ LJLIB_CF(jit_status) jit_State *J = L2J(L); L->top = L->base; setboolV(L->top++, (J->flags & JIT_F_ON) ? 
1 : 0); - flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); - flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); + flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING); + flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING); return (int)(L->top - L->base); #else setboolV(L->top++, 0); @@ -113,6 +113,13 @@ LJLIB_CF(jit_status) #endif } +LJLIB_CF(jit_security) +{ + int idx = lj_lib_checkopt(L, 1, -1, LJ_SECURITY_MODESTRING); + setintV(L->top++, ((LJ_SECURITY_MODE >> (2*idx)) & 3)); + return 1; +} + LJLIB_CF(jit_attach) { #ifdef LUAJIT_DISABLE_VMEVENT @@ -154,24 +161,6 @@ LJLIB_PUSH(top-2) LJLIB_SET(version) /* -- Reflection API for Lua functions ------------------------------------ */ -/* Return prototype of first argument (Lua function or prototype object) */ -static GCproto *check_Lproto(lua_State *L, int nolua) -{ - TValue *o = L->base; - if (L->top > o) { - if (tvisproto(o)) { - return protoV(o); - } else if (tvisfunc(o)) { - if (isluafunc(funcV(o))) - return funcproto(funcV(o)); - else if (nolua) - return NULL; - } - } - lj_err_argt(L, 1, LUA_TFUNCTION); - return NULL; /* unreachable */ -} - static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val) { setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val); @@ -180,7 +169,7 @@ static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val) /* local info = jit.util.funcinfo(func [,pc]) */ LJLIB_CF(jit_util_funcinfo) { - GCproto *pt = check_Lproto(L, 1); + GCproto *pt = lj_lib_checkLproto(L, 1, 1); if (pt) { BCPos pc = (BCPos)lj_lib_optint(L, 2, 0); GCtab *t; @@ -222,12 +211,12 @@ LJLIB_CF(jit_util_funcinfo) /* local ins, m = jit.util.funcbc(func, pc) */ LJLIB_CF(jit_util_funcbc) { - GCproto *pt = check_Lproto(L, 0); + GCproto *pt = lj_lib_checkLproto(L, 1, 0); BCPos pc = (BCPos)lj_lib_checkint(L, 2); if (pc < pt->sizebc) { BCIns ins = proto_bc(pt)[pc]; BCOp op = bc_op(ins); - lua_assert(op < BC__MAX); + lj_assertL(op < BC__MAX, 
"bad bytecode op %d", op); setintV(L->top, ins); setintV(L->top+1, lj_bc_mode[op]); L->top += 2; @@ -239,7 +228,7 @@ LJLIB_CF(jit_util_funcbc) /* local k = jit.util.funck(func, idx) */ LJLIB_CF(jit_util_funck) { - GCproto *pt = check_Lproto(L, 0); + GCproto *pt = lj_lib_checkLproto(L, 1, 0); ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2); if (idx >= 0) { if (idx < (ptrdiff_t)pt->sizekn) { @@ -259,7 +248,7 @@ LJLIB_CF(jit_util_funck) /* local name = jit.util.funcuvname(func, idx) */ LJLIB_CF(jit_util_funcuvname) { - GCproto *pt = check_Lproto(L, 0); + GCproto *pt = lj_lib_checkLproto(L, 1, 0); uint32_t idx = (uint32_t)lj_lib_checkint(L, 2); if (idx < pt->sizeuv) { setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx))); @@ -339,11 +328,7 @@ LJLIB_CF(jit_util_tracek) ir = &T->ir[ir->op1]; } #if LJ_HASFFI - if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) { - ptrdiff_t oldtop = savestack(L, L->top); - luaopen_ffi(L); /* Load FFI library on-demand. */ - L->top = restorestack(L, oldtop); - } + if (ir->o == IR_KINT64) ctype_loadffi(L); #endif lj_ir_kvalue(L, L->top-2, ir); setintV(L->top-1, (int32_t)irt_type(ir->t)); @@ -419,7 +404,8 @@ LJLIB_CF(jit_util_ircalladdr) { uint32_t idx = (uint32_t)lj_lib_checkint(L, 1); if (idx < IRCALL__MAX) { - setintptrV(L->top-1, (intptr_t)(void *)lj_ir_callinfo[idx].func); + ASMFunction func = lj_ir_callinfo[idx].func; + setintptrV(L->top-1, (intptr_t)(void *)lj_ptr_strip(func)); return 1; } return 0; @@ -471,7 +457,7 @@ static int jitopt_flag(jit_State *J, const char *str) str += str[2] == '-' ? 
3 : 2; set = 0; } - for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { + for (opt = JIT_F_OPT; ; opt <<= 1) { size_t len = *(const uint8_t *)lst; if (len == 0) break; @@ -491,7 +477,7 @@ static int jitopt_param(jit_State *J, const char *str) int i; for (i = 0; i < JIT_P__MAX; i++) { size_t len = *(const uint8_t *)lst; - lua_assert(len != 0); + lj_assertJ(len != 0, "bad JIT_P_STRING"); if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { int32_t n = 0; const char *p = &str[len+1]; @@ -540,15 +526,15 @@ LJLIB_CF(jit_opt_start) /* Not loaded by default, use: local profile = require("jit.profile") */ -static const char KEY_PROFILE_THREAD = 't'; -static const char KEY_PROFILE_FUNC = 'f'; +#define KEY_PROFILE_THREAD (U64x(80000000,00000000)|'t') +#define KEY_PROFILE_FUNC (U64x(80000000,00000000)|'f') static void jit_profile_callback(lua_State *L2, lua_State *L, int samples, int vmstate) { TValue key; cTValue *tv; - setlightudV(&key, (void *)&KEY_PROFILE_FUNC); + key.u64 = KEY_PROFILE_FUNC; tv = lj_tab_get(L, tabV(registry(L)), &key); if (tvisfunc(tv)) { char vmst = (char)vmstate; @@ -575,9 +561,9 @@ LJLIB_CF(jit_profile_start) lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */ TValue key; /* Anchor thread and function in registry. */ - setlightudV(&key, (void *)&KEY_PROFILE_THREAD); + key.u64 = KEY_PROFILE_THREAD; setthreadV(L, lj_tab_set(L, registry, &key), L2); - setlightudV(&key, (void *)&KEY_PROFILE_FUNC); + key.u64 = KEY_PROFILE_FUNC; setfuncV(L, lj_tab_set(L, registry, &key), func); lj_gc_anybarriert(L, registry); luaJIT_profile_start(L, mode ? 
strdata(mode) : "", @@ -592,9 +578,9 @@ LJLIB_CF(jit_profile_stop) TValue key; luaJIT_profile_stop(L); registry = tabV(registry(L)); - setlightudV(&key, (void *)&KEY_PROFILE_THREAD); + key.u64 = KEY_PROFILE_THREAD; setnilV(lj_tab_set(L, registry, &key)); - setlightudV(&key, (void *)&KEY_PROFILE_FUNC); + key.u64 = KEY_PROFILE_FUNC; setnilV(lj_tab_set(L, registry, &key)); lj_gc_anybarriert(L, registry); return 0; @@ -640,59 +626,41 @@ JIT_PARAMDEF(JIT_PARAMINIT) #undef JIT_PARAMINIT 0 }; -#endif #if LJ_TARGET_ARM && LJ_TARGET_LINUX #include <sys/utsname.h> #endif -/* Arch-dependent CPU detection. */ -static uint32_t jit_cpudetect(lua_State *L) +/* Arch-dependent CPU feature detection. */ +static uint32_t jit_cpudetect(void) { uint32_t flags = 0; #if LJ_TARGET_X86ORX64 + uint32_t vendor[4]; uint32_t features[4]; if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { -#if !LJ_HASJIT -#define JIT_F_SSE2 2 -#endif - flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; -#if LJ_HASJIT flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; - if (vendor[2] == 0x6c65746e) { /* Intel. */ - if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ - flags |= JIT_F_LEA_AGU; - } else if (vendor[2] == 0x444d4163) { /* AMD. */ - uint32_t fam = (features[0] & 0x0ff00f00); - if (fam >= 0x00000f00) /* K8, K10. */ - flags |= JIT_F_PREFER_IMUL; - } if (vendor[0] >= 7) { uint32_t xfeatures[4]; lj_vm_cpuid(7, xfeatures); flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; } -#endif } - /* Check for required instruction set support on x86 (unnecessary on x64). */ -#if LJ_TARGET_X86 - if (!(flags & JIT_F_SSE2)) - luaL_error(L, "CPU with SSE2 required"); -#endif + /* Don't bother checking for SSE2 -- the VM will crash before getting here. */ + #elif LJ_TARGET_ARM -#if LJ_HASJIT + int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ #if LJ_TARGET_LINUX if (ver < 70) { /* Runtime ARM CPU detection. 
*/ struct utsname ut; uname(&ut); if (strncmp(ut.machine, "armv", 4) == 0) { - if (ut.machine[4] >= '7') - ver = 70; - else if (ut.machine[4] == '6') - ver = 60; + if (ut.machine[4] >= '8') ver = 80; + else if (ut.machine[4] == '7') ver = 70; + else if (ut.machine[4] == '6') ver = 60; } } #endif @@ -700,20 +668,22 @@ static uint32_t jit_cpudetect(lua_State *L) ver >= 61 ? JIT_F_ARMV6T2_ : ver >= 60 ? JIT_F_ARMV6_ : 0; flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; -#endif + #elif LJ_TARGET_ARM64 + /* No optional CPU features to detect (for now). */ + #elif LJ_TARGET_PPC -#if LJ_HASJIT + #if LJ_ARCH_SQRT flags |= JIT_F_SQRT; #endif #if LJ_ARCH_ROUND flags |= JIT_F_ROUND; #endif -#endif + #elif LJ_TARGET_MIPS -#if LJ_HASJIT + /* Compile-time MIPS CPU detection. */ #if LJ_ARCH_VERSION >= 20 flags |= JIT_F_MIPSXXR2; @@ -731,34 +701,31 @@ static uint32_t jit_cpudetect(lua_State *L) if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ } #endif -#endif + #else #error "Missing CPU detection for this architecture" #endif - UNUSED(L); return flags; } /* Initialize JIT compiler. */ static void jit_init(lua_State *L) { - uint32_t flags = jit_cpudetect(L); -#if LJ_HASJIT jit_State *J = L2J(L); - J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; + J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; memcpy(J->param, jit_param_default, sizeof(J->param)); lj_dispatch_update(G(L)); -#else - UNUSED(flags); -#endif } +#endif LUALIB_API int luaopen_jit(lua_State *L) { +#if LJ_HASJIT jit_init(L); +#endif lua_pushliteral(L, LJ_OS_NAME); lua_pushliteral(L, LJ_ARCH_NAME); - lua_pushinteger(L, LUAJIT_VERSION_NUM); + lua_pushinteger(L, LUAJIT_VERSION_NUM); /* Deprecated. 
*/ lua_pushliteral(L, LUAJIT_VERSION); LJ_LIB_REG(L, LUA_JITLIBNAME, jit); #if LJ_HASPROFILE diff --git a/source/libs/luajit/LuaJIT-src/src/lib_math.c b/source/libs/luajit/LuaJIT-src/src/lib_math.c index ef9dda2da66316654bd78d8ed4e2861eb0e076b0..7f2b5277d2da8334b0e56ecc2b71b77e55daaa76 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_math.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_math.c @@ -1,6 +1,6 @@ /* ** Math library. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include <math.h> @@ -13,8 +13,10 @@ #include "lualib.h" #include "lj_obj.h" +#include "lj_err.h" #include "lj_lib.h" #include "lj_vm.h" +#include "lj_prng.h" /* ------------------------------------------------------------------------ */ @@ -33,19 +35,19 @@ LJLIB_ASM(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT) lj_lib_checknum(L, 1); return FFH_RETRY; } -LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10) -LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP) -LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN) -LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS) -LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) -LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) -LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) -LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) -LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh) -LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh) -LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) +LJLIB_ASM_(math_log10) LJLIB_REC(math_call IRCALL_log10) +LJLIB_ASM_(math_exp) LJLIB_REC(math_call IRCALL_exp) +LJLIB_ASM_(math_sin) LJLIB_REC(math_call IRCALL_sin) +LJLIB_ASM_(math_cos) LJLIB_REC(math_call IRCALL_cos) +LJLIB_ASM_(math_tan) LJLIB_REC(math_call IRCALL_tan) +LJLIB_ASM_(math_asin) LJLIB_REC(math_call IRCALL_asin) +LJLIB_ASM_(math_acos) LJLIB_REC(math_call IRCALL_acos) +LJLIB_ASM_(math_atan) LJLIB_REC(math_call IRCALL_atan) 
+LJLIB_ASM_(math_sinh) LJLIB_REC(math_call IRCALL_sinh) +LJLIB_ASM_(math_cosh) LJLIB_REC(math_call IRCALL_cosh) +LJLIB_ASM_(math_tanh) LJLIB_REC(math_call IRCALL_tanh) LJLIB_ASM_(math_frexp) -LJLIB_ASM_(math_modf) LJLIB_REC(.) +LJLIB_ASM_(math_modf) LJLIB_ASM(math_log) LJLIB_REC(math_log) { @@ -105,34 +107,11 @@ LJLIB_PUSH(1e310) LJLIB_SET(huge) ** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. */ -/* PRNG state. */ -struct RandomState { - uint64_t gen[4]; /* State of the 4 LFSR generators. */ - int valid; /* State is valid. */ -}; - /* Union needed for bit-pattern conversion between uint64_t and double. */ typedef union { uint64_t u64; double d; } U64double; -/* Update generator i and compute a running xor of all states. */ -#define TW223_GEN(i, k, q, s) \ - z = rs->gen[i]; \ - z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ - r ^= z; rs->gen[i] = z; - -/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ -LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs) -{ - uint64_t z, r = 0; - TW223_GEN(0, 63, 31, 18) - TW223_GEN(1, 58, 19, 28) - TW223_GEN(2, 55, 24, 7) - TW223_GEN(3, 47, 21, 8) - return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); -} - -/* PRNG initialization function. */ -static void random_init(RandomState *rs, double d) +/* PRNG seeding function. */ +static void random_seed(PRNGState *rs, double d) { uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ int i; @@ -141,24 +120,22 @@ static void random_init(RandomState *rs, double d) uint32_t m = 1u << (r&255); r >>= 8; u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; - if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ - rs->gen[i] = u.u64; + if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of u[i] are non-zero. */ + rs->u[i] = u.u64; } - rs->valid = 1; for (i = 0; i < 10; i++) - lj_math_random_step(rs); + (void)lj_prng_u64(rs); } /* PRNG extract function. 
*/ -LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ +LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */ LJLIB_CF(math_random) LJLIB_REC(.) { int n = (int)(L->top - L->base); - RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); U64double u; double d; - if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0); - u.u64 = lj_math_random_step(rs); + u.u64 = lj_prng_u64d(rs); d = u.d - 1.0; if (n > 0) { #if LJ_DUALNUM @@ -203,11 +180,14 @@ LJLIB_CF(math_random) LJLIB_REC(.) } /* PRNG seed function. */ -LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ +LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */ LJLIB_CF(math_randomseed) { - RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); - random_init(rs, lj_lib_checknum(L, 1)); + PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + if (L->base != L->top) + random_seed(rs, lj_lib_checknum(L, 1)); + else if (!lj_prng_seed_secure(rs)) + lj_err_caller(L, LJ_ERR_PRNGSD); return 0; } @@ -217,9 +197,8 @@ LJLIB_CF(math_randomseed) LUALIB_API int luaopen_math(lua_State *L) { - RandomState *rs; - rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); - rs->valid = 0; /* Use lazy initialization to save some time on startup. */ + PRNGState *rs = (PRNGState *)lua_newuserdata(L, sizeof(PRNGState)); + lj_prng_seed_fixed(rs); LJ_LIB_REG(L, LUA_MATHLIBNAME, math); return 1; } diff --git a/source/libs/luajit/LuaJIT-src/src/lib_os.c b/source/libs/luajit/LuaJIT-src/src/lib_os.c index 9e78d49ac3c68612927f735fd016c263b127fd07..ae3fc8578396849ad65dd49da986678e5490d89e 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_os.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_os.c @@ -1,6 +1,6 @@ /* ** OS library. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -76,7 +76,7 @@ LJLIB_CF(os_rename) LJLIB_CF(os_tmpname) { -#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA +#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX lj_err_caller(L, LJ_ERR_OSUNIQF); return 0; #else @@ -205,12 +205,12 @@ LJLIB_CF(os_date) setboolfield(L, "isdst", stm->tm_isdst); } else if (*s) { SBuf *sb = &G(L)->tmpbuf; - MSize sz = 0; + MSize sz = 0, retry = 4; const char *q; for (q = s; *q; q++) sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */ setsbufL(sb, L); - for (;;) { + while (retry--) { /* Limit growth for invalid format or empty result. */ char *buf = lj_buf_need(sb, sz); size_t len = strftime(buf, sbufsz(sb), s, stm); if (len) { diff --git a/source/libs/luajit/LuaJIT-src/src/lib_package.c b/source/libs/luajit/LuaJIT-src/src/lib_package.c index 18adf09177f1924cdf869d7bffece3e48b980c54..7c691a178f0e65606228a2f404d04efd68cdbf67 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_package.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_package.c @@ -1,6 +1,6 @@ /* ** Package library. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h @@ -57,7 +57,7 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) static const char *ll_bcsym(void *lib, const char *sym) { -#if defined(RTLD_DEFAULT) +#if defined(RTLD_DEFAULT) && !defined(NO_RTLD_DEFAULT) if (lib == NULL) lib = RTLD_DEFAULT; #elif LJ_TARGET_OSX || LJ_TARGET_BSD if (lib == NULL) lib = (void *)(intptr_t)-2; @@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym) BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); #endif +#if LJ_TARGET_UWP +void *LJ_WIN_LOADLIBA(const char *path) +{ + DWORD err = GetLastError(); + wchar_t wpath[256]; + HANDLE lib = NULL; + if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) { + lib = LoadPackagedLibrary(wpath, 0); + } + SetLastError(err); + return lib; +} +#endif + #undef setprogdir static void setprogdir(lua_State *L) @@ -119,7 +133,7 @@ static void ll_unloadlib(void *lib) static void *ll_load(lua_State *L, const char *path, int gl) { - HINSTANCE lib = LoadLibraryExA(path, NULL, 0); + HINSTANCE lib = LJ_WIN_LOADLIBA(path); if (lib == NULL) pusherror(L); UNUSED(gl); return lib; @@ -132,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) return f; } +#if LJ_TARGET_UWP +EXTERN_C IMAGE_DOS_HEADER __ImageBase; +#endif + static const char *ll_bcsym(void *lib, const char *sym) { if (lib) { return (const char *)GetProcAddress((HINSTANCE)lib, sym); } else { +#if LJ_TARGET_UWP + return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym); +#else HINSTANCE h = GetModuleHandleA(NULL); const char *p = (const char *)GetProcAddress(h, sym); if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (const char *)ll_bcsym, &h)) p = (const char *)GetProcAddress(h, sym); return p; +#endif } } @@ -215,7 +237,12 @@ static const char *mksymname(lua_State *L, const char *modname, static int ll_loadfunc(lua_State *L, const char *path, const char *name, 
int r) { - void **reg = ll_register(L, path); + void **reg; + if (strlen(path) >= 4096) { + lua_pushliteral(L, "path too long"); + return PACKAGE_ERR_LIB; + } + reg = ll_register(L, path); if (*reg == NULL) *reg = ll_load(L, path, (*name == '*')); if (*reg == NULL) { return PACKAGE_ERR_LIB; /* Unable to load library. */ @@ -233,7 +260,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r) const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC)); lua_pop(L, 1); if (bcdata) { - if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0) + if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0) return PACKAGE_ERR_LOAD; return 0; } @@ -351,16 +378,6 @@ static int lj_cf_package_loader_lua(lua_State *L) return 1; /* library loaded successfully */ } -static int lj_cf_package_loader_c(lua_State *L) -{ - const char *name = luaL_checkstring(L, 1); - const char *filename = findfile(L, name, "cpath"); - if (filename == NULL) return 1; /* library not found in this path */ - if (ll_loadfunc(L, filename, name, 0) != 0) - loaderror(L, filename); - return 1; /* library loaded successfully */ -} - #define LUA_POF "luaopen_" #define LUA_OFSEP "_" #define POF LUA_POF @@ -383,6 +400,17 @@ int loader_C_luatex (lua_State *L, const char *name, const char *filename) { return 1; /* library loaded successfully */ } + +static int lj_cf_package_loader_c(lua_State *L) +{ + const char *name = luaL_checkstring(L, 1); + const char *filename = findfile(L, name, "cpath"); + if (filename == NULL) return 1; /* library not found in this path */ + if (ll_loadfunc(L, filename, name, 0) != 0) + loaderror(L, filename); + return 1; /* library loaded successfully */ +} + static int lj_cf_package_loader_croot(lua_State *L) { const char *filename; @@ -426,7 +454,7 @@ static int lj_cf_package_loader_preload(lua_State *L) if (lua_isnil(L, -1)) { /* Not found? 
*/ const char *bcname = mksymname(L, name, SYMPREFIX_BC); const char *bcdata = ll_bcsym(NULL, bcname); - if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0) + if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0) lua_pushfstring(L, "\n\tno field package.preload['%s']", name); } return 1; @@ -434,7 +462,7 @@ static int lj_cf_package_loader_preload(lua_State *L) /* ------------------------------------------------------------------------ */ -#define sentinel ((void *)0x4004) +#define KEY_SENTINEL (U64x(80000000,00000000)|'s') static int lj_cf_package_require(lua_State *L) { @@ -444,7 +472,7 @@ static int lj_cf_package_require(lua_State *L) lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); lua_getfield(L, 2, name); if (lua_toboolean(L, -1)) { /* is it there? */ - if (lua_touserdata(L, -1) == sentinel) /* check loops */ + if ((L->top-1)->u64 == KEY_SENTINEL) /* check loops */ luaL_error(L, "loop or previous error loading module " LUA_QS, name); return 1; /* package is already loaded */ } @@ -467,14 +495,14 @@ static int lj_cf_package_require(lua_State *L) else lua_pop(L, 1); } - lua_pushlightuserdata(L, sentinel); + (L->top++)->u64 = KEY_SENTINEL; lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */ lua_pushstring(L, name); /* pass name as argument to module */ lua_call(L, 1, 1); /* run loaded module */ if (!lua_isnil(L, -1)) /* non-nil return? */ lua_setfield(L, 2, name); /* _LOADED[name] = returned value */ lua_getfield(L, 2, name); - if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */ + if ((L->top-1)->u64 == KEY_SENTINEL) { /* module did not set a value? 
*/ lua_pushboolean(L, 1); /* use true as result */ lua_pushvalue(L, -1); /* extra copy to be returned */ lua_setfield(L, 2, name); /* _LOADED[name] = true */ diff --git a/source/libs/luajit/LuaJIT-src/src/lib_string.c b/source/libs/luajit/LuaJIT-src/src/lib_string.c index 76b0730aca34b813466526ece19a5cb4575e3fb2..a037c8552ba0d57379e5e1e718bc06d0a58f025c 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_string.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_string.c @@ -1,6 +1,6 @@ /* ** String library. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -122,11 +122,25 @@ static int writer_buf(lua_State *L, const void *p, size_t size, void *sb) LJLIB_CF(string_dump) { - GCfunc *fn = lj_lib_checkfunc(L, 1); - int strip = L->base+1 < L->top && tvistruecond(L->base+1); - SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */ + GCproto *pt = lj_lib_checkLproto(L, 1, 1); + uint32_t flags = 0; + SBuf *sb; + TValue *o = L->base+1; + if (o < L->top) { + if (tvisstr(o)) { + const char *mode = strVdata(o); + char c; + while ((c = *mode++)) { + if (c == 's') flags |= BCDUMP_F_STRIP; + if (c == 'd') flags |= BCDUMP_F_DETERMINISTIC; + } + } else if (tvistruecond(o)) { + flags |= BCDUMP_F_STRIP; + } + } + sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. 
*/ L->top = L->base+1; - if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip)) + if (!pt || lj_bcwrite(L, pt, writer_buf, sb, flags)) lj_err_caller(L, LJ_ERR_STRDUMP); setstrV(L, L->top-1, lj_buf_str(L, sb)); lj_gc_check(L); @@ -136,7 +150,7 @@ LJLIB_CF(string_dump) /* ------------------------------------------------------------------------ */ /* macro to `unsign' a character */ -#define uchar(c) ((unsigned char)(c)) +#define uchar(c) ((unsigned char)(c)) #define CAP_UNFINISHED (-1) #define CAP_POSITION (-2) @@ -640,89 +654,14 @@ LJLIB_CF(string_gsub) /* ------------------------------------------------------------------------ */ -/* Emulate tostring() inline. */ -static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry) -{ - TValue *o = L->base+arg-1; - cTValue *mo; - lua_assert(o < L->top); /* Caller already checks for existence. */ - if (LJ_LIKELY(tvisstr(o))) - return strV(o); - if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { - copyTV(L, L->top++, mo); - copyTV(L, L->top++, o); - lua_call(L, 1, 1); - copyTV(L, L->base+arg-1, --L->top); - return NULL; /* Buffer may be overwritten, retry. */ - } - return lj_strfmt_obj(L, o); -} - LJLIB_CF(string_format) LJLIB_REC(.) { - int arg, top = (int)(L->top - L->base); - GCstr *fmt; - SBuf *sb; - FormatState fs; - SFormat sf; int retry = 0; -again: - arg = 1; - sb = lj_buf_tmp_(L); - fmt = lj_lib_checkstr(L, arg); - lj_strfmt_init(&fs, strdata(fmt), fmt->len); - while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { - if (sf == STRFMT_LIT) { - lj_buf_putmem(sb, fs.str, fs.len); - } else if (sf == STRFMT_ERR) { - lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len))); - } else { - if (++arg > top) - luaL_argerror(L, arg, lj_obj_typename[0]); - switch (STRFMT_TYPE(sf)) { - case STRFMT_INT: - if (tvisint(L->base+arg-1)) { - int32_t k = intV(L->base+arg-1); - if (sf == STRFMT_INT) - lj_strfmt_putint(sb, k); /* Shortcut for plain %d. 
*/ - else - lj_strfmt_putfxint(sb, sf, k); - } else { - lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg)); - } - break; - case STRFMT_UINT: - if (tvisint(L->base+arg-1)) - lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1)); - else - lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg)); - break; - case STRFMT_NUM: - lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg)); - break; - case STRFMT_STR: { - GCstr *str = string_fmt_tostring(L, arg, retry); - if (str == NULL) - retry = 1; - else if ((sf & STRFMT_T_QUOTED)) - lj_strfmt_putquoted(sb, str); /* No formatting. */ - else - lj_strfmt_putfstr(sb, sf, str); - break; - } - case STRFMT_CHAR: - lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg)); - break; - case STRFMT_PTR: /* No formatting. */ - lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1)); - break; - default: - lua_assert(0); - break; - } - } - } - if (retry++ == 1) goto again; + SBuf *sb; + do { + sb = lj_buf_tmp_(L); + retry = lj_strfmt_putarg(L, sb, 1, -retry); + } while (retry > 0); setstrV(L, L->top-1, lj_buf_str(L, sb)); lj_gc_check(L); return 1; @@ -743,6 +682,9 @@ LUALIB_API int luaopen_string(lua_State *L) setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1)); mt->nomm = (uint8_t)(~(1u<<MM_index)); +#if LJ_HASBUFFER + lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1)); +#endif return 1; } diff --git a/source/libs/luajit/LuaJIT-src/src/lib_table.c b/source/libs/luajit/LuaJIT-src/src/lib_table.c index 0450f1f684b5677be353bd3e9b37c507ba8c1246..d159360bad5a186f00106db2db30cc40f87f40c6 100644 --- a/source/libs/luajit/LuaJIT-src/src/lib_table.c +++ b/source/libs/luajit/LuaJIT-src/src/lib_table.c @@ -1,6 +1,6 @@ /* ** Table library. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. 
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -159,7 +159,7 @@ LJLIB_CF(table_concat) LJLIB_REC(.) SBuf *sb = lj_buf_tmp_(L); SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e); if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */ - int32_t idx = (int32_t)(intptr_t)sbufP(sb); + int32_t idx = (int32_t)(intptr_t)sb->w; cTValue *o = lj_tab_getint(t, idx); lj_err_callerv(L, LJ_ERR_TABCAT, lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx); diff --git a/source/libs/luajit/LuaJIT-src/src/lj.supp b/source/libs/luajit/LuaJIT-src/src/lj.supp deleted file mode 100644 index 217f7c8904f2d2a83abc006e4b73d3730e7d409b..0000000000000000000000000000000000000000 --- a/source/libs/luajit/LuaJIT-src/src/lj.supp +++ /dev/null @@ -1,41 +0,0 @@ -# Valgrind suppression file for LuaJIT 2.0. -{ - Optimized string compare - Memcheck:Addr4 - fun:lj_str_cmp -} -{ - Optimized string compare - Memcheck:Addr1 - fun:lj_str_cmp -} -{ - Optimized string compare - Memcheck:Addr4 - fun:lj_str_new -} -{ - Optimized string compare - Memcheck:Addr1 - fun:lj_str_new -} -{ - Optimized string compare - Memcheck:Cond - fun:lj_str_new -} -{ - Optimized string compare - Memcheck:Addr4 - fun:str_fastcmp -} -{ - Optimized string compare - Memcheck:Addr1 - fun:str_fastcmp -} -{ - Optimized string compare - Memcheck:Cond - fun:str_fastcmp -} diff --git a/source/libs/luajit/LuaJIT-src/src/lj_alloc.c b/source/libs/luajit/LuaJIT-src/src/lj_alloc.c index e482a834669ddf80e00b077e534a8dba13545446..cb704f7b3f5b18a0c702610fbfdf24c8b6a68acb 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_alloc.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_alloc.c @@ -6,7 +6,7 @@ ** ** This is a version (aka dlmalloc) of malloc/free/realloc written by ** Doug Lea and released to the public domain, as explained at -** http://creativecommons.org/licenses/publicdomain. +** https://creativecommons.org/licenses/publicdomain. 
** ** * Version pre-2.8.4 Wed Mar 29 19:46:29 2006 (dl at gee) ** @@ -16,8 +16,8 @@ ** If you want to use dlmalloc in another project, you should get ** the original from: ftp://gee.cs.oswego.edu/pub/misc/ ** For thread-safe derivatives, take a look at: -** - ptmalloc: http://www.malloc.de/ -** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/ +** - ptmalloc: https://www.malloc.de/ +** - nedmalloc: https://www.nedprod.com/programs/portable/nedmalloc/ */ #define lj_alloc_c @@ -31,6 +31,7 @@ #include "lj_def.h" #include "lj_arch.h" #include "lj_alloc.h" +#include "lj_prng.h" #ifndef LUAJIT_USE_SYSMALLOC @@ -123,7 +124,7 @@ #if LJ_ALLOC_NTAVM /* Undocumented, but hey, that's what we all love so much about Windows. */ -typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, +typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG_PTR zbits, size_t *size, ULONG alloctype, ULONG prot); static PNTAVM ntavm; @@ -140,7 +141,7 @@ static void init_mmap(void) #define INIT_MMAP() init_mmap() /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ -static void *CALL_MMAP(size_t size) +static void *mmap_plain(size_t size) { DWORD olderr = GetLastError(); void *ptr = NULL; @@ -151,7 +152,7 @@ static void *CALL_MMAP(size_t size) } /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static void *DIRECT_MMAP(size_t size) +static void *direct_mmap(size_t size) { DWORD olderr = GetLastError(); void *ptr = NULL; @@ -164,26 +165,29 @@ static void *DIRECT_MMAP(size_t size) #else /* Win32 MMAP via VirtualAlloc */ -static void *CALL_MMAP(size_t size) +static void *mmap_plain(size_t size) { DWORD olderr = GetLastError(); - void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); SetLastError(olderr); return ptr ? 
ptr : MFAIL; } /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static void *DIRECT_MMAP(size_t size) +static void *direct_mmap(size_t size) { DWORD olderr = GetLastError(); - void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, - PAGE_READWRITE); + void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, + PAGE_READWRITE); SetLastError(olderr); return ptr ? ptr : MFAIL; } #endif +#define CALL_MMAP(prng, size) mmap_plain(size) +#define DIRECT_MMAP(prng, size) direct_mmap(size) + /* This function supports releasing coalesed segments */ static int CALL_MUNMAP(void *ptr, size_t size) { @@ -226,36 +230,17 @@ static int CALL_MUNMAP(void *ptr, size_t size) #define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000) -/* No point in a giant ifdef mess. Just try to open /dev/urandom. -** It doesn't really matter if this fails, since we get some ASLR bits from -** every unsuitable allocation, too. And we prefer linear allocation, anyway. -*/ -#include <fcntl.h> -#include <unistd.h> - -static uintptr_t mmap_probe_seed(void) -{ - uintptr_t val; - int fd = open("/dev/urandom", O_RDONLY); - if (fd != -1) { - int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val)); - (void)close(fd); - if (ok) return val; - } - return 1; /* Punt. */ -} - -static void *mmap_probe(size_t size) +static void *mmap_probe(PRNGState *rs, size_t size) { /* Hint for next allocation. Doesn't need to be thread-safe. */ static uintptr_t hint_addr = 0; - static uintptr_t hint_prng = 0; int olderr = errno; int retry; for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0); uintptr_t addr = (uintptr_t)p; - if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) { + if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER && + ((addr + size) >> LJ_ALLOC_MBITS) == 0) { /* We got a suitable address. Bump the hint address. 
*/ hint_addr = addr + size; errno = olderr; @@ -280,15 +265,8 @@ static void *mmap_probe(size_t size) } } /* Finally, try pseudo-random probing. */ - if (LJ_UNLIKELY(hint_prng == 0)) { - hint_prng = mmap_probe_seed(); - } - /* The unsuitable address we got has some ASLR PRNG bits. */ - hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1)); - do { /* The PRNG itself is very weak, but see above. */ - hint_prng = hint_prng * 1103515245 + 12345; - hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE; - hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1); + do { + hint_addr = lj_prng_u64(rs) & (((uintptr_t)1<<LJ_ALLOC_MBITS)-LJ_PAGESIZE); } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER); } errno = olderr; @@ -299,18 +277,22 @@ static void *mmap_probe(size_t size) #if LJ_ALLOC_MMAP32 -#if defined(__sun__) +#if LJ_TARGET_SOLARIS #define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000) #else #define LJ_ALLOC_MMAP32_START ((uintptr_t)0) #endif +#if LJ_ALLOC_MMAP_PROBE +static void *mmap_map32(PRNGState *rs, size_t size) +#else static void *mmap_map32(size_t size) +#endif { #if LJ_ALLOC_MMAP_PROBE static int fallback = 0; if (fallback) - return mmap_probe(size); + return mmap_probe(rs, size); #endif { int olderr = errno; @@ -320,7 +302,7 @@ static void *mmap_map32(size_t size) #if LJ_ALLOC_MMAP_PROBE if (ptr == MFAIL) { fallback = 1; - return mmap_probe(size); + return mmap_probe(rs, size); } #endif return ptr; @@ -330,20 +312,25 @@ static void *mmap_map32(size_t size) #endif #if LJ_ALLOC_MMAP32 -#define CALL_MMAP(size) mmap_map32(size) +#if LJ_ALLOC_MMAP_PROBE +#define CALL_MMAP(prng, size) mmap_map32(prng, size) +#else +#define CALL_MMAP(prng, size) mmap_map32(size) +#endif #elif LJ_ALLOC_MMAP_PROBE -#define CALL_MMAP(size) mmap_probe(size) +#define CALL_MMAP(prng, size) mmap_probe(prng, size) #else -static void *CALL_MMAP(size_t size) +static void *mmap_plain(size_t size) { int olderr = errno; void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); errno = olderr; return ptr; } +#define 
CALL_MMAP(prng, size) mmap_plain(size) #endif -#if ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 +#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 && !LJ_TARGET_PS5 #include <sys/resource.h> @@ -378,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) #define CALL_MREMAP_NOMOVE 0 #define CALL_MREMAP_MAYMOVE 1 -#if LJ_64 && !LJ_GC64 +#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64) #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE #else #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE @@ -393,7 +380,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) #endif #ifndef DIRECT_MMAP -#define DIRECT_MMAP(s) CALL_MMAP(s) +#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s) #endif #ifndef CALL_MREMAP @@ -552,6 +539,7 @@ struct malloc_state { mchunkptr smallbins[(NSMALLBINS+1)*2]; tbinptr treebins[NTREEBINS]; msegment seg; + PRNGState *prng; }; typedef struct malloc_state *mstate; @@ -609,7 +597,7 @@ static int has_segment_link(mstate m, msegmentptr ss) noncontiguous segments are added. 
*/ #define TOP_FOOT_SIZE\ - (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) /* ---------------------------- Indexing Bins ---------------------------- */ @@ -834,11 +822,11 @@ static int has_segment_link(mstate m, msegmentptr ss) /* ----------------------- Direct-mmapping chunks ----------------------- */ -static void *direct_alloc(size_t nb) +static void *direct_alloc(mstate m, size_t nb) { size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */ - char *mm = (char *)(DIRECT_MMAP(mmsize)); + char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize)); if (mm != CMFAIL) { size_t offset = align_offset(chunk2mem(mm)); size_t psize = mmsize - offset - DIRECT_FOOT_PAD; @@ -850,6 +838,7 @@ static void *direct_alloc(size_t nb) return chunk2mem(p); } } + UNUSED(m); return NULL; } @@ -998,7 +987,7 @@ static void *alloc_sys(mstate m, size_t nb) /* Directly map large chunks */ if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { - void *mem = direct_alloc(nb); + void *mem = direct_alloc(m, nb); if (mem != 0) return mem; } @@ -1007,7 +996,7 @@ static void *alloc_sys(mstate m, size_t nb) size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; size_t rsize = granularity_align(req); if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ - char *mp = (char *)(CALL_MMAP(rsize)); + char *mp = (char *)(CALL_MMAP(m->prng, rsize)); if (mp != CMFAIL) { tbase = mp; tsize = rsize; @@ -1068,7 +1057,7 @@ static size_t release_unused_segments(mstate m) mchunkptr p = align_as_chunk(base); size_t psize = chunksize(p); /* Can unmap if first chunk holds entire segment and not pinned */ - if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) { + if (!cinuse(p) && (char *)p + psize == (char *)mem2chunk(sp)) { tchunkptr tp = (tchunkptr)p; if (p == m->dv) { m->dv = 0; @@ -1234,12 +1223,13 @@ static 
void *tmalloc_small(mstate m, size_t nb) /* ----------------------------------------------------------------------- */ -void *lj_alloc_create(void) +void *lj_alloc_create(PRNGState *rs) { size_t tsize = DEFAULT_GRANULARITY; char *tbase; INIT_MMAP(); - tbase = (char *)(CALL_MMAP(tsize)); + UNUSED(rs); + tbase = (char *)(CALL_MMAP(rs, tsize)); if (tbase != CMFAIL) { size_t msize = pad_request(sizeof(struct malloc_state)); mchunkptr mn; @@ -1258,6 +1248,12 @@ void *lj_alloc_create(void) return NULL; } +void lj_alloc_setprng(void *msp, PRNGState *rs) +{ + mstate ms = (mstate)msp; + ms->prng = rs; +} + void lj_alloc_destroy(void *msp) { mstate ms = (mstate)msp; diff --git a/source/libs/luajit/LuaJIT-src/src/lj_alloc.h b/source/libs/luajit/LuaJIT-src/src/lj_alloc.h index f87a7cf3422b02c2b590ba3522205a01097d8d01..669f50b79e1a3a8e536ff159ab0229168e9c35bb 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_alloc.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_alloc.h @@ -9,7 +9,8 @@ #include "lj_def.h" #ifndef LUAJIT_USE_SYSMALLOC -LJ_FUNC void *lj_alloc_create(void); +LJ_FUNC void *lj_alloc_create(PRNGState *rs); +LJ_FUNC void lj_alloc_setprng(void *msp, PRNGState *rs); LJ_FUNC void lj_alloc_destroy(void *msp); LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_api.c b/source/libs/luajit/LuaJIT-src/src/lj_api.c index d17a5754d40d6e1c220be04812143665a8272cf2..e9fc25b438c5b907c8e67b7bcc53bca3ad4b916e 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_api.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_api.c @@ -1,6 +1,6 @@ /* ** Public Lua/C API. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h @@ -28,8 +28,8 @@ /* -- Common helper functions --------------------------------------------- */ -#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base)) -#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L)) +#define lj_checkapi_slot(idx) \ + lj_checkapi((idx) <= (L->top - L->base), "stack slot %d out of range", (idx)) static TValue *index2adr(lua_State *L, int idx) { @@ -37,7 +37,8 @@ static TValue *index2adr(lua_State *L, int idx) TValue *o = L->base + (idx - 1); return o < L->top ? o : niltv(L); } else if (idx > LUA_REGISTRYINDEX) { - api_check(L, idx != 0 && -idx <= L->top - L->base); + lj_checkapi(idx != 0 && -idx <= L->top - L->base, + "bad stack slot %d", idx); return L->top + idx; } else if (idx == LUA_GLOBALSINDEX) { TValue *o = &G(L)->tmptv; @@ -47,7 +48,8 @@ static TValue *index2adr(lua_State *L, int idx) return registry(L); } else { GCfunc *fn = curr_func(L); - api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn)); + lj_checkapi(fn->c.gct == ~LJ_TFUNC && !isluafunc(fn), + "calling frame is not a C function"); if (idx == LUA_ENVIRONINDEX) { TValue *o = &G(L)->tmptv; settabV(L, o, tabref(fn->c.env)); @@ -59,13 +61,27 @@ static TValue *index2adr(lua_State *L, int idx) } } -static TValue *stkindex2adr(lua_State *L, int idx) +static LJ_AINLINE TValue *index2adr_check(lua_State *L, int idx) +{ + TValue *o = index2adr(L, idx); + lj_checkapi(o != niltv(L), "invalid stack slot %d", idx); + return o; +} + +static TValue *index2adr_stack(lua_State *L, int idx) { if (idx > 0) { TValue *o = L->base + (idx - 1); + if (o < L->top) { + return o; + } else { + lj_checkapi(0, "invalid stack slot %d", idx); + return niltv(L); + } return o < L->top ? 
o : niltv(L); } else { - api_check(L, idx != 0 && -idx <= L->top - L->base); + lj_checkapi(idx != 0 && -idx <= L->top - L->base, + "invalid stack slot %d", idx); return L->top + idx; } } @@ -88,7 +104,12 @@ LUA_API int lua_checkstack(lua_State *L, int size) if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) { return 0; /* Stack overflow. */ } else if (size > 0) { - lj_state_checkstack(L, (MSize)size); + int avail = (int)(mref(L->maxstack, TValue) - L->top); + if (size > avail && + lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) { + L->top--; + return 0; /* Out of memory. */ + } } return 1; } @@ -99,17 +120,17 @@ LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg) lj_err_callerv(L, LJ_ERR_STKOVM, msg); } -LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) +LUA_API void lua_xmove(lua_State *L, lua_State *to, int n) { TValue *f, *t; - if (from == to) return; - api_checknelems(from, n); - api_check(from, G(from) == G(to)); + if (L == to) return; + lj_checkapi_slot(n); + lj_checkapi(G(L) == G(to), "move across global states"); lj_state_checkstack(to, (MSize)n); - f = from->top; + f = L->top; t = to->top = to->top + n; while (--n >= 0) copyTV(to, --t, --f); - from->top = f; + L->top = f; } LUA_API const lua_Number *lua_version(lua_State *L) @@ -129,7 +150,7 @@ LUA_API int lua_gettop(lua_State *L) LUA_API void lua_settop(lua_State *L, int idx) { if (idx >= 0) { - api_check(L, idx <= tvref(L->maxstack) - L->base); + lj_checkapi(idx <= tvref(L->maxstack) - L->base, "bad stack slot %d", idx); if (L->base + idx > L->top) { if (L->base + idx >= tvref(L->maxstack)) lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base)); @@ -138,23 +159,21 @@ LUA_API void lua_settop(lua_State *L, int idx) L->top = L->base + idx; } } else { - api_check(L, -(idx+1) <= (L->top - L->base)); + lj_checkapi(-(idx+1) <= (L->top - L->base), "bad stack slot %d", idx); L->top += idx+1; /* Shrinks top (idx < 0). 
*/ } } LUA_API void lua_remove(lua_State *L, int idx) { - TValue *p = stkindex2adr(L, idx); - api_checkvalidindex(L, p); + TValue *p = index2adr_stack(L, idx); while (++p < L->top) copyTV(L, p-1, p); L->top--; } LUA_API void lua_insert(lua_State *L, int idx) { - TValue *q, *p = stkindex2adr(L, idx); - api_checkvalidindex(L, p); + TValue *q, *p = index2adr_stack(L, idx); for (q = L->top; q > p; q--) copyTV(L, q, q-1); copyTV(L, p, L->top); } @@ -162,19 +181,18 @@ LUA_API void lua_insert(lua_State *L, int idx) static void copy_slot(lua_State *L, TValue *f, int idx) { if (idx == LUA_GLOBALSINDEX) { - api_check(L, tvistab(f)); + lj_checkapi(tvistab(f), "stack slot %d is not a table", idx); /* NOBARRIER: A thread (i.e. L) is never black. */ setgcref(L->env, obj2gco(tabV(f))); } else if (idx == LUA_ENVIRONINDEX) { GCfunc *fn = curr_func(L); if (fn->c.gct != ~LJ_TFUNC) lj_err_msg(L, LJ_ERR_NOENV); - api_check(L, tvistab(f)); + lj_checkapi(tvistab(f), "stack slot %d is not a table", idx); setgcref(fn->c.env, obj2gco(tabV(f))); lj_gc_barrier(L, fn, f); } else { - TValue *o = index2adr(L, idx); - api_checkvalidindex(L, o); + TValue *o = index2adr_check(L, idx); copyTV(L, o, f); if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ lj_gc_barrier(L, curr_func(L), f); @@ -183,7 +201,7 @@ static void copy_slot(lua_State *L, TValue *f, int idx) LUA_API void lua_replace(lua_State *L, int idx) { - api_checknelems(L, 1); + lj_checkapi_slot(1); copy_slot(L, L->top - 1, idx); L->top--; } @@ -219,7 +237,7 @@ LUA_API int lua_type(lua_State *L, int idx) #else int tt = (int)(((t < 8 ? 
0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u); #endif - lua_assert(tt != LUA_TNIL || tvisnil(o)); + lj_assertL(tt != LUA_TNIL || tvisnil(o), "bad tag conversion"); return tt; } } @@ -595,7 +613,7 @@ LUA_API void *lua_touserdata(lua_State *L, int idx) if (tvisudata(o)) return uddata(udataV(o)); else if (tvislightud(o)) - return lightudV(o); + return lightudV(G(L), o); else return NULL; } @@ -608,7 +626,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx) LUA_API const void *lua_topointer(lua_State *L, int idx) { - return lj_obj_ptr(index2adr(L, idx)); + return lj_obj_ptr(G(L), index2adr(L, idx)); } /* -- Stack setters (object creation) ------------------------------------- */ @@ -677,14 +695,14 @@ LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n) { GCfunc *fn; lj_gc_check(L); - api_checknelems(L, n); + lj_checkapi_slot(n); fn = lj_func_newC(L, (MSize)n, getcurrenv(L)); fn->c.f = f; L->top -= n; while (n--) copyTV(L, &fn->c.upvalue[n], L->top+n); setfuncV(L, L->top, fn); - lua_assert(iswhite(obj2gco(fn))); + lj_assertL(iswhite(obj2gco(fn)), "new GC object is not white"); incr_top(L); } @@ -696,7 +714,10 @@ LUA_API void lua_pushboolean(lua_State *L, int b) LUA_API void lua_pushlightuserdata(lua_State *L, void *p) { - setlightudV(L->top, checklightudptr(L, p)); +#if LJ_64 + p = lj_lightud_intern(L, p); +#endif + setrawlightudV(L->top, p); incr_top(L); } @@ -754,7 +775,7 @@ LUA_API void *lua_newuserdata(lua_State *L, size_t size) LUA_API void lua_concat(lua_State *L, int n) { - api_checknelems(L, n); + lj_checkapi_slot(n); if (n >= 2) { n--; do { @@ -763,7 +784,7 @@ LUA_API void lua_concat(lua_State *L, int n) L->top -= n; break; } - n -= (int)(L->top - top); + n -= (int)(L->top - (top - 2*LJ_FR2)); L->top = top+2; lj_vm_call(L, top, 1+1); L->top -= 1+LJ_FR2; @@ -780,9 +801,8 @@ LUA_API void lua_concat(lua_State *L, int n) LUA_API void lua_gettable(lua_State *L, int idx) { - cTValue *v, *t = index2adr(L, idx); - api_checkvalidindex(L, t); - v = 
lj_meta_tget(L, t, L->top-1); + cTValue *t = index2adr_check(L, idx); + cTValue *v = lj_meta_tget(L, t, L->top-1); if (v == NULL) { L->top += 2; lj_vm_call(L, L->top-2, 1+1); @@ -794,9 +814,8 @@ LUA_API void lua_gettable(lua_State *L, int idx) LUA_API void lua_getfield(lua_State *L, int idx, const char *k) { - cTValue *v, *t = index2adr(L, idx); + cTValue *v, *t = index2adr_check(L, idx); TValue key; - api_checkvalidindex(L, t); setstrV(L, &key, lj_str_newz(L, k)); v = lj_meta_tget(L, t, &key); if (v == NULL) { @@ -812,14 +831,14 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k) LUA_API void lua_rawget(lua_State *L, int idx) { cTValue *t = index2adr(L, idx); - api_check(L, tvistab(t)); + lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1)); } LUA_API void lua_rawgeti(lua_State *L, int idx, int n) { cTValue *v, *t = index2adr(L, idx); - api_check(L, tvistab(t)); + lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); v = lj_tab_getint(tabV(t), n); if (v) { copyTV(L, L->top, v); @@ -861,8 +880,7 @@ LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field) LUA_API void lua_getfenv(lua_State *L, int idx) { - cTValue *o = index2adr(L, idx); - api_checkvalidindex(L, o); + cTValue *o = index2adr_check(L, idx); if (tvisfunc(o)) { settabV(L, L->top, tabref(funcV(o)->c.env)); } else if (tvisudata(o)) { @@ -879,12 +897,14 @@ LUA_API int lua_next(lua_State *L, int idx) { cTValue *t = index2adr(L, idx); int more; - api_check(L, tvistab(t)); - more = lj_tab_next(L, tabV(t), L->top-1); - if (more) { + lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); + more = lj_tab_next(tabV(t), L->top-1, L->top-1); + if (more > 0) { incr_top(L); /* Return new key and value slot. */ - } else { /* End of traversal. */ + } else if (!more) { /* End of traversal. */ L->top--; /* Remove key slot. 
*/ + } else { + lj_err_msg(L, LJ_ERR_NEXTIDX); } return more; } @@ -892,7 +912,8 @@ LUA_API int lua_next(lua_State *L, int idx) LUA_API const char *lua_getupvalue(lua_State *L, int idx, int n) { TValue *val; - const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val); + GCobj *o; + const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val, &o); if (name) { copyTV(L, L->top, val); incr_top(L); @@ -904,7 +925,7 @@ LUA_API void *lua_upvalueid(lua_State *L, int idx, int n) { GCfunc *fn = funcV(index2adr(L, idx)); n--; - api_check(L, (uint32_t)n < fn->l.nupvalues); + lj_checkapi((uint32_t)n < fn->l.nupvalues, "bad upvalue %d", n); return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : (void *)&fn->c.upvalue[n]; } @@ -914,8 +935,10 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2) GCfunc *fn1 = funcV(index2adr(L, idx1)); GCfunc *fn2 = funcV(index2adr(L, idx2)); n1--; n2--; - api_check(L, isluafunc(fn1) && (uint32_t)n1 < fn1->l.nupvalues); - api_check(L, isluafunc(fn2) && (uint32_t)n2 < fn2->l.nupvalues); + lj_checkapi(isluafunc(fn1), "stack slot %d is not a Lua function", idx1); + lj_checkapi(isluafunc(fn2), "stack slot %d is not a Lua function", idx2); + lj_checkapi((uint32_t)n1 < fn1->l.nupvalues, "bad upvalue %d", n1+1); + lj_checkapi((uint32_t)n2 < fn2->l.nupvalues, "bad upvalue %d", n2+1); setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]); lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); } @@ -944,9 +967,8 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) LUA_API void lua_settable(lua_State *L, int idx) { TValue *o; - cTValue *t = index2adr(L, idx); - api_checknelems(L, 2); - api_checkvalidindex(L, t); + cTValue *t = index2adr_check(L, idx); + lj_checkapi_slot(2); o = lj_meta_tset(L, t, L->top-2); if (o) { /* NOBARRIER: lj_meta_tset ensures the table is not black. 
*/ @@ -965,9 +987,8 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k) { TValue *o; TValue key; - cTValue *t = index2adr(L, idx); - api_checknelems(L, 1); - api_checkvalidindex(L, t); + cTValue *t = index2adr_check(L, idx); + lj_checkapi_slot(1); setstrV(L, &key, lj_str_newz(L, k)); o = lj_meta_tset(L, t, &key); if (o) { @@ -986,7 +1007,7 @@ LUA_API void lua_rawset(lua_State *L, int idx) { GCtab *t = tabV(index2adr(L, idx)); TValue *dst, *key; - api_checknelems(L, 2); + lj_checkapi_slot(2); key = L->top-2; dst = lj_tab_set(L, t, key); copyTV(L, dst, key+1); @@ -998,7 +1019,7 @@ LUA_API void lua_rawseti(lua_State *L, int idx, int n) { GCtab *t = tabV(index2adr(L, idx)); TValue *dst, *src; - api_checknelems(L, 1); + lj_checkapi_slot(1); dst = lj_tab_setint(L, t, n); src = L->top-1; copyTV(L, dst, src); @@ -1010,13 +1031,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) { global_State *g; GCtab *mt; - cTValue *o = index2adr(L, idx); - api_checknelems(L, 1); - api_checkvalidindex(L, o); + cTValue *o = index2adr_check(L, idx); + lj_checkapi_slot(1); if (tvisnil(L->top-1)) { mt = NULL; } else { - api_check(L, tvistab(L->top-1)); + lj_checkapi(tvistab(L->top-1), "top stack slot is not a table"); mt = tabV(L->top-1); } g = G(L); @@ -1032,6 +1052,7 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) /* Flush cache, since traces specialize to basemt. But not during __gc. */ if (lj_trace_flushall(L)) lj_err_caller(L, LJ_ERR_NOGCMM); + o = index2adr(L, idx); /* Stack may have been reallocated. */ if (tvisbool(o)) { /* NOBARRIER: basemt is a GC root. 
*/ setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt)); @@ -1053,11 +1074,10 @@ LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname) LUA_API int lua_setfenv(lua_State *L, int idx) { - cTValue *o = index2adr(L, idx); + cTValue *o = index2adr_check(L, idx); GCtab *t; - api_checknelems(L, 1); - api_checkvalidindex(L, o); - api_check(L, tvistab(L->top-1)); + lj_checkapi_slot(1); + lj_checkapi(tvistab(L->top-1), "top stack slot is not a table"); t = tabV(L->top-1); if (tvisfunc(o)) { setgcref(funcV(o)->c.env, obj2gco(t)); @@ -1078,13 +1098,14 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) { cTValue *f = index2adr(L, idx); TValue *val; + GCobj *o; const char *name; - api_checknelems(L, 1); - name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val); + lj_checkapi_slot(1); + name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o); if (name) { L->top--; copyTV(L, val, L->top); - lj_gc_barrier(L, funcV(f), L->top); + lj_gc_barrier(L, o, L->top); } return name; } @@ -1106,8 +1127,9 @@ static TValue *api_call_base(lua_State *L, int nargs) LUA_API void lua_call(lua_State *L, int nargs, int nresults) { - api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); - api_checknelems(L, nargs+1); + lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, + "thread called in wrong state %d", L->status); + lj_checkapi_slot(nargs+1); lj_vm_call(L, api_call_base(L, nargs), nresults+1); } @@ -1117,13 +1139,13 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) uint8_t oldh = hook_save(g); ptrdiff_t ef; int status; - api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); - api_checknelems(L, nargs+1); + lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, + "thread called in wrong state %d", L->status); + lj_checkapi_slot(nargs+1); if (errfunc == 0) { ef = 0; } else { - cTValue *o = stkindex2adr(L, errfunc); - api_checkvalidindex(L, o); + cTValue *o = index2adr_stack(L, errfunc); ef = savestack(L, o); } status = 
lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef); @@ -1138,7 +1160,10 @@ static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) fn->c.f = func; setfuncV(L, top++, fn); if (LJ_FR2) setnilV(top++); - setlightudV(top++, checklightudptr(L, ud)); +#if LJ_64 + ud = lj_lightud_intern(L, ud); +#endif + setrawlightudV(top++, ud); cframe_nres(L->cframe) = 1+0; /* Zero results. */ L->top = top; return top-1; /* Now call the newly allocated C function. */ @@ -1149,7 +1174,8 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) global_State *g = G(L); uint8_t oldh = hook_save(g); int status; - api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); + lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, + "thread called in wrong state %d", L->status); status = lj_vm_cpcall(L, func, ud, cpcall); if (status) hook_restore(g, oldh); return status; @@ -1198,11 +1224,12 @@ LUA_API int lua_yield(lua_State *L, int nresults) setcont(top, lj_cont_hook); if (LJ_FR2) top++; setframe_pc(top, cframe_pc(cf)-1); - if (LJ_FR2) top++; + top++; setframe_gc(top, obj2gco(L), LJ_TTHREAD); + if (LJ_FR2) top++; setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT); L->top = L->base = top+1; -#if LJ_TARGET_X64 +#if ((defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND) || LJ_TARGET_WINDOWS lj_err_throw(L, LUA_YIELD); #else L->cframe = NULL; diff --git a/source/libs/luajit/LuaJIT-src/src/lj_arch.h b/source/libs/luajit/LuaJIT-src/src/lj_arch.h index e8ad844ff1ddb73bc62c876e28a460cd34473501..f146f7252987a2b5db2f62a2d026391e5332663f 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_arch.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_arch.h @@ -1,6 +1,6 @@ /* ** Target architecture selection. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_ARCH_H @@ -8,6 +8,8 @@ #include "lua.h" +/* -- Target definitions -------------------------------------------------- */ + /* Target endianess. */ #define LUAJIT_LE 0 #define LUAJIT_BE 1 @@ -38,6 +40,14 @@ #define LUAJIT_OS_BSD 4 #define LUAJIT_OS_POSIX 5 +/* Number mode. */ +#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ +#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ +#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ +#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ + +/* -- Target detection ---------------------------------------------------- */ + /* Select native target if no target defined. */ #ifndef LUAJIT_TARGET @@ -47,7 +57,7 @@ #define LUAJIT_TARGET LUAJIT_ARCH_X64 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) #define LUAJIT_TARGET LUAJIT_ARCH_ARM -#elif defined(__aarch64__) +#elif defined(__aarch64__) || defined(_M_ARM64) #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) #define LUAJIT_TARGET LUAJIT_ARCH_PPC @@ -56,7 +66,7 @@ #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 #else -#error "No support for this architecture (yet)" +#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" #endif #endif @@ -69,16 +79,23 @@ #elif defined(__linux__) #define LUAJIT_OS LUAJIT_OS_LINUX #elif defined(__MACH__) && defined(__APPLE__) +#include "TargetConditionals.h" #define LUAJIT_OS LUAJIT_OS_OSX #elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__DragonFly__)) && !defined(__ORBIS__) + defined(__DragonFly__)) && !defined(__ORBIS__) && !defined(__PROSPERO__) 
#define LUAJIT_OS LUAJIT_OS_BSD -#elif (defined(__sun__) && defined(__svr4__)) || defined(__HAIKU__) +#elif (defined(__sun__) && defined(__svr4__)) +#define LJ_TARGET_SOLARIS 1 +#define LUAJIT_OS LUAJIT_OS_POSIX +#elif defined(__HAIKU__) #define LUAJIT_OS LUAJIT_OS_POSIX #elif defined(__CYGWIN__) #define LJ_TARGET_CYGWIN 1 #define LUAJIT_OS LUAJIT_OS_POSIX +#elif defined(__QNX__) +#define LJ_TARGET_QNX 1 +#define LUAJIT_OS LUAJIT_OS_POSIX #else #define LUAJIT_OS LUAJIT_OS_OTHER #endif @@ -103,10 +120,16 @@ #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) -#define LJ_TARGET_IOS (LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64)) +#define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD) #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX +#if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE +#define LJ_TARGET_IOS 1 +#else +#define LJ_TARGET_IOS 0 +#endif + #ifdef __CELLOS_LV2__ #define LJ_TARGET_PS3 1 #define LJ_TARGET_CONSOLE 1 @@ -119,6 +142,13 @@ #define NULL ((void*)0) #endif +#ifdef __PROSPERO__ +#define LJ_TARGET_PS5 1 +#define LJ_TARGET_CONSOLE 1 +#undef NULL +#define NULL ((void*)0) +#endif + #ifdef __psp2__ #define LJ_TARGET_PSVITA 1 #define LJ_TARGET_CONSOLE 1 @@ -135,10 +165,21 @@ #define LJ_TARGET_GC64 1 #endif -#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ -#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ -#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ -#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. 
*/ +#ifdef __NX__ +#define LJ_TARGET_NX 1 +#define LJ_TARGET_CONSOLE 1 +#undef NULL +#define NULL ((void*)0) +#endif + +#ifdef _UWP +#define LJ_TARGET_UWP 1 +#if LUAJIT_TARGET == LUAJIT_ARCH_X64 +#define LJ_TARGET_GC64 1 +#endif +#endif + +/* -- Arch-specific settings ---------------------------------------------- */ /* Set target architecture properties. */ #if LUAJIT_TARGET == LUAJIT_ARCH_X86 @@ -146,14 +187,10 @@ #define LJ_ARCH_NAME "x86" #define LJ_ARCH_BITS 32 #define LJ_ARCH_ENDIAN LUAJIT_LE -#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN -#define LJ_ABI_WIN 1 -#else -#define LJ_ABI_WIN 0 -#endif #define LJ_TARGET_X86 1 #define LJ_TARGET_X86ORX64 1 #define LJ_TARGET_EHRETREG 0 +#define LJ_TARGET_EHRAREG 8 #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNALIGNED 1 @@ -164,21 +201,19 @@ #define LJ_ARCH_NAME "x64" #define LJ_ARCH_BITS 64 #define LJ_ARCH_ENDIAN LUAJIT_LE -#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN -#define LJ_ABI_WIN 1 -#else -#define LJ_ABI_WIN 0 -#endif #define LJ_TARGET_X64 1 #define LJ_TARGET_X86ORX64 1 #define LJ_TARGET_EHRETREG 0 +#define LJ_TARGET_EHRAREG 16 #define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNALIGNED 1 #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL -#ifdef LUAJIT_ENABLE_GC64 +#ifndef LUAJIT_DISABLE_GC64 #define LJ_TARGET_GC64 1 +#elif LJ_TARGET_OSX +#error "macOS requires GC64 -- don't disable it" #endif #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM @@ -195,19 +230,20 @@ #define LJ_ABI_EABI 1 #define LJ_TARGET_ARM 1 #define LJ_TARGET_EHRETREG 0 +#define LJ_TARGET_EHRAREG 14 #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ #define LJ_TARGET_MASKSHIFT 0 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. 
*/ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__ +#if __ARM_ARCH >= 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ #define LJ_ARCH_VERSION 80 -#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ +#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ #define LJ_ARCH_VERSION 70 #elif __ARM_ARCH_6T2__ #define LJ_ARCH_VERSION 61 -#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ +#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ #define LJ_ARCH_VERSION 60 #else #define LJ_ARCH_VERSION 50 @@ -223,8 +259,12 @@ #define LJ_ARCH_NAME "arm64" #define LJ_ARCH_ENDIAN LUAJIT_LE #endif +#if !defined(LJ_ABI_PAUTH) && defined(__arm64e__) +#define LJ_ABI_PAUTH 1 +#endif #define LJ_TARGET_ARM64 1 #define LJ_TARGET_EHRETREG 0 +#define LJ_TARGET_EHRAREG 30 #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 @@ -254,23 +294,44 @@ #else #define LJ_ARCH_BITS 32 #define LJ_ARCH_NAME "ppc" + +#if !defined(LJ_ARCH_HASFPU) +#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) +#define LJ_ARCH_HASFPU 0 +#else +#define LJ_ARCH_HASFPU 1 +#endif +#endif + +#if !defined(LJ_ABI_SOFTFP) +#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) +#define LJ_ABI_SOFTFP 1 +#else +#define LJ_ABI_SOFTFP 0 +#endif +#endif +#endif + +#if LJ_ABI_SOFTFP +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL +#else +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE #endif #define LJ_TARGET_PPC 1 #define LJ_TARGET_EHRETREG 3 +#define LJ_TARGET_EHRAREG 65 #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ #define LJ_TARGET_MASKSHIFT 0 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. 
*/ -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE #if LJ_TARGET_CONSOLE #define LJ_ARCH_PPC32ON64 1 #define LJ_ARCH_NOFFI 1 #elif LJ_ARCH_BITS == 64 -#define LJ_ARCH_PPC64 1 -#define LJ_TARGET_GC64 1 -#define LJ_ARCH_NOJIT 1 /* NYI */ +#error "No support for PPC64" +#undef LJ_TARGET_PPC #endif #if _ARCH_PWR7 @@ -302,18 +363,38 @@ #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) +#if __mips_isa_rev >= 6 +#define LJ_TARGET_MIPSR6 1 +#define LJ_TARGET_UNALIGNED 1 +#endif #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips32r6el" +#else #define LJ_ARCH_NAME "mipsel" +#endif +#else +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips64r6el" #else #define LJ_ARCH_NAME "mips64el" #endif +#endif #define LJ_ARCH_ENDIAN LUAJIT_LE #else #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips32r6" +#else #define LJ_ARCH_NAME "mips" +#endif +#else +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips64r6" #else #define LJ_ARCH_NAME "mips64" #endif +#endif #define LJ_ARCH_ENDIAN LUAJIT_BE #endif @@ -337,22 +418,22 @@ #define LJ_ARCH_BITS 32 #define LJ_TARGET_MIPS32 1 #else -#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU -#define LJ_ARCH_NOJIT 1 /* NYI */ -#endif #define LJ_ARCH_BITS 64 #define LJ_TARGET_MIPS64 1 #define LJ_TARGET_GC64 1 #endif #define LJ_TARGET_MIPS 1 #define LJ_TARGET_EHRETREG 4 +#define LJ_TARGET_EHRAREG 31 #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. 
*/ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_VERSION 60 +#elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 #define LJ_ARCH_VERSION 20 #else #define LJ_ARCH_VERSION 10 @@ -362,9 +443,7 @@ #error "No target architecture defined" #endif -#ifndef LJ_PAGESIZE -#define LJ_PAGESIZE 4096 -#endif +/* -- Checks for requirements --------------------------------------------- */ /* Check for minimum required compiler versions. */ #if defined(__GNUC__) @@ -376,7 +455,7 @@ #if __GNUC__ < 4 #error "Need at least GCC 4.0 or newer" #endif -#elif LJ_TARGET_ARM || LJ_TARGET_PPC +#elif LJ_TARGET_ARM || LJ_TARGET_PPC #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) #error "Need at least GCC 4.2 or newer" #endif @@ -391,11 +470,17 @@ #endif #endif #elif !LJ_TARGET_PS3 +#if __clang__ +#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) +#error "Need at least Clang 3.5 or newer" +#endif +#else #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) #error "Need at least GCC 4.3 or newer" #endif #endif #endif +#endif /* Check target-specific constraints. 
*/ #ifndef _BUILDVM_H @@ -406,41 +491,51 @@ #elif LJ_TARGET_ARM #if defined(__ARMEB__) #error "No support for big-endian ARM" +#undef LJ_TARGET_ARM #endif #if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__ #error "No support for Cortex-M CPUs" +#undef LJ_TARGET_ARM #endif #if !(__ARM_EABI__ || LJ_TARGET_IOS) #error "Only ARM EABI or iOS 3.0+ ABI is supported" +#undef LJ_TARGET_ARM #endif #elif LJ_TARGET_ARM64 #if defined(_ILP32) #error "No support for ILP32 model on ARM64" +#undef LJ_TARGET_ARM64 #endif #elif LJ_TARGET_PPC -#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) -#error "No support for PowerPC CPUs without double-precision FPU" -#endif -#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE +#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) #error "No support for little-endian PPC32" +#undef LJ_TARGET_PPC #endif -#if LJ_ARCH_PPC64 -#error "No support for PowerPC 64 bit mode (yet)" -#endif -#ifdef __NO_FPRS__ -#error "No support for PPC/e500 anymore (use LuaJIT 2.0)" +#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) +#error "No support for PPC/e500, use LuaJIT 2.0" +#undef LJ_TARGET_PPC #endif #elif LJ_TARGET_MIPS32 #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) #error "Only o32 ABI supported for MIPS32" +#undef LJ_TARGET_MIPS +#endif +#if LJ_TARGET_MIPSR6 +/* Not that useful, since most available r6 CPUs are 64 bit. */ +#error "No support for MIPS32R6" +#undef LJ_TARGET_MIPS #endif #elif LJ_TARGET_MIPS64 #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) +/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ #error "Only n64 ABI supported for MIPS64" +#undef LJ_TARGET_MIPS #endif #endif #endif +/* -- Derived defines ----------------------------------------------------- */ + /* Enable or disable the dual-number mode for the VM. 
*/ #if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \ (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1) @@ -490,6 +585,13 @@ #define LJ_HASFFI 1 #endif +/* Disable or enable the string buffer extension. */ +#if defined(LUAJIT_DISABLE_BUFFER) +#define LJ_HASBUFFER 0 +#else +#define LJ_HASBUFFER 1 +#endif + #if defined(LUAJIT_DISABLE_PROFILE) #define LJ_HASPROFILE 0 #elif LJ_TARGET_POSIX @@ -512,6 +614,11 @@ #define LJ_ABI_SOFTFP 0 #endif #define LJ_SOFTFP (!LJ_ARCH_HASFPU) +#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32) + +#ifndef LJ_ABI_PAUTH +#define LJ_ABI_PAUTH 0 +#endif #if LJ_ARCH_ENDIAN == LUAJIT_BE #define LJ_LE 0 @@ -537,26 +644,52 @@ #define LJ_TARGET_UNALIGNED 0 #endif +#ifndef LJ_PAGESIZE +#define LJ_PAGESIZE 4096 +#endif + /* Various workarounds for embedded operating systems or weak C runtimes. */ #if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS #define LUAJIT_NO_LOG2 #endif -#if defined(__symbian__) || LJ_TARGET_WINDOWS -#define LUAJIT_NO_EXP2 -#endif #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) #define LJ_NO_SYSTEM 1 #endif -#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__ -/* NYI: no support for compact unwind specification, yet. 
*/ -#define LUAJIT_NO_UNWIND 1 +#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN +#define LJ_ABI_WIN 1 +#else +#define LJ_ABI_WIN 0 +#endif + +#if LJ_TARGET_WINDOWS +#if LJ_TARGET_UWP +#define LJ_WIN_VALLOC VirtualAllocFromApp +#define LJ_WIN_VPROTECT VirtualProtectFromApp +extern void *LJ_WIN_LOADLIBA(const char *path); +#else +#define LJ_WIN_VALLOC VirtualAlloc +#define LJ_WIN_VPROTECT VirtualProtect +#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0) +#endif #endif -#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 +#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 #define LJ_NO_UNWIND 1 #endif +#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__)))) +#define LJ_UNWIND_EXT 1 +#else +#define LJ_UNWIND_EXT 0 +#endif + +#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86) +#define LJ_UNWIND_JIT 1 +#else +#define LJ_UNWIND_JIT 0 +#endif + /* Compatibility with Lua 5.1 vs. 5.2. */ #ifdef LUAJIT_ENABLE_LUA52COMPAT #define LJ_52 1 @@ -564,4 +697,46 @@ #define LJ_52 0 #endif +/* -- VM security --------------------------------------------------------- */ + +/* Don't make any changes here. Instead build with: +** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value" +** +** Important note to distro maintainers: DO NOT change the defaults for a +** regular distro build -- neither upwards, nor downwards! +** These build-time configurable security flags are intended for embedders +** who may have specific needs wrt. security vs. performance. +*/ + +/* Security defaults. */ +#ifndef LUAJIT_SECURITY_PRNG +/* PRNG init: 0 = fixed/insecure, 1 = secure from OS. */ +#define LUAJIT_SECURITY_PRNG 1 +#endif + +#ifndef LUAJIT_SECURITY_STRHASH +/* String hash: 0 = sparse only, 1 = sparse + dense. 
*/ +#define LUAJIT_SECURITY_STRHASH 1 +#endif + +#ifndef LUAJIT_SECURITY_STRID +/* String IDs: 0 = linear, 1 = reseed < 255, 2 = reseed < 15, 3 = random. */ +#define LUAJIT_SECURITY_STRID 1 +#endif + +#ifndef LUAJIT_SECURITY_MCODE +/* Machine code page protection: 0 = insecure RWX, 1 = secure RW^X. */ +#define LUAJIT_SECURITY_MCODE 1 +#endif + +#define LJ_SECURITY_MODE \ + ( 0u \ + | ((LUAJIT_SECURITY_PRNG & 3) << 0) \ + | ((LUAJIT_SECURITY_STRHASH & 3) << 2) \ + | ((LUAJIT_SECURITY_STRID & 3) << 4) \ + | ((LUAJIT_SECURITY_MCODE & 3) << 6) \ + ) +#define LJ_SECURITY_MODESTRING \ + "\004prng\007strhash\005strid\005mcode" + #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_asm.c b/source/libs/luajit/LuaJIT-src/src/lj_asm.c index c2cf5a95a5b34a54bb8aef7a9645e09823738251..fec43512517c77e3a83f5f3746cf53f381646399 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_asm.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_asm.c @@ -1,6 +1,6 @@ /* ** IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_asm_c @@ -11,6 +11,7 @@ #if LJ_HASJIT #include "lj_gc.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_frame.h" @@ -22,13 +23,13 @@ #include "lj_ircall.h" #include "lj_iropt.h" #include "lj_mcode.h" -#include "lj_iropt.h" #include "lj_trace.h" #include "lj_snap.h" #include "lj_asm.h" #include "lj_dispatch.h" #include "lj_vm.h" #include "lj_target.h" +#include "lj_prng.h" #ifdef LUA_USE_ASSERT #include <stdio.h> @@ -72,6 +73,8 @@ typedef struct ASMState { IRRef snaprename; /* Rename highwater mark for snapshot check. */ SnapNo snapno; /* Current snapshot number. */ SnapNo loopsnapno; /* Loop snapshot number. */ + int snapalloc; /* Current snapshot needs allocation. */ + BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */ IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). 
*/ IRRef sectref; /* Section base reference (loopref or 0). */ @@ -85,11 +88,18 @@ typedef struct ASMState { MCode *mcbot; /* Bottom of reserved MCode. */ MCode *mctop; /* Top of generated MCode. */ + MCode *mctoporig; /* Original top of generated MCode. */ MCode *mcloop; /* Pointer to loop MCode (or NULL). */ MCode *invmcp; /* Points to invertible loop branch (or NULL). */ MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ MCode *realign; /* Realign loop if not NULL. */ +#ifdef LUAJIT_RANDOM_RA + /* Randomize register allocation. OK for fuzz testing, not for production. */ + uint64_t prngbits; + PRNGState prngstate; +#endif + #ifdef RID_NUM_KREF intptr_t krefk[RID_NUM_KREF]; #endif @@ -97,6 +107,12 @@ typedef struct ASMState { uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ } ASMState; +#ifdef LUA_USE_ASSERT +#define lj_assertA(c, ...) lj_assertG_(J2G(as->J), (c), __VA_ARGS__) +#else +#define lj_assertA(c, ...) ((void)as) +#endif + #define IR(ref) (&as->ir[(ref)]) #define ASMREF_TMP1 REF_TRUE /* Temp. register. */ @@ -128,9 +144,8 @@ static LJ_AINLINE void checkmclim(ASMState *as) #ifdef LUA_USE_ASSERT if (as->mcp + MCLIM_REDZONE < as->mcp_prev) { IRIns *ir = IR(as->curins+1); - fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp, - as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); - lua_assert(0); + lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp, + as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); } #endif if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as); @@ -165,6 +180,41 @@ IRFLDEF(FLOFS) 0 }; +#ifdef LUAJIT_RANDOM_RA +/* Return a fixed number of random bits from the local PRNG state. 
*/ +static uint32_t ra_random_bits(ASMState *as, uint32_t nbits) { + uint64_t b = as->prngbits; + uint32_t res = (1u << nbits) - 1u; + if (b <= res) b = lj_prng_u64(&as->prngstate) | (1ull << 63); + res &= (uint32_t)b; + as->prngbits = b >> nbits; + return res; +} + +/* Pick a random register from a register set. */ +static Reg rset_pickrandom(ASMState *as, RegSet rs) +{ + Reg r = rset_pickbot_(rs); + rs >>= r; + if (rs > 1) { /* More than one bit set? */ + while (1) { + /* We need to sample max. the GPR or FPR half of the set. */ + uint32_t d = ra_random_bits(as, RSET_BITS-1); + if ((rs >> d) & 1) { + r += d; + break; + } + } + } + return r; +} +#define rset_picktop(rs) rset_pickrandom(as, rs) +#define rset_pickbot(rs) rset_pickrandom(as, rs) +#else +#define rset_picktop(rs) rset_picktop_(rs) +#define rset_pickbot(rs) rset_pickbot_(rs) +#endif + /* -- Target-specific instruction emitter --------------------------------- */ #if LJ_TARGET_X86ORX64 @@ -244,7 +294,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q; } else { *p++ = '?'; - lua_assert(0); + lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt); } } else if (e[1] == 'f' || e[1] == 'i') { IRRef ref; @@ -262,7 +312,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) 
} else if (e[1] == 'x') { p += sprintf(p, "%08x", va_arg(argp, int32_t)); } else { - lua_assert(0); + lj_assertA(0, "bad debug format code"); } fmt = e+2; } @@ -321,7 +371,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) Reg r; if (ra_iskref(ref)) { r = ra_krefreg(ref); - lua_assert(!rset_test(as->freeset, r)); + lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r); ra_free(as, r); ra_modified(as, r); #if LJ_64 @@ -333,12 +383,14 @@ static Reg ra_rematk(ASMState *as, IRRef ref) } ir = IR(ref); r = ir->r; - lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); + lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref); + lj_assertA(!ra_hasspill(ir->s), + "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s); ra_free(as, r); ra_modified(as, r); ir->r = RID_INIT; /* Do not keep any hint. */ RA_DBGX((as, "remat $i $r", ir, r)); -#if !LJ_SOFTFP +#if !LJ_SOFTFP32 if (ir->o == IR_KNUM) { emit_loadk64(as, r, ir); } else @@ -347,7 +399,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref) ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ emit_getgl(as, r, jit_base); } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { - lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ + /* REF_NIL stores ASMREF_L register. 
*/ + lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L"); emit_getgl(as, r, cur_L); #if LJ_64 } else if (ir->o == IR_KINT64) { @@ -360,8 +413,9 @@ static Reg ra_rematk(ASMState *as, IRRef ref) #endif #endif } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || - ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); + lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || + ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, + "rematk of bad IR op %d", ir->o); emit_loadi(as, r, ir->i); } return r; @@ -371,7 +425,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref) static int32_t ra_spill(ASMState *as, IRIns *ir) { int32_t slot = ir->s; - lua_assert(ir >= as->ir + REF_TRUE); + lj_assertA(ir >= as->ir + REF_TRUE, + "spill of K%03d", REF_BIAS - (int)(ir - as->ir)); if (!ra_hasspill(slot)) { if (irt_is64(ir->t)) { slot = as->evenspill; @@ -396,7 +451,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref) { IRIns *ir = IR(ref); Reg r = ir->r; - lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); + lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1); + lj_assertA(!ra_hasspill(ir->s), + "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s); ra_free(as, r); ra_modified(as, r); ir->r = RID_INIT; @@ -412,7 +469,7 @@ static Reg ra_restore(ASMState *as, IRRef ref) IRIns *ir = IR(ref); int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ Reg r = ir->r; - lua_assert(ra_hasreg(r)); + lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS); ra_sethint(ir->r, r); /* Keep hint. */ ra_free(as, r); if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. 
*/ @@ -441,14 +498,15 @@ static Reg ra_evict(ASMState *as, RegSet allow) { IRRef ref; RegCost cost = ~(RegCost)0; - lua_assert(allow != RSET_EMPTY); + lj_assertA(allow != RSET_EMPTY, "evict from empty set"); if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) { GPRDEF(MINCOST) } else { FPRDEF(MINCOST) } ref = regcost_ref(cost); - lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins)); + lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins), + "evict of out-of-range IR %04d", ref - REF_BIAS); /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ if (!irref_isk(ref) && (as->weakset & allow)) { IRIns *ir = IR(ref); @@ -548,7 +606,11 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) IRIns *ir = IR(ref); if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || #if LJ_GC64 +#if LJ_TARGET_ARM64 + (ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) || +#else (ir->o == IR_KINT && k == ir->i) || +#endif (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && k == (intptr_t)ir_kptr(ir)) @@ -606,7 +668,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) IRIns *ir = IR(ref); RegSet pick = as->freeset & allow; Reg r; - lua_assert(ra_noreg(ir->r)); + lj_assertA(ra_noreg(ir->r), + "IR %04d already has reg %d", ref - REF_BIAS, ir->r); if (pick) { /* First check register hint from propagation or PHI. 
*/ if (ra_hashint(ir->r)) { @@ -670,8 +733,10 @@ static void ra_rename(ASMState *as, Reg down, Reg up) IRIns *ir = IR(ref); ir->r = (uint8_t)up; as->cost[down] = 0; - lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); - lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); + lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR), + "rename between GPR/FPR %d and %d", down, up); + lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down); + lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up); ra_free(as, down); /* 'down' is free ... */ ra_modified(as, down); rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ @@ -679,7 +744,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up) RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ - ra_addrename(as, down, ref, as->snapno); + /* + ** The rename is effective at the subsequent (already emitted) exit + ** branch. This is for the current snapshot (as->snapno). Except if we + ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1), + ** then it belongs to the next snapshot. + ** See also the discussion at asm_snap_checkrename(). 
+ */ + ra_addrename(as, down, ref, as->snapno + as->snapalloc); } } @@ -712,7 +784,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r) { Reg dest = ra_dest(as, ir, RID2RSET(r)); if (dest != r) { - lua_assert(rset_test(as->freeset, r)); + lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r); ra_modified(as, r); emit_movrr(as, ir, dest, r); } @@ -745,8 +817,9 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) #endif #endif } else if (ir->o != IR_KPRI) { - lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || - ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); + lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || + ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, + "K%03d has bad IR op %d", REF_BIAS - lref, ir->o); emit_loadi(as, dest, ir->i); return; } @@ -791,11 +864,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref) } #endif -#if !LJ_64 /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */ static void ra_destpair(ASMState *as, IRIns *ir) { Reg destlo = ir->r, desthi = (ir+1)->r; + IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir; /* First spill unrelated refs blocking the destination registers. */ if (!rset_test(as->freeset, RID_RETLO) && destlo != RID_RETLO && desthi != RID_RETLO) @@ -819,29 +892,29 @@ static void ra_destpair(ASMState *as, IRIns *ir) /* Check for conflicts and shuffle the registers as needed. 
*/ if (destlo == RID_RETHI) { if (desthi == RID_RETLO) { -#if LJ_TARGET_X86 +#if LJ_TARGET_X86ORX64 *--as->mcp = XI_XCHGa + RID_RETHI; + if (LJ_64 && irt_is64(irx->t)) *--as->mcp = 0x48; #else - emit_movrr(as, ir, RID_RETHI, RID_TMP); - emit_movrr(as, ir, RID_RETLO, RID_RETHI); - emit_movrr(as, ir, RID_TMP, RID_RETLO); + emit_movrr(as, irx, RID_RETHI, RID_TMP); + emit_movrr(as, irx, RID_RETLO, RID_RETHI); + emit_movrr(as, irx, RID_TMP, RID_RETLO); #endif } else { - emit_movrr(as, ir, RID_RETHI, RID_RETLO); - if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); + emit_movrr(as, irx, RID_RETHI, RID_RETLO); + if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI); } } else if (desthi == RID_RETLO) { - emit_movrr(as, ir, RID_RETLO, RID_RETHI); - if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); + emit_movrr(as, irx, RID_RETLO, RID_RETHI); + if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO); } else { - if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); - if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); + if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI); + if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO); } /* Restore spill slots (if any). */ if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI); if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO); } -#endif /* -- Snapshot handling --------- ----------------------------------------- */ @@ -876,8 +949,11 @@ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs) static void asm_snap_alloc1(ASMState *as, IRRef ref) { IRIns *ir = IR(ref); - if (!irref_isk(ref) && (!(ra_used(ir) || ir->r == RID_SUNK))) { - if (ir->r == RID_SINK) { + if (!irref_isk(ref)) { + bloomset(as->snapfilt1, ref); + bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS)); + if (ra_used(ir)) return; + if (ir->r == RID_SINK || ir->r == RID_SUNK) { ir->r = RID_SUNK; #if LJ_HASFFI if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. 
*/ @@ -888,11 +964,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) #endif { /* Allocate stored values for TNEW, TDUP and CNEW. */ IRIns *irs; - lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); + lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW, + "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o); for (irs = IR(as->snapref-1); irs > ir; irs--) if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) { - lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || - irs->o == IR_FSTORE || irs->o == IR_XSTORE); + lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE || + irs->o == IR_FSTORE || irs->o == IR_XSTORE, + "sunk store IR %04d has bad op %d", + (int)(irs - as->ir) - REF_BIAS, irs->o); asm_snap_alloc1(as, irs->op2); if (LJ_32 && (irs+1)->o == IR_HIOP) asm_snap_alloc1(as, (irs+1)->op2); @@ -928,18 +1007,21 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) } /* Allocate refs escaping to a snapshot. */ -static void asm_snap_alloc(ASMState *as) +static void asm_snap_alloc(ASMState *as, int snapno) { - SnapShot *snap = &as->T->snap[as->snapno]; + SnapShot *snap = &as->T->snap[snapno]; SnapEntry *map = &as->T->snapmap[snap->mapofs]; MSize n, nent = snap->nent; + as->snapfilt1 = as->snapfilt2 = 0; for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRRef ref = snap_ref(sn); if (!irref_isk(ref)) { asm_snap_alloc1(as, ref); if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { - lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP); + lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP, + "snap %d[%d] points to bad SOFTFP IR %04d", + snapno, n, ref - REF_BIAS); asm_snap_alloc1(as, ref+1); } } @@ -955,35 +1037,26 @@ static void asm_snap_alloc(ASMState *as) */ static int asm_snap_checkrename(ASMState *as, IRRef ren) { - SnapShot *snap = &as->T->snap[as->snapno]; - SnapEntry *map = &as->T->snapmap[snap->mapofs]; - MSize n, nent = snap->nent; - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - IRRef ref = snap_ref(sn); - if (ref == 
ren || (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && ++ref == ren)) { - IRIns *ir = IR(ref); - ra_spill(as, ir); /* Register renamed, so force a spill slot. */ - RA_DBGX((as, "snaprensp $f $s", ref, ir->s)); - return 1; /* Found. */ - } + if (bloomtest(as->snapfilt1, ren) && + bloomtest(as->snapfilt2, hashrot(ren, ren + HASH_BIAS))) { + IRIns *ir = IR(ren); + ra_spill(as, ir); /* Register renamed, so force a spill slot. */ + RA_DBGX((as, "snaprensp $f $s", ren, ir->s)); + return 1; /* Found. */ } return 0; /* Not found. */ } -/* Prepare snapshot for next guard instruction. */ +/* Prepare snapshot for next guard or throwing instruction. */ static void asm_snap_prep(ASMState *as) { - if (as->curins < as->snapref) { - do { - if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */ - as->snapno--; - as->snapref = as->T->snap[as->snapno].ref; - } while (as->curins < as->snapref); - asm_snap_alloc(as); + if (as->snapalloc) { + /* Alloc on first invocation for each snapshot. */ + as->snapalloc = 0; + asm_snap_alloc(as, as->snapno); as->snaprename = as->T->nins; } else { - /* Process any renames above the highwater mark. */ + /* Check any renames above the highwater mark. */ for (; as->snaprename < as->T->nins; as->snaprename++) { IRIns *ir = &as->T->ir[as->snaprename]; if (asm_snap_checkrename(as, ir->op1)) @@ -992,6 +1065,35 @@ static void asm_snap_prep(ASMState *as) } } +/* Move to previous snapshot when we cross the current snapshot ref. */ +static void asm_snap_prev(ASMState *as) +{ + if (as->curins < as->snapref) { + uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp); + if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV); + do { + if (as->snapno == 0) return; + as->snapno--; + as->snapref = as->T->snap[as->snapno].ref; + as->T->snap[as->snapno].mcofs = (uint16_t)ofs; /* Remember mcode ofs. */ + } while (as->curins < as->snapref); /* May have no ins in between. */ + as->snapalloc = 1; + } +} + +/* Fixup snapshot mcode offsets.
*/ +static void asm_snap_fixup_mcofs(ASMState *as) +{ + uint32_t sz = (uint32_t)(as->mctoporig - as->mcp); + SnapShot *snap = as->T->snap; + SnapNo i; + for (i = as->T->nsnap-1; i > 0; i--) { + /* Compute offset from mcode start and store in correct snapshot. */ + snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs); + } + snap[0].mcofs = 0; +} + /* -- Miscellaneous helpers ----------------------------------------------- */ /* Calculate stack adjustment. */ @@ -1003,21 +1105,26 @@ static int32_t asm_stack_adjust(ASMState *as) } /* Must match with hash*() in lj_tab.c. */ -static uint32_t ir_khash(IRIns *ir) +static uint32_t ir_khash(ASMState *as, IRIns *ir) { uint32_t lo, hi; + UNUSED(as); if (irt_isstr(ir->t)) { - return ir_kstr(ir)->hash; + return ir_kstr(ir)->sid; } else if (irt_isnum(ir->t)) { lo = ir_knum(ir)->u32.lo; hi = ir_knum(ir)->u32.hi << 1; } else if (irt_ispri(ir->t)) { - lua_assert(!irt_isnil(ir->t)); + lj_assertA(!irt_isnil(ir->t), "hash of nil key"); return irt_type(ir->t)-IRT_FALSE; } else { - lua_assert(irt_isgcv(ir->t)); + lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t)); lo = u32ptr(ir_kgc(ir)); +#if LJ_GC64 + hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); +#else hi = lo + HASH_BIAS; +#endif } return hashrot(lo, hi); } @@ -1031,6 +1138,7 @@ static void asm_snew(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; IRRef args[3]; + asm_snap_prep(as); args[0] = ASMREF_L; /* lua_State *L */ args[1] = ir->op1; /* const char *str */ args[2] = ir->op2; /* size_t len */ @@ -1043,6 +1151,7 @@ static void asm_tnew(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; IRRef args[2]; + asm_snap_prep(as); args[0] = ASMREF_L; /* lua_State *L */ args[1] = ASMREF_TMP1; /* uint32_t ahsize */ as->gcsteps++; @@ -1055,6 +1164,7 @@ static void asm_tdup(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; IRRef args[2]; + 
asm_snap_prep(as); args[0] = ASMREF_L; /* lua_State *L */ args[1] = ir->op1; /* const GCtab *kt */ as->gcsteps++; @@ -1080,28 +1190,43 @@ static void asm_gcstep(ASMState *as, IRIns *ir) /* -- Buffer operations --------------------------------------------------- */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref); +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode); +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb); +#endif static void asm_bufhdr(ASMState *as, IRIns *ir) { Reg sb = ra_dest(as, ir, RSET_GPR); - if ((ir->op2 & IRBUFHDR_APPEND)) { + switch (ir->op2) { + case IRBUFHDR_RESET: { + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irbp; + irbp.ot = IRT(0, IRT_PTR); /* Buffer data pointer type. */ + emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w)); + emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b)); + break; + } + case IRBUFHDR_APPEND: { /* Rematerialize const buffer pointer instead of likely spill. */ IRIns *irp = IR(ir->op1); if (!(ra_hasreg(irp->r) || irp == ir-1 || (irp == ir-2 && !ra_used(ir-1)))) { - while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND))) + while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET)) irp = IR(irp->op1); if (irref_isk(irp->op1)) { ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR)); ir = irp; } } - } else { - Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); - /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. 
*/ - emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); - emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); + break; + } +#if LJ_HASBUFFER + case IRBUFHDR_WRITE: + asm_bufhdr_write(as, sb); + break; +#endif + default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break; } #if LJ_TARGET_X86ORX64 ra_left(as, sb, ir->op1); @@ -1115,15 +1240,16 @@ static void asm_bufput(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; IRRef args[3]; IRIns *irs; - int kchar = -1; + int kchar = -129; args[0] = ir->op1; /* SBuf * */ args[1] = ir->op2; /* GCstr * */ irs = IR(ir->op2); - lua_assert(irt_isstr(irs->t)); + lj_assertA(irt_isstr(irs->t), + "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS); if (irs->o == IR_KGC) { GCstr *s = ir_kstr(irs); if (s->len == 1) { /* Optimize put of single-char string constant. */ - kchar = strdata(s)[0]; + kchar = (int8_t)strdata(s)[0]; /* Signed! */ args[1] = ASMREF_TMP1; /* int, truncated to char */ ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; } @@ -1133,7 +1259,8 @@ static void asm_bufput(ASMState *as, IRIns *ir) args[1] = ASMREF_TMP1; /* TValue * */ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum]; } else { - lua_assert(irt_isinteger(IR(irs->op1)->t)); + lj_assertA(irt_isinteger(IR(irs->op1)->t), + "TOSTR of non-numeric IR %04d", irs->op1); args[1] = irs->op1; /* int */ if (irs->op2 == IRTOSTR_INT) ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint]; @@ -1150,8 +1277,8 @@ static void asm_bufput(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); if (args[1] == ASMREF_TMP1) { Reg tmp = ra_releasetmp(as, ASMREF_TMP1); - if (kchar == -1) - asm_tvptr(as, tmp, irs->op1); + if (kchar == -129) + asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1); else ra_allockreg(as, kchar, tmp); } @@ -1173,6 +1300,7 @@ static void asm_tostr(ASMState *as, IRIns *ir) { const CCallInfo *ci; IRRef args[2]; + asm_snap_prep(as); args[0] = ASMREF_L; as->gcsteps++; if (ir->op2 == IRTOSTR_NUM) { @@ -1188,7 +1316,7 @@ static void asm_tostr(ASMState *as, IRIns 
*ir) asm_setupresult(as, ir, ci); /* GCstr * */ asm_gencall(as, ci, args); if (ir->op2 == IRTOSTR_NUM) - asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); + asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1); } #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86 @@ -1198,7 +1326,8 @@ static void asm_conv64(ASMState *as, IRIns *ir) IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); IRCallID id; IRRef args[2]; - lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP); + lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, + "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); args[LJ_BE] = (ir-1)->op1; args[LJ_LE] = ir->op1; if (st == IRT_NUM || st == IRT_FLOAT) { @@ -1228,12 +1357,19 @@ static void asm_newref(ASMState *as, IRIns *ir) IRRef args[3]; if (ir->r == RID_SINK) return; + asm_snap_prep(as); args[0] = ASMREF_L; /* lua_State *L */ args[1] = ir->op1; /* GCtab *t */ args[2] = ASMREF_TMP1; /* cTValue *key */ asm_setupresult(as, ir, ci); /* TValue * */ asm_gencall(as, ci, args); - asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); + asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1); +} + +static void asm_tmpref(ASMState *as, IRIns *ir) +{ + Reg r = ra_dest(as, ir, RSET_GPR); + asm_tvptr(as, r, ir->op1, ir->op2); } static void asm_lref(ASMState *as, IRIns *ir) @@ -1253,15 +1389,16 @@ static void asm_collectargs(ASMState *as, IRIns *ir, const CCallInfo *ci, IRRef *args) { uint32_t n = CCI_XNARGS(ci); - lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ + /* Account for split args. */ + lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n); if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } while (n-- > 1) { ir = IR(ir->op1); - lua_assert(ir->o == IR_CARG); + lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree"); args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; } args[0] = ir->op1 == REF_NIL ? 
0 : ir->op1; - lua_assert(IR(ir->op1)->o != IR_CARG); + lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree"); } /* Reconstruct CCallInfo flags for CALLX*. */ @@ -1305,32 +1442,6 @@ static void asm_call(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); } -#if !LJ_SOFTFP -static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; - IRRef args[2]; - args[0] = lref; - args[1] = rref; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); -} - -static int asm_fpjoin_pow(ASMState *as, IRIns *ir) -{ - IRIns *irp = IR(ir->op1); - if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { - IRIns *irpp = IR(irp->op1); - if (irpp == ir-2 && irpp->o == IR_FPMATH && - irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { - asm_fppow(as, ir, irpp->op1, irp->op2); - return 1; - } - } - return 0; -} -#endif - /* -- PHI and loop handling ----------------------------------------------- */ /* Break a PHI cycle by renaming to a free register (evict if needed). */ @@ -1601,6 +1712,64 @@ static void asm_loop(ASMState *as) #error "Missing assembler for target CPU" #endif +/* -- Common instruction helpers ------------------------------------------ */ + +#if !LJ_SOFTFP32 +#if !LJ_TARGET_X86ORX64 +#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) +#endif + +static void asm_pow(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : + IRCALL_lj_carith_powu64); + else +#endif + asm_callid(as, ir, IRCALL_pow); +} + +static void asm_div(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : + IRCALL_lj_carith_divu64); + else +#endif + asm_fpdiv(as, ir); +} +#endif + +static void asm_mod(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isint(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_modi64 : + IRCALL_lj_carith_modu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_modi); +} + +static void asm_fuseequal(ASMState *as, IRIns *ir) +{ + /* Fuse HREF + EQ/NE. */ + if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { + as->curins--; + asm_href(as, ir-1, (IROp)ir->o); + } else { + asm_equal(as, ir); + } +} + +static void asm_alen(ASMState *as, IRIns *ir) +{ + asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len : + IRCALL_lj_tab_len_hint); +} + /* -- Instruction dispatch ------------------------------------------------ */ /* Assemble a single instruction. */ @@ -1609,7 +1778,10 @@ static void asm_ir(ASMState *as, IRIns *ir) switch ((IROp)ir->o) { /* Miscellaneous ops. */ case IR_LOOP: asm_loop(as); break; - case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; + case IR_NOP: case IR_XBAR: + lj_assertA(!ra_used(ir), + "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS); + break; case IR_USE: ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; case IR_PHI: asm_phi(as, ir); break; @@ -1623,14 +1795,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_ABC: asm_comp(as, ir); break; - case IR_EQ: case IR_NE: - if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { - as->curins--; - asm_href(as, ir-1, (IROp)ir->o); - } else { - asm_equal(as, ir); - } - break; + case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break; case IR_RETF: asm_retf(as, ir); break; @@ -1652,16 +1817,17 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_MUL: asm_mul(as, ir); break; case IR_MOD: asm_mod(as, ir); break; case IR_NEG: asm_neg(as, ir); break; -#if LJ_SOFTFP +#if LJ_SOFTFP32 case IR_DIV: case IR_POW: case IR_ABS: - case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: - lua_assert(0); /* Unused for LJ_SOFTFP. */ + case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: + /* Unused for LJ_SOFTFP32. 
*/ + lj_assertA(0, "IR %04d with unused op %d", + (int)(ir - as->ir) - REF_BIAS, ir->o); break; #else case IR_DIV: asm_div(as, ir); break; case IR_POW: asm_pow(as, ir); break; case IR_ABS: asm_abs(as, ir); break; - case IR_ATAN2: asm_atan2(as, ir); break; case IR_LDEXP: asm_ldexp(as, ir); break; case IR_FPMATH: asm_fpmath(as, ir); break; case IR_TOBIT: asm_tobit(as, ir); break; @@ -1681,6 +1847,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_NEWREF: asm_newref(as, ir); break; case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; case IR_FREF: asm_fref(as, ir); break; + case IR_TMPREF: asm_tmpref(as, ir); break; case IR_STRREF: asm_strref(as, ir); break; case IR_LREF: asm_lref(as, ir); break; @@ -1691,6 +1858,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_FLOAD: asm_fload(as, ir); break; case IR_XLOAD: asm_xload(as, ir); break; case IR_SLOAD: asm_sload(as, ir); break; + case IR_ALEN: asm_alen(as, ir); break; case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; case IR_FSTORE: asm_fstore(as, ir); break; @@ -1700,7 +1868,14 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; case IR_TNEW: asm_tnew(as, ir); break; case IR_TDUP: asm_tdup(as, ir); break; - case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; + case IR_CNEW: case IR_CNEWI: +#if LJ_HASFFI + asm_cnew(as, ir); +#else + lj_assertA(0, "IR %04d with unused op %d", + (int)(ir - as->ir) - REF_BIAS, ir->o); +#endif + break; /* Buffer operations. */ case IR_BUFHDR: asm_bufhdr(as, ir); break; @@ -1759,6 +1934,8 @@ static void asm_head_side(ASMState *as) IRRef1 sloadins[RID_MAX]; RegSet allow = RSET_ALL; /* Inverse of all coalesced registers. */ RegSet live = RSET_EMPTY; /* Live parent registers. */ + RegSet pallow = RSET_GPR; /* Registers needed by the parent stack check. */ + Reg pbase; IRIns *irp = &as->parent->ir[REF_BASE]; /* Parent base. 
*/ int32_t spadj, spdelta; int pass2 = 0; @@ -1767,17 +1944,22 @@ static void asm_head_side(ASMState *as) if (as->snapno && as->topslot > as->parent->topslot) { /* Force snap #0 alloc to prevent register overwrite in stack check. */ - as->snapno = 0; - asm_snap_alloc(as); + asm_snap_alloc(as, 0); + } + pbase = asm_head_side_base(as, irp); + if (pbase != RID_NONE) { + rset_clear(allow, pbase); + rset_clear(pallow, pbase); } - allow = asm_head_side_base(as, irp, allow); /* Scan all parent SLOADs and collect register dependencies. */ for (i = as->stopins; i > REF_BASE; i--) { IRIns *ir = IR(i); RegSP rs; - lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || - (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL); + lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || + (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL, + "IR %04d has bad parent op %d", + (int)(ir - as->ir) - REF_BIAS, ir->o); rs = as->parentmap[i - REF_FIRST]; if (ra_hasreg(ir->r)) { rset_clear(allow, ir->r); @@ -1798,6 +1980,7 @@ static void asm_head_side(ASMState *as) sloadins[rs] = (IRRef1)i; rset_set(live, rs); /* Block live parent register. */ } + if (!ra_hasspill(regsp_spill(rs))) rset_clear(pallow, regsp_reg(rs)); } /* Calculate stack frame adjustment. */ @@ -1914,7 +2097,7 @@ static void asm_head_side(ASMState *as) ExitNo exitno = as->J->exitno; #endif as->T->topslot = (uint8_t)as->topslot; /* Remember for child traces. */ - asm_stack_check(as, as->topslot, irp, allow & RSET_GPR, exitno); + asm_stack_check(as, as->topslot, irp, pallow, exitno); } } @@ -2005,12 +2188,16 @@ static void asm_setup_regsp(ASMState *as) #endif ra_setup(as); +#if LJ_TARGET_ARM64 + ra_setkref(as, RID_GL, (intptr_t)J2G(as->J)); +#endif /* Clear reg/sp for constants. */ for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { ir->prev = REGSP_INIT; if (irt_is64(ir->t) && ir->o != IR_KNULL) { #if LJ_GC64 + /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. 
*/ ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ #else /* Make life easier for backends by putting address of constant in i. */ @@ -2026,6 +2213,7 @@ static void asm_setup_regsp(ASMState *as) as->snaprename = nins; as->snapref = nins; as->snapno = T->nsnap; + as->snapalloc = 0; as->stopins = REF_BASE; as->orignins = nins; @@ -2035,7 +2223,7 @@ static void asm_setup_regsp(ASMState *as) ir = IR(REF_FIRST); if (as->parent) { uint16_t *p; - lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir); + lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir); if (lastir - ir > LJ_MAX_JSLOTS) lj_trace_err(as->J, LJ_TRERR_NYICOAL); as->stopins = (IRRef)((lastir-1) - as->ir); @@ -2074,6 +2262,10 @@ static void asm_setup_regsp(ASMState *as) ir->prev = (uint16_t)REGSP_HINT((rload & 15)); rload = lj_ror(rload, 4); continue; + case IR_TMPREF: + if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4) + as->evenspill = 4; /* TMPREF OUT2 needs two TValues on the stack. */ + break; #endif case IR_CALLXS: { CCallInfo ci; @@ -2083,7 +2275,17 @@ static void asm_setup_regsp(ASMState *as) as->modset |= RSET_SCRATCH; continue; } - case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: { + case IR_CALLL: + /* lj_vm_next needs two TValues on the stack. 
*/ +#if LJ_TARGET_X64 && LJ_ABI_WIN + if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4) + as->evenspill = SPS_FIRST + 4; +#else + if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4) + as->evenspill = 4; +#endif + /* fallthrough */ + case IR_CALLN: case IR_CALLA: case IR_CALLS: { const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; ir->prev = asm_setup_call_slots(as, ir, ci); if (inloop) @@ -2091,7 +2293,6 @@ static void asm_setup_regsp(ASMState *as) (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; continue; } -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) case IR_HIOP: switch ((ir-1)->o) { #if LJ_SOFTFP && LJ_TARGET_ARM @@ -2102,15 +2303,15 @@ static void asm_setup_regsp(ASMState *as) } break; #endif -#if !LJ_SOFTFP && LJ_NEED_FP64 +#if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI case IR_CONV: if (irt_isfp((ir-1)->t)) { ir->prev = REGSP_HINT(RID_FPRET); continue; } - /* fallthrough */ #endif - case IR_CALLN: case IR_CALLXS: + /* fallthrough */ + case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: #if LJ_SOFTFP case IR_MIN: case IR_MAX: #endif @@ -2121,12 +2322,11 @@ static void asm_setup_regsp(ASMState *as) break; } break; -#endif #if LJ_SOFTFP case IR_MIN: case IR_MAX: if ((ir+1)->o != IR_HIOP) break; - /* fallthrough */ #endif + /* fallthrough */ /* C calls evict all scratch regs and return results in RID_RET. */ case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: if (REGARG_NUMGPR < 3 && as->evenspill < 3) @@ -2137,9 +2337,12 @@ static void asm_setup_regsp(ASMState *as) if (ir->op2 != REF_NIL && as->evenspill < 4) as->evenspill = 4; /* lj_cdata_newv needs 4 args. 
*/ } + /* fallthrough */ #else + /* fallthrough */ case IR_CNEW: #endif + /* fallthrough */ case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR: case IR_BUFSTR: ir->prev = REGSP_HINT(RID_RET); @@ -2151,35 +2354,45 @@ static void asm_setup_regsp(ASMState *as) as->modset = RSET_SCRATCH; break; #if !LJ_SOFTFP - case IR_ATAN2: -#if LJ_TARGET_X86 - if (as->evenspill < 4) /* Leave room to call atan2(). */ - as->evenspill = 4; -#endif #if !LJ_TARGET_X86ORX64 case IR_LDEXP: #endif #endif + /* fallthrough */ case IR_POW: if (!LJ_SOFTFP && irt_isnum(ir->t)) { if (inloop) as->modset |= RSET_SCRATCH; #if LJ_TARGET_X86 + if (irt_isnum(IR(ir->op2)->t)) { + if (as->evenspill < 4) /* Leave room to call pow(). */ + as->evenspill = 4; + } break; #else ir->prev = REGSP_HINT(RID_FPRET); continue; #endif } - /* fallthrough for integer POW */ + /* fallthrough */ /* for integer POW */ case IR_DIV: case IR_MOD: - if (!irt_isnum(ir->t)) { + if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) { + ir->prev = REGSP_HINT(RID_RET); + if (inloop) + as->modset |= (RSET_SCRATCH & RSET_GPR); + continue; + } + break; +#if LJ_64 && LJ_SOFTFP + case IR_ADD: case IR_SUB: case IR_MUL: + if (irt_isnum(ir->t)) { ir->prev = REGSP_HINT(RID_RET); if (inloop) as->modset |= (RSET_SCRATCH & RSET_GPR); continue; } break; +#endif case IR_FPMATH: #if LJ_TARGET_X86ORX64 if (ir->op2 <= IRFPM_TRUNC) { @@ -2190,9 +2403,6 @@ static void asm_setup_regsp(ASMState *as) continue; } break; - } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) { - if (as->evenspill < 4) /* Leave room to call pow(). */ - as->evenspill = 4; } #endif if (inloop) @@ -2208,6 +2418,7 @@ static void asm_setup_regsp(ASMState *as) case IR_BSHL: case IR_BSHR: case IR_BSAR: if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. 
*/ break; + /* fallthrough */ case IR_BROL: case IR_BROR: if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { IR(ir->op2)->r = REGSP_HINT(RID_ECX); @@ -2252,7 +2463,6 @@ void lj_asm_trace(jit_State *J, GCtrace *T) { ASMState as_; ASMState *as = &as_; - MCode *origtop; /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */ { @@ -2267,7 +2477,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) /* Ensure an initialized instruction beyond the last one for HIOP checks. */ /* This also allows one RENAME to be added without reallocating curfinal. */ as->orignins = lj_ir_nextins(J); - J->cur.ir[as->orignins].o = IR_NOP; + lj_ir_nop(&J->cur.ir[as->orignins]); /* Setup initial state. Copy some fields to reduce indirections. */ as->J = J; @@ -2278,9 +2488,12 @@ void lj_asm_trace(jit_State *J, GCtrace *T) as->realign = NULL; as->loopinv = 0; as->parent = J->parent ? traceref(J, J->parent) : NULL; +#ifdef LUAJIT_RANDOM_RA + (void)lj_prng_u64(&J2G(J)->prng); /* Ensure PRNG step between traces. */ +#endif /* Reserve MCode memory. */ - as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot); + as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot); as->mcp = as->mctop; as->mclim = as->mcbot + MCLIM_REDZONE; asm_setup_target(as); @@ -2319,6 +2532,10 @@ void lj_asm_trace(jit_State *J, GCtrace *T) #endif as->ir = J->curfinal->ir; /* Use the copied IR. */ as->curins = J->cur.nins = as->orignins; +#ifdef LUAJIT_RANDOM_RA + as->prngstate = J2G(J)->prng; /* Must (re)start from identical state. */ + as->prngbits = 0; +#endif RA_DBG_START(); RA_DBGX((as, "===== STOP =====")); @@ -2338,7 +2555,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T) /* Assemble a trace in linear backwards order. */ for (as->curins--; as->curins > as->stopins; as->curins--) { IRIns *ir = IR(as->curins); - lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ + /* 64 bit types handled by SPLIT for 32 bit archs. 
*/ + lj_assertA(!(LJ_32 && irt_isint64(ir->t)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); + asm_snap_prev(as); if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) continue; /* Dead-code elimination can be soooo easy. */ if (irt_isguard(ir->t)) @@ -2368,10 +2589,13 @@ void lj_asm_trace(jit_State *J, GCtrace *T) asm_phi_fixup(as); if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */ - lua_assert(J->curfinal->nk == T->nk); + lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth"); memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ T->nins = J->curfinal->nins; + /* Fill mcofs of any unprocessed snapshots. */ + as->curins = REF_FIRST; + asm_snap_prev(as); break; /* Done. */ } @@ -2390,13 +2614,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T) /* Set trace entry point before fixing up tail to allow link to self. */ T->mcode = as->mcp; T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; - if (!as->loopref) + if (as->loopref) + asm_loop_tail_fixup(as); + else asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); + asm_snap_fixup_mcofs(as); #if LJ_TARGET_MCODE_FIXUP asm_mcode_fixup(T->mcode, T->szmcode); #endif - lj_mcode_sync(T->mcode, origtop); + lj_mcode_sync(T->mcode, as->mctoporig); } #undef IR diff --git a/source/libs/luajit/LuaJIT-src/src/lj_asm.h b/source/libs/luajit/LuaJIT-src/src/lj_asm.h index 2819481b6de0a66255348e9577dcb54f41e82c5d..b793b10195059983738f5f26d3395c9e9c571ddc 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_asm.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_asm.h @@ -1,6 +1,6 @@ /* ** IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_ASM_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_asm_arm.h b/source/libs/luajit/LuaJIT-src/src/lj_asm_arm.h index 37bfa40f2faf58f74310de70bf154fc253044ab8..de435057e197e9e9a46148cea8182e57bd8afdef 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_asm_arm.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_asm_arm.h @@ -1,6 +1,6 @@ /* ** ARM IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ /* -- Register allocator extensions --------------------------------------- */ @@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow) } } } - lua_assert(rset_test(RSET_GPREVEN, r)); + lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r); ra_modified(as, r); ra_modified(as, r+1); RA_DBGX((as, "scratchpair $r $r", r, r+1)); @@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ return ra_allock(as, (ofs & ~255), allow); } + } else if (ir->o == IR_TMPREF) { + *ofsp = 0; + return RID_SP; } } *ofsp = 0; @@ -269,7 +272,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, return; } } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { - lua_assert(ofs == 0); + lj_assertA(ofs == 0, "bad usage"); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { ofs += IR(ir->op2)->i; @@ -310,7 +313,11 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, } #if !LJ_SOFTFP -/* Fuse to multiply-add/sub instruction. */ +/* +** Fuse to multiply-add/sub instruction. +** VMLA rounds twice (UMA, not FMA) -- no need to check for JIT_F_OPT_FMA. +** VFMA needs VFPv4, which is uncommon on the remaining ARM32 targets. 
+*/ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) { IRRef lref = ir->op1, rref = ir->op2; @@ -389,9 +396,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ if (irt_isnum(ir->t)) { - lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */ + lj_assertA(rset_test(as->freeset, gpr+1), + "reg %d not free", gpr+1); /* Ditto. */ emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); gpr += 2; } else { @@ -408,7 +417,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #endif { if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ if (ref) ra_leftov(as, gpr, ref); gpr++; } else { @@ -433,7 +443,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ra_evictset(as, drop); /* Evictions must be performed first. 
*/ if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); + lj_assertA(!irt_ispri(ir->t), "PRI dest"); if (!LJ_SOFTFP && irt_isfp(ir->t)) { if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); @@ -495,6 +505,30 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_lso(as, ARMI_LDR, RID_TMP, base, -4); } +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L); + irgc.ot = IRT(0, IRT_PGC); /* GC type. */ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + if ((as->flags & JIT_F_ARMV6T2)) { + emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp); + } else { + emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp); + emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp); + } + emit_lso(as, ARMI_LDR, RID_TMP, + ra_allock(as, (addr & ~4095), + rset_exclude(rset_exclude(RSET_GPR, sb), tmp)), + (addr & 4095)); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + /* -- Type conversions ---------------------------------------------------- */ #if !LJ_SOFTFP @@ -530,13 +564,17 @@ static void asm_conv(ASMState *as, IRIns *ir) #endif IRRef lref = ir->op1; /* 64 bit integer conversions are handled by SPLIT. */ - lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); + lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); #if LJ_SOFTFP /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); + lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), + "IR %04d has FP type", + (int)(ir - as->ir) - REF_BIAS); /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. 
*/ #else - lua_assert(irt_type(ir->t) != st); + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -553,7 +591,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg left = ra_alloc1(as, lref, RSET_FPR); @@ -572,7 +611,7 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left = ra_alloc1(as, lref, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if ((as->flags & JIT_F_ARMV6)) { ARMIns ai = st == IRT_I8 ? ARMI_SXTB : st == IRT_U8 ? ARMI_UXTB : @@ -658,35 +697,55 @@ static void asm_strto(ASMState *as, IRIns *ir) /* -- Memory references --------------------------------------------------- */ /* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) { - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) { - /* Use the number constant itself as a TValue. 
*/ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); - } else { + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if ((mode & IRTMPREF_OUT1)) { #if LJ_SOFTFP - lua_assert(0); + lj_assertA(irref_isk(ref), "unsplit FP op"); + emit_dm(as, ARMI_MOV, dest, RID_SP); + emit_lso(as, ARMI_STR, + ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), + RID_SP, 0); + emit_lso(as, ARMI_STR, + ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), + RID_SP, 4); #else - /* Otherwise force a spill and use the spill slot. */ - emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_dm(as, ARMI_MOV, dest, RID_SP); + emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0); #endif + } else if (irref_isk(ref)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, i32ptr(ir_knum(ir)), dest); + } else { +#if LJ_SOFTFP + lj_assertA(0, "unsplit FP op"); +#else + /* Otherwise force a spill and use the spill slot. */ + emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); +#endif + } + } else { + /* Otherwise use [sp] and [sp+4] to hold the TValue. + ** This assumes the following call has max. 4 args. + */ + Reg type; + emit_dm(as, ARMI_MOV, dest, RID_SP); + if (!irt_ispri(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_GPR); + emit_lso(as, ARMI_STR, src, RID_SP, 0); + } + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) + type = ra_alloc1(as, ref+1, RSET_GPR); + else + type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); + emit_lso(as, ARMI_STR, type, RID_SP, 4); } } else { - /* Otherwise use [sp] and [sp+4] to hold the TValue. 
*/ - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; emit_dm(as, ARMI_MOV, dest, RID_SP); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_lso(as, ARMI_STR, src, RID_SP, 0); - } - if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) - type = ra_alloc1(as, ref+1, allow); - else - type = ra_allock(as, irt_toitype(ir->t), allow); - emit_lso(as, ARMI_STR, type, RID_SP, 4); } } @@ -811,16 +870,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; + khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1; if (khash == 0) { emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); } else { emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); - if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */ + if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. 
*/ emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); + emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid)); emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); } else if (irref_isk(refkey)) { emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, @@ -867,7 +926,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) Reg node = ra_alloc1(as, ir->op1, RSET_GPR); Reg key = RID_NONE, type = RID_TMP, idx = node; RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ofs > 4095) { idx = dest; rset_clear(allow, dest); @@ -910,31 +969,39 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_lsptr(as, ARMI_LDR, dest, v); } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); + if (guarded) { + asm_guardcc(as, ir->o == IR_UREFC ? 
CC_NE : CC_EQ); emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP); - emit_opk(as, ARMI_ADD, dest, uv, + } + if (ir->o == IR_UREFC) + emit_opk(as, ARMI_ADD, dest, dest, (int32_t)offsetof(GCupval, tv), RSET_GPR); - emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); + else + emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) + emit_lso(as, ARMI_LDRB, RID_TMP, dest, + (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loadi(as, dest, k); } else { - emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v)); + emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); } - emit_lso(as, ARMI_LDR, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); } } static void asm_fref(ASMState *as, IRIns *ir) { UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); + lj_assertA(!ra_used(ir), "unfused FREF"); } static void asm_strref(ASMState *as, IRIns *ir) @@ -971,39 +1038,43 @@ static void asm_strref(ASMState *as, IRIns *ir) /* -- Loads and stores ---------------------------------------------------- */ -static ARMIns asm_fxloadins(IRIns *ir) +static ARMIns asm_fxloadins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: return ARMI_LDRSB; case IRT_U8: return ARMI_LDRB; case IRT_I16: return ARMI_LDRSH; case IRT_U16: return ARMI_LDRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; - case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D; + case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */ default: return ARMI_LDR; } } -static ARMIns asm_fxstoreins(IRIns *ir) +static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: return 
ARMI_STRB; case IRT_I16: case IRT_U16: return ARMI_STRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; - case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D; + case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */ default: return ARMI_STR; } } static void asm_fload(ASMState *as, IRIns *ir) { - if (ir->op1 == REF_NIL) { - lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + ARMIns ai = asm_fxloadins(as, ir); + Reg idx; + int32_t ofs; + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR); + ofs = 0; } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); - ARMIns ai = asm_fxloadins(ir); - int32_t ofs; + idx = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->op2 == IRFL_TAB_ARRAY) { ofs = asm_fuseabase(as, ir->op1); if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ @@ -1012,11 +1083,11 @@ static void asm_fload(ASMState *as, IRIns *ir) } } ofs = field_ofs[ir->op2]; - if ((ai & 0x04000000)) - emit_lso(as, ai, dest, idx, ofs); - else - emit_lsox(as, ai, dest, idx, ofs); } + if ((ai & 0x04000000)) + emit_lso(as, ai, dest, idx, ofs); + else + emit_lsox(as, ai, dest, idx, ofs); } static void asm_fstore(ASMState *as, IRIns *ir) @@ -1026,7 +1097,7 @@ static void asm_fstore(ASMState *as, IRIns *ir) IRIns *irf = IR(ir->op1); Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); int32_t ofs = field_ofs[irf->op2]; - ARMIns ai = asm_fxstoreins(ir); + ARMIns ai = asm_fxstoreins(as, ir); if ((ai & 0x04000000)) emit_lso(as, ai, src, idx, ofs); else @@ -1038,8 +1109,8 @@ static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isfp(ir->t)) ? 
RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); + lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); } static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) @@ -1047,7 +1118,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) if (ir->r != RID_SINK) { Reg src = ra_alloc1(as, ir->op2, (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, rset_exclude(RSET_GPR, src), ofs); } } @@ -1066,13 +1137,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) rset_clear(allow, type); } if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); + lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); rset_clear(allow, dest); } idx = asm_fuseahuref(as, ir->op1, &ofs, allow, (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); + if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; if (!hiop || type == RID_NONE) { rset_clear(allow, idx); if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && @@ -1133,10 +1206,13 @@ static void asm_sload(ASMState *as, IRIns *ir) IRType t = hiop ? IRT_NUM : irt_type(ir->t); Reg dest = RID_NONE, type = RID_NONE, base; RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); #if LJ_SOFTFP - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. 
*/ + lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), + "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ if (hiop && ra_used(ir+1)) { type = ra_dest(as, ir+1, allow); rset_clear(allow, type); @@ -1152,8 +1228,9 @@ static void asm_sload(ASMState *as, IRIns *ir) Reg tmp = RID_NONE; if ((ir->op2 & IRSLOAD_CONVERT)) tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); + lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad SLOAD type %d", irt_type(ir->t)); dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); @@ -1185,7 +1262,12 @@ dotypecheck: } } asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); - emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); + if ((ir->op2 & IRSLOAD_KEYINDEX)) { + emit_n(as, ARMI_CMN|ARMI_K12|1, type); + emit_dn(as, ARMI_EOR^emit_isk12(ARMI_EOR, ~LJ_KEYINDEX), type, type); + } else { + emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); + } } if (ra_hasreg(dest)) { #if !LJ_SOFTFP @@ -1218,7 +1300,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) IRRef args[4]; RegSet allow = (RSET_GPR & ~RSET_SCRATCH); RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); as->gcsteps++; if (ra_hasreg(ir->r)) @@ -1230,10 +1313,10 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. 
*/ if (ir->o == IR_CNEWI) { int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); if (sz == 8) { ofs += 4; ir++; - lua_assert(ir->o == IR_HIOP); + lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI"); } for (;;) { Reg r = ra_alloc1(as, ir->op2, allow); @@ -1268,8 +1351,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1301,7 +1382,7 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj, val, tmp; /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1364,8 +1445,6 @@ static void asm_callround(ASMState *as, IRIns *ir, int id) static void asm_fpmath(ASMState *as, IRIns *ir) { - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; if (ir->op2 <= IRFPM_TRUNC) asm_callround(as, ir, ir->op2); else if (ir->op2 == IRFPM_SQRT) @@ -1412,14 +1491,29 @@ static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai) emit_dn(as, ai^m, dest, left); } -static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) +/* Try to drop cmp r, #0. */ +static ARMIns asm_drop_cmp0(ASMState *as, ARMIns ai) { - if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ + if (as->flagmcp == as->mcp) { + uint32_t cc = (as->mcp[1] >> 28); as->flagmcp = NULL; - as->mcp++; - ai |= ARMI_S; + if (cc <= CC_NE) { + as->mcp++; + ai |= ARMI_S; + } else if (cc == CC_GE) { + *++as->mcp ^= ((CC_GE^CC_PL) << 28); + ai |= ARMI_S; + } else if (cc == CC_LT) { + *++as->mcp ^= ((CC_LT^CC_MI) << 28); + ai |= ARMI_S; + } /* else: other conds don't work in general. 
*/ } - asm_intop(as, ir, ai); + return ai; +} + +static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) +{ + asm_intop(as, ir, asm_drop_cmp0(as, ai)); } static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) @@ -1492,15 +1586,10 @@ static void asm_mul(ASMState *as, IRIns *ir) #define asm_mulov(as, ir) asm_mul(as, ir) #if !LJ_SOFTFP -#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) +#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) #define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) #endif -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) - static void asm_neg(ASMState *as, IRIns *ir) { #if !LJ_SOFTFP @@ -1514,20 +1603,7 @@ static void asm_neg(ASMState *as, IRIns *ir) static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) { - if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ - uint32_t cc = (as->mcp[1] >> 28); - as->flagmcp = NULL; - if (cc <= CC_NE) { - as->mcp++; - ai |= ARMI_S; - } else if (cc == CC_GE) { - *++as->mcp ^= ((CC_GE^CC_PL) << 28); - ai |= ARMI_S; - } else if (cc == CC_LT) { - *++as->mcp ^= ((CC_LT^CC_MI) << 28); - ai |= ARMI_S; - } /* else: other conds don't work with bit ops. 
*/ - } + ai = asm_drop_cmp0(as, ai); if (ir->op2 == 0) { Reg dest = ra_dest(as, ir, RSET_GPR); uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); @@ -1582,7 +1658,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) #define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) #define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) #define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) -#define asm_brol(as, ir) lua_assert(0) +#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) { @@ -1657,8 +1733,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) asm_intmin_max(as, ir, cc); } -#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) +#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL) +#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE) /* -- Comparisons --------------------------------------------------------- */ @@ -1733,7 +1809,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir) Reg left; uint32_t m; int cmpprev0 = 0; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), + "bad comparison data type %d", irt_type(ir->t)); if (asm_swapops(as, lref, rref)) { Reg tmp = lref; lref = rref; rref = tmp; if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ @@ -1825,15 +1902,15 @@ static void asm_int64comp(ASMState *as, IRIns *ir) } #endif -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ +/* -- Split register ops -------------------------------------------------- */ -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_HASFFI || LJ_SOFTFP /* HIOP is marked as a store because it needs its own DCE logic. 
*/ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; +#if LJ_HASFFI || LJ_SOFTFP if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ as->curins--; /* Always skip the loword comparison. */ #if LJ_SOFTFP @@ -1850,7 +1927,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { as->curins--; /* Always skip the loword min/max. */ if (uselo || usehi) - asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); + asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE); return; #elif LJ_HASFFI } else if ((ir-1)->o == IR_CONV) { @@ -1864,6 +1941,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) asm_xstore_(as, ir, 4); return; } +#endif if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { #if LJ_HASFFI @@ -1882,6 +1960,9 @@ static void asm_hiop(ASMState *as, IRIns *ir) asm_intneg(as, ir, ARMI_RSC); asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); break; + case IR_CNEWI: + /* Nothing to do here. Handled by lo op itself. */ + break; #endif #if LJ_SOFTFP case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: @@ -1889,24 +1970,16 @@ static void asm_hiop(ASMState *as, IRIns *ir) if (!uselo) ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ break; + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: + /* Nothing to do here. Handled by lo op itself. */ + break; #endif - case IR_CALLN: - case IR_CALLS: - case IR_CALLXS: + case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: if (!uselo) ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ break; -#if LJ_SOFTFP - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: -#endif - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. 
*/ - break; - default: lua_assert(0); break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); -#endif } /* -- Profiling ----------------------------------------------------------- */ @@ -1925,24 +1998,27 @@ static void asm_prof(ASMState *as, IRIns *ir) static void asm_stack_check(ASMState *as, BCReg topslot, IRIns *irp, RegSet allow, ExitNo exitno) { + int savereg = 0; Reg pbase; uint32_t k; if (irp) { if (!ra_hasspill(irp->s)) { pbase = irp->r; - lua_assert(ra_hasreg(pbase)); + lj_assertA(ra_hasreg(pbase), "base reg lost"); } else if (allow) { pbase = rset_pickbot(allow); } else { pbase = RID_RET; - emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ + savereg = 1; } } else { pbase = RID_BASE; } emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); + if (savereg) + emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ k = emit_isk12(0, (int32_t)(8*topslot)); - lua_assert(k); + lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); emit_n(as, ARMI_CMP^k, RID_TMP); emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, @@ -1952,7 +2028,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, if (ra_hasspill(irp->s)) emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); - if (ra_hasspill(irp->s) && !allow) + if (savereg) emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ emit_loadi(as, RID_TMP, (i & ~4095)); } else { @@ -1966,11 +2042,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) SnapEntry *map = &as->T->snapmap[snap->mapofs]; SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; MSize n, nent = snap->nent; + int32_t bias = 0; /* Store the value of all modified slots to the Lua stack. 
*/ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); + int32_t ofs = 8*((int32_t)s-1) - bias; IRRef ref = snap_ref(sn); IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) @@ -1979,7 +2056,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) #if LJ_SOFTFP RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); Reg tmp; - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ + /* LJ_SOFTFP: must be a number constant. */ + lj_assertA(irref_isk(ref), "unsplit FP op"); tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, rset_exclude(RSET_GPREVEN, RID_BASE)); emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); @@ -1988,12 +2066,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4); #else Reg src = ra_alloc1(as, ref, RSET_FPR); + if (LJ_UNLIKELY(ofs < -1020 || ofs > 1020)) { + int32_t adj = ofs & 0xffffff00; /* K12-friendly. */ + bias += adj; + ofs -= adj; + emit_addptr(as, RID_BASE, -adj); + } emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs); #endif } else { RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); Reg type; - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "restore of IR type %d", irt_type(ir->t)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); emit_lso(as, ARMI_STR, src, RID_BASE, ofs); @@ -2006,6 +2091,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } else if ((sn & SNAP_SOFTFPNUM)) { type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); #endif + } else if ((sn & SNAP_KEYINDEX)) { + type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd); } else { type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); } @@ -2013,11 +2100,15 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } - lua_assert(map + nent == flinks); + emit_addptr(as, 
RID_BASE, bias); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ +/* Marker to prevent patching the GC check exit. */ +#define ARM_NOPATCH_GC_CHECK (ARMI_BIC|ARMI_K12) + /* Check GC threshold and do one or more GC steps. */ static void asm_gc_check(ASMState *as) { @@ -2029,6 +2120,7 @@ static void asm_gc_check(ASMState *as) l_end = emit_label(as); /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ + *--as->mcp = ARM_NOPATCH_GC_CHECK; emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ASMREF_TMP2; /* MSize steps */ @@ -2063,6 +2155,12 @@ static void asm_loop_fixup(ASMState *as) } } +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + UNUSED(as); /* Nothing to do. */ +} + /* -- Head of trace ------------------------------------------------------- */ /* Reload L register from g->cur_L. */ @@ -2088,7 +2186,7 @@ static void asm_head_root_base(ASMState *as) } /* Coalesce BASE register for a side trace. 
*/ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) +static Reg asm_head_side_base(ASMState *as, IRIns *irp) { IRIns *ir; asm_head_lreg(as); @@ -2096,16 +2194,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) ra_spill(as, ir); if (ra_hasspill(irp->s)) { - rset_clear(allow, ra_dest(as, ir, allow)); + return ra_dest(as, ir, RSET_GPR); } else { Reg r = irp->r; - lua_assert(ra_hasreg(r)); - rset_clear(allow, r); + lj_assertA(ra_hasreg(r), "base reg lost"); if (r != ir->r && !rset_test(as->freeset, r)) ra_restore(as, regcost_ref(as->cost[r])); ra_destreg(as, ir, r); + return r; } - return allow; } /* -- Tail of trace ------------------------------------------------------- */ @@ -2121,7 +2218,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) } else { /* Patch stack adjustment. */ uint32_t k = emit_isk12(ARMI_ADD, spadj); - lua_assert(k); + lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); } /* Patch exit branch. */ @@ -2174,7 +2271,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) } if (nslots > as->evenspill) /* Leave room for args in stack slots. */ as->evenspill = nslots; - return REGSP_HINT(RID_RET); + return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET); } static void asm_setup_target(ASMState *as) @@ -2197,13 +2294,14 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) /* Look for bl_cc exitstub, replace with b_cc target. 
*/ uint32_t ins = *p; if ((ins & 0x0f000000u) == 0x0b000000u && ins < 0xf0000000u && - ((ins ^ (px-p)) & 0x00ffffffu) == 0) { + ((ins ^ (px-p)) & 0x00ffffffu) == 0 && + p[-1] != ARM_NOPATCH_GC_CHECK) { *p = (ins & 0xfe000000u) | (((target-p)-2) & 0x00ffffffu); cend = p+1; if (!cstart) cstart = p; } } - lua_assert(cstart != NULL); + lj_assertJ(cstart != NULL, "exit stub %d not found", exitno); lj_mcode_sync(cstart, cend); lj_mcode_patch(J, mcarea, 1); } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_asm_arm64.h b/source/libs/luajit/LuaJIT-src/src/lj_asm_arm64.h index 8fd92e76fdd8a8f1bd0f99f0c77fb248b21be760..4feaa3b0c2aeb5589df198d4522632dddb5259d7 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_asm_arm64.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_asm_arm64.h @@ -1,6 +1,6 @@ /* ** ARM64 IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. ** Sponsored by Cisco Systems, Inc. @@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) asm_mclimit(as); /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... 
*/ for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu)); - *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno)); + *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); + *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno)); mxp--; - *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu)); - *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP)); + *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp))); + *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP)); as->mctop = mxp; } @@ -77,25 +77,30 @@ static void asm_guardcc(ASMState *as, A64CC cc) MCode *p = as->mcp; if (LJ_UNLIKELY(p == as->invmcp)) { as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); + *p = A64I_B | A64F_S26(target-p); emit_cond_branch(as, cc^1, p-1); return; } emit_cond_branch(as, cc, target); } -/* Emit test and branch instruction to exit for guard. */ -static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) +/* Emit test and branch instruction to exit for guard, if in range. */ +static int asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) { MCode *target = asm_exitstub_addr(as, as->snapno); MCode *p = as->mcp; + ptrdiff_t delta = target - p; if (LJ_UNLIKELY(p == as->invmcp)) { + if (as->orignins > 1023) return 0; /* Delta might end up too large. */ as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); - emit_tnb(as, ai^0x01000000u, r, bit, p-1); - return; + *p = A64I_B | A64F_S26(delta); + ai ^= 0x01000000u; + target = p-1; + } else if (LJ_UNLIKELY(delta >= 0x1fff)) { + return 0; } emit_tnb(as, ai, r, bit, target); + return 1; } /* Emit compare and branch instruction to exit for guard. 
*/ @@ -105,7 +110,7 @@ static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r) MCode *p = as->mcp; if (LJ_UNLIKELY(p == as->invmcp)) { as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); + *p = A64I_B | A64F_S26(target-p); emit_cnb(as, ai^0x01000000u, r, p-1); return; } @@ -198,6 +203,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, return RID_GL; } } + } else if (ir->o == IR_TMPREF) { + *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv); + return RID_GL; } } *ofsp = 0; @@ -208,16 +216,14 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) { IRIns *ir = IR(ref); + int logical = (ai & 0x1f000000) == 0x0a000000; if (ra_hasreg(ir->r)) { ra_noweak(as, ir->r); return A64F_M(ir->r); } else if (irref_isk(ref)) { - uint32_t m; - int64_t k = get_k64val(ir); - if ((ai & 0x1f000000) == 0x0a000000) - m = emit_isk13(k, irt_is64(ir->t)); - else - m = emit_isk12(k); + int64_t k = get_k64val(as, ref); + uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) : + emit_isk12(irt_is64(ir->t) ? k : (int32_t)k); if (m) return m; } else if (mayfuse(as, ref)) { @@ -229,7 +235,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); IRIns *irl = IR(ir->op1); if (sh == A64SH_LSL && - irl->o == IR_CONV && + irl->o == IR_CONV && !logical && irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && shift <= 4 && canfuse(as, irl)) { @@ -239,7 +245,11 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) Reg m = ra_alloc1(as, ir->op1, allow); return A64F_M(m) | A64F_SH(sh, shift); } - } else if (ir->o == IR_CONV && + } else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) { + Reg m = ra_alloc1(as, ir->op1, allow); + int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 
63 : 31)); + return A64F_M(m) | A64F_SH(A64SH_ROR, shift); + } else if (ir->o == IR_CONV && !logical && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) { Reg m = ra_alloc1(as, ir->op1, allow); return A64F_M(m) | A64F_EX(A64EX_SXTW); @@ -295,8 +305,10 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, } else if (asm_isk32(as, ir->op1, &ofs)) { ref = ir->op2; } else { - Reg rn = ra_alloc1(as, ir->op1, allow); - IRIns *irr = IR(ir->op2); + Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2; + Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1; + Reg rn = ra_alloc1(as, refv, allow); + IRIns *irr = IR(refk); uint32_t m; if (irr+1 == ir && !ra_used(irr) && irr->o == IR_ADD && irref_isk(irr->op2)) { @@ -307,7 +319,7 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, goto skipopm; } } - m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); + m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn)); ofs = sizeof(GCstr); skipopm: emit_lso(as, ai, rd, rd, ofs); @@ -332,7 +344,8 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) { IRRef lref = ir->op1, rref = ir->op2; IRIns *irm; - if (lref != rref && + if ((as->flags & JIT_F_OPT_FMA) && + lref != rref && ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && ra_noreg(irm->r)) || (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && @@ -352,9 +365,9 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) static int asm_fuseandshift(ASMState *as, IRIns *ir) { IRIns *irl = IR(ir->op1); - lua_assert(ir->o == IR_BAND); + lj_assertA(ir->o == IR_BAND, "bad usage"); if (canfuse(as, irl) && irref_isk(ir->op2)) { - uint64_t mask = get_k64val(IR(ir->op2)); + uint64_t mask = get_k64val(as, ir->op2); if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { int32_t shmask = irt_is64(irl->t) ? 
63 : 31; int32_t shift = (IR(irl->op2)->i & shmask); @@ -382,7 +395,7 @@ static int asm_fuseandshift(ASMState *as, IRIns *ir) static int asm_fuseorshift(ASMState *as, IRIns *ir) { IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - lua_assert(ir->o == IR_BOR); + lj_assertA(ir->o == IR_BOR, "bad usage"); if (canfuse(as, irl) && canfuse(as, irr) && ((irl->o == IR_BSHR && irr->o == IR_BSHL) || (irl->o == IR_BSHL && irr->o == IR_BSHR))) { @@ -413,38 +426,73 @@ static int asm_fuseorshift(ASMState *as, IRIns *ir) static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 0; + int32_t spofs = 0, spalign = LJ_HASFFI && LJ_TARGET_OSX ? 0 : 7; Reg gpr, fpr = REGARG_FIRSTFPR; - if ((void *)ci->func) - emit_call(as, (void *)ci->func); + if (ci->func) + emit_call(as, ci->func); for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) as->cost[gpr] = REGCOST(~0u, ASMREF_L); gpr = REGARG_FIRSTGPR; +#if LJ_HASFFI && LJ_ABI_WIN + if ((ci->flags & CCI_VARARG)) { + fpr = REGARG_LASTFPR+1; + } +#endif for (n = 0; n < nargs; n++) { /* Setup args. */ IRRef ref = args[n]; IRIns *ir = IR(ref); if (ref) { if (irt_isfp(ir->t)) { if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Must have been evicted. */ ra_leftov(as, fpr, ref); fpr++; +#if LJ_HASFFI && LJ_ABI_WIN + } else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) { + Reg rf = ra_alloc1(as, ref, RSET_FPR); + emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31); +#endif } else { Reg r = ra_alloc1(as, ref, RSET_FPR); - emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0)); - ofs += 8; + int32_t al = spalign; +#if LJ_HASFFI && LJ_TARGET_OSX + al |= irt_isnum(ir->t) ? 
7 : 3; +#endif + spofs = (spofs + al) & ~al; + if (LJ_BE && al >= 7 && !irt_isnum(ir->t)) spofs += 4, al -= 4; + emit_spstore(as, ir, r, spofs); + spofs += al + 1; } } else { if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ ra_leftov(as, gpr, ref); gpr++; } else { Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0)); - ofs += 8; + int32_t al = spalign; +#if LJ_HASFFI && LJ_TARGET_OSX + al |= irt_size(ir->t) - 1; +#endif + spofs = (spofs + al) & ~al; + if (al >= 3) { + if (LJ_BE && al >= 7 && !irt_is64(ir->t)) spofs += 4, al -= 4; + emit_spstore(as, ir, r, spofs); + } else { + lj_assertA(al == 0 || al == 1, "size %d unexpected", al + 1); + emit_lso(as, al ? A64I_STRH : A64I_STRB, r, RID_SP, spofs); + } + spofs += al + 1; } } +#if LJ_HASFFI && LJ_TARGET_OSX + } else { /* Marker for start of varargs. */ + gpr = REGARG_LASTGPR+1; + fpr = REGARG_LASTFPR+1; + spalign = 7; +#endif } } } @@ -453,11 +501,14 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; + int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ + if (hiop && ra_hasreg((ir+1)->r)) + rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ra_evictset(as, drop); /* Evictions must be performed first. 
*/ if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); + lj_assertA(!irt_ispri(ir->t), "PRI dest"); if (irt_isfp(ir->t)) { if (ci->flags & CCI_CASTU64) { Reg dest = ra_dest(as, ir, RSET_FPR) & 31; @@ -466,6 +517,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) } else { ra_destreg(as, ir, RID_FPRET); } + } else if (hiop) { + ra_destpair(as, ir); } else { ra_destreg(as, ir, RID_RET); } @@ -488,7 +541,7 @@ static void asm_callx(ASMState *as, IRIns *ir) ci.func = (ASMFunction)(ir_k64(irf)->u64); } else { /* Need a non-argument register for indirect calls. */ Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); - emit_n(as, A64I_BLR, freg); + emit_n(as, A64I_BLR_AUTH, freg); ci.func = (ASMFunction)(void *)0; } asm_gencall(as, &ci, args); @@ -505,8 +558,6 @@ static void asm_retf(ASMState *as, IRIns *ir) as->topslot -= (BCReg)delta; if ((int32_t)as->topslot < 0) as->topslot = 0; irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - /* Need to force a spill on REF_BASE now to update the stack slot. */ - emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guardcc(as, CC_NE); @@ -515,6 +566,21 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_lso(as, A64I_LDRx, RID_TMP, base, -8); } +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + irgc.ot = IRT(0, IRT_PGC); /* GC type. 
*/ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + emit_dn(as, A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0), RID_TMP, tmp); + emit_getgl(as, RID_TMP, cur_L); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + /* -- Type conversions ---------------------------------------------------- */ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) @@ -544,7 +610,7 @@ static void asm_conv(ASMState *as, IRIns *ir) int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); int stfp = (st == IRT_NUM || st == IRT_FLOAT); IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -564,7 +630,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg left = ra_alloc1(as, lref, RSET_FPR); @@ -584,7 +651,7 @@ static void asm_conv(ASMState *as, IRIns *ir) A64Ins ai = st == IRT_I8 ? A64I_SXTBw : st == IRT_U8 ? A64I_UXTBw : st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); emit_dn(as, ai, dest, left); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -597,7 +664,7 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_dn(as, A64I_SXTW, dest, left); } } else { - if (st64) { + if (st64 && !(ir->op2 & IRCONV_NONE)) { /* This is either a 32 bit reg/reg mov which zeroes the hiword ** or a load of the loword from a 64 bit address. 
*/ @@ -614,25 +681,22 @@ static void asm_strto(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; - Reg dest = 0, tmp; - int destused = ra_used(ir); + Reg tmp; int32_t ofs = 0; ra_evictset(as, RSET_SCRATCH); - if (destused) { + if (ra_used(ir)) { if (ra_hasspill(ir->s)) { ofs = sps_scale(ir->s); - destused = 0; if (ra_hasreg(ir->r)) { ra_free(as, ir->r); ra_modified(as, ir->r); emit_spload(as, ir, ir->r, ofs); } } else { - dest = ra_dest(as, ir, RSET_FPR); + Reg dest = ra_dest(as, ir, RSET_FPR); + emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); } } - if (destused) - emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); asm_guardcnb(as, A64I_CBZ, RID_RET); args[0] = ir->op1; /* GCstr *str */ args[1] = ASMREF_TMP1; /* TValue *n */ @@ -648,7 +712,8 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) { RegSet allow = rset_exclude(RSET_GPR, base); IRIns *ir = IR(ref); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "store of IR type %d", irt_type(ir->t)); if (irref_isk(ref)) { TValue k; lj_ir_kvalue(as->J->L, &k, ir); @@ -669,22 +734,23 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) } /* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) { - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) { - /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i64ptr(ir_knum(ir)), dest); + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { + /* Use the number constant itself as a TValue. 
*/ + ra_allockreg(as, i64ptr(ir_knum(ir)), dest); + return; + } + emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0); } else { - /* Otherwise force a spill and use the spill slot. */ - emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR); + asm_tvstore64(as, dest, 0, ref); } - } else { - /* Otherwise use g->tmptv to hold the TValue. */ - asm_tvstore64(as, dest, 0, ref); - ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest); } + /* g->tmptv holds the TValue(s). */ + emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL); } static void asm_aref(ASMState *as, IRIns *ir) @@ -721,105 +787,77 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) int destused = ra_used(ir); Reg dest = ra_dest(as, ir, allow); Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = 0, tmp = RID_TMP; + Reg tmp = RID_TMP, type = RID_NONE, key = RID_NONE, tkey; IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); - int isk = irref_isk(ir->op2); + int isk = irref_isk(refkey); IRType1 kt = irkey->t; uint32_t k = 0; uint32_t khash; - MCLabel l_end, l_loop, l_next; + MCLabel l_end, l_loop; rset_clear(allow, tab); - if (!isk) { - key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); - rset_clear(allow, key); - if (!irt_isstr(kt)) { - tmp = ra_scratch(as, allow); - rset_clear(allow, tmp); - } - } else if (irt_isnum(kt)) { - int64_t val = (int64_t)ir_knum(irkey)->u64; - if (!(k = emit_isk12(val))) { - key = ra_allock(as, val, allow); - rset_clear(allow, key); - } - } else if (!irt_ispri(kt)) { - if (!(k = emit_isk12(irkey->i))) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); + /* Allocate register for tkey outside of the loop. */ + if (isk) { + int64_t kk; + if (irt_isaddr(kt)) { + kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; + } else if (irt_isnum(kt)) { + kk = (int64_t)ir_knum(irkey)->u64; + /* Assumes -0.0 is already canonicalized to +0.0. 
*/ + } else { + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + kk = ~((int64_t)~irt_toitype(kt) << 47); } + k = emit_isk12(kk); + tkey = k ? 0 : ra_allock(as, kk, allow); + } else { + tkey = ra_scratch(as, allow); } /* Key not found in chain: jump to exit (if merged) or load niltv. */ l_end = emit_label(as); as->invmcp = NULL; - if (merge == IR_NE) + if (merge == IR_NE) { asm_guardcc(as, CC_AL); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); + } else if (destused) { + uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val)); + lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)"); + emit_dn(as, A64I_ADDx^k12, dest, RID_GL); + } /* Follow hash chain until the end. */ l_loop = --as->mcp; - emit_n(as, A64I_CMPx^A64I_K12^0, dest); - emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); - l_next = emit_label(as); + if (destused) + emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); /* Type and value comparison. */ if (merge == IR_EQ) asm_guardcc(as, CC_EQ); else emit_cond_branch(as, CC_EQ, l_end); + emit_nm(as, A64I_CMPx^k, tmp, tkey); + if (!destused) + emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key)); + *l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest; - if (irt_isnum(kt)) { - if (isk) { - /* Assumes -0.0 is already canonicalized to +0.0. 
*/ - if (k) - emit_n(as, A64I_CMPx^k, tmp); - else - emit_nm(as, A64I_CMPx, key, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); - } else { - Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow); - Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); - rset_clear(allow, tisnum); - emit_nm(as, A64I_FCMPd, key, ftmp); - emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); - emit_cond_branch(as, CC_LO, l_next); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); - } - } else if (irt_isaddr(kt)) { - Reg scr; - if (isk) { - int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; - scr = ra_allock(as, kk, allow); - emit_nm(as, A64I_CMPx, scr, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); + /* Construct tkey as canonicalized or tagged key. */ + if (!isk) { + if (irt_isnum(kt)) { + key = ra_alloc1(as, refkey, RSET_FPR); + emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey); + /* A64I_FMOV_R_D from key to tkey done below. 
*/ } else { - scr = ra_scratch(as, allow); - emit_nm(as, A64I_CMPx, tmp, scr); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); + lj_assertA(irt_isaddr(kt), "bad HREF key type"); + key = ra_alloc1(as, refkey, allow); + type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key)); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type); } - rset_clear(allow, scr); - } else { - Reg type, scr; - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); - type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - scr = ra_scratch(as, rset_clear(allow, type)); - rset_clear(allow, scr); - emit_nm(as, A64I_CMPw, scr, type); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); } - *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; - if (!isk && irt_isaddr(kt)) { - Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); - rset_clear(allow, type); - } /* Load main position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; + khash = isk ? ir_khash(as, irkey) : 1; if (khash == 0) { emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node)); } else { @@ -831,32 +869,26 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_dnm(as, A64I_ANDw, dest, dest, tmphash); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); } else if (irt_isstr(kt)) { - /* Fetch of str->hash is cheaper than ra_allock. */ emit_dnm(as, A64I_ANDw, dest, dest, tmp); - emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash)); + emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); } else { /* Must match with hash*() in lj_tab.c. 
*/ emit_dnm(as, A64I_ANDw, dest, dest, tmp); emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); emit_dnm(as, A64I_SUBw, dest, dest, tmp); emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); - emit_dnm(as, A64I_EORw, dest, dest, tmp); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); + emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest); emit_dnm(as, A64I_SUBw, tmp, tmp, dest); emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); - emit_dnm(as, A64I_EORw, tmp, tmp, dest); if (irt_isnum(kt)) { + emit_dnm(as, A64I_EORw, tmp, tkey, dest); emit_dnm(as, A64I_ADDw, dest, dest, dest); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVw, tmp, dest); - emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); + emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey); + emit_nm(as, A64I_FCMPZd, (key & 31), 0); + emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31)); } else { - checkmclim(as); - emit_dm(as, A64I_MOVw, tmp, key); - emit_dnm(as, A64I_EORw, dest, dest, - ra_allock(as, irt_toitype(kt) << 15, allow)); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVx, dest, key); + emit_dnm(as, A64I_EORw, tmp, key, dest); + emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key); } } } @@ -868,15 +900,13 @@ static void asm_hrefk(ASMState *as, IRIns *ir) IRIns *irkey = IR(kslot->op1); int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); int32_t kofs = ofs + (int32_t)offsetof(Node, key); - int bigofs = !emit_checkofs(A64I_LDRx, ofs); - RegSet allow = RSET_GPR; + int bigofs = !emit_checkofs(A64I_LDRx, kofs); Reg dest = (ra_used(ir) || bigofs) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, allow); - Reg key = ra_scratch(as, rset_clear(allow, node)); + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); Reg idx = node; + RegSet allow = rset_exclude(RSET_GPR, node); uint64_t k; - lua_assert(ofs % sizeof(Node) == 0); - rset_clear(allow, key); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (bigofs) { idx = dest; rset_clear(allow, dest); @@ -892,40 +922,46 @@ static void asm_hrefk(ASMState *as, IRIns *ir) } else { k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); } - emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow)); - emit_lso(as, A64I_LDRx, key, idx, kofs); + emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow)); + emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs); if (bigofs) - emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); + emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node)); } static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_lsptr(as, A64I_LDRx, dest, v); } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP); - emit_opk(as, A64I_ADDx, dest, uv, + if (guarded) + asm_guardcnb(as, ir->o == IR_UREFC ? 
A64I_CBZ : A64I_CBNZ, RID_TMP); + if (ir->o == IR_UREFC) + emit_opk(as, A64I_ADDx, dest, dest, (int32_t)offsetof(GCupval, tv), RSET_GPR); - emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); + else + emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) + emit_lso(as, A64I_LDRB, RID_TMP, dest, + (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loadu64(as, dest, k); } else { - emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); + emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); } - emit_lso(as, A64I_LDRx, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); } } static void asm_fref(ASMState *as, IRIns *ir) { UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); + lj_assertA(!ra_used(ir), "unfused FREF"); } static void asm_strref(ASMState *as, IRIns *ir) @@ -977,7 +1013,7 @@ static void asm_fload(ASMState *as, IRIns *ir) Reg idx; A64Ins ai = asm_fxloadins(ir); int32_t ofs; - if (ir->op1 == REF_NIL) { + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ idx = RID_GL; ofs = (ir->op2 << 2) - GG_OFS(g); } else { @@ -1008,7 +1044,7 @@ static void asm_fstore(ASMState *as, IRIns *ir) static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); + lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); } @@ -1023,11 +1059,12 @@ static void asm_xstore(ASMState *as, IRIns *ir) static void asm_ahuvload(ASMState *as, IRIns *ir) { - Reg idx, tmp, type; + Reg idx, tmp; int32_t ofs = 0; RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? 
RSET_FPR : RSET_GPR; - lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || - irt_isint(ir->t)); + lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || + irt_isint(ir->t), + "bad load type %d", irt_type(ir->t)); if (ra_used(ir)) { Reg dest = ra_dest(as, ir, allow); tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest; @@ -1041,22 +1078,25 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) } else { tmp = ra_scratch(as, gpr); } - type = ra_scratch(as, rset_clear(gpr, tmp)); - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx); + rset_clear(gpr, idx); + if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31); + if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; /* Always do the type check, even if the load result is unused. */ asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE); if (irt_type(ir->t) >= IRT_NUM) { - lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); + lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), + "bad load type %d", irt_type(ir->t)); emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); + ra_allock(as, LJ_TISNUM << 15, gpr), tmp); } else if (irt_isaddr(ir->t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp); } else if (irt_isnil(ir->t)) { emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); } else { emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp); + ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp); } if (ofs & FUSE_REG) emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); @@ -1111,8 +1151,10 @@ static void asm_sload(ASMState *as, 
IRIns *ir) IRType1 t = ir->t; Reg dest = RID_NONE, base; RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { dest = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, dest); @@ -1121,7 +1163,8 @@ static void asm_sload(ASMState *as, IRIns *ir) Reg tmp = RID_NONE; if ((ir->op2 & IRSLOAD_CONVERT)) tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); - lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t)); + lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(t)); dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest)); if (irt_isaddr(t)) { @@ -1156,23 +1199,23 @@ dotypecheck: tmp = ra_scratch(as, allow); rset_clear(allow, tmp); } - if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT)) + if (ra_hasreg(dest) && tmp != dest) emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); /* Need type check, even if the load result is unused. */ asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE); if (irt_type(t) >= IRT_NUM) { - lua_assert(irt_isinteger(t) || irt_isnum(t)); + lj_assertA(irt_isinteger(t) || irt_isnum(t), + "bad SLOAD type %d", irt_type(t)); emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, LJ_TISNUM << 15, allow), tmp); + ra_allock(as, (ir->op2 & IRSLOAD_KEYINDEX) ? 
LJ_KEYINDEX : (LJ_TISNUM << 15), allow), tmp); } else if (irt_isnil(t)) { emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); } else if (irt_ispri(t)) { emit_nm(as, A64I_CMPx, ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); } else { - Reg type = ra_scratch(as, allow); - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp); } emit_lso(as, A64I_LDRx, tmp, base, ofs); return; @@ -1196,7 +1239,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; IRRef args[4]; RegSet allow = (RSET_GPR & ~RSET_SCRATCH); - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); as->gcsteps++; asm_setupresult(as, ir, ci); /* GCcdata * */ @@ -1204,7 +1248,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ir->o == IR_CNEWI) { int32_t ofs = sizeof(GCcdata); Reg r = ra_alloc1(as, ir->op2, allow); - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs); } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. 
*/ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; @@ -1231,8 +1275,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1241,17 +1283,14 @@ static void asm_tbar(ASMState *as, IRIns *ir) { Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg gr = ra_allock(as, i64ptr(J2G(as->J)), - rset_exclude(rset_exclude(RSET_GPR, tab), link)); Reg mark = RID_TMP; MCLabel l_end = emit_label(as); - emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_lso(as, A64I_STRx, tab, gr, - (int32_t)offsetof(global_State, gc.grayagain)); + /* Keep STRx in the middle to avoid LDP/STP fusion with surrounding code. */ + emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_setgl(as, tab, gc.grayagain); emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); - emit_lso(as, A64I_LDRx, link, gr, - (int32_t)offsetof(global_State, gc.grayagain)); + emit_getgl(as, link, gc.grayagain); emit_cond_branch(as, CC_EQ, l_end); emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark); emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); @@ -1262,25 +1301,23 @@ static void asm_obar(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; IRRef args[2]; MCLabel l_end; - RegSet allow = RSET_GPR; Reg obj, val, tmp; /* No need for other object barriers (yet). 
*/ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ir->op1; /* TValue *tv */ asm_gencall(as, ci, args); - ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) ); + emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(allow, obj)); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp); + tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); + emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end); emit_cond_branch(as, CC_EQ, l_end); emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); emit_lso(as, A64I_LDRB, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); + (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); } @@ -1309,8 +1346,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir) } else if (fpm <= IRFPM_TRUNC) { asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd); - } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { - return; } else { asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); } @@ -1324,12 +1359,12 @@ static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) if (irref_isk(lref)) return 1; /* But swap constants to the right. */ ir = IR(rref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || + if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || (ir->o == IR_ADD && ir->op1 == ir->op2) || (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) return 0; /* Don't swap fusable operands to the left. 
*/ ir = IR(lref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || + if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || (ir->o == IR_ADD && ir->op1 == ir->op2) || (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) return 1; /* But swap fusable operands to the right. */ @@ -1375,13 +1410,12 @@ static void asm_intneg(ASMState *as, IRIns *ir) static void asm_intmul(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); if (irt_isguard(ir->t)) { /* IR_MULOV */ asm_guardcc(as, CC_NE); emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ - emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); - emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest); + emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest); emit_dnm(as, A64I_SMULL, dest, right, left); } else { emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); @@ -1417,46 +1451,12 @@ static void asm_mul(ASMState *as, IRIns *ir) asm_intmul(as, ir); } -static void asm_div(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, A64I_FDIVd); -} - -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_powi); -} - #define asm_addov(as, ir) asm_add(as, ir) #define asm_subov(as, ir) asm_sub(as, ir) #define asm_mulov(as, ir) asm_mul(as, ir) +#define asm_fpdiv(as, ir) asm_fparith(as, ir, A64I_FDIVd) #define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -static void asm_mod(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} static void asm_neg(ASMState *as, IRIns *ir) { @@ -1571,7 +1571,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) #define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR) #define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR) #define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR) -#define asm_brol(as, ir) lua_assert(0) +#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) { @@ -1587,7 +1587,7 @@ static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = ((left >> 8) & 31); left &= 31; - emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right); + emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left); emit_nm(as, A64I_FCMPd, left, right); } @@ -1599,8 +1599,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) asm_intmin_max(as, ir, cc); } -#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) +#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_PL) +#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_LE) /* -- Comparisons 
--------------------------------------------------------- */ @@ -1652,15 +1652,16 @@ static void asm_intcomp(ASMState *as, IRIns *ir) Reg left; uint32_t m; int cmpprev0 = 0; - lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || - irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); + lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || + irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t), + "bad comparison data type %d", irt_type(ir->t)); if (asm_swapops(as, lref, rref)) { IRRef tmp = lref; lref = rref; rref = tmp; if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ } oldcc = cc; - if (irref_isk(rref) && get_k64val(IR(rref)) == 0) { + if (irref_isk(rref) && get_k64val(as, rref) == 0) { IRIns *irl = IR(lref); if (cc == CC_GE) cc = CC_PL; else if (cc == CC_LT) cc = CC_MI; @@ -1674,16 +1675,15 @@ static void asm_intcomp(ASMState *as, IRIns *ir) if (asm_swapops(as, blref, brref)) { Reg tmp = blref; blref = brref; brref = tmp; } + bleft = ra_alloc1(as, blref, RSET_GPR); if (irref_isk(brref)) { - uint64_t k = get_k64val(IR(brref)); - if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { - asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, - ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); + uint64_t k = get_k64val(as, brref); + if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) && + asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft, + emit_ctz64(k))) return; - } m2 = emit_isk13(k, irt_is64(irl->t)); } - bleft = ra_alloc1(as, blref, RSET_GPR); ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); if (!m2) m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); @@ -1719,12 +1719,25 @@ static void asm_comp(ASMState *as, IRIns *ir) #define asm_equal(as, ir) asm_comp(as, ir) -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ +/* -- Split register ops -------------------------------------------------- */ -/* Hiword op of a split 64 bit op. 
Previous op must be the loword op. */ +/* Hiword op of a split 64/64 bit op. Previous op is the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */ + /* HIOP is marked as a store because it needs its own DCE logic. */ + int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ + if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; + if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ + switch ((ir-1)->o) { + case IR_CALLN: + case IR_CALLL: + case IR_CALLS: + case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } } /* -- Profiling ----------------------------------------------------------- */ @@ -1732,7 +1745,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) static void asm_prof(ASMState *as, IRIns *ir) { uint32_t k = emit_isk13(HOOK_PROFILE, 0); - lua_assert(k != 0); + lj_assertA(k != 0, "HOOK_PROFILE does not fit in K13"); UNUSED(ir); asm_guardcc(as, CC_NE); emit_n(as, A64I_TSTw^k, RID_TMP); @@ -1745,37 +1758,28 @@ static void asm_prof(ASMState *as, IRIns *ir) static void asm_stack_check(ASMState *as, BCReg topslot, IRIns *irp, RegSet allow, ExitNo exitno) { - Reg pbase; uint32_t k; + Reg pbase = RID_BASE; if (irp) { - if (!ra_hasspill(irp->s)) { - pbase = irp->r; - lua_assert(ra_hasreg(pbase)); - } else if (allow) { - pbase = rset_pickbot(allow); - } else { - pbase = RID_RET; - emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ - } - } else { - pbase = RID_BASE; + pbase = irp->r; + if (!ra_hasreg(pbase)) + pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET); } emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); + if (pbase & 0x80) /* Restore temp. register. 
*/ + emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0); k = emit_isk12((8*topslot)); - lua_assert(k); + lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); emit_n(as, A64I_CMPx^k, RID_TMP); - emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); + emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31)); emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, (int32_t)offsetof(lua_State, maxstack)); - if (irp) { /* Must not spill arbitrary registers in head of side trace. */ - if (ra_hasspill(irp->s)) - emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); - emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); - if (ra_hasspill(irp->s) && !allow) - emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ - } else { - emit_getgl(as, RID_TMP, cur_L); + if (pbase & 0x40) { + emit_getgl(as, (pbase & 31), jit_base); + if (pbase & 0x80) /* Save temp register. */ + emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0); } + emit_getgl(as, RID_TMP, cur_L); } /* Restore Lua stack from on-trace state. */ @@ -1795,7 +1799,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) continue; - if (irt_isnum(ir->t)) { + if ((sn & SNAP_KEYINDEX)) { + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + Reg r = irref_isk(ref) ? ra_allock(as, ir->i, allow) : + ra_alloc1(as, ref, allow); + rset_clear(allow, r); + emit_lso(as, A64I_STRw, r, RID_BASE, ofs); + emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4); + } else if (irt_isnum(ir->t)) { Reg src = ra_alloc1(as, ref, RSET_FPR); emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); } else { @@ -1803,36 +1814,38 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ +/* Marker to prevent patching the GC check exit. 
*/ +#define ARM64_NOPATCH_GC_CHECK \ + (A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO)) + /* Check GC threshold and do one or more GC steps. */ static void asm_gc_check(ASMState *as) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; IRRef args[2]; MCLabel l_end; - Reg tmp1, tmp2; + Reg tmp2; ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */ + *--as->mcp = ARM64_NOPATCH_GC_CHECK; args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ASMREF_TMP2; /* MSize steps */ asm_gencall(as, ci, args); - tmp1 = ra_releasetmp(as, ASMREF_TMP1); + emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); tmp2 = ra_releasetmp(as, ASMREF_TMP2); emit_loadi(as, tmp2, as->gcsteps); /* Jump around GC step if GC total < GC threshold. */ emit_cond_branch(as, CC_LS, l_end); emit_nm(as, A64I_CMPx, RID_TMP, tmp2); - emit_lso(as, A64I_LDRx, tmp2, tmp1, - (int32_t)offsetof(global_State, gc.threshold)); - emit_lso(as, A64I_LDRx, RID_TMP, tmp1, - (int32_t)offsetof(global_State, gc.total)); - ra_allockreg(as, i64ptr(J2G(as->J)), tmp1); + emit_getgl(as, tmp2, gc.threshold); + emit_getgl(as, RID_TMP, gc.total); as->gcsteps = 0; checkmclim(as); } @@ -1851,53 +1864,52 @@ static void asm_loop_fixup(ASMState *as) p[-2] |= ((uint32_t)delta & mask) << 5; } else { ptrdiff_t delta = target - (p - 1); - p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu); + p[-1] = A64I_B | A64F_S26(delta); } } -/* -- Head of trace ------------------------------------------------------- */ - -/* Reload L register from g->cur_L. */ -static void asm_head_lreg(ASMState *as) +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) { - IRIns *ir = IR(ASMREF_L); - if (ra_used(ir)) { - Reg r = ra_dest(as, ir, RSET_GPR); - emit_getgl(as, r, cur_L); - ra_evictk(as); - } + UNUSED(as); /* Nothing to do. 
*/ } +/* -- Head of trace ------------------------------------------------------- */ + /* Coalesce BASE register for a root trace. */ static void asm_head_root_base(ASMState *as) { - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - ra_destreg(as, ir, RID_BASE); + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (r != RID_BASE) + emit_movrr(as, ir, r, RID_BASE); + } } /* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - if (ra_hasspill(irp->s)) { - rset_clear(allow, ra_dest(as, ir, allow)); - } else { - Reg r = irp->r; - lua_assert(ra_hasreg(r)); - rset_clear(allow, r); - if (r != ir->r && !rset_test(as->freeset, r)) - ra_restore(as, regcost_ref(as->cost[r])); - ra_destreg(as, ir, r); +static Reg asm_head_side_base(ASMState *as, IRIns *irp) +{ + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (irp->r == r) { + return r; /* Same BASE register already coalesced. */ + } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { + /* Move from coalesced parent reg. */ + emit_movrr(as, ir, r, irp->r); + return irp->r; + } else { + emit_getgl(as, r, jit_base); /* Otherwise reload BASE. 
*/ + } } - return allow; + return RID_NONE; } /* -- Tail of trace ------------------------------------------------------- */ @@ -1915,12 +1927,12 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) } else { /* Patch stack adjustment. */ uint32_t k = emit_isk12(spadj); - lua_assert(k); + lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); } /* Patch exit branch. */ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu); + p[-1] = A64I_B | A64F_S26((target-p)+1); } /* Prepare tail of code. */ @@ -1941,20 +1953,47 @@ static void asm_tail_prep(ASMState *as) /* Ensure there are enough stack slots for call arguments. */ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) { - IRRef args[CCI_NARGS_MAX*2]; +#if LJ_HASFFI uint32_t i, nargs = CCI_XNARGS(ci); - int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) { - if (args[i] && irt_isfp(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; else nslots += 2; - } else { - if (ngpr > 0) ngpr--; else nslots += 2; + if (nargs > (REGARG_NUMGPR < REGARG_NUMFPR ? REGARG_NUMGPR : REGARG_NUMFPR) || + (LJ_TARGET_OSX && (ci->flags & CCI_VARARG))) { + IRRef args[CCI_NARGS_MAX*2]; + int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; + int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots; + asm_collectargs(as, ir, ci, args); +#if LJ_ABI_WIN + if ((ci->flags & CCI_VARARG)) nfpr = 0; +#endif + for (i = 0; i < nargs; i++) { + int al = spalign; + if (!args[i]) { +#if LJ_TARGET_OSX + /* Marker for start of varaargs. */ + nfpr = 0; + ngpr = 0; + spalign = 7; +#endif + } else if (irt_isfp(IR(args[i])->t)) { + if (nfpr > 0) { nfpr--; continue; } +#if LJ_ABI_WIN + if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; } +#elif LJ_TARGET_OSX + al |= irt_isnum(IR(args[i])->t) ? 
7 : 3; +#endif + } else { + if (ngpr > 0) { ngpr--; continue; } +#if LJ_TARGET_OSX + al |= irt_size(IR(args[i])->t) - 1; +#endif + } + spofs = (spofs + 2*al+1) & ~al; /* Align and bump stack pointer. */ } + nslots = (spofs + 3) >> 2; + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return REGSP_HINT(RID_RET); +#endif + return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET); } static void asm_setup_target(ASMState *as) @@ -1983,40 +2022,54 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) { MCode *p = T->mcode; MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *cstart = NULL, *cend = p; + MCode *cstart = NULL; MCode *mcarea = lj_mcode_patch(J, p, 0); MCode *px = exitstub_trace_addr(T, exitno); + int patchlong = 1; + /* Note: this assumes a trace exit is only ever patched once. */ for (; p < pe; p++) { /* Look for exitstub branch, replace with branch to target. */ + ptrdiff_t delta = target - p; MCode ins = A64I_LE(*p); if ((ins & 0xff000000u) == 0x54000000u && ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { - /* Patch bcc exitstub. */ - *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u)); - cend = p+1; - if (!cstart) cstart = p; + /* Patch bcc, if within range. */ + if (A64F_S_OK(delta, 19)) { + *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); + if (!cstart) cstart = p; + } } else if ((ins & 0xfc000000u) == 0x14000000u && ((ins ^ (px-p)) & 0x03ffffffu) == 0) { - /* Patch b exitstub. */ - *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu)); - cend = p+1; + /* Patch b. */ + lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range"); + *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta)); if (!cstart) cstart = p; } else if ((ins & 0x7e000000u) == 0x34000000u && ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { - /* Patch cbz/cbnz exitstub. 
*/ - *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u)); - cend = p+1; - if (!cstart) cstart = p; + /* Patch cbz/cbnz, if within range. */ + if (p[-1] == ARM64_NOPATCH_GC_CHECK) { + patchlong = 0; + } else if (A64F_S_OK(delta, 19)) { + *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); + if (!cstart) cstart = p; + } } else if ((ins & 0x7e000000u) == 0x36000000u && ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { - /* Patch tbz/tbnz exitstub. */ - *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u)); - cend = p+1; - if (!cstart) cstart = p; + /* Patch tbz/tbnz, if within range. */ + if (A64F_S_OK(delta, 14)) { + *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta)); + if (!cstart) cstart = p; + } } } - lua_assert(cstart != NULL); - lj_mcode_sync(cstart, cend); + /* Always patch long-range branch in exit stub itself. Except, if we can't. */ + if (patchlong) { + ptrdiff_t delta = target - px; + lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range"); + *px = A64I_B | A64F_S26(delta); + if (!cstart) cstart = px; + } + if (cstart) lj_mcode_sync(cstart, px+1); lj_mcode_patch(J, mcarea, 1); } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_asm_mips.h b/source/libs/luajit/LuaJIT-src/src/lj_asm_mips.h index affe7d899625ffa05950fa93ae3d59a38a48f191..af0e714f15710ea95501b3ce9cdcc70cd64088ac 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_asm_mips.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_asm_mips.h @@ -1,6 +1,6 @@ /* ** MIPS IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ /* -- Register allocator extensions --------------------------------------- */ @@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) { Reg r = IR(ref)->r; if (ra_noreg(r)) { - if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0) + if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0) return RID_ZERO; r = ra_allocref(as, ref, allow); } else { @@ -64,18 +64,29 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) /* Setup spare long-range jump slots per mcarea. */ static void asm_sparejump_setup(ASMState *as) { - MCode *mxp = as->mcbot; - /* Assumes sizeof(MCLink) == 8. */ - if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) { - lua_assert(MIPSI_NOP == 0); - memset(mxp+2, 0, MIPS_SPAREJUMP*8); - mxp += MIPS_SPAREJUMP*2; - lua_assert(mxp < as->mctop); - lj_mcode_sync(as->mcbot, mxp); - lj_mcode_commitbot(as->J, mxp); - as->mcbot = mxp; - as->mclim = as->mcbot + MCLIM_REDZONE; + MCode *mxp = as->mctop; + if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { + mxp -= MIPS_SPAREJUMP*2; + lj_assertA(MIPSI_NOP == 0, "bad NOP"); + memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode)); + as->mctop = mxp; + } +} + +static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump) +{ + MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size); + int slot = MIPS_SPAREJUMP; + while (slot--) { + mxp -= 2; + if (*mxp == tjump) { + return mxp; + } else if (*mxp == MIPSI_NOP) { + *mxp = tjump; + return mxp; + } } + return NULL; } /* Setup exit stub after the end of each trace. 
*/ @@ -85,7 +96,8 @@ static void asm_exitstub_setup(ASMState *as) /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); - lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0); + lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0, + "branch target out of range"); *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; as->mctop = mxp; } @@ -102,7 +114,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt) as->invmcp = NULL; as->loopinv = 1; as->mcp = p+1; +#if !LJ_TARGET_MIPSR6 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ +#else + mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : + (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */ +#endif target = p; /* Patch target later in asm_loop_fixup. */ } emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); @@ -176,6 +193,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) return ra_allock(as, ofs-(int16_t)ofs, allow); } } + } else if (ir->o == IR_TMPREF) { + *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); + return RID_JGL; } } *ofsp = 0; @@ -191,20 +211,20 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, if (ra_noreg(ir->r) && canfuse(as, ir)) { if (ir->o == IR_ADD) { intptr_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)), + if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), checki16(ofs2))) { ref = ir->op1; ofs = (int32_t)ofs2; } } else if (ir->o == IR_STRREF) { intptr_t ofs2 = 65536; - lua_assert(ofs == 0); + lj_assertA(ofs == 0, "bad usage"); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { - ofs2 = ofs + get_kval(IR(ir->op2)); + ofs2 = ofs + get_kval(as, ir->op2); ref = ir->op1; } else if (irref_isk(ir->op1)) { - ofs2 = ofs + get_kval(IR(ir->op1)); + ofs2 = ofs + get_kval(as, ir->op1); ref = 
ir->op2; } if (!checki16(ofs2)) { @@ -248,7 +268,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #if !LJ_SOFTFP if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && !(ci->flags & CCI_VARARG)) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Already evicted. */ ra_leftov(as, fpr, ref); fpr += LJ_32 ? 2 : 1; gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1; @@ -260,7 +281,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #endif if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1; if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Already evicted. */ #if !LJ_SOFTFP if (irt_isfp(ir->t)) { RegSet of = as->freeset; @@ -273,7 +295,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #if LJ_32 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); - lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, gpr+1), + "reg %d not free", gpr+1); /* Already evicted. */ gpr += 2; #else emit_tg(as, MIPSI_DMFC1, gpr, r); @@ -291,7 +314,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { ra_leftov(as, gpr, ref); gpr++; -#if LJ_64 +#if LJ_64 && !LJ_SOFTFP fpr++; #endif } @@ -302,7 +325,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) emit_spstore(as, ir, r, ofs); ofs += irt_isnum(ir->t) ? 8 : 4; #else - emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 4 : 0)); + emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 
4 : 0)); ofs += 8; #endif } @@ -313,7 +336,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #endif if (gpr <= REGARG_LASTGPR) { gpr++; -#if LJ_64 +#if LJ_64 && !LJ_SOFTFP fpr++; #endif } else { @@ -328,22 +351,18 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; -#if LJ_32 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); -#endif #if !LJ_SOFTFP if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; #endif if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ -#if LJ_32 if (hiop && ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ -#endif ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); + lj_assertA(!irt_ispri(ir->t), "PRI dest"); if (!LJ_SOFTFP && irt_isfp(ir->t)) { if ((ci->flags & CCI_CASTU64)) { int32_t ofs = sps_scale(ir->s); @@ -369,10 +388,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) } else { ra_destreg(as, ir, RID_FPRET); } -#if LJ_32 } else if (hiop) { ra_destpair(as, ir); -#endif } else { ra_destreg(as, ir, RID_RET); } @@ -391,7 +408,7 @@ static void asm_callx(ASMState *as, IRIns *ir) func = ir->op2; irf = IR(func); if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)get_kval(irf); + ci.func = (ASMFunction)(void *)get_kval(as, func); } else { /* Need specific register for indirect calls. */ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); MCode *p = as->mcp; @@ -411,7 +428,11 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) { /* The modified regs must match with the *.dasc implementation. 
*/ RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| - RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); + RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR) +#if LJ_TARGET_MIPSR6 + |RID2RSET(RID_F21) +#endif + ; if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ra_evictset(as, drop); ra_destreg(as, ir, RID_FPRET); @@ -435,9 +456,30 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_addptr(as, base, -8*delta); asm_guard(as, MIPSI_BNE, RID_TMP, ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); - emit_tsi(as, MIPSI_AL, RID_TMP, base, -8); + emit_tsi(as, MIPSI_AL, RID_TMP, base, (LJ_BE || LJ_FR2) ? -8 : -4); } +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + irgc.ot = IRT(0, IRT_PGC); /* GC type. */ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + if ((as->flags & JIT_F_MIPSXXR2)) { + emit_tsml(as, LJ_64 ? 
MIPSI_DINS : MIPSI_INS, RID_TMP, tmp, + lj_fls(SBUF_MASK_FLAG), 0); + } else { + emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp); + emit_tsi(as, MIPSI_ANDI, tmp, tmp, SBUF_MASK_FLAG); + } + emit_getgl(as, RID_TMP, cur_L); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + /* -- Type conversions ---------------------------------------------------- */ #if !LJ_SOFTFP @@ -445,8 +487,13 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) { Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); Reg dest = ra_dest(as, ir, RSET_GPR); +#if !LJ_TARGET_MIPSR6 asm_guard(as, MIPSI_BC1F, 0, 0); emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); +#else + asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31)); + emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left); +#endif emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, MIPSI_CVT_W_D, tmp, left); @@ -462,12 +509,36 @@ static void asm_tobit(ASMState *as, IRIns *ir) emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fgh(as, MIPSI_ADD_D, tmp, left, right); } +#elif LJ_64 /* && LJ_SOFTFP */ +static void asm_tointg(ASMState *as, IRIns *ir, Reg r) +{ + /* The modified regs must match with the *.dasc implementation. */ + RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| + RID2RSET(RID_R1)|RID2RSET(RID_R12); + if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); + ra_evictset(as, drop); + /* Return values are in RID_RET (converted value) and RID_RET+1 (status). 
*/ + ra_destreg(as, ir, RID_RET); + asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO); + emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0); + if (r == RID_NONE) + ra_leftov(as, REGARG_FIRSTGPR, ir->op1); + else if (r != REGARG_FIRSTGPR) + emit_move(as, REGARG_FIRSTGPR, r); +} + +static void asm_tobit(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + emit_dta(as, MIPSI_SLL, dest, dest, 0); + asm_callid(as, ir, IRCALL_lj_vm_tobit); +} #endif static void asm_conv(ASMState *as, IRIns *ir) { IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -#if !LJ_SOFTFP +#if !LJ_SOFTFP32 int stfp = (st == IRT_NUM || st == IRT_FLOAT); #endif #if LJ_64 @@ -475,15 +546,20 @@ static void asm_conv(ASMState *as, IRIns *ir) #endif IRRef lref = ir->op1; #if LJ_32 - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ + /* 64 bit integer conversions are handled by SPLIT. */ + lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); #endif -#if LJ_32 && LJ_SOFTFP +#if LJ_SOFTFP32 /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); + lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), + "IR %04d has FP type", + (int)(ir - as->ir) - REF_BIAS); /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ #else - lua_assert(irt_type(ir->t) != st); + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); +#if !LJ_SOFTFP if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -541,7 +617,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. 
*/ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -575,8 +652,13 @@ static void asm_conv(ASMState *as, IRIns *ir) (void *)&as->J->k64[LJ_K64_M2P64], rset_exclude(RSET_GPR, dest)); emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); +#if !LJ_TARGET_MIPSR6 + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); +#else + emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); + emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp); +#endif emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], rset_exclude(RSET_GPR, dest)); @@ -587,8 +669,13 @@ static void asm_conv(ASMState *as, IRIns *ir) (void *)&as->J->k32[LJ_K32_M2P64], rset_exclude(RSET_GPR, dest)); emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); +#if !LJ_TARGET_MIPSR6 + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); +#else + emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); + emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp); +#endif emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], rset_exclude(RSET_GPR, dest)); @@ -603,18 +690,55 @@ static void asm_conv(ASMState *as, IRIns *ir) MIPSIns mi = irt_is64(ir->t) ? (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); - emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left); - emit_fg(as, mi, left, left); + emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, tmp); + emit_fg(as, mi, tmp, left); #endif } } } else +#else + if (irt_isfp(ir->t)) { +#if LJ_64 && LJ_HASFFI + if (stfp) { /* FP to FP conversion. 
*/ + asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d : + IRCALL_softfp_d2f); + } else { /* Integer to FP conversion. */ + IRCallID cid = ((IRT_IS64 >> st) & 1) ? + (irt_isnum(ir->t) ? + (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) : + (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) : + (irt_isnum(ir->t) ? + (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) : + (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f)); + asm_callid(as, ir, cid); + } +#else + asm_callid(as, ir, IRCALL_softfp_i2d); +#endif + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); + asm_tointg(as, ir, RID_NONE); + } else { + IRCallID cid = irt_is64(ir->t) ? + ((st == IRT_NUM) ? + (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : + (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : + ((st == IRT_NUM) ? + (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : + (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); + asm_callid(as, ir, cid); + } + } else +#endif #endif { Reg dest = ra_dest(as, ir, RSET_GPR); if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if ((ir->op2 & IRCONV_SEXT)) { if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); @@ -645,7 +769,7 @@ static void asm_conv(ASMState *as, IRIns *ir) } } } else { - if (st64) { + if (st64 && !(ir->op2 & IRCONV_NONE)) { /* This is either a 32 bit reg/reg mov which zeroes the hiword ** or a load of the loword from a 64 bit address. 
*/ @@ -666,7 +790,7 @@ static void asm_strto(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; int32_t ofs = 0; -#if LJ_SOFTFP +#if LJ_SOFTFP32 ra_evictset(as, RSET_SCRATCH); if (ra_used(ir)) { if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && @@ -711,7 +835,8 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) { RegSet allow = rset_exclude(RSET_GPR, base); IRIns *ir = IR(ref); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "store of IR type %d", irt_type(ir->t)); if (irref_isk(ref)) { TValue k; lj_ir_kvalue(as->J->L, &k, ir); @@ -732,34 +857,63 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) #endif /* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) { - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, igcptr(ir_knum(ir)), dest); - else /* Otherwise force a spill and use the spill slot. */ - emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); - } else { - /* Otherwise use g->tmptv to hold the TValue. 
*/ + int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if ((mode & IRTMPREF_OUT1)) { +#if LJ_SOFTFP + emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); +#if LJ_64 + emit_setgl(as, ra_alloc1(as, ref, RSET_GPR), tmptv.u64); +#else + lj_assertA(irref_isk(ref), "unsplit FP op"); + emit_setgl(as, + ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), + tmptv.u32.lo); + emit_setgl(as, + ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), + tmptv.u32.hi); +#endif +#else + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); + emit_tsi(as, MIPSI_SDC1, (src & 31), RID_JGL, tmpofs); +#endif + } else if (irref_isk(ref)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, igcptr(ir_knum(ir)), dest); + } else { +#if LJ_SOFTFP32 + lj_assertA(0, "unsplit FP op"); +#else + /* Otherwise force a spill and use the spill slot. */ + emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); +#endif + } + } else { + /* Otherwise use g->tmptv to hold the TValue. 
*/ #if LJ_32 - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); - } - if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) - type = ra_alloc1(as, ref+1, allow); - else - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); + Reg type; + emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, tmpofs); + if (!irt_ispri(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_GPR); + emit_setgl(as, src, tmptv.gcr); + } + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) + type = ra_alloc1(as, ref+1, RSET_GPR); + else + type = ra_allock(as, (int32_t)irt_toitype(ir->t), RSET_GPR); + emit_setgl(as, type, tmptv.it); #else - asm_tvstore64(as, dest, 0, ref); - emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, - (int32_t)(offsetof(global_State, tmptv)-32768)); + asm_tvstore64(as, dest, 0, ref); + emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, tmpofs); #endif + } + } else { + emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); } } @@ -780,8 +934,12 @@ static void asm_aref(ASMState *as, IRIns *ir) } base = ra_alloc1(as, ir->op1, RSET_GPR); idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); +#if !LJ_TARGET_MIPSR6 emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); +#else + emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base); +#endif } /* Inlined hash lookup. Specialized for key type and for const keys. 
@@ -799,6 +957,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) Reg dest = ra_dest(as, ir, allow); Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; +#if LJ_64 + Reg cmp64 = RID_NONE; +#endif IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); int isk = irref_isk(refkey); @@ -807,11 +968,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) MCLabel l_end, l_loop, l_next; rset_clear(allow, tab); -#if LJ_32 && LJ_SOFTFP - if (!isk) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - if (irkey[1].o == IR_HIOP) { + if (!LJ_SOFTFP && irt_isnum(kt)) { + key = ra_alloc1(as, refkey, RSET_FPR); + tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); + } else { + if (!irt_ispri(kt)) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } +#if LJ_32 + if (LJ_SOFTFP && irkey[1].o == IR_HIOP) { if (ra_hasreg((irkey+1)->r)) { type = tmpnum = (irkey+1)->r; tmp1 = ra_scratch(as, allow); @@ -822,25 +988,33 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) } rset_clear(allow, tmpnum); } else { - type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); + type = ra_allock(as, (int32_t)irt_toitype(kt), allow); rset_clear(allow, type); } - } -#else - if (irt_isnum(kt)) { - key = ra_alloc1(as, refkey, RSET_FPR); - tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); - } else if (!irt_ispri(kt)) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); -#if LJ_32 - type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); - rset_clear(allow, type); #endif } -#endif tmp2 = ra_scratch(as, allow); rset_clear(allow, tmp2); +#if LJ_64 + if (LJ_SOFTFP || !irt_isnum(kt)) { + /* Allocate cmp64 register used for 64-bit comparisons */ + if (LJ_SOFTFP && irt_isnum(kt)) { + cmp64 = key; + } else if (!isk && irt_isaddr(kt)) { + cmp64 = tmp2; + } else { + int64_t k; + if (isk && irt_isaddr(kt)) { + k = ((int64_t)irt_toitype(kt) << 47) | 
irkey[1].tv.u64; + } else { + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + k = ~((int64_t)~irt_toitype(kt) << 47); + } + cmp64 = ra_allock(as, k, allow); + rset_clear(allow, cmp64); + } + } +#endif /* Key not found in chain: jump to exit (if merged) or load niltv. */ l_end = emit_label(as); @@ -861,8 +1035,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) l_end = asm_exitstub_addr(as); } if (!LJ_SOFTFP && irt_isnum(kt)) { +#if !LJ_TARGET_MIPSR6 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); +#else + emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end); + emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key); +#endif *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); @@ -883,21 +1062,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); - } else if (irt_isaddr(kt)) { - Reg refk = tmp2; - if (isk) { - int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; - refk = ra_allock(as, k, allow); - rset_clear(allow, refk); - } - emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end); - emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); } else { - Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - rset_clear(allow, pri); - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); - emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end); - emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); + emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end); + emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); } *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); if (!isk && irt_isaddr(kt)) { @@ -908,7 +1075,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) #endif /* Load main 
position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; + khash = isk ? ir_khash(as, irkey) : 1; if (khash == 0) { emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); } else { @@ -916,7 +1083,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) if (isk) tmphash = ra_allock(as, khash, allow); emit_dst(as, MIPSI_AADDU, dest, dest, tmp1); - lua_assert(sizeof(Node) == 24); + lj_assertA(sizeof(Node) == 24, "bad Node size"); emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); @@ -926,7 +1093,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) if (isk) { /* Nothing to do. */ } else if (irt_isstr(kt)) { - emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); + emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); } else { /* Must match with hash*() in lj_tab.c. */ emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2); emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); @@ -961,7 +1128,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); if (irt_isnum(kt)) { emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); + emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0); emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? 
key : tmp1, 0); #if !LJ_SOFTFP emit_tg(as, MIPSI_DMFC1, tmp1, key); @@ -994,7 +1161,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) Reg key = ra_scratch(as, allow); int64_t k; #endif - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ofs > 32736) { idx = dest; rset_clear(allow, dest); @@ -1023,7 +1190,7 @@ nolo: emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); #else if (irt_ispri(irkey->t)) { - lua_assert(!irt_isnil(irkey->t)); + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); k = ~((int64_t)~irt_toitype(irkey->t) << 47); } else if (irt_isnum(irkey->t)) { k = (int64_t)ir_knum(irkey)->u64; @@ -1040,29 +1207,36 @@ nolo: static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); - emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); + if (guarded) + asm_guard(as, ir->o == IR_UREFC ? 
MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO); + if (ir->o == IR_UREFC) + emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv)); + else + emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) + emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loada(as, dest, o); } else { - emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); + emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } - emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + - (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } } static void asm_fref(ASMState *as, IRIns *ir) { UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); + lj_assertA(!ra_used(ir), "unfused FREF"); } static void asm_strref(ASMState *as, IRIns *ir) @@ -1117,26 +1291,36 @@ static void asm_strref(ASMState *as, IRIns *ir) /* -- Loads and stores ---------------------------------------------------- */ -static MIPSIns asm_fxloadins(IRIns *ir) +static MIPSIns asm_fxloadins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: return MIPSI_LB; case IRT_U8: return MIPSI_LBU; case IRT_I16: return MIPSI_LH; case IRT_U16: return MIPSI_LHU; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1; + case IRT_NUM: + lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); + if (!LJ_SOFTFP) return MIPSI_LDC1; + /* fallthrough */ case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; + /* fallthrough */ default: return (LJ_64 && irt_is64(ir->t)) ? 
MIPSI_LD : MIPSI_LW; } } -static MIPSIns asm_fxstoreins(IRIns *ir) +static MIPSIns asm_fxstoreins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: return MIPSI_SB; case IRT_I16: case IRT_U16: return MIPSI_SH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1; + case IRT_NUM: + lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); + if (!LJ_SOFTFP) return MIPSI_SDC1; + /* fallthrough */ case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; + /* fallthrough */ default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; } } @@ -1144,10 +1328,10 @@ static MIPSIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - MIPSIns mi = asm_fxloadins(ir); + MIPSIns mi = asm_fxloadins(as, ir); Reg idx; int32_t ofs; - if (ir->op1 == REF_NIL) { + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ idx = RID_JGL; ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); } else { @@ -1160,8 +1344,8 @@ static void asm_fload(ASMState *as, IRIns *ir) } } ofs = field_ofs[ir->op2]; + lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); } - lua_assert(!irt_isfp(ir->t)); emit_tsi(as, mi, dest, idx, ofs); } @@ -1172,8 +1356,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) IRIns *irf = IR(ir->op1); Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); int32_t ofs = field_ofs[irf->op2]; - MIPSIns mi = asm_fxstoreins(ir); - lua_assert(!irt_isfp(ir->t)); + MIPSIns mi = asm_fxstoreins(as, ir); + lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); emit_tsi(as, mi, src, idx, ofs); } } @@ -1182,8 +1366,9 @@ static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isfp(ir->t)) ? 
RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); + lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), + "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); } static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) @@ -1191,7 +1376,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) if (ir->r != RID_SINK) { Reg src = ra_alloc1z(as, ir->op2, (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, rset_exclude(RSET_GPR, src), ofs); } } @@ -1200,7 +1385,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) static void asm_ahuvload(ASMState *as, IRIns *ir) { - int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); + int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); Reg dest = RID_NONE, type = RID_TMP, idx; RegSet allow = RSET_GPR; int32_t ofs = 0; @@ -1213,8 +1398,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) } } if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); + lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? 
RSET_FPR : allow); rset_clear(allow, dest); #if LJ_64 @@ -1225,6 +1411,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) #endif } idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; rset_clear(allow, idx); if (irt_isnum(t)) { asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); @@ -1262,10 +1449,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir) int32_t ofs = 0; if (ir->r == RID_SINK) return; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); + if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR); idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - emit_hsi(as, MIPSI_SDC1, src, idx, ofs); + emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs); } else { #if LJ_32 if (!irt_ispri(ir->t)) { @@ -1313,45 +1500,64 @@ static void asm_sload(ASMState *as, IRIns *ir) IRType1 t = ir->t; #if LJ_32 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); - int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); + int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); if (hiop) t.irt = IRT_NUM; #else int32_t ofs = 8*((int32_t)ir->op1-2); #endif - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); -#if LJ_32 && LJ_SOFTFP - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); +#if LJ_SOFTFP32 + lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), + "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ if (hiop && ra_used(ir+1)) { type = ra_dest(as, ir+1, allow); rset_clear(allow, type); } #else if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); + dest = ra_scratch(as, LJ_SOFTFP ? 
allow : RSET_FPR); asm_tointg(as, ir, dest); t.irt = IRT_NUM; /* Continue with a regular number type check. */ } else #endif if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); + lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad SLOAD type %d", irt_type(ir->t)); dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); rset_clear(allow, base); - if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { + if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) { if (irt_isint(t)) { - Reg tmp = ra_scratch(as, RSET_FPR); + Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR); +#if LJ_SOFTFP + ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); + ra_destreg(as, ir, RID_RET); + emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0); + if (tmp != REGARG_FIRSTGPR) + emit_move(as, REGARG_FIRSTGPR, tmp); +#else emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); +#endif dest = tmp; t.irt = IRT_NUM; /* Check for original type. */ } else { Reg tmp = ra_scratch(as, RSET_GPR); +#if LJ_SOFTFP + ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); + ra_destreg(as, ir, RID_RET); + emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0); + emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0); +#else emit_fg(as, MIPSI_CVT_D_W, dest, dest); emit_tg(as, MIPSI_MTC1, tmp, dest); +#endif dest = tmp; t.irt = IRT_INT; /* Check for original type. */ } @@ -1378,7 +1584,7 @@ dotypecheck: asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); } else { - Reg ktype = ra_allock(as, irt_toitype(t), allow); + Reg ktype = ra_allock(as, (ir->op2 & IRSLOAD_KEYINDEX) ? 
LJ_KEYINDEX : irt_toitype(t), allow); asm_guard(as, MIPSI_BNE, type, ktype); } } @@ -1396,11 +1602,15 @@ dotypecheck: if (irt_ispri(t)) { asm_guard(as, MIPSI_BNE, type, ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); + } else if ((ir->op2 & IRSLOAD_KEYINDEX)) { + asm_guard(as, MIPSI_BNE, RID_TMP, + ra_allock(as, (int32_t)LJ_KEYINDEX, allow)); + emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 0); } else { if (irt_isnum(t)) { asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); - if (ra_hasreg(dest)) + if (!LJ_SOFTFP && ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs); } else { asm_guard(as, MIPSI_BNE, RID_TMP, @@ -1410,7 +1620,7 @@ dotypecheck: } emit_tsi(as, MIPSI_LD, type, base, ofs); } else if (ra_hasreg(dest)) { - if (irt_isnum(t)) + if (!LJ_SOFTFP && irt_isnum(t)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs); else emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base, @@ -1431,7 +1641,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; IRRef args[4]; RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); as->gcsteps++; if (ra_hasreg(ir->r)) @@ -1447,7 +1658,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) int32_t ofs = sizeof(GCcdata); if (sz == 8) { ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); + lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI"); if (LJ_LE) ir++; } for (;;) { @@ -1458,10 +1669,10 @@ static void asm_cnew(ASMState *as, IRIns *ir) ofs -= 4; if (LJ_BE) ir++; else ir--; } #else - emit_tsi(as, MIPSI_SD, ra_alloc1(as, ir->op2, allow), + emit_tsi(as, sz == 8 ? 
MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow), RID_RET, sizeof(GCcdata)); #endif - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; args[0] = ASMREF_L; /* lua_State *L */ @@ -1484,8 +1695,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1513,7 +1722,7 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj, val, tmp; /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1549,33 +1758,46 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); emit_fg(as, mi, dest, left); } +#endif +#if !LJ_SOFTFP32 static void asm_fpmath(ASMState *as, IRIns *ir) { - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; +#if !LJ_SOFTFP if (ir->op2 <= IRFPM_TRUNC) asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); else if (ir->op2 == IRFPM_SQRT) asm_fpunary(as, ir, MIPSI_SQRT_D); else +#endif asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); } #endif +#if !LJ_SOFTFP +#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D) +#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D) +#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D) +#elif LJ_64 /* && LJ_SOFTFP */ +#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add) +#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub) +#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul) +#endif + static void asm_add(ASMState *as, IRIns *ir) { IRType1 t = 
ir->t; -#if !LJ_SOFTFP +#if !LJ_SOFTFP32 if (irt_isnum(t)) { - asm_fparith(as, ir, MIPSI_ADD_D); + asm_fpadd(as, ir); } else #endif { + /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */ Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); + intptr_t k = get_kval(as, ir->op2); if (checki16(k)) { emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest, left, k); @@ -1590,9 +1812,9 @@ static void asm_add(ASMState *as, IRIns *ir) static void asm_sub(ASMState *as, IRIns *ir) { -#if !LJ_SOFTFP +#if !LJ_SOFTFP32 if (irt_isnum(ir->t)) { - asm_fparith(as, ir, MIPSI_SUB_D); + asm_fpsub(as, ir); } else #endif { @@ -1606,9 +1828,9 @@ static void asm_sub(ASMState *as, IRIns *ir) static void asm_mul(ASMState *as, IRIns *ir) { -#if !LJ_SOFTFP +#if !LJ_SOFTFP32 if (irt_isnum(ir->t)) { - asm_fparith(as, ir, MIPSI_MUL_D); + asm_fpmul(as, ir); } else #endif { @@ -1616,46 +1838,26 @@ static void asm_mul(ASMState *as, IRIns *ir) Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; if (LJ_64 && irt_is64(ir->t)) { +#if !LJ_TARGET_MIPSR6 emit_dst(as, MIPSI_MFLO, dest, 0, 0); emit_dst(as, MIPSI_DMULT, 0, left, right); +#else + emit_dst(as, MIPSI_DMUL, dest, left, right); +#endif } else { emit_dst(as, MIPSI_MUL, dest, left, right); } } } -static void asm_mod(ASMState *as, IRIns *ir) +#if !LJ_SOFTFP32 +static void asm_fpdiv(ASMState *as, IRIns *ir) { -#if LJ_64 && LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} - #if !LJ_SOFTFP -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_powi); -} - -static void asm_div(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif asm_fparith(as, ir, MIPSI_DIV_D); +#else + asm_callid(as, ir, IRCALL_softfp_div); +#endif } #endif @@ -1665,6 +1867,13 @@ static void asm_neg(ASMState *as, IRIns *ir) if (irt_isnum(ir->t)) { asm_fpunary(as, ir, MIPSI_NEG_D); } else +#elif LJ_64 /* && LJ_SOFTFP */ + if (irt_isnum(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_dst(as, MIPSI_XOR, dest, left, + ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest))); + } else #endif { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1674,17 +1883,25 @@ static void asm_neg(ASMState *as, IRIns *ir) } } +#if !LJ_SOFTFP #define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) +#elif LJ_64 /* && LJ_SOFTFP */ +static void asm_abs(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0); +} +#endif static void asm_arithov(ASMState *as, IRIns *ir) { + /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); - lua_assert(!irt_is64(ir->t)); + lj_assertA(!irt_is64(ir->t), "bad usage"); if (irref_isk(ir->op2)) { int k = IR(ir->op2)->i; - if (ir->o == IR_SUBOV) k = -k; + if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); if (checki16(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ left = ra_alloc1(as, ir->op1, RSET_GPR); asm_guard(as, k >= 0 ? 
MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); @@ -1724,9 +1941,14 @@ static void asm_mulov(ASMState *as, IRIns *ir) right), dest)); asm_guard(as, MIPSI_BNE, RID_TMP, tmp); emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31); +#if !LJ_TARGET_MIPSR6 emit_dst(as, MIPSI_MFHI, tmp, 0, 0); emit_dst(as, MIPSI_MFLO, dest, 0, 0); emit_dst(as, MIPSI_MULT, 0, left, right); +#else + emit_dst(as, MIPSI_MUL, dest, left, right); + emit_dst(as, MIPSI_MUH, tmp, left, right); +#endif } #if LJ_32 && LJ_HASFFI @@ -1863,7 +2085,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); + intptr_t k = get_kval(as, ir->op2); if (checku16(k)) { emit_tsi(as, mik, dest, left, k); return; @@ -1896,7 +2118,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) #define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL) #define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL) #define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA) -#define asm_brol(as, ir) lua_assert(0) +#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") static void asm_bror(ASMState *as, IRIns *ir) { @@ -1919,15 +2141,21 @@ static void asm_bror(ASMState *as, IRIns *ir) } } -#if LJ_32 && LJ_SOFTFP +#if LJ_SOFTFP static void asm_sfpmin_max(ASMState *as, IRIns *ir) { CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? 
IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; +#if LJ_64 + IRRef args[2]; + args[0] = ir->op1; + args[1] = ir->op2; +#else IRRef args[4]; args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; +#endif asm_setupresult(as, ir, &ci); emit_call(as, (void *)ci.func, 0); ci.func = NULL; @@ -1937,29 +2165,52 @@ static void asm_sfpmin_max(ASMState *as, IRIns *ir) static void asm_min_max(ASMState *as, IRIns *ir, int ismax) { - if (!LJ_SOFTFP && irt_isnum(ir->t)) { + if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { +#if LJ_SOFTFP + asm_sfpmin_max(as, ir); +#else Reg dest = ra_dest(as, ir, RSET_FPR); Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = (left >> 8); left &= 255; +#if !LJ_TARGET_MIPSR6 if (dest == left) { - emit_fg(as, MIPSI_MOVT_D, dest, right); + emit_fg(as, MIPSI_MOVF_D, dest, right); } else { - emit_fg(as, MIPSI_MOVF_D, dest, left); + emit_fg(as, MIPSI_MOVT_D, dest, left); if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); } - emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); + emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? right : left, ismax ? left : right); +#else + emit_fgh(as, ismax ? 
MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right); +#endif +#endif } else { Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; - if (dest == left) { - emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); + if (left == right) { + if (dest != left) emit_move(as, dest, left); } else { - emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); - if (dest != right) emit_move(as, dest, right); +#if !LJ_TARGET_MIPSR6 + if (dest == left) { + emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); + } else { + emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); + if (dest != right) emit_move(as, dest, right); + } +#else + emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); + if (dest != right) { + emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP); + emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP); + } else { + emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP); + emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP); + } +#endif + emit_dst(as, MIPSI_SLT, RID_TMP, + ismax ? left : right, ismax ? right : left); } - emit_dst(as, MIPSI_SLT, RID_TMP, - ismax ? left : right, ismax ? right : left); } } @@ -1968,18 +2219,24 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) /* -- Comparisons --------------------------------------------------------- */ -#if LJ_32 && LJ_SOFTFP +#if LJ_SOFTFP /* SFP comparisons. */ static void asm_sfpcomp(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; RegSet drop = RSET_SCRATCH; Reg r; +#if LJ_64 + IRRef args[2]; + args[0] = ir->op1; + args[1] = ir->op2; +#else IRRef args[4]; args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 
3 : 2] = (ir+1)->op2; +#endif - for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { + for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) { if (!rset_test(as->freeset, r) && regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) rset_clear(drop, r); @@ -2033,21 +2290,33 @@ static void asm_comp(ASMState *as, IRIns *ir) { /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ IROp op = ir->o; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { + if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { +#if LJ_SOFTFP + asm_sfpcomp(as, ir); +#else +#if !LJ_TARGET_MIPSR6 Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = (left >> 8); left &= 255; asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); +#else + Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); + asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); + emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right); +#endif +#endif } else { Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); if (op == IR_ABC) op = IR_UGT; - if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { + if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); asm_guard(as, mi, left, 0); } else { if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); + intptr_t k = get_kval(as, ir->op2); if ((op&2)) k++; if (checki16(k)) { asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); @@ -2069,9 +2338,17 @@ static void asm_equal(ASMState *as, IRIns *ir) Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? 
RSET_FPR : RSET_GPR); right = (left >> 8); left &= 255; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { + if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { +#if LJ_SOFTFP + asm_sfpcomp(as, ir); +#elif !LJ_TARGET_MIPSR6 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); +#else + Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); + asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); + emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right); +#endif } else { asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); } @@ -2114,15 +2391,15 @@ static void asm_comp64eq(ASMState *as, IRIns *ir) } #endif -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ +/* -- Split register ops -------------------------------------------------- */ -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) /* HIOP is marked as a store because it needs its own DCE logic. */ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; +#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ as->curins--; /* Always skip the CONV. */ #if LJ_HASFFI && !LJ_SOFTFP @@ -2169,37 +2446,33 @@ static void asm_hiop(ASMState *as, IRIns *ir) } return; } +#endif if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { -#if LJ_HASFFI +#if LJ_32 && LJ_HASFFI case IR_ADD: as->curins--; asm_add64(as, ir); break; case IR_SUB: as->curins--; asm_sub64(as, ir); break; case IR_NEG: as->curins--; asm_neg64(as, ir); break; + case IR_CNEWI: + /* Nothing to do here. Handled by lo op itself. 
*/ + break; #endif -#if LJ_SOFTFP +#if LJ_32 && LJ_SOFTFP case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: case IR_STRTO: if (!uselo) ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ break; + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: + /* Nothing to do here. Handled by lo op itself. */ + break; #endif - case IR_CALLN: - case IR_CALLS: - case IR_CALLXS: + case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: if (!uselo) ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ break; -#if LJ_SOFTFP - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: -#endif - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. */ - break; - default: lua_assert(0); break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ -#endif } /* -- Profiling ----------------------------------------------------------- */ @@ -2264,15 +2537,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) if ((sn & SNAP_NORESTORE)) continue; if (irt_isnum(ir->t)) { -#if LJ_SOFTFP +#if LJ_SOFTFP32 Reg tmp; RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ + /* LJ_SOFTFP: must be a number constant. 
*/ + lj_assertA(irref_isk(ref), "unsplit FP op"); tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); +#elif LJ_SOFTFP /* && LJ_64 */ + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); + emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs); #else Reg src = ra_alloc1(as, ref, RSET_FPR); emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); @@ -2281,7 +2558,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) #if LJ_32 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); Reg type; - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "restore of IR type %d", irt_type(ir->t)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, allow); rset_clear(allow, src); @@ -2294,21 +2572,41 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } else if ((sn & SNAP_SOFTFPNUM)) { type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); #endif + } else if ((sn & SNAP_KEYINDEX)) { + type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); } else { type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); } emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); #else - asm_tvstore64(as, RID_BASE, ofs, ref); + if ((sn & SNAP_KEYINDEX)) { + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + int64_t kki = (int64_t)LJ_KEYINDEX << 32; + if (irref_isk(ref)) { + emit_tsi(as, MIPSI_SD, + ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow), + RID_BASE, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + Reg rki = ra_allock(as, kki, rset_exclude(allow, src)); + emit_tsi(as, MIPSI_SD, RID_TMP, RID_BASE, ofs); + emit_dst(as, MIPSI_DADDU, RID_TMP, src, rki); + } + } else { + asm_tvstore64(as, RID_BASE, ofs, ref); + } #endif } 
checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ +/* Marker to prevent patching the GC check exit. */ +#define MIPS_NOPATCH_GC_CHECK MIPSI_OR + /* Check GC threshold and do one or more GC steps. */ static void asm_gc_check(ASMState *as) { @@ -2324,6 +2622,7 @@ static void asm_gc_check(ASMState *as) args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ASMREF_TMP2; /* MSize steps */ asm_gencall(as, ci, args); + l_end[-3] = MIPS_NOPATCH_GC_CHECK; /* Replace the nop after the call. */ emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); tmp = ra_releasetmp(as, ASMREF_TMP2); emit_loadi(as, tmp, as->gcsteps); @@ -2352,6 +2651,12 @@ static void asm_loop_fixup(ASMState *as) } } +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + if (as->loopinv) as->mctop--; +} + /* -- Head of trace ------------------------------------------------------- */ /* Coalesce BASE register for a root trace. */ @@ -2359,7 +2664,6 @@ static void asm_head_root_base(ASMState *as) { IRIns *ir = IR(REF_BASE); Reg r = ir->r; - if (as->loopinv) as->mctop--; if (ra_hasreg(r)) { ra_free(as, r); if (rset_test(as->modset, r) || irt_ismarked(ir->t)) @@ -2370,25 +2674,24 @@ static void asm_head_root_base(ASMState *as) } /* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) +static Reg asm_head_side_base(ASMState *as, IRIns *irp) { IRIns *ir = IR(REF_BASE); Reg r = ir->r; - if (as->loopinv) as->mctop--; if (ra_hasreg(r)) { ra_free(as, r); if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ir->r = RID_INIT; /* No inheritance for modified BASE register. */ if (irp->r == r) { - rset_clear(allow, r); /* Mark same BASE register as coalesced. */ + return r; /* Same BASE register already coalesced. 
*/ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { - rset_clear(allow, irp->r); emit_move(as, r, irp->r); /* Move from coalesced parent reg. */ + return irp->r; } else { emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ } } - return allow; + return RID_NONE; } /* -- Tail of trace ------------------------------------------------------- */ @@ -2466,32 +2769,39 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); for (p++; p < pe; p++) { if (*p == exitload) { /* Look for load of exit number. */ - if (((p[-1] ^ (px-p)) & 0xffffu) == 0) { /* Look for exitstub branch. */ + /* Look for exitstub branch. Yes, this covers all used branch variants. */ + if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && + ((p[-1] & 0xf0000000u) == MIPSI_BEQ || + (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || +#if !LJ_TARGET_MIPSR6 + (p[-1] & 0xffe00000u) == MIPSI_BC1F +#else + (p[-1] & 0xff600000u) == MIPSI_BC1EQZ +#endif + ) && p[-2] != MIPS_NOPATCH_GC_CHECK) { ptrdiff_t delta = target - p; if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ patchbranch: p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu); *p = MIPSI_NOP; /* Replace the load of the exit number. */ - cstop = p; + cstop = p+1; if (!cstart) cstart = p-1; } else { /* Branch out of range. Use spare jump slot in mcarea. */ - int i; - for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) { - if (mcarea[i] == tjump) { - delta = mcarea+i - p; - goto patchbranch; - } else if (mcarea[i] == MIPSI_NOP) { - mcarea[i] = tjump; - cstart = mcarea+i; - delta = mcarea+i - p; + MCode *mcjump = asm_sparejump_use(mcarea, tjump); + if (mcjump) { + lj_mcode_sync(mcjump, mcjump+1); + delta = mcjump - p; + if (((delta + 0x8000) >> 16) == 0) { goto patchbranch; + } else { + lj_assertJ(0, "spare jump out of range: -Osizemcode too big"); } } /* Ignore jump slot overflow. Child trace is simply not attached. 
*/ } } else if (p+1 == pe) { /* Patch NOP after code for inverted loop branch. Use of J is ok. */ - lua_assert(p[1] == MIPSI_NOP); + lj_assertJ(p[1] == MIPSI_NOP, "expected NOP"); p[1] = tjump; *p = MIPSI_NOP; /* Replace the load of the exit number. */ cstop = p+2; diff --git a/source/libs/luajit/LuaJIT-src/src/lj_asm_ppc.h b/source/libs/luajit/LuaJIT-src/src/lj_asm_ppc.h index 6daa861b91acbece22dd3c602578fc5afda4c1ab..df1ac42f7a0e2a0fb2bfac367fc3f82d5528aad2 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_asm_ppc.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_asm_ppc.h @@ -1,6 +1,6 @@ /* ** PPC IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ /* -- Register allocator extensions --------------------------------------- */ @@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) return ra_allock(as, ofs-(int16_t)ofs, allow); } } + } else if (ir->o == IR_TMPREF) { + *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); + return RID_JGL; } } *ofsp = 0; @@ -181,7 +184,7 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, return; } } else if (ir->o == IR_STRREF) { - lua_assert(ofs == 0); + lj_assertA(ofs == 0, "bad usage"); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { ofs += IR(ir->op2)->i; @@ -226,12 +229,14 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, emit_tab(as, pi, rt, left, right); } +#if !LJ_SOFTFP /* Fuse to multiply-add/sub instruction. 
*/ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) { IRRef lref = ir->op1, rref = ir->op2; IRIns *irm; - if (lref != rref && + if ((as->flags & JIT_F_OPT_FMA) && + lref != rref && ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && ra_noreg(irm->r)) || (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && @@ -245,6 +250,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) } return 0; } +#endif /* -- Calls --------------------------------------------------------------- */ @@ -253,16 +259,21 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { uint32_t n, nargs = CCI_XNARGS(ci); int32_t ofs = 8; - Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; + Reg gpr = REGARG_FIRSTGPR; +#if !LJ_SOFTFP + Reg fpr = REGARG_FIRSTFPR; +#endif if ((void *)ci->func) emit_call(as, (void *)ci->func); for (n = 0; n < nargs; n++) { /* Setup args. */ IRRef ref = args[n]; if (ref) { IRIns *ir = IR(ref); +#if !LJ_SOFTFP if (irt_isfp(ir->t)) { if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Already evicted. */ ra_leftov(as, fpr, ref); fpr++; } else { @@ -271,9 +282,12 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) emit_spstore(as, ir, r, ofs); ofs += irt_isnum(ir->t) ? 8 : 4; } - } else { + } else +#endif + { if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Already evicted. */ ra_leftov(as, gpr, ref); gpr++; } else { @@ -290,8 +304,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) } checkmclim(as); } +#if !LJ_SOFTFP if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); +#endif } /* Setup result reg/sp for call. 
Evict scratch regs. */ @@ -299,16 +315,18 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); +#if !LJ_SOFTFP if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; +#endif if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ if (hiop && ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (irt_isfp(ir->t)) { + lj_assertA(!irt_ispri(ir->t), "PRI dest"); + if (!LJ_SOFTFP && irt_isfp(ir->t)) { if ((ci->flags & CCI_CASTU64)) { /* Use spill slot or temp slots. */ int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; @@ -323,10 +341,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) } else { ra_destreg(as, ir, RID_FPRET); } -#if LJ_32 } else if (hiop) { ra_destpair(as, ir); -#endif } else { ra_destreg(as, ir, RID_RET); } @@ -375,8 +391,24 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); } +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + irgc.ot = IRT(0, IRT_PGC); /* GC type. 
*/ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31); + emit_getgl(as, RID_TMP, cur_L); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + /* -- Type conversions ---------------------------------------------------- */ +#if !LJ_SOFTFP static void asm_tointg(ASMState *as, IRIns *ir, Reg left) { RegSet allow = RSET_FPR; @@ -409,15 +441,27 @@ static void asm_tobit(ASMState *as, IRIns *ir) emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); emit_fab(as, PPCI_FADD, tmp, left, right); } +#endif static void asm_conv(ASMState *as, IRIns *ir) { IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); +#if !LJ_SOFTFP int stfp = (st == IRT_NUM || st == IRT_FLOAT); +#endif IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ + /* 64 bit integer conversions are handled by SPLIT. */ + lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); +#if LJ_SOFTFP + /* FP conversions are handled by SPLIT. */ + lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), + "IR %04d has FP type", + (int)(ir - as->ir) - REF_BIAS); + /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ +#else + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -446,7 +490,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. 
*/ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -476,11 +521,13 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_fb(as, PPCI_FCTIWZ, tmp, left); } } - } else { + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if ((ir->op2 & IRCONV_SEXT)) emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); else @@ -496,42 +543,95 @@ static void asm_strto(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; - int32_t ofs; + int32_t ofs = SPOFS_TMP; +#if LJ_SOFTFP + ra_evictset(as, RSET_SCRATCH); + if (ra_used(ir)) { + if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && + (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { + int i; + for (i = 0; i < 2; i++) { + Reg r = (ir+i)->r; + if (ra_hasreg(r)) { + ra_free(as, r); + ra_modified(as, r); + emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); + } + } + ofs = sps_scale(ir->s & ~1); + } else { + Reg rhi = ra_dest(as, ir+1, RSET_GPR); + Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); + emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); + emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); + } + } +#else RegSet drop = RSET_SCRATCH; if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ ra_evictset(as, drop); + if (ir->s) ofs = sps_scale(ir->s); +#endif asm_guardcc(as, CC_EQ); emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ args[0] = ir->op1; /* GCstr *str */ args[1] = ASMREF_TMP1; /* TValue *n */ asm_gencall(as, ci, args); /* Store the result to the spill slot or temp slots. */ - ofs = ir->s ? 
sps_scale(ir->s) : SPOFS_TMP; emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); } /* -- Memory references --------------------------------------------------- */ /* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) { - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); - else /* Otherwise force a spill and use the spill slot. */ - emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); - } else { - /* Otherwise use g->tmptv to hold the TValue. */ - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); + int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if ((mode & IRTMPREF_OUT1)) { +#if LJ_SOFTFP + lj_assertA(irref_isk(ref), "unsplit FP op"); + emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); + emit_setgl(as, + ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), + tmptv.u32.lo); + emit_setgl(as, + ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), + tmptv.u32.hi); +#else + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); + emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs); +#endif + } else if (irref_isk(ref)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, i32ptr(ir_knum(ir)), dest); + } else { +#if LJ_SOFTFP + lj_assertA(0, "unsplit FP op"); +#else + /* Otherwise force a spill and use the spill slot. */ + emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); +#endif + } + } else { + /* Otherwise use g->tmptv to hold the TValue. 
*/ + Reg type; + emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); + if (!irt_ispri(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_GPR); + emit_setgl(as, src, tmptv.gcr); + } + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) + type = ra_alloc1(as, ref+1, RSET_GPR); + else + type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); + emit_setgl(as, type, tmptv.it); } - type = ra_allock(as, irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); + } else { + emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); } } @@ -574,11 +674,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) Reg tisnum = RID_NONE, tmpnum = RID_NONE; IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); IRType1 kt = irkey->t; uint32_t khash; MCLabel l_end, l_loop, l_next; rset_clear(allow, tab); +#if LJ_SOFTFP + if (!isk) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + if (irkey[1].o == IR_HIOP) { + if (ra_hasreg((irkey+1)->r)) { + tmpnum = (irkey+1)->r; + ra_noweak(as, tmpnum); + } else { + tmpnum = ra_allocref(as, refkey+1, allow); + } + rset_clear(allow, tmpnum); + } + } +#else if (irt_isnum(kt)) { key = ra_alloc1(as, refkey, RSET_FPR); tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); @@ -588,6 +704,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) key = ra_alloc1(as, refkey, allow); rset_clear(allow, key); } +#endif tmp2 = ra_scratch(as, allow); rset_clear(allow, tmp2); @@ -610,7 +727,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) asm_guardcc(as, CC_EQ); else emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - if (irt_isnum(kt)) { + if (!LJ_SOFTFP && irt_isnum(kt)) { emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); emit_condbranch(as, PPCI_BC, CC_GE, l_next); emit_ab(as, PPCI_CMPLW, tmp1, tisnum); @@ -620,7 +737,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_ab(as, PPCI_CMPW, tmp2, key); emit_condbranch(as, PPCI_BC, CC_NE, l_next); } - emit_ai(as, PPCI_CMPWI, tmp1, 
irt_toitype(irkey->t)); + if (LJ_SOFTFP && ra_hasreg(tmpnum)) + emit_ab(as, PPCI_CMPW, tmp1, tmpnum); + else + emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); if (!irt_ispri(kt)) emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); } @@ -629,35 +749,41 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) (((char *)as->mcp-(char *)l_loop) & 0xffffu); /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; + khash = isk ? ir_khash(as, irkey) : 1; if (khash == 0) { emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); } else { Reg tmphash = tmp1; - if (irref_isk(refkey)) + if (isk) tmphash = ra_allock(as, khash, allow); emit_tab(as, PPCI_ADD, dest, dest, tmp1); emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (irref_isk(refkey)) { + if (isk) { /* Nothing to do. */ } else if (irt_isstr(kt)) { - emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); + emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid)); } else { /* Must match with hash*() in lj_tab.c. */ emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); - if (irt_isnum(kt)) { + if (LJ_SOFTFP ? 
(irkey[1].o == IR_HIOP) : irt_isnum(kt)) { +#if LJ_SOFTFP + emit_asb(as, PPCI_XOR, tmp2, key, tmp1); + emit_rotlwi(as, dest, tmp1, HASH_ROT1); + emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); +#else int32_t ofs = ra_spill(as, irkey); emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); emit_rotlwi(as, dest, tmp1, HASH_ROT1); emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); +#endif } else { emit_asb(as, PPCI_XOR, tmp2, key, tmp1); emit_rotlwi(as, dest, tmp1, HASH_ROT1); @@ -678,7 +804,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) Reg node = ra_alloc1(as, ir->op1, RSET_GPR); Reg key = RID_NONE, type = RID_TMP, idx = node; RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ofs > 32736) { idx = dest; rset_clear(allow, dest); @@ -714,30 +840,37 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR); } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); + if (guarded) { + asm_guardcc(as, ir->o == IR_UREFC ? 
CC_NE : CC_EQ); emit_ai(as, PPCI_CMPWI, RID_TMP, 1); - emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv)); - emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); + } + if (ir->o == IR_UREFC) + emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv)); + else + emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) + emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loadi(as, dest, k); } else { - emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v)); + emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); } - emit_tai(as, PPCI_LWZ, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); } } static void asm_fref(ASMState *as, IRIns *ir) { UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); + lj_assertA(!ra_used(ir), "unfused FREF"); } static void asm_strref(ASMState *as, IRIns *ir) @@ -777,26 +910,28 @@ static void asm_strref(ASMState *as, IRIns *ir) /* -- Loads and stores ---------------------------------------------------- */ -static PPCIns asm_fxloadins(IRIns *ir) +static PPCIns asm_fxloadins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. 
*/ case IRT_U8: return PPCI_LBZ; case IRT_I16: return PPCI_LHA; case IRT_U16: return PPCI_LHZ; - case IRT_NUM: return PPCI_LFD; - case IRT_FLOAT: return PPCI_LFS; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD; + case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; default: return PPCI_LWZ; } } -static PPCIns asm_fxstoreins(IRIns *ir) +static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: return PPCI_STB; case IRT_I16: case IRT_U16: return PPCI_STH; - case IRT_NUM: return PPCI_STFD; - case IRT_FLOAT: return PPCI_STFS; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD; + case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; default: return PPCI_STW; } } @@ -804,12 +939,12 @@ static PPCIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - PPCIns pi = asm_fxloadins(ir); + PPCIns pi = asm_fxloadins(as, ir); Reg idx; int32_t ofs; - if (ir->op1 == REF_NIL) { + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ idx = RID_JGL; - ofs = (ir->op2 << 2) - 32768; + ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); } else { idx = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->op2 == IRFL_TAB_ARRAY) { @@ -821,7 +956,7 @@ static void asm_fload(ASMState *as, IRIns *ir) } ofs = field_ofs[ir->op2]; } - lua_assert(!irt_isi8(ir->t)); + lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8"); emit_tai(as, pi, dest, idx, ofs); } @@ -832,18 +967,19 @@ static void asm_fstore(ASMState *as, IRIns *ir) IRIns *irf = IR(ir->op1); Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); int32_t ofs = field_ofs[irf->op2]; - PPCIns pi = asm_fxstoreins(ir); + PPCIns pi = asm_fxstoreins(as, ir); emit_tai(as, pi, src, idx, ofs); } } static void asm_xload(ASMState *as, IRIns *ir) { - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); + Reg dest = ra_dest(as, ir, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); if (irt_isi8(ir->t)) emit_as(as, PPCI_EXTSB, dest, dest); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); + asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); } static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) @@ -857,8 +993,9 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) Reg src = ra_alloc1(as, irb->op1, RSET_GPR); asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); } else { - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + Reg src = ra_alloc1(as, ir->op2, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, rset_exclude(RSET_GPR, src), ofs); } } @@ -871,24 +1008,39 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; RegSet allow = RSET_GPR; int32_t ofs = AHUREF_LSX; + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) { + t.irt = IRT_NUM; + if (ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } + ofs = 0; + } if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - if (!irt_isnum(t)) ofs = 0; - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); + lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); + if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); } idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + if (ir->o == IR_VLOAD) { + ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 : + ir->op2 ? 
8 * ir->op2 : AHUREF_LSX; + } if (irt_isnum(t)) { Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx)); asm_guardcc(as, CC_GE); emit_ab(as, PPCI_CMPLW, type, tisnum); if (ra_hasreg(dest)) { - if (ofs == AHUREF_LSX) { + if (!LJ_SOFTFP && ofs == AHUREF_LSX) { tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, (idx&255)), (idx>>8))); emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); } else { - emit_fai(as, PPCI_LFD, dest, idx, ofs); + emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx, + ofs+4*LJ_SOFTFP); } } } else { @@ -911,7 +1063,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) int32_t ofs = AHUREF_LSX; if (ir->r == RID_SINK) return; - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { src = ra_alloc1(as, ir->op2, RSET_FPR); } else { if (!irt_ispri(ir->t)) { @@ -919,11 +1071,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) rset_clear(allow, src); ofs = 0; } - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) + type = ra_alloc1(as, (ir+1)->op2, allow); + else + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); rset_clear(allow, type); } idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { if (ofs == AHUREF_LSX) { emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); emit_slwi(as, RID_TMP, (idx>>8), 3); @@ -948,21 +1103,39 @@ static void asm_sload(ASMState *as, IRIns *ir) IRType1 t = ir->t; Reg dest = RID_NONE, type = RID_NONE, base; RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); + int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); + if (hiop) + t.irt = IRT_NUM; + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). 
*/ + lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); + lj_assertA(LJ_DUALNUM || + !irt_isint(t) || + (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), + "bad SLOAD type"); +#if LJ_SOFTFP + lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), + "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ + if (hiop && ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } +#else if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { dest = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, dest); t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); + } else +#endif + if (ra_used(ir)) { + lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_CONVERT)) { + if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { if (irt_isint(t)) { emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); dest = ra_scratch(as, RSET_FPR); @@ -994,14 +1167,22 @@ dotypecheck: if ((ir->op2 & IRSLOAD_TYPECHECK)) { Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); asm_guardcc(as, CC_GE); - emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); +#if !LJ_SOFTFP type = RID_TMP; +#endif + emit_ab(as, PPCI_CMPLW, type, tisnum); } - if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); + if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? 
PPCI_LWZ : PPCI_LFD, dest, + base, ofs-(LJ_SOFTFP?0:4)); } else { if ((ir->op2 & IRSLOAD_TYPECHECK)) { asm_guardcc(as, CC_NE); - emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t)); + if ((ir->op2 & IRSLOAD_KEYINDEX)) { + emit_ai(as, PPCI_CMPWI, RID_TMP, (LJ_KEYINDEX & 0xffff)); + emit_asi(as, PPCI_XORIS, RID_TMP, RID_TMP, (LJ_KEYINDEX >> 16)); + } else { + emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t)); + } type = RID_TMP; } if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs); @@ -1021,7 +1202,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; IRRef args[4]; RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); as->gcsteps++; if (ra_hasreg(ir->r)) @@ -1034,10 +1216,10 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ir->o == IR_CNEWI) { RegSet allow = (RSET_GPR & ~RSET_SCRATCH); int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); if (sz == 8) { ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); + lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI"); } for (;;) { Reg r = ra_alloc1(as, ir->op2, allow); @@ -1068,8 +1250,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1083,7 +1263,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); emit_setgl(as, tab, gc.grayagain); - lua_assert(LJ_GC_BLACK == 0x04); + lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK"); emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear 
black bit. */ emit_getgl(as, link, gc.grayagain); emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); @@ -1098,7 +1278,7 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj, val, tmp; /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1119,6 +1299,7 @@ static void asm_obar(ASMState *as, IRIns *ir) /* -- Arithmetic and logic operations ------------------------------------- */ +#if !LJ_SOFTFP static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) { Reg dest = ra_dest(as, ir, RSET_FPR); @@ -1139,20 +1320,22 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) static void asm_fpmath(ASMState *as, IRIns *ir) { - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) asm_fpunary(as, ir, PPCI_FSQRT); else asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); } +#endif static void asm_add(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) asm_fparith(as, ir, PPCI_FADD); - } else { + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); PPCIns pi; @@ -1191,10 +1374,13 @@ static void asm_add(ASMState *as, IRIns *ir) static void asm_sub(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) asm_fparith(as, ir, PPCI_FSUB); - } else { + } else +#endif + { PPCIns pi = PPCI_SUBF; Reg dest = ra_dest(as, ir, RSET_GPR); Reg left, right; @@ -1220,9 +1406,12 @@ static void asm_sub(ASMState *as, IRIns *ir) static void asm_mul(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { asm_fparith(as, ir, PPCI_FMUL); - } else { + } else +#endif + { PPCIns pi = PPCI_MULLW; Reg dest = ra_dest(as, ir, 
RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); @@ -1244,15 +1433,16 @@ static void asm_mul(ASMState *as, IRIns *ir) } } -#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV) -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) +#define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV) static void asm_neg(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { asm_fpunary(as, ir, PPCI_FNEG); - } else { + } else +#endif + { Reg dest, left; PPCIns pi = PPCI_NEG; if (as->flagmcp == as->mcp) { @@ -1267,8 +1457,6 @@ static void asm_neg(ASMState *as, IRIns *ir) } #define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) { @@ -1561,11 +1749,42 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) #define asm_brol(as, ir) \ asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) -#define asm_bror(as, ir) lua_assert(0) +#define asm_bror(as, ir) lj_assertA(0, "unexpected BROR") + +#if LJ_SOFTFP +static void asm_sfpmin_max(ASMState *as, IRIns *ir) +{ + CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp]; + IRRef args[4]; + MCLabel l_right, l_end; + Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR); + Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); + Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR); + PPCCC cond = (IROp)ir->o == IR_MIN ? 
CC_EQ : CC_NE; + righthi = (lefthi >> 8); lefthi &= 255; + rightlo = (leftlo >> 8); leftlo &= 255; + args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; + args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; + l_end = emit_label(as); + if (desthi != righthi) emit_mr(as, desthi, righthi); + if (destlo != rightlo) emit_mr(as, destlo, rightlo); + l_right = emit_label(as); + if (l_end != l_right) emit_jmp(as, l_end); + if (desthi != lefthi) emit_mr(as, desthi, lefthi); + if (destlo != leftlo) emit_mr(as, destlo, leftlo); + if (l_right == as->mcp+1) { + cond ^= 4; l_right = l_end; ++as->mcp; + } + emit_condbranch(as, PPCI_BC, cond, l_right); + ra_evictset(as, RSET_SCRATCH); + emit_cmpi(as, RID_RET, 1); + asm_gencall(as, &ci, args); +} +#endif static void asm_min_max(ASMState *as, IRIns *ir, int ismax) { - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); Reg tmp = dest; Reg right, left = ra_alloc2(as, ir, RSET_FPR); @@ -1573,9 +1792,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) if (tmp == left || tmp == right) tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, dest), left), right)); - emit_facb(as, PPCI_FSEL, dest, tmp, - ismax ? left : right, ismax ? right : left); - emit_fab(as, PPCI_FSUB, tmp, left, right); + emit_facb(as, PPCI_FSEL, dest, tmp, left, right); + emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? 
right : left); } else { Reg dest = ra_dest(as, ir, RSET_GPR); Reg tmp1 = RID_TMP, tmp2 = dest; @@ -1653,7 +1871,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) static void asm_comp(ASMState *as, IRIns *ir) { PPCCC cc = asm_compmap[ir->o]; - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = (left >> 8); left &= 255; asm_guardcc(as, (cc >> 4)); @@ -1674,6 +1892,44 @@ static void asm_comp(ASMState *as, IRIns *ir) #define asm_equal(as, ir) asm_comp(as, ir) +#if LJ_SOFTFP +/* SFP comparisons. */ +static void asm_sfpcomp(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; + RegSet drop = RSET_SCRATCH; + Reg r; + IRRef args[4]; + args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; + args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; + + for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { + if (!rset_test(as->freeset, r) && + regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) + rset_clear(drop, r); + } + ra_evictset(as, drop); + asm_setupresult(as, ir, ci); + switch ((IROp)ir->o) { + case IR_ULT: + asm_guardcc(as, CC_EQ); + emit_ai(as, PPCI_CMPWI, RID_RET, 0); + case IR_ULE: + asm_guardcc(as, CC_EQ); + emit_ai(as, PPCI_CMPWI, RID_RET, 1); + break; + case IR_GE: case IR_GT: + asm_guardcc(as, CC_EQ); + emit_ai(as, PPCI_CMPWI, RID_RET, 2); + default: + asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); + emit_ai(as, PPCI_CMPWI, RID_RET, 0); + break; + } + asm_gencall(as, ci, args); +} +#endif + #if LJ_HASFFI /* 64 bit integer comparisons. */ static void asm_comp64(ASMState *as, IRIns *ir) @@ -1698,24 +1954,41 @@ static void asm_comp64(ASMState *as, IRIns *ir) } #endif -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ +/* -- Split register ops -------------------------------------------------- */ -/* Hiword op of a split 64 bit op. Previous op must be the loword op. 
*/ +/* Hiword op of a split 32/32 bit op. Previous op is be the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_HASFFI /* HIOP is marked as a store because it needs its own DCE logic. */ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; +#if LJ_HASFFI || LJ_SOFTFP if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ as->curins--; /* Always skip the CONV. */ +#if LJ_HASFFI && !LJ_SOFTFP if (usehi || uselo) asm_conv64(as, ir); return; +#endif } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ as->curins--; /* Always skip the loword comparison. */ +#if LJ_SOFTFP + if (!irt_isint(ir->t)) { + asm_sfpcomp(as, ir-1); + return; + } +#endif +#if LJ_HASFFI asm_comp64(as, ir); +#endif return; +#if LJ_SOFTFP + } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { + as->curins--; /* Always skip the loword min/max. */ + if (uselo || usehi) + asm_sfpmin_max(as, ir-1); + return; +#endif } else if ((ir-1)->o == IR_XSTORE) { as->curins--; /* Handle both stores here. */ if ((ir-1)->r != RID_SINK) { @@ -1724,24 +1997,33 @@ static void asm_hiop(ASMState *as, IRIns *ir) } return; } +#endif if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { +#if LJ_HASFFI case IR_ADD: as->curins--; asm_add64(as, ir); break; case IR_SUB: as->curins--; asm_sub64(as, ir); break; case IR_NEG: as->curins--; asm_neg64(as, ir); break; - case IR_CALLN: - case IR_CALLXS: + case IR_CNEWI: + /* Nothing to do here. Handled by lo op itself. */ + break; +#endif +#if LJ_SOFTFP + case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: + case IR_STRTO: if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. 
*/ break; - case IR_CNEWI: + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: /* Nothing to do here. Handled by lo op itself. */ break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ #endif + case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } } /* -- Profiling ----------------------------------------------------------- */ @@ -1797,12 +2079,25 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) if ((sn & SNAP_NORESTORE)) continue; if (irt_isnum(ir->t)) { +#if LJ_SOFTFP + Reg tmp; + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + /* LJ_SOFTFP: must be a number constant. */ + lj_assertA(irref_isk(ref), "unsplit FP op"); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); + emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); + if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); + emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); +#else Reg src = ra_alloc1(as, ref, RSET_FPR); emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); +#endif } else { Reg type; RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "restore of IR type %d", irt_type(ir->t)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, allow); rset_clear(allow, src); @@ -1811,6 +2106,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) if ((sn & (SNAP_CONT|SNAP_FRAME))) { if (s == 0) continue; /* Do not overwrite link to previous frame. 
*/ type = ra_allock(as, (int32_t)(*flinks--), allow); +#if LJ_SOFTFP + } else if ((sn & SNAP_SOFTFPNUM)) { + type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); +#endif + } else if ((sn & SNAP_KEYINDEX)) { + type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); } else { type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); } @@ -1818,11 +2119,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ +/* Marker to prevent patching the GC check exit. */ +#define PPC_NOPATCH_GC_CHECK PPCI_ORIS + /* Check GC threshold and do one or more GC steps. */ static void asm_gc_check(ASMState *as) { @@ -1834,6 +2138,7 @@ static void asm_gc_check(ASMState *as) l_end = emit_label(as); /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ + *--as->mcp = PPC_NOPATCH_GC_CHECK; emit_ai(as, PPCI_CMPWI, RID_RET, 0); args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ASMREF_TMP2; /* MSize steps */ @@ -1865,6 +2170,12 @@ static void asm_loop_fixup(ASMState *as) } } +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + UNUSED(as); /* Nothing to do. */ +} + /* -- Head of trace ------------------------------------------------------- */ /* Coalesce BASE register for a root trace. */ @@ -1882,7 +2193,7 @@ static void asm_head_root_base(ASMState *as) } /* Coalesce BASE register for a side trace. 
*/ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) +static Reg asm_head_side_base(ASMState *as, IRIns *irp) { IRIns *ir = IR(REF_BASE); Reg r = ir->r; @@ -1891,15 +2202,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ir->r = RID_INIT; /* No inheritance for modified BASE register. */ if (irp->r == r) { - rset_clear(allow, r); /* Mark same BASE register as coalesced. */ + return r; /* Same BASE register already coalesced. */ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { - rset_clear(allow, irp->r); emit_mr(as, r, irp->r); /* Move from coalesced parent reg. */ + return irp->r; } else { emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ } } - return allow; + return RID_NONE; } /* -- Tail of trace ------------------------------------------------------- */ @@ -1916,7 +2227,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) as->mctop = p; } else { /* Patch stack adjustment. */ - lua_assert(checki16(CFRAME_SIZE+spadj)); + lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range"); p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; } @@ -1947,14 +2258,15 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; asm_collectargs(as, ir, ci, args); for (i = 0; i < nargs; i++) - if (args[i] && irt_isfp(IR(args[i])->t)) { + if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; } else { if (ngpr > 0) ngpr--; else nslots++; } if (nslots > as->evenspill) /* Leave room for args in stack slots. */ as->evenspill = nslots; - return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); + return (!LJ_SOFTFP && irt_isfp(ir->t)) ? 
REGSP_HINT(RID_FPRET) : + REGSP_HINT(RID_RET); } static void asm_setup_target(ASMState *as) @@ -1972,7 +2284,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) MCode *px = exitstub_trace_addr(T, exitno); MCode *cstart = NULL; MCode *mcarea = lj_mcode_patch(J, p, 0); - int clearso = 0; + int clearso = 0, patchlong = 1; for (; p < pe; p++) { /* Look for exitstub branch, try to replace with branch to target. */ uint32_t ins = *p; @@ -1984,7 +2296,9 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) delta -= sizeof(MCode); } /* Many, but not all short-range branches can be patched directly. */ - if (((delta + 0x8000) >> 16) == 0) { + if (p[-1] == PPC_NOPATCH_GC_CHECK) { + patchlong = 0; + } else if (((delta + 0x8000) >> 16) == 0) { *p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) | ((delta & 0x8000) * (PPCF_Y/0x8000)); if (!cstart) cstart = p; @@ -1992,14 +2306,17 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) } else if ((ins & 0xfc000000u) == PPCI_B && ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x02000000) >> 26) == 0); + lj_assertJ(((delta + 0x02000000) >> 26) == 0, + "branch target out of range"); *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); if (!cstart) cstart = p; } } - { /* Always patch long-range branch in exit stub itself. */ + /* Always patch long-range branch in exit stub itself. Except, if we can't. 
*/ + if (patchlong) { ptrdiff_t delta = (char *)target - (char *)px - clearso; - lua_assert(((delta + 0x02000000) >> 26) == 0); + lj_assertJ(((delta + 0x02000000) >> 26) == 0, + "branch target out of range"); *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); } if (!cstart) cstart = px; diff --git a/source/libs/luajit/LuaJIT-src/src/lj_asm_x86.h b/source/libs/luajit/LuaJIT-src/src/lj_asm_x86.h index 3e189b1d04ef4487891d74fb4844c117cf744403..936ff438fdf041604fb204a30341e38a86a70f80 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_asm_x86.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_asm_x86.h @@ -1,6 +1,6 @@ /* ** x86/x64 IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ /* -- Guard handling ------------------------------------------------------ */ @@ -31,7 +31,7 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) #endif /* Jump to exit handler which fills in the ExitState. */ *mxp++ = XI_JMP; mxp += 4; - *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); + *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler); /* Commit the code for this group (even if assembly fails later on). */ lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -60,7 +60,7 @@ static void asm_guardcc(ASMState *as, int cc) MCode *p = as->mcp; if (LJ_UNLIKELY(p == as->invmcp)) { as->loopinv = 1; - *(int32_t *)(p+1) = jmprel(p+5, target); + *(int32_t *)(p+1) = jmprel(as->J, p+5, target); target = p; cc ^= 1; if (as->realign) { @@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) /* Check if there's no conflicting instruction between curins and ref. ** Also avoid fusing loads if there are multiple references. 
*/ -static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload) +static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check) { IRIns *ir = as->ir; IRRef i = as->curins; @@ -118,7 +118,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload) while (--i > ref) { if (ir[i].o == conflict) return 0; /* Conflict found. */ - else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref)) + else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS)) + return 0; + else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref)) return 0; } return 1; /* Ok, no conflict. */ @@ -131,16 +133,17 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) as->mrm.ofs = 0; if (irb->o == IR_FLOAD) { IRIns *ira = IR(irb->op1); - lua_assert(irb->op2 == IRFL_TAB_ARRAY); + lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY"); /* We can avoid the FLOAD of t->array for colocated arrays. */ if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { + !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) { as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ return irb->op1; /* Table obj. */ } } else if (irb->o == IR_ADD && irref_isk(irb->op2)) { /* Fuse base offset (vararg load). */ - as->mrm.ofs = IR(irb->op2)->i; + IRIns *irk = IR(irb->op2); + as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64; return irb->op1; } return ref; /* Otherwise use the given array base. 
*/ @@ -150,7 +153,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) { IRIns *irx; - lua_assert(ir->o == IR_AREF); + lj_assertA(ir->o == IR_AREF, "expected AREF"); as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow); irx = IR(ir->op2); if (irref_isk(ir->op2)) { @@ -216,9 +219,17 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) #endif } break; + case IR_TMPREF: +#if LJ_GC64 + as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->tmptv); + as->mrm.base = RID_DISPATCH; + as->mrm.idx = RID_NONE; +#else + as->mrm.ofs = igcptr(&J2G(as->J)->tmptv); + as->mrm.base = as->mrm.idx = RID_NONE; +#endif + return; default: - lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO || - ir->o == IR_KKPTR); break; } } @@ -230,9 +241,10 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) /* Fuse FLOAD/FREF reference into memory operand. */ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) { - lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); + lj_assertA(ir->o == IR_FLOAD || ir->o == IR_FREF, + "bad IR op %d", ir->o); as->mrm.idx = RID_NONE; - if (ir->op1 == REF_NIL) { + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ #if LJ_GC64 as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch); as->mrm.base = RID_DISPATCH; @@ -271,7 +283,7 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) { IRIns *irr; - lua_assert(ir->o == IR_STRREF); + lj_assertA(ir->o == IR_STRREF, "bad IR op %d", ir->o); as->mrm.base = as->mrm.idx = RID_NONE; as->mrm.scale = XM_SCALE1; as->mrm.ofs = sizeof(GCstr); @@ -378,15 +390,17 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) { as->mrm.ofs = (int32_t)mcpofs(as, k); as->mrm.base = RID_RIP; - } else { + } else { /* Intern 64 bit constant at bottom of mcode. 
*/ if (ir->i) { - lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); + lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i), + "bad interned 64 bit constant"); } else { while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; *(uint64_t*)as->mcbot = *k; ir->i = (int32_t)(as->mctop - as->mcbot); as->mcbot += 8; as->mclim = as->mcbot + MCLIM_REDZONE; + lj_mcode_commitbot(as->J, as->mcbot); } as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); as->mrm.base = RID_RIP; @@ -419,12 +433,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) } if (ir->o == IR_KNUM) { RegSet avail = as->freeset & ~as->modset & RSET_FPR; - lua_assert(allow != RSET_EMPTY); + lj_assertA(allow != RSET_EMPTY, "no register allowed"); if (!(avail & (avail-1))) /* Fuse if less than two regs available. */ return asm_fuseloadk64(as, ir); } else if (ref == REF_BASE || ir->o == IR_KINT64) { RegSet avail = as->freeset & ~as->modset & RSET_GPR; - lua_assert(allow != RSET_EMPTY); + lj_assertA(allow != RSET_EMPTY, "no register allowed"); if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ if (ref == REF_BASE) { #if LJ_GC64 @@ -444,7 +458,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; if (ir->o == IR_SLOAD) { if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && - noconflict(as, ref, IR_RETF, 0) && + noconflict(as, ref, IR_RETF, 2) && !(LJ_GC64 && irt_isaddr(ir->t))) { as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + @@ -455,12 +469,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) } else if (ir->o == IR_FLOAD) { /* Generic fusion is only ok for 32 bit operand (but see asm_comp). 
*/ if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) && - noconflict(as, ref, IR_FSTORE, 0)) { + noconflict(as, ref, IR_FSTORE, 2)) { asm_fusefref(as, ir, xallow); return RID_MRM; } } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { - if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && + if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) && !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; @@ -470,12 +484,14 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). */ if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) && - noconflict(as, ref, IR_XSTORE, 0)) { + noconflict(as, ref, IR_XSTORE, 2)) { asm_fusexref(as, ir->op1, xallow); return RID_MRM; } - } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) { + } else if (ir->o == IR_VLOAD && IR(ir->op1)->o == IR_AREF && + !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); + as->mrm.ofs += 8 * ir->op2; return RID_MRM; } } @@ -605,7 +621,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #endif emit_loadi(as, r, ir->i); } else { - lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ + /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, r), "reg %d not free", r); if (ra_hasreg(ir->r)) { ra_noweak(as, ir->r); emit_movrr(as, ir, r, ir->r); @@ -614,7 +631,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) } } } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */ - lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */ + lj_assertA(!(irt_isfloat(ir->t) && irref_isk(ref)), + "unexpected float constant"); if (LJ_32 && (ofs & 4) && irref_isk(ref)) { /* Split stores for unaligned FP consts. 
*/ emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); @@ -645,7 +663,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; - int hiop = (LJ_32 && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); + int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; if (ra_hasreg(ir->r)) @@ -685,12 +703,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); } #endif -#if LJ_32 } else if (hiop) { ra_destpair(as, ir); -#endif } else { - lua_assert(!irt_ispri(ir->t)); + lj_assertA(!irt_ispri(ir->t), "PRI dest"); ra_destreg(as, ir, RID_RET); } } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) { @@ -775,6 +791,21 @@ static void asm_retf(ASMState *as, IRIns *ir) #endif } +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + irgc.ot = IRT(0, IRT_PGC); /* GC type. */ + emit_storeofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); + emit_opgl(as, XO_ARITH(XOg_OR), tmp|REX_GC64, cur_L); + emit_gri(as, XG_ARITHi(XOg_AND), tmp, SBUF_MASK_FLAG); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + /* -- Type conversions ---------------------------------------------------- */ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) @@ -786,6 +817,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) emit_rr(as, XO_UCOMISD, left, tmp); emit_rr(as, XO_CVTSI2SD, tmp, dest); emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ + checkmclim(as); emit_rr(as, XO_CVTTSD2SI, dest, left); /* Can't fuse since left is needed twice. 
*/ } @@ -809,8 +841,10 @@ static void asm_conv(ASMState *as, IRIns *ir) int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); int stfp = (st == IRT_NUM || st == IRT_FLOAT); IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */ + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); + lj_assertA(!(LJ_32 && (irt_isint64(ir->t) || st64)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -826,6 +860,7 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ emit_rma(as, XO_MOVSD, bias, k); + checkmclim(as); emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); return; } else { /* Integer to FP conversion. */ @@ -846,7 +881,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. 
*/ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -881,7 +917,7 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg left, dest = ra_dest(as, ir, RSET_GPR); RegSet allow = RSET_GPR; x86Op op; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if (st == IRT_I8) { op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX; } else if (st == IRT_U8) { @@ -915,7 +951,7 @@ static void asm_conv(ASMState *as, IRIns *ir) } } else { Reg dest = ra_dest(as, ir, RSET_GPR); - if (st64) { + if (st64 && !(ir->op2 & IRCONV_NONE)) { Reg left = asm_fuseload(as, lref, RSET_GPR); /* This is either a 32 bit reg/reg mov which zeroes the hiword ** or a load of the loword from a 64 bit address. @@ -952,7 +988,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir) emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ } else { - lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64); + lj_assertA(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64, "bad type for CONV"); } emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0); /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. 
*/ @@ -966,8 +1002,8 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); Reg lo, hi; - lua_assert(st == IRT_NUM || st == IRT_FLOAT); - lua_assert(dt == IRT_I64 || dt == IRT_U64); + lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); + lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); hi = ra_dest(as, ir, RSET_GPR); lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); @@ -1041,47 +1077,48 @@ static void asm_strto(ASMState *as, IRIns *ir) /* -- Memory references --------------------------------------------------- */ /* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) { - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - /* For numbers use the constant itself or a spill slot as a TValue. */ - if (irref_isk(ref)) - emit_loada(as, dest, ir_knum(ir)); - else - emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir)); - } else { - /* Otherwise use g->tmptv to hold the TValue. */ -#if LJ_GC64 - if (irref_isk(ref)) { - TValue k; - lj_ir_kvalue(as->J->L, &k, ir); - emit_movmroi(as, dest, 4, k.u32.hi); - emit_movmroi(as, dest, 0, k.u32.lo); + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { + /* Use the number constant itself as a TValue. */ + emit_loada(as, dest, ir_knum(ir)); + return; + } + emit_rmro(as, XO_MOVSDto, ra_alloc1(as, ref, RSET_FPR), dest, 0); } else { - /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. 
*/ - Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); - if (irt_is64(ir->t)) { - emit_u32(as, irt_toitype(ir->t) << 15); - emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4); +#if LJ_GC64 + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_movmroi(as, dest, 4, k.u32.hi); + emit_movmroi(as, dest, 0, k.u32.lo); } else { - /* Currently, no caller passes integers that might end up here. */ - emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15)); + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); + if (irt_is64(ir->t)) { + emit_u32(as, irt_toitype(ir->t) << 15); + emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4); + } else { + emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15)); + } + emit_movtomro(as, REX_64IR(ir, src), dest, 0); } - emit_movtomro(as, REX_64IR(ir, src), dest, 0); - } #else - if (!irref_isk(ref)) { - Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); - emit_movtomro(as, REX_64IR(ir, src), dest, 0); - } else if (!irt_ispri(ir->t)) { - emit_movmroi(as, dest, 0, ir->i); - } - if (!(LJ_64 && irt_islightud(ir->t))) - emit_movmroi(as, dest, 4, irt_toitype(ir->t)); + if (!irref_isk(ref)) { + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); + emit_movtomro(as, REX_64IR(ir, src), dest, 0); + } else if (!irt_ispri(ir->t)) { + emit_movmroi(as, dest, 0, ir->i); + } + if (!(LJ_64 && irt_islightud(ir->t))) + emit_movmroi(as, dest, 4, irt_toitype(ir->t)); #endif - emit_loada(as, dest, &J2G(as->J)->tmptv); + } } + emit_loada(as, dest, &J2G(as->J)->tmptv); /* g->tmptv holds the TValue(s). */ } static void asm_aref(ASMState *as, IRIns *ir) @@ -1140,6 +1177,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) asm_guardcc(as, CC_E); else emit_sjcc(as, CC_E, l_end); + checkmclim(as); if (irt_isnum(kt)) { if (isk) { /* Assumes -0.0 is already canonicalized to +0.0. 
*/ @@ -1179,13 +1217,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64)); } } else { - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); emit_u32(as, (irt_toitype(kt)<<15)|0x7fff); emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); #else } else { if (!irt_ispri(kt)) { - lua_assert(irt_isaddr(kt)); + lj_assertA(irt_isaddr(kt), "bad HREF key type"); if (isk) emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), ptr2addr(ir_kgc(irkey))); @@ -1193,13 +1231,12 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); emit_sjcc(as, CC_NE, l_next); } - lua_assert(!irt_isnil(kt)); + lj_assertA(!irt_isnil(kt), "bad HREF key type"); emit_i8(as, irt_toitype(kt)); emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); #endif } emit_sfixup(as, l_loop); - checkmclim(as); #if LJ_GC64 if (!isk && irt_isaddr(kt)) { emit_rr(as, XO_OR, tmp|REX_64, key); @@ -1208,29 +1245,25 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) #endif /* Load main position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; + khash = isk ? 
ir_khash(as, irkey) : 1; if (khash == 0) { emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); } else { emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); - if ((as->flags & JIT_F_PREFER_IMUL)) { - emit_i8(as, sizeof(Node)); - emit_rr(as, XO_IMULi8, dest, dest); - } else { - emit_shifti(as, XOg_SHL, dest, 3); - emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); - } + emit_shifti(as, XOg_SHL, dest, 3); + emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); if (isk) { emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); } else if (irt_isstr(kt)) { - emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash)); + emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, sid)); emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); } else { /* Must match with hashrot() in lj_tab.c. */ emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask)); emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp); emit_shifti(as, XOg_ROL, tmp, HASH_ROT3); emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp); + checkmclim(as); emit_shifti(as, XOg_ROL, dest, HASH_ROT2); emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest); emit_shifti(as, XOg_ROL, dest, HASH_ROT1); @@ -1248,7 +1281,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) } else { emit_rr(as, XO_MOV, tmp, key); #if LJ_GC64 - checkmclim(as); emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15); if ((as->flags & JIT_F_BMI2)) { emit_i8(as, 32); @@ -1275,10 +1307,10 @@ static void asm_hrefk(ASMState *as, IRIns *ir) #if !LJ_64 MCLabel l_exit; #endif - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ra_hasreg(dest)) { if (ofs != 0) { - if (dest == node && !(as->flags & JIT_F_LEA_AGU)) + if (dest == node) emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); else emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); @@ -1292,7 +1324,8 @@ static void asm_hrefk(ASMState 
*as, IRIns *ir) Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node)); emit_rmro(as, XO_CMP, key|REX_64, node, ofs + (int32_t)offsetof(Node, key.u64)); - lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); + lj_assertA(irt_isnum(irkey->t) || irt_isgcv(irkey->t), + "bad HREFK key type"); /* Assumes -0.0 is already canonicalized to +0.0. */ emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : #if LJ_GC64 @@ -1303,7 +1336,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); #endif } else { - lua_assert(!irt_isnil(irkey->t)); + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); #if LJ_GC64 emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff); emit_rmro(as, XO_ARITHi, XOg_CMP, node, @@ -1327,13 +1360,13 @@ static void asm_hrefk(ASMState *as, IRIns *ir) (int32_t)ir_knum(irkey)->u32.hi); } else { if (!irt_ispri(irkey->t)) { - lua_assert(irt_isgcv(irkey->t)); + lj_assertA(irt_isgcv(irkey->t), "bad HREFK key type"); emit_gmroi(as, XG_ARITHi(XOg_CMP), node, ofs + (int32_t)offsetof(Node, key.gcr), ptr2addr(ir_kgc(irkey))); emit_sjcc(as, CC_NE, l_exit); } - lua_assert(!irt_isnil(irkey->t)); + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); emit_i8(as, irt_toitype(irkey->t)); emit_rmro(as, XO_ARITHi8, XOg_CMP, node, ofs + (int32_t)offsetof(Node, key.it)); @@ -1344,24 +1377,31 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_rma(as, XO_MOV, dest|REX_GC64, v); } else { Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { + if (ir->o == IR_UREFC) emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); - asm_guardcc(as, 
CC_NE); - emit_i8(as, 1); + else + emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); + if (guarded) { + asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE); + emit_i8(as, 0); emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); + } + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loada(as, uv, o); } else { - emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); + emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } - emit_rmro(as, XO_MOV, uv|REX_GC64, func, - (int32_t)offsetof(GCfuncL, uvptr) + - (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } } @@ -1406,7 +1446,8 @@ static void asm_fxload(ASMState *as, IRIns *ir) if (LJ_64 && irt_is64(ir->t)) dest |= REX_64; else - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), + "unsplit 64 bit load"); xo = XO_MOV; break; } @@ -1451,13 +1492,16 @@ static void asm_fxstore(ASMState *as, IRIns *ir) case IRT_NUM: xo = XO_MOVSDto; break; case IRT_FLOAT: xo = XO_MOVSSto; break; #if LJ_64 && !LJ_GC64 - case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ + case IRT_LIGHTUD: + /* NYI: mask 64 bit lightuserdata. 
*/ + lj_assertA(0, "store of lightuserdata"); #endif default: if (LJ_64 && irt_is64(ir->t)) src |= REX_64; else - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), + "unsplit 64 bit store"); xo = XO_MOVto; break; } @@ -1471,8 +1515,8 @@ static void asm_fxstore(ASMState *as, IRIns *ir) emit_i8(as, k); emit_mrm(as, XO_MOVmib, 0, RID_MRM); } else { - lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || - irt_isaddr(ir->t)); + lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || + irt_isaddr(ir->t), "bad store type"); emit_i32(as, k); emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM); } @@ -1507,13 +1551,16 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) #if LJ_GC64 Reg tmp = RID_NONE; #endif - lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || - (LJ_DUALNUM && irt_isint(ir->t))); + lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || + (LJ_DUALNUM && irt_isint(ir->t)), + "bad load type %d", irt_type(ir->t)); #if LJ_64 && !LJ_GC64 if (irt_islightud(ir->t)) { Reg dest = asm_load_lightud64(as, ir, 1); if (ra_hasreg(dest)) { + checkmclim(as); asm_fuseahuref(as, ir->op1, RSET_GPR); + if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); } return; @@ -1523,6 +1570,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); asm_fuseahuref(as, ir->op1, RSET_GPR); + if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; #if LJ_GC64 if (irt_isaddr(ir->t)) { emit_shifti(as, XOg_SHR|REX_64, dest, 17); @@ -1550,12 +1598,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) } #endif asm_fuseahuref(as, ir->op1, gpr); + if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; } /* Always do the type check, even if the load result is unused. */ as->mrm.ofs += 4; asm_guardcc(as, irt_isnum(ir->t) ? 
CC_AE : CC_NE); if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { - lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); + lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), + "bad load type %d", irt_type(ir->t)); + checkmclim(as); #if LJ_GC64 emit_u32(as, LJ_TISNUM << 15); #else @@ -1637,13 +1688,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) #endif emit_mrm(as, XO_MOVto, src, RID_MRM); } else if (!irt_ispri(irr->t)) { - lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); + lj_assertA(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)), + "bad store type"); emit_i32(as, irr->i); emit_mrm(as, XO_MOVmi, 0, RID_MRM); } as->mrm.ofs += 4; #if LJ_GC64 - lua_assert(LJ_DUALNUM && irt_isinteger(ir->t)); + lj_assertA(LJ_DUALNUM && irt_isinteger(ir->t), "bad store type"); emit_i32(as, LJ_TNUMX << 15); #else emit_i32(as, (int32_t)irt_toitype(ir->t)); @@ -1658,10 +1710,14 @@ static void asm_sload(ASMState *as, IRIns *ir) (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); IRType1 t = ir->t; Reg base; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); + lj_assertA(LJ_DUALNUM || + !irt_isint(t) || + (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), + "bad SLOAD type"); if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { Reg left = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ @@ -1681,7 +1737,8 @@ static void asm_sload(ASMState *as, IRIns *ir) RegSet allow = irt_isnum(t) ? 
RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); base = ra_alloc1(as, REF_BASE, RSET_GPR); - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); + lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(t)); if ((ir->op2 & IRSLOAD_CONVERT)) { t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs); @@ -1726,13 +1783,11 @@ static void asm_sload(ASMState *as, IRIns *ir) if ((ir->op2 & IRSLOAD_TYPECHECK)) { /* Need type check, even if the load result is unused. */ asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); - if (LJ_64 && irt_type(t) >= IRT_NUM) { - lua_assert(irt_isinteger(t) || irt_isnum(t)); -#if LJ_GC64 - emit_u32(as, LJ_TISNUM << 15); -#else - emit_u32(as, LJ_TISNUM); -#endif + if ((LJ_64 && irt_type(t) >= IRT_NUM) || (ir->op2 & IRSLOAD_KEYINDEX)) { + lj_assertA(irt_isinteger(t) || irt_isnum(t), + "bad SLOAD type %d", irt_type(t)); + emit_u32(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX : + LJ_GC64 ? 
(LJ_TISNUM << 15) : LJ_TISNUM); emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); #if LJ_GC64 } else if (irt_isnil(t)) { @@ -1758,7 +1813,7 @@ static void asm_sload(ASMState *as, IRIns *ir) emit_i8(as, irt_toitype(t)); emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); emit_shifti(as, XOg_SAR|REX_64, tmp, 47); - emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4); + emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs); #else } else { emit_i8(as, irt_toitype(t)); @@ -1779,7 +1834,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) CTInfo info = lj_ctype_info(cts, id, &sz); const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; IRRef args[4]; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); as->gcsteps++; asm_setupresult(as, ir, ci); /* GCcdata * */ @@ -1809,7 +1865,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) int32_t ofs = sizeof(GCcdata); if (sz == 8) { ofs += 4; ir++; - lua_assert(ir->o == IR_HIOP); + lj_assertA(ir->o == IR_HIOP, "missing CNEWI HIOP"); } do { if (irref_isk(ir->op2)) { @@ -1823,7 +1879,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) ofs -= 4; ir--; } while (1); #endif - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; args[0] = ASMREF_L; /* lua_State *L */ @@ -1847,8 +1903,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1875,7 +1929,7 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj; /* No need for other object barriers (yet). 
*/ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1954,15 +2008,11 @@ static void asm_fpmath(ASMState *as, IRIns *ir) fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); ra_left(as, RID_XMM0, ir->op1); } - } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { - /* Rejoined to pow(). */ } else { asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); } } -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) - static void asm_ldexp(ASMState *as, IRIns *ir) { int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ @@ -1979,35 +2029,11 @@ static void asm_ldexp(ASMState *as, IRIns *ir) asm_x87load(as, ir->op2); } -static void asm_fppowi(ASMState *as, IRIns *ir) -{ - /* The modified regs must match with the *.dasc implementation. */ - RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - ra_destreg(as, ir, RID_XMM0); - emit_call(as, lj_vm_powi_sse); - ra_left(as, RID_XMM0, ir->op1); - ra_left(as, RID_EAX, ir->op2); -} - -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_fppowi(as, ir); -} - static int asm_swapops(ASMState *as, IRIns *ir) { IRIns *irl = IR(ir->op1); IRIns *irr = IR(ir->op2); - lua_assert(ra_noreg(irr->r)); + lj_assertA(ra_noreg(irr->r), "bad usage"); if (!irm_iscomm(lj_ir_mode[ir->o])) return 0; /* Can't swap non-commutative operations. */ if (irref_isk(ir->op2)) @@ -2060,8 +2086,9 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa) int32_t k = 0; if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */ MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 
3 : 2); - if ((p[1] & 15) < 14) { - if ((p[1] & 15) >= 12) p[1] -= 4; /* L <->S, NL <-> NS */ + MCode *q = p[0] == 0x0f ? p+1 : p; + if ((*q & 15) < 14) { + if ((*q & 15) >= 12) *q -= 4; /* L <->S, NL <-> NS */ as->flagmcp = NULL; as->mcp = p; } /* else: cannot transform LE/NLE to cc without use of OF. */ @@ -2178,8 +2205,7 @@ static void asm_add(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) asm_fparith(as, ir, XO_ADDSD); - else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || - irt_is64(ir->t) || !asm_lea(as, ir)) + else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir)) asm_intarith(as, ir, XOg_ADD); } @@ -2199,27 +2225,7 @@ static void asm_mul(ASMState *as, IRIns *ir) asm_intarith(as, ir, XOg_X_IMUL); } -static void asm_div(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, XO_DIVSD); -} - -static void asm_mod(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} +#define asm_fpdiv(as, ir) asm_fparith(as, ir, XO_DIVSD) static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) { @@ -2319,7 +2325,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv) dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); if (dest == RID_ECX) { dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX)); - emit_rr(as, XO_MOV, RID_ECX, dest); + emit_rr(as, XO_MOV, REX_64IR(ir, RID_ECX), dest); } right = irr->r; if (ra_noreg(right)) @@ -2417,8 +2423,9 @@ static void asm_comp(ASMState *as, IRIns *ir) IROp leftop = (IROp)(IR(lref)->o); Reg r64 = REX_64IR(ir, 0); int32_t imm = 0; - lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || - irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); + lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || + irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t), + "bad comparison data type %d", irt_type(ir->t)); /* Swap constants (only for ABC) and fusable loads to the right. */ if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */ @@ -2500,7 +2507,7 @@ static void asm_comp(ASMState *as, IRIns *ir) /* Use test r,r instead of cmp r,0. */ x86Op xo = XO_TEST; if (irt_isu8(ir->t)) { - lua_assert(ir->o == IR_EQ || ir->o == IR_NE); + lj_assertA(ir->o == IR_EQ || ir->o == IR_NE, "bad usage"); xo = XO_TESTb; if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) { if (LJ_64) { @@ -2602,15 +2609,15 @@ static void asm_comp_int64(ASMState *as, IRIns *ir) } #endif -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ +/* -- Split register ops -------------------------------------------------- */ -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. 
*/ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_32 && LJ_HASFFI /* HIOP is marked as a store because it needs its own DCE logic. */ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; +#if LJ_32 && LJ_HASFFI if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ as->curins--; /* Always skip the CONV. */ if (usehi || uselo) @@ -2624,8 +2631,10 @@ static void asm_hiop(ASMState *as, IRIns *ir) asm_fxstore(as, ir); return; } +#endif if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { +#if LJ_32 && LJ_HASFFI case IR_ADD: as->flagmcp = NULL; as->curins--; @@ -2648,19 +2657,16 @@ static void asm_hiop(ASMState *as, IRIns *ir) asm_neg_not(as, ir-1, XOg_NEG); break; } - case IR_CALLN: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; case IR_CNEWI: /* Nothing to do here. Handled by CNEWI itself. */ break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */ #endif + case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. 
*/ + break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } } /* -- Profiling ----------------------------------------------------------- */ @@ -2721,12 +2727,21 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) continue; - if (irt_isnum(ir->t)) { + if ((sn & SNAP_KEYINDEX)) { + emit_movmroi(as, RID_BASE, ofs+4, LJ_KEYINDEX); + if (irref_isk(ref)) { + emit_movmroi(as, RID_BASE, ofs, ir->i); + } else { + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); + emit_movtomro(as, src, RID_BASE, ofs); + } + } else if (irt_isnum(ir->t)) { Reg src = ra_alloc1(as, ref, RSET_FPR); emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); } else { - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || - (LJ_DUALNUM && irt_isinteger(ir->t))); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || + (LJ_DUALNUM && irt_isinteger(ir->t)), + "restore of IR type %d", irt_type(ir->t)); if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); #if LJ_GC64 @@ -2771,7 +2786,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ @@ -2815,16 +2830,16 @@ static void asm_loop_fixup(ASMState *as) MCode *target = as->mcp; if (as->realign) { /* Realigned loops use short jumps. */ as->realign = NULL; /* Stop another retry. */ - lua_assert(((intptr_t)target & 15) == 0); + lj_assertA(((intptr_t)target & 15) == 0, "loop realign failed"); if (as->loopinv) { /* Inverted loop branch? */ p -= 5; p[0] = XI_JMP; - lua_assert(target - p >= -128); + lj_assertA(target - p >= -128, "loop realign failed"); p[-1] = (MCode)(target - p); /* Patch sjcc. */ if (as->loopinv == 2) p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. 
*/ } else { - lua_assert(target - p >= -128); + lj_assertA(target - p >= -128, "loop realign failed"); p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */ p[-2] = XI_JMPs; } @@ -2853,6 +2868,12 @@ static void asm_loop_fixup(ASMState *as) } } +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + UNUSED(as); /* Nothing to do. */ +} + /* -- Head of trace ------------------------------------------------------- */ /* Coalesce BASE register for a root trace. */ @@ -2870,7 +2891,7 @@ static void asm_head_root_base(ASMState *as) } /* Coalesce or reload BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) +static Reg asm_head_side_base(ASMState *as, IRIns *irp) { IRIns *ir = IR(REF_BASE); Reg r = ir->r; @@ -2879,16 +2900,16 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ir->r = RID_INIT; /* No inheritance for modified BASE register. */ if (irp->r == r) { - rset_clear(allow, r); /* Mark same BASE register as coalesced. */ + return r; /* Same BASE register already coalesced. */ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { /* Move from coalesced parent reg. */ - rset_clear(allow, irp->r); emit_rr(as, XO_MOV, r|REX_GC64, irp->r); + return irp->r; } else { emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ } } - return allow; + return RID_NONE; } /* -- Tail of trace ------------------------------------------------------- */ @@ -2901,7 +2922,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) MCode *target, *q; int32_t spadj = as->T->spadjust; if (spadj == 0) { - p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); + p -= LJ_64 ? 7 : 6; } else { MCode *p1; /* Patch stack adjustment. 
*/ @@ -2913,24 +2934,15 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) p1 = p-9; *(int32_t *)p1 = spadj; } - if ((as->flags & JIT_F_LEA_AGU)) { #if LJ_64 - p1[-4] = 0x48; + p1[-3] = 0x48; #endif - p1[-3] = (MCode)XI_LEA; - p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); - p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); - } else { -#if LJ_64 - p1[-3] = 0x48; -#endif - p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); - p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); - } + p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); + p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } /* Patch exit branch. */ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = XI_JMP; /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ for (q = as->mctop-1; q >= p; q--) @@ -2957,7 +2969,7 @@ static void asm_tail_prep(ASMState *as) as->invmcp = as->mcp = p; } else { /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ - as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); + as->mcp = p - (LJ_64 ? 7 : 6); as->invmcp = NULL; } } @@ -3097,23 +3109,30 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) MSize len = T->szmcode; MCode *px = exitstub_addr(J, exitno) - 6; MCode *pe = p+len-6; + MCode *pgc = NULL; #if LJ_GC64 uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch)); #else uint32_t statei = u32ptr(&J2G(J)->vmstate); #endif if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) - *(int32_t *)(p+len-4) = jmprel(p+len, target); + *(int32_t *)(p+len-4) = jmprel(J, p+len, target); /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ for (; p < pe; p += asm_x86_inslen(p)) { intptr_t ofs = LJ_GC64 ? 
(p[0] & 0xf0) == 0x40 : LJ_64; if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi) break; } - lua_assert(p < pe); - for (; p < pe; p += asm_x86_inslen(p)) - if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) - *(int32_t *)(p+2) = jmprel(p+6, target); + lj_assertJ(p < pe, "instruction length decoder failed"); + for (; p < pe; p += asm_x86_inslen(p)) { + if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px && + p != pgc) { + *(int32_t *)(p+2) = jmprel(J, p+6, target); + } else if (*p == XI_CALL && + (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { + pgc = p+7; /* Do not patch GC check exit. */ + } + } lj_mcode_sync(T->mcode, T->mcode + T->szmcode); lj_mcode_patch(J, mcarea, 1); } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_assert.c b/source/libs/luajit/LuaJIT-src/src/lj_assert.c new file mode 100644 index 0000000000000000000000000000000000000000..66695693b84094b72ad5c083c7132ea1fbd4b6b3 --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/lj_assert.c @@ -0,0 +1,28 @@ +/* +** Internal assertions. +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_assert_c +#define LUA_CORE + +#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) + +#include <stdio.h> + +#include "lj_obj.h" + +void lj_assert_fail(global_State *g, const char *file, int line, + const char *func, const char *fmt, ...) +{ + va_list argp; + va_start(argp, fmt); + fprintf(stderr, "LuaJIT ASSERT %s:%d: %s: ", file, line, func); + vfprintf(stderr, fmt, argp); + fputc('\n', stderr); + va_end(argp); + UNUSED(g); /* May be NULL. TODO: optionally dump state. 
*/ + abort(); +} + +#endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_bc.c b/source/libs/luajit/LuaJIT-src/src/lj_bc.c index a597692cab4afe338046677532fb33d03a6bce3e..49d31530f03b0b0458bc1e4418151d9b7ff0ac02 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_bc.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_bc.c @@ -1,6 +1,6 @@ /* ** Bytecode instruction modes. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_bc_c diff --git a/source/libs/luajit/LuaJIT-src/src/lj_bc.h b/source/libs/luajit/LuaJIT-src/src/lj_bc.h index 69a45f281e9081edf48c066aba42b4a84559e3f9..a94ea4e4a8759081cdfb40ea7d2ac6266b719784 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_bc.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_bc.h @@ -1,6 +1,6 @@ /* ** Bytecode instruction format. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_BC_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_bcdump.h b/source/libs/luajit/LuaJIT-src/src/lj_bcdump.h index fdfc6ec0c6ef37cf17497dcc4abffd4ee0a0cab0..6450c19516c937c8163a49884457d8e68be29c2c 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_bcdump.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_bcdump.h @@ -1,6 +1,6 @@ /* ** Bytecode dump definitions. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_BCDUMP_H @@ -46,6 +46,8 @@ #define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1) +#define BCDUMP_F_DETERMINISTIC 0x80000000 + /* Type codes for the GC constants of a prototype. Plus length for strings. 
*/ enum { BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64, @@ -61,7 +63,7 @@ enum { /* -- Bytecode reader/writer ---------------------------------------------- */ LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, - void *data, int strip); + void *data, uint32_t flags); LJ_FUNC GCproto *lj_bcread_proto(LexState *ls); LJ_FUNC GCproto *lj_bcread(LexState *ls); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_bcread.c b/source/libs/luajit/LuaJIT-src/src/lj_bcread.c index 48c5e7c7f5a0bfe2f552bfb26fd6246274895b99..ee7d7c1870168635c1f7afe5f7444b12c78fae84 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_bcread.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_bcread.c @@ -1,6 +1,6 @@ /* ** Bytecode reader. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_bcread_c @@ -47,17 +47,17 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em) /* Refill buffer. */ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) { - lua_assert(len != 0); + lj_assertLS(len != 0, "empty refill"); if (len > LJ_MAX_BUF || ls->c < 0) bcread_error(ls, LJ_ERR_BCBAD); do { const char *buf; size_t sz; - char *p = sbufB(&ls->sb); + char *p = ls->sb.b; MSize n = (MSize)(ls->pe - ls->p); if (n) { /* Copy remainder to buffer. */ if (sbuflen(&ls->sb)) { /* Move down in buffer. */ - lua_assert(ls->pe == sbufP(&ls->sb)); + lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer"); if (ls->p != p) memmove(p, ls->p, n); } else { /* Copy from buffer provided by reader. */ p = lj_buf_need(&ls->sb, len); @@ -66,38 +66,39 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) ls->p = p; ls->pe = p + n; } - setsbufP(&ls->sb, p + n); + ls->sb.w = p + n; buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */ if (buf == NULL || sz == 0) { /* EOF? 
*/ if (need) bcread_error(ls, LJ_ERR_BCBAD); ls->c = -1; /* Only bad if we get called again. */ break; } + if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L); if (n) { /* Append to buffer. */ n += (MSize)sz; p = lj_buf_need(&ls->sb, n < len ? len : n); - memcpy(sbufP(&ls->sb), buf, sz); - setsbufP(&ls->sb, p + n); + memcpy(ls->sb.w, buf, sz); + ls->sb.w = p + n; ls->p = p; ls->pe = p + n; } else { /* Return buffer provided by reader. */ ls->p = buf; ls->pe = buf + sz; } - } while (ls->p + len > ls->pe); + } while ((MSize)(ls->pe - ls->p) < len); } /* Need a certain number of bytes. */ static LJ_AINLINE void bcread_need(LexState *ls, MSize len) { - if (LJ_UNLIKELY(ls->p + len > ls->pe)) + if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) bcread_fill(ls, len, 1); } /* Want to read up to a certain number of bytes, but may need less. */ static LJ_AINLINE void bcread_want(LexState *ls, MSize len) { - if (LJ_UNLIKELY(ls->p + len > ls->pe)) + if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) bcread_fill(ls, len, 0); } @@ -106,7 +107,7 @@ static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len) { uint8_t *p = (uint8_t *)ls->p; ls->p += len; - lua_assert(ls->p <= ls->pe); + lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); return p; } @@ -119,7 +120,7 @@ static void bcread_block(LexState *ls, void *q, MSize len) /* Read byte from buffer. 
*/ static LJ_AINLINE uint32_t bcread_byte(LexState *ls) { - lua_assert(ls->p < ls->pe); + lj_assertLS(ls->p < ls->pe, "buffer read overflow"); return (uint32_t)(uint8_t)*ls->p++; } @@ -127,7 +128,7 @@ static LJ_AINLINE uint32_t bcread_byte(LexState *ls) static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls) { uint32_t v = lj_buf_ruleb128(&ls->p); - lua_assert(ls->p <= ls->pe); + lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); return v; } @@ -144,7 +145,7 @@ static uint32_t bcread_uleb128_33(LexState *ls) } while (*p++ >= 0x80); } ls->p = (char *)p; - lua_assert(ls->p <= ls->pe); + lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); return v; } @@ -191,7 +192,7 @@ static void bcread_ktabk(LexState *ls, TValue *o) o->u32.lo = bcread_uleb128(ls); o->u32.hi = bcread_uleb128(ls); } else { - lua_assert(tp <= BCDUMP_KTAB_TRUE); + lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); setpriV(o, ~tp); } } @@ -213,7 +214,7 @@ static GCtab *bcread_ktab(LexState *ls) for (i = 0; i < nhash; i++) { TValue key; bcread_ktabk(ls, &key); - lua_assert(!tvisnil(&key)); + lj_assertLS(!tvisnil(&key), "nil key"); bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); } } @@ -250,7 +251,7 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) #endif } else { lua_State *L = ls->L; - lua_assert(tp == BCDUMP_KGC_CHILD); + lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp); if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */ bcread_error(ls, LJ_ERR_BCBAD); L->top--; @@ -280,8 +281,11 @@ static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn) static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc) { BCIns *bc = proto_bc(pt); - bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF, - pt->framesize, 0); + BCIns op; + if (ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. 
*/ + else if ((pt->flags & PROTO_VARARG)) op = BC_FUNCV; + else op = BC_FUNCF; + bc[0] = BCINS_AD(op, pt->framesize, 0); bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns)); /* Swap bytecode instructions if the endianess differs. */ if (bcread_swap(ls)) { @@ -394,21 +398,17 @@ static int bcread_header(LexState *ls) bcread_byte(ls) != BCDUMP_VERSION) return 0; bcread_flags(ls) = flags = bcread_uleb128(ls); if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; - if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0; + if ((flags & BCDUMP_F_FR2) != (uint32_t)ls->fr2*BCDUMP_F_FR2) return 0; if ((flags & BCDUMP_F_FFI)) { #if LJ_HASFFI lua_State *L = ls->L; - if (!ctype_ctsG(G(L))) { - ptrdiff_t oldtop = savestack(L, L->top); - luaopen_ffi(L); /* Load FFI library on-demand. */ - L->top = restorestack(L, oldtop); - } + ctype_loadffi(L); #else return 0; #endif } if ((flags & BCDUMP_F_STRIP)) { - ls->chunkname = lj_str_newz(ls->L, ls->chunkarg); + ls->chunkname = lj_str_newz(ls->L, *ls->chunkarg == BCDUMP_HEAD1 ? "=?" : ls->chunkarg); } else { MSize len = bcread_uleb128(ls); bcread_need(ls, len); @@ -421,7 +421,7 @@ static int bcread_header(LexState *ls) GCproto *lj_bcread(LexState *ls) { lua_State *L = ls->L; - lua_assert(ls->c == BCDUMP_HEAD1); + lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header"); bcread_savetop(L, ls, L->top); lj_buf_reset(&ls->sb); /* Check for a valid bytecode dump header. */ @@ -447,8 +447,7 @@ GCproto *lj_bcread(LexState *ls) setprotoV(L, L->top, pt); incr_top(L); } - if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 || - L->top-1 != bcread_oldtop(L, ls)) + if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) bcread_error(ls, LJ_ERR_BCBAD); /* Pop off last prototype. 
*/ L->top--; diff --git a/source/libs/luajit/LuaJIT-src/src/lj_bcwrite.c b/source/libs/luajit/LuaJIT-src/src/lj_bcwrite.c index 5e05caeaf5d880950fa736354266ea29233aaa56..de200ef4ad5cd6425155c674a419c97a86a21c2f 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_bcwrite.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_bcwrite.c @@ -1,6 +1,6 @@ /* ** Bytecode writer. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_bcwrite_c @@ -27,10 +27,21 @@ typedef struct BCWriteCtx { GCproto *pt; /* Root prototype. */ lua_Writer wfunc; /* Writer callback. */ void *wdata; /* Writer callback data. */ - int strip; /* Strip debug info. */ + TValue **heap; /* Heap used for deterministic sorting. */ + uint32_t heapsz; /* Size of heap. */ + uint32_t flags; /* BCDUMP_F_* flags. */ int status; /* Status from writer callback. */ +#ifdef LUA_USE_ASSERT + global_State *g; +#endif } BCWriteCtx; +#ifdef LUA_USE_ASSERT +#define lj_assertBCW(c, ...) lj_assertG_(ctx->g, (c), __VA_ARGS__) +#else +#define lj_assertBCW(c, ...) ((void)ctx) +#endif + /* -- Bytecode writer ----------------------------------------------------- */ /* Write a single constant key/value of a template table. */ @@ -53,7 +64,7 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) if (num == (lua_Number)k) { /* -0 is never a constant. */ *p++ = BCDUMP_KTAB_INT; p = lj_strfmt_wuleb128(p, k); - setsbufP(&ctx->sb, p); + ctx->sb.w = p; return; } } @@ -61,10 +72,79 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) p = lj_strfmt_wuleb128(p, o->u32.lo); p = lj_strfmt_wuleb128(p, o->u32.hi); } else { - lua_assert(tvispri(o)); + lj_assertBCW(tvispri(o), "unhandled type %d", itype(o)); *p++ = BCDUMP_KTAB_NIL+~itype(o); } - setsbufP(&ctx->sb, p); + ctx->sb.w = p; +} + +/* Compare two template table keys. 
*/ +static LJ_AINLINE int bcwrite_ktabk_lt(TValue *a, TValue *b) +{ + uint32_t at = itype(a), bt = itype(b); + if (at != bt) { /* This also handles false and true keys. */ + return at < bt; + } else if (at == LJ_TSTR) { + return lj_str_cmp(strV(a), strV(b)) < 0; + } else { + return a->u64 < b->u64; /* This works for numbers and integers. */ + } +} + +/* Insert key into a sorted heap. */ +static void bcwrite_ktabk_heap_insert(TValue **heap, MSize idx, MSize end, + TValue *key) +{ + MSize child; + while ((child = idx * 2 + 1) < end) { + /* Find lower of the two children. */ + TValue *c0 = heap[child]; + if (child + 1 < end) { + TValue *c1 = heap[child + 1]; + if (bcwrite_ktabk_lt(c1, c0)) { + c0 = c1; + child++; + } + } + if (bcwrite_ktabk_lt(key, c0)) break; /* Key lower? Found our position. */ + heap[idx] = c0; /* Move lower child up. */ + idx = child; /* Descend. */ + } + heap[idx] = key; /* Insert key here. */ +} + +/* Resize heap, dropping content. */ +static void bcwrite_heap_resize(BCWriteCtx *ctx, uint32_t nsz) +{ + lua_State *L = sbufL(&ctx->sb); + if (ctx->heapsz) { + lj_mem_freevec(G(L), ctx->heap, ctx->heapsz, TValue *); + ctx->heapsz = 0; + } + if (nsz) { + ctx->heap = lj_mem_newvec(L, nsz, TValue *); + ctx->heapsz = nsz; + } +} + +/* Write hash part of template table in sorted order. */ +static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash) +{ + TValue **heap = ctx->heap; + MSize i = nhash; + for (;; node--) { /* Build heap. */ + if (!tvisnil(&node->key)) { + bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key); + if (i == 0) break; + } + } + do { /* Drain heap. */ + TValue *key = heap[0]; /* Output lowest key from top. */ + bcwrite_ktabk(ctx, key, 0); + bcwrite_ktabk(ctx, (TValue *)((char *)key - offsetof(Node, key)), 1); + key = heap[--nhash]; /* Remove last key. */ + bcwrite_ktabk_heap_insert(heap, 0, nhash, key); /* Re-insert. */ + } while (nhash); } /* Write a template table. 
*/ @@ -83,12 +163,12 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) MSize i, hmask = t->hmask; Node *node = noderef(t->node); for (i = 0; i <= hmask; i++) - nhash += !tvisnil(&node[i].val); + nhash += !tvisnil(&node[i].key); } /* Write number of array slots and hash slots. */ p = lj_strfmt_wuleb128(p, narray); p = lj_strfmt_wuleb128(p, nhash); - setsbufP(&ctx->sb, p); + ctx->sb.w = p; if (narray) { /* Write array entries (may contain nil). */ MSize i; TValue *o = tvref(t->array); @@ -96,14 +176,20 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) bcwrite_ktabk(ctx, o, 1); } if (nhash) { /* Write hash entries. */ - MSize i = nhash; Node *node = noderef(t->node) + t->hmask; - for (;; node--) - if (!tvisnil(&node->val)) { - bcwrite_ktabk(ctx, &node->key, 0); - bcwrite_ktabk(ctx, &node->val, 1); - if (--i == 0) break; - } + if ((ctx->flags & BCDUMP_F_DETERMINISTIC) && nhash > 1) { + if (ctx->heapsz < nhash) + bcwrite_heap_resize(ctx, t->hmask + 1); + bcwrite_ktab_sorted_hash(ctx, node, nhash); + } else { + MSize i = nhash; + for (;; node--) + if (!tvisnil(&node->key)) { + bcwrite_ktabk(ctx, &node->key, 0); + bcwrite_ktabk(ctx, &node->val, 1); + if (--i == 0) break; + } + } } } @@ -121,7 +207,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) tp = BCDUMP_KGC_STR + gco2str(o)->len; need = 5+gco2str(o)->len; } else if (o->gch.gct == ~LJ_TPROTO) { - lua_assert((pt->flags & PROTO_CHILD)); + lj_assertBCW((pt->flags & PROTO_CHILD), "prototype has unexpected child"); tp = BCDUMP_KGC_CHILD; #if LJ_HASFFI } else if (o->gch.gct == ~LJ_TCDATA) { @@ -132,12 +218,14 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) } else if (id == CTID_UINT64) { tp = BCDUMP_KGC_U64; } else { - lua_assert(id == CTID_COMPLEX_DOUBLE); + lj_assertBCW(id == CTID_COMPLEX_DOUBLE, + "bad cdata constant CTID %d", id); tp = BCDUMP_KGC_COMPLEX; } #endif } else { - lua_assert(o->gch.gct == ~LJ_TTAB); + lj_assertBCW(o->gch.gct == ~LJ_TTAB, + "bad 
constant GC type %d", o->gch.gct); tp = BCDUMP_KGC_TAB; need = 1+2*5; } @@ -161,7 +249,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) } #endif } - setsbufP(&ctx->sb, p); + ctx->sb.w = p; } } @@ -178,7 +266,8 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) goto save_int; } else { /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */ - if (!LJ_DUALNUM) { /* Narrow number constants to integers. */ + if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) { + /* Narrow number constants to integers. */ lua_Number num = numV(o); k = lj_num2int(num); if (num == (lua_Number)k) { /* -0 is never a constant. */ @@ -195,7 +284,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) p = lj_strfmt_wuleb128(p, o->u32.hi); } } - setsbufP(&ctx->sb, p); + ctx->sb.w = p; } /* Write bytecode instructions. */ @@ -219,10 +308,7 @@ static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt) q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8); - BCIns ins = traceref(J, rd)->startins; - q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL); - q[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins); - q[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins); + memcpy(q, &traceref(J, rd)->startins, 4); } } } @@ -260,7 +346,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) p = lj_strfmt_wuleb128(p, pt->sizekgc); p = lj_strfmt_wuleb128(p, pt->sizekn); p = lj_strfmt_wuleb128(p, pt->sizebc-1); - if (!ctx->strip) { + if (!(ctx->flags & BCDUMP_F_STRIP)) { if (proto_lineinfo(pt)) sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); p = lj_strfmt_wuleb128(p, sizedbg); @@ -273,7 +359,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) /* Write bytecode instructions and upvalue refs. 
*/ p = bcwrite_bytecode(ctx, p, pt); p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2); - setsbufP(&ctx->sb, p); + ctx->sb.w = p; /* Write constants. */ bcwrite_kgc(ctx, pt); @@ -283,16 +369,16 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) if (sizedbg) { p = lj_buf_more(&ctx->sb, sizedbg); p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg); - setsbufP(&ctx->sb, p); + ctx->sb.w = p; } /* Pass buffer to writer function. */ if (ctx->status == 0) { MSize n = sbuflen(&ctx->sb) - 5; MSize nn = (lj_fls(n)+8)*9 >> 6; - char *q = sbufB(&ctx->sb) + (5 - nn); + char *q = ctx->sb.b + (5 - nn); p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */ - lua_assert(p == sbufB(&ctx->sb) + 5); + lj_assertBCW(p == ctx->sb.b + 5, "bad ULEB128 write"); ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata); } } @@ -308,16 +394,15 @@ static void bcwrite_header(BCWriteCtx *ctx) *p++ = BCDUMP_HEAD2; *p++ = BCDUMP_HEAD3; *p++ = BCDUMP_VERSION; - *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) + + *p++ = (ctx->flags & (BCDUMP_F_STRIP | BCDUMP_F_FR2)) + LJ_BE*BCDUMP_F_BE + - ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) + - LJ_FR2*BCDUMP_F_FR2; - if (!ctx->strip) { + ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0); + if (!(ctx->flags & BCDUMP_F_STRIP)) { p = lj_strfmt_wuleb128(p, len); p = lj_buf_wmem(p, name, len); } - ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb), - (MSize)(p - sbufB(&ctx->sb)), ctx->wdata); + ctx->status = ctx->wfunc(sbufL(&ctx->sb), ctx->sb.b, + (MSize)(p - ctx->sb.b), ctx->wdata); } /* Write footer of bytecode dump. */ @@ -343,19 +428,25 @@ static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) /* Write bytecode for a prototype. 
*/ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, - int strip) + uint32_t flags) { BCWriteCtx ctx; int status; ctx.pt = pt; ctx.wfunc = writer; ctx.wdata = data; - ctx.strip = strip; + ctx.heapsz = 0; + if ((bc_op(proto_bc(pt)[0]) != BC_NOT) == LJ_FR2) flags |= BCDUMP_F_FR2; + ctx.flags = flags; ctx.status = 0; +#ifdef LUA_USE_ASSERT + ctx.g = G(L); +#endif lj_buf_init(L, &ctx.sb); status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); if (status == 0) status = ctx.status; lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb); + bcwrite_heap_resize(&ctx, 0); return status; } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_buf.c b/source/libs/luajit/LuaJIT-src/src/lj_buf.c index 0dfe7f9807550301a227540061e46840ac5e40c5..01dcad5bc698c76d6518a4ef0f2b44dd80f052fe 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_buf.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_buf.c @@ -1,6 +1,6 @@ /* ** Buffer handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_buf_c @@ -20,44 +20,83 @@ static void buf_grow(SBuf *sb, MSize sz) { MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz; char *b; + GCSize flag; if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF; while (nsz < sz) nsz += nsz; - b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz); - setmref(sb->b, b); - setmref(sb->p, b + len); - setmref(sb->e, b + nsz); + flag = sbufflag(sb); + if ((flag & SBUF_FLAG_COW)) { /* Copy-on-write semantics. */ + lj_assertG_(G(sbufL(sb)), sb->w == sb->e, "bad SBuf COW"); + b = (char *)lj_mem_new(sbufL(sb), nsz); + setsbufflag(sb, flag & ~(GCSize)SBUF_FLAG_COW); + setgcrefnull(sbufX(sb)->cowref); + memcpy(b, sb->b, osz); + } else { + b = (char *)lj_mem_realloc(sbufL(sb), sb->b, osz, nsz); + } + if ((flag & SBUF_FLAG_EXT)) { + sbufX(sb)->r = sbufX(sb)->r - sb->b + b; /* Adjust read pointer, too. */ + } + /* Adjust buffer pointers. 
*/ + sb->b = b; + sb->w = b + len; + sb->e = b + nsz; + if ((flag & SBUF_FLAG_BORROW)) { /* Adjust borrowed buffer pointers. */ + SBuf *bsb = mref(sbufX(sb)->bsb, SBuf); + bsb->b = b; + bsb->w = b + len; + bsb->e = b + nsz; + } } LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz) { - lua_assert(sz > sbufsz(sb)); + lj_assertG_(G(sbufL(sb)), sz > sbufsz(sb), "SBuf overflow"); if (LJ_UNLIKELY(sz > LJ_MAX_BUF)) lj_err_mem(sbufL(sb)); buf_grow(sb, sz); - return sbufB(sb); + return sb->b; } LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz) { - MSize len = sbuflen(sb); - lua_assert(sz > sbufleft(sb)); - if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) - lj_err_mem(sbufL(sb)); - buf_grow(sb, len + sz); - return sbufP(sb); + if (sbufisext(sb)) { + SBufExt *sbx = (SBufExt *)sb; + MSize len = sbufxlen(sbx); + if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) + lj_err_mem(sbufL(sbx)); + if (len + sz > sbufsz(sbx)) { /* Must grow. */ + buf_grow((SBuf *)sbx, len + sz); + } else if (sbufiscow(sb) || sbufxslack(sbx) < (sbufsz(sbx) >> 3)) { + /* Also grow to avoid excessive compactions, if slack < size/8. */ + buf_grow((SBuf *)sbx, sbuflen(sbx) + sz); /* Not sbufxlen! */ + return sbx->w; + } + if (sbx->r != sbx->b) { /* Compact by moving down. 
*/ + memmove(sbx->b, sbx->r, len); + sbx->r = sbx->b; + sbx->w = sbx->b + len; + lj_assertG_(G(sbufL(sbx)), len + sz <= sbufsz(sbx), "bad SBuf compact"); + } + } else { + MSize len = sbuflen(sb); + lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow"); + if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) + lj_err_mem(sbufL(sb)); + buf_grow(sb, len + sz); + } + return sb->w; } void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb) { - char *b = sbufB(sb); - MSize osz = (MSize)(sbufE(sb) - b); + char *b = sb->b; + MSize osz = (MSize)(sb->e - b); if (osz > 2*LJ_MIN_SBUF) { - MSize n = (MSize)(sbufP(sb) - b); b = lj_mem_realloc(L, b, osz, (osz >> 1)); - setmref(sb->b, b); - setmref(sb->p, b + n); - setmref(sb->e, b + (osz >> 1)); + sb->w = sb->b = b; /* Not supposed to keep data across shrinks. */ + sb->e = b + (osz >> 1); } + lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt"); } char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz) @@ -67,30 +106,62 @@ char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz) return lj_buf_need(sb, sz); } +#if LJ_HASBUFFER && LJ_HASJIT +void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *ref) +{ + lua_State *L = sbufL(sbx); + lj_bufx_free(L, sbx); + lj_bufx_set_cow(L, sbx, p, len); + setgcref(sbx->cowref, ref); + lj_gc_objbarrier(L, (GCudata *)sbx - 1, ref); +} + +#if LJ_HASFFI +MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz) +{ + lj_buf_more((SBuf *)sbx, sz); + return sbufleft(sbx); +} +#endif +#endif + /* -- Low-level buffer put operations ------------------------------------- */ SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len) { - char *p = lj_buf_more(sb, len); - p = lj_buf_wmem(p, q, len); - setsbufP(sb, p); + char *w = lj_buf_more(sb, len); + w = lj_buf_wmem(w, q, len); + sb->w = w; return sb; } -SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c) +#if LJ_HASJIT || LJ_HASFFI +static LJ_NOINLINE SBuf * LJ_FASTCALL lj_buf_putchar2(SBuf *sb, int c) { - char *p = lj_buf_more(sb, 
1); - *p++ = (char)c; - setsbufP(sb, p); + char *w = lj_buf_more2(sb, 1); + *w++ = (char)c; + sb->w = w; return sb; } +SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c) +{ + char *w = sb->w; + if (LJ_LIKELY(w < sb->e)) { + *w++ = (char)c; + sb->w = w; + return sb; + } + return lj_buf_putchar2(sb, c); +} +#endif + SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s) { MSize len = s->len; - char *p = lj_buf_more(sb, len); - p = lj_buf_wmem(p, strdata(s), len); - setsbufP(sb, p); + char *w = lj_buf_more(sb, len); + w = lj_buf_wmem(w, strdata(s), len); + sb->w = w; return sb; } @@ -99,47 +170,47 @@ SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s) SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s) { MSize len = s->len; - char *p = lj_buf_more(sb, len), *e = p+len; + char *w = lj_buf_more(sb, len), *e = w+len; const char *q = strdata(s)+len-1; - while (p < e) - *p++ = *q--; - setsbufP(sb, p); + while (w < e) + *w++ = *q--; + sb->w = w; return sb; } SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s) { MSize len = s->len; - char *p = lj_buf_more(sb, len), *e = p+len; + char *w = lj_buf_more(sb, len), *e = w+len; const char *q = strdata(s); - for (; p < e; p++, q++) { + for (; w < e; w++, q++) { uint32_t c = *(unsigned char *)q; #if LJ_TARGET_PPC - *p = c + ((c >= 'A' && c <= 'Z') << 5); + *w = c + ((c >= 'A' && c <= 'Z') << 5); #else if (c >= 'A' && c <= 'Z') c += 0x20; - *p = c; + *w = c; #endif } - setsbufP(sb, p); + sb->w = w; return sb; } SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s) { MSize len = s->len; - char *p = lj_buf_more(sb, len), *e = p+len; + char *w = lj_buf_more(sb, len), *e = w+len; const char *q = strdata(s); - for (; p < e; p++, q++) { + for (; w < e; w++, q++) { uint32_t c = *(unsigned char *)q; #if LJ_TARGET_PPC - *p = c - ((c >= 'a' && c <= 'z') << 5); + *w = c - ((c >= 'a' && c <= 'z') << 5); #else if (c >= 'a' && c <= 'z') c -= 0x20; - *p = c; + *w = c; #endif } - setsbufP(sb, p); + sb->w = w; return sb; } @@ 
-148,21 +219,21 @@ SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep) MSize len = s->len; if (rep > 0 && len) { uint64_t tlen = (uint64_t)rep * len; - char *p; + char *w; if (LJ_UNLIKELY(tlen > LJ_MAX_STR)) lj_err_mem(sbufL(sb)); - p = lj_buf_more(sb, (MSize)tlen); + w = lj_buf_more(sb, (MSize)tlen); if (len == 1) { /* Optimize a common case. */ uint32_t c = strdata(s)[0]; - do { *p++ = c; } while (--rep > 0); + do { *w++ = c; } while (--rep > 0); } else { const char *e = strdata(s) + len; do { const char *q = strdata(s); - do { *p++ = *q++; } while (q < e); + do { *w++ = *q++; } while (q < e); } while (--rep > 0); } - setsbufP(sb, p); + sb->w = w; } return sb; } @@ -173,27 +244,27 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e) if (i <= e) { for (;;) { cTValue *o = lj_tab_getint(t, i); - char *p; + char *w; if (!o) { badtype: /* Error: bad element type. */ - setsbufP(sb, (void *)(intptr_t)i); /* Store failing index. */ + sb->w = (char *)(intptr_t)i; /* Store failing index. 
*/ return NULL; } else if (tvisstr(o)) { MSize len = strV(o)->len; - p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len); + w = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len); } else if (tvisint(o)) { - p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o)); + w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o)); } else if (tvisnum(o)) { - p = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen); + w = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen); } else { goto badtype; } if (i++ == e) { - setsbufP(sb, p); + sb->w = w; break; } - if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen); - setsbufP(sb, p); + if (seplen) w = lj_buf_wmem(w, strdata(sep), seplen); + sb->w = w; } } return sb; @@ -203,7 +274,7 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e) GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb) { - return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb)); + return lj_str_new(sbufL(sb), sb->b, sbuflen(sb)); } /* Concatenate two strings. */ @@ -219,14 +290,14 @@ GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2) /* Read ULEB128 from buffer. */ uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp) { - const uint8_t *p = (const uint8_t *)*pp; - uint32_t v = *p++; + const uint8_t *w = (const uint8_t *)*pp; + uint32_t v = *w++; if (LJ_UNLIKELY(v >= 0x80)) { int sh = 0; v &= 0x7f; - do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80); + do { v |= ((*w & 0x7f) << (sh += 7)); } while (*w++ >= 0x80); } - *pp = (const char *)p; + *pp = (const char *)w; return v; } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_buf.h b/source/libs/luajit/LuaJIT-src/src/lj_buf.h index a4051694445bb3f16e43eafbd1fb0f7e174d1314..15a04250aa0b98c44ba0bb96931f790931911529 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_buf.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_buf.h @@ -1,6 +1,6 @@ /* ** Buffer handling. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_BUF_H @@ -10,16 +10,60 @@ #include "lj_gc.h" #include "lj_str.h" -/* Resizable string buffers. Struct definition in lj_obj.h. */ -#define sbufB(sb) (mref((sb)->b, char)) -#define sbufP(sb) (mref((sb)->p, char)) -#define sbufE(sb) (mref((sb)->e, char)) -#define sbufL(sb) (mref((sb)->L, lua_State)) -#define sbufsz(sb) ((MSize)(sbufE((sb)) - sbufB((sb)))) -#define sbuflen(sb) ((MSize)(sbufP((sb)) - sbufB((sb)))) -#define sbufleft(sb) ((MSize)(sbufE((sb)) - sbufP((sb)))) -#define setsbufP(sb, q) (setmref((sb)->p, (q))) -#define setsbufL(sb, l) (setmref((sb)->L, (l))) +/* Resizable string buffers. */ + +/* The SBuf struct definition is in lj_obj.h: +** char *w; Write pointer. +** char *e; End pointer. +** char *b; Base pointer. +** MRef L; lua_State, used for buffer resizing. Extension bits in 3 LSB. +*/ + +/* Extended string buffer. */ +typedef struct SBufExt { + SBufHeader; + union { + GCRef cowref; /* Copy-on-write object reference. */ + MRef bsb; /* Borrowed string buffer. */ + }; + char *r; /* Read pointer. */ + GCRef dict_str; /* Serialization string dictionary table. */ + GCRef dict_mt; /* Serialization metatable dictionary table. */ + int depth; /* Remaining recursion depth. */ +} SBufExt; + +#define sbufsz(sb) ((MSize)((sb)->e - (sb)->b)) +#define sbuflen(sb) ((MSize)((sb)->w - (sb)->b)) +#define sbufleft(sb) ((MSize)((sb)->e - (sb)->w)) +#define sbufxlen(sbx) ((MSize)((sbx)->w - (sbx)->r)) +#define sbufxslack(sbx) ((MSize)((sbx)->r - (sbx)->b)) + +#define SBUF_MASK_FLAG (7) +#define SBUF_MASK_L (~(GCSize)SBUF_MASK_FLAG) +#define SBUF_FLAG_EXT 1 /* Extended string buffer. */ +#define SBUF_FLAG_COW 2 /* Copy-on-write buffer. */ +#define SBUF_FLAG_BORROW 4 /* Borrowed string buffer. 
*/ + +#define sbufL(sb) \ + ((lua_State *)(void *)(uintptr_t)(mrefu((sb)->L) & SBUF_MASK_L)) +#define setsbufL(sb, l) (setmref((sb)->L, (l))) +#define setsbufXL(sb, l, flag) \ + (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) + (flag))) +#define setsbufXL_(sb, l) \ + (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) | (mrefu((sb)->L) & SBUF_MASK_FLAG))) + +#define sbufflag(sb) (mrefu((sb)->L)) +#define sbufisext(sb) (sbufflag((sb)) & SBUF_FLAG_EXT) +#define sbufiscow(sb) (sbufflag((sb)) & SBUF_FLAG_COW) +#define sbufisborrow(sb) (sbufflag((sb)) & SBUF_FLAG_BORROW) +#define sbufiscoworborrow(sb) (sbufflag((sb)) & (SBUF_FLAG_COW|SBUF_FLAG_BORROW)) +#define sbufX(sb) \ + (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb)) +#define setsbufflag(sb, flag) (setmrefu((sb)->L, (flag))) + +#define tvisbuf(o) \ + (LJ_HASBUFFER && tvisudata(o) && udataV(o)->udtype == UDTYPE_BUFFER) +#define bufV(o) check_exp(tvisbuf(o), ((SBufExt *)uddata(udataV(o)))) /* Buffer management */ LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz); @@ -30,12 +74,12 @@ LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz); static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb) { setsbufL(sb, L); - setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL); + sb->w = sb->e = sb->b = NULL; } static LJ_AINLINE void lj_buf_reset(SBuf *sb) { - setmrefr(sb->p, sb->b); + sb->w = sb->b; } static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L) @@ -48,26 +92,77 @@ static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L) static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb) { - lj_mem_free(g, sbufB(sb), sbufsz(sb)); + lj_assertG(!sbufisext(sb), "bad free of SBufExt"); + lj_mem_free(g, sb->b, sbufsz(sb)); } static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz) { if (LJ_UNLIKELY(sz > sbufsz(sb))) return lj_buf_need2(sb, sz); - return sbufB(sb); + return sb->b; } static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz) { if (LJ_UNLIKELY(sz > sbufleft(sb))) return 
lj_buf_more2(sb, sz); - return sbufP(sb); + return sb->w; +} + +/* Extended buffer management */ +static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx) +{ + memset(sbx, 0, sizeof(SBufExt)); + setsbufXL(sbx, L, SBUF_FLAG_EXT); +} + +static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb) +{ + setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW); + setmref(sbx->bsb, sb); + sbx->r = sbx->w = sbx->b = sb->b; + sbx->e = sb->e; +} + +static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx, + const char *p, MSize len) +{ + setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW); + sbx->r = sbx->b = (char *)p; + sbx->w = sbx->e = (char *)p + len; +} + +static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx) +{ + if (sbufiscow(sbx)) { + setmrefu(sbx->L, (mrefu(sbx->L) & ~(GCSize)SBUF_FLAG_COW)); + setgcrefnull(sbx->cowref); + sbx->b = sbx->e = NULL; + } + sbx->r = sbx->w = sbx->b; } +static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx) +{ + if (!sbufiscoworborrow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx)); + setsbufXL(sbx, L, SBUF_FLAG_EXT); + setgcrefnull(sbx->cowref); + sbx->r = sbx->w = sbx->b = sbx->e = NULL; +} + +#if LJ_HASBUFFER && LJ_HASJIT +LJ_FUNC void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *o); +#if LJ_HASFFI +LJ_FUNC MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz); +#endif +#endif + /* Low-level buffer put operations */ LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len); +#if LJ_HASJIT || LJ_HASFFI LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c); +#endif LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s); static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len) @@ -77,9 +172,9 @@ static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len) static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c) { - char *p = lj_buf_more(sb, 1); - *p++ = (char)c; - setsbufP(sb, p); + char *w = lj_buf_more(sb, 1); + *w++ = (char)c; + sb->w = w; } /* 
High-level buffer put operations */ @@ -97,7 +192,7 @@ LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp); static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb) { - return lj_str_new(L, sbufB(sb), sbuflen(sb)); + return lj_str_new(L, sb->b, sbuflen(sb)); } #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_carith.c b/source/libs/luajit/LuaJIT-src/src/lj_carith.c index 218abd260f4c89d4d124a7450e9313dab6a54988..b09812c6564b65e73c6486f3da132cff54cdeb83 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_carith.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_carith.c @@ -1,6 +1,6 @@ /* ** C data arithmetic. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -44,9 +44,13 @@ static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca) p = (uint8_t *)cdata_getptr(p, ct->size); if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct); } else if (ctype_isfunc(ct->info)) { + CTypeID id0 = i ? ctype_typeid(cts, ca->ct[0]) : 0; p = (uint8_t *)*(void **)p; ct = ctype_get(cts, lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR)); + if (i) { /* cts->tab may have been reallocated. 
*/ + ca->ct[0] = ctype_get(cts, id0); + } } if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct); ca->ct[i] = ct; @@ -122,7 +126,7 @@ static int carith_ptr(lua_State *L, CTState *cts, CDArith *ca, MMS mm) setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2)); return 1; } else { - lua_assert(mm == MM_le); + lj_assertL(mm == MM_le, "bad metamethod %d", mm); setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2)); return 1; } @@ -207,8 +211,10 @@ static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm) else *up = lj_carith_powu64(u0, u1); break; - case MM_unm: *up = (uint64_t)-(int64_t)u0; break; - default: lua_assert(0); break; + case MM_unm: *up = ~u0+1u; break; + default: + lj_assertL(0, "bad metamethod %d", mm); + break; } lj_gc_check(L); return 1; @@ -265,7 +271,7 @@ int lj_carith_op(lua_State *L, MMS mm) { CTState *cts = ctype_cts(L); CDArith ca; - if (carith_checkarg(L, cts, &ca)) { + if (carith_checkarg(L, cts, &ca) && mm != MM_len && mm != MM_concat) { if (carith_int64(L, cts, &ca, mm) || carith_ptr(L, cts, &ca, mm)) { copyTV(L, &G(L)->tmptv2, L->top-1); /* Remember for trace recorder. 
*/ return 1; @@ -301,7 +307,9 @@ uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op) case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break; case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break; case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break; - default: lua_assert(0); break; + default: + lj_assertX(0, "bad shift op %d", op); + break; } return x; } @@ -341,13 +349,10 @@ uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id) if (LJ_LIKELY(tvisint(o))) { return (uint32_t)intV(o); } else { - int32_t i = lj_num2bit(numV(o)); - if (LJ_DUALNUM) setintV(o, i); - return (uint32_t)i; + return (uint32_t)lj_num2bit(numV(o)); } } - /* -- 64 bit integer arithmetic helpers ----------------------------------- */ #if LJ_32 && LJ_HASJIT diff --git a/source/libs/luajit/LuaJIT-src/src/lj_carith.h b/source/libs/luajit/LuaJIT-src/src/lj_carith.h index 67d976bf0c009fc7ce8861267e22b30fb5b1978b..6fad1c694814e43d7d817919a0e4b397fc178fcc 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_carith.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_carith.h @@ -1,6 +1,6 @@ /* ** C data arithmetic. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CARITH_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ccall.c b/source/libs/luajit/LuaJIT-src/src/lj_ccall.c index 5c252e5b68302a8cb3a9dbd8af567108b651269a..ae69cd28d100dee857b30ed3a6227ed12281ff33 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_ccall.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_ccall.c @@ -1,6 +1,6 @@ /* ** FFI C call handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -20,12 +20,15 @@ #if LJ_TARGET_X86 /* -- x86 calling conventions --------------------------------------------- */ +#define CCALL_PUSH(arg) \ + *(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR + #if LJ_ABI_WIN #define CCALL_HANDLE_STRUCTRET \ /* Return structs bigger than 8 by reference (on stack only). */ \ cc->retref = (sz > 8); \ - if (cc->retref) cc->stack[nsp++] = (GPRArg)dp; + if (cc->retref) CCALL_PUSH(dp); #define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET @@ -40,7 +43,7 @@ if (ngpr < maxgpr) \ cc->gpr[ngpr++] = (GPRArg)dp; \ else \ - cc->stack[nsp++] = (GPRArg)dp; \ + CCALL_PUSH(dp); \ } else { /* Struct with single FP field ends up in FPR. */ \ cc->resx87 = ccall_classify_struct(cts, ctr); \ } @@ -56,7 +59,7 @@ if (ngpr < maxgpr) \ cc->gpr[ngpr++] = (GPRArg)dp; \ else \ - cc->stack[nsp++] = (GPRArg)dp; + CCALL_PUSH(dp); #endif @@ -67,7 +70,7 @@ if (ngpr < maxgpr) \ cc->gpr[ngpr++] = (GPRArg)dp; \ else \ - cc->stack[nsp++] = (GPRArg)dp; \ + CCALL_PUSH(dp); \ } #endif @@ -278,8 +281,8 @@ if (ngpr < maxgpr) { \ dp = &cc->gpr[ngpr]; \ if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ + nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \ ngpr = maxgpr; \ } else { \ ngpr += n; \ @@ -334,20 +337,20 @@ isfp = sz == 2*sizeof(float) ? 2 : 1; #define CCALL_HANDLE_REGARG \ - if (LJ_TARGET_IOS && isva) { \ + if (LJ_TARGET_OSX && isva) { \ /* IOS: All variadic arguments are on the stack. */ \ } else if (isfp) { /* Try to pass argument in FPRs. */ \ - int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \ + int n2 = ctype_isvector(d->info) ? 1 : \ + isfp == 1 ? 
n : (d->size >> (4-isfp)); \ if (nfpr + n2 <= CCALL_NARG_FPR) { \ dp = &cc->fpr[nfpr]; \ nfpr += n2; \ goto done; \ } else { \ nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ - if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \ } \ } else { /* Try to pass argument in GPRs. */ \ - if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ + if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ if (ngpr + n <= maxgpr) { \ dp = &cc->gpr[ngpr]; \ @@ -355,7 +358,6 @@ goto done; \ } else { \ ngpr = maxgpr; /* Prevent reordering. */ \ - if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \ } \ } @@ -387,6 +389,25 @@ #define CCALL_HANDLE_COMPLEXARG \ /* Pass complex by value in 2 or 4 GPRs. */ +#define CCALL_HANDLE_GPR \ + /* Try to pass argument in GPRs. */ \ + if (n > 1) { \ + /* int64_t or complex (float). */ \ + lj_assertL(n == 2 || n == 4, "bad GPR size %d", n); \ + if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \ + ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ + else if (ngpr + n > maxgpr) \ + ngpr = maxgpr; /* Prevent reordering. */ \ + } \ + if (ngpr + n <= maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } \ + +#if LJ_ABI_SOFTFP +#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR +#else #define CCALL_HANDLE_REGARG \ if (isfp) { /* Try to pass argument in FPRs. */ \ if (nfpr + 1 <= CCALL_NARG_FPR) { \ @@ -395,24 +416,16 @@ d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ goto done; \ } \ - } else { /* Try to pass argument in GPRs. */ \ - if (n > 1) { \ - lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ - if (ctype_isinteger(d->info)) \ - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ - else if (ngpr + n > maxgpr) \ - ngpr = maxgpr; /* Prevent reordering. 
*/ \ - } \ - if (ngpr + n <= maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ + } else { \ + CCALL_HANDLE_GPR \ } +#endif +#if !LJ_ABI_SOFTFP #define CCALL_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ +#endif #elif LJ_TARGET_MIPS32 /* -- MIPS o32 calling conventions ---------------------------------------- */ @@ -459,8 +472,8 @@ if (ngpr < maxgpr) { \ dp = &cc->gpr[ngpr]; \ if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ + nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \ ngpr = maxgpr; \ } else { \ ngpr += n; \ @@ -553,8 +566,8 @@ if (ngpr < maxgpr) { \ dp = &cc->gpr[ngpr]; \ if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ + nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \ ngpr = maxgpr; \ } else { \ ngpr += n; \ @@ -631,7 +644,8 @@ static void ccall_classify_ct(CTState *cts, CType *ct, int *rcl, CTSize ofs) ccall_classify_struct(cts, ct, rcl, ofs); } else { int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT; - lua_assert(ctype_hassize(ct->info)); + lj_assertCTS(ctype_hassize(ct->info), + "classify ctype %08x without size", ct->info); if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */ rcl[(ofs >= 8)] |= cl; } @@ -656,12 +670,13 @@ static int ccall_classify_struct(CTState *cts, CType *ct, int *rcl, CTSize ofs) } /* Try to split up a small struct into registers. 
*/ -static int ccall_struct_reg(CCallState *cc, GPRArg *dp, int *rcl) +static int ccall_struct_reg(CCallState *cc, CTState *cts, GPRArg *dp, int *rcl) { MSize ngpr = cc->ngpr, nfpr = cc->nfpr; uint32_t i; + UNUSED(cts); for (i = 0; i < 2; i++) { - lua_assert(!(rcl[i] & CCALL_RCL_MEM)); + lj_assertCTS(!(rcl[i] & CCALL_RCL_MEM), "pass mem struct in reg"); if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */ if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. */ cc->gpr[ngpr++] = dp[i]; @@ -682,11 +697,13 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl, dp[0] = dp[1] = 0; /* Convert to temp. struct. */ lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); - if (ccall_struct_reg(cc, dp, rcl)) { /* Register overflow? Pass on stack. */ - MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; - if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ - cc->nsp = nsp + n; - memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR); + if (ccall_struct_reg(cc, cts, dp, rcl)) { + /* Register overflow? Pass on stack. */ + MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR; + if (nsp + sz > CCALL_SIZE_STACK) + return 1; /* Too many arguments. */ + cc->nsp = nsp + sz; + memcpy((uint8_t *)cc->stack + nsp, dp, sz); } return 0; /* Ok. */ } @@ -838,7 +855,8 @@ noth: /* Not a homogeneous float/double aggregate. */ return 0; /* Struct is in GPRs. */ } -void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) +static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, + int ft) { if (LJ_ABI_SOFTFP ? ft : ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { @@ -967,6 +985,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, fid = ctf->sib; } +#if LJ_TARGET_ARM64 && LJ_ABI_WIN + if ((ct->info & CTF_VARARG)) { + nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */ + ngpr = maxgpr; + nfpr = CCALL_NARG_FPR; + } +#endif + /* Walk through all passed arguments. 
*/ for (o = L->base+1, narg = 1; o < top; o++, narg++) { CTypeID did; @@ -978,7 +1004,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, if (fid) { /* Get argument type from field. */ CType *ctf = ctype_get(cts, fid); fid = ctf->sib; - lua_assert(ctype_isfield(ctf->info)); + lj_assertL(ctype_isfield(ctf->info), "field expected"); did = ctype_cid(ctf->info); } else { if (!(ct->info & CTF_VARARG)) @@ -1003,25 +1029,31 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CCALL_HANDLE_STRUCTARG } else if (ctype_iscomplex(d->info)) { CCALL_HANDLE_COMPLEXARG - } else { + } else if (!(CCALL_PACK_STACKARG && ctype_isenum(d->info))) { sz = CTSIZE_PTR; } - sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); - n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */ + n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */ CCALL_HANDLE_REGARG /* Handle register arguments. */ /* Otherwise pass argument on stack. */ - if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) { - MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1; - nsp = (nsp + align) & ~align; /* Align argument on stack. */ + if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */ + MSize align = (1u << ctype_align(d->info)) - 1; + if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1)) + align = CTSIZE_PTR-1; + nsp = (nsp + align) & ~align; } - if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */ +#if LJ_TARGET_ARM64 && LJ_ABI_WIN + /* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */ + dp = ((uint8_t *)cc->stack) + (int32_t)nsp; +#else + dp = ((uint8_t *)cc->stack) + nsp; +#endif + nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR; + if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. 
*/ err_nyi: lj_err_caller(L, LJ_ERR_FFI_NYICALL); } - dp = &cc->stack[nsp]; - nsp += n; isva = 0; done: @@ -1032,7 +1064,8 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, } lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); /* Extend passed integers to 32 bits at least. */ - if (ctype_isinteger_or_bool(d->info) && d->size < 4) { + if (ctype_isinteger_or_bool(d->info) && d->size < 4 && + (!CCALL_PACK_STACKARG || !((uintptr_t)dp & 3))) { /* Assumes LJ_LE. */ if (d->info & CTF_UNSIGNED) *(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp : (uint32_t)*(uint16_t *)dp; @@ -1079,14 +1112,17 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, #endif } if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ +#if LJ_TARGET_ARM64 && LJ_ABI_WIN + if ((int32_t)nsp < 0) nsp = 0; +#endif -#if LJ_TARGET_X64 || LJ_TARGET_PPC +#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) cc->nfpr = nfpr; /* Required for vararg functions. */ #endif - cc->nsp = nsp; - cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR; - if (nsp > CCALL_SPS_FREE) - cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u); + cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); + cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR; + if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR) + cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u); return gcsteps; } @@ -1126,7 +1162,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct, CCALL_HANDLE_RET #endif /* No reference types end up here, so there's no need for the CTypeID. 
*/ - lua_assert(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info))); + lj_assertL(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)), + "unexpected reference ctype"); return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp); } @@ -1150,7 +1187,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) lj_vm_ffi_call(&cc); if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */ TValue tv; - setlightudV(&tv, (void *)cc.func); + tv.u64 = ((uintptr_t)(void *)cc.func >> 2) | U64x(800000000, 00000000); setboolV(lj_tab_set(L, cts->miscmap, &tv), 1); } ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ccall.h b/source/libs/luajit/LuaJIT-src/src/lj_ccall.h index 59f664817a298d06f2e20632c458c4a74abd9b01..3528fca55e8d90ce72107419465523a82b6362f9 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_ccall.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_ccall.h @@ -1,6 +1,6 @@ /* ** FFI C call handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CCALL_H @@ -75,6 +75,9 @@ typedef union FPRArg { #define CCALL_NARG_FPR 8 #define CCALL_NRET_FPR 4 #define CCALL_SPS_FREE 0 +#if LJ_TARGET_OSX +#define CCALL_PACK_STACKARG 1 +#endif typedef intptr_t GPRArg; typedef union FPRArg { @@ -86,9 +89,9 @@ typedef union FPRArg { #elif LJ_TARGET_PPC #define CCALL_NARG_GPR 8 -#define CCALL_NARG_FPR 8 +#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8) #define CCALL_NRET_GPR 4 /* For complex double. */ -#define CCALL_NRET_FPR 1 +#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 
0 : 1) #define CCALL_SPS_EXTRA 4 #define CCALL_SPS_FREE 0 @@ -139,6 +142,9 @@ typedef union FPRArg { #ifndef CCALL_ALIGN_STACKARG #define CCALL_ALIGN_STACKARG 1 #endif +#ifndef CCALL_PACK_STACKARG +#define CCALL_PACK_STACKARG 0 +#endif #ifndef CCALL_ALIGN_CALLSTATE #define CCALL_ALIGN_CALLSTATE 8 #endif @@ -152,14 +158,15 @@ typedef union FPRArg { LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR); LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR); -#define CCALL_MAXSTACK 32 +#define CCALL_NUM_STACK 31 +#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR) /* -- C call state -------------------------------------------------------- */ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { void (*func)(void); /* Pointer to called function. */ uint32_t spadj; /* Stack pointer adjustment. */ - uint8_t nsp; /* Number of stack slots. */ + uint8_t nsp; /* Number of bytes on stack. */ uint8_t retref; /* Return value by reference. */ #if LJ_TARGET_X64 uint8_t ngpr; /* Number of arguments in GPRs. */ @@ -178,7 +185,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ #endif GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */ - GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */ + GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */ } CCallState; /* -- C call handling ----------------------------------------------------- */ diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ccallback.c b/source/libs/luajit/LuaJIT-src/src/lj_ccallback.c index 846827b119b4e7767bb2233ad0eee2975b11915a..52f92932f0d731f2c8a0e6074ecb6d074deb8081 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_ccallback.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_ccallback.c @@ -1,6 +1,6 @@ /* ** FFI C callback handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -107,9 +107,9 @@ MSize lj_ccallback_ptr2slot(CTState *cts, void *p) /* Initialize machine code for callback function pointers. */ #if LJ_OS_NOJIT /* Disabled callback support. */ -#define callback_mcode_init(g, p) UNUSED(p) +#define callback_mcode_init(g, p) (p) #elif LJ_TARGET_X86ORX64 -static void callback_mcode_init(global_State *g, uint8_t *page) +static void *callback_mcode_init(global_State *g, uint8_t *page) { uint8_t *p = page; uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback; @@ -143,10 +143,10 @@ static void callback_mcode_init(global_State *g, uint8_t *page) *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); } } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #elif LJ_TARGET_ARM -static void callback_mcode_init(global_State *g, uint32_t *page) +static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; @@ -165,19 +165,19 @@ static void callback_mcode_init(global_State *g, uint32_t *page) *p = ARMI_B | ((page-p-2) & 0x00ffffffu); p++; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #elif LJ_TARGET_ARM64 -static void callback_mcode_init(global_State *g, uint32_t *page) +static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; - void *target = (void *)lj_vm_ffi_callback; + ASMFunction target = lj_vm_ffi_callback; MSize slot; *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4)); *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5)); - *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11)); + *p++ = A64I_LE(A64I_BR_AUTH | A64F_N(RID_X11)); *p++ = A64I_LE(A64I_NOP); - ((void **)p)[0] = target; + ((ASMFunction *)p)[0] = target; ((void **)p)[1] = g; p += 4; for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { @@ -185,10 +185,10 @@ static void callback_mcode_init(global_State *g, uint32_t *page) *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); p++; } - 
lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #elif LJ_TARGET_PPC -static void callback_mcode_init(global_State *g, uint32_t *page) +static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; @@ -204,10 +204,10 @@ static void callback_mcode_init(global_State *g, uint32_t *page) *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); p++; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #elif LJ_TARGET_MIPS -static void callback_mcode_init(global_State *g, uint32_t *page) +static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; @@ -236,11 +236,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page) p++; *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #else /* Missing support for this architecture. */ -#define callback_mcode_init(g, p) UNUSED(p) +#define callback_mcode_init(g, p) (p) #endif /* -- Machine code management --------------------------------------------- */ @@ -256,6 +256,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page) #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif +#ifdef PROT_MPROTECT +#define CCPROT_CREATE (PROT_MPROTECT(PROT_EXEC)) +#else +#define CCPROT_CREATE 0 +#endif #endif @@ -263,15 +268,15 @@ static void callback_mcode_init(global_State *g, uint32_t *page) static void callback_mcode_new(CTState *cts) { size_t sz = (size_t)CALLBACK_MCODE_SIZE; - void *p; + void *p, *pe; if (CALLBACK_MAX_SLOT == 0) lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); #if LJ_TARGET_WINDOWS - p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); if (!p) lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); #elif LJ_TARGET_POSIX - p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS, + p = mmap(NULL, sz, 
(PROT_READ|PROT_WRITE|CCPROT_CREATE), MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); @@ -280,12 +285,15 @@ static void callback_mcode_new(CTState *cts) p = lj_mem_new(cts->L, sz); #endif cts->cb.mcode = p; - callback_mcode_init(cts->g, p); + pe = callback_mcode_init(cts->g, p); + UNUSED(pe); + lj_assertCTS((size_t)((char *)pe - (char *)p) <= sz, + "miscalculated CALLBACK_MAX_SLOT"); lj_mcode_sync(p, (char *)p + sz); #if LJ_TARGET_WINDOWS { DWORD oprot; - VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot); + LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot); } #elif LJ_TARGET_POSIX mprotect(p, sz, (PROT_READ|PROT_EXEC)); @@ -406,7 +414,7 @@ void lj_ccallback_mcode_free(CTState *cts) nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ } \ } else { \ - if (!LJ_TARGET_IOS && n > 1) \ + if (!LJ_TARGET_OSX && n > 1) \ ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ if (ngpr + n <= maxgpr) { \ sp = &cts->cb.gpr[ngpr]; \ @@ -419,6 +427,24 @@ void lj_ccallback_mcode_free(CTState *cts) #elif LJ_TARGET_PPC +#define CALLBACK_HANDLE_GPR \ + if (n > 1) { \ + lj_assertCTS(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \ + ctype_isinteger(cta->info)) && n == 2, /* int64_t. */ \ + "bad GPR type"); \ + ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ + } \ + if (ngpr + n <= maxgpr) { \ + sp = &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } + +#if LJ_ABI_SOFTFP +#define CALLBACK_HANDLE_REGARG \ + CALLBACK_HANDLE_GPR \ + UNUSED(isfp); +#else #define CALLBACK_HANDLE_REGARG \ if (isfp) { \ if (nfpr + 1 <= CCALL_NARG_FPR) { \ @@ -427,20 +453,15 @@ void lj_ccallback_mcode_free(CTState *cts) goto done; \ } \ } else { /* Try to pass argument in GPRs. */ \ - if (n > 1) { \ - lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. 
*/ \ - } \ - if (ngpr + n <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ + CALLBACK_HANDLE_GPR \ } +#endif +#if !LJ_ABI_SOFTFP #define CALLBACK_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ +#endif #elif LJ_TARGET_MIPS32 @@ -533,13 +554,13 @@ static void callback_conv_args(CTState *cts, lua_State *L) if (LJ_FR2) { (o++)->u64 = LJ_CONT_FFI_CALLBACK; (o++)->u64 = rid; - o++; } else { o->u32.lo = LJ_CONT_FFI_CALLBACK; o->u32.hi = rid; o++; } setframe_gc(o, obj2gco(fn), fntp); + if (LJ_FR2) o++; setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT); L->top = L->base = ++o; if (!ct) @@ -567,7 +588,7 @@ static void callback_conv_args(CTState *cts, lua_State *L) CTSize sz; int isfp; MSize n; - lua_assert(ctype_isfield(ctf->info)); + lj_assertCTS(ctype_isfield(ctf->info), "field expected"); cta = ctype_rawchild(cts, ctf); isfp = ctype_isfp(cta->info); sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); @@ -659,7 +680,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf) { lua_State *L = cts->L; global_State *g = cts->g; - lua_assert(L != NULL); + lj_assertG(L != NULL, "uninitialized cts->L in callback"); if (tvref(g->jit_base)) { setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); if (g->panic) g->panic(L); @@ -744,7 +765,7 @@ static CType *callback_checkfunc(CTState *cts, CType *ct) CType *ctf = ctype_get(cts, fid); if (!ctype_isattrib(ctf->info)) { CType *cta; - lua_assert(ctype_isfield(ctf->info)); + lj_assertCTS(ctype_isfield(ctf->info), "field expected"); cta = ctype_rawchild(cts, ctf); if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) || (ctype_isnum(cta->info) && cta->size <= 8)) || diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ccallback.h b/source/libs/luajit/LuaJIT-src/src/lj_ccallback.h index a8cdad386307149e9da8c3c1d2dfe22afd19b938..073a69c95f6f4e2b38ef9cfdb88f045df39394d3 100644 --- 
a/source/libs/luajit/LuaJIT-src/src/lj_ccallback.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_ccallback.h @@ -1,6 +1,6 @@ /* ** FFI C callback handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CCALLBACK_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_cconv.c b/source/libs/luajit/LuaJIT-src/src/lj_cconv.c index 13b8230dc83eec3f20b9b2479e5676350fff20ce..854b51db747774f58ec0312b865b44196819d108 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_cconv.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_cconv.c @@ -1,6 +1,6 @@ /* ** C type conversions. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -8,6 +8,7 @@ #if LJ_HASFFI #include "lj_err.h" +#include "lj_buf.h" #include "lj_tab.h" #include "lj_ctype.h" #include "lj_cdata.h" @@ -122,19 +123,25 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, CTInfo dinfo = d->info, sinfo = s->info; void *tmpptr; - lua_assert(!ctype_isenum(dinfo) && !ctype_isenum(sinfo)); - lua_assert(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo)); + lj_assertCTS(!ctype_isenum(dinfo) && !ctype_isenum(sinfo), + "unresolved enum"); + lj_assertCTS(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo), + "unstripped attribute"); if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT) goto err_conv; /* Some basic sanity checks. 
*/ - lua_assert(!ctype_isnum(dinfo) || dsize > 0); - lua_assert(!ctype_isnum(sinfo) || ssize > 0); - lua_assert(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4); - lua_assert(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4); - lua_assert(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize); - lua_assert(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize); + lj_assertCTS(!ctype_isnum(dinfo) || dsize > 0, "bad size for number type"); + lj_assertCTS(!ctype_isnum(sinfo) || ssize > 0, "bad size for number type"); + lj_assertCTS(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4, + "bad size for bool type"); + lj_assertCTS(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4, + "bad size for bool type"); + lj_assertCTS(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize, + "bad size for integer type"); + lj_assertCTS(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize, + "bad size for integer type"); switch (cconv_idx2(dinfo, sinfo)) { /* Destination is a bool. */ @@ -357,7 +364,7 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, if ((flags & CCF_CAST) || (d->info & CTF_VLA) || d != s) goto err_conv; /* Must be exact same type. */ copyval: /* Copy value. */ - lua_assert(dsize == ssize); + lj_assertCTS(dsize == ssize, "value copy with different sizes"); memcpy(dp, sp, dsize); break; @@ -389,7 +396,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid, lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s, (uint8_t *)&o->n, sp, 0); /* Numbers are NOT canonicalized here! Beware of uninitialized data. */ - lua_assert(tvisnum(o)); + lj_assertCTS(tvisnum(o), "non-canonical NaN passed"); } } else { uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0); @@ -406,7 +413,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid, CTSize sz; copyval: /* Copy value. */ sz = s->size; - lua_assert(sz != CTSIZE_INVALID); + lj_assertCTS(sz != CTSIZE_INVALID, "value copy with invalid size"); /* Attributes are stripped, qualifiers are kept (but mostly ignored). 
*/ cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz); setcdataV(cts->L, o, cd); @@ -421,19 +428,22 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) CTInfo info = s->info; CTSize pos, bsz; uint32_t val; - lua_assert(ctype_isbitfield(info)); + lj_assertCTS(ctype_isbitfield(info), "bitfield expected"); /* NYI: packed bitfields may cause misaligned reads. */ switch (ctype_bitcsz(info)) { case 4: val = *(uint32_t *)sp; break; case 2: val = *(uint16_t *)sp; break; case 1: val = *(uint8_t *)sp; break; - default: lua_assert(0); val = 0; break; + default: + lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info)); + val = 0; + break; } /* Check if a packed bitfield crosses a container boundary. */ pos = ctype_bitpos(info); bsz = ctype_bitbsz(info); - lua_assert(pos < 8*ctype_bitcsz(info)); - lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); + lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position"); + lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size"); if (pos + bsz > 8*ctype_bitcsz(info)) lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); if (!(info & CTF_BOOL)) { @@ -449,7 +459,7 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) } } else { uint32_t b = (val >> pos) & 1; - lua_assert(bsz == 1); + lj_assertCTS(bsz == 1, "bad bool bitfield size"); setboolV(o, b); setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */ } @@ -553,13 +563,15 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, sid = cdataV(o)->ctypeid; s = ctype_get(cts, sid); if (ctype_isref(s->info)) { /* Resolve reference for value. */ - lua_assert(s->size == CTSIZE_PTR); + lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized"); sp = *(void **)sp; sid = ctype_cid(s->info); } s = ctype_raw(cts, sid); if (ctype_isfunc(s->info)) { + CTypeID did = ctype_typeid(cts, d); sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR); + d = ctype_get(cts, did); /* cts->tab may have been reallocated. 
*/ } else { if (ctype_isenum(s->info)) s = ctype_child(cts, s); goto doconv; @@ -571,7 +583,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, CType *cct = lj_ctype_getfield(cts, d, str, &ofs); if (!cct || !ctype_isconstval(cct->info)) goto err_conv; - lua_assert(d->size == 4); + lj_assertCTS(d->size == 4, "only 32 bit enum supported"); /* NYI */ sp = (uint8_t *)&cct->size; sid = ctype_cid(cct->info); } else if (ctype_isrefarray(d->info)) { /* Copy string to array. */ @@ -610,8 +622,10 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, tmpptr = uddata(ud); if (ud->udtype == UDTYPE_IO_FILE) tmpptr = *(void **)tmpptr; + else if (ud->udtype == UDTYPE_BUFFER) + tmpptr = ((SBufExt *)tmpptr)->r; } else if (tvislightud(o)) { - tmpptr = lightudV(o); + tmpptr = lightudV(cts->g, o); } else if (tvisfunc(o)) { void *p = lj_ccallback_new(cts, d, funcV(o)); if (p) { @@ -635,10 +649,10 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) CTInfo info = d->info; CTSize pos, bsz; uint32_t val, mask; - lua_assert(ctype_isbitfield(info)); + lj_assertCTS(ctype_isbitfield(info), "bitfield expected"); if ((info & CTF_BOOL)) { uint8_t tmpbool; - lua_assert(ctype_bitbsz(info) == 1); + lj_assertCTS(ctype_bitbsz(info) == 1, "bad bool bitfield size"); lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0); val = tmpbool; } else { @@ -647,8 +661,8 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) } pos = ctype_bitpos(info); bsz = ctype_bitbsz(info); - lua_assert(pos < 8*ctype_bitcsz(info)); - lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); + lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position"); + lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size"); /* Check if a packed bitfield crosses a container boundary. 
*/ if (pos + bsz > 8*ctype_bitcsz(info)) lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); @@ -659,7 +673,9 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break; case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break; case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break; - default: lua_assert(0); break; + default: + lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info)); + break; } } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_cconv.h b/source/libs/luajit/LuaJIT-src/src/lj_cconv.h index 0a0b66c909acb5f7e4833b289e854b2085d3c09c..1e96cd6edf9c033bfa02adc9c060f94f040934fb 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_cconv.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_cconv.h @@ -1,6 +1,6 @@ /* ** C type conversions. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CCONV_H @@ -27,13 +27,14 @@ enum { static LJ_AINLINE uint32_t cconv_idx(CTInfo info) { uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */ - lua_assert(ctype_type(info) <= CT_MAYCONVERT); + lj_assertX(ctype_type(info) <= CT_MAYCONVERT, + "cannot convert ctype %08x", info); #if LJ_64 idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u); #else idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u); #endif - lua_assert(idx < 8); + lj_assertX(idx < 8, "cannot convert ctype %08x", info); return idx; } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_cdata.c b/source/libs/luajit/LuaJIT-src/src/lj_cdata.c index 68e16d76fc9a59088b20b79bb8792fb3751c992e..3b48f76c1ebd486c9e33e585388ea6588cf31796 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_cdata.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_cdata.c @@ -1,6 +1,6 @@ /* ** C data management. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -35,7 +35,7 @@ GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align) uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); uintptr_t almask = (1u << align) - 1u; GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); - lua_assert((char *)cd - p < 65536); + lj_assertL((char *)cd - p < 65536, "excessive cdata alignment"); cdatav(cd)->offset = (uint16_t)((char *)cd - p); cdatav(cd)->extra = extra; cdatav(cd)->len = sz; @@ -76,8 +76,8 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) } else if (LJ_LIKELY(!cdataisv(cd))) { CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid); CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR; - lua_assert(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || - ctype_isextern(ct->info)); + lj_assertG(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || + ctype_isextern(ct->info), "free of ctype without a size"); lj_mem_free(g, cd, sizeof(GCcdata) + sz); } else { lj_mem_free(g, memcdatav(cd), sizecdatav(cd)); @@ -86,7 +86,7 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it) { - GCtab *t = ctype_ctsG(G(L))->finalizer; + GCtab *t = tabref(G(L)->gcroot[GCROOT_FFI_FIN]); if (gcref(t->metatable)) { /* Add cdata to finalizer table, if still enabled. */ TValue *tv, tmp; @@ -115,7 +115,7 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp, /* Resolve reference for cdata object. 
*/ if (ctype_isref(ct->info)) { - lua_assert(ct->size == CTSIZE_PTR); + lj_assertCTS(ct->size == CTSIZE_PTR, "ref is not pointer-sized"); p = *(uint8_t **)p; ct = ctype_child(cts, ct); } @@ -126,7 +126,8 @@ collect_attrib: if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size; ct = ctype_child(cts, ct); } - lua_assert(!ctype_isref(ct->info)); /* Interning rejects refs to refs. */ + /* Interning rejects refs to refs. */ + lj_assertCTS(!ctype_isref(ct->info), "bad ref of ref"); if (tvisint(key)) { idx = (ptrdiff_t)intV(key); @@ -212,7 +213,8 @@ collect_attrib: static void cdata_getconst(CTState *cts, TValue *o, CType *ct) { CType *ctt = ctype_child(cts, ct); - lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); + lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4, + "only 32 bit const supported"); /* NYI */ /* Constants are already zero-extended/sign-extended to 32 bits. */ if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) setnumV(o, (lua_Number)(uint32_t)ct->size); @@ -233,13 +235,14 @@ int lj_cdata_get(CTState *cts, CType *s, TValue *o, uint8_t *sp) } /* Get child type of pointer/array/field. */ - lua_assert(ctype_ispointer(s->info) || ctype_isfield(s->info)); + lj_assertCTS(ctype_ispointer(s->info) || ctype_isfield(s->info), + "pointer or field expected"); sid = ctype_cid(s->info); s = ctype_get(cts, sid); /* Resolve reference for field. */ if (ctype_isref(s->info)) { - lua_assert(s->size == CTSIZE_PTR); + lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized"); sp = *(uint8_t **)sp; sid = ctype_cid(s->info); s = ctype_get(cts, sid); @@ -266,12 +269,13 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) } /* Get child type of pointer/array/field. */ - lua_assert(ctype_ispointer(d->info) || ctype_isfield(d->info)); + lj_assertCTS(ctype_ispointer(d->info) || ctype_isfield(d->info), + "pointer or field expected"); d = ctype_child(cts, d); /* Resolve reference for field. 
*/ if (ctype_isref(d->info)) { - lua_assert(d->size == CTSIZE_PTR); + lj_assertCTS(d->size == CTSIZE_PTR, "ref is not pointer-sized"); dp = *(uint8_t **)dp; d = ctype_child(cts, d); } @@ -286,7 +290,8 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) d = ctype_child(cts, d); } - lua_assert(ctype_hassize(d->info) && !ctype_isvoid(d->info)); + lj_assertCTS(ctype_hassize(d->info), "store to ctype without size"); + lj_assertCTS(!ctype_isvoid(d->info), "store to void type"); if (((d->info|qual) & CTF_CONST)) { err_const: diff --git a/source/libs/luajit/LuaJIT-src/src/lj_cdata.h b/source/libs/luajit/LuaJIT-src/src/lj_cdata.h index 5bb0f5dca24ef3dee3152fe5bf070c0a92f180de..204c1f8b7b5d0acb07acd693a9071062cc4918f3 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_cdata.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_cdata.h @@ -1,6 +1,6 @@ /* ** C data management. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CDATA_H @@ -18,7 +18,7 @@ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz) if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ return ((void *)(uintptr_t)*(uint32_t *)p); } else { - lua_assert(sz == CTSIZE_PTR); + lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz); return *(void **)p; } } @@ -29,7 +29,7 @@ static LJ_AINLINE void cdata_setptr(void *p, CTSize sz, const void *v) if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ *(uint32_t *)p = (uint32_t)(uintptr_t)v; } else { - lua_assert(sz == CTSIZE_PTR); + lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz); *(void **)p = (void *)v; } } @@ -40,7 +40,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new(CTState *cts, CTypeID id, CTSize sz) GCcdata *cd; #ifdef LUA_USE_ASSERT CType *ct = ctype_raw(cts, id); - lua_assert((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz); + lj_assertCTS((ctype_hassize(ct->info) ? 
ct->size : CTSIZE_PTR) == sz, + "inconsistent size of fixed-size cdata alloc"); #endif cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz); cd->gct = ~LJ_TCDATA; diff --git a/source/libs/luajit/LuaJIT-src/src/lj_clib.c b/source/libs/luajit/LuaJIT-src/src/lj_clib.c index 614265903ac96b6c04c44c78f22931d55946d16e..218e9c086386e737030a00cc499b3c3f8883a00e 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_clib.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_clib.c @@ -1,6 +1,6 @@ /* ** FFI C library loader. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -25,7 +25,7 @@ #include <dlfcn.h> #include <stdio.h> -#if defined(RTLD_DEFAULT) +#if defined(RTLD_DEFAULT) && !defined(NO_RTLD_DEFAULT) #define CLIB_DEFHANDLE RTLD_DEFAULT #elif LJ_TARGET_OSX || LJ_TARGET_BSD #define CLIB_DEFHANDLE ((void *)(intptr_t)-2) @@ -119,12 +119,13 @@ static void *clib_loadlib(lua_State *L, const char *name, int global) RTLD_LAZY | (global?RTLD_GLOBAL:RTLD_LOCAL)); if (!h) { const char *e, *err = dlerror(); - if (*err == '/' && (e = strchr(err, ':')) && + if (err && *err == '/' && (e = strchr(err, ':')) && (name = clib_resolve_lds(L, strdata(lj_str_new(L, err, e-err))))) { h = dlopen(name, RTLD_LAZY | (global?RTLD_GLOBAL:RTLD_LOCAL)); if (h) return h; err = dlerror(); } + if (!err) err = "dlopen failed"; lj_err_callermsg(L, err); } return h; @@ -158,11 +159,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); /* Default libraries. 
*/ enum { CLIB_HANDLE_EXE, +#if !LJ_TARGET_UWP CLIB_HANDLE_DLL, CLIB_HANDLE_CRT, CLIB_HANDLE_KERNEL32, CLIB_HANDLE_USER32, CLIB_HANDLE_GDI32, +#endif CLIB_HANDLE_MAX }; @@ -208,7 +211,7 @@ static const char *clib_extname(lua_State *L, const char *name) static void *clib_loadlib(lua_State *L, const char *name, int global) { DWORD oldwerr = GetLastError(); - void *h = (void *)LoadLibraryExA(clib_extname(L, name), NULL, 0); + void *h = LJ_WIN_LOADLIBA(clib_extname(L, name)); if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); SetLastError(oldwerr); UNUSED(global); @@ -218,6 +221,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global) static void clib_unloadlib(CLibrary *cl) { if (cl->handle == CLIB_DEFHANDLE) { +#if !LJ_TARGET_UWP MSize i; for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { void *h = clib_def_handle[i]; @@ -226,11 +230,16 @@ static void clib_unloadlib(CLibrary *cl) FreeLibrary((HINSTANCE)h); } } +#endif } else if (cl->handle) { FreeLibrary((HINSTANCE)cl->handle); } } +#if LJ_TARGET_UWP +EXTERN_C IMAGE_DOS_HEADER __ImageBase; +#endif + static void *clib_getsym(CLibrary *cl, const char *name) { void *p = NULL; @@ -239,6 +248,9 @@ static void *clib_getsym(CLibrary *cl, const char *name) for (i = 0; i < CLIB_HANDLE_MAX; i++) { HINSTANCE h = (HINSTANCE)clib_def_handle[i]; if (!(void *)h) { /* Resolve default library handles (once). 
*/ +#if LJ_TARGET_UWP + h = (HINSTANCE)&__ImageBase; +#else switch (i) { case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; case CLIB_HANDLE_DLL: @@ -249,11 +261,12 @@ static void *clib_getsym(CLibrary *cl, const char *name) GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (const char *)&_fmode, &h); break; - case CLIB_HANDLE_KERNEL32: h = LoadLibraryExA("kernel32.dll", NULL, 0); break; - case CLIB_HANDLE_USER32: h = LoadLibraryExA("user32.dll", NULL, 0); break; - case CLIB_HANDLE_GDI32: h = LoadLibraryExA("gdi32.dll", NULL, 0); break; + case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break; + case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break; + case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break; } if (!h) continue; +#endif clib_def_handle[i] = (void *)h; } p = (void *)GetProcAddress(h, name); @@ -337,7 +350,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name)); if (ctype_isconstval(ct->info)) { CType *ctt = ctype_child(cts, ct); - lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); + lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4, + "only 32 bit const supported"); /* NYI */ if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) setnumV(tv, (lua_Number)(uint32_t)ct->size); else @@ -349,7 +363,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) #endif void *p = clib_getsym(cl, sym); GCcdata *cd; - lua_assert(ctype_isfunc(ct->info) || ctype_isextern(ct->info)); + lj_assertCTS(ctype_isfunc(ct->info) || ctype_isextern(ct->info), + "unexpected ctype %08x in clib", ct->info); #if LJ_TARGET_X86 && LJ_ABI_WIN /* Retry with decorated name for fastcall/stdcall functions. 
*/ if (!p && ctype_isfunc(ct->info)) { @@ -372,6 +387,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) cd = lj_cdata_new(cts, id, CTSIZE_PTR); *(void **)cdataptr(cd) = p; setcdataV(L, tv, cd); + lj_gc_anybarriert(L, cl->cache); } } return tv; diff --git a/source/libs/luajit/LuaJIT-src/src/lj_clib.h b/source/libs/luajit/LuaJIT-src/src/lj_clib.h index fcc9dac5920adb657a460c06eaada77664235749..e48d0e8b267352c60968f78f7006f5014bedc5d5 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_clib.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_clib.h @@ -1,6 +1,6 @@ /* ** FFI C library loader. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CLIB_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_cparse.c b/source/libs/luajit/LuaJIT-src/src/lj_cparse.c index 83cfd1128b399d2be977a28e82e4ef24515dca14..0668466dfb28317a7b450a8923578f6de000cc71 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_cparse.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_cparse.c @@ -1,6 +1,6 @@ /* ** C declaration parser. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -28,6 +28,30 @@ ** If in doubt, please check the input against your favorite C compiler. */ +#ifdef LUA_USE_ASSERT +#define lj_assertCP(c, ...) (lj_assertG_(G(cp->L), (c), __VA_ARGS__)) +#else +#define lj_assertCP(c, ...) ((void)cp) +#endif + +/* -- Miscellaneous ------------------------------------------------------- */ + +/* Match string against a C literal. */ +#define cp_str_is(str, k) \ + ((str)->len == sizeof(k)-1 && !memcmp(strdata(str), k, sizeof(k)-1)) + +/* Check string against a linear list of matches. 
*/ +int lj_cparse_case(GCstr *str, const char *match) +{ + MSize len; + int n; + for (n = 0; (len = (MSize)*match++); n++, match += len) { + if (str->len == len && !memcmp(match, strdata(str), len)) + return n; + } + return -1; +} + /* -- C lexer ------------------------------------------------------------- */ /* C lexer token names. */ @@ -43,7 +67,7 @@ LJ_NORET static void cp_err(CPState *cp, ErrMsg em); static const char *cp_tok2str(CPState *cp, CPToken tok) { - lua_assert(tok < CTOK_FIRSTDECL); + lj_assertCP(tok < CTOK_FIRSTDECL, "bad CPToken %d", tok); if (tok > CTOK_OFS) return ctoknames[tok-CTOK_OFS-1]; else if (!lj_char_iscntrl(tok)) @@ -109,9 +133,9 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...) tokstr = NULL; } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || tok >= CTOK_FIRSTDECL) { - if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$'); + if (cp->sb.w == cp->sb.b) cp_save(cp, '$'); cp_save(cp, '\0'); - tokstr = sbufB(&cp->sb); + tokstr = cp->sb.b; } else { tokstr = cp_tok2str(cp, tok); } @@ -151,7 +175,8 @@ static CPToken cp_number(CPState *cp) TValue o; do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); cp_save(cp, '\0'); - fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C); + fmt = lj_strscan_scan((const uint8_t *)(cp->sb.b), sbuflen(&cp->sb)-1, + &o, STRSCAN_OPT_C); if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; else if (!(cp->mode & CPARSE_MODE_SKIP)) @@ -254,7 +279,7 @@ static CPToken cp_string(CPState *cp) return CTOK_STRING; } else { if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\''); - cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb); + cp->val.i32 = (int32_t)(char)*cp->sb.b; cp->val.id = CTID_INT32; return CTOK_INTEGER; } @@ -373,7 +398,7 @@ static void cp_init(CPState *cp) cp->curpack = 0; cp->packstack[0] = 255; lj_buf_init(cp->L, &cp->sb); - lua_assert(cp->p != NULL); + lj_assertCP(cp->p != NULL, 
"uninitialized cp->p"); cp_get(cp); /* Read-ahead first char. */ cp->tok = 0; cp->tmask = CPNS_DEFAULT; @@ -443,7 +468,7 @@ static void cp_expr_sizeof(CPState *cp, CPValue *k, int wantsz) } else { cp_expr_unary(cp, k); } - info = lj_ctype_info(cp->cts, k->id, &sz); + info = lj_ctype_info_raw(cp->cts, k->id, &sz); if (wantsz) { if (sz != CTSIZE_INVALID) k->u32 = sz; @@ -463,7 +488,7 @@ static void cp_expr_prefix(CPState *cp, CPValue *k) } else if (cp_opt(cp, '+')) { cp_expr_unary(cp, k); /* Nothing to do (well, integer promotion). */ } else if (cp_opt(cp, '-')) { - cp_expr_unary(cp, k); k->i32 = -k->i32; + cp_expr_unary(cp, k); k->i32 = (int32_t)(~(uint32_t)k->i32+1); } else if (cp_opt(cp, '~')) { cp_expr_unary(cp, k); k->i32 = ~k->i32; } else if (cp_opt(cp, '!')) { @@ -576,28 +601,34 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) k->id = k2.id > k3.id ? k2.id : k3.id; continue; } + /* fallthrough */ case 1: if (cp_opt(cp, CTOK_OROR)) { cp_expr_sub(cp, &k2, 2); k->i32 = k->u32 || k2.u32; k->id = CTID_INT32; continue; } + /* fallthrough */ case 2: if (cp_opt(cp, CTOK_ANDAND)) { cp_expr_sub(cp, &k2, 3); k->i32 = k->u32 && k2.u32; k->id = CTID_INT32; continue; } + /* fallthrough */ case 3: if (cp_opt(cp, '|')) { cp_expr_sub(cp, &k2, 4); k->u32 = k->u32 | k2.u32; goto arith_result; } + /* fallthrough */ case 4: if (cp_opt(cp, '^')) { cp_expr_sub(cp, &k2, 5); k->u32 = k->u32 ^ k2.u32; goto arith_result; } + /* fallthrough */ case 5: if (cp_opt(cp, '&')) { cp_expr_sub(cp, &k2, 6); k->u32 = k->u32 & k2.u32; goto arith_result; } + /* fallthrough */ case 6: if (cp_opt(cp, CTOK_EQ)) { cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 == k2.u32; k->id = CTID_INT32; @@ -606,6 +637,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 != k2.u32; k->id = CTID_INT32; continue; } + /* fallthrough */ case 7: if (cp_opt(cp, '<')) { cp_expr_sub(cp, &k2, 8); @@ -640,6 +672,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, 
int pri) k->id = CTID_INT32; continue; } + /* fallthrough */ case 8: if (cp_opt(cp, CTOK_SHL)) { cp_expr_sub(cp, &k2, 9); k->u32 = k->u32 << k2.u32; @@ -652,6 +685,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) k->u32 = k->u32 >> k2.u32; continue; } + /* fallthrough */ case 9: if (cp_opt(cp, '+')) { cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 + k2.u32; @@ -661,6 +695,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) } else if (cp_opt(cp, '-')) { cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 - k2.u32; goto arith_result; } + /* fallthrough */ case 10: if (cp_opt(cp, '*')) { cp_expr_unary(cp, &k2); k->u32 = k->u32 * k2.u32; goto arith_result; @@ -824,12 +859,13 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) /* The cid is already part of info for copies of pointers/functions. */ idx = ct->next; if (ctype_istypedef(info)) { - lua_assert(id == 0); + lj_assertCP(id == 0, "typedef not at toplevel"); id = ctype_cid(info); /* Always refetch info/size, since struct/enum may have been completed. */ cinfo = ctype_get(cp->cts, id)->info; csize = ctype_get(cp->cts, id)->size; - lua_assert(ctype_isstruct(cinfo) || ctype_isenum(cinfo)); + lj_assertCP(ctype_isstruct(cinfo) || ctype_isenum(cinfo), + "typedef of bad type"); } else if (ctype_isfunc(info)) { /* Intern function. */ CType *fct; CTypeID fid; @@ -862,7 +898,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) /* Inherit csize/cinfo from original type. */ } else { if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */ - lua_assert(id == 0); + lj_assertCP(id == 0, "number not at toplevel"); if (!(info & CTF_BOOL)) { CTSize msize = ctype_msizeP(decl->attr); CTSize vsize = ctype_vsizeP(decl->attr); @@ -917,7 +953,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN); info |= (cinfo & CTF_QUAL); /* Inherit qual. 
*/ } else { - lua_assert(ctype_isvoid(info)); + lj_assertCP(ctype_isvoid(info), "bad ctype %08x", info); } csize = size; cinfo = info+id; @@ -929,8 +965,6 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) /* -- C declaration parser ------------------------------------------------ */ -#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be) - /* Reset declaration state to declaration specifier. */ static void cp_decl_reset(CPDecl *decl) { @@ -1059,44 +1093,57 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl) if (cp->tok == CTOK_IDENT) { GCstr *attrstr = cp->str; cp_next(cp); - switch (attrstr->hash) { - case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */ + switch (lj_cparse_case(attrstr, + "\007aligned" "\013__aligned__" + "\006packed" "\012__packed__" + "\004mode" "\010__mode__" + "\013vector_size" "\017__vector_size__" +#if LJ_TARGET_X86 + "\007regparm" "\013__regparm__" + "\005cdecl" "\011__cdecl__" + "\010thiscall" "\014__thiscall__" + "\010fastcall" "\014__fastcall__" + "\007stdcall" "\013__stdcall__" + "\012sseregparm" "\016__sseregparm__" +#endif + )) { + case 0: case 1: /* aligned */ cp_decl_align(cp, decl); break; - case H_(42eb47de,f0ede26c): case H_(29f48a09,cf383e0c): /* packed */ + case 2: case 3: /* packed */ decl->attr |= CTFP_PACKED; break; - case H_(0a84eef6,8dfab04c): case H_(995cf92c,d5696591): /* mode */ + case 4: case 5: /* mode */ cp_decl_mode(cp, decl); break; - case H_(0ab31997,2d5213fa): case H_(bf875611,200e9990): /* vector_size */ + case 6: case 7: /* vector_size */ { CTSize vsize = cp_decl_sizeattr(cp); if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); } break; #if LJ_TARGET_X86 - case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */ + case 8: case 9: /* regparm */ CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); decl->fattr |= CTFP_CCONV; break; - case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */ + case 10: case 11: /* cdecl */ CTF_INSERT(decl->fattr, 
CCONV, CTCC_CDECL); decl->fattr |= CTFP_CCONV; break; - case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */ + case 12: case 13: /* thiscall */ CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); decl->fattr |= CTFP_CCONV; break; - case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */ + case 14: case 15: /* fastcall */ CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); decl->fattr |= CTFP_CCONV; break; - case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */ + case 16: case 17: /* stdcall */ CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); decl->fattr |= CTFP_CCONV; break; - case H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */ + case 18: case 19: /* sseregparm */ decl->fattr |= CTF_SSEREGPARM; decl->fattr |= CTFP_CCONV; break; @@ -1128,16 +1175,13 @@ static void cp_decl_msvcattribute(CPState *cp, CPDecl *decl) while (cp->tok == CTOK_IDENT) { GCstr *attrstr = cp->str; cp_next(cp); - switch (attrstr->hash) { - case H_(bc2395fa,98f267f8): /* align */ + if (cp_str_is(attrstr, "align")) { cp_decl_align(cp, decl); - break; - default: /* Ignore all other attributes. */ + } else { /* Ignore all other attributes. */ if (cp_opt(cp, '(')) { while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp); cp_check(cp, ')'); } - break; } } cp_check(cp, ')'); @@ -1548,7 +1592,7 @@ end_decl: cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC); sz = sizeof(int); } - lua_assert(sz != 0); + lj_assertCP(sz != 0, "basic ctype with zero size"); info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */ info += (decl->attr & CTF_QUAL); /* Merge qualifiers. 
*/ cp_push(decl, info, sz); @@ -1717,17 +1761,18 @@ static CTypeID cp_decl_abstract(CPState *cp) static void cp_pragma(CPState *cp, BCLine pragmaline) { cp_next(cp); - if (cp->tok == CTOK_IDENT && - cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */ + if (cp->tok == CTOK_IDENT && cp_str_is(cp->str, "pack")) { cp_next(cp); cp_check(cp, '('); if (cp->tok == CTOK_IDENT) { - if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */ - if (cp->curpack < CPARSE_MAX_PACKSTACK) { + if (cp_str_is(cp->str, "push")) { + if (cp->curpack < CPARSE_MAX_PACKSTACK-1) { cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; cp->curpack++; + } else { + cp_errmsg(cp, cp->tok, LJ_ERR_XLEVELS); } - } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */ + } else if (cp_str_is(cp->str, "pop")) { if (cp->curpack > 0) cp->curpack--; } else { cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); @@ -1776,13 +1821,11 @@ static void cp_decl_multi(CPState *cp) if (tok == CTOK_INTEGER) { cp_line(cp, hashline); continue; - } else if (tok == CTOK_IDENT && - cp->str->hash == H_(187aab88,fcb60b42)) { /* line */ + } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "line")) { if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok); cp_line(cp, hashline); continue; - } else if (tok == CTOK_IDENT && - cp->str->hash == H_(f5e6b4f8,1d509107)) { /* pragma */ + } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "pragma")) { cp_pragma(cp, hashline); continue; } else { @@ -1811,7 +1854,7 @@ static void cp_decl_multi(CPState *cp) /* Treat both static and extern function declarations as extern. */ ct = ctype_get(cp->cts, ctypeid); /* We always get new anonymous functions (typedefs are copied). */ - lua_assert(gcref(ct->name) == NULL); + lj_assertCP(gcref(ct->name) == NULL, "unexpected named function"); id = ctypeid; /* Just name it. */ } else if ((scl & CDF_STATIC)) { /* Accept static constants. 
*/ id = cp_decl_constinit(cp, &ct, ctypeid); @@ -1853,8 +1896,6 @@ static void cp_decl_single(CPState *cp) if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF); } -#undef H_ - /* ------------------------------------------------------------------------ */ /* Protected callback for C parser. */ @@ -1870,7 +1911,7 @@ static TValue *cpcparser(lua_State *L, lua_CFunction dummy, void *ud) cp_decl_single(cp); if (cp->param && cp->param != cp->L->top) cp_err(cp, LJ_ERR_FFI_NUMPARAM); - lua_assert(cp->depth == 0); + lj_assertCP(cp->depth == 0, "unbalanced cparser declaration depth"); return NULL; } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_cparse.h b/source/libs/luajit/LuaJIT-src/src/lj_cparse.h index bad1060bbc9b312f7a3cd74c58d6029c651ceeb1..63e8851e81f8d5671672851cde1ef4b8df2d1759 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_cparse.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_cparse.h @@ -1,6 +1,6 @@ /* ** C declaration parser. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CPARSE_H @@ -60,6 +60,8 @@ typedef struct CPState { LJ_FUNC int lj_cparse(CPState *cp); +LJ_FUNC int lj_cparse_case(GCstr *str, const char *match); + #endif #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_crecord.c b/source/libs/luajit/LuaJIT-src/src/lj_crecord.c index e32ae23e187dfb47bd4dfd4f4cf83ac7f37a2a6b..f88cddfd95c2517a71650bc3ca9aab289b5c16a2 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_crecord.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_crecord.c @@ -1,6 +1,6 @@ /* ** Trace recorder for C data operations. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_ffrecord_c @@ -32,6 +32,7 @@ #include "lj_crecord.h" #include "lj_dispatch.h" #include "lj_strfmt.h" +#include "lj_strscan.h" /* Some local macros to save typing. Undef'd at the end. 
*/ #define IR(ref) (&J->cur.ir[(ref)]) @@ -61,7 +62,8 @@ static GCcdata *argv2cdata(jit_State *J, TRef tr, cTValue *o) static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr) { CTypeID id; - lua_assert(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID); + lj_assertJ(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID, + "expected CTypeID cdata"); id = *(CTypeID *)cdataptr(cd); tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT); emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id)); @@ -77,7 +79,7 @@ static CTypeID argv2ctype(jit_State *J, TRef tr, cTValue *o) /* Specialize to the string containing the C type declaration. */ emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, s)); cp.L = J->L; - cp.cts = ctype_ctsG(J2G(J)); + cp.cts = ctype_cts(J->L); oldtop = cp.cts->top; cp.srcname = strdata(s); cp.p = strdata(s); @@ -212,7 +214,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp, ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); ml[i].trofs = trofs; i++; - rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1; + rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1; if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. 
*/ rwin = 0; for ( ; j < i; j++) { @@ -237,13 +239,14 @@ static void crec_copy(jit_State *J, TRef trdst, TRef trsrc, TRef trlen, if (len > CREC_COPY_MAXLEN) goto fallback; if (ct) { CTState *cts = ctype_ctsG(J2G(J)); - lua_assert(ctype_isarray(ct->info) || ctype_isstruct(ct->info)); + lj_assertJ(ctype_isarray(ct->info) || ctype_isstruct(ct->info), + "copy of non-aggregate"); if (ctype_isarray(ct->info)) { CType *cct = ctype_rawchild(cts, ct); tp = crec_ct2irt(cts, cct); if (tp == IRT_CDATA) goto rawcopy; step = lj_ir_type_size[tp]; - lua_assert((len & (step-1)) == 0); + lj_assertJ((len & (step-1)) == 0, "copy of fractional size"); } else if ((ct->info & CTF_UNION)) { step = (1u << ctype_align(ct->info)); goto rawcopy; @@ -614,10 +617,12 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) sp = lj_ir_kptr(J, NULL); } else if (tref_isudata(sp)) { GCudata *ud = udataV(sval); - if (ud->udtype == UDTYPE_IO_FILE) { + if (ud->udtype == UDTYPE_IO_FILE || ud->udtype == UDTYPE_BUFFER) { TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE); - emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); - sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, IRFL_UDATA_FILE); + emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype)); + sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, + ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE : + IRFL_SBUF_R); } else { sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata))); } @@ -629,7 +634,8 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) /* Specialize to the name of the enum constant. 
*/ emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str)); if (cct && ctype_isconstval(cct->info)) { - lua_assert(ctype_child(cts, cct)->size == 4); + lj_assertJ(ctype_child(cts, cct)->size == 4, + "only 32 bit const supported"); /* NYI */ svisnz = (void *)(intptr_t)(ofs != 0); sp = lj_ir_kint(J, (int32_t)ofs); sid = ctype_cid(cct->info); @@ -643,8 +649,7 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) } } else if (tref_islightud(sp)) { #if LJ_64 - sp = emitir(IRT(IR_BAND, IRT_P64), sp, - lj_ir_kint64(J, U64x(00007fff,ffffffff))); + lj_trace_err(J, LJ_TRERR_NYICONV); #endif } else { /* NYI: tref_istab(sp). */ IRType t; @@ -757,7 +762,7 @@ static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info) IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0); TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0); CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz; - lua_assert(t <= IRT_U32); /* NYI: 64 bit bitfields. */ + lj_assertJ(t <= IRT_U32, "only 32 bit bitfields supported"); /* NYI */ if (rd->data == 0) { /* __index metamethod. */ if ((info & CTF_BOOL)) { tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos)))); @@ -769,7 +774,7 @@ static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info) tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos)); tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift)); } else { - lua_assert(bsz < 32); /* Full-size fields cannot end up here. */ + lj_assertJ(bsz < 32, "unexpected full bitfield index"); tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos)); tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1))); /* We can omit the U32 to NUM conversion, since bsz < 32. 
*/ @@ -884,7 +889,7 @@ again: crec_index_bf(J, rd, ptr, fct->info); return; } else { - lua_assert(ctype_isfield(fct->info)); + lj_assertJ(ctype_isfield(fct->info), "field expected"); sid = ctype_cid(fct->info); } } @@ -1022,8 +1027,26 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) crec_ct_tv(J, dc, dp, sp, sval); } } else if (ctype_isstruct(d->info)) { - CTypeID fid = d->sib; + CTypeID fid; MSize i = 1; + if (!J->base[1]) { /* Handle zero-fill of struct-of-NYI. */ + fid = d->sib; + while (fid) { + CType *df = ctype_get(cts, fid); + fid = df->sib; + if (ctype_isfield(df->info)) { + CType *dc; + if (!gcref(df->name)) continue; /* Ignore unnamed fields. */ + dc = ctype_rawchild(cts, df); /* Field type. */ + if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info) || + ctype_isenum(dc->info))) + goto special; + } else if (!ctype_isconstval(df->info)) { + goto special; + } + } + } + fid = d->sib; while (fid) { CType *df = ctype_get(cts, fid); fid = df->sib; @@ -1048,6 +1071,11 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, df->size + sizeof(GCcdata))); crec_ct_tv(J, dc, dp, sp, sval); + if ((d->info & CTF_UNION)) { + if (d->size != dc->size) /* NYI: partial init of union. */ + lj_trace_err(J, LJ_TRERR_NYICONV); + break; + } } else if (!ctype_isconstval(df->info)) { /* NYI: init bitfields and sub-structures. */ lj_trace_err(J, LJ_TRERR_NYICONV); @@ -1091,6 +1119,8 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, ngpr = 1; else if (ctype_cconv(ct->info) == CTCC_FASTCALL) ngpr = 2; +#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX + int ngpr = CCALL_NARG_GPR; #endif /* Skip initial attributes. */ @@ -1111,11 +1141,19 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, if (fid) { /* Get argument type from field. 
*/ CType *ctf = ctype_get(cts, fid); fid = ctf->sib; - lua_assert(ctype_isfield(ctf->info)); + lj_assertJ(ctype_isfield(ctf->info), "field expected"); did = ctype_cid(ctf->info); } else { if (!(ct->info & CTF_VARARG)) lj_trace_err(J, LJ_TRERR_NYICALL); /* Too many arguments. */ +#if LJ_TARGET_ARM64 && LJ_TARGET_OSX + if (ngpr >= 0) { + ngpr = -1; + args[n++] = TREF_NIL; /* Marker for start of varargs. */ + if (n >= CCI_NARGS_MAX) + lj_trace_err(J, LJ_TRERR_NYICALL); + } +#endif did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. */ } d = ctype_raw(cts, did); @@ -1124,13 +1162,22 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, lj_trace_err(J, LJ_TRERR_NYICALL); tr = crec_ct_tv(J, d, 0, *base, o); if (ctype_isinteger_or_bool(d->info)) { +#if LJ_TARGET_ARM64 && LJ_TARGET_OSX + if (!ngpr) { + /* Fixed args passed on the stack use their unpromoted size. */ + if (d->size != lj_ir_type_size[tref_type(tr)]) { + lj_assertJ(d->size == 1 || d->size==2, "unexpected size %d", d->size); + tr = emitconv(tr, d->size==1 ? IRT_U8 : IRT_U16, tref_type(tr), 0); + } + } else +#endif if (d->size < 4) { if ((d->info & CTF_UNSIGNED)) tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_U8 : IRT_U16, 0); else tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); } - } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { + } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) { lj_needsplit(J); } #if LJ_TARGET_X86 @@ -1161,6 +1208,10 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, } } #endif +#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX + if (!ctype_isfp(d->info) && ngpr) { + ngpr--; + } #endif args[n] = tr; } @@ -1209,8 +1260,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) TRef tr; TValue tv; /* Check for blacklisted C functions that might call a callback. */ - setlightudV(&tv, - cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4)); + tv.u64 = ((uintptr_t)cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 
8 : 4) >> 2) | U64x(800000000, 00000000); if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv))) lj_trace_err(J, LJ_TRERR_BLACKL); if (ctype_isvoid(ctr->info)) { @@ -1449,7 +1499,8 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts, void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) { - CTState *cts = ctype_ctsG(J2G(J)); + CTState *cts = ctype_cts(J->L); + MMS mm = (MMS)rd->data; TRef sp[2]; CType *s[2]; MSize i; @@ -1478,9 +1529,13 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct); goto ok; } else if (ctype_isfunc(ct->info)) { + CTypeID id0 = i ? ctype_typeid(cts, s[0]) : 0; tr = emitir(IRT(IR_FLOAD, IRT_PTR), tr, IRFL_CDATA_PTR); ct = ctype_get(cts, lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR)); + if (i) { + s[0] = ctype_get(cts, id0); /* cts->tab may have been reallocated. */ + } goto ok; } else { tr = emitir(IRT(IR_ADD, IRT_PTR), tr, lj_ir_kintp(J, sizeof(GCcdata))); @@ -1495,6 +1550,8 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) } } } else if (tref_isnil(tr)) { + if (!(mm == MM_len || mm == MM_eq || mm == MM_lt || mm == MM_le)) + lj_trace_err(J, LJ_TRERR_BADTYPE); tr = lj_ir_kptr(J, NULL); ct = ctype_get(cts, CTID_P_VOID); } else if (tref_isinteger(tr)) { @@ -1513,12 +1570,12 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) ct = ctype_child(cts, cct); tr = lj_ir_kint(J, (int32_t)ofs); } else { /* Interpreter will throw or return false. 
*/ - ct = ctype_get(cts, CTID_P_VOID); + lj_trace_err(J, LJ_TRERR_BADTYPE); } } else if (ctype_isptr(ct->info)) { tr = emitir(IRT(IR_ADD, IRT_PTR), tr, lj_ir_kintp(J, sizeof(GCstr))); } else { - ct = ctype_get(cts, CTID_P_VOID); + lj_trace_err(J, LJ_TRERR_BADTYPE); } } else if (!tref_isnum(tr)) { tr = 0; @@ -1530,8 +1587,9 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) } { TRef tr; - if (!(tr = crec_arith_int64(J, sp, s, (MMS)rd->data)) && - !(tr = crec_arith_ptr(J, sp, s, (MMS)rd->data)) && + if ((mm == MM_len || mm == MM_concat || + (!(tr = crec_arith_int64(J, sp, s, mm)) && + !(tr = crec_arith_ptr(J, sp, s, mm)))) && !(tr = crec_arith_meta(J, sp, s, cts, rd))) return; J->base[0] = tr; @@ -1750,11 +1808,21 @@ static CTypeID crec_bit64_type(CTState *cts, cTValue *tv) return 0; /* Use regular 32 bit ops. */ } +static TRef crec_bit64_arg(jit_State *J, CType *d, TRef sp, TValue *sval) +{ + if (LJ_UNLIKELY(tref_isstr(sp))) { + if (lj_strscan_num(strV(sval), sval)) { + sp = emitir(IRTG(IR_STRTO, IRT_NUM), sp, 0); + } /* else: interpreter will throw. 
*/ + } + return crec_ct_tv(J, d, 0, sp, sval); +} + void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); - TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0, - J->base[0], &rd->argv[0]); + TRef tr = crec_bit64_arg(J, ctype_get(cts, CTID_INT64), + J->base[0], &rd->argv[0]); if (!tref_isinteger(tr)) tr = emitconv(tr, IRT_INT, tref_type(tr), 0); J->base[0] = tr; @@ -1765,7 +1833,7 @@ int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd) CTState *cts = ctype_ctsG(J2G(J)); CTypeID id = crec_bit64_type(cts, &rd->argv[0]); if (id) { - TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); + TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]); tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0); J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); return 1; @@ -1785,9 +1853,9 @@ int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd) if (id) { CType *ct = ctype_get(cts, id); uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64); - TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]); + TRef tr = crec_bit64_arg(J, ct, J->base[0], &rd->argv[0]); for (i = 1; J->base[i] != 0; i++) { - TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]); + TRef tr2 = crec_bit64_arg(J, ct, J->base[i], &rd->argv[i]); tr = emitir(ot, tr, tr2); } J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); @@ -1802,15 +1870,15 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) CTypeID id; TRef tsh = 0; if (J->base[0] && tref_iscdata(J->base[1])) { - tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0, - J->base[1], &rd->argv[1]); + tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64), + J->base[1], &rd->argv[1]); if (!tref_isinteger(tsh)) tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); J->base[1] = tsh; } id = crec_bit64_type(cts, &rd->argv[0]); if (id) { - TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); 
+ TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]); uint32_t op = rd->data; if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && @@ -1840,17 +1908,18 @@ TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr) CTypeID id2 = 0; n = (int32_t)lj_carith_check64(J->L, 2, &id2); if (id2) - trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]); + trsf = crec_bit64_arg(J, ctype_get(cts, CTID_INT32), trsf, &rd->argv[1]); else trsf = lj_opt_narrow_tobit(J, trsf); emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n)); /* Specialize to n. */ } else { n = id ? 16 : 8; } - if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; } + if (n < 0) { n = (int32_t)(~n+1u); sf |= STRFMT_F_UPPER; } + if ((uint32_t)n > 254) n = 254; sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC); if (id) { - tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); + tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]); if (n < 16) tr = emitir(IRT(IR_BAND, IRT_U64), tr, lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1)); @@ -1879,10 +1948,36 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) d = ctype_get(cts, CTID_DOUBLE); J->base[0] = crec_ct_tv(J, d, 0, J->base[0], &rd->argv[0]); } else { + /* Specialize to the ctype that couldn't be converted. */ + argv2cdata(J, J->base[0], &rd->argv[0]); J->base[0] = TREF_NIL; } } +TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o) +{ + CTypeID id = argv2cdata(J, tr, o)->ctypeid; + if (!(id == CTID_INT64 || id == CTID_UINT64)) + lj_trace_err(J, LJ_TRERR_BADTYPE); + lj_needsplit(J); + return emitir(IRT(IR_FLOAD, id == CTID_INT64 ? 
IRT_I64 : IRT_U64), tr, + IRFL_CDATA_INT64); +} + +#if LJ_HASBUFFER +TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o) +{ + CTState *cts = ctype_ctsG(J2G(J)); + if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE); + return crec_ct_tv(J, ctype_get(cts, CTID_P_CVOID), 0, tr, o); +} + +TRef lj_crecord_topuint8(jit_State *J, TRef tr) +{ + return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, CTID_P_UINT8), tr); +} +#endif + #undef IR #undef emitir #undef emitconv diff --git a/source/libs/luajit/LuaJIT-src/src/lj_crecord.h b/source/libs/luajit/LuaJIT-src/src/lj_crecord.h index c165def47551f9093aa64b82cbaa26e5d5cb4649..898365dbead503ecf929c54bbc107fa8427c8ec0 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_crecord.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_crecord.h @@ -1,6 +1,6 @@ /* ** Trace recorder for C data operations. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CRECORD_H @@ -33,6 +33,11 @@ LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd); LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr); LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); +LJ_FUNC TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o); +#if LJ_HASBUFFER +LJ_FUNC TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o); +LJ_FUNC TRef lj_crecord_topuint8(jit_State *J, TRef tr); +#endif #endif #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ctype.c b/source/libs/luajit/LuaJIT-src/src/lj_ctype.c index 0ea89c7486a2539e05fb1a0231a9141bc44b20c2..19eecd64dc067ed54f8c8413539f33b9662c000f 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_ctype.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_ctype.c @@ -1,6 +1,6 @@ /* ** C type management. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #include "lj_obj.h" @@ -153,7 +153,7 @@ CTypeID lj_ctype_new(CTState *cts, CType **ctp) { CTypeID id = cts->top; CType *ct; - lua_assert(cts->L); + lj_assertCTS(cts->L, "uninitialized cts->L"); if (LJ_UNLIKELY(id >= cts->sizetab)) { if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV); #ifdef LUAJIT_CTYPE_CHECK_ANCHOR @@ -182,7 +182,7 @@ CTypeID lj_ctype_intern(CTState *cts, CTInfo info, CTSize size) { uint32_t h = ct_hashtype(info, size); CTypeID id = cts->hash[h]; - lua_assert(cts->L); + lj_assertCTS(cts->L, "uninitialized cts->L"); while (id) { CType *ct = ctype_get(cts, id); if (ct->info == info && ct->size == size) @@ -191,8 +191,20 @@ CTypeID lj_ctype_intern(CTState *cts, CTInfo info, CTSize size) } id = cts->top; if (LJ_UNLIKELY(id >= cts->sizetab)) { +#ifdef LUAJIT_CTYPE_CHECK_ANCHOR + CType *ct; +#endif if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV); +#ifdef LUAJIT_CTYPE_CHECK_ANCHOR + ct = lj_mem_newvec(cts->L, id+1, CType); + memcpy(ct, cts->tab, id*sizeof(CType)); + memset(cts->tab, 0, id*sizeof(CType)); + lj_mem_freevec(cts->g, cts->tab, cts->sizetab, CType); + cts->tab = ct; + cts->sizetab = id+1; +#else lj_mem_growvec(cts->L, cts->tab, cts->sizetab, CTID_MAX, CType); +#endif } cts->top = id+1; cts->tab[id].info = info; @@ -298,9 +310,9 @@ CTSize lj_ctype_vlsize(CTState *cts, CType *ct, CTSize nelem) } ct = ctype_raw(cts, arrid); } - lua_assert(ctype_isvlarray(ct->info)); /* Must be a VLA. */ + lj_assertCTS(ctype_isvlarray(ct->info), "VLA expected"); ct = ctype_rawchild(cts, ct); /* Get array element. */ - lua_assert(ctype_hassize(ct->info)); + lj_assertCTS(ctype_hassize(ct->info), "bad VLA without size"); /* Calculate actual size of VLA and check for overflow. */ xsz += (uint64_t)ct->size * nelem; return xsz < 0x80000000u ? 
(CTSize)xsz : CTSIZE_INVALID; @@ -323,7 +335,8 @@ CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp) } else { if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN); qual |= (info & ~(CTF_ALIGN|CTMASK_CID)); - lua_assert(ctype_hassize(info) || ctype_isfunc(info)); + lj_assertCTS(ctype_hassize(info) || ctype_isfunc(info), + "ctype without size"); *szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size; break; } @@ -332,6 +345,14 @@ CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp) return qual; } +/* Ditto, but follow a reference. */ +CTInfo lj_ctype_info_raw(CTState *cts, CTypeID id, CTSize *szp) +{ + CType *ct = ctype_get(cts, id); + if (ctype_isref(ct->info)) id = ctype_cid(ct->info); + return lj_ctype_info(cts, id, szp); +} + /* Get ctype metamethod. */ cTValue *lj_ctype_meta(CTState *cts, CTypeID id, MMS mm) { @@ -528,7 +549,7 @@ static void ctype_repr(CTRepr *ctr, CTypeID id) ctype_appc(ctr, ')'); break; default: - lua_assert(0); + lj_assertG_(ctr->cts->g, 0, "bad ctype %08x", info); break; } ct = ctype_get(ctr->cts, ctype_cid(info)); @@ -561,7 +582,7 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned) if (isunsigned) { *--p = 'U'; } else if ((int64_t)n < 0) { - n = (uint64_t)-(int64_t)n; + n = ~n+1u; sign = 1; } do { *--p = (char)('0' + n % 10); } while (n /= 10); @@ -582,7 +603,7 @@ GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) lj_strfmt_putfnum(sb, STRFMT_G14, re.n); if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+'); lj_strfmt_putfnum(sb, STRFMT_G14, im.n); - lj_buf_putchar(sb, sbufP(sb)[-1] >= 'a' ? 'I' : 'i'); + lj_buf_putchar(sb, sb->w[-1] >= 'a' ? 'I' : 'i'); return lj_buf_str(L, sb); } @@ -622,6 +643,18 @@ CTState *lj_ctype_init(lua_State *L) return cts; } +/* Create special weak-keyed finalizer table. */ +void lj_ctype_initfin(lua_State *L) +{ + /* NOBARRIER: The table is new (marked white). 
*/ + GCtab *t = lj_tab_new(L, 0, 1); + setgcref(t->metatable, obj2gco(t)); + setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")), + lj_str_newlit(L, "k")); + t->nomm = (uint8_t)(~(1u<<MM_mode)); + setgcref(G(L)->gcroot[GCROOT_FFI_FIN], obj2gco(t)); +} + /* Free C type table and state. */ void lj_ctype_freestate(global_State *g) { diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ctype.h b/source/libs/luajit/LuaJIT-src/src/lj_ctype.h index 0c220a8886684bdc1f2428972d794850260c8731..8b7160dd9a4628829ec8917b63e65fc190005e31 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_ctype.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_ctype.h @@ -1,6 +1,6 @@ /* ** C type management. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_CTYPE_H @@ -177,7 +177,6 @@ typedef struct CTState { MSize sizetab; /* Size of C type table. */ lua_State *L; /* Lua state (needed for errors and allocations). */ global_State *g; /* Global state. */ - GCtab *finalizer; /* Map of cdata to finalizer. */ GCtab *miscmap; /* Map of -CTypeID to metatable and cb slot to func. */ CCallback cb; /* Temporary callback state. */ CTypeID1 hash[CTHASH_SIZE]; /* Hash anchors for C type table. */ @@ -260,6 +259,12 @@ typedef struct CTState { #define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */ +#ifdef LUA_USE_ASSERT +#define lj_assertCTS(c, ...) (lj_assertG_(cts->g, (c), __VA_ARGS__)) +#else +#define lj_assertCTS(c, ...) ((void)cts) +#endif + /* -- Predefined types ---------------------------------------------------- */ /* Target-dependent types. */ @@ -270,6 +275,8 @@ typedef struct CTState { #define CTTYDEFP(_) #endif +#define CTF_LONG_IF8 (CTF_LONG * (sizeof(long) == 8)) + /* Common types. 
*/ #define CTTYDEF(_) \ _(NONE, 0, CT_ATTRIB, CTATTRIB(CTA_BAD)) \ @@ -283,8 +290,8 @@ typedef struct CTState { _(UINT16, 2, CT_NUM, CTF_UNSIGNED|CTALIGN(1)) \ _(INT32, 4, CT_NUM, CTALIGN(2)) \ _(UINT32, 4, CT_NUM, CTF_UNSIGNED|CTALIGN(2)) \ - _(INT64, 8, CT_NUM, CTF_LONG|CTALIGN(3)) \ - _(UINT64, 8, CT_NUM, CTF_UNSIGNED|CTF_LONG|CTALIGN(3)) \ + _(INT64, 8, CT_NUM, CTF_LONG_IF8|CTALIGN(3)) \ + _(UINT64, 8, CT_NUM, CTF_UNSIGNED|CTF_LONG_IF8|CTALIGN(3)) \ _(FLOAT, 4, CT_NUM, CTF_FP|CTALIGN(2)) \ _(DOUBLE, 8, CT_NUM, CTF_FP|CTALIGN(3)) \ _(COMPLEX_FLOAT, 8, CT_ARRAY, CTF_COMPLEX|CTALIGN(2)|CTID_FLOAT) \ @@ -292,6 +299,7 @@ typedef struct CTState { _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \ _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \ _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \ + _(P_UINT8, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_UINT8) \ _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \ _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \ CTTYDEFP(_) \ @@ -383,6 +391,16 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L) return cts; } +/* Load FFI library on-demand. */ +#define ctype_loadffi(L) \ + do { \ + if (!ctype_ctsG(G(L))) { \ + ptrdiff_t oldtop = (char *)L->top - mref(L->stack, char); \ + luaopen_ffi(L); \ + L->top = (TValue *)(mref(L->stack, char) + oldtop); \ + } \ + } while (0) + /* Save and restore state of C type table. */ #define LJ_CTYPE_SAVE(cts) CTState savects_ = *(cts) #define LJ_CTYPE_RESTORE(cts) \ @@ -392,7 +410,8 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L) /* Check C type ID for validity when assertions are enabled. */ static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id) { - lua_assert(id > 0 && id < cts->top); UNUSED(cts); + UNUSED(cts); + lj_assertCTS(id > 0 && id < cts->top, "bad CTID %d", id); return id; } @@ -408,8 +427,9 @@ static LJ_AINLINE CType *ctype_get(CTState *cts, CTypeID id) /* Get child C type. 
*/ static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct) { - lua_assert(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || - ctype_isbitfield(ct->info))); /* These don't have children. */ + lj_assertCTS(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || + ctype_isbitfield(ct->info)), + "ctype %08x has no children", ct->info); return ctype_get(cts, ctype_cid(ct->info)); } @@ -449,11 +469,13 @@ LJ_FUNC CType *lj_ctype_rawref(CTState *cts, CTypeID id); LJ_FUNC CTSize lj_ctype_size(CTState *cts, CTypeID id); LJ_FUNC CTSize lj_ctype_vlsize(CTState *cts, CType *ct, CTSize nelem); LJ_FUNC CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp); +LJ_FUNC CTInfo lj_ctype_info_raw(CTState *cts, CTypeID id, CTSize *szp); LJ_FUNC cTValue *lj_ctype_meta(CTState *cts, CTypeID id, MMS mm); LJ_FUNC GCstr *lj_ctype_repr(lua_State *L, CTypeID id, GCstr *name); LJ_FUNC GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned); LJ_FUNC GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size); LJ_FUNC CTState *lj_ctype_init(lua_State *L); +LJ_FUNC void lj_ctype_initfin(lua_State *L); LJ_FUNC void lj_ctype_freestate(global_State *g); #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_debug.c b/source/libs/luajit/LuaJIT-src/src/lj_debug.c index 959dc289c7c696b0a1b8b7c4a08d75e424a14b21..b3d52afc8f183867e90812043c7723f74d944454 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_debug.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_debug.c @@ -1,6 +1,6 @@ /* ** Debugging and introspection. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #define lj_debug_c @@ -55,7 +55,8 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) const BCIns *ins; GCproto *pt; BCPos pos; - lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD); + lj_assertL(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD, + "function or frame expected"); if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */ return NO_BCPOS; } else if (nextframe == NULL) { /* Lua function on top. */ @@ -63,6 +64,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) if (cf == NULL || (char *)cframe_pc(cf) == (char *)cframe_L(cf)) return NO_BCPOS; ins = cframe_pc(cf); /* Only happens during error/hook handling. */ + if (!ins) return NO_BCPOS; } else { if (frame_islua(nextframe)) { ins = frame_pc(nextframe); @@ -93,15 +95,19 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) } } ins = cframe_pc(cf); + if (!ins) return NO_BCPOS; } } pt = funcproto(fn); pos = proto_bcpos(pt, ins) - 1; #if LJ_HASJIT if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */ - GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); - lua_assert(bc_isret(bc_op(ins[-1]))); - pos = proto_bcpos(pt, mref(T->startpc, const BCIns)); + if (bc_isret(bc_op(ins[-1]))) { + GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); + pos = proto_bcpos(pt, mref(T->startpc, const BCIns)); + } else { + pos = NO_BCPOS; /* Punt in case of stack overflow for stitched trace. 
*/ + } } #endif return pos; @@ -133,7 +139,7 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe) BCPos pc = debug_framepc(L, fn, nextframe); if (pc != NO_BCPOS) { GCproto *pt = funcproto(fn); - lua_assert(pc <= pt->sizebc); + lj_assertL(pc <= pt->sizebc, "PC out of range"); return lj_debug_line(pt, pc); } return -1; @@ -214,26 +220,29 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, const char *lj_debug_uvname(GCproto *pt, uint32_t idx) { const uint8_t *p = proto_uvinfo(pt); - lua_assert(idx < pt->sizeuv); + lj_assertX(idx < pt->sizeuv, "bad upvalue index"); if (!p) return ""; if (idx) while (*p++ || --idx) ; return (const char *)p; } /* Get name and value of upvalue. */ -const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp) +const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp, GCobj **op) { if (tvisfunc(o)) { GCfunc *fn = funcV(o); if (isluafunc(fn)) { GCproto *pt = funcproto(fn); if (idx < pt->sizeuv) { - *tvp = uvval(&gcref(fn->l.uvptr[idx])->uv); + GCobj *uvo = gcref(fn->l.uvptr[idx]); + *tvp = uvval(&uvo->uv); + *op = uvo; return lj_debug_uvname(pt, idx); } } else { if (idx < fn->c.nupvalues) { *tvp = &fn->c.upvalue[idx]; + *op = obj2gco(fn); return ""; } } @@ -429,20 +438,21 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext) GCfunc *fn; if (*what == '>') { TValue *func = L->top - 1; - api_check(L, tvisfunc(func)); + if (!tvisfunc(func)) return 0; fn = funcV(func); L->top--; what++; } else { uint32_t offset = (uint32_t)ar->i_ci & 0xffff; uint32_t size = (uint32_t)ar->i_ci >> 16; - lua_assert(offset != 0); + lj_assertL(offset != 0, "bad frame offset"); frame = tvref(L->stack) + offset; if (size) nextframe = frame + size; - lua_assert(frame <= tvref(L->maxstack) && - (!nextframe || nextframe <= tvref(L->maxstack))); + lj_assertL(frame <= tvref(L->maxstack) && + (!nextframe || nextframe <= tvref(L->maxstack)), + "broken frame chain"); fn = frame_func(frame); 
- lua_assert(fn->c.gct == ~LJ_TFUNC); + lj_assertL(fn->c.gct == ~LJ_TFUNC, "bad frame function"); } for (; *what; what++) { if (*what == 'S') { @@ -642,7 +652,7 @@ void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth) level += dir; } if (lastlen) - setsbufP(sb, sbufB(sb) + lastlen); /* Zap trailing separator. */ + sb->w = sb->b + lastlen; /* Zap trailing separator. */ } #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_debug.h b/source/libs/luajit/LuaJIT-src/src/lj_debug.h index 5917c00bc6fe43331eeb47e3ec7ae884bcdad0e6..8b1b769ffa0b4c77c352fe40f2a5a5006a9d370c 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_debug.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_debug.h @@ -1,6 +1,6 @@ /* ** Debugging and introspection. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_DEBUG_H @@ -29,7 +29,8 @@ typedef struct lj_Debug { LJ_FUNC cTValue *lj_debug_frame(lua_State *L, int level, int *size); LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc); LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx); -LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); +LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp, + GCobj **op); LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, BCReg slot, const char **name); LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame, diff --git a/source/libs/luajit/LuaJIT-src/src/lj_def.h b/source/libs/luajit/LuaJIT-src/src/lj_def.h index 517921ddecf2684578bcaef62efea102489b321e..01ce4aa18a1e283074a02c5517d6b7cab778d335 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_def.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_def.h @@ -1,6 +1,6 @@ /* ** LuaJIT common internal definitions. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_DEF_H @@ -8,8 +8,8 @@ #include "lua.h" -#if defined(_MSC_VER) -/* MSVC is stuck in the last century and doesn't have C99's stdint.h. */ +#if defined(_MSC_VER) && (_MSC_VER < 1700) +/* Old MSVC is stuck in the last century and doesn't have C99's stdint.h. */ typedef __int8 int8_t; typedef __int16 int16_t; typedef __int32 int32_t; @@ -69,7 +69,7 @@ typedef unsigned int uintptr_t; #define LJ_MAX_UPVAL 249 /* Max. # of upvalues. */ #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ -#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ +#define LJ_STACK_EXTRA (5+3*LJ_FR2) /* Extra stack space (metamethods). */ #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ @@ -80,7 +80,6 @@ typedef unsigned int uintptr_t; #define LJ_MIN_SBUF 32 /* Min. string buffer length. */ #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ -#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */ /* JIT compiler limits. */ #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ @@ -105,9 +104,10 @@ typedef unsigned int uintptr_t; #define checku16(x) ((x) == (int32_t)(uint16_t)(x)) #define checki32(x) ((x) == (int32_t)(x)) #define checku32(x) ((x) == (uint32_t)(x)) +#define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0) #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) #define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0) -#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1) +#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1) /* Every half-decent C compiler transforms this into a rotate instruction. 
*/ #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) @@ -120,7 +120,7 @@ typedef uintptr_t BloomFilter; #define bloomset(b, x) ((b) |= bloombit((x))) #define bloomtest(b, x) ((b) & bloombit((x))) -#if defined(__GNUC__) || defined(__psp2__) +#if defined(__GNUC__) || defined(__clang__) || defined(__psp2__) #define LJ_NORET __attribute__((noreturn)) #define LJ_ALIGN(n) __attribute__((aligned(n))) @@ -146,15 +146,9 @@ typedef uintptr_t BloomFilter; #define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0) #define lj_ffs(x) ((uint32_t)__builtin_ctz(x)) -/* Don't ask ... */ -#if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__)) -static LJ_AINLINE uint32_t lj_fls(uint32_t x) -{ - uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r; -} -#else #define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31)) -#endif +#define lj_ffs64(x) ((uint32_t)__builtin_ctzll(x)) +#define lj_fls64(x) ((uint32_t)(__builtin_clzll(x)^63)) #if defined(__arm__) static LJ_AINLINE uint32_t lj_bswap(uint32_t x) @@ -182,7 +176,7 @@ static LJ_AINLINE uint64_t lj_bswap64(uint64_t x) { return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); } -#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) +#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __clang__ static LJ_AINLINE uint32_t lj_bswap(uint32_t x) { return (uint32_t)__builtin_bswap32((int32_t)x); @@ -263,20 +257,37 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) return _CountLeadingZeros(x) ^ 31; } #else -unsigned char _BitScanForward(uint32_t *, unsigned long); -unsigned char _BitScanReverse(uint32_t *, unsigned long); +unsigned char _BitScanForward(unsigned long *, unsigned long); +unsigned char _BitScanReverse(unsigned long *, unsigned long); #pragma intrinsic(_BitScanForward) #pragma intrinsic(_BitScanReverse) static LJ_AINLINE uint32_t lj_ffs(uint32_t x) { - uint32_t r; _BitScanForward(&r, x); return r; + unsigned long r; _BitScanForward(&r, x); 
return (uint32_t)r; } static LJ_AINLINE uint32_t lj_fls(uint32_t x) { - uint32_t r; _BitScanReverse(&r, x); return r; + unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r; +} + +#if defined(_M_X64) || defined(_M_ARM64) +unsigned char _BitScanForward64(unsigned long *, uint64_t); +unsigned char _BitScanReverse64(unsigned long *, uint64_t); +#pragma intrinsic(_BitScanForward64) +#pragma intrinsic(_BitScanReverse64) + +static LJ_AINLINE uint32_t lj_ffs64(uint64_t x) +{ + unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r; } + +static LJ_AINLINE uint32_t lj_fls64(uint64_t x) +{ + unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r; +} +#endif #endif unsigned long _byteswap_ulong(unsigned long); @@ -338,14 +349,28 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) #define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET #define LJ_ASMF_NORET LJ_ASMF LJ_NORET -/* Runtime assertions. */ -#ifdef lua_assert -#define check_exp(c, e) (lua_assert(c), (e)) -#define api_check(l, e) lua_assert(e) +/* Internal assertions. */ +#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) +#define lj_assert_check(g, c, ...) \ + ((c) ? (void)0 : \ + (lj_assert_fail((g), __FILE__, __LINE__, __func__, __VA_ARGS__), 0)) +#define lj_checkapi(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) #else -#define lua_assert(c) ((void)0) +#define lj_checkapi(c, ...) ((void)L) +#endif + +#ifdef LUA_USE_ASSERT +#define lj_assertG_(g, c, ...) lj_assert_check((g), (c), __VA_ARGS__) +#define lj_assertG(c, ...) lj_assert_check(g, (c), __VA_ARGS__) +#define lj_assertL(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) +#define lj_assertX(c, ...) lj_assert_check(NULL, (c), __VA_ARGS__) +#define check_exp(c, e) (lj_assertX((c), #c), (e)) +#else +#define lj_assertG_(g, c, ...) ((void)0) +#define lj_assertG(c, ...) ((void)g) +#define lj_assertL(c, ...) ((void)L) +#define lj_assertX(c, ...) ((void)0) #define check_exp(c, e) (e) -#define api_check luai_apicheck #endif /* Static assertions. 
*/ @@ -359,4 +384,9 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1]) #endif +/* PRNG state. Need this here, details in lj_prng.h. */ +typedef struct PRNGState { + uint64_t u[4]; +} PRNGState; + #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_dispatch.c b/source/libs/luajit/LuaJIT-src/src/lj_dispatch.c index 5d6795f88e338fdb9e8f3298673f7c3309d7989b..786083169a002ea10f66a6b55d407ecb8c98727a 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_dispatch.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_dispatch.c @@ -1,6 +1,6 @@ /* ** Instruction dispatch handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_dispatch_c @@ -68,6 +68,8 @@ void lj_dispatch_init(GG_State *GG) /* The JIT engine is off by default. luaopen_jit() turns it on. */ disp[BC_FORL] = disp[BC_IFORL]; disp[BC_ITERL] = disp[BC_IITERL]; + /* Workaround for stable v2.1 bytecode. TODO: Replace with BC_IITERN. */ + disp[BC_ITERN] = &lj_vm_IITERN; disp[BC_LOOP] = disp[BC_ILOOP]; disp[BC_FUNCF] = disp[BC_IFUNCF]; disp[BC_FUNCV] = disp[BC_IFUNCV]; @@ -118,19 +120,21 @@ void lj_dispatch_update(global_State *g) mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; if (oldmode != mode) { /* Mode changed? */ ASMFunction *disp = G2GG(g)->dispatch; - ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv; + ASMFunction f_forl, f_iterl, f_itern, f_loop, f_funcf, f_funcv; g->dispatchmode = mode; /* Hotcount if JIT is on, but not while recording. 
*/ if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) { f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]); f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]); + f_itern = makeasmfunc(lj_bc_ofs[BC_ITERN]); f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]); f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]); f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]); } else { /* Otherwise use the non-hotcounting instructions. */ f_forl = disp[GG_LEN_DDISP+BC_IFORL]; f_iterl = disp[GG_LEN_DDISP+BC_IITERL]; + f_itern = &lj_vm_IITERN; f_loop = disp[GG_LEN_DDISP+BC_ILOOP]; f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]); f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]); @@ -138,6 +142,7 @@ void lj_dispatch_update(global_State *g) /* Init static counting instruction dispatch first (may be copied below). */ disp[GG_LEN_DDISP+BC_FORL] = f_forl; disp[GG_LEN_DDISP+BC_ITERL] = f_iterl; + disp[GG_LEN_DDISP+BC_ITERN] = f_itern; disp[GG_LEN_DDISP+BC_LOOP] = f_loop; /* Set dynamic instruction dispatch. */ @@ -165,6 +170,7 @@ void lj_dispatch_update(global_State *g) /* Otherwise set dynamic counting ins. */ disp[BC_FORL] = f_forl; disp[BC_ITERL] = f_iterl; + disp[BC_ITERN] = f_itern; disp[BC_LOOP] = f_loop; /* Set dynamic return dispatch. */ if ((mode & DISPMODE_RET)) { @@ -252,15 +258,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) } else { if (!(mode & LUAJIT_MODE_ON)) G2J(g)->flags &= ~(uint32_t)JIT_F_ON; -#if LJ_TARGET_X86ORX64 - else if ((G2J(g)->flags & JIT_F_SSE2)) - G2J(g)->flags |= (uint32_t)JIT_F_ON; - else - return 0; /* Don't turn on JIT compiler without SSE2 support. */ -#else else G2J(g)->flags |= (uint32_t)JIT_F_ON; -#endif lj_dispatch_update(g); } break; @@ -302,15 +301,15 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) if (idx != 0) { cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx; if (tvislightud(tv)) - g->wrapf = (lua_CFunction)lightudV(tv); + g->wrapf = (lua_CFunction)lightudV(g, tv); else return 0; /* Failed. */ } else { return 0; /* Failed. 
*/ } - g->bc_cfunc_ext = BCINS_AD(BC_FUNCCW, 0, 0); + setbc_op(&g->bc_cfunc_ext, BC_FUNCCW); } else { - g->bc_cfunc_ext = BCINS_AD(BC_FUNCC, 0, 0); + setbc_op(&g->bc_cfunc_ext, BC_FUNCC); } break; default: @@ -374,7 +373,7 @@ static void callhook(lua_State *L, int event, BCLine line) hook_enter(g); #endif hookf(L, &ar); - lua_assert(hook_active(g)); + lj_assertG(hook_active(g), "active hook flag removed"); setgcref(g->cur_L, obj2gco(L)); #if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF lj_profile_hook_leave(g); @@ -422,7 +421,8 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc) #endif J->L = L; lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ - lua_assert(L->top - L->base == delta); + lj_assertG(L->top - L->base == delta, + "unbalanced stack after tracing of instruction"); } } #endif @@ -453,7 +453,7 @@ static int call_init(lua_State *L, GCfunc *fn) int numparams = pt->numparams; int gotparams = (int)(L->top - L->base); int need = pt->framesize; - if ((pt->flags & PROTO_VARARG)) need += 1+gotparams; + if ((pt->flags & PROTO_VARARG)) need += 1+LJ_FR2+gotparams; lj_state_checkstack(L, (MSize)need); numparams -= gotparams; return numparams >= 0 ? numparams : 0; @@ -482,7 +482,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) #endif pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1); lj_trace_hot(J, pc); - lua_assert(L->top - L->base == delta); + lj_assertG(L->top - L->base == delta, + "unbalanced stack after hot call"); goto out; } else if (J->state != LJ_TRACE_IDLE && !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) { @@ -491,7 +492,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) #endif /* Record the FUNC* bytecodes, too. */ lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. 
*/ - lua_assert(L->top - L->base == delta); + lj_assertG(L->top - L->base == delta, + "unbalanced stack after hot instruction"); } #endif if ((g->hookmask & LUA_MASKCALL)) { diff --git a/source/libs/luajit/LuaJIT-src/src/lj_dispatch.h b/source/libs/luajit/LuaJIT-src/src/lj_dispatch.h index 5bda51a21376137c178984ae25891f44b29a7297..b1ccff7b8e4a4dbc6adf0f4cb1ccb70a08e327ac 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_dispatch.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_dispatch.h @@ -1,6 +1,6 @@ /* ** Instruction dispatch handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_DISPATCH_H @@ -31,7 +31,7 @@ extern double __divdf3(double a, double b); #define SFGOTDEF(_) #endif #if LJ_HASJIT -#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) +#define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot) #else #define JITGOTDEF(_) #endif @@ -89,12 +89,20 @@ typedef uint16_t HotCount; typedef struct GG_State { lua_State L; /* Main thread. */ global_State g; /* Global state. */ +#if LJ_TARGET_ARM && !LJ_TARGET_NX + /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ + uint8_t align1[(16-sizeof(global_State))&15]; +#endif #if LJ_TARGET_MIPS ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ #endif #if LJ_HASJIT jit_State J; /* JIT state. */ HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ +#if LJ_TARGET_ARM && !LJ_TARGET_NX + /* Ditto for J. */ + uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15]; +#endif #endif ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */ BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. 
*/ diff --git a/source/libs/luajit/LuaJIT-src/src/lj_emit_arm.h b/source/libs/luajit/LuaJIT-src/src/lj_emit_arm.h index dee8bdccd1919573a4c7b5ec9a021f8e4dd44c11..c60e7d7560b16fe58994eeccc977c3c9c2bf46ca 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_emit_arm.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_emit_arm.h @@ -1,6 +1,6 @@ /* ** ARM instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ /* -- Constant encoding --------------------------------------------------- */ @@ -81,7 +81,8 @@ static void emit_m(ASMState *as, ARMIns ai, Reg rm) static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) { - lua_assert(ofs >= -255 && ofs <= 255); + lj_assertA(ofs >= -255 && ofs <= 255, + "load/store offset %d out of range", ofs); if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | ((ofs & 0xf0) << 4) | (ofs & 0x0f); @@ -89,7 +90,8 @@ static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) { - lua_assert(ofs >= -4095 && ofs <= 4095); + lj_assertA(ofs >= -4095 && ofs <= 4095, + "load/store offset %d out of range", ofs); /* Combine LDR/STR pairs to LDRD/STRD. 
*/ if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && @@ -106,7 +108,8 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) #if !LJ_SOFTFP static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) { - lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); + lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0, + "load/store offset %d out of range", ofs); if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); } @@ -124,7 +127,7 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i) while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); + lj_assertA(r != d, "dest reg not free"); if (emit_canremat(ref)) { int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); uint32_t k = emit_isk12(ARMI_ADD, delta); @@ -142,25 +145,25 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i) } /* Try to find a two step delta relative to another constant. */ -static int emit_kdelta2(ASMState *as, Reg d, int32_t i) +static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) { RegSet work = ~as->freeset & RSET_GPR; while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); + lj_assertA(r != rd, "dest reg %d not free", rd); if (emit_canremat(ref)) { int32_t other = ra_iskref(ref) ? 
ra_krefk(as, ref) : IR(ref)->i; if (other) { int32_t delta = i - other; uint32_t sh, inv = 0, k2, k; - if (delta < 0) { delta = -delta; inv = ARMI_ADD^ARMI_SUB; } + if (delta < 0) { delta = (int32_t)(~(uint32_t)delta+1u); inv = ARMI_ADD^ARMI_SUB; } sh = lj_ffs(delta) & ~1; k2 = emit_isk12(0, delta & (255 << sh)); k = emit_isk12(0, delta & ~(255 << sh)); if (k) { - emit_dn(as, ARMI_ADD^k2^inv, d, d); - emit_dn(as, ARMI_ADD^k^inv, d, r); + emit_dn(as, ARMI_ADD^k2^inv, rd, rd); + emit_dn(as, ARMI_ADD^k^inv, rd, r); return 1; } } @@ -171,23 +174,24 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i) } /* Load a 32 bit constant into a GPR. */ -static void emit_loadi(ASMState *as, Reg r, int32_t i) +static void emit_loadi(ASMState *as, Reg rd, int32_t i) { uint32_t k = emit_isk12(ARMI_MOV, i); - lua_assert(rset_test(as->freeset, r) || r == RID_TMP); + lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP, + "dest reg %d not free", rd); if (k) { /* Standard K12 constant. */ - emit_d(as, ARMI_MOV^k, r); + emit_d(as, ARMI_MOV^k, rd); } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { /* 16 bit loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta1(as, r, i)) { + emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + } else if (emit_kdelta1(as, rd, i)) { /* One step delta relative to another constant. */ } else if ((as->flags & JIT_F_ARMV6T2)) { /* 32 bit hiword/loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r); - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta2(as, r, i)) { + emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd); + emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + } else if (emit_kdelta2(as, rd, i)) { /* Two step delta relative to another constant. */ } else { /* Otherwise construct the constant with up to 4 instructions. 
*/ @@ -197,15 +201,15 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) int32_t m = i & (255 << sh); i &= ~(255 << sh); if (i == 0) { - emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r); + emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd); break; } - emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r); + emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd); } } } -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) +#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr))) static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); @@ -261,7 +265,7 @@ static void emit_branch(ASMState *as, ARMIns ai, MCode *target) { MCode *p = as->mcp; ptrdiff_t delta = (target - p) - 1; - lua_assert(((delta + 0x00800000) >> 24) == 0); + lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range"); *--p = ai | ((uint32_t)delta & 0x00ffffffu); as->mcp = p; } @@ -289,7 +293,7 @@ static void emit_call(ASMState *as, void *target) static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) { #if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); #else if (dst >= RID_MAX_GPR) { emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, @@ -313,7 +317,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { #if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); #else if (r >= RID_MAX_GPR) emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); @@ -326,7 +330,7 @@ static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { #if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); #else if (r >= RID_MAX_GPR) emit_vlso(as, irt_isnum(ir->t) ? 
ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_emit_arm64.h b/source/libs/luajit/LuaJIT-src/src/lj_emit_arm64.h index 6da4c7d4b4ee67ec095a60ca06754b91b0263ea1..ca1269b7c3c83db16ae90d66e8bb52e6c26d2d78 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_emit_arm64.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_emit_arm64.h @@ -1,6 +1,6 @@ /* ** ARM64 instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. ** Sponsored by Cisco Systems, Inc. @@ -8,8 +8,9 @@ /* -- Constant encoding --------------------------------------------------- */ -static uint64_t get_k64val(IRIns *ir) +static uint64_t get_k64val(ASMState *as, IRRef ref) { + IRIns *ir = IR(ref); if (ir->o == IR_KINT64) { return ir_kint64(ir)->u64; } else if (ir->o == IR_KGC) { @@ -17,50 +18,43 @@ static uint64_t get_k64val(IRIns *ir) } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { return (uint64_t)ir_kptr(ir); } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); - return ir->i; /* Sign-extended. */ + lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, + "bad 64 bit const IR op %d", ir->o); + return (uint32_t)ir->i; /* Zero-extended. */ } } /* Encode constant in K12 format for data processing instructions. */ static uint32_t emit_isk12(int64_t n) { - uint64_t k = (n < 0) ? -n : n; - uint32_t m = (n < 0) ? 0x40000000 : 0; + uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n; + uint32_t m = n < 0 ? 
0x40000000 : 0; if (k < 0x1000) { - return A64I_K12|m|A64F_U12(k); + return (uint32_t)(A64I_K12|m|A64F_U12(k)); } else if ((k & 0xfff000) == k) { - return A64I_K12|m|0x400000|A64F_U12(k>>12); + return (uint32_t)(A64I_K12|m|0x400000|A64F_U12(k>>12)); } return 0; } -#define emit_clz64(n) __builtin_clzll(n) -#define emit_ctz64(n) __builtin_ctzll(n) +#define emit_clz64(n) (lj_fls64(n)^63) +#define emit_ctz64(n) lj_ffs64(n) /* Encode constant in K13 format for logical data processing instructions. */ static uint32_t emit_isk13(uint64_t n, int is64) { - int inv = 0, w = 128, lz, tz; - if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ - if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ - do { /* Find the repeat width. */ - if (is64 && (uint32_t)(n^(n>>32))) break; - n = (uint32_t)n; - if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */ - w = 32; if ((n^(n>>16)) & 0xffff) break; - n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; - n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; - n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; - n = n & 0x3; w = 2; - } while (0); - lz = emit_clz64(n); - tz = emit_ctz64(n); - if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */ - if (inv) - return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); - else - return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); + /* Thanks to: https://dougallj.wordpress.com/2021/10/30/ */ + int rot, ones, size, immr, imms; + if (!is64) n = ((uint64_t)n << 32) | (uint32_t)n; + if ((n+1u) <= 1u) return 0; /* Neither all-zero nor all-ones are allowed. */ + rot = (n & (n+1u)) ? emit_ctz64(n & (n+1u)) : 64; + n = lj_ror(n, rot & 63); + ones = emit_ctz64(~n); + size = emit_clz64(n) + ones; + if (lj_ror(n, size & 63) != n) return 0; /* Non-repeating? 
*/ + immr = -rot & (size - 1); + imms = (-(size << 1) | (ones - 1)) & 63; + return A64I_K13 | A64F_IMMR(immr | (size & 64)) | A64F_IMMS(imms); } static uint32_t emit_isfpk64(uint64_t n) @@ -72,6 +66,17 @@ static uint32_t emit_isfpk64(uint64_t n) return ~0u; } +static uint32_t emit_isfpmovi(uint64_t n) +{ + /* Is every byte either 0x00 or 0xff? */ + if ((n & U64x(01010101,01010101)) * 0xff != n) return 0; + /* Form 8-bit value by taking one bit from each byte. */ + n &= U64x(80402010,08040201); + n = (n * U64x(01010101,01010101)) >> 56; + /* Split into the format expected by movi. */ + return ((n & 0xe0) << 6) | 0x700 | (n & 0x1f); +} + /* -- Emit basic instructions --------------------------------------------- */ static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) @@ -104,6 +109,11 @@ static void emit_d(ASMState *as, A64Ins ai, Reg rd) *--as->mcp = ai | A64F_D(rd); } +static void emit_dl(ASMState *as, A64Ins ai, Reg rd, uint32_t l) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_S19(l >> 2); +} + static void emit_n(ASMState *as, A64Ins ai, Reg rn) { *--as->mcp = ai | A64F_N(rn); @@ -119,10 +129,21 @@ static int emit_checkofs(A64Ins ai, int64_t ofs) } } -static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) +static LJ_AINLINE uint32_t emit_lso_pair_candidate(A64Ins ai, int ofs, int sc) +{ + if (ofs >= 0) { + return ai | A64F_U12(ofs>>sc); /* Subsequent lj_ror checks ofs. */ + } else if (ofs >= -256) { + return (ai^A64I_LS_U) | A64F_S9(ofs & 0x1ff); + } else { + return A64F_D(31); /* Will mismatch prev. */ + } +} + +static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs64) { - int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; - lua_assert(ot); + int ot = emit_checkofs(ai, ofs64), sc = (ai >> 30) & 3, ofs = (int)ofs64; + lj_assertA(ot, "load/store offset %d out of range", ofs); /* Combine LDR/STR pairs to LDP/STP. 
*/ if ((sc == 2 || sc == 3) && (!(ai & 0x400000) || rd != rn) && @@ -130,18 +151,16 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) uint32_t prev = *as->mcp & ~A64F_D(31); int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc); A64Ins aip; - if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { + if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsm, sc)) { aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); - } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { + } else if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsp, sc)) { aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); ofsm = ofs; } else { goto nopair; } - if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) { - *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | + if (lj_ror((unsigned int)ofsm + (64u<<sc), sc) <= 127u) { + *as->mcp = aip | A64F_N(rn) | (((ofsm >> sc) & 0x7f) << 15) | (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); return; } @@ -156,28 +175,28 @@ nopair: /* -- Emit loads/stores --------------------------------------------------- */ /* Prefer rematerialization of BASE/L from global_State over spills. */ -#define emit_canremat(ref) ((ref) <= ASMREF_L) +#define emit_canremat(ref) ((ref) <= REF_BASE) -/* Try to find an N-step delta relative to other consts with N < lim. */ -static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) +/* Try to find a one-step delta relative to other consts. */ +static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64) { - RegSet work = ~as->freeset & RSET_GPR; - if (lim <= 1) return 0; /* Can't beat that. 
*/ + RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL); while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != rd); + lj_assertA(r != rd, "dest reg %d not free", rd); if (ref < REF_TRUE) { uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : - get_k64val(IR(ref)); + get_k64val(as, ref); int64_t delta = (int64_t)(k - kx); + if (!is64) delta = (int64_t)(int32_t)delta; /* Sign-extend. */ if (delta == 0) { - emit_dm(as, A64I_MOVx, rd, r); + emit_dm(as, is64|A64I_MOVw, rd, r); return 1; } else { - uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); + uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta); if (k12) { - emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); + emit_dn(as, (delta < 0 ? A64I_SUBw : A64I_ADDw)^is64^k12, rd, r); return 1; } /* Do other ops or multi-step deltas pay off? Probably not. @@ -190,77 +209,101 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) return 0; /* Failed. */ } -static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) +#define glofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) +#define mcpofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) +#define checkmcpofs(as, k) \ + (A64F_S_OK(mcpofs(as, k)>>2, 19)) + +/* Try to form a const as ADR or ADRP or ADRP + ADD. */ +static int emit_kadrp(ASMState *as, Reg rd, uint64_t k) { - uint32_t k13 = emit_isk13(u64, is64); - if (k13) { /* Can the constant be represented as a bitmask immediate? */ - emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); - } else { - int i, zeros = 0, ones = 0, neg; - if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ - /* Count homogeneous 16 bit fragments. 
*/ - for (i = 0; i < 4; i++) { - uint64_t frag = (u64 >> i*16) & 0xffff; - zeros += (frag == 0); - ones += (frag == 0xffff); + A64Ins ai = A64I_ADR; + int64_t ofs = mcpofs(as, k); + if (!A64F_S_OK((uint64_t)ofs, 21)) { + uint64_t kpage = k & ~0xfffull; + MCode *adrp = as->mcp - 1 - (k != kpage); + ofs = (int64_t)(kpage - ((uint64_t)adrp & ~0xfffull)) >> 12; + if (!A64F_S_OK(ofs, 21)) + return 0; /* Failed. */ + if (k != kpage) + emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd); + ai = A64I_ADRP; + } + emit_dl(as, ai|(((uint32_t)ofs&3)<<29), rd, ofs); + return 1; +} + +static void emit_loadk(ASMState *as, Reg rd, uint64_t u64) +{ + int zeros = 0, ones = 0, neg, lshift = 0; + int is64 = (u64 >> 32) ? A64I_X : 0, i = is64 ? 4 : 2; + /* Count non-homogeneous 16 bit fragments. */ + while (--i >= 0) { + uint32_t frag = (u64 >> i*16) & 0xffff; + zeros += (frag != 0); + ones += (frag != 0xffff); + } + neg = ones < zeros; /* Use MOVN if it pays off. */ + if ((neg ? ones : zeros) > 1) { /* Need 2+ ins. Try 1 ins encodings. */ + uint32_t k13 = emit_isk13(u64, is64); + if (k13) { + emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); + return; } - neg = ones > zeros; /* Use MOVN if it pays off. */ - if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { - int shift = 0, lshift = 0; - uint64_t n64 = neg ? ~u64 : u64; - if (n64 != 0) { - /* Find first/last fragment to be filled. */ - shift = (63-emit_clz64(n64)) & ~15; - lshift = emit_ctz64(n64) & ~15; - } - /* MOVK requires the original value (u64). */ - while (shift > lshift) { - uint32_t u16 = (u64 >> shift) & 0xffff; - /* Skip fragments that are correctly filled by MOVN/MOVZ. */ - if (u16 != (neg ? 0xffff : 0)) - emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); - shift -= 16; - } - /* But MOVN needs an inverted value (n64). */ - emit_d(as, (neg ? 
A64I_MOVNx : A64I_MOVZx) | - A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); + if (emit_kdelta(as, rd, u64, is64)) { + return; + } + if (emit_kadrp(as, rd, u64)) { /* Either 1 or 2 ins. */ + return; + } + } + if (neg) { + u64 = ~u64; + if (!is64) u64 = (uint32_t)u64; + } + if (u64) { + /* Find first/last fragment to be filled. */ + int shift = (63-emit_clz64(u64)) & ~15; + lshift = emit_ctz64(u64) & ~15; + for (; shift > lshift; shift -= 16) { + uint32_t frag = (u64 >> shift) & 0xffff; + if (frag == 0) continue; /* Will be correctly filled by MOVN/MOVZ. */ + if (neg) frag ^= 0xffff; /* MOVK requires the original value. */ + emit_d(as, is64 | A64I_MOVKw | A64F_U16(frag) | A64F_LSL16(shift), rd); } } + /* But MOVN needs an inverted value. */ + emit_d(as, is64 | (neg ? A64I_MOVNw : A64I_MOVZw) | + A64F_U16((u64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); } /* Load a 32 bit constant into a GPR. */ -#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) +#define emit_loadi(as, rd, i) emit_loadk(as, rd, (uint32_t)i) /* Load a 64 bit constant into a GPR. */ -#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) - -#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) - -#define glofs(as, k) \ - ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) -#define mcpofs(as, k) \ - ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) -#define checkmcpofs(as, k) \ - ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0) +#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i) static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) { - /* First, check if ip + offset is in range. */ - if ((ai & 0x00400000) && checkmcpofs(as, p)) { - emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); - } else { - Reg base = RID_GL; /* Next, try GL + offset. 
*/ - int64_t ofs = glofs(as, p); - if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ - int64_t i64 = i64ptr(p); - base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); - ofs = i64 & 0x7fffull; - } - emit_lso(as, ai, r, base, ofs); + Reg base = RID_GL; + int64_t ofs = glofs(as, p); + if (emit_checkofs(ai, ofs)) { + /* GL + offset, might subsequently fuse to LDP/STP. */ + } else if (ai == A64I_LDRx && checkmcpofs(as, p)) { + /* IP + offset is cheaper than allock, but address must be in range. */ + emit_dl(as, A64I_LDRLx, r, mcpofs(as, p)); + return; + } else { /* Split up into base reg + offset. */ + int64_t i64 = i64ptr(p); + base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); + ofs = i64 & 0x7fffull; } + emit_lso(as, ai, r, base, ofs); } /* Load 64 bit IR constant into register. */ @@ -273,21 +316,24 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) if (fpk != ~0u) { emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); return; + } else if ((fpk = emit_isfpmovi(*k))) { + emit_d(as, A64I_MOVI_DI | (fpk << 5), (r & 31)); + return; } } ofs = glofs(as, k); if (emit_checkofs(A64I_LDRx, ofs)) { emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, (r & 31), RID_GL, ofs); + } else if (checkmcpofs(as, k)) { + emit_dl(as, r >= RID_MAX_GPR ? 
A64I_LDRLd : A64I_LDRLx, + (r & 31), mcpofs(as, k)); } else { if (r >= RID_MAX_GPR) { emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); r = RID_TMP; } - if (checkmcpofs(as, k)) - emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); - else - emit_loadu64(as, r, *k); + emit_loadu64(as, r, *k); } } @@ -312,7 +358,7 @@ static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) { MCode *p = --as->mcp; ptrdiff_t delta = target - p; - lua_assert(((delta + 0x40000) >> 19) == 0); + lj_assertA(A64F_S_OK(delta, 19), "branch target out of range"); *p = A64I_BCC | A64F_S19(delta) | cond; } @@ -320,39 +366,46 @@ static void emit_branch(ASMState *as, A64Ins ai, MCode *target) { MCode *p = --as->mcp; ptrdiff_t delta = target - p; - lua_assert(((delta + 0x02000000) >> 26) == 0); - *p = ai | ((uint32_t)delta & 0x03ffffffu); + lj_assertA(A64F_S_OK(delta, 26), "branch target out of range"); + *p = ai | A64F_S26(delta); } static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) { MCode *p = --as->mcp; ptrdiff_t delta = target - p; - lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0); + lj_assertA(bit < 63, "bit number out of range"); + lj_assertA(A64F_S_OK(delta, 14), "branch target out of range"); if (bit > 31) ai |= A64I_X; - *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r; + *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r; } static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) { MCode *p = --as->mcp; ptrdiff_t delta = target - p; - lua_assert(((delta + 0x40000) >> 19) == 0); + lj_assertA(A64F_S_OK(delta, 19), "branch target out of range"); *p = ai | A64F_S19(delta) | r; } #define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) -static void emit_call(ASMState *as, void *target) +static void emit_call(ASMState *as, ASMFunction target) { MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - if ((((delta>>2) + 0x02000000) >> 26) == 0) { - *p = A64I_BL | ((uint32_t)(delta>>2) & 
0x03ffffffu); +#if LJ_ABI_PAUTH + char *targetp = ptrauth_auth_data((char *)target, + ptrauth_key_function_pointer, 0); +#else + char *targetp = (char *)target; +#endif + ptrdiff_t delta = targetp - (char *)p; + if (A64F_S_OK(delta>>2, 26)) { + *p = A64I_BL | A64F_S26(delta>>2); } else { /* Target out of range: need indirect call. But don't use R0-R7. */ Reg r = ra_allock(as, i64ptr(target), RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); - *p = A64I_BLR | A64F_N(r); + *p = A64I_BLR_AUTH | A64F_N(r); } } @@ -412,7 +465,8 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, - ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); + ofs < 0 ? (int32_t)(~(uint32_t)ofs+1u) : ofs, + rset_exclude(RSET_GPR, r)); } #define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) diff --git a/source/libs/luajit/LuaJIT-src/src/lj_emit_mips.h b/source/libs/luajit/LuaJIT-src/src/lj_emit_mips.h index 8a9ee24dce7baed3da8a5a3ccf9d5f6b407914ad..d8104959aaa4a738068bd5ebce5ea5c03cc5ac7f 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_emit_mips.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_emit_mips.h @@ -1,28 +1,32 @@ /* ** MIPS instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #if LJ_64 -static intptr_t get_k64val(IRIns *ir) +static intptr_t get_k64val(ASMState *as, IRRef ref) { + IRIns *ir = IR(ref); if (ir->o == IR_KINT64) { return (intptr_t)ir_kint64(ir)->u64; } else if (ir->o == IR_KGC) { return (intptr_t)ir_kgc(ir); } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { return (intptr_t)ir_kptr(ir); + } else if (LJ_SOFTFP && ir->o == IR_KNUM) { + return (intptr_t)ir_knum(ir)->u64; } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); + lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, + "bad 64 bit const IR op %d", ir->o); return ir->i; /* Sign-extended. 
*/ } } #endif #if LJ_64 -#define get_kval(ir) get_k64val(ir) +#define get_kval(as, ref) get_k64val(as, ref) #else -#define get_kval(ir) ((ir)->i) +#define get_kval(as, ref) (IR((ref))->i) #endif /* -- Emit basic instructions --------------------------------------------- */ @@ -66,7 +70,7 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) } } -#if LJ_64 +#if LJ_64 || LJ_HASBUFFER static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, uint32_t lsb) { @@ -80,18 +84,18 @@ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, #define emit_canremat(ref) ((ref) <= REF_BASE) /* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, intptr_t i) +static int emit_kdelta1(ASMState *as, Reg rd, intptr_t i) { RegSet work = ~as->freeset & RSET_GPR; while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); + lj_assertA(r != rd, "dest reg %d not free", rd); if (ref < ASMREF_L) { intptr_t delta = (intptr_t)((uintptr_t)i - - (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref)))); + (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(as, ref))); if (checki16(delta)) { - emit_tsi(as, MIPSI_AADDIU, t, r, delta); + emit_tsi(as, MIPSI_AADDIU, rd, r, delta); return 1; } } @@ -136,6 +140,7 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) } else if (emit_kdelta1(as, r, (intptr_t)u64)) { return; } else { + /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. 
*/ if ((u64 & 0xffff)) { emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); } @@ -220,7 +225,7 @@ static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target) { MCode *p = as->mcp; ptrdiff_t delta = target - p; - lua_assert(((delta + 0x8000) >> 16) == 0); + lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu); as->mcp = p; } @@ -234,10 +239,22 @@ static void emit_jmp(ASMState *as, MCode *target) static void emit_call(ASMState *as, void *target, int needcfa) { MCode *p = as->mcp; - *--p = MIPSI_NOP; +#if LJ_TARGET_MIPSR6 + ptrdiff_t delta = (char *)target - (char *)p; + if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */ + *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu); + as->mcp = p; + return; + } +#endif + *--p = MIPSI_NOP; /* Delay slot. */ if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { +#if !LJ_TARGET_MIPSR6 *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) | (((uintptr_t)target >>2) & 0x03ffffffu); +#else + *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); +#endif } else { /* Target out of range: need indirect call. */ *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); needcfa = 1; @@ -284,7 +301,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { - lua_assert(checki16(ofs)); + lj_assertA(checki16(ofs), "offset %d out of range", ofs); emit_tsi(as, MIPSI_AADDIU, r, r, ofs); } } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_emit_ppc.h b/source/libs/luajit/LuaJIT-src/src/lj_emit_ppc.h index 21c3c2ace704e5169fe713f6448af2ffaee39a44..b13f00fe5b0e43c18da04b294632ab1f50ba92df 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_emit_ppc.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_emit_ppc.h @@ -1,6 +1,6 @@ /* ** PPC instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ /* -- Emit basic instructions --------------------------------------------- */ @@ -41,13 +41,13 @@ static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs, static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n) { - lua_assert(n >= 0 && n < 32); + lj_assertA(n >= 0 && n < 32, "shift out or range"); emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n); } static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) { - lua_assert(n >= 0 && n < 32); + lj_assertA(n >= 0 && n < 32, "shift out or range"); emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31); } @@ -57,17 +57,17 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) #define emit_canremat(ref) ((ref) <= REF_BASE) /* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, int32_t i) +static int emit_kdelta1(ASMState *as, Reg rd, int32_t i) { RegSet work = ~as->freeset & RSET_GPR; while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); + lj_assertA(r != rd, "dest reg %d not free", rd); if (ref < ASMREF_L) { int32_t delta = i - (ra_iskref(ref) ? 
ra_krefk(as, ref) : IR(ref)->i); if (checki16(delta)) { - emit_tai(as, PPCI_ADDI, t, r, delta); + emit_tai(as, PPCI_ADDI, rd, r, delta); return 1; } } @@ -144,7 +144,7 @@ static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target) { MCode *p = --as->mcp; ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x8000) >> 16) == 0); + lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_emit_x86.h b/source/libs/luajit/LuaJIT-src/src/lj_emit_x86.h index 5207f9dada5bfdf11de4e4b8e0d309010697e83f..f477301162aca3e00e67a5592bacc736fcc861be 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_emit_x86.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_emit_x86.h @@ -1,6 +1,6 @@ /* ** x86/x64 instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ /* -- Emit basic instructions --------------------------------------------- */ @@ -45,7 +45,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, *(uint32_t *)(p+delta-5) = (uint32_t)xo; return p+delta-5; } -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) if (__builtin_constant_p(xo) && n == -2) p[delta-2] = (MCode)(xo >> 24); else if (__builtin_constant_p(xo) && n == -3) @@ -92,7 +92,7 @@ static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2) /* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). 
*/ static int32_t ptr2addr(const void *p) { - lua_assert((uintptr_t)p < (uintptr_t)0x80000000); + lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range"); return i32ptr(p); } #else @@ -208,7 +208,7 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) rb = RID_ESP; #endif } else if (LJ_GC64 && rb == RID_RIP) { - lua_assert(as->mrm.idx == RID_NONE); + lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index"); mode = XM_OFS0; p -= 4; *(int32_t *)p = as->mrm.ofs; @@ -274,10 +274,12 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) /* mov r, i / xor r, r */ static void emit_loadi(ASMState *as, Reg r, int32_t i) { - /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */ + /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP/jcc. */ if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP || (as->curins+1 < as->T->nins && - IR(as->curins+1)->o == IR_HIOP)))) { + IR(as->curins+1)->o == IR_HIOP))) && + !((*as->mcp == 0x0f && (as->mcp[1] & 0xf0) == XI_JCCn) || + (*as->mcp & 0xf0) == XI_JCCs)) { emit_rr(as, XO_ARITH(XOg_XOR), r, r); } else { MCode *p = as->mcp; @@ -343,9 +345,27 @@ static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); - } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) { - emit_rmro(as, xo, rr, rr, 0); - emit_loadu64(as, rr, (uintptr_t)addr); + } else if (!checki32((intptr_t)addr)) { + Reg ra = (rr & 15); + if (xo != XO_MOV) { + /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */ + uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch; + uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0; + ra = RID_DISPATCH; + if (checku32(dispaddr)) { + emit_loadi(as, ra, (int32_t)dispaddr); + } else { /* Full-size 64 bit load. 
*/ + MCode *p = as->mcp; + *(uint64_t *)(p-8) = dispaddr; + p[-9] = (MCode)(XI_MOVri+(ra&7)); + p[-10] = 0x48 + ((ra>>3)&1); + p -= 10; + as->mcp = p; + } + if (xo == XO_GROUP3b) emit_i8(as, i8); + } + emit_rmro(as, xo, rr, ra, 0); + emit_loadu64(as, ra, (uintptr_t)addr); } else #endif { @@ -381,7 +401,8 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) emit_rma(as, xo, r64, k); } else { if (ir->i) { - lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); + lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i), + "bad interned 64 bit constant"); } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) { emit_loadu64(as, r, *k); return; @@ -413,7 +434,7 @@ static void emit_sjmp(ASMState *as, MCLabel target) { MCode *p = as->mcp; ptrdiff_t delta = target - p; - lua_assert(delta == (int8_t)delta); + lj_assertA(delta == (int8_t)delta, "short jump target out of range"); p[-1] = (MCode)(int8_t)delta; p[-2] = XI_JMPs; as->mcp = p - 2; @@ -425,7 +446,7 @@ static void emit_sjcc(ASMState *as, int cc, MCLabel target) { MCode *p = as->mcp; ptrdiff_t delta = target - p; - lua_assert(delta == (int8_t)delta); + lj_assertA(delta == (int8_t)delta, "short jump target out of range"); p[-1] = (MCode)(int8_t)delta; p[-2] = (MCode)(XI_JCCs+(cc&15)); as->mcp = p - 2; @@ -451,10 +472,11 @@ static void emit_sfixup(ASMState *as, MCLabel source) #define emit_label(as) ((as)->mcp) /* Compute relative 32 bit offset for jump and call instructions. 
*/ -static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) +static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target) { ptrdiff_t delta = target - p; - lua_assert(delta == (int32_t)delta); + UNUSED(J); + lj_assertJ(delta == (int32_t)delta, "jump target out of range"); return (int32_t)delta; } @@ -462,7 +484,7 @@ static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) static void emit_jcc(ASMState *as, int cc, MCode *target) { MCode *p = as->mcp; - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = (MCode)(XI_JCCn+(cc&15)); p[-6] = 0x0f; as->mcp = p - 6; @@ -472,7 +494,7 @@ static void emit_jcc(ASMState *as, int cc, MCode *target) static void emit_jmp(ASMState *as, MCode *target) { MCode *p = as->mcp; - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = XI_JMP; as->mcp = p - 5; } @@ -489,7 +511,7 @@ static void emit_call_(ASMState *as, MCode *target) return; } #endif - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = XI_CALL; as->mcp = p - 5; } @@ -539,10 +561,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { - if ((as->flags & JIT_F_LEA_AGU)) - emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); - else - emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); } } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_err.c b/source/libs/luajit/LuaJIT-src/src/lj_err.c index b6be357e79182bf821d06b838133bed81b3b86f7..03b5030be6360fa759d4d0bf7772c569f8371b5f 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_err.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_err.c @@ -1,6 +1,6 @@ /* ** Error handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #define lj_err_c @@ -29,12 +29,18 @@ ** Pros and Cons: ** ** - EXT requires unwind tables for *all* functions on the C stack between -** the pcall/catch and the error/throw. This is the default on x64, -** but needs to be manually enabled on x86/PPC for non-C++ code. +** the pcall/catch and the error/throw. C modules used by Lua code can +** throw errors, so these need to have unwind tables, too. Transitively +** this applies to all system libraries used by C modules -- at least +** when they have callbacks which may throw an error. ** -** - INT is faster when actually throwing errors (but this happens rarely). +** - INT is faster when actually throwing errors, but this happens rarely. ** Setting up error handlers is zero-cost in any case. ** +** - INT needs to save *all* callee-saved registers when entering the +** interpreter. EXT only needs to save those actually used inside the +** interpreter. JIT-compiled code may need to save some more. +** ** - EXT provides full interoperability with C++ exceptions. You can throw ** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames. ** C++ destructors are called as needed. C++ exceptions caught by pcall @@ -46,27 +52,38 @@ ** the wrapper function feature. Lua errors thrown through C++ frames ** cannot be caught by C++ code and C++ destructors are not run. ** -** EXT is the default on x64 systems and on Windows, INT is the default on all -** other systems. +** - EXT can handle errors from internal helper functions that are called +** from JIT-compiled code (except for Windows/x86 and 32 bit ARM). +** INT has no choice but to call the panic handler, if this happens. +** Note: this is mainly relevant for out-of-memory errors. +** +** EXT is the default on all systems where the toolchain produces unwind +** tables by default (*). This is hard-coded and/or detected in src/Makefile. 
+** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL +** +** INT is the default on all other systems. +** +** EXT can be manually enabled for toolchains that are able to produce +** conforming unwind tables: +** "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL" +** As explained above, *all* C code used directly or indirectly by LuaJIT +** must be compiled with -funwind-tables (or -fexceptions). C++ code must +** *not* be compiled with -fno-exceptions. +** +** If you're unsure whether error handling inside the VM works correctly, +** try running this and check whether it prints "OK": ** -** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack -** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled -** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set -** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules -** and all C libraries that have callbacks which may be used to call back -** into Lua. C++ code must *not* be compiled with -fno-exceptions. +** luajit -e "print(select(2, load('OK')):match('OK'))" ** -** EXT is mandatory on WIN64 since the calling convention has an abundance -** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). -** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). +** (*) Originally, toolchains only generated unwind tables for C++ code. For +** interoperability reasons, this can be manually enabled for plain C code, +** too (with -funwind-tables). With the introduction of the x64 architecture, +** the corresponding POSIX and Windows ABIs mandated unwind tables for all +** code. Over the following years most desktop and server platforms have +** enabled unwind tables by default on all architectures. OTOH mobile and +** embedded platforms do not consistently mandate unwind tables. 
*/ -#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND -#define LJ_UNWIND_EXT 1 -#elif LJ_TARGET_WINDOWS -#define LJ_UNWIND_EXT 1 -#endif - /* -- Error messages ------------------------------------------------------ */ /* Error message strings. */ @@ -150,18 +167,22 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) case FRAME_CONT: /* Continuation frame. */ if (frame_iscont_fficb(frame)) goto unwind_c; + /* fallthrough */ case FRAME_VARG: /* Vararg frame. */ frame = frame_prevd(frame); break; case FRAME_PCALL: /* FF pcall() frame. */ case FRAME_PCALLH: /* FF pcall() frame inside hook. */ if (errcode) { + global_State *g; if (errcode == LUA_YIELD) { frame = frame_prevd(frame); break; } + g = G(L); + setgcref(g->cur_L, obj2gco(L)); if (frame_typep(frame) == FRAME_PCALL) - hook_leave(G(L)); + hook_leave(g); L->base = frame_prevd(frame) + 1; L->cframe = cf; unwindstack(L, L->base); @@ -183,7 +204,192 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) /* -- External frame unwinding -------------------------------------------- */ -#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN +#if LJ_ABI_WIN + +/* +** Someone in Redmond owes me several days of my life. A lot of this is +** undocumented or just plain wrong on MSDN. Some of it can be gathered +** from 3rd party docs or must be found by trial-and-error. They really +** don't want you to write your own language-specific exception handler +** or to interact gracefully with MSVC. :-( +*/ + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +#if LJ_TARGET_X86 +typedef void *UndocumentedDispatcherContext; /* Unused on x86. 
*/ +#else +/* Taken from: http://www.nynaeve.net/?p=99 */ +typedef struct UndocumentedDispatcherContext { + ULONG64 ControlPc; + ULONG64 ImageBase; + PRUNTIME_FUNCTION FunctionEntry; + ULONG64 EstablisherFrame; + ULONG64 TargetIp; + PCONTEXT ContextRecord; + void (*LanguageHandler)(void); + PVOID HandlerData; + PUNWIND_HISTORY_TABLE HistoryTable; + ULONG ScopeIndex; + ULONG Fill0; +} UndocumentedDispatcherContext; +#endif + +/* Another wild guess. */ +extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); + +#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363) +#define LJ_GCC_EXCODE ((DWORD)0x20474343) + +#define LJ_EXCODE ((DWORD)0xe24c4a00) +#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) +#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) +#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) + +/* Windows exception handler for interpreter frame. */ +LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, + void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) +{ +#if LJ_TARGET_X86 + void *cf = (char *)f - CFRAME_OFS_SEH; +#elif LJ_TARGET_ARM64 + void *cf = (char *)f - CFRAME_SIZE; +#else + void *cf = f; +#endif + lua_State *L = cframe_L(cf); + int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? + LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; + if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ + if (rec->ExceptionCode == STATUS_LONGJUMP && + rec->ExceptionRecord && + LJ_EXCODE_CHECK(rec->ExceptionRecord->ExceptionCode)) { + errcode = LJ_EXCODE_ERRCODE(rec->ExceptionRecord->ExceptionCode); + if ((rec->ExceptionFlags & 0x20)) { /* EH_TARGET_UNWIND */ + /* Unwinding is about to finish; revert the ExceptionCode so that + ** RtlRestoreContext does not try to restore from a _JUMP_BUFFER. + */ + rec->ExceptionCode = 0; + } + } + /* Unwind internal frames. */ + err_unwind(L, cf, errcode); + } else { + void *cf2 = err_unwind(L, cf, 0); + if (cf2) { /* We catch it, so start unwinding the upper frames. 
*/ +#if !LJ_TARGET_X86 + EXCEPTION_RECORD rec2; +#endif + if (rec->ExceptionCode == LJ_MSVC_EXCODE || + rec->ExceptionCode == LJ_GCC_EXCODE) { +#if !LJ_TARGET_CYGWIN + __DestructExceptionObject(rec, 1); +#endif + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); + } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { + /* Don't catch access violations etc. */ + return 1; /* ExceptionContinueSearch */ + } +#if LJ_TARGET_X86 + UNUSED(ctx); + UNUSED(dispatch); + /* Call all handlers for all lower C frames (including ourselves) again + ** with EH_UNWINDING set. Then call the specified function, passing cf + ** and errcode. + */ + lj_vm_rtlunwind(cf, (void *)rec, + (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? + (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); + /* lj_vm_rtlunwind does not return. */ +#else + if (LJ_EXCODE_CHECK(rec->ExceptionCode)) { + /* For unwind purposes, wrap the EXCEPTION_RECORD in something that + ** looks like a longjmp, so that MSVC will execute C++ destructors in + ** the frames we unwind over. ExceptionInformation[0] should really + ** contain a _JUMP_BUFFER*, but hopefully nobody is looking too closely + ** at this point. + */ + rec2.ExceptionCode = STATUS_LONGJUMP; + rec2.ExceptionRecord = rec; + rec2.ExceptionAddress = 0; + rec2.NumberParameters = 1; + rec2.ExceptionInformation[0] = (ULONG_PTR)ctx; + rec = &rec2; + } + /* Unwind the stack and call all handlers for all lower C frames + ** (including ourselves) again with EH_UNWINDING set. Then set + ** stack pointer = f, result = errcode and jump to the specified target. + */ + RtlUnwindEx(f, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? + lj_vm_unwind_ff_eh : + lj_vm_unwind_c_eh), + rec, (void *)(uintptr_t)errcode, dispatch->ContextRecord, + dispatch->HistoryTable); + /* RtlUnwindEx should never return. 
*/ +#endif + } + } + return 1; /* ExceptionContinueSearch */ +} + +#if LJ_UNWIND_JIT + +#if LJ_TARGET_X64 +#define CONTEXT_REG_PC Rip +#elif LJ_TARGET_ARM64 +#define CONTEXT_REG_PC Pc +#else +#error "NYI: Windows arch-specific unwinder for JIT-compiled code" +#endif + +/* Windows unwinder for JIT-compiled code. */ +static void err_unwind_win_jit(global_State *g, int errcode) +{ + CONTEXT ctx; + UNWIND_HISTORY_TABLE hist; + + memset(&hist, 0, sizeof(hist)); + RtlCaptureContext(&ctx); + while (1) { + DWORD64 frame, base, addr = ctx.CONTEXT_REG_PC; + void *hdata; + PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist); + if (!func) { /* Found frame without .pdata: must be JIT-compiled code. */ + ExitNo exitno; + uintptr_t stub = lj_trace_unwind(G2J(g), (uintptr_t)(addr - sizeof(MCode)), &exitno); + if (stub) { /* Jump to side exit to unwind the trace. */ + ctx.CONTEXT_REG_PC = stub; + G2J(g)->exitcode = errcode; + RtlRestoreContext(&ctx, NULL); /* Does not return. */ + } + break; + } + RtlVirtualUnwind(UNW_FLAG_NHANDLER, base, addr, func, + &ctx, &hdata, &frame, NULL); + if (!addr) break; + } + /* Unwinding failed, if we end up here. */ +} +#endif + +/* Raise Windows exception. */ +static void err_raise_ext(global_State *g, int errcode) +{ +#if LJ_UNWIND_JIT + if (tvref(g->jit_base)) { + err_unwind_win_jit(g, errcode); + return; /* Unwinding failed. */ + } +#elif LJ_HASJIT + /* Cannot catch on-trace errors for Windows/x86 SEH. Unwind to interpreter. */ + setmref(g->jit_base, NULL); +#endif + UNUSED(g); + RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL); +} + +#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__)) /* ** We have to use our own definitions instead of the mandatory (!) 
unwind.h, @@ -193,6 +399,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) typedef struct _Unwind_Context _Unwind_Context; #define _URC_OK 0 +#define _URC_FATAL_PHASE2_ERROR 2 #define _URC_FATAL_PHASE1_ERROR 3 #define _URC_HANDLER_FOUND 6 #define _URC_INSTALL_CONTEXT 7 @@ -212,9 +419,11 @@ typedef struct _Unwind_Exception void (*excleanup)(int, struct _Unwind_Exception *); uintptr_t p1, p2; } __attribute__((__aligned__)) _Unwind_Exception; +#define UNWIND_EXCEPTION_TYPE _Unwind_Exception extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); +extern uintptr_t _Unwind_GetIP(_Unwind_Context *); extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); extern void _Unwind_DeleteException(_Unwind_Exception *); extern int _Unwind_RaiseException(_Unwind_Exception *); @@ -232,7 +441,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, lua_State *L; if (version != 1) return _URC_FATAL_PHASE1_ERROR; - UNUSED(uexclass); cf = (void *)_Unwind_GetCFA(ctx); L = cframe_L(cf); if ((actions & _UA_SEARCH_PHASE)) { @@ -259,10 +467,10 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, if ((actions & _UA_FORCE_UNWIND)) { return _URC_CONTINUE_UNWIND; } else if (cf) { + ASMFunction ip; _Unwind_SetGR(ctx, LJ_TARGET_EHRETREG, errcode); - _Unwind_SetIP(ctx, (uintptr_t)(cframe_unwind_ff(cf) ? - lj_vm_unwind_ff_eh : - lj_vm_unwind_c_eh)); + ip = cframe_unwind_ff(cf) ? lj_vm_unwind_ff_eh : lj_vm_unwind_c_eh; + _Unwind_SetIP(ctx, (uintptr_t)lj_ptr_strip(ip)); return _URC_INSTALL_CONTEXT; } #if LJ_TARGET_X86ORX64 @@ -280,25 +488,150 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, ** it on non-x64 because the interpreter restores all callee-saved regs. */ lj_err_throw(L, errcode); +#if LJ_TARGET_X64 +#error "Broken build system -- only use the provided Makefiles!" 
+#endif #endif } return _URC_CONTINUE_UNWIND; } -#if LJ_UNWIND_EXT -#if LJ_TARGET_OSX || defined(__OpenBSD__) -/* Sorry, no thread safety for OSX. Complain to Apple, not me. */ -static _Unwind_Exception static_uex; +#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT) +struct dwarf_eh_bases { void *tbase, *dbase, *func; }; +extern const void *_Unwind_Find_FDE(void *pc, struct dwarf_eh_bases *bases); + +/* Verify that external error handling actually has a chance to work. */ +void lj_err_verify(void) +{ +#if !LJ_TARGET_OSX + /* Check disabled on MacOS due to brilliant software engineering at Apple. */ + struct dwarf_eh_bases ehb; + lj_assertX(_Unwind_Find_FDE((void *)lj_err_throw, &ehb), "broken build: external frame unwinding enabled, but missing -funwind-tables"); +#endif + /* Check disabled, because of broken Fedora/ARM64. See #722. + lj_assertX(_Unwind_Find_FDE((void *)_Unwind_RaiseException, &ehb), "broken build: external frame unwinding enabled, but system libraries have no unwind tables"); + */ +} +#endif + +#if LJ_UNWIND_JIT +/* DWARF2 personality handler for JIT-compiled code. */ +static int err_unwind_jit(int version, int actions, + uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) +{ + /* NYI: FFI C++ exception interoperability. */ + if (version != 1 || !LJ_UEXCLASS_CHECK(uexclass)) + return _URC_FATAL_PHASE1_ERROR; + if ((actions & _UA_SEARCH_PHASE)) { + return _URC_HANDLER_FOUND; + } + if ((actions & _UA_CLEANUP_PHASE)) { + global_State *g = *(global_State **)(uex+1); + ExitNo exitno; + uintptr_t addr = _Unwind_GetIP(ctx); /* Return address _after_ call. */ + uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno); + lj_assertG(tvref(g->jit_base), "unexpected throw across mcode frame"); + if (stub) { /* Jump to side exit to unwind the trace. 
*/ + G2J(g)->exitcode = LJ_UEXCLASS_ERRCODE(uexclass); +#ifdef LJ_TARGET_MIPS + _Unwind_SetGR(ctx, 4, stub); + _Unwind_SetGR(ctx, 5, exitno); + _Unwind_SetIP(ctx, (uintptr_t)(void *)lj_vm_unwind_stub); #else -static __thread _Unwind_Exception static_uex; + _Unwind_SetIP(ctx, stub); #endif + return _URC_INSTALL_CONTEXT; + } + return _URC_FATAL_PHASE2_ERROR; + } + return _URC_FATAL_PHASE1_ERROR; +} -/* Raise DWARF2 exception. */ -static void err_raise_ext(int errcode) +/* DWARF2 template frame info for JIT-compiled code. +** +** After copying the template to the start of the mcode segment, +** the frame handler function and the code size is patched. +** The frame handler always installs a new context to jump to the exit, +** so don't bother to add any unwind opcodes. +*/ +static const uint8_t err_frame_jit_template[] = { +#if LJ_BE + 0,0,0, +#endif + LJ_64 ? 0x1c : 0x14, /* CIE length. */ +#if LJ_LE + 0,0,0, +#endif + 0,0,0,0, 1, 'z','P','R',0, /* CIE mark, CIE version, augmentation. */ + 1, LJ_64 ? 0x78 : 0x7c, LJ_TARGET_EHRAREG, /* Code/data align, RA. */ +#if LJ_64 + 10, 0, 0,0,0,0,0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */ + 0,0,0,0,0, /* Alignment. */ +#else + 6, 0, 0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */ + 0, /* Alignment. */ +#endif +#if LJ_BE + 0,0,0, +#endif + LJ_64 ? 0x14 : 0x10, /* FDE length. */ + 0,0,0, + LJ_64 ? 0x24 : 0x1c, /* CIE offset. */ + 0,0,0, + LJ_64 ? 0x14 : 0x10, /* Code offset. After Final FDE. */ +#if LJ_LE + 0,0,0, +#endif + 0,0,0,0, 0, 0,0,0, /* Code size, augmentation length, alignment. */ +#if LJ_64 + 0,0,0,0, /* Alignment. */ +#endif + 0,0,0,0 /* Final FDE. */ +}; + +#define ERR_FRAME_JIT_OFS_HANDLER 0x12 +#define ERR_FRAME_JIT_OFS_FDE (LJ_64 ? 0x20 : 0x18) +#define ERR_FRAME_JIT_OFS_CODE_SIZE (LJ_64 ? 
0x2c : 0x24) +#if LJ_TARGET_OSX +#define ERR_FRAME_JIT_OFS_REGISTER ERR_FRAME_JIT_OFS_FDE +#else +#define ERR_FRAME_JIT_OFS_REGISTER 0 +#endif + +extern void __register_frame(const void *); +extern void __deregister_frame(const void *); + +uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info) +{ + ASMFunction handler = (ASMFunction)err_unwind_jit; + memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template)); +#if LJ_ABI_PAUTH +#if LJ_TARGET_ARM64 + handler = ptrauth_auth_and_resign(handler, + ptrauth_key_function_pointer, 0, + ptrauth_key_process_independent_code, info + ERR_FRAME_JIT_OFS_HANDLER); +#else +#error "missing pointer authentication support for this architecture" +#endif +#endif + memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler)); + *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) = + (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base)); + __register_frame(info + ERR_FRAME_JIT_OFS_REGISTER); +#ifdef LUA_USE_ASSERT + { + struct dwarf_eh_bases ehb; + lj_assertX(_Unwind_Find_FDE(info + sizeof(err_frame_jit_template)+1, &ehb), + "bad JIT unwind table registration"); + } +#endif + return info + sizeof(err_frame_jit_template); +} + +void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info) { - static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); - static_uex.excleanup = NULL; - _Unwind_RaiseException(&static_uex); + UNUSED(base); UNUSED(sz); + __deregister_frame(info + ERR_FRAME_JIT_OFS_REGISTER); } #endif @@ -310,6 +643,7 @@ static void err_raise_ext(int errcode) #define _US_FORCE_UNWIND 8 typedef struct _Unwind_Control_Block _Unwind_Control_Block; +#define UNWIND_EXCEPTION_TYPE _Unwind_Control_Block struct _Unwind_Control_Block { uint64_t exclass; @@ -368,136 +702,63 @@ LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb, } if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) return _URC_FAILURE; +#ifdef LUA_USE_ASSERT + /* We should never get here unless this is a forced unwind 
aka backtrace. */ + if (_Unwind_GetGR(ctx, 0) == 0xff33aa77) { + _Unwind_SetGR(ctx, 0, 0xff33aa88); + } +#endif return _URC_CONTINUE_UNWIND; } -#if LJ_UNWIND_EXT -static __thread _Unwind_Control_Block static_uex; +#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT) +typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *); +extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *); -static void err_raise_ext(int errcode) +static int err_verify_bt(_Unwind_Context *ctx, int *got) { - memset(&static_uex, 0, sizeof(static_uex)); - static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); - _Unwind_RaiseException(&static_uex); + if (_Unwind_GetGR(ctx, 0) == 0xff33aa88) { *got = 2; } + else if (*got == 0) { *got = 1; _Unwind_SetGR(ctx, 0, 0xff33aa77); } + return _URC_OK; } -#endif -#endif /* LJ_TARGET_ARM */ - -#elif LJ_ABI_WIN +/* Verify that external error handling actually has a chance to work. */ +void lj_err_verify(void) +{ + int got = 0; + _Unwind_Backtrace((_Unwind_Trace_Fn)err_verify_bt, &got); + lj_assertX(got == 2, "broken build: external frame unwinding enabled, but missing -funwind-tables"); +} +#endif /* -** Someone in Redmond owes me several days of my life. A lot of this is -** undocumented or just plain wrong on MSDN. Some of it can be gathered -** from 3rd party docs or must be found by trial-and-error. They really -** don't want you to write your own language-specific exception handler -** or to interact gracefully with MSVC. :-( +** Note: LJ_UNWIND_JIT is not implemented for 32 bit ARM. ** -** Apparently MSVC doesn't call C++ destructors for foreign exceptions -** unless you compile your C++ code with /EHa. Unfortunately this means -** catch (...) also catches things like access violations. The use of -** _set_se_translator doesn't really help, because it requires /EHa, too. +** The quirky ARM unwind API doesn't have __register_frame(). +** A potential workaround might involve _Unwind_Backtrace. 
+** But most 32 bit ARM targets don't qualify for LJ_UNWIND_EXT, anyway, +** since they are built without unwind tables by default. */ -#define WIN32_LEAN_AND_MEAN -#include <windows.h> - -#if LJ_TARGET_X64 -/* Taken from: http://www.nynaeve.net/?p=99 */ -typedef struct UndocumentedDispatcherContext { - ULONG64 ControlPc; - ULONG64 ImageBase; - PRUNTIME_FUNCTION FunctionEntry; - ULONG64 EstablisherFrame; - ULONG64 TargetIp; - PCONTEXT ContextRecord; - void (*LanguageHandler)(void); - PVOID HandlerData; - PUNWIND_HISTORY_TABLE HistoryTable; - ULONG ScopeIndex; - ULONG Fill0; -} UndocumentedDispatcherContext; -#else -typedef void *UndocumentedDispatcherContext; -#endif - -/* Another wild guess. */ -extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); +#endif /* LJ_TARGET_ARM */ -#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT) -/* Workaround for broken MinGW64 declaration. */ -VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); -#define RtlUnwindEx RtlUnwindEx_FIXED -#endif -#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363) -#define LJ_GCC_EXCODE ((DWORD)0x20474343) - -#define LJ_EXCODE ((DWORD)0xe24c4a00) -#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) -#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) -#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) +#if LJ_UNWIND_EXT +static __thread struct { + UNWIND_EXCEPTION_TYPE ex; + global_State *g; +} static_uex; -/* Windows exception handler for interpreter frame. */ -LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, - void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) +/* Raise external exception. */ +static void err_raise_ext(global_State *g, int errcode) { -#if LJ_TARGET_X64 - void *cf = f; -#else - void *cf = (char *)f - CFRAME_OFS_SEH; -#endif - lua_State *L = cframe_L(cf); - int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? 
- LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; - if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ - /* Unwind internal frames. */ - err_unwind(L, cf, errcode); - } else { - void *cf2 = err_unwind(L, cf, 0); - if (cf2) { /* We catch it, so start unwinding the upper frames. */ - if (rec->ExceptionCode == LJ_MSVC_EXCODE || - rec->ExceptionCode == LJ_GCC_EXCODE) { -#if LJ_TARGET_WINDOWS - __DestructExceptionObject(rec, 1); -#endif - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); - } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { - /* Don't catch access violations etc. */ - return 1; /* ExceptionContinueSearch */ - } -#if LJ_TARGET_X64 - /* Unwind the stack and call all handlers for all lower C frames - ** (including ourselves) again with EH_UNWINDING set. Then set - ** rsp = cf, rax = errcode and jump to the specified target. - */ - RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? - lj_vm_unwind_ff_eh : - lj_vm_unwind_c_eh), - rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); - /* RtlUnwindEx should never return. */ -#else - UNUSED(ctx); - UNUSED(dispatch); - /* Call all handlers for all lower C frames (including ourselves) again - ** with EH_UNWINDING set. Then call the specified function, passing cf - ** and errcode. - */ - lj_vm_rtlunwind(cf, (void *)rec, - (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? - (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); - /* lj_vm_rtlunwind does not return. */ -#endif - } - } - return 1; /* ExceptionContinueSearch */ + memset(&static_uex, 0, sizeof(static_uex)); + static_uex.ex.exclass = LJ_UEXCLASS_MAKE(errcode); + static_uex.g = g; + _Unwind_RaiseException(&static_uex.ex); } -/* Raise Windows exception. 
*/ -static void err_raise_ext(int errcode) -{ - RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL); -} +#endif #endif @@ -508,22 +769,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode) { global_State *g = G(L); lj_trace_abort(g); - setmref(g->jit_base, NULL); L->status = LUA_OK; #if LJ_UNWIND_EXT - err_raise_ext(errcode); + err_raise_ext(g, errcode); /* ** A return from this function signals a corrupt C stack that cannot be ** unwound. We have no choice but to call the panic function and exit. ** ** Usually this is caused by a C function without unwind information. - ** This should never happen on x64, but may happen if you've manually - ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every* - ** non-C++ file with -funwind-tables. + ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL + ** and forgot to recompile *every* non-C++ file with -funwind-tables. */ if (G(L)->panic) G(L)->panic(L); #else +#if LJ_HASJIT + setmref(g->jit_base, NULL); +#endif { void *cf = err_unwind(L, NULL, errcode); if (cframe_unwind_ff(cf)) @@ -546,6 +808,18 @@ LJ_NOINLINE void lj_err_mem(lua_State *L) { if (L->status == LUA_ERRERR+1) /* Don't touch the stack during lua_open. */ lj_vm_unwind_c(L->cframe, LUA_ERRMEM); + if (LJ_HASJIT) { + TValue *base = tvref(G(L)->jit_base); + if (base) L->base = base; + } + if (curr_funcisL(L)) { + L->top = curr_topL(L); + if (LJ_UNLIKELY(L->top > tvref(L->maxstack))) { + /* The current Lua frame violates the stack. Replace it with a dummy. 
*/ + L->top = L->base; + setframe_gc(L->base - 1 - LJ_FR2, obj2gco(L), LJ_TTHREAD); + } + } setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRMEM)); lj_err_throw(L, LUA_ERRMEM); } @@ -585,6 +859,7 @@ static ptrdiff_t finderrfunc(lua_State *L) if (cframe_canyield(cf)) return 0; if (cframe_errfunc(cf) >= 0) return cframe_errfunc(cf); + cf = cframe_prev(cf); frame = frame_prevd(frame); break; case FRAME_PCALL: @@ -593,7 +868,7 @@ static ptrdiff_t finderrfunc(lua_State *L) return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */ return 0; default: - lua_assert(0); + lj_assertL(0, "bad frame type"); return 0; } } @@ -601,13 +876,15 @@ static ptrdiff_t finderrfunc(lua_State *L) } /* Runtime error. */ -LJ_NOINLINE void lj_err_run(lua_State *L) +LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) { - ptrdiff_t ef = finderrfunc(L); + ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L); if (ef) { - TValue *errfunc = restorestack(L, ef); - TValue *top = L->top; + TValue *errfunc, *top; + lj_state_checkstack(L, LUA_MINSTACK * 2); /* Might raise new error. */ lj_trace_abort(G(L)); + errfunc = restorestack(L, ef); + top = L->top; if (!tvisfunc(errfunc) || L->status == LUA_ERRERR) { setstrV(L, top-1, lj_err_str(L, LJ_ERR_ERRERR)); lj_err_throw(L, LUA_ERRERR); @@ -622,12 +899,34 @@ LJ_NOINLINE void lj_err_run(lua_State *L) lj_err_throw(L, LUA_ERRRUN); } +/* Stack overflow error. */ +void LJ_FASTCALL lj_err_stkov(lua_State *L) +{ + lj_debug_addloc(L, err2msg(LJ_ERR_STKOV), L->base-1, NULL); + lj_err_run(L); +} + +#if LJ_HASJIT +/* Rethrow error after doing a trace exit. */ +LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode) +{ + if (errcode == LUA_ERRRUN) + lj_err_run(L); + else + lj_err_throw(L, errcode); +} +#endif + /* Formatted runtime error message. */ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) 
{ const char *msg; va_list argp; va_start(argp, em); + if (LJ_HASJIT) { + TValue *base = tvref(G(L)->jit_base); + if (base) L->base = base; + } if (curr_funcisL(L)) L->top = curr_topL(L); msg = lj_strfmt_pushvf(L, err2msg(em), argp); va_end(argp); @@ -690,9 +989,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o) const BCIns *pc = cframe_Lpc(L); if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { const char *tname = lj_typename(o); + setframe_gc(o, obj2gco(L), LJ_TTHREAD); if (LJ_FR2) o++; setframe_pc(o, pc); - setframe_gc(o, obj2gco(L), LJ_TTHREAD); L->top = L->base = o+1; err_msgv(L, LJ_ERR_BADCALL, tname); } @@ -702,25 +1001,27 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o) /* Error in context of caller. */ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) { - TValue *frame = L->base-1; - TValue *pframe = NULL; - if (frame_islua(frame)) { - pframe = frame_prevl(frame); - } else if (frame_iscont(frame)) { - if (frame_iscont_fficb(frame)) { - pframe = frame; - frame = NULL; - } else { - pframe = frame_prevd(frame); + TValue *frame = NULL, *pframe = NULL; + if (!(LJ_HASJIT && tvref(G(L)->jit_base))) { + frame = L->base-1; + if (frame_islua(frame)) { + pframe = frame_prevl(frame); + } else if (frame_iscont(frame)) { + if (frame_iscont_fficb(frame)) { + pframe = frame; + frame = NULL; + } else { + pframe = frame_prevd(frame); #if LJ_HASFFI - /* Remove frame for FFI metamethods. */ - if (frame_func(frame)->c.ffid >= FF_ffi_meta___index && - frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) { - L->base = pframe+1; - L->top = frame; - setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame)); - } + /* Remove frame for FFI metamethods. 
*/ + if (frame_func(frame)->c.ffid >= FF_ffi_meta___index && + frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) { + L->base = pframe+1; + L->top = frame; + setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame)); + } #endif + } } } lj_debug_addloc(L, msg, pframe, frame); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_err.h b/source/libs/luajit/LuaJIT-src/src/lj_err.h index cba5fb7149e6f39d266be93b6bfc302112d3146f..0cb945b08258387d06c092b49b424a3d0d10f152 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_err.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_err.h @@ -1,6 +1,6 @@ /* ** Error handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_ERR_H @@ -23,7 +23,11 @@ LJ_DATA const char *lj_err_allmsg; LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em); LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode); LJ_FUNC_NORET void lj_err_mem(lua_State *L); -LJ_FUNC_NORET void lj_err_run(lua_State *L); +LJ_FUNC_NORET void LJ_FASTCALL lj_err_stkov(lua_State *L); +LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L); +#if LJ_HASJIT +LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode); +#endif LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok, BCLine line, ErrMsg em, va_list argp); @@ -38,4 +42,18 @@ LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...); LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname); LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt); +#if LJ_UNWIND_JIT && !LJ_ABI_WIN +LJ_FUNC uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info); +LJ_FUNC void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info); +#else +#define lj_err_register_mcode(base, sz, info) (info) +#define lj_err_deregister_mcode(base, sz, info) UNUSED(base) +#endif + +#if 
LJ_UNWIND_EXT && !LJ_ABI_WIN && defined(LUA_USE_ASSERT) +LJ_FUNC void lj_err_verify(void); +#else +#define lj_err_verify() ((void)0) +#endif + #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_errmsg.h b/source/libs/luajit/LuaJIT-src/src/lj_errmsg.h index 060a9f89750b376e11ee52b1d501b427fef57744..045d025b4dc069de43517e7840db64a29d499cad 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_errmsg.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_errmsg.h @@ -1,6 +1,6 @@ /* ** VM error messages. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ /* This file may be included multiple times with different ERRDEF macros. */ @@ -67,6 +67,7 @@ ERRDEF(PROTMT, "cannot change a protected metatable") ERRDEF(UNPACK, "too many results to unpack") ERRDEF(RDRSTR, "reader function must return a string") ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print")) +ERRDEF(NUMRNG, "number out of range") ERRDEF(IDXRNG, "index out of range") ERRDEF(BASERNG, "base out of range") ERRDEF(LVLRNG, "level out of range") @@ -78,6 +79,7 @@ ERRDEF(SETFENV, LUA_QL("setfenv") " cannot change environment of given object") ERRDEF(CORUN, "cannot resume running coroutine") ERRDEF(CODEAD, "cannot resume dead coroutine") ERRDEF(COSUSP, "cannot resume non-suspended coroutine") +ERRDEF(PRNGSD, "PRNG seeding failed") ERRDEF(TABINS, "wrong number of arguments to " LUA_QL("insert")) ERRDEF(TABCAT, "invalid value (%s) at index %d in table for " LUA_QL("concat")) ERRDEF(TABSORT, "invalid order function for sorting") @@ -101,11 +103,7 @@ ERRDEF(STRGSRV, "invalid replacement value (a %s)") ERRDEF(BADMODN, "name conflict for module " LUA_QS) #if LJ_HASJIT ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") -#if LJ_TARGET_X86ORX64 -ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2") -#else ERRDEF(NOJIT, "JIT compiler disabled") -#endif #elif defined(LJ_ARCH_NOJIT) 
ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") #else @@ -183,6 +181,19 @@ ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields") ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") #endif +#if LJ_HASBUFFER +/* String buffer errors. */ +ERRDEF(BUFFER_SELF, "cannot put buffer into itself") +ERRDEF(BUFFER_BADOPT, "bad options table") +ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS) +ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x") +ERRDEF(BUFFER_BADDICTX, "cannot deserialize dictionary index %d") +ERRDEF(BUFFER_DEPTH, "too deep to serialize") +ERRDEF(BUFFER_DUPKEY, "duplicate table key") +ERRDEF(BUFFER_EOB, "unexpected end of buffer") +ERRDEF(BUFFER_LEFTOV, "left-over data in buffer") +#endif + #undef ERRDEF /* Detecting unused error messages: diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ff.h b/source/libs/luajit/LuaJIT-src/src/lj_ff.h index 31d65a00dba0239a03eaf7b959533db75b632171..301df0eb996777aeeae837bd63e2ec9ba1ce0307 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_ff.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_ff.h @@ -1,6 +1,6 @@ /* ** Fast function IDs. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_FF_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ffrecord.c b/source/libs/luajit/LuaJIT-src/src/lj_ffrecord.c index dfdee2dbfe1078b0ac19ef5d3310364a4460ffc6..9ea81e549a86dc4b4ab1c273b8393de189e0f42f 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_ffrecord.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_ffrecord.c @@ -1,6 +1,6 @@ /* ** Fast function call recorder. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #define lj_ffrecord_c @@ -11,6 +11,7 @@ #if LJ_HASJIT #include "lj_err.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_frame.h" @@ -28,6 +29,7 @@ #include "lj_vm.h" #include "lj_strscan.h" #include "lj_strfmt.h" +#include "lj_serialize.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) @@ -96,6 +98,14 @@ static ptrdiff_t results_wanted(jit_State *J) return -1; } +static TValue *rec_stop_stitch_cp(lua_State *L, lua_CFunction dummy, void *ud) +{ + jit_State *J = (jit_State *)ud; + lj_record_stop(J, LJ_TRLINK_STITCH, 0); + UNUSED(L); UNUSED(dummy); + return NULL; +} + /* Trace stitching: add continuation below frame to start a new trace. */ static void recff_stitch(jit_State *J) { @@ -106,6 +116,7 @@ static void recff_stitch(jit_State *J) TValue *nframe = base + 1 + LJ_FR2; const BCIns *pc = frame_pc(base-1); TValue *pframe = frame_prevl(base-1); + int errcode; /* Move func + args up in Lua stack and insert continuation. */ memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot); @@ -130,13 +141,21 @@ static void recff_stitch(jit_State *J) J->baseslot += 2 + LJ_FR2; J->framedepth++; - lj_record_stop(J, LJ_TRLINK_STITCH, 0); + errcode = lj_vm_cpcall(L, NULL, J, rec_stop_stitch_cp); /* Undo Lua stack changes. */ memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot); setframe_pc(base-1, pc); L->base -= 2 + LJ_FR2; L->top -= 2 + LJ_FR2; + + if (errcode) { + if (errcode == LUA_ERRRUN) + copyTV(L, L->top-1, L->top + (1 + LJ_FR2)); + else + setintV(L->top-1, (int32_t)LJ_TRERR_RECERR); + lj_err_throw(L, errcode); /* Propagate errors. */ + } } /* Fallback handler for fast functions that are not recorded (yet). */ @@ -182,6 +201,14 @@ static TRef recff_bufhdr(jit_State *J) lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); } +/* Emit TMPREF. 
*/ +static TRef recff_tmpref(jit_State *J, TRef tr, int mode) +{ + if (!LJ_DUALNUM && tref_isinteger(tr)) + tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); + return emitir(IRT(IR_TMPREF, IRT_PGC), tr, mode); +} + /* -- Base library fast functions ----------------------------------------- */ static void LJ_FASTCALL recff_assert(jit_State *J, RecordFFData *rd) @@ -232,7 +259,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd) mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); if (!tref_isnil(mt)) - emitir(IRT(IR_TBAR, IRT_TAB), tr, 0); + emitir(IRT(IR_TBAR, IRT_NIL), tr, 0); J->base[0] = tr; J->needsnap = 1; } /* else: Interpreter will throw. */ @@ -281,7 +308,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd) if (tref_isstr(tr)) J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); else if (tref_istab(tr)) - J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr); + J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL); /* else: Interpreter will throw. */ UNUSED(rd); } @@ -296,7 +323,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv) } else { TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); - emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); + emitir(IRTGI(IR_EQ), trchar, lj_ir_kint(J, '#')); } return 0; } else { /* select(n, ...) */ @@ -317,9 +344,9 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd) ptrdiff_t n = (ptrdiff_t)J->maxslot; if (start < 0) start += n; else if (start > n) start = n; - rd->nres = n - start; if (start >= 1) { ptrdiff_t i; + rd->nres = n - start; for (i = 0; i < n - start; i++) J->base[i] = J->base[start+i]; } /* else: Interpreter will throw. */ @@ -455,6 +482,7 @@ static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) #endif lj_record_call(J, 0, J->maxslot - 1); rd->nres = -1; /* Pending call. 
*/ + J->needsnap = 1; /* Start catching on-trace errors. */ } /* else: Interpreter will throw. */ } @@ -490,6 +518,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) if (errcode) lj_err_throw(J->L, errcode); /* Propagate errors. */ rd->nres = -1; /* Pending call. */ + J->needsnap = 1; /* Start catching on-trace errors. */ } /* else: Interpreter will throw. */ } @@ -505,6 +534,40 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) recff_nyiu(J, rd); } +static void LJ_FASTCALL recff_next(jit_State *J, RecordFFData *rd) +{ +#if LJ_BE + /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, + ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. + */ + recff_nyi(J, rd); +#else + TRef tab = J->base[0]; + if (tref_istab(tab)) { + RecordIndex ix; + cTValue *keyv; + ix.tab = tab; + if (tref_isnil(J->base[1])) { /* Shortcut for start of traversal. */ + ix.key = lj_ir_kint(J, 0); + keyv = niltvg(J2G(J)); + } else { + TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1); + ix.key = lj_ir_call(J, IRCALL_lj_tab_keyindex, tab, tmp); + keyv = &rd->argv[1]; + } + copyTV(J->L, &ix.tabv, &rd->argv[0]); + ix.keyv.u32.lo = lj_tab_keyindex(tabV(&ix.tabv), keyv); + /* Omit the value, if not used by the caller. */ + ix.idxchain = (J->framedepth && frame_islua(J->L->base-1) && + bc_b(frame_pc(J->L->base-1)[-1])-1 < 2); + ix.mobj = 0; /* We don't need the next index. */ + rd->nres = lj_record_next(J, &ix); + J->base[0] = ix.key; + J->base[1] = ix.val; + } /* else: Interpreter will throw. 
*/ +#endif +} + /* -- Math library fast functions ----------------------------------------- */ static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) @@ -563,7 +626,7 @@ static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); TRef tr2 = lj_ir_tonum(J, J->base[1]); - J->base[0] = emitir(IRTN(IR_ATAN2), tr, tr2); + J->base[0] = lj_ir_call(J, IRCALL_atan2, tr, tr2); UNUSED(rd); } @@ -580,47 +643,16 @@ static void LJ_FASTCALL recff_math_ldexp(jit_State *J, RecordFFData *rd) UNUSED(rd); } -/* Record math.asin, math.acos, math.atan. */ -static void LJ_FASTCALL recff_math_atrig(jit_State *J, RecordFFData *rd) -{ - TRef y = lj_ir_tonum(J, J->base[0]); - TRef x = lj_ir_knum_one(J); - uint32_t ffid = rd->data; - if (ffid != FF_math_atan) { - TRef tmp = emitir(IRTN(IR_MUL), y, y); - tmp = emitir(IRTN(IR_SUB), x, tmp); - tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT); - if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; } - } - J->base[0] = emitir(IRTN(IR_ATAN2), y, x); -} - -static void LJ_FASTCALL recff_math_htrig(jit_State *J, RecordFFData *rd) +static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data); } -static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd) -{ - TRef tr = J->base[0]; - if (tref_isinteger(tr)) { - J->base[0] = tr; - J->base[1] = lj_ir_kint(J, 0); - } else { - TRef trt; - tr = lj_ir_tonum(J, tr); - trt = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC); - J->base[0] = trt; - J->base[1] = emitir(IRTN(IR_SUB), tr, trt); - } - rd->nres = 2; -} - static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) { - J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], - &rd->argv[0], &rd->argv[1]); + J->base[0] = lj_opt_narrow_arith(J, J->base[0], J->base[1], + &rd->argv[0], &rd->argv[1], IR_POW); UNUSED(rd); } @@ -647,7 +679,7 @@ static void LJ_FASTCALL 
recff_math_random(jit_State *J, RecordFFData *rd) GCudata *ud = udataV(&J->fn->c.upvalue[0]); TRef tr, one; lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */ - tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud))); + tr = lj_ir_call(J, IRCALL_lj_prng_u64d, lj_ir_kptr(J, uddata(ud))); one = lj_ir_knum_one(J); tr = emitir(IRTN(IR_SUB), tr, one); if (J->base[0]) { @@ -738,7 +770,7 @@ static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd) #if LJ_HASFFI TRef hdr = recff_bufhdr(J); TRef tr = recff_bit64_tohex(J, rd, hdr); - J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); #else recff_nyiu(J, rd); /* Don't bother working around this NYI. */ #endif @@ -864,8 +896,10 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) if (i > 1) { /* Concatenate the strings, if there's more than one. */ TRef hdr = recff_bufhdr(J), tr = hdr; for (i = 0; J->base[i] != 0; i++) - tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]); - J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, J->base[i]); + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); + } else if (i == 0) { + J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty); } UNUSED(rd); } @@ -881,19 +915,19 @@ static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd) emitir(IRTGI(vrep > 1 ? 
IR_GT : IR_LE), rep, lj_ir_kint(J, 1)); if (vrep > 1) { TRef hdr2 = recff_bufhdr(J); - TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep); - tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str); - str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2); + TRef tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), hdr2, sep); + tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr2, str); + str2 = emitir(IRTG(IR_BUFSTR, IRT_STR), tr2, hdr2); } } tr = hdr = recff_bufhdr(J); if (str2) { - tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str); + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, str); str = str2; rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1)); } tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep); - J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); } static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) @@ -901,7 +935,7 @@ static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) TRef str = lj_ir_tostr(J, J->base[0]); TRef hdr = recff_bufhdr(J); TRef tr = lj_ir_call(J, rd->data, hdr, str); - J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); } static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) @@ -949,7 +983,8 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) str->len-(MSize)start, pat->len)) { TRef pos; emitir(IRTG(IR_NE, IRT_PGC), tr, trp0); - pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_PGC), trstr, tr0)); + /* Recompute offset. trsptr may not point into trstr after folding. 
*/ + pos = emitir(IRTI(IR_ADD), emitir(IRTI(IR_SUB), tr, trsptr), trstart); J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1)); J->base[1] = emitir(IRTI(IR_ADD), pos, trplen); rd->nres = 2; @@ -963,34 +998,41 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) } } -static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) +static void recff_format(jit_State *J, RecordFFData *rd, TRef hdr, int sbufx) { - TRef trfmt = lj_ir_tostr(J, J->base[0]); - GCstr *fmt = argv2str(J, &rd->argv[0]); - int arg = 1; - TRef hdr, tr; + ptrdiff_t arg = sbufx; + TRef tr = hdr, trfmt = lj_ir_tostr(J, J->base[arg]); + GCstr *fmt = argv2str(J, &rd->argv[arg]); FormatState fs; SFormat sf; + int nfmt = 0; /* Specialize to the format string. */ emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt)); - tr = hdr = recff_bufhdr(J); lj_strfmt_init(&fs, strdata(fmt), fmt->len); while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */ - TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++]; + TRef tra = sf == STRFMT_LIT ? 0 : J->base[++arg]; TRef trsf = lj_ir_kint(J, (int32_t)sf); IRCallID id; switch (STRFMT_TYPE(sf)) { case STRFMT_LIT: - tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len))); break; case STRFMT_INT: id = IRCALL_lj_strfmt_putfnum_int; handle_int: - if (!tref_isinteger(tra)) + if (!tref_isinteger(tra)) { +#if LJ_HASFFI + if (tref_iscdata(tra)) { + tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg]); + tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra); + break; + } +#endif goto handle_num; + } if (sf == STRFMT_INT) { /* Shortcut for plain %d. 
*/ - tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT)); } else { #if LJ_HASFFI @@ -1012,15 +1054,16 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) handle_num: tra = lj_ir_tonum(J, tra); tr = lj_ir_call(J, id, tr, trsf, tra); - if (LJ_SOFTFP) lj_needsplit(J); + if (LJ_SOFTFP32) lj_needsplit(J); break; case STRFMT_STR: if (!tref_isstr(tra)) { recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */ + /* NYI: also buffers. */ return; } if (sf == STRFMT_STR) /* Shortcut for plain %s. */ - tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra); + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, tra); else if ((sf & STRFMT_T_QUOTED)) tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra); else @@ -1029,7 +1072,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) case STRFMT_CHAR: tra = lj_opt_narrow_toint(J, tra); if (sf == STRFMT_CHAR) /* Shortcut for plain %c. 
*/ - tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR)); else tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra); @@ -1040,10 +1083,341 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) recff_nyiu(J, rd); return; } + if (++nfmt > 100) lj_trace_err(J, LJ_TRERR_TRACEOV); } - J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); + if (sbufx) { + emitir(IRT(IR_USE, IRT_NIL), tr, 0); + } else { + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); + } +} + +static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) +{ + recff_format(J, rd, recff_bufhdr(J), 0); +} + +/* -- Buffer library fast functions --------------------------------------- */ + +#if LJ_HASBUFFER + +static LJ_AINLINE TRef recff_sbufx_get_L(jit_State *J, TRef ud) +{ + return emitir(IRT(IR_FLOAD, IRT_PGC), ud, IRFL_SBUF_L); +} + +static LJ_AINLINE void recff_sbufx_set_L(jit_State *J, TRef ud, TRef val) +{ + TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_L); + emitir(IRT(IR_FSTORE, IRT_PGC), fref, val); +} + +static LJ_AINLINE TRef recff_sbufx_get_ptr(jit_State *J, TRef ud, IRFieldID fl) +{ + return emitir(IRT(IR_FLOAD, IRT_PTR), ud, fl); +} + +static LJ_AINLINE void recff_sbufx_set_ptr(jit_State *J, TRef ud, IRFieldID fl, TRef val) +{ + TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ud, fl); + emitir(IRT(IR_FSTORE, IRT_PTR), fref, val); +} + +static LJ_AINLINE TRef recff_sbufx_len(jit_State *J, TRef trr, TRef trw) +{ + TRef len = emitir(IRT(IR_SUB, IRT_INTP), trw, trr); + if (LJ_64) + len = emitir(IRTI(IR_CONV), len, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE); + return len; } +/* Emit typecheck for string buffer. 
*/ +static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, ptrdiff_t arg) +{ + TRef trtype, ud = J->base[arg]; + if (!tvisbuf(&rd->argv[arg])) lj_trace_err(J, LJ_TRERR_BADTYPE); + trtype = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE); + emitir(IRTGI(IR_EQ), trtype, lj_ir_kint(J, UDTYPE_BUFFER)); + J->needsnap = 1; + return ud; +} + +/* Emit BUFHDR for write to extended string buffer. */ +static TRef recff_sbufx_write(jit_State *J, TRef ud) +{ + TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kintpgc(J, sizeof(GCudata))); + return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE); +} + +/* Check for integer in range for the buffer API. */ +static TRef recff_sbufx_checkint(jit_State *J, RecordFFData *rd, ptrdiff_t arg) +{ + TRef tr = J->base[arg]; + TRef trlim = lj_ir_kint(J, LJ_MAX_BUF); + if (tref_isinteger(tr)) { + emitir(IRTGI(IR_ULE), tr, trlim); + } else if (tref_isnum(tr)) { + tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY); + emitir(IRTGI(IR_ULE), tr, trlim); +#if LJ_HASFFI + } else if (tref_iscdata(tr)) { + tr = lj_crecord_loadiu64(J, tr, &rd->argv[arg]); + emitir(IRTG(IR_ULE, IRT_U64), tr, lj_ir_kint64(J, LJ_MAX_BUF)); + tr = emitir(IRTI(IR_CONV), tr, (IRT_INT<<5)|IRT_I64|IRCONV_NONE); +#else + UNUSED(rd); +#endif + } else { + lj_trace_err(J, LJ_TRERR_BADTYPE); + } + return tr; +} + +static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + SBufExt *sbx = bufV(&rd->argv[0]); + int iscow = (int)sbufiscow(sbx); + TRef trl = recff_sbufx_get_L(J, ud); + TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW)); + TRef zeropgc = lj_ir_kintpgc(J, 0); + emitir(IRTG(iscow ? 
IR_NE : IR_EQ, IRT_IGC), trcow, zeropgc); + if (iscow) { + TRef zerop = lj_ir_kintp(J, 0); + trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW)); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zerop); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zerop); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zerop); + recff_sbufx_set_L(J, ud, trl); + emitir(IRT(IR_FSTORE, IRT_PGC), + emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zeropgc); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zerop); + } else { + TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trb); + } +} + +static void LJ_FASTCALL recff_buffer_method_skip(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + TRef len = recff_sbufx_len(J, trr, trw); + TRef trn = recff_sbufx_checkint(J, rd, 1); + len = emitir(IRTI(IR_MIN), len, trn); + trr = emitir(IRT(IR_ADD, IRT_PTR), trr, len); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); +} + +static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef tr = J->base[1]; + if (tref_isstr(tr)) { + TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); + TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); + IRIns *irp = IR(tref_ref(trp)); + /* trp must point into the anchored obj, even after folding. 
*/ + if (irp->o == IR_STRREF) + tr = irp->op1; + else if (!tref_isk(tr)) + trp = emitir(IRT(IR_ADD, IRT_PGC), tr, lj_ir_kintpgc(J, sizeof(GCstr))); + lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr); +#if LJ_HASFFI + } else if (tref_iscdata(tr)) { + TRef trp = lj_crecord_topcvoid(J, tr, &rd->argv[1]); + TRef len = recff_sbufx_checkint(J, rd, 2); + lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr); +#endif + } /* else: Interpreter will throw. */ +} + +static void LJ_FASTCALL recff_buffer_method_put(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef tr; + ptrdiff_t arg; + if (!J->base[1]) return; + for (arg = 1; (tr = J->base[arg]); arg++) { + if (tref_isudata(tr)) { + TRef ud2 = recff_sbufx_check(J, rd, arg); + emitir(IRTG(IR_NE, IRT_PGC), ud, ud2); + } + } + for (arg = 1; (tr = J->base[arg]); arg++) { + if (tref_isstr(tr)) { + trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, tr); + } else if (tref_isnumber(tr)) { + trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, + emitir(IRT(IR_TOSTR, IRT_STR), tr, + tref_isnum(tr) ? 
IRTOSTR_NUM : IRTOSTR_INT)); + } else if (tref_isudata(tr)) { + TRef trr = recff_sbufx_get_ptr(J, tr, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, tr, IRFL_SBUF_W); + TRef len = recff_sbufx_len(J, trr, trw); + trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, trr, len); + } else { + recff_nyiu(J, rd); + } + } + emitir(IRT(IR_USE, IRT_NIL), trbuf, 0); +} + +static void LJ_FASTCALL recff_buffer_method_putf(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + recff_format(J, rd, trbuf, 1); +} + +static void LJ_FASTCALL recff_buffer_method_get(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + TRef tr; + ptrdiff_t arg; + if (!J->base[1]) { J->base[1] = TREF_NIL; J->base[2] = 0; } + for (arg = 0; (tr = J->base[arg+1]); arg++) { + if (!tref_isnil(tr)) { + J->base[arg+1] = recff_sbufx_checkint(J, rd, arg+1); + } + } + for (arg = 0; (tr = J->base[arg+1]); arg++) { + TRef len = recff_sbufx_len(J, trr, trw); + if (tref_isnil(tr)) { + J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len); + trr = trw; + } else { + TRef tru; + len = emitir(IRTI(IR_MIN), len, tr); + tru = emitir(IRT(IR_ADD, IRT_PTR), trr, len); + J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len); + trr = tru; /* Doing the ADD before the SNEW generates better code. 
*/ + } + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); + } + rd->nres = arg; +} + +static void LJ_FASTCALL recff_buffer_method___tostring(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + J->base[0] = emitir(IRT(IR_XSNEW, IRT_STR), trr, recff_sbufx_len(J, trr, trw)); +} + +static void LJ_FASTCALL recff_buffer_method___len(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + J->base[0] = recff_sbufx_len(J, trr, trw); +} + +#if LJ_HASFFI +static void LJ_FASTCALL recff_buffer_method_putcdata(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef tr = lj_crecord_topcvoid(J, J->base[1], &rd->argv[1]); + TRef len = recff_sbufx_checkint(J, rd, 2); + trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, tr, len); + emitir(IRT(IR_USE, IRT_NIL), trbuf, 0); +} + +static void LJ_FASTCALL recff_buffer_method_reserve(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef trsz = recff_sbufx_checkint(J, rd, 1); + J->base[1] = lj_ir_call(J, IRCALL_lj_bufx_more, trbuf, trsz); + J->base[0] = lj_crecord_topuint8(J, recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W)); + rd->nres = 2; +} + +static void LJ_FASTCALL recff_buffer_method_commit(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef len = recff_sbufx_checkint(J, rd, 1); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + TRef tre = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_E); + TRef left = emitir(IRT(IR_SUB, IRT_INTP), tre, trw); + if (LJ_64) + left = emitir(IRTI(IR_CONV), left, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE); + emitir(IRTGI(IR_ULE), len, left); + trw = emitir(IRT(IR_ADD, IRT_PTR), trw, len); + 
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trw); +} + +static void LJ_FASTCALL recff_buffer_method_ref(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + J->base[0] = lj_crecord_topuint8(J, trr); + J->base[1] = recff_sbufx_len(J, trr, trw); + rd->nres = 2; +} +#endif + +static void LJ_FASTCALL recff_buffer_method_encode(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1); + lj_ir_call(J, IRCALL_lj_serialize_put, trbuf, tmp); + /* No IR_USE needed, since the call is a store. */ +} + +static void LJ_FASTCALL recff_buffer_method_decode(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1); + TRef trr = lj_ir_call(J, IRCALL_lj_serialize_get, trbuf, tmp); + IRType t = (IRType)lj_serialize_peektype(bufV(&rd->argv[0])); + /* No IR_USE needed, since the call is a store. */ + J->base[0] = lj_record_vload(J, tmp, 0, t); + /* The sbx->r store must be after the VLOAD type check, in case it fails. */ + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); +} + +static void LJ_FASTCALL recff_buffer_encode(jit_State *J, RecordFFData *rd) +{ + TRef tmp = recff_tmpref(J, J->base[0], IRTMPREF_IN1); + J->base[0] = lj_ir_call(J, IRCALL_lj_serialize_encode, tmp); + /* IR_USE needed for IR_CALLA, because the encoder may throw non-OOM. 
*/ + emitir(IRT(IR_USE, IRT_NIL), J->base[0], 0); + UNUSED(rd); +} + +static void LJ_FASTCALL recff_buffer_decode(jit_State *J, RecordFFData *rd) +{ + if (tvisstr(&rd->argv[0])) { + GCstr *str = strV(&rd->argv[0]); + SBufExt sbx; + IRType t; + TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1); + TRef tr = lj_ir_call(J, IRCALL_lj_serialize_decode, tmp, J->base[0]); + /* IR_USE needed for IR_CALLA, because the decoder may throw non-OOM. + ** That's why IRCALL_lj_serialize_decode needs a fake INT result. + */ + emitir(IRT(IR_USE, IRT_NIL), tr, 0); + memset(&sbx, 0, sizeof(SBufExt)); + lj_bufx_set_cow(J->L, &sbx, strdata(str), str->len); + t = (IRType)lj_serialize_peektype(&sbx); + J->base[0] = lj_record_vload(J, tmp, 0, t); + } /* else: Interpreter will throw. */ +} + +#endif + /* -- Table library fast functions ---------------------------------------- */ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) @@ -1054,7 +1428,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) rd->nres = 0; if (tref_istab(ix.tab) && ix.val) { if (!J->base[2]) { /* Simple push: t[#t+1] = v */ - TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab); + TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL); GCtab *t = tabV(&rd->argv[0]); ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); settabV(J->L, &ix.tabv, t); @@ -1078,11 +1452,11 @@ static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd) lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1); TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ? 
lj_opt_narrow_toint(J, J->base[3]) : - lj_ir_call(J, IRCALL_lj_tab_len, tab); + emitir(IRTI(IR_ALEN), tab, TREF_NIL); TRef hdr = recff_bufhdr(J); TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre); emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL)); - J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); } /* else: Interpreter will throw. */ UNUSED(rd); } @@ -1091,6 +1465,15 @@ static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd) { TRef tra = lj_opt_narrow_toint(J, J->base[0]); TRef trh = lj_opt_narrow_toint(J, J->base[1]); + if (tref_isk(tra) && tref_isk(trh)) { + int32_t a = IR(tref_ref(tra))->i; + if (a < 0x7fff) { + uint32_t hbits = hsize2hbits(IR(tref_ref(trh))->i); + a = a > 0 ? a+1 : 0; + J->base[0] = emitir(IRTG(IR_TNEW, IRT_TAB), (uint32_t)a, hbits); + return; + } + } J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh); UNUSED(rd); } @@ -1114,13 +1497,7 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) { TRef tr, ud, fp; if (id) { /* io.func() */ -#if LJ_GC64 - /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); -#else - tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); - ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); -#endif } else { /* fp:method() */ ud = J->base[0]; if (!tref_isudata(ud)) diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ffrecord.h b/source/libs/luajit/LuaJIT-src/src/lj_ffrecord.h index 3b407450d5824b892c75116d6288cfa156b05b1e..6ac9f0e33f414e75090fa9efabb8e7991a4dae44 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_ffrecord.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_ffrecord.h @@ -1,6 +1,6 @@ /* ** Fast function call recorder. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_FFRECORD_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_frame.h b/source/libs/luajit/LuaJIT-src/src/lj_frame.h index 19c49a4aef49bbd8ae64f46704a5726b2b29b170..a7e07d896918fdcae9add187b5fcd6ce25111c5e 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_frame.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_frame.h @@ -1,6 +1,6 @@ /* ** Stack frames. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_FRAME_H @@ -46,7 +46,7 @@ enum { #define frame_gc(f) (gcval((f)-1)) #define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz) #define frame_pc(f) ((const BCIns *)frame_ftsz(f)) -#define setframe_gc(f, p, tp) (setgcVraw((f)-1, (p), (tp))) +#define setframe_gc(f, p, tp) (setgcVraw((f), (p), (tp))) #define setframe_ftsz(f, sz) ((f)->ftsz = (sz)) #define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc)) #else @@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ #endif #define CFRAME_SHIFT_MULTRES 3 #elif LJ_TARGET_ARM64 -#define CFRAME_OFS_ERRF 196 -#define CFRAME_OFS_NRES 200 -#define CFRAME_OFS_PREV 160 -#define CFRAME_OFS_L 176 -#define CFRAME_OFS_PC 168 -#define CFRAME_OFS_MULTRES 192 +#define CFRAME_OFS_ERRF 36 +#define CFRAME_OFS_NRES 40 +#define CFRAME_OFS_PREV 0 +#define CFRAME_OFS_L 16 +#define CFRAME_OFS_PC 8 +#define CFRAME_OFS_MULTRES 32 #define CFRAME_SIZE 208 #define CFRAME_SHIFT_MULTRES 3 #elif LJ_TARGET_PPC @@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ #define CFRAME_OFS_L 36 #define CFRAME_OFS_PC 32 #define CFRAME_OFS_MULTRES 28 -#define CFRAME_SIZE 272 +#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 
272 : 128) #define CFRAME_SHIFT_MULTRES 3 #endif #elif LJ_TARGET_MIPS32 diff --git a/source/libs/luajit/LuaJIT-src/src/lj_func.c b/source/libs/luajit/LuaJIT-src/src/lj_func.c index 639dad87684e16f88fd0b7e93c5465a030165e97..44d76b72465876226ff050832b58770d92fcc9ac 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_func.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_func.c @@ -1,6 +1,6 @@ /* ** Function handling (prototypes, functions and upvalues). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -24,9 +24,11 @@ void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt) /* -- Upvalues ------------------------------------------------------------ */ -static void unlinkuv(GCupval *uv) +static void unlinkuv(global_State *g, GCupval *uv) { - lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + UNUSED(g); + lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, + "broken upvalue chain"); setgcrefr(uvnext(uv)->prev, uv->prev); setgcrefr(uvprev(uv)->next, uv->next); } @@ -40,7 +42,7 @@ static GCupval *func_finduv(lua_State *L, TValue *slot) GCupval *uv; /* Search the sorted list of open upvalues. */ while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) { - lua_assert(!p->closed && uvval(p) != &p->tv); + lj_assertG(!p->closed && uvval(p) != &p->tv, "closed upvalue in chain"); if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */ if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. 
*/ flipwhite(obj2gco(p)); @@ -61,7 +63,8 @@ static GCupval *func_finduv(lua_State *L, TValue *slot) setgcrefr(uv->next, g->uvhead.next); setgcref(uvnext(uv)->prev, obj2gco(uv)); setgcref(g->uvhead.next, obj2gco(uv)); - lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, + "broken upvalue chain"); return uv; } @@ -84,12 +87,13 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) while (gcref(L->openupval) != NULL && uvval((uv = gco2uv(gcref(L->openupval)))) >= level) { GCobj *o = obj2gco(uv); - lua_assert(!isblack(o) && !uv->closed && uvval(uv) != &uv->tv); + lj_assertG(!isblack(o), "bad black upvalue"); + lj_assertG(!uv->closed && uvval(uv) != &uv->tv, "closed upvalue in chain"); setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */ if (isdead(g, o)) { lj_func_freeuv(g, uv); } else { - unlinkuv(uv); + unlinkuv(g, uv); lj_gc_closeuv(g, uv); } } @@ -98,7 +102,7 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv) { if (!uv->closed) - unlinkuv(uv); + unlinkuv(g, uv); lj_mem_freet(g, uv); } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_func.h b/source/libs/luajit/LuaJIT-src/src/lj_func.h index 901751b98139cf32b8d25a4d6144ce5197206d7e..7d538d71c081a76c7cc16ee0a2f21915db94250f 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_func.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_func.h @@ -1,6 +1,6 @@ /* ** Function handling (prototypes, functions and upvalues). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_FUNC_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_gc.c b/source/libs/luajit/LuaJIT-src/src/lj_gc.c index 2aaf5b2c4f735e16f47461574e8f50965899e008..d9581d20d337ab0a39cd252c442d37b2b86f136b 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_gc.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_gc.c @@ -1,6 +1,6 @@ /* ** Garbage collector. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -25,7 +25,9 @@ #include "lj_cdata.h" #endif #include "lj_trace.h" +#include "lj_dispatch.h" #include "lj_vm.h" +#include "lj_vmevent.h" #define GCSTEPSIZE 1024u #define GCSWEEPMAX 40 @@ -41,7 +43,8 @@ /* Mark a TValue (if needed). */ #define gc_marktv(g, tv) \ - { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \ + { lj_assertG(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct), \ + "TValue and GC type mismatch"); \ if (tviswhite(tv)) gc_mark(g, gcV(tv)); } /* Mark a GCobj (if needed). */ @@ -55,21 +58,32 @@ static void gc_mark(global_State *g, GCobj *o) { int gct = o->gch.gct; - lua_assert(iswhite(o) && !isdead(g, o)); + lj_assertG(iswhite(o), "mark of non-white object"); + lj_assertG(!isdead(g, o), "mark of dead object"); white2gray(o); if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) { GCtab *mt = tabref(gco2ud(o)->metatable); gray2black(o); /* Userdata are never gray. 
*/ if (mt) gc_markobj(g, mt); gc_markobj(g, tabref(gco2ud(o)->env)); + if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) { + SBufExt *sbx = (SBufExt *)uddata(gco2ud(o)); + if (sbufiscow(sbx) && gcref(sbx->cowref)) + gc_markobj(g, gcref(sbx->cowref)); + if (gcref(sbx->dict_str)) + gc_markobj(g, gcref(sbx->dict_str)); + if (gcref(sbx->dict_mt)) + gc_markobj(g, gcref(sbx->dict_mt)); + } } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) { GCupval *uv = gco2uv(o); gc_marktv(g, uvval(uv)); if (uv->closed) gray2black(o); /* Closed upvalues are never gray. */ } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { - lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || - gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE); + lj_assertG(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || + gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE, + "bad GC type %d", gct); setgcrefr(o->gch.gclist, g->gc.gray); setgcref(g->gc.gray, o); } @@ -102,7 +116,8 @@ static void gc_mark_uv(global_State *g) { GCupval *uv; for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) { - lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, + "broken upvalue chain"); if (isgray(obj2gco(uv))) gc_marktv(g, uvval(uv)); } @@ -172,8 +187,7 @@ static int gc_traverse_tab(global_State *g, GCtab *t) } if (weak) { /* Weak tables are cleared in the atomic phase. */ #if LJ_HASFFI - CTState *cts = ctype_ctsG(g); - if (cts && cts->finalizer == t) { + if (gcref(g->gcroot[GCROOT_FFI_FIN]) == obj2gco(t)) { weak = (int)(~0u & ~LJ_GC_WEAKVAL); } else #endif @@ -197,7 +211,7 @@ static int gc_traverse_tab(global_State *g, GCtab *t) for (i = 0; i <= hmask; i++) { Node *n = &node[i]; if (!tvisnil(&n->val)) { /* Mark non-empty slot. 
*/ - lua_assert(!tvisnil(&n->key)); + lj_assertG(!tvisnil(&n->key), "mark of nil key in non-empty slot"); if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key); if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val); } @@ -212,7 +226,8 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) gc_markobj(g, tabref(fn->c.env)); if (isluafunc(fn)) { uint32_t i; - lua_assert(fn->l.nupvalues <= funcproto(fn)->sizeuv); + lj_assertG(fn->l.nupvalues <= funcproto(fn)->sizeuv, + "function upvalues out of range"); gc_markobj(g, funcproto(fn)); for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */ gc_markobj(g, &gcref(fn->l.uvptr[i])->uv); @@ -228,7 +243,7 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) static void gc_marktrace(global_State *g, TraceNo traceno) { GCobj *o = obj2gco(traceref(G2J(g), traceno)); - lua_assert(traceno != G2J(g)->cur.traceno); + lj_assertG(traceno != G2J(g)->cur.traceno, "active trace escaped"); if (iswhite(o)) { white2gray(o); setgcrefr(o->gch.gclist, g->gc.gray); @@ -309,7 +324,7 @@ static size_t propagatemark(global_State *g) { GCobj *o = gcref(g->gc.gray); int gct = o->gch.gct; - lua_assert(isgray(o)); + lj_assertG(isgray(o), "propagation of non-gray object"); gray2black(o); setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */ if (LJ_LIKELY(gct == ~LJ_TTAB)) { @@ -341,7 +356,7 @@ static size_t propagatemark(global_State *g) return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry); #else - lua_assert(0); + lj_assertG(0, "bad GC type %d", gct); return 0; #endif } @@ -395,11 +410,13 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */ gc_fullsweep(g, &gco2th(o)->openupval); if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? 
*/ - lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED)); + lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), + "sweep of undead object"); makewhite(g, o); /* Value is alive, change to the current white. */ p = &o->gch.nextgc; } else { /* Otherwise value is dead, free it. */ - lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED); + lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, + "sweep of unlive object"); setgcrefr(*p, o->gch.nextgc); if (o == gcref(g->gc.root)) setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */ @@ -409,6 +426,32 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) return p; } +/* Sweep one string interning table chain. Preserves hashalg bit. */ +static void gc_sweepstr(global_State *g, GCRef *chain) +{ + /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */ + int ow = otherwhite(g); + uintptr_t u = gcrefu(*chain); + GCRef q; + GCRef *p = &q; + GCobj *o; + setgcrefp(q, (u & ~(uintptr_t)1)); + while ((o = gcref(*p)) != NULL) { + if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ + lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), + "sweep of undead string"); + makewhite(g, o); /* String is alive, change to the current white. */ + p = &o->gch.nextgc; + } else { /* Otherwise string is dead, free it. */ + lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, + "sweep of unlive string"); + setgcrefr(*p, o->gch.nextgc); + lj_str_free(g, gco2str(o)); + } + } + setgcrefp(*chain, (gcrefu(q) | (u & 1))); +} + /* Check whether we can clear a key or a value slot from a table. */ static int gc_mayclear(cTValue *o, int val) { @@ -426,11 +469,12 @@ static int gc_mayclear(cTValue *o, int val) } /* Clear collected entries from weak tables. 
*/ -static void gc_clearweak(GCobj *o) +static void gc_clearweak(global_State *g, GCobj *o) { + UNUSED(g); while (o) { GCtab *t = gco2tab(o); - lua_assert((t->marked & LJ_GC_WEAK)); + lj_assertG((t->marked & LJ_GC_WEAK), "clear of non-weak table"); if ((t->marked & LJ_GC_WEAKVAL)) { MSize i, asize = t->asize; for (i = 0; i < asize; i++) { @@ -466,6 +510,7 @@ static void gc_call_finalizer(global_State *g, lua_State *L, TValue *top; lj_trace_abort(g); hook_entergc(g); /* Disable hooks and new traces during __gc. */ + if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ top = L->top; copyTV(L, top++, mo); @@ -474,9 +519,15 @@ static void gc_call_finalizer(global_State *g, lua_State *L, L->top = top+1; errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */ hook_restore(g, oldh); + if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); g->gc.threshold = oldt; /* Restore GC threshold. */ - if (errcode) - lj_err_throw(L, errcode); /* Propagate errors. */ + if (errcode) { + ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. */ + lj_vmevent_send(L, ERRFIN, + copyTV(L, L->top++, restorestack(L, errobj)); + ); + L->top--; + } } /* Finalize one userdata or cdata object from the mmudata list. */ @@ -485,7 +536,7 @@ static void gc_finalize(lua_State *L) global_State *g = G(L); GCobj *o = gcnext(gcref(g->gc.mmudata)); cTValue *mo; - lua_assert(tvref(g->jit_base) == NULL); /* Must not be called on trace. */ + lj_assertG(tvref(g->jit_base) == NULL, "finalizer called on trace"); /* Unchain from list of userdata to be finalized. */ if (o == gcref(g->gc.mmudata)) setgcrefnull(g->gc.mmudata); @@ -501,9 +552,8 @@ static void gc_finalize(lua_State *L) o->gch.marked &= (uint8_t)~LJ_GC_CDATA_FIN; /* Resolve finalizer. 
*/ setcdataV(L, &tmp, gco2cd(o)); - tv = lj_tab_set(L, ctype_ctsG(g)->finalizer, &tmp); + tv = lj_tab_set(L, tabref(g->gcroot[GCROOT_FFI_FIN]), &tmp); if (!tvisnil(tv)) { - g->gc.nocdatafin = 0; copyTV(L, &tmp, tv); setnilV(tv); /* Clear entry in finalizer table. */ gc_call_finalizer(g, L, &tmp, o); @@ -533,36 +583,32 @@ void lj_gc_finalize_udata(lua_State *L) void lj_gc_finalize_cdata(lua_State *L) { global_State *g = G(L); - CTState *cts = ctype_ctsG(g); - if (cts) { - GCtab *t = cts->finalizer; - Node *node = noderef(t->node); - ptrdiff_t i; - setgcrefnull(t->metatable); /* Mark finalizer table as disabled. */ - for (i = (ptrdiff_t)t->hmask; i >= 0; i--) - if (!tvisnil(&node[i].val) && tviscdata(&node[i].key)) { - GCobj *o = gcV(&node[i].key); - TValue tmp; - makewhite(g, o); - o->gch.marked &= (uint8_t)~LJ_GC_CDATA_FIN; - copyTV(L, &tmp, &node[i].val); - setnilV(&node[i].val); - gc_call_finalizer(g, L, &tmp, o); - } - } + GCtab *t = tabref(g->gcroot[GCROOT_FFI_FIN]); + Node *node = noderef(t->node); + ptrdiff_t i; + setgcrefnull(t->metatable); /* Mark finalizer table as disabled. */ + for (i = (ptrdiff_t)t->hmask; i >= 0; i--) + if (!tvisnil(&node[i].val) && tviscdata(&node[i].key)) { + GCobj *o = gcV(&node[i].key); + TValue tmp; + makewhite(g, o); + o->gch.marked &= (uint8_t)~LJ_GC_CDATA_FIN; + copyTV(L, &tmp, &node[i].val); + setnilV(&node[i].val); + gc_call_finalizer(g, L, &tmp, o); + } } #endif /* Free all remaining GC objects. */ void lj_gc_freeall(global_State *g) { - MSize i, strmask; + MSize i; /* Free everything, except super-fixed objects (the main thread). */ g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED; gc_fullsweep(g, &g->gc.root); - strmask = g->strmask; - for (i = 0; i <= strmask; i++) /* Free all string hash chains. */ - gc_fullsweep(g, &g->strhash[i]); + for (i = g->str.mask; i != ~(MSize)0; i--) /* Free all string hash chains. 
*/ + gc_sweepstr(g, &g->str.tab[i]); } /* -- Collector ----------------------------------------------------------- */ @@ -577,7 +623,7 @@ static void atomic(global_State *g, lua_State *L) setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */ setgcrefnull(g->gc.weak); - lua_assert(!iswhite(obj2gco(mainthread(g)))); + lj_assertG(!iswhite(obj2gco(mainthread(g))), "main thread turned white"); gc_markobj(g, L); /* Mark running thread. */ gc_traverse_curtrace(g); /* Traverse current trace. */ gc_mark_gcroot(g); /* Mark GC roots (again). */ @@ -592,7 +638,7 @@ static void atomic(global_State *g, lua_State *L) udsize += gc_propagate_gray(g); /* And propagate the marks. */ /* All marking done, clear weak tables. */ - gc_clearweak(gcref(g->gc.weak)); + gc_clearweak(g, gcref(g->gc.weak)); lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */ @@ -625,26 +671,23 @@ static size_t gc_onestep(lua_State *L) return 0; case GCSsweepstring: { GCSize old = g->gc.total; - gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ - if (g->gc.sweepstr > g->strmask) + gc_sweepstr(g, &g->str.tab[g->gc.sweepstr++]); /* Sweep one chain. */ + if (g->gc.sweepstr > g->str.mask) g->gc.state = GCSsweep; /* All string hash chains sweeped. */ - lua_assert(old >= g->gc.total); + lj_assertG(old >= g->gc.total, "sweep increased memory"); g->gc.estimate -= old - g->gc.total; return GCSWEEPCOST; } case GCSsweep: { GCSize old = g->gc.total; setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); - lua_assert(old >= g->gc.total); + lj_assertG(old >= g->gc.total, "sweep increased memory"); g->gc.estimate -= old - g->gc.total; if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { - if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1) - lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ + if (g->str.num <= (g->str.mask >> 2) && g->str.mask > LJ_MIN_STRTAB*2-1) + lj_str_resize(L, g->str.mask >> 1); /* Shrink string table. 
*/ if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ g->gc.state = GCSfinalize; -#if LJ_HASFFI - g->gc.nocdatafin = 1; -#endif } else { /* Otherwise skip this phase to help the JIT. */ g->gc.state = GCSpause; /* End of GC cycle. */ g->gc.debt = 0; @@ -654,21 +697,21 @@ static size_t gc_onestep(lua_State *L) } case GCSfinalize: if (gcref(g->gc.mmudata) != NULL) { + GCSize old = g->gc.total; if (tvref(g->jit_base)) /* Don't call finalizers on trace. */ return LJ_MAX_MEM; gc_finalize(L); /* Finalize one userdata object. */ + if (old >= g->gc.total && g->gc.estimate > old - g->gc.total) + g->gc.estimate -= old - g->gc.total; if (g->gc.estimate > GCFINALIZECOST) g->gc.estimate -= GCFINALIZECOST; return GCFINALIZECOST; } -#if LJ_HASFFI - if (!g->gc.nocdatafin) lj_tab_rehash(L, ctype_ctsG(g)->finalizer); -#endif g->gc.state = GCSpause; /* End of GC cycle. */ g->gc.debt = 0; return 0; default: - lua_assert(0); + lj_assertG(0, "bad GC state"); return 0; } } @@ -742,7 +785,8 @@ void lj_gc_fullgc(lua_State *L) } while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep) gc_onestep(L); /* Finish sweep. */ - lua_assert(g->gc.state == GCSfinalize || g->gc.state == GCSpause); + lj_assertG(g->gc.state == GCSfinalize || g->gc.state == GCSpause, + "bad GC state"); /* Now perform a full GC. */ g->gc.state = GCSpause; do { gc_onestep(L); } while (g->gc.state != GCSpause); @@ -755,9 +799,11 @@ void lj_gc_fullgc(lua_State *L) /* Move the GC propagation frontier forward. 
*/ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) { - lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); - lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); - lua_assert(o->gch.gct != ~LJ_TTAB); + lj_assertG(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o), + "bad object states for forward barrier"); + lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, + "bad GC state"); + lj_assertG(o->gch.gct != ~LJ_TTAB, "barrier object is not a table"); /* Preserve invariant during propagation. Otherwise it doesn't matter. */ if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic) gc_mark(g, v); /* Move frontier forward. */ @@ -794,7 +840,8 @@ void lj_gc_closeuv(global_State *g, GCupval *uv) lj_gc_barrierf(g, o, gcV(&uv->tv)); } else { makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */ - lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); + lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, + "bad GC state"); } } } @@ -814,12 +861,13 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno) void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz) { global_State *g = G(L); - lua_assert((osz == 0) == (p == NULL)); + lj_assertG((osz == 0) == (p == NULL), "realloc API violation"); p = g->allocf(g->allocd, p, osz, nsz); if (p == NULL && nsz > 0) lj_err_mem(L); - lua_assert((nsz == 0) == (p == NULL)); - lua_assert(checkptrGC(p)); + lj_assertG((nsz == 0) == (p == NULL), "allocf API violation"); + lj_assertG(checkptrGC(p), + "allocated memory address %p outside required range", p); g->gc.total = (g->gc.total - osz) + nsz; return p; } @@ -831,7 +879,8 @@ void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size) GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); if (o == NULL) lj_err_mem(L); - lua_assert(checkptrGC(o)); + lj_assertG(checkptrGC(o), + "allocated memory address %p outside required range", o); g->gc.total += size; 
setgcrefr(o->gch.nextgc, g->gc.root); setgcref(g->gc.root, o); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_gc.h b/source/libs/luajit/LuaJIT-src/src/lj_gc.h index 669bbe9240385262008970447f0cee6c37b5552f..b44095d32cad9a85d66ddf8784c8bc3a811ef196 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_gc.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_gc.h @@ -1,6 +1,6 @@ /* ** Garbage collector. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_GC_H @@ -81,8 +81,10 @@ LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno); static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) { GCobj *o = obj2gco(t); - lua_assert(isblack(o) && !isdead(g, o)); - lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); + lj_assertG(isblack(o) && !isdead(g, o), + "bad object states for backward barrier"); + lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, + "bad GC state"); black2gray(o); setgcrefr(t->gclist, g->gc.grayagain); setgcref(g->gc.grayagain, o); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_gdbjit.c b/source/libs/luajit/LuaJIT-src/src/lj_gdbjit.c index c219ffac0a897f2551c8f7786aa5d07ba83fe43f..9e68932a8cb16fa0864de806a02846e8d5384f9a 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_gdbjit.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_gdbjit.c @@ -1,6 +1,6 @@ /* ** Client for the GDB JIT API. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #define lj_gdbjit_c @@ -363,7 +363,7 @@ static const ELFheader elfhdr_template = { .eosabi = 12, #elif defined(__DragonFly__) .eosabi = 0, -#elif (defined(__sun__) && defined(__svr4__)) +#elif LJ_TARGET_SOLARIS .eosabi = 6, #else .eosabi = 0, @@ -637,7 +637,7 @@ static void LJ_FASTCALL gdbjit_debugabbrev(GDBJITctx *ctx) DUV(DW_AT_low_pc); DUV(DW_FORM_addr); DUV(DW_AT_high_pc); DUV(DW_FORM_addr); DUV(DW_AT_stmt_list); DUV(DW_FORM_data4); - DB(0); DB(0); + DB(0); DB(0); DB(0); ctx->p = p; } @@ -724,7 +724,7 @@ static void gdbjit_buildobj(GDBJITctx *ctx) SECTALIGN(ctx->p, sizeof(uintptr_t)); gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); - lua_assert(ctx->objsize < sizeof(GDBJITobj)); + lj_assertX(ctx->objsize < sizeof(GDBJITobj), "GDBJITobj overflow"); } #undef SECTALIGN @@ -782,7 +782,8 @@ void lj_gdbjit_addtrace(jit_State *J, GCtrace *T) ctx.spadjp = CFRAME_SIZE_JIT + (MSize)(parent ? traceref(J, parent)->spadjust : 0); ctx.spadj = CFRAME_SIZE_JIT + T->spadjust; - lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); + lj_assertJ(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc, + "start PC out of range"); ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); ctx.filename = proto_chunknamestr(pt); if (*ctx.filename == '@' || *ctx.filename == '=') diff --git a/source/libs/luajit/LuaJIT-src/src/lj_gdbjit.h b/source/libs/luajit/LuaJIT-src/src/lj_gdbjit.h index bbaa1568c4474fec79b905d94c013653a8847492..23b3daa7ffd739ef9834b97ffc34b811a635c84b 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_gdbjit.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_gdbjit.h @@ -1,6 +1,6 @@ /* ** Client for the GDB JIT API. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_GDBJIT_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ir.c b/source/libs/luajit/LuaJIT-src/src/lj_ir.c index 5baece67e63e875746cc823eabc3259ec92bc3da..e7a5e8bc0943781c661509582c6523427d01dba1 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_ir.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_ir.c @@ -1,6 +1,6 @@ /* ** SSA IR (Intermediate Representation) emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_ir_c @@ -30,15 +30,16 @@ #endif #include "lj_vm.h" #include "lj_strscan.h" +#include "lj_serialize.h" #include "lj_strfmt.h" -#include "lj_lib.h" +#include "lj_prng.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) #define fins (&J->fold.ins) /* Pass IR on to next optimization in chain (FOLD). */ -#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) +#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) /* -- IR tables ----------------------------------------------------------- */ @@ -90,8 +91,9 @@ static void lj_ir_growbot(jit_State *J) { IRIns *baseir = J->irbuf + J->irbotlim; MSize szins = J->irtoplim - J->irbotlim; - lua_assert(szins != 0); - lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim); + lj_assertJ(szins != 0, "zero IR size"); + lj_assertJ(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim, + "unexpected IR growth"); if (J->cur.nins + (szins >> 1) < J->irtoplim) { /* More than half of the buffer is free on top: shift up by a quarter. */ MSize ofs = szins >> 2; @@ -146,11 +148,12 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...) } /* Load field of type t from GG_State + offset. Must be 32 bit aligned. 
*/ -LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) +TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) { - lua_assert((ofs & 3) == 0); + lj_assertJ((ofs & 3) == 0, "unaligned GG_State field offset"); ofs >>= 2; - lua_assert(ofs >= IRFL__MAX && ofs <= 0x3ff); /* 10 bit FOLD key limit. */ + lj_assertJ(ofs >= IRFL__MAX && ofs <= 0x3ff, + "GG_State field offset breaks 10 bit FOLD key limit"); lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs); return lj_opt_fold(J); } @@ -181,7 +184,7 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J) static LJ_AINLINE IRRef ir_nextk64(jit_State *J) { IRRef ref = J->cur.nk - 2; - lua_assert(J->state != LJ_TRACE_ASM); + lj_assertJ(J->state != LJ_TRACE_ASM, "bad JIT state"); if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J); J->cur.nk = ref; return ref; @@ -277,7 +280,7 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t) { IRIns *ir, *cir = J->cur.ir; IRRef ref; - lua_assert(!isdead(J2G(J), o)); + lj_assertJ(!isdead(J2G(J), o), "interning of dead GC object"); for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) if (ir_kgc(&cir[ref]) == o) goto found; @@ -299,7 +302,7 @@ TRef lj_ir_ktrace(jit_State *J) { IRRef ref = ir_nextkgc(J); IRIns *ir = IR(ref); - lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE); + lj_assertJ(irt_toitype_(IRT_P64) == LJ_TTRACE, "mismatched type mapping"); ir->t.irt = IRT_P64; ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. 
*/ ir->op12 = 0; @@ -313,7 +316,7 @@ TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) IRIns *ir, *cir = J->cur.ir; IRRef ref; #if LJ_64 && !LJ_GC64 - lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr); + lj_assertJ((void *)(uintptr_t)u32ptr(ptr) == ptr, "out-of-range GC pointer"); #endif for (ref = J->chain[op]; ref; ref = cir[ref].prev) if (ir_kptr(&cir[ref]) == ptr) @@ -360,7 +363,8 @@ TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot) IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot); IRRef ref; /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */ - lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot); + lj_assertJ(tref_isk(key) && slot == (IRRef)(IRRef1)slot, + "out-of-range key/slot"); for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev) if (cir[ref].op12 == op12) goto found; @@ -381,13 +385,15 @@ found: void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) { UNUSED(L); - lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ + lj_assertL(ir->o != IR_KSLOT, "unexpected KSLOT"); /* Common mistake. 
*/ switch (ir->o) { case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break; case IR_KINT: setintV(tv, ir->i); break; case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; - case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break; - case IR_KNULL: setlightudV(tv, NULL); break; + case IR_KPTR: case IR_KKPTR: + setnumV(tv, (lua_Number)(uintptr_t)ir_kptr(ir)); + break; + case IR_KNULL: setintV(tv, 0); break; case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; #if LJ_HASFFI case IR_KINT64: { @@ -397,7 +403,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) break; } #endif - default: lua_assert(0); break; + default: lj_assertL(0, "bad IR constant op %d", ir->o); break; } } @@ -457,7 +463,7 @@ int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op) case IR_UGE: return !(a < b); case IR_ULE: return !(a > b); case IR_UGT: return !(a <= b); - default: lua_assert(0); return 0; + default: lj_assertX(0, "bad IR op %d", op); return 0; } } @@ -470,7 +476,7 @@ int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op) case IR_GE: return (res >= 0); case IR_LE: return (res <= 0); case IR_GT: return (res > 0); - default: lua_assert(0); return 0; + default: lj_assertX(0, "bad IR op %d", op); return 0; } } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ir.h b/source/libs/luajit/LuaJIT-src/src/lj_ir.h index 34c27853948ca787d94068cb19ef872546188cf2..06458f208b935522719473073f45c9cc3ff8bec9 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_ir.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_ir.h @@ -1,6 +1,6 @@ /* ** SSA IR (Intermediate Representation) format. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_IR_H @@ -75,10 +75,9 @@ _(NEG, N , ref, ref) \ \ _(ABS, N , ref, ref) \ - _(ATAN2, N , ref, ref) \ _(LDEXP, N , ref, ref) \ - _(MIN, C , ref, ref) \ - _(MAX, C , ref, ref) \ + _(MIN, N , ref, ref) \ + _(MAX, N , ref, ref) \ _(FPMATH, N , ref, lit) \ \ /* Overflow-checking arithmetic ops. */ \ @@ -96,6 +95,7 @@ _(UREFO, LW, ref, lit) \ _(UREFC, LW, ref, lit) \ _(FREF, R , ref, lit) \ + _(TMPREF, S , ref, lit) \ _(STRREF, N , ref, ref) \ _(LREF, L , ___, ___) \ \ @@ -106,7 +106,8 @@ _(FLOAD, L , ref, lit) \ _(XLOAD, L , ref, lit) \ _(SLOAD, L , lit, lit) \ - _(VLOAD, L , ref, ___) \ + _(VLOAD, L , ref, lit) \ + _(ALEN, L , ref, ref) \ \ _(ASTORE, S , ref, ref) \ _(HSTORE, S , ref, ref) \ @@ -124,8 +125,8 @@ \ /* Buffer operations. */ \ _(BUFHDR, L , ref, lit) \ - _(BUFPUT, L , ref, ref) \ - _(BUFSTR, A , ref, ref) \ + _(BUFPUT, LW, ref, ref) \ + _(BUFSTR, AW, ref, ref) \ \ /* Barriers. */ \ _(TBAR, S , ref, ___) \ @@ -133,15 +134,15 @@ _(XBAR, S , ___, ___) \ \ /* Type conversions. */ \ - _(CONV, NW, ref, lit) \ + _(CONV, N , ref, lit) \ _(TOBIT, N , ref, ref) \ _(TOSTR, N , ref, lit) \ _(STRTO, N , ref, ___) \ \ /* Calls. */ \ - _(CALLN, N , ref, lit) \ - _(CALLA, A , ref, lit) \ - _(CALLL, L , ref, lit) \ + _(CALLN, NW, ref, lit) \ + _(CALLA, AW, ref, lit) \ + _(CALLL, LW, ref, lit) \ _(CALLS, S , ref, lit) \ _(CALLXS, S , ref, ref) \ _(CARG, N , ref, ref) \ @@ -178,8 +179,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE); /* FPMATH sub-functions. ORDER FPM. */ #define IRFPMDEF(_) \ _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. 
*/ \ - _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \ - _(SIN) _(COS) _(TAN) \ + _(SQRT) _(LOG) _(LOG2) \ _(OTHER) typedef enum { @@ -205,9 +205,15 @@ IRFPMDEF(FPMENUM) _(UDATA_META, offsetof(GCudata, metatable)) \ _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ _(UDATA_FILE, sizeof(GCudata)) \ + _(SBUF_W, sizeof(GCudata) + offsetof(SBufExt, w)) \ + _(SBUF_E, sizeof(GCudata) + offsetof(SBufExt, e)) \ + _(SBUF_B, sizeof(GCudata) + offsetof(SBufExt, b)) \ + _(SBUF_L, sizeof(GCudata) + offsetof(SBufExt, L)) \ + _(SBUF_REF, sizeof(GCudata) + offsetof(SBufExt, cowref)) \ + _(SBUF_R, sizeof(GCudata) + offsetof(SBufExt, r)) \ _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \ _(CDATA_PTR, sizeof(GCcdata)) \ - _(CDATA_INT, sizeof(GCcdata)) \ + _(CDATA_INT, sizeof(GCcdata)) \ _(CDATA_INT64, sizeof(GCcdata)) \ _(CDATA_INT64_4, sizeof(GCcdata) + 4) @@ -218,6 +224,11 @@ IRFLDEF(FLENUM) IRFL__MAX } IRFieldID; +/* TMPREF mode bits, stored in op2. */ +#define IRTMPREF_IN1 0x01 /* First input value. */ +#define IRTMPREF_OUT1 0x02 /* First output value. */ +#define IRTMPREF_OUT2 0x04 /* Second output value. */ + /* SLOAD mode bits, stored in op2. */ #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ #define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */ @@ -225,15 +236,17 @@ IRFLDEF(FLENUM) #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ #define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */ +#define IRSLOAD_KEYINDEX 0x40 /* Table traversal key index. */ -/* XLOAD mode, stored in op2. */ -#define IRXLOAD_READONLY 1 /* Load from read-only data. */ -#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ -#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ +/* XLOAD mode bits, stored in op2. */ +#define IRXLOAD_READONLY 0x01 /* Load from read-only data. */ +#define IRXLOAD_VOLATILE 0x02 /* Load from volatile data. */ +#define IRXLOAD_UNALIGNED 0x04 /* Unaligned load. 
*/ /* BUFHDR mode, stored in op2. */ #define IRBUFHDR_RESET 0 /* Reset buffer. */ #define IRBUFHDR_APPEND 1 /* Append to buffer. */ +#define IRBUFHDR_WRITE 2 /* Write to string buffer. */ /* CONV mode, stored in op2. */ #define IRCONV_SRCMASK 0x001f /* Source IRType. */ @@ -250,6 +263,7 @@ IRFLDEF(FLENUM) #define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */ #define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ #define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ +#define IRCONV_NONE IRCONV_ANY /* INT|*64 no conv, but change type. */ /* TOSTR mode, stored in op2. */ #define IRTOSTR_INT 0 /* Convert integer to string. */ @@ -369,6 +383,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1; #define irt_isu32(t) (irt_type(t) == IRT_U32) #define irt_isi64(t) (irt_type(t) == IRT_I64) #define irt_isu64(t) (irt_type(t) == IRT_U64) +#define irt_isp32(t) (irt_type(t) == IRT_P32) #define irt_isfp(t) (irt_isnum(t) || irt_isfloat(t)) #define irt_isinteger(t) (irt_typerange((t), IRT_I8, IRT_INT)) @@ -377,10 +392,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1; #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) #if LJ_GC64 +/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. 
*/ #define IRT_IS64 \ ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\ (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\ - (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)) + (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\ + (1u<<IRT_NIL)) #elif LJ_64 #define IRT_IS64 \ ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) @@ -412,11 +429,12 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) static LJ_AINLINE uint32_t irt_toitype_(IRType t) { - lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD); + lj_assertX(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD, + "no plain type tag for lightuserdata"); if (LJ_DUALNUM && t > IRT_NUM) { return LJ_TISNUM; } else { - lua_assert(t <= IRT_NUM); + lj_assertX(t <= IRT_NUM, "no plain type tag for IR type %d", t); return ~(uint32_t)t; } } @@ -479,6 +497,7 @@ typedef uint32_t TRef; #define TREF_REFMASK 0x0000ffff #define TREF_FRAME 0x00010000 #define TREF_CONT 0x00020000 +#define TREF_KEYINDEX 0x00100000 #define TREF(ref, t) ((TRef)((ref) + ((t)<<24))) @@ -560,6 +579,11 @@ typedef union IRIns { TValue tv; /* TValue constant (overlaps entire slot). 
*/ } IRIns; +#define ir_isk64(ir) \ + ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ + (LJ_GC64 && \ + ((ir)->o == IR_KGC || (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR))) + #define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr)) #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) @@ -567,12 +591,7 @@ typedef union IRIns { #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) -#define ir_k64(ir) \ - check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ - (LJ_GC64 && \ - ((ir)->o == IR_KGC || \ - (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \ - &(ir)[1].tv) +#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv) #define ir_kptr(ir) \ check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ mref((ir)[LJ_GC64].ptr, void)) @@ -585,4 +604,12 @@ static LJ_AINLINE int ir_sideeff(IRIns *ir) LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W); +/* Replace IR instruction with NOP. */ +static LJ_AINLINE void lj_ir_nop(IRIns *ir) +{ + ir->ot = IRT(IR_NOP, IRT_NIL); + ir->op1 = ir->op2 = 0; + ir->prev = 0; +} + #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_ircall.h b/source/libs/luajit/LuaJIT-src/src/lj_ircall.h index 973c36e6ec96a13aee4ee030d934f57db40a3287..5196144e489870548b098ce2e73c714ee13e9a48 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_ircall.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_ircall.h @@ -1,6 +1,6 @@ /* ** IR CALL* instruction definitions. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_IRCALL_H @@ -21,6 +21,7 @@ typedef struct CCallInfo { #define CCI_OTSHIFT 16 #define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. 
*/ +#define CCI_TYPE(ci) (((ci)->flags>>CCI_OTSHIFT) & IRT_TYPE) #define CCI_OPSHIFT 24 #define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ @@ -29,10 +30,12 @@ typedef struct CCallInfo { #define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) #define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) #define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) +#define CCI_CALL_FA (CCI_CALL_A|CCI_CC_FASTCALL) #define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL) #define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL) /* C call info flags. */ +#define CCI_T (IRT_GUARD << CCI_OTSHIFT) /* May throw. */ #define CCI_L 0x0100 /* Implicit L arg. */ #define CCI_CASTU64 0x0200 /* Cast u64 result to number. */ #define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */ @@ -51,7 +54,7 @@ typedef struct CCallInfo { #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3) #define CCI_XA (1u << CCI_XARGS_SHIFT) -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) +#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) #define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci))) #else #define CCI_XNARGS(ci) CCI_NARGS((ci)) @@ -60,7 +63,7 @@ typedef struct CCallInfo { /* Helpers for conditional function definitions. 
*/ #define IRCALLCOND_ANY(x) x -#if LJ_TARGET_X86ORX64 +#if LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 #define IRCALLCOND_FPMATH(x) NULL #else #define IRCALLCOND_FPMATH(x) x @@ -78,13 +81,19 @@ typedef struct CCallInfo { #define IRCALLCOND_SOFTFP_FFI(x) NULL #endif -#if LJ_SOFTFP && LJ_TARGET_MIPS32 +#if LJ_SOFTFP && LJ_TARGET_MIPS #define IRCALLCOND_SOFTFP_MIPS(x) x #else #define IRCALLCOND_SOFTFP_MIPS(x) NULL #endif -#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32) +#if LJ_SOFTFP && LJ_TARGET_MIPS64 +#define IRCALLCOND_SOFTFP_MIPS64(x) x +#else +#define IRCALLCOND_SOFTFP_MIPS64(x) NULL +#endif + +#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) #define IRCALLCOND_FP64_FFI(x) x @@ -104,6 +113,18 @@ typedef struct CCallInfo { #define IRCALLCOND_FFI32(x) NULL #endif +#if LJ_HASBUFFER +#define IRCALLCOND_BUFFER(x) x +#else +#define IRCALLCOND_BUFFER(x) NULL +#endif + +#if LJ_HASBUFFER && LJ_HASFFI +#define IRCALLCOND_BUFFFI(x) x +#else +#define IRCALLCOND_BUFFFI(x) NULL +#endif + #if LJ_SOFTFP #define XA_FP CCI_XA #define XA2_FP (CCI_XA+CCI_XA) @@ -112,6 +133,14 @@ typedef struct CCallInfo { #define XA2_FP 0 #endif +#if LJ_SOFTFP32 +#define XA_FP32 CCI_XA +#define XA2_FP32 (CCI_XA+CCI_XA) +#else +#define XA_FP32 0 +#define XA2_FP32 0 +#endif + #if LJ_32 #define XA_64 CCI_XA #define XA2_64 (CCI_XA+CCI_XA) @@ -124,40 +153,57 @@ typedef struct CCallInfo { #define IRCALLDEF(_) \ _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ _(ANY, lj_str_find, 4, N, PGC, 0) \ - _(ANY, lj_str_new, 3, S, STR, CCI_L) \ + _(ANY, lj_str_new, 3, S, STR, CCI_L|CCI_T) \ _(ANY, lj_strscan_num, 2, FN, INT, 0) \ - _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \ - _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \ - _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \ - _(ANY, lj_strfmt_putint, 2, FL, PGC, 0) \ - _(ANY, lj_strfmt_putnum, 2, FL, PGC, 0) \ - _(ANY, lj_strfmt_putquoted, 2, FL, PGC, 0) \ - _(ANY, lj_strfmt_putfxint, 
3, L, PGC, XA_64) \ - _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP) \ - _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP) \ - _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP) \ - _(ANY, lj_strfmt_putfstr, 3, L, PGC, 0) \ - _(ANY, lj_strfmt_putfchar, 3, L, PGC, 0) \ - _(ANY, lj_buf_putmem, 3, S, PGC, 0) \ - _(ANY, lj_buf_putstr, 2, FL, PGC, 0) \ - _(ANY, lj_buf_putchar, 2, FL, PGC, 0) \ - _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, 0) \ - _(ANY, lj_buf_putstr_lower, 2, FL, PGC, 0) \ - _(ANY, lj_buf_putstr_upper, 2, FL, PGC, 0) \ - _(ANY, lj_buf_putstr_rep, 3, L, PGC, 0) \ - _(ANY, lj_buf_puttab, 5, L, PGC, 0) \ - _(ANY, lj_buf_tostr, 1, FL, STR, 0) \ - _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \ - _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ - _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ + _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L|CCI_T) \ + _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L|CCI_T) \ + _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L|CCI_T) \ + _(ANY, lj_strfmt_putint, 2, FL, PGC, CCI_T) \ + _(ANY, lj_strfmt_putnum, 2, FL, PGC, CCI_T) \ + _(ANY, lj_strfmt_putquoted, 2, FL, PGC, CCI_T) \ + _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64|CCI_T) \ + _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP|CCI_T) \ + _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP|CCI_T) \ + _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP|CCI_T) \ + _(ANY, lj_strfmt_putfstr, 3, L, PGC, CCI_T) \ + _(ANY, lj_strfmt_putfchar, 3, L, PGC, CCI_T) \ + _(ANY, lj_buf_putmem, 3, S, PGC, CCI_T) \ + _(ANY, lj_buf_putstr, 2, FL, PGC, CCI_T) \ + _(ANY, lj_buf_putchar, 2, FL, PGC, CCI_T) \ + _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, CCI_T) \ + _(ANY, lj_buf_putstr_lower, 2, FL, PGC, CCI_T) \ + _(ANY, lj_buf_putstr_upper, 2, FL, PGC, CCI_T) \ + _(ANY, lj_buf_putstr_rep, 3, L, PGC, CCI_T) \ + _(ANY, lj_buf_puttab, 5, L, PGC, CCI_T) \ + _(BUFFER, lj_bufx_set, 4, S, NIL, 0) \ + _(BUFFFI, lj_bufx_more, 2, FS, INT, CCI_T) \ + _(BUFFER, lj_serialize_put, 2, FS, PGC, CCI_T) \ + _(BUFFER, lj_serialize_get, 2, FS, PTR, CCI_T) \ + _(BUFFER, 
lj_serialize_encode, 2, FA, STR, CCI_L|CCI_T) \ + _(BUFFER, lj_serialize_decode, 3, A, INT, CCI_L|CCI_T) \ + _(ANY, lj_buf_tostr, 1, FL, STR, CCI_T) \ + _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L|CCI_T) \ + _(ANY, lj_tab_new1, 2, FA, TAB, CCI_L|CCI_T) \ + _(ANY, lj_tab_dup, 2, FA, TAB, CCI_L|CCI_T) \ _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ - _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \ + _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L|CCI_T) \ + _(ANY, lj_tab_keyindex, 2, FL, INT, 0) \ + _(ANY, lj_vm_next, 2, FL, PTR, 0) \ _(ANY, lj_tab_len, 1, FL, INT, 0) \ + _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \ _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ - _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ - _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ + _(ANY, lj_mem_newgco, 2, FA, PGC, CCI_L|CCI_T) \ + _(ANY, lj_prng_u64d, 1, FS, NUM, CCI_CASTU64) \ _(ANY, lj_vm_modi, 2, FN, INT, 0) \ + _(ANY, log10, 1, N, NUM, XA_FP) \ + _(ANY, exp, 1, N, NUM, XA_FP) \ + _(ANY, sin, 1, N, NUM, XA_FP) \ + _(ANY, cos, 1, N, NUM, XA_FP) \ + _(ANY, tan, 1, N, NUM, XA_FP) \ + _(ANY, asin, 1, N, NUM, XA_FP) \ + _(ANY, acos, 1, N, NUM, XA_FP) \ + _(ANY, atan, 1, N, NUM, XA_FP) \ _(ANY, sinh, 1, N, NUM, XA_FP) \ _(ANY, cosh, 1, N, NUM, XA_FP) \ _(ANY, tanh, 1, N, NUM, XA_FP) \ @@ -169,32 +215,26 @@ typedef struct CCallInfo { _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ - _(ANY, exp, 1, N, NUM, XA_FP) \ - _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \ _(ANY, log, 1, N, NUM, XA_FP) \ _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ - _(ANY, log10, 1, N, NUM, XA_FP) \ - _(ANY, sin, 1, N, NUM, XA_FP) \ - _(ANY, cos, 1, N, NUM, XA_FP) \ - _(ANY, tan, 1, N, NUM, XA_FP) \ - _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ _(ANY, pow, 2, N, NUM, XA2_FP) \ _(ANY, atan2, 2, N, NUM, XA2_FP) \ _(ANY, ldexp, 2, N, NUM, XA_FP) \ - _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ - _(SOFTFP, softfp_add, 4, N, NUM, 0) \ - _(SOFTFP, softfp_sub, 4, N, 
NUM, 0) \ - _(SOFTFP, softfp_mul, 4, N, NUM, 0) \ - _(SOFTFP, softfp_div, 4, N, NUM, 0) \ - _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \ + _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ + _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \ + _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \ + _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \ + _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \ + _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \ _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ - _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ - _(SOFTFP_MIPS, lj_vm_sfmin, 4, N, NUM, 0) \ - _(SOFTFP_MIPS, lj_vm_sfmax, 4, N, NUM, 0) \ + _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \ + _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \ + _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \ + _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ - _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ - _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \ + _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \ + _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ @@ -272,7 +312,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; #define fp64_f2l __aeabi_f2lz #define fp64_f2ul __aeabi_f2ulz #endif -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS || LJ_TARGET_PPC #define softfp_add __adddf3 #define softfp_sub __subdf3 #define softfp_mul __muldf3 @@ -315,7 +355,7 @@ extern double lj_vm_sfmax(double a, double b); #endif #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) -#ifdef __GNUC__ +#if defined(__GNUC__) || defined(__clang__) #define fp64_l2d __floatdidf #define fp64_ul2d __floatundidf #define fp64_l2f __floatdisf diff --git a/source/libs/luajit/LuaJIT-src/src/lj_iropt.h b/source/libs/luajit/LuaJIT-src/src/lj_iropt.h index 73aef0ef38e95268d794f3d89c05c0254e52f967..b9fb13ee5f37a872d8fa52175149d538d03dfb1b 100644 --- 
a/source/libs/luajit/LuaJIT-src/src/lj_iropt.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_iropt.h @@ -1,6 +1,6 @@ /* ** Common header for IR emitter and optimizations. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_IROPT_H @@ -56,6 +56,12 @@ LJ_FUNC TRef lj_ir_ktrace(jit_State *J); #define lj_ir_kintp(J, k) lj_ir_kint(J, (int32_t)(k)) #endif +#if LJ_GC64 +#define lj_ir_kintpgc lj_ir_kintp +#else +#define lj_ir_kintpgc lj_ir_kint +#endif + static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) { TValue tv; @@ -120,10 +126,11 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); +LJ_FUNC int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim); LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); /* Dead-store elimination. */ @@ -144,13 +151,12 @@ LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc, IROp op); LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc); LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc); -LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc); LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); /* Optimization passes. 
*/ LJ_FUNC void lj_opt_dce(jit_State *J); LJ_FUNC int lj_opt_loop(jit_State *J); -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) +#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) LJ_FUNC void lj_opt_split(jit_State *J); #else #define lj_opt_split(J) UNUSED(J) diff --git a/source/libs/luajit/LuaJIT-src/src/lj_jit.h b/source/libs/luajit/LuaJIT-src/src/lj_jit.h index 92054e3df60067d0ef523f3fff3b935e5db84d8c..102ba0b4b784be4641f85c564a13ed8f146c06ee 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_jit.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_jit.h @@ -1,83 +1,97 @@ /* ** Common definitions for the JIT compiler. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_JIT_H #define _LJ_JIT_H #include "lj_obj.h" +#if LJ_HASJIT #include "lj_ir.h" -/* JIT engine flags. */ +/* -- JIT engine flags ---------------------------------------------------- */ + +/* General JIT engine flags. 4 bits. */ #define JIT_F_ON 0x00000001 -/* CPU-specific JIT engine flags. */ +/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */ +#define JIT_F_CPU 0x00000010 + #if LJ_TARGET_X86ORX64 -#define JIT_F_SSE2 0x00000010 -#define JIT_F_SSE3 0x00000020 -#define JIT_F_SSE4_1 0x00000040 -#define JIT_F_PREFER_IMUL 0x00000080 -#define JIT_F_LEA_AGU 0x00000100 -#define JIT_F_BMI2 0x00000200 - -/* Names for the CPU-specific flags. Must match the order above. 
*/ -#define JIT_F_CPU_FIRST JIT_F_SSE2 -#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" + +#define JIT_F_SSE3 (JIT_F_CPU << 0) +#define JIT_F_SSE4_1 (JIT_F_CPU << 1) +#define JIT_F_BMI2 (JIT_F_CPU << 2) + + +#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2" + #elif LJ_TARGET_ARM -#define JIT_F_ARMV6_ 0x00000010 -#define JIT_F_ARMV6T2_ 0x00000020 -#define JIT_F_ARMV7 0x00000040 -#define JIT_F_VFPV2 0x00000080 -#define JIT_F_VFPV3 0x00000100 - -#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) -#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) + +#define JIT_F_ARMV6_ (JIT_F_CPU << 0) +#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1) +#define JIT_F_ARMV7 (JIT_F_CPU << 2) +#define JIT_F_ARMV8 (JIT_F_CPU << 3) +#define JIT_F_VFPV2 (JIT_F_CPU << 4) +#define JIT_F_VFPV3 (JIT_F_CPU << 5) + +#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) +#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_ARMV6_ -#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3" +#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3" + #elif LJ_TARGET_PPC -#define JIT_F_SQRT 0x00000010 -#define JIT_F_ROUND 0x00000020 -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_SQRT +#define JIT_F_SQRT (JIT_F_CPU << 0) +#define JIT_F_ROUND (JIT_F_CPU << 1) + #define JIT_F_CPUSTRING "\4SQRT\5ROUND" + #elif LJ_TARGET_MIPS -#define JIT_F_MIPSXXR2 0x00000010 -/* Names for the CPU-specific flags. Must match the order above. 
*/ -#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 +#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0) + #if LJ_TARGET_MIPS32 +#if LJ_TARGET_MIPSR6 +#define JIT_F_CPUSTRING "\010MIPS32R6" +#else #define JIT_F_CPUSTRING "\010MIPS32R2" +#endif +#else +#if LJ_TARGET_MIPSR6 +#define JIT_F_CPUSTRING "\010MIPS64R6" #else #define JIT_F_CPUSTRING "\010MIPS64R2" #endif +#endif + #else -#define JIT_F_CPU_FIRST 0 + #define JIT_F_CPUSTRING "" + #endif -/* Optimization flags. */ +/* Optimization flags. 12 bits. */ +#define JIT_F_OPT 0x00010000 #define JIT_F_OPT_MASK 0x0fff0000 -#define JIT_F_OPT_FOLD 0x00010000 -#define JIT_F_OPT_CSE 0x00020000 -#define JIT_F_OPT_DCE 0x00040000 -#define JIT_F_OPT_FWD 0x00080000 -#define JIT_F_OPT_DSE 0x00100000 -#define JIT_F_OPT_NARROW 0x00200000 -#define JIT_F_OPT_LOOP 0x00400000 -#define JIT_F_OPT_ABC 0x00800000 -#define JIT_F_OPT_SINK 0x01000000 -#define JIT_F_OPT_FUSE 0x02000000 +#define JIT_F_OPT_FOLD (JIT_F_OPT << 0) +#define JIT_F_OPT_CSE (JIT_F_OPT << 1) +#define JIT_F_OPT_DCE (JIT_F_OPT << 2) +#define JIT_F_OPT_FWD (JIT_F_OPT << 3) +#define JIT_F_OPT_DSE (JIT_F_OPT << 4) +#define JIT_F_OPT_NARROW (JIT_F_OPT << 5) +#define JIT_F_OPT_LOOP (JIT_F_OPT << 6) +#define JIT_F_OPT_ABC (JIT_F_OPT << 7) +#define JIT_F_OPT_SINK (JIT_F_OPT << 8) +#define JIT_F_OPT_FUSE (JIT_F_OPT << 9) +#define JIT_F_OPT_FMA (JIT_F_OPT << 10) /* Optimizations names for -O. Must match the order above. */ -#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD #define JIT_F_OPTSTRING \ - "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" + "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse\3fma" /* Optimization levels set a fixed combination of flags. */ #define JIT_F_OPT_0 0 @@ -86,9 +100,12 @@ #define JIT_F_OPT_3 (JIT_F_OPT_2|\ JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 +/* Note: FMA is not set by default. 
*/ + +/* -- JIT engine parameters ----------------------------------------------- */ #if LJ_TARGET_WINDOWS || LJ_64 -/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ +/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */ #define JIT_P_sizemcode_DEFAULT 64 #else /* Could go as low as 4K, but the mmap() overhead would be rather high. */ @@ -129,11 +146,14 @@ JIT_PARAMDEF(JIT_PARAMENUM) #define JIT_PARAMSTR(len, name, value) #len #name #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) +/* -- JIT engine data structures ------------------------------------------ */ + /* Trace compiler state. */ typedef enum { LJ_TRACE_IDLE, /* Trace compiler idle. */ LJ_TRACE_ACTIVE = 0x10, LJ_TRACE_RECORD, /* Bytecode recording active. */ + LJ_TRACE_RECORD_1ST, /* Record 1st instruction, too. */ LJ_TRACE_START, /* New trace started. */ LJ_TRACE_END, /* End of trace. */ LJ_TRACE_ASM, /* Assemble trace. */ @@ -158,10 +178,17 @@ typedef uint8_t MCode; typedef uint32_t MCode; #endif +/* Linked list of MCode areas. */ +typedef struct MCLink { + MCode *next; /* Next area. */ + size_t size; /* Size of current area. */ +} MCLink; + /* Stack snapshot header. */ typedef struct SnapShot { - uint16_t mapofs; /* Offset into snapshot map. */ + uint32_t mapofs; /* Offset into snapshot map. */ IRRef1 ref; /* First IR ref for this snapshot. */ + uint16_t mcofs; /* Offset into machine code in MCode units. */ uint8_t nslots; /* Number of valid slots. */ uint8_t topslot; /* Maximum frame extent. */ uint8_t nent; /* Number of compressed entries. */ @@ -177,12 +204,15 @@ typedef uint32_t SnapEntry; #define SNAP_CONT 0x020000 /* Continuation slot. */ #define SNAP_NORESTORE 0x040000 /* No need to restore slot. */ #define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */ +#define SNAP_KEYINDEX 0x100000 /* Traversal key index. 
*/ LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME); LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); +LJ_STATIC_ASSERT(SNAP_KEYINDEX == TREF_KEYINDEX); #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) #define SNAP_TR(slot, tr) \ - (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) + (((SnapEntry)(slot) << 24) + \ + ((tr) & (TREF_KEYINDEX|TREF_CONT|TREF_FRAME|TREF_REFMASK))) #if !LJ_FR2 #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) #endif @@ -227,8 +257,7 @@ typedef enum { /* Trace object. */ typedef struct GCtrace { GCHeader; - uint8_t topslot; /* Top stack slot already checked to be allocated. */ - uint8_t linktype; /* Type of link. */ + uint16_t nsnap; /* Number of snapshots. */ IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ #if LJ_GC64 uint32_t unused_gc64; @@ -236,8 +265,7 @@ typedef struct GCtrace { GCRef gclist; IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ - uint16_t nsnap; /* Number of snapshots. */ - uint16_t nsnapmap; /* Number of snapshot map elements. */ + uint32_t nsnapmap; /* Number of snapshot map elements. */ SnapShot *snap; /* Snapshot array. */ SnapEntry *snapmap; /* Snapshot map. */ GCRef startpt; /* Starting prototype. */ @@ -245,6 +273,9 @@ typedef struct GCtrace { BCIns startins; /* Original bytecode of starting instruction. */ MSize szmcode; /* Size of machine code. */ MCode *mcode; /* Start of machine code. */ +#if LJ_ABI_PAUTH + ASMFunction mcauth; /* Start of machine code, with ptr auth applied. */ +#endif MSize mcloop; /* Offset of loop start in machine code. */ uint16_t nchild; /* Number of child traces (root trace only). */ uint16_t spadjust; /* Stack pointer adjustment (offset in bytes). */ @@ -254,6 +285,8 @@ typedef struct GCtrace { TraceNo1 nextroot; /* Next root trace for same prototype. */ TraceNo1 nextside; /* Next side trace of same root trace. */ uint8_t sinktags; /* Trace has SINK tags. 
*/ + uint8_t topslot; /* Top stack slot already checked to be allocated. */ + uint8_t linktype; /* Type of link. */ uint8_t unused1; #ifdef LUAJIT_USE_GDBJIT void *gdbjit_entry; /* GDB JIT entry. */ @@ -344,6 +377,7 @@ enum { #endif LJ_K64__MAX, }; +#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS) enum { #if LJ_TARGET_X86ORX64 @@ -362,13 +396,14 @@ enum { #endif LJ_K32__MAX }; +#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS) /* Get 16 byte aligned pointer to SIMD constant. */ #define LJ_KSIMD(J, n) \ ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) /* Set/reset flag to activate the SPLIT pass for the current trace. */ -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) +#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) #define lj_needsplit(J) (J->needsplit = 1) #define lj_resetsplit(J) (J->needsplit = 0) #else @@ -416,13 +451,17 @@ typedef struct jit_State { int32_t framedepth; /* Current frame depth. */ int32_t retdepth; /* Return frame depth (count of RETF). */ +#if LJ_K32__USED + uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */ +#endif TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ - TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */ - uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ +#if LJ_K64__USED + TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */ +#endif IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ - IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ - IRRef irbotlim; /* Lower limit of instuction buffer (biased). */ + IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ + IRRef irbotlim; /* Lower limit of instruction buffer (biased). */ IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */ MSize sizesnap; /* Size of temp. snapshot buffer. */ @@ -431,7 +470,7 @@ typedef struct jit_State { MSize sizesnapmap; /* Size of temp. snapshot map buffer. 
*/ PostProc postproc; /* Required post-processing after execution. */ -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) +#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) uint8_t needsplit; /* Need SPLIT pass. */ #endif uint8_t retryrec; /* Retry recording. */ @@ -450,7 +489,6 @@ typedef struct jit_State { HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ uint32_t penaltyslot; /* Round-robin index into penalty slots. */ - uint32_t prngstate; /* PRNG state. */ #ifdef LUAJIT_ENABLE_TABLE_BUMP RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */ @@ -464,6 +502,7 @@ typedef struct jit_State { const BCIns *startpc; /* Bytecode PC of starting instruction. */ TraceNo parent; /* Parent of current side trace (0 for root traces). */ ExitNo exitno; /* Exit number in parent of current side trace. */ + int exitcode; /* Exit code from unwound trace. */ BCIns *patchpc; /* PC for pending re-patch. */ BCIns patchins; /* Instruction for pending re-patch. */ @@ -482,18 +521,13 @@ typedef struct jit_State { BCLine prev_line; /* Previous line. */ int prof_mode; /* Profiling mode: 0, 'f', 'l'. */ #endif -} -#if LJ_TARGET_ARM -LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ -#endif -jit_State; +} jit_State; -/* Trivial PRNG e.g. used for penalty randomization. */ -static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits) -{ - /* Yes, this LCG is very weak, but that doesn't matter for our use case. */ - J->prngstate = J->prngstate * 1103515245 + 12345; - return J->prngstate >> (32-bits); -} +#ifdef LUA_USE_ASSERT +#define lj_assertJ(c, ...) lj_assertG_(J2G(J), (c), __VA_ARGS__) +#else +#define lj_assertJ(c, ...) 
((void)J) +#endif +#endif #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_lex.c b/source/libs/luajit/LuaJIT-src/src/lj_lex.c index 2d2f8194cfd5ab28eb70816cbb5d617ae6ce15b8..a986aeb8ade65bb4e637535a7fb9e1303ae77af0 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_lex.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_lex.c @@ -1,6 +1,6 @@ /* ** Lexical analyzer. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -48,6 +48,12 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls) size_t sz; const char *p = ls->rfunc(ls->L, ls->rdata, &sz); if (p == NULL || sz == 0) return LEX_EOF; + if (sz >= LJ_MAX_BUF) { + if (sz != ~(size_t)0) lj_err_mem(ls->L); + sz = ~(uintptr_t)0 - (uintptr_t)p; + if (sz >= LJ_MAX_BUF) sz = LJ_MAX_BUF-1; + ls->endmark = 1; + } ls->pe = p + sz; ls->p = p + 1; return (LexChar)(uint8_t)p[0]; @@ -76,7 +82,7 @@ static LJ_AINLINE LexChar lex_savenext(LexState *ls) static void lex_newline(LexState *ls) { LexChar old = ls->c; - lua_assert(lex_iseol(ls)); + lj_assertLS(lex_iseol(ls), "bad usage"); lex_next(ls); /* Skip "\n" or "\r". */ if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */ if (++ls->linenumber >= LJ_MAX_LINE) @@ -90,7 +96,7 @@ static void lex_number(LexState *ls, TValue *tv) { StrScanFmt fmt; LexChar c, xp = 'e'; - lua_assert(lj_char_isdigit(ls->c)); + lj_assertLS(lj_char_isdigit(ls->c), "bad usage"); if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x') xp = 'p'; while (lj_char_isident(ls->c) || ls->c == '.' || @@ -99,7 +105,7 @@ static void lex_number(LexState *ls, TValue *tv) lex_savenext(ls); } lex_save(ls, '\0'); - fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv, + fmt = lj_strscan_scan((const uint8_t *)ls->sb.b, sbuflen(&ls->sb)-1, tv, (LJ_DUALNUM ? 
STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); if (LJ_DUALNUM && fmt == STRSCAN_INT) { @@ -110,12 +116,9 @@ static void lex_number(LexState *ls, TValue *tv) } else if (fmt != STRSCAN_ERROR) { lua_State *L = ls->L; GCcdata *cd; - lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG); - if (!ctype_ctsG(G(L))) { - ptrdiff_t oldtop = savestack(L, L->top); - luaopen_ffi(L); /* Load FFI library on-demand. */ - L->top = restorestack(L, oldtop); - } + lj_assertLS(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG, + "unexpected number format %d", fmt); + ctype_loadffi(L); if (fmt == STRSCAN_IMAG) { cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double)); ((double *)cdataptr(cd))[0] = 0; @@ -127,7 +130,8 @@ static void lex_number(LexState *ls, TValue *tv) lj_parse_keepcdata(ls, tv, cd); #endif } else { - lua_assert(fmt == STRSCAN_ERROR); + lj_assertLS(fmt == STRSCAN_ERROR, + "unexpected number format %d", fmt); lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); } } @@ -137,8 +141,8 @@ static int lex_skipeq(LexState *ls) { int count = 0; LexChar s = ls->c; - lua_assert(s == '[' || s == ']'); - while (lex_savenext(ls) == '=') + lj_assertLS(s == '[' || s == ']', "bad usage"); + while (lex_savenext(ls) == '=' && count < 0x20000000) count++; return (ls->c == s) ? count : (-count) - 1; } @@ -172,7 +176,7 @@ static void lex_longstring(LexState *ls, TValue *tv, int sep) } } endloop: if (tv) { - GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep), + GCstr *str = lj_parse_keepstr(ls, ls->sb.b + (2 + (MSize)sep), sbuflen(&ls->sb) - 2*(2 + (MSize)sep)); setstrV(ls->L, tv, str); } @@ -278,7 +282,7 @@ static void lex_string(LexState *ls, TValue *tv) } lex_savenext(ls); /* Skip trailing delimiter. 
*/ setstrV(ls->L, tv, - lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2)); + lj_parse_keepstr(ls, ls->sb.b+1, sbuflen(&ls->sb)-2)); } /* -- Main lexical scanner ------------------------------------------------ */ @@ -298,7 +302,7 @@ static LexToken lex_scan(LexState *ls, TValue *tv) do { lex_savenext(ls); } while (lj_char_isident(ls->c)); - s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb)); + s = lj_parse_keepstr(ls, ls->sb.b, sbuflen(&ls->sb)); setstrV(ls->L, tv, s); if (s->reserved > 0) /* Reserved word? */ return TK_OFS + s->reserved; @@ -406,6 +410,8 @@ int lj_lex_setup(lua_State *L, LexState *ls) ls->lookahead = TK_eof; /* No look-ahead token. */ ls->linenumber = 1; ls->lastline = 1; + ls->endmark = 0; + ls->fr2 = LJ_FR2; /* Generate native bytecode by default. */ lex_next(ls); /* Read-ahead first char. */ if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb && (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ @@ -462,7 +468,7 @@ void lj_lex_next(LexState *ls) /* Look ahead for the next token. */ LexToken lj_lex_lookahead(LexState *ls) { - lua_assert(ls->lookahead == TK_eof); + lj_assertLS(ls->lookahead == TK_eof, "double lookahead"); ls->lookahead = lex_scan(ls, &ls->lookaheadval); return ls->lookahead; } @@ -487,7 +493,7 @@ void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...) tokstr = NULL; } else if (tok == TK_name || tok == TK_string || tok == TK_number) { lex_save(ls, '\0'); - tokstr = sbufB(&ls->sb); + tokstr = ls->sb.b; } else { tokstr = lj_lex_token2str(ls, tok); } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_lex.h b/source/libs/luajit/LuaJIT-src/src/lj_lex.h index 33fa8657262781270312f672145d645d0f6e0932..a7aacb460d4332a0463f68d8e3a8a72f561acfc3 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_lex.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_lex.h @@ -1,6 +1,6 @@ /* ** Lexical analyzer. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_LEX_H @@ -73,6 +73,8 @@ typedef struct LexState { BCInsLine *bcstack; /* Stack for bytecode instructions/line numbers. */ MSize sizebcstack; /* Size of bytecode stack. */ uint32_t level; /* Syntactical nesting level. */ + int endmark; /* Trust bytecode end marker, even if not at EOF. */ + int fr2; /* Generate bytecode for LJ_FR2 mode. */ } LexState; LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls); @@ -83,4 +85,10 @@ LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok); LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...); LJ_FUNC void lj_lex_init(lua_State *L); +#ifdef LUA_USE_ASSERT +#define lj_assertLS(c, ...) (lj_assertG_(G(ls->L), (c), __VA_ARGS__)) +#else +#define lj_assertLS(c, ...) ((void)ls) +#endif + #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_lib.c b/source/libs/luajit/LuaJIT-src/src/lj_lib.c index b8638de6a08d8c55412faa55a4f45512692e3e64..88cb2bdd6cebd8236888f00c716d14e33e2f9d35 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_lib.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_lib.c @@ -1,6 +1,6 @@ /* ** Library function support. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #define lj_lib_c @@ -16,6 +16,9 @@ #include "lj_func.h" #include "lj_bc.h" #include "lj_dispatch.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#endif #include "lj_vm.h" #include "lj_strscan.h" #include "lj_strfmt.h" @@ -59,6 +62,7 @@ static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab) ls.pe = (const char *)~(uintptr_t)0; ls.c = -1; ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE)); + ls.fr2 = LJ_FR2; ls.chunkname = name; pt = lj_bcread_proto(&ls); pt->firstline = ~(BCLine)0; @@ -263,6 +267,23 @@ GCfunc *lj_lib_checkfunc(lua_State *L, int narg) return funcV(o); } +GCproto *lj_lib_checkLproto(lua_State *L, int narg, int nolua) +{ + TValue *o = L->base + narg-1; + if (L->top > o) { + if (tvisproto(o)) { + return protoV(o); + } else if (tvisfunc(o)) { + if (isluafunc(funcV(o))) + return funcproto(funcV(o)); + else if (nolua) + return NULL; + } + } + lj_err_argt(L, narg, LUA_TFUNCTION); + return NULL; /* unreachable */ +} + GCtab *lj_lib_checktab(lua_State *L, int narg) { TValue *o = L->base + narg-1; @@ -301,3 +322,56 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst) return def; } +/* -- Strict type checks -------------------------------------------------- */ + +/* The following type checks do not coerce between strings and numbers. +** And they handle plain int64_t/uint64_t FFI numbers, too. 
+*/ + +#if LJ_HASBUFFER +GCstr *lj_lib_checkstrx(lua_State *L, int narg) +{ + TValue *o = L->base + narg-1; + if (!(o < L->top && tvisstr(o))) lj_err_argt(L, narg, LUA_TSTRING); + return strV(o); +} + +int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b) +{ + TValue *o = L->base + narg-1; + lj_assertL(b >= 0, "expected range must be non-negative"); + if (o < L->top) { + if (LJ_LIKELY(tvisint(o))) { + int32_t i = intV(o); + if (i >= a && i <= b) return i; + } else if (LJ_LIKELY(tvisnum(o))) { + /* For performance reasons, this doesn't check for integerness or + ** integer overflow. Overflow detection still works, since all FPUs + ** return either MININT or MAXINT, which is then out of range. + */ + int32_t i = (int32_t)numV(o); + if (i >= a && i <= b) return i; +#if LJ_HASFFI + } else if (tviscdata(o)) { + GCcdata *cd = cdataV(o); + if (cd->ctypeid == CTID_INT64) { + int64_t i = *(int64_t *)cdataptr(cd); + if (i >= (int64_t)a && i <= (int64_t)b) return (int32_t)i; + } else if (cd->ctypeid == CTID_UINT64) { + uint64_t i = *(uint64_t *)cdataptr(cd); + if ((a < 0 || i >= (uint64_t)a) && i <= (uint64_t)b) return (int32_t)i; + } else { + goto badtype; + } +#endif + } else { + goto badtype; + } + lj_err_arg(L, narg, LJ_ERR_NUMRNG); + } +badtype: + lj_err_argt(L, narg, LUA_TNUMBER); + return 0; /* unreachable */ +} +#endif + diff --git a/source/libs/luajit/LuaJIT-src/src/lj_lib.h b/source/libs/luajit/LuaJIT-src/src/lj_lib.h index 37ec9d78007dc269c0e10f6c66c6b7c2d84b50e7..ec54885ab115bf284c9b89cbcac63ac723aecbbb 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_lib.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_lib.h @@ -1,6 +1,6 @@ /* ** Library function support. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_LIB_H @@ -42,10 +42,17 @@ LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); +LJ_FUNC GCproto *lj_lib_checkLproto(lua_State *L, int narg, int nolua); LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); +#if LJ_HASBUFFER +LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg); +LJ_FUNC int32_t lj_lib_checkintrange(lua_State *L, int narg, + int32_t a, int32_t b); +#endif + /* Avoid including lj_frame.h. */ #if LJ_GC64 #define lj_lib_upvalue(L, n) \ @@ -107,9 +114,4 @@ LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, #define LIBINIT_FFID 0xfe #define LIBINIT_END 0xff -/* Exported library functions. */ - -typedef struct RandomState RandomState; -LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs); - #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_load.c b/source/libs/luajit/LuaJIT-src/src/lj_load.c index 77f8326ddbdf33c17db2ac86639a0f8e3f347ee3..638f4e1ad0deeb57b2b12ffe99cdf64f6b8841a0 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_load.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_load.c @@ -1,6 +1,6 @@ /* ** Load and dump code. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include <errno.h> @@ -34,14 +34,28 @@ static TValue *cpparser(lua_State *L, lua_CFunction dummy, void *ud) UNUSED(dummy); cframe_errfunc(L->cframe) = -1; /* Inherit error function. */ bc = lj_lex_setup(L, ls); - if (ls->mode && !strchr(ls->mode, bc ? 
'b' : 't')) { - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XMODE)); - lj_err_throw(L, LUA_ERRSYNTAX); + if (ls->mode) { + int xmode = 1; + const char *mode = ls->mode; + char c; + while ((c = *mode++)) { + if (c == (bc ? 'b' : 't')) xmode = 0; + if (c == (LJ_FR2 ? 'W' : 'X')) ls->fr2 = !LJ_FR2; + } + if (xmode) { + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XMODE)); + lj_err_throw(L, LUA_ERRSYNTAX); + } } pt = bc ? lj_bcread(ls) : lj_parse(ls); - fn = lj_func_newL_empty(L, pt, tabref(L->env)); - /* Don't combine above/below into one statement. */ - setfuncV(L, L->top++, fn); + if (ls->fr2 == LJ_FR2) { + fn = lj_func_newL_empty(L, pt, tabref(L->env)); + /* Don't combine above/below into one statement. */ + setfuncV(L, L->top++, fn); + } else { + /* Non-native generation returns a dumpable, but non-runnable prototype. */ + setprotoV(L, L->top++, pt); + } return NULL; } @@ -87,29 +101,30 @@ LUALIB_API int luaL_loadfilex(lua_State *L, const char *filename, FileReaderCtx ctx; int status; const char *chunkname; + int err = 0; if (filename) { + chunkname = lua_pushfstring(L, "@%s", filename); ctx.fp = fopen(filename, "rb"); if (ctx.fp == NULL) { + L->top--; lua_pushfstring(L, "cannot open %s: %s", filename, strerror(errno)); return LUA_ERRFILE; } - chunkname = lua_pushfstring(L, "@%s", filename); } else { ctx.fp = stdin; chunkname = "=stdin"; } status = lua_loadx(L, reader_file, &ctx, chunkname, mode); - if (ferror(ctx.fp)) { - L->top -= filename ? 
2 : 1; - lua_pushfstring(L, "cannot read %s: %s", chunkname+1, strerror(errno)); - if (filename) - fclose(ctx.fp); - return LUA_ERRFILE; - } + if (ferror(ctx.fp)) err = errno; if (filename) { + fclose(ctx.fp); L->top--; copyTV(L, L->top-1, L->top); - fclose(ctx.fp); + } + if (err) { + L->top--; + lua_pushfstring(L, "cannot read %s: %s", chunkname+1, strerror(err)); + return LUA_ERRFILE; } return status; } @@ -159,18 +174,19 @@ LUALIB_API int luaL_loadstring(lua_State *L, const char *s) LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data) { cTValue *o = L->top-1; - api_check(L, L->top > L->base); + uint32_t flags = LJ_FR2*BCDUMP_F_FR2; /* Default mode for legacy C API. */ + lj_checkapi(L->top > L->base, "top slot empty"); if (tvisfunc(o) && isluafunc(funcV(o))) - return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0); + return lj_bcwrite(L, funcproto(funcV(o)), writer, data, flags); else return 1; } - -/* -- Luajittex needs this one because it's faster than make it Lua -- */ + +/* -- Luajittex needs this one because it's faster than make it with Lua -- */ LUA_API int RESERVED_lua_dump(lua_State *L, lua_Writer writer, void *data, int strip) { cTValue *o = L->top-1; - api_check(L, L->top > L->base); + lj_checkapi(L->top > L->base, "top slot empty"); if (tvisfunc(o) && isluafunc(funcV(o))) return lj_bcwrite(L, funcproto(funcV(o)), writer, data, strip); else @@ -194,3 +210,4 @@ LUALIB_API int RESERVED_load_aux_JIT(lua_State *L, int status, int envarg) } } + diff --git a/source/libs/luajit/LuaJIT-src/src/lj_mcode.c b/source/libs/luajit/LuaJIT-src/src/lj_mcode.c index 77035bf72a4b059bf38bde246e617f5ff9724e4c..864da7fb4c81e4ddd3df168ca8cd182f1bdb7583 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_mcode.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_mcode.c @@ -1,6 +1,6 @@ /* ** Machine code management. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #define lj_mcode_c @@ -14,6 +14,7 @@ #include "lj_mcode.h" #include "lj_trace.h" #include "lj_dispatch.h" +#include "lj_prng.h" #endif #if LJ_HASJIT || LJ_HASFFI #include "lj_vm.h" @@ -28,6 +29,11 @@ #include <valgrind/valgrind.h> #endif +#if LJ_TARGET_WINDOWS +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#endif + #if LJ_TARGET_IOS void sys_icache_invalidate(void *start, size_t len); #endif @@ -40,11 +46,13 @@ void lj_mcode_sync(void *start, void *end) #endif #if LJ_TARGET_X86ORX64 UNUSED(start); UNUSED(end); +#elif LJ_TARGET_WINDOWS + FlushInstructionCache(GetCurrentProcess(), start, (char *)end-(char *)start); #elif LJ_TARGET_IOS sys_icache_invalidate(start, (char *)end-(char *)start); #elif LJ_TARGET_PPC lj_vm_cachesync(start, end); -#elif defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) __clear_cache(start, end); #else #error "Missing builtin to flush instruction cache" @@ -57,17 +65,14 @@ void lj_mcode_sync(void *start, void *end) #if LJ_TARGET_WINDOWS -#define WIN32_LEAN_AND_MEAN -#include <windows.h> - #define MCPROT_RW PAGE_READWRITE #define MCPROT_RX PAGE_EXECUTE_READ #define MCPROT_RWX PAGE_EXECUTE_READWRITE static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) { - void *p = VirtualAlloc((void *)hint, sz, - MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); + void *p = LJ_WIN_VALLOC((void *)hint, sz, + MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); if (!p && !hint) lj_trace_err(J, LJ_TRERR_MCODEAL); return p; @@ -82,7 +87,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz) static int mcode_setprot(void *p, size_t sz, DWORD prot) { DWORD oprot; - return !VirtualProtect(p, sz, prot, &oprot); + return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); } #elif LJ_TARGET_POSIX @@ -96,10 +101,15 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) #define MCPROT_RW (PROT_READ|PROT_WRITE) #define MCPROT_RX (PROT_READ|PROT_EXEC) #define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) 
+#ifdef PROT_MPROTECT +#define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX)) +#else +#define MCPROT_CREATE 0 +#endif static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) { - void *p = mmap((void *)hint, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) { if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); p = NULL; @@ -118,52 +128,34 @@ static int mcode_setprot(void *p, size_t sz, int prot) return mprotect(p, sz, prot); } -#elif LJ_64 - -#error "Missing OS support for explicit placement of executable memory" - #else -/* Fallback allocator. This will fail if memory is not executable by default. */ -#define LUAJIT_UNPROTECT_MCODE -#define MCPROT_RW 0 -#define MCPROT_RX 0 -#define MCPROT_RWX 0 - -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) -{ - UNUSED(hint); UNUSED(prot); - return lj_mem_new(J->L, sz); -} - -static void mcode_free(jit_State *J, void *p, size_t sz) -{ - lj_mem_free(J2G(J), p, sz); -} +#error "Missing OS support for explicit placement of executable memory" #endif /* -- MCode area protection ----------------------------------------------- */ -/* Define this ONLY if page protection twiddling becomes a bottleneck. */ -#ifdef LUAJIT_UNPROTECT_MCODE +#if LUAJIT_SECURITY_MCODE == 0 -/* It's generally considered to be a potential security risk to have +/* Define this ONLY if page protection twiddling becomes a bottleneck. +** +** It's generally considered to be a potential security risk to have ** pages with simultaneous write *and* execute access in a process. ** ** Do not even think about using this mode for server processes or -** apps handling untrusted external data (such as a browser). +** apps handling untrusted external data. 
** ** The security risk is not in LuaJIT itself -- but if an adversary finds -** any *other* flaw in your C application logic, then any RWX memory page -** simplifies writing an exploit considerably. +** any *other* flaw in your C application logic, then any RWX memory pages +** simplify writing an exploit considerably. */ #define MCPROT_GEN MCPROT_RWX #define MCPROT_RUN MCPROT_RWX static void mcode_protect(jit_State *J, int prot) { - UNUSED(J); UNUSED(prot); + UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot); } #else @@ -180,7 +172,7 @@ static void mcode_protect(jit_State *J, int prot) #define MCPROT_RUN MCPROT_RX /* Protection twiddling failed. Probably due to kernel security. */ -static LJ_NOINLINE void mcode_protfail(jit_State *J) +static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J) { lua_CFunction panic = J2G(J)->panic; if (panic) { @@ -188,6 +180,7 @@ static LJ_NOINLINE void mcode_protfail(jit_State *J) setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT)); panic(L); } + exit(EXIT_FAILURE); } /* Change protection of MCode area. */ @@ -242,7 +235,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) } /* Next try probing 64K-aligned pseudo-random addresses. */ do { - hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16; + hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000); } while (!(hint + sz < range+range)); hint = target + hint - range; } @@ -255,7 +248,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) /* All memory addresses are reachable by relative jumps. */ static void *mcode_alloc(jit_State *J, size_t sz) { -#ifdef __OpenBSD__ +#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP /* Allow better executable memory allocation for OpenBSD W^X mode. 
*/ void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); if (p && mcode_setprot(p, sz, MCPROT_GEN)) { @@ -272,12 +265,6 @@ static void *mcode_alloc(jit_State *J, size_t sz) /* -- MCode area management ----------------------------------------------- */ -/* Linked list of MCode areas. */ -typedef struct MCLink { - MCode *next; /* Next area. */ - size_t size; /* Size of current area. */ -} MCLink; - /* Allocate a new MCode area. */ static void mcode_allocarea(jit_State *J) { @@ -292,6 +279,7 @@ static void mcode_allocarea(jit_State *J) ((MCLink *)J->mcarea)->next = oldarea; ((MCLink *)J->mcarea)->size = sz; J->szallmcarea += sz; + J->mcbot = (MCode *)lj_err_register_mcode(J->mcarea, sz, (uint8_t *)J->mcbot); } /* Free all MCode areas. */ @@ -302,7 +290,9 @@ void lj_mcode_free(jit_State *J) J->szallmcarea = 0; while (mc) { MCode *next = ((MCLink *)mc)->next; - mcode_free(J, mc, ((MCLink *)mc)->size); + size_t sz = ((MCLink *)mc)->size; + lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink)); + mcode_free(J, mc, sz); mc = next; } } @@ -337,35 +327,36 @@ void lj_mcode_abort(jit_State *J) /* Set/reset protection to allow patching of MCode areas. */ MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) { -#ifdef LUAJIT_UNPROTECT_MCODE - UNUSED(J); UNUSED(ptr); UNUSED(finish); - return NULL; -#else if (finish) { +#if LUAJIT_SECURITY_MCODE if (J->mcarea == ptr) mcode_protect(J, MCPROT_RUN); else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN))) mcode_protfail(J); +#endif return NULL; } else { MCode *mc = J->mcarea; /* Try current area first to use the protection cache. */ if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { +#if LUAJIT_SECURITY_MCODE mcode_protect(J, MCPROT_GEN); +#endif return mc; } /* Otherwise search through the list of MCode areas. 
*/ for (;;) { mc = ((MCLink *)mc)->next; - lua_assert(mc != NULL); + lj_assertJ(mc != NULL, "broken MCode area chain"); if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { +#if LUAJIT_SECURITY_MCODE if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) mcode_protfail(J); +#endif return mc; } } } -#endif } /* Limit of MCode reservation reached. */ @@ -376,7 +367,7 @@ void lj_mcode_limiterr(jit_State *J, size_t need) sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10; sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10; - if ((size_t)need > sizemcode) + if (need * sizeof(MCode) > sizemcode) lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */ if (J->szallmcarea + sizemcode > maxmcode) lj_trace_err(J, LJ_TRERR_MCODEAL); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_mcode.h b/source/libs/luajit/LuaJIT-src/src/lj_mcode.h index f0847e931e5716e41a44d75443e1708b547c7ee5..bd8d25734abcde118310986f83f84a52ae638209 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_mcode.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_mcode.h @@ -1,6 +1,6 @@ /* ** Machine code management. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_MCODE_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_meta.c b/source/libs/luajit/LuaJIT-src/src/lj_meta.c index 0bd4d8429b65a5ecaa5068b4e227e86341f870a4..c9307615f78337d310bb73303a9f5765ed81b021 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_meta.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_meta.c @@ -1,6 +1,6 @@ /* ** Metamethod handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h @@ -47,7 +47,7 @@ void lj_meta_init(lua_State *L) cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name) { cTValue *mo = lj_tab_getstr(mt, name); - lua_assert(mm <= MM_FAST); + lj_assertX(mm <= MM_FAST, "bad metamethod %d", mm); if (!mo || tvisnil(mo)) { /* No metamethod? */ mt->nomm |= (uint8_t)(1u<<mm); /* Set negative cache flag. */ return NULL; @@ -86,8 +86,8 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv) else top->u32.lo = LJ_CONT_TAILCALL; setframe_pc(top++, pc); - if (LJ_FR2) top++; setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */ + if (LJ_FR2) top++; setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT); L->base = L->top = top+1; /* @@ -240,8 +240,8 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) int fromc = 0; if (left < 0) { left = -left; fromc = 1; } do { - if (!(tvisstr(top) || tvisnumber(top)) || - !(tvisstr(top-1) || tvisnumber(top-1))) { + if (!(tvisstr(top) || tvisnumber(top) || tvisbuf(top)) || + !(tvisstr(top-1) || tvisnumber(top-1) || tvisbuf(top-1))) { cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); if (tvisnil(mo)) { mo = lj_meta_lookup(L, top, MM_concat); @@ -277,10 +277,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) ** next step: [...][CAT stack ............] */ TValue *e, *o = top; - uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM; + uint64_t tlen = tvisstr(o) ? strV(o)->len : + tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM; SBuf *sb; do { - o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM; + o--; tlen += tvisstr(o) ? strV(o)->len : + tvisbuf(o) ? 
sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM; } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1))); if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV); sb = lj_buf_tmp_(L); @@ -290,6 +292,9 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) GCstr *s = strV(o); MSize len = s->len; lj_buf_putmem(sb, strdata(s), len); + } else if (tvisbuf(o)) { + SBufExt *sbx = bufV(o); + lj_buf_putmem(sb, sbx->r, sbufxlen(sbx)); } else if (tvisint(o)) { lj_strfmt_putint(sb, intV(o)); } else { @@ -363,7 +368,7 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins) } else if (op == BC_ISEQN) { o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)]; } else { - lua_assert(op == BC_ISEQP); + lj_assertL(op == BC_ISEQP, "bad bytecode op %d", op); setpriV(&tv, ~bc_d(ins)); o2 = &tv; } @@ -426,7 +431,7 @@ void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp) { L->top = curr_topL(L); ra++; tp--; - lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */ + lj_assertL(LJ_DUALNUM || tp != ~LJ_TNUMX, "bad type for ISTYPE"); if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra); else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra); else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_meta.h b/source/libs/luajit/LuaJIT-src/src/lj_meta.h index 73b45724735194ccf6f5cbb24aa9d2e0b159370e..0f53767f09a5317f601b4fa0124585b52457ec16 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_meta.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_meta.h @@ -1,6 +1,6 @@ /* ** Metamethod handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_META_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_obj.c b/source/libs/luajit/LuaJIT-src/src/lj_obj.c index ee33aeb3a8eb15fadaaf37e6131856d17766f864..72f48d40bfbfd1bc9692e762e22f0096cbed9345 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_obj.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_obj.c @@ -1,6 +1,6 @@ /* ** Miscellaneous object handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_obj_c @@ -34,12 +34,13 @@ int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2) } /* Return pointer to object or its object data. */ -const void * LJ_FASTCALL lj_obj_ptr(cTValue *o) +const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o) { + UNUSED(g); if (tvisudata(o)) return uddata(udataV(o)); else if (tvislightud(o)) - return lightudV(o); + return lightudV(g, o); else if (LJ_HASFFI && tviscdata(o)) return cdataptr(cdataV(o)); else if (tvisgcv(o)) diff --git a/source/libs/luajit/LuaJIT-src/src/lj_obj.h b/source/libs/luajit/LuaJIT-src/src/lj_obj.h index 52372c3e7e599e901ae66bab3f9f419069fa9214..855727bfab601029e42e204a0d39fa82277dc954 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_obj.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_obj.h @@ -1,6 +1,6 @@ /* ** LuaJIT VM tags, values and objects. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -13,7 +13,7 @@ #include "lj_def.h" #include "lj_arch.h" -/* -- Memory references (32 bit address space) ---------------------------- */ +/* -- Memory references --------------------------------------------------- */ /* Memory and GC object sizes. 
*/ typedef uint32_t MSize; @@ -34,17 +34,21 @@ typedef struct MRef { #if LJ_GC64 #define mref(r, t) ((t *)(void *)(r).ptr64) +#define mrefu(r) ((r).ptr64) #define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p)) +#define setmrefu(r, u) ((r).ptr64 = (uint64_t)(u)) #define setmrefr(r, v) ((r).ptr64 = (v).ptr64) #else #define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) +#define mrefu(r) ((r).ptr32) #define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) +#define setmrefu(r, u) ((r).ptr32 = (uint32_t)(u)) #define setmrefr(r, v) ((r).ptr32 = (v).ptr32) #endif -/* -- GC object references (32 bit address space) ------------------------- */ +/* -- GC object references ------------------------------------------------ */ /* GCobj reference */ typedef struct GCRef { @@ -153,11 +157,9 @@ typedef int32_t BCLine; /* Bytecode line number. */ typedef void (*ASMFunction)(void); /* Resizable string buffer. Need this here, details in lj_buf.h. */ +#define SBufHeader char *w, *e, *b; MRef L typedef struct SBuf { - MRef p; /* String buffer pointer. */ - MRef e; /* String buffer end pointer. */ - MRef b; /* String buffer base. */ - MRef L; /* lua_State, used for buffer resizing. 
*/ + SBufHeader; } SBuf; /* -- Tags and values ----------------------------------------------------- */ @@ -232,7 +234,7 @@ typedef const TValue cTValue; ** ---MSW---.---LSW--- ** primitive types | itype | | ** lightuserdata | itype | void * | (32 bit platforms) -** lightuserdata |ffff| void * | (64 bit platforms, 47 bit pointers) +** lightuserdata |ffff|seg| ofs | (64 bit platforms) ** GC objects | itype | GCRef | ** int (LJ_DUALNUM)| itype | int | ** number -------double------ @@ -245,7 +247,8 @@ typedef const TValue cTValue; ** ** ------MSW------.------LSW------ ** primitive types |1..1|itype|1..................1| -** GC objects/lightud |1..1|itype|-------GCRef--------| +** GC objects |1..1|itype|-------GCRef--------| +** lightuserdata |1..1|itype|seg|------ofs-------| ** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------| ** number ------------double------------- ** @@ -281,18 +284,31 @@ typedef const TValue cTValue; #define LJ_TISGCV (LJ_TSTR+1) #define LJ_TISTABUD LJ_TTAB +/* Type marker for slot holding a traversal index. Must be lightuserdata. */ +#define LJ_KEYINDEX 0xfffe7fffu + #if LJ_GC64 #define LJ_GCVMASK (((uint64_t)1 << 47) - 1) #endif +#if LJ_64 +/* To stay within 47 bits, lightuserdata is segmented. */ +#define LJ_LIGHTUD_BITS_SEG 8 +#define LJ_LIGHTUD_BITS_LO (47 - LJ_LIGHTUD_BITS_SEG) +#endif + /* -- String object ------------------------------------------------------- */ +typedef uint32_t StrHash; /* String hash value. */ +typedef uint32_t StrID; /* String ID. */ + /* String object header. String payload follows. */ typedef struct GCstr { GCHeader; uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */ - uint8_t unused; - MSize hash; /* Hash of string. */ + uint8_t hashalg; /* Hash algorithm. */ + StrID sid; /* Interned string ID. */ + StrHash hash; /* Hash of string. */ MSize len; /* Size of string. 
*/ } GCstr; @@ -300,7 +316,6 @@ typedef struct GCstr { #define strdata(s) ((const char *)((s)+1)) #define strdatawr(s) ((char *)((s)+1)) #define strVdata(o) strdata(strV(o)) -#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1) /* -- Userdata object ----------------------------------------------------- */ @@ -320,6 +335,7 @@ enum { UDTYPE_USERDATA, /* Regular userdata. */ UDTYPE_IO_FILE, /* I/O library FILE. */ UDTYPE_FFI_CLIB, /* FFI C library namespace. */ + UDTYPE_BUFFER, /* String buffer. */ UDTYPE__MAX }; @@ -397,7 +413,7 @@ typedef struct GCproto { #define PROTO_UV_IMMUTABLE 0x4000 /* Immutable upvalue. */ #define proto_kgc(pt, idx) \ - check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ + check_exp((uintptr_t)(intptr_t)(idx) >= ~(uintptr_t)(pt)->sizekgc+1u, \ gcref(mref((pt)->k, GCRef)[(idx)])) #define proto_knumtv(pt, idx) \ check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)]) @@ -495,7 +511,7 @@ typedef struct GCtab { } GCtab; #define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) -#define tabref(r) (&gcref((r))->tab) +#define tabref(r) ((GCtab *)gcref((r))) #define noderef(r) (mref((r), Node)) #define nextnode(n) (mref((n)->next, Node)) #if LJ_GC64 @@ -563,6 +579,9 @@ typedef enum { GCROOT_BASEMT_NUM = GCROOT_BASEMT + ~LJ_TNUMX, GCROOT_IO_INPUT, /* Userdata for default I/O input file. */ GCROOT_IO_OUTPUT, /* Userdata for default I/O output file. */ +#if LJ_HASFFI + GCROOT_FFI_FIN, /* FFI finalizer table. */ +#endif GCROOT_MAX } GCRootID; @@ -570,13 +589,18 @@ typedef enum { #define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)]) #define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) +/* Garbage collector state. */ typedef struct GCState { GCSize total; /* Memory currently allocated. */ GCSize threshold; /* Memory threshold. */ uint8_t currentwhite; /* Current white color. */ uint8_t state; /* GC state. */ - uint8_t nocdatafin; /* No cdata finalizer called. 
*/ - uint8_t unused2; + uint8_t unused0; +#if LJ_64 + uint8_t lightudnum; /* Number of lightuserdata segments - 1. */ +#else + uint8_t unused1; +#endif MSize sweepstr; /* Sweep position in string table. */ GCRef root; /* List of all collectable objects. */ MRef sweep; /* Sweep position in root list. */ @@ -588,27 +612,41 @@ typedef struct GCState { GCSize estimate; /* Estimate of memory actually in use. */ MSize stepmul; /* Incremental GC step granularity. */ MSize pause; /* Pause between successive GC cycles. */ +#if LJ_64 + MRef lightudseg; /* Upper bits of lightuserdata segments. */ +#endif } GCState; +/* String interning state. */ +typedef struct StrInternState { + GCRef *tab; /* String hash table anchors. */ + MSize mask; /* String hash mask (size of hash table - 1). */ + MSize num; /* Number of strings in hash table. */ + StrID id; /* Next string ID. */ + uint8_t idreseed; /* String ID reseed counter. */ + uint8_t second; /* String interning table uses secondary hashing. */ + uint8_t unused1; + uint8_t unused2; + LJ_ALIGN(8) uint64_t seed; /* Random string seed. */ +} StrInternState; + /* Global state, shared by all threads of a Lua universe. */ typedef struct global_State { - GCRef *strhash; /* String hash table (hash chain anchors). */ - MSize strmask; /* String hash mask (size of hash table - 1). */ - MSize strnum; /* Number of strings in hash table. */ lua_Alloc allocf; /* Memory allocator. */ void *allocd; /* Memory allocator data. */ GCState gc; /* Garbage collector. */ - volatile int32_t vmstate; /* VM state or current JIT code trace number. */ - SBuf tmpbuf; /* Temporary string buffer. */ GCstr strempty; /* Empty string. */ uint8_t stremptyz; /* Zero terminator of empty string. */ uint8_t hookmask; /* Hook mask. */ uint8_t dispatchmode; /* Dispatch mode. */ uint8_t vmevmask; /* VM event mask. */ + StrInternState str; /* String interning. */ + volatile int32_t vmstate; /* VM state or current JIT code trace number. 
*/ GCRef mainthref; /* Link to main thread. */ - TValue registrytv; /* Anchor for registry. */ + SBuf tmpbuf; /* Temporary string buffer. */ TValue tmptv, tmptv2; /* Temporary TValues. */ Node nilnode; /* Fallback 1-element hash part (nil key and value). */ + TValue registrytv; /* Anchor for registry. */ GCupval uvhead; /* Head of double-linked list of all open upvalues. */ int32_t hookcount; /* Instruction hook countdown. */ int32_t hookcstart; /* Start count for instruction hook counter. */ @@ -620,6 +658,7 @@ typedef struct global_State { GCRef cur_L; /* Currently executing lua_State. */ MRef jit_base; /* Current JIT code L->base or NULL. */ MRef ctype_state; /* Pointer to C type state. */ + PRNGState prng; /* Global PRNG state. */ GCRef gcroot[GCROOT_MAX]; /* GC roots. */ } global_State; @@ -638,7 +677,8 @@ typedef struct global_State { #define HOOK_PROFILE 0x80 #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) -#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) +#define hook_entergc(g) \ + ((g)->hookmask = ((g)->hookmask | (HOOK_ACTIVE|HOOK_GC)) & ~HOOK_PROFILE) #define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) #define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) #define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) @@ -678,6 +718,11 @@ struct lua_State { #define curr_topL(L) (L->base + curr_proto(L)->framesize) #define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top) +#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) +LJ_FUNC_NORET void lj_assert_fail(global_State *g, const char *file, int line, + const char *func, const char *fmt, ...); +#endif + /* -- GC object definition and conversions -------------------------------- */ /* GC header for generic access to common fields of GC objects. 
*/ @@ -731,10 +776,6 @@ typedef union GCobj { /* -- TValue getters/setters ---------------------------------------------- */ -#ifdef LUA_USE_ASSERT -#include "lj_gc.h" -#endif - /* Macros to test types. */ #if LJ_GC64 #define itype(o) ((uint32_t)((o)->it64 >> 47)) @@ -795,10 +836,24 @@ typedef union GCobj { #endif #define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o))) #if LJ_64 -#define lightudV(o) \ - check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) +#define lightudseg(u) \ + (((u) >> LJ_LIGHTUD_BITS_LO) & ((1 << LJ_LIGHTUD_BITS_SEG)-1)) +#define lightudlo(u) \ + ((u) & (((uint64_t)1 << LJ_LIGHTUD_BITS_LO) - 1)) +#define lightudup(p) \ + ((uint32_t)(((p) >> LJ_LIGHTUD_BITS_LO) << (LJ_LIGHTUD_BITS_LO-32))) +static LJ_AINLINE void *lightudV(global_State *g, cTValue *o) +{ + uint64_t u = o->u64; + uint64_t seg = lightudseg(u); + uint32_t *segmap = mref(g->gc.lightudseg, uint32_t); + lj_assertG(tvislightud(o), "lightuserdata expected"); + if (seg == (1 << LJ_LIGHTUD_BITS_SEG)-1) return NULL; + lj_assertG(seg <= g->gc.lightudnum, "bad lightuserdata segment %d", seg); + return (void *)(((uint64_t)segmap[seg] << 32) | lightudlo(u)); +} #else -#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) +#define lightudV(g, o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) #endif #define gcV(o) check_exp(tvisgcv(o), gcval(o)) #define strV(o) check_exp(tvisstr(o), &gcval(o)->str) @@ -824,7 +879,7 @@ typedef union GCobj { #define setpriV(o, i) (setitype((o), (i))) #endif -static LJ_AINLINE void setlightudV(TValue *o, void *p) +static LJ_AINLINE void setrawlightudV(TValue *o, void *p) { #if LJ_GC64 o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47); @@ -835,29 +890,29 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p) #endif } -#if LJ_64 -#define checklightudptr(L, p) \ - (((uint64_t)(p) >> 47) ? 
(lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) -#else -#define checklightudptr(L, p) (p) -#endif - -#if LJ_FR2 +#if LJ_FR2 || LJ_32 #define contptr(f) ((void *)(f)) #define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f)) -#elif LJ_64 +#else #define contptr(f) \ ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin)) #define setcont(o, f) \ ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) -#else -#define contptr(f) ((void *)(f)) -#define setcont(o, f) setlightudV((o), contptr(f)) #endif -#define tvchecklive(L, o) \ - UNUSED(L), lua_assert(!tvisgcv(o) || \ - ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o)))) +static LJ_AINLINE void checklivetv(lua_State *L, TValue *o, const char *msg) +{ + UNUSED(L); UNUSED(o); UNUSED(msg); +#if LUA_USE_ASSERT + if (tvisgcv(o)) { + lj_assertL(~itype(o) == gcval(o)->gch.gct, + "mismatch of TValue type %d vs GC type %d", + ~itype(o), gcval(o)->gch.gct); + /* Copy of isdead check from lj_gc.h to avoid circular include. */ + lj_assertL(!(gcval(o)->gch.marked & (G(L)->gc.currentwhite ^ 3) & 3), msg); + } +#endif +} static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype) { @@ -870,11 +925,12 @@ static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype) static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it) { - setgcVraw(o, v, it); tvchecklive(L, o); + setgcVraw(o, v, it); + checklivetv(L, o, "store to dead GC object"); } #define define_setV(name, type, tag) \ -static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \ +static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \ { \ setgcV(L, o, obj2gco(v), tag); \ } @@ -917,13 +973,17 @@ static LJ_AINLINE void setint64V(TValue *o, int64_t i) /* Copy tagged values. 
*/ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) { - *o1 = *o2; tvchecklive(L, o1); + *o1 = *o2; + checklivetv(L, o1, "copy of dead GC object"); } /* -- Number to integer conversion ---------------------------------------- */ #if LJ_SOFTFP LJ_ASMF int32_t lj_vm_tobit(double x); +#if LJ_TARGET_MIPS64 +LJ_ASMF int32_t lj_vm_tointg(double x); +#endif #endif static LJ_AINLINE int32_t lj_num2bit(lua_Number n) @@ -939,14 +999,22 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) #define lj_num2int(n) ((int32_t)(n)) +/* +** This must match the JIT backend behavior. In particular for archs +** that don't have a common hardware instruction for this conversion. +** Note that signed FP to unsigned int conversions have an undefined +** result and should never be relied upon in portable FFI code. +** See also: C99 or C11 standard, 6.3.1.4, footnote of (1). +*/ static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) { -#ifdef _MSC_VER - if (n >= 9223372036854775808.0) /* They think it's a feature. */ - return (uint64_t)(int64_t)(n - 18446744073709551616.0); - else +#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS + int64_t i = (int64_t)n; + if (i < 0) i = (int64_t)(n - 18446744073709551616.0); + return (uint64_t)i; +#else + return (uint64_t)n; #endif - return (uint64_t)n; } static LJ_AINLINE int32_t numberVint(cTValue *o) @@ -975,6 +1043,20 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1]; /* Compare two objects without calling metamethods. 
*/ LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2); -LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(cTValue *o); +LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o); + +#if LJ_ABI_PAUTH +#if LJ_TARGET_ARM64 +#include <ptrauth.h> +#define lj_ptr_sign(ptr, ctx) \ + ptrauth_sign_unauthenticated((ptr), ptrauth_key_function_pointer, (ctx)) +#define lj_ptr_strip(ptr) ptrauth_strip((ptr), ptrauth_key_function_pointer) +#else +#error "No support for pointer authentication for this architecture" +#endif +#else +#define lj_ptr_sign(ptr, ctx) (ptr) +#define lj_ptr_strip(ptr) (ptr) +#endif #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_opt_dce.c b/source/libs/luajit/LuaJIT-src/src/lj_opt_dce.c index 2417f3242aaaa7ee9b09807ecb1978866089c8c9..a9cce06a011b19200d382dbf84f5c9a3265dad45 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_opt_dce.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_opt_dce.c @@ -1,6 +1,6 @@ /* ** DCE: Dead Code Elimination. Pre-LOOP only -- ASM already performs DCE. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_dce_c @@ -44,15 +44,12 @@ static void dce_propagate(jit_State *J) IRIns *ir = IR(ins); if (irt_ismarked(ir->t)) { irt_clearmark(ir->t); - pchain[ir->o] = &ir->prev; } else if (!ir_sideeff(ir)) { *pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */ - ir->t.irt = IRT_NIL; - ir->o = IR_NOP; /* Replace instruction with NOP. 
*/ - ir->op1 = ir->op2 = 0; - ir->prev = 0; + lj_ir_nop(ir); continue; } + pchain[ir->o] = &ir->prev; if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t); if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t); } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_opt_fold.c b/source/libs/luajit/LuaJIT-src/src/lj_opt_fold.c index acbf36a5c7c1602e4f33b3e2d3f5e64c43242424..36aacebb03732615a56f019da9bfe5ff52c0c141 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_opt_fold.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_opt_fold.c @@ -2,7 +2,7 @@ ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. ** ABCelim: Array Bounds Check Elimination. ** CSE: Common-Subexpression Elimination. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_fold_c @@ -173,7 +173,6 @@ LJFOLD(ADD KNUM KNUM) LJFOLD(SUB KNUM KNUM) LJFOLD(MUL KNUM KNUM) LJFOLD(DIV KNUM KNUM) -LJFOLD(ATAN2 KNUM KNUM) LJFOLD(LDEXP KNUM KNUM) LJFOLD(MIN KNUM KNUM) LJFOLD(MAX KNUM KNUM) @@ -213,13 +212,34 @@ LJFOLDF(kfold_fpmath) return lj_ir_knum(J, y); } -LJFOLD(POW KNUM KINT) +LJFOLD(CALLN KNUM any) +LJFOLDF(kfold_fpcall1) +{ + const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; + if (CCI_TYPE(ci) == IRT_NUM) { + double y = ((double (*)(double))ci->func)(knumleft); + return lj_ir_knum(J, y); + } + return NEXTFOLD; +} + +LJFOLD(CALLN CARG IRCALL_atan2) +LJFOLDF(kfold_fpcall2) +{ + if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { + const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; + double a = ir_knum(IR(fleft->op1))->n; + double b = ir_knum(IR(fleft->op2))->n; + double y = ((double (*)(double, double))ci->func)(a, b); + return lj_ir_knum(J, y); + } + return NEXTFOLD; +} + +LJFOLD(POW KNUM KNUM) LJFOLDF(kfold_numpow) { - lua_Number a = knumleft; - lua_Number b = (lua_Number)fright->i; - lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); - return lj_ir_knum(J, y); + return lj_ir_knum(J, 
lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD)); } /* Must not use kfold_kref for numbers (could be NaN). */ @@ -247,7 +267,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) case IR_SUB: k1 -= k2; break; case IR_MUL: k1 *= k2; break; case IR_MOD: k1 = lj_vm_modi(k1, k2); break; - case IR_NEG: k1 = -k1; break; + case IR_NEG: k1 = (int32_t)(~(uint32_t)k1+1u); break; case IR_BAND: k1 &= k2; break; case IR_BOR: k1 |= k2; break; case IR_BXOR: k1 ^= k2; break; @@ -258,7 +278,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; - default: lua_assert(0); break; + default: lj_assertX(0, "bad IR op %d", op); break; } return k1; } @@ -330,7 +350,7 @@ LJFOLDF(kfold_intcomp) case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); case IR_ABC: case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); - default: lua_assert(0); return FAILFOLD; + default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; } } @@ -344,10 +364,12 @@ LJFOLDF(kfold_intcomp0) /* -- Constant folding for 64 bit integers -------------------------------- */ -static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) +static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, + IROp op) { - switch (op) { + UNUSED(J); #if LJ_HASFFI + switch (op) { case IR_ADD: k1 += k2; break; case IR_SUB: k1 -= k2; break; case IR_MUL: k1 *= k2; break; @@ -355,13 +377,16 @@ static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) case IR_BOR: k1 |= k2; break; case IR_BXOR: k1 ^= k2; break; case IR_BSHL: k1 <<= (k2 & 63); break; - case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break; - case IR_BSAR: k1 >>= (k2 & 63); break; - case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; - case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; -#endif - default: 
UNUSED(k2); lua_assert(0); break; + case IR_BSHR: k1 >>= (k2 & 63); break; + case IR_BSAR: k1 = (uint64_t)((int64_t)k1 >> (k2 & 63)); break; + case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break; + case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break; + default: lj_assertJ(0, "bad IR op %d", op); break; } +#else + UNUSED(k2); UNUSED(op); + lj_assertJ(0, "FFI IR op without FFI"); +#endif return k1; } @@ -373,7 +398,7 @@ LJFOLD(BOR KINT64 KINT64) LJFOLD(BXOR KINT64 KINT64) LJFOLDF(kfold_int64arith) { - return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64, + return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64, ir_k64(fright)->u64, (IROp)fins->o)); } @@ -395,7 +420,7 @@ LJFOLDF(kfold_int64arith2) } return INT64FOLD(k1); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -411,7 +436,7 @@ LJFOLDF(kfold_int64shift) int32_t sh = (fright->i & 63); return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL)); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -421,7 +446,7 @@ LJFOLDF(kfold_bnot64) #if LJ_HASFFI return INT64FOLD(~ir_k64(fleft)->u64); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -431,7 +456,7 @@ LJFOLDF(kfold_bswap64) #if LJ_HASFFI return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -456,10 +481,10 @@ LJFOLDF(kfold_int64comp) case IR_UGE: return CONDFOLD(a >= b); case IR_ULE: return CONDFOLD(a <= b); case IR_UGT: return CONDFOLD(a > b); - default: lua_assert(0); return FAILFOLD; + default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; } #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -471,7 
+496,7 @@ LJFOLDF(kfold_int64comp0) return DROPFOLD; return NEXTFOLD; #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -485,6 +510,7 @@ LJFOLDF(kfold_snew_kptr) } LJFOLD(SNEW any KINT) +LJFOLD(XSNEW any KINT) LJFOLDF(kfold_snew_empty) { if (fright->i == 0) @@ -496,7 +522,7 @@ LJFOLD(STRREF KGC KINT) LJFOLDF(kfold_strref) { GCstr *str = ir_kstr(fleft); - lua_assert((MSize)fright->i <= str->len); + lj_assertJ((MSize)fright->i <= str->len, "bad string ref"); return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); } @@ -548,22 +574,51 @@ LJFOLDF(kfold_strcmp) ** The compromise is to declare them as loads, emit them like stores and ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain ** fragments left over from CSE are eliminated by DCE. +** +** The string buffer methods emit a USE instead of a BUFSTR to keep the +** chain alive. */ -/* BUFHDR is emitted like a store, see below. */ +LJFOLD(BUFHDR any any) +LJFOLDF(bufhdr_merge) +{ + return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD; +} -LJFOLD(BUFPUT BUFHDR BUFSTR) -LJFOLDF(bufput_append) +LJFOLD(BUFPUT any BUFSTR) +LJFOLDF(bufput_bufstr) { - /* New buffer, no other buffer op inbetween and same buffer? */ - if ((J->flags & JIT_F_OPT_FWD) && - !(fleft->op2 & IRBUFHDR_APPEND) && - fleft->prev == fright->op2 && - fleft->op1 == IR(fright->op2)->op1) { - IRRef ref = fins->op1; - IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */ - IR(ref)->op1 = fright->op1; - return ref; + if ((J->flags & JIT_F_OPT_FWD)) { + IRRef hdr = fright->op2; + /* New buffer, no other buffer op inbetween and same buffer? */ + if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET && + fleft->prev == hdr && + fleft->op1 == IR(hdr)->op1 && + !(irt_isphi(fright->t) && IR(hdr)->prev) && + (!LJ_HASBUFFER || J->chain[IR_CALLA] < hdr)) { + IRRef ref = fins->op1; + IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. 
*/ + IR(ref)->op1 = fright->op1; + return ref; + } + /* Replay puts to global temporary buffer. */ + if (IR(hdr)->op2 == IRBUFHDR_RESET && !irt_isphi(fright->t)) { + IRIns *ir = IR(fright->op1); + /* For now only handle single string.reverse .lower .upper .rep. */ + if (ir->o == IR_CALLL && + ir->op2 >= IRCALL_lj_buf_putstr_reverse && + ir->op2 <= IRCALL_lj_buf_putstr_rep) { + IRIns *carg1 = IR(ir->op1); + if (ir->op2 == IRCALL_lj_buf_putstr_rep) { + IRIns *carg2 = IR(carg1->op1); + if (carg2->op1 == hdr) { + return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2); + } + } else if (carg1->op1 == hdr) { + return lj_ir_call(J, ir->op2, fins->op1, carg1->op2); + } + } + } } return EMITFOLD; /* Always emit, CSE later. */ } @@ -592,18 +647,19 @@ LJFOLDF(bufput_kgc) LJFOLD(BUFSTR any any) LJFOLDF(bufstr_kfold_cse) { - lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || - fleft->o == IR_CALLL); + lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || + fleft->o == IR_CALLL, + "bad buffer constructor IR op %d", fleft->o); if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { if (fleft->o == IR_BUFHDR) { /* No put operations? */ - if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */ + if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */ return lj_ir_kstr(J, &J2G(J)->strempty); fins->op1 = fleft->op1; fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */ return CSEFOLD; } else if (fleft->o == IR_BUFPUT) { IRIns *irb = IR(fleft->op1); - if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND)) + if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET) return fleft->op2; /* Shortcut for a single put operation. 
*/ } } @@ -613,9 +669,10 @@ LJFOLDF(bufstr_kfold_cse) while (ref) { IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1); while (ira->o == irb->o && ira->op2 == irb->op2) { - lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT || - ira->o == IR_CALLL || ira->o == IR_CARG); - if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND)) + lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT || + ira->o == IR_CALLL || ira->o == IR_CARG, + "bad buffer constructor IR op %d", ira->o); + if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET) return ref; /* CSE succeeded. */ if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab) break; @@ -673,7 +730,7 @@ LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar) LJFOLDF(bufput_kfold_fmt) { IRIns *irc = IR(fleft->op1); - lua_assert(irref_isk(irc->op2)); /* SFormat must be const. */ + lj_assertJ(irref_isk(irc->op2), "SFormat must be const"); if (irref_isk(fleft->op2)) { SFormat sf = (SFormat)IR(irc->op2)->i; IRIns *ira = IR(fleft->op2); @@ -978,8 +1035,7 @@ LJFOLDF(simplify_numadd_xneg) LJFOLD(SUB any KNUM) LJFOLDF(simplify_numsub_k) { - lua_Number n = knumright; - if (n == 0.0) /* x - (+-0) ==> x */ + if (ir_knum(fright)->u64 == 0) /* x - (+0) ==> x */ return LEFTFOLD; return NEXTFOLD; } @@ -1053,52 +1109,17 @@ LJFOLDF(simplify_nummuldiv_negneg) return RETRYFOLD; } -LJFOLD(POW any KINT) -LJFOLDF(simplify_numpow_xk) +LJFOLD(POW any KNUM) +LJFOLDF(simplify_numpow_k) { - int32_t k = fright->i; - TRef ref = fins->op1; - if (k == 0) /* x ^ 0 ==> 1 */ + if (knumright == 0.0) /* x ^ 0 ==> 1 */ return lj_ir_knum_one(J); /* Result must be a number, not an int. */ - if (k == 1) /* x ^ 1 ==> x */ + else if (knumright == 1.0) /* x ^ 1 ==> x */ return LEFTFOLD; - if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */ + else if (knumright == 2.0) /* x ^ 2 ==> x * x */ + return emitir(IRTN(IR_MUL), fins->op1, fins->op1); + else return NEXTFOLD; - if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. 
*/ - ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref); - k = -k; - } - /* Unroll x^k for 1 <= k <= 65536. */ - for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */ - ref = emitir(IRTN(IR_MUL), ref, ref); - if ((k >>= 1) != 0) { /* Handle trailing bits. */ - TRef tmp = emitir(IRTN(IR_MUL), ref, ref); - for (; k != 1; k >>= 1) { - if (k & 1) - ref = emitir(IRTN(IR_MUL), ref, tmp); - tmp = emitir(IRTN(IR_MUL), tmp, tmp); - } - ref = emitir(IRTN(IR_MUL), ref, tmp); - } - return ref; -} - -LJFOLD(POW KNUM any) -LJFOLDF(simplify_numpow_kx) -{ - lua_Number n = knumleft; - if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ - fins->o = IR_CONV; -#if LJ_TARGET_X86ORX64 - fins->op1 = fins->op2; - fins->op2 = IRCONV_NUM_INT; - fins->op2 = (IRRef1)lj_opt_fold(J); -#endif - fins->op1 = (IRRef1)lj_ir_knum_one(J); - fins->o = IR_LDEXP; - return RETRYFOLD; - } - return NEXTFOLD; } /* -- Simplify conversions ------------------------------------------------ */ @@ -1114,7 +1135,7 @@ LJFOLDF(shortcut_conv_num_int) } LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ -LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32*/ +LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32 */ LJFOLDF(simplify_conv_int_num) { /* Fold even across PHI to avoid expensive num->int conversions in loop. */ @@ -1149,8 +1170,10 @@ LJFOLDF(simplify_conv_i64_num) LJFOLD(CONV CONV IRCONV_INT_I64) /* _INT or _U32 */ LJFOLD(CONV CONV IRCONV_INT_U64) /* _INT or _U32 */ +LJFOLD(CONV CONV IRCONV_INT_U32) /* _INT or _U32 */ LJFOLD(CONV CONV IRCONV_U32_I64) /* _INT or _U32 */ LJFOLD(CONV CONV IRCONV_U32_U64) /* _INT or _U32 */ +LJFOLD(CONV CONV IRCONV_U32_INT) /* _INT or _U32 */ LJFOLDF(simplify_conv_int_i64) { int src; @@ -1183,10 +1206,10 @@ LJFOLDF(simplify_tobit_conv) { /* Fold even across PHI to avoid expensive num->int conversions in loop. 
*/ if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { - lua_assert(irt_isnum(fleft->t)); + lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); return fleft->op1; } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { - lua_assert(irt_isnum(fleft->t)); + lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); fins->o = IR_CONV; fins->op1 = fleft->op1; fins->op2 = (IRT_INT<<5)|IRT_U32; @@ -1195,14 +1218,13 @@ LJFOLDF(simplify_tobit_conv) return NEXTFOLD; } -/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */ +/* Shortcut floor/ceil/trunc + IRT_NUM <- integer conversion. */ LJFOLD(FPMATH CONV IRFPM_FLOOR) LJFOLD(FPMATH CONV IRFPM_CEIL) LJFOLD(FPMATH CONV IRFPM_TRUNC) LJFOLDF(simplify_floor_conv) { - if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT || - (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) + if ((uint32_t)(fleft->op2 & IRCONV_SRCMASK) - (uint32_t)IRT_I8 <= (uint32_t)(IRT_U64 - IRT_U8)) return LEFTFOLD; return NEXTFOLD; } @@ -1226,8 +1248,8 @@ LJFOLDF(simplify_conv_sext) /* Use scalar evolution analysis results to strength-reduce sign-extension. */ if (ref == J->scev.idx) { IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; - lua_assert(irt_isint(J->scev.t)); - if (lo && IR(lo)->i + ofs >= 0) { + lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported"); + if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { ok_reduce: #if LJ_TARGET_X64 /* Eliminate widening. All 32 bit ops do an implicit zero-extension. 
*/ @@ -1257,16 +1279,21 @@ LJFOLD(CONV SUB IRCONV_U32_U64) LJFOLD(CONV MUL IRCONV_U32_U64) LJFOLDF(simplify_conv_narrow) { +#if LJ_64 + UNUSED(J); + return NEXTFOLD; +#else IROp op = (IROp)fleft->o; IRType t = irt_type(fins->t); IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; PHIBARRIER(fleft); - op1 = emitir(IRTI(IR_CONV), op1, mode); - op2 = emitir(IRTI(IR_CONV), op2, mode); + op1 = emitir(IRT(IR_CONV, t), op1, mode); + op2 = emitir(IRT(IR_CONV, t), op2, mode); fins->ot = IRT(op, t); fins->op1 = op1; fins->op2 = op2; return RETRYFOLD; +#endif } /* Special CSE rule for CONV. */ @@ -1302,7 +1329,8 @@ LJFOLDF(narrow_convert) /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ if (J->chain[IR_LOOP]) return NEXTFOLD; - lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT); + lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT, + "unexpected CONV TOBIT"); return lj_opt_narrow_convert(J); } @@ -1339,7 +1367,7 @@ LJFOLDF(simplify_intsub_k) if (fright->i == 0) /* i - 0 ==> i */ return LEFTFOLD; fins->o = IR_ADD; /* i - k ==> i + (-k) */ - fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i); /* Overflow for -2^31 ok. */ + fins->op2 = (IRRef1)lj_ir_kint(J, (int32_t)(~(uint32_t)fright->i+1u)); /* Overflow for -2^31 ok. 
*/ return RETRYFOLD; } @@ -1370,7 +1398,7 @@ LJFOLDF(simplify_intsub_k64) if (k == 0) /* i - 0 ==> i */ return LEFTFOLD; fins->o = IR_ADD; /* i - k ==> i + (-k) */ - fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k); + fins->op2 = (IRRef1)lj_ir_kint64(J, ~k+1u); return RETRYFOLD; } @@ -1408,7 +1436,7 @@ LJFOLDF(simplify_intmul_k64) return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); return NEXTFOLD; #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -1416,7 +1444,7 @@ LJFOLD(MOD any KINT) LJFOLDF(simplify_intmod_k) { int32_t k = fright->i; - lua_assert(k != 0); + lj_assertJ(k != 0, "integer mod 0"); if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ fins->o = IR_BAND; fins->op2 = lj_ir_kint(J, k-1); @@ -1666,7 +1694,8 @@ LJFOLDF(simplify_shiftk_andk) fins->ot = IRTI(IR_BAND); return RETRYFOLD; } else if (irk->o == IR_KINT64) { - uint64_t k = kfold_int64arith(ir_k64(irk)->u64, fright->i, (IROp)fins->o); + uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i, + (IROp)fins->o); IROpT ot = fleft->ot; fins->op1 = fleft->op1; fins->op1 = (IRRef1)lj_opt_fold(J); @@ -1714,8 +1743,8 @@ LJFOLDF(simplify_andor_k64) IRIns *irk = IR(fleft->op2); PHIBARRIER(fleft); if (irk->o == IR_KINT64) { - uint64_t k = kfold_int64arith(ir_k64(irk)->u64, - ir_k64(fright)->u64, (IROp)fins->o); + uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, + (IROp)fins->o); /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ if (k == (fins->o == IR_BAND ? 
(uint64_t)0 : ~(uint64_t)0)) { @@ -1725,7 +1754,7 @@ LJFOLDF(simplify_andor_k64) } return NEXTFOLD; #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -1761,8 +1790,8 @@ LJFOLDF(reassoc_intarith_k64) #if LJ_HASFFI IRIns *irk = IR(fleft->op2); if (irk->o == IR_KINT64) { - uint64_t k = kfold_int64arith(ir_k64(irk)->u64, - ir_k64(fright)->u64, (IROp)fins->o); + uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, + (IROp)fins->o); PHIBARRIER(fleft); fins->op1 = fleft->op1; fins->op2 = (IRRef1)lj_ir_kint64(J, k); @@ -1770,12 +1799,10 @@ LJFOLDF(reassoc_intarith_k64) } return NEXTFOLD; #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } -LJFOLD(MIN MIN any) -LJFOLD(MAX MAX any) LJFOLD(BAND BAND any) LJFOLD(BOR BOR any) LJFOLDF(reassoc_dup) @@ -1785,6 +1812,15 @@ LJFOLDF(reassoc_dup) return NEXTFOLD; } +LJFOLD(MIN MIN any) +LJFOLD(MAX MAX any) +LJFOLDF(reassoc_dup_minmax) +{ + if (fins->op2 == fleft->op2) + return LEFTFOLD; /* (a o b) o b ==> a o b */ + return NEXTFOLD; +} + LJFOLD(BXOR BXOR any) LJFOLDF(reassoc_bxor) { @@ -1823,23 +1859,12 @@ LJFOLDF(reassoc_shift) return NEXTFOLD; } -LJFOLD(MIN MIN KNUM) -LJFOLD(MAX MAX KNUM) LJFOLD(MIN MIN KINT) LJFOLD(MAX MAX KINT) LJFOLDF(reassoc_minmax_k) { IRIns *irk = IR(fleft->op2); - if (irk->o == IR_KNUM) { - lua_Number a = ir_knum(irk)->n; - lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD); - if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ - return LEFTFOLD; - PHIBARRIER(fleft); - fins->op1 = fleft->op1; - fins->op2 = (IRRef1)lj_ir_knum(J, y); - return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */ - } else if (irk->o == IR_KINT) { + if (irk->o == IR_KINT) { int32_t a = irk->i; int32_t y = kfold_intop(a, fright->i, fins->o); if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. 
*/ @@ -1852,24 +1877,6 @@ LJFOLDF(reassoc_minmax_k) return NEXTFOLD; } -LJFOLD(MIN MAX any) -LJFOLD(MAX MIN any) -LJFOLDF(reassoc_minmax_left) -{ - if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) - return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */ - return NEXTFOLD; -} - -LJFOLD(MIN any MAX) -LJFOLD(MAX any MIN) -LJFOLDF(reassoc_minmax_right) -{ - if (fins->op1 == fright->op1 || fins->op1 == fright->op2) - return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */ - return NEXTFOLD; -} - /* -- Array bounds check elimination -------------------------------------- */ /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. @@ -1906,14 +1913,15 @@ LJFOLDF(abc_fwd) LJFOLD(ABC any KINT) LJFOLDF(abc_k) { + PHIBARRIER(fleft); if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) { IRRef ref = J->chain[IR_ABC]; IRRef asize = fins->op1; while (ref > asize) { IRIns *ir = IR(ref); if (ir->op1 == asize && irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (fright->i > k) + uint32_t k = (uint32_t)IR(ir->op2)->i; + if ((uint32_t)fright->i > k) ir->op2 = fins->op2; return DROPFOLD; } @@ -1928,9 +1936,10 @@ LJFOLDF(abc_k) LJFOLD(ABC any any) LJFOLDF(abc_invar) { - /* Invariant ABC marked as PTR. Drop if op1 is invariant, too. */ + /* Invariant ABC marked as P32 or U32. Drop if op1 is invariant too. */ if (!irt_isint(fins->t) && fins->op1 < J->chain[IR_LOOP] && - !irt_isphi(IR(fins->op1)->t)) + (irt_isu32(fins->t) || + (!irref_isk(fins->op1) && !irt_isphi(IR(fins->op1)->t)))) return DROPFOLD; return NEXTFOLD; } @@ -1965,7 +1974,10 @@ LJFOLD(NE any any) LJFOLDF(comm_equal) { /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */ - if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) + if (fins->op1 == fins->op2 && + (!irt_isnum(fins->t) || + (fleft->o == IR_CONV && /* Converted integers cannot be NaN. 
*/ + (uint32_t)(fleft->op2 & IRCONV_SRCMASK) - (uint32_t)IRT_I8 <= (uint32_t)(IRT_U64 - IRT_U8)))) return CONDFOLD(fins->o == IR_EQ); return fold_comm_swap(J); } @@ -1995,8 +2007,6 @@ LJFOLDF(comm_comp) LJFOLD(BAND any any) LJFOLD(BOR any any) -LJFOLD(MIN any any) -LJFOLD(MAX any any) LJFOLDF(comm_dup) { if (fins->op1 == fins->op2) /* x o x ==> x */ @@ -2004,6 +2014,15 @@ LJFOLDF(comm_dup) return fold_comm_swap(J); } +LJFOLD(MIN any any) +LJFOLD(MAX any any) +LJFOLDF(comm_dup_minmax) +{ + if (fins->op1 == fins->op2) /* x o x ==> x */ + return LEFTFOLD; + return NEXTFOLD; +} + LJFOLD(BXOR any any) LJFOLDF(comm_bxor) { @@ -2040,7 +2059,7 @@ LJFOLDF(merge_eqne_snew_kgc) { GCstr *kstr = ir_kstr(fright); int32_t len = (int32_t)kstr->len; - lua_assert(irt_isstr(fins->t)); + lj_assertJ(irt_isstr(fins->t), "bad equality IR type"); #if LJ_TARGET_UNALIGNED #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ @@ -2104,7 +2123,7 @@ LJFOLD(HLOAD KKPTR) LJFOLDF(kfold_hload_kkptr) { UNUSED(J); - lua_assert(ir_kptr(fleft) == niltvg(J2G(J))); + lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv"); return TREF_NIL; } @@ -2114,11 +2133,29 @@ LJFOLDX(lj_opt_fwd_hload) LJFOLD(ULOAD any) LJFOLDX(lj_opt_fwd_uload) -LJFOLD(CALLL any IRCALL_lj_tab_len) -LJFOLDX(lj_opt_fwd_tab_len) +LJFOLD(ALEN any any) +LJFOLDX(lj_opt_fwd_alen) + +/* Try to merge UREFO/UREFC into referenced instruction. */ +static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir) +{ + if (ir->o == IR_UREFO && irt_isguard(ir->t)) { + /* Might be pointing to some other coroutine's stack. + ** And GC might shrink said stack, thereby repointing the upvalue. + ** GC might even collect said coroutine, thereby closing the upvalue. + */ + if (gcstep_barrier(J, ref)) + return EMITFOLD; /* So cannot merge. */ + /* Current fins wants a check, but ir doesn't have one. 
*/ + if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) && + irt_type(ir->t) == IRT_IGC) + ir->t.irt += IRT_PGC-IRT_IGC; /* So install a check. */ + } + return ref; /* Not a TRef, but the caller doesn't care. */ +} /* Upvalue refs are really loads, but there are no corresponding stores. -** So CSE is ok for them, except for UREFO across a GC step (see below). +** So CSE is ok for them, except for guarded UREFO across a GC step. ** If the referenced function is const, its upvalue addresses are const, too. ** This can be used to improve CSE by looking for the same address, ** even if the upvalues originate from a different function. @@ -2136,9 +2173,7 @@ LJFOLDF(cse_uref) if (irref_isk(ir->op1)) { GCfunc *fn2 = ir_kfunc(IR(ir->op1)); if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) { - if (fins->o == IR_UREFO && gcstep_barrier(J, ref)) - break; - return ref; + return merge_uref(J, ref, ir); } } ref = ir->prev; @@ -2147,6 +2182,24 @@ LJFOLDF(cse_uref) return EMITFOLD; } +/* Custom CSE for UREFO. */ +LJFOLD(UREFO any any) +LJFOLDF(cse_urefo) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { + IRRef ref = J->chain[IR_UREFO]; + IRRef lim = fins->op1; + IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16); + while (ref > lim) { + IRIns *ir = IR(ref); + if (ir->op12 == op12) + return merge_uref(J, ref, ir); + ref = ir->prev; + } + } + return EMITFOLD; +} + LJFOLD(HREFK any any) LJFOLDX(lj_opt_fwd_hrefk) @@ -2248,6 +2301,27 @@ LJFOLDF(fload_str_len_tostr) return NEXTFOLD; } +LJFOLD(FLOAD any IRFL_SBUF_W) +LJFOLD(FLOAD any IRFL_SBUF_E) +LJFOLD(FLOAD any IRFL_SBUF_B) +LJFOLD(FLOAD any IRFL_SBUF_L) +LJFOLD(FLOAD any IRFL_SBUF_REF) +LJFOLD(FLOAD any IRFL_SBUF_R) +LJFOLDF(fload_sbuf) +{ + TRef tr = lj_opt_fwd_fload(J); + return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD; +} + +/* The fast function ID of function objects is immutable. 
*/ +LJFOLD(FLOAD KGC IRFL_FUNC_FFID) +LJFOLDF(fload_func_ffid_kgc) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) + return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid); + return NEXTFOLD; +} + /* The C type ID of cdata objects is immutable. */ LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) LJFOLDF(fload_cdata_typeid_kgc) @@ -2315,7 +2389,7 @@ LJFOLDF(fwd_sload) TRef tr = lj_opt_cse(J); return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; } else { - lua_assert(J->slot[fins->op1] != 0); + lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed"); return J->slot[fins->op1]; } } @@ -2331,18 +2405,24 @@ LJFOLDF(xload_kptr) LJFOLD(XLOAD any any) LJFOLDX(lj_opt_fwd_xload) +/* -- Frame handling ------------------------------------------------------ */ + +/* Prevent CSE of a REF_BASE operand across IR_RETF. */ +LJFOLD(SUB any BASE) +LJFOLD(SUB BASE any) +LJFOLD(EQ any BASE) +LJFOLDF(fold_base) +{ + return lj_opt_cselim(J, J->chain[IR_RETF]); +} + /* -- Write barriers ------------------------------------------------------ */ /* Write barriers are amenable to CSE, but not across any incremental ** GC steps. -** -** The same logic applies to open upvalue references, because a stack -** may be resized during a GC step (not the current stack, but maybe that -** of a coroutine). */ LJFOLD(TBAR any) LJFOLD(OBAR any any) -LJFOLD(UREFO any any) LJFOLDF(barrier_tab) { TRef tr = lj_opt_cse(J); @@ -2394,6 +2474,7 @@ LJFOLD(XSTORE any any) LJFOLDX(lj_opt_dse_xstore) LJFOLD(NEWREF any any) /* Treated like a store. */ +LJFOLD(TMPREF any any) LJFOLD(CALLA any any) LJFOLD(CALLL any any) /* Safeguard fallback. */ LJFOLD(CALLS any any) @@ -2404,9 +2485,19 @@ LJFOLD(TNEW any any) LJFOLD(TDUP any) LJFOLD(CNEW any any) LJFOLD(XSNEW any any) -LJFOLD(BUFHDR any any) LJFOLDX(lj_ir_emit) +/* -- Miscellaneous ------------------------------------------------------- */ + +LJFOLD(CARG any any) +LJFOLDF(cse_carg) +{ + TRef tr = lj_opt_cse(J); + if (tref_ref(tr) < J->chain[IR_LOOP]) /* CSE across loop? 
*/ + return EMITFOLD; /* Raw emit. Assumes fins is left intact by CSE. */ + return tr; +} + /* ------------------------------------------------------------------------ */ /* Every entry in the generated hash table is a 32 bit pattern: @@ -2430,8 +2521,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) IRRef ref; if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { - lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | - JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); + lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | + JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT, + "bad JIT_F_OPT_DEFAULT"); /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) return lj_opt_cse(J); @@ -2493,7 +2585,7 @@ retry: return lj_ir_kint(J, fins->i); if (ref == FAILFOLD) lj_trace_err(J, LJ_TRERR_GFAIL); - lua_assert(ref == DROPFOLD); + lj_assertJ(ref == DROPFOLD, "bad fold result"); return REF_DROP; } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_opt_loop.c b/source/libs/luajit/LuaJIT-src/src/lj_opt_loop.c index 04c6d06ddffa760d4f4f3814ec99d337cfb83ad1..ac8fbae99b49d010ad23b4ed1f4310805c27b4d7 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_opt_loop.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_opt_loop.c @@ -1,6 +1,6 @@ /* ** LOOP: Loop Optimizations. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_loop_c @@ -223,8 +223,9 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, } J->guardemit.irt = 0; /* Setup new snapshot. 
*/ - snap->mapofs = (uint16_t)nmapofs; + snap->mapofs = (uint32_t)nmapofs; snap->ref = (IRRef1)J->cur.nins; + snap->mcofs = 0; snap->nslots = nslots; snap->topslot = osnap->topslot; snap->count = 0; @@ -251,7 +252,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, nmap += nn; while (omap < nextmap) /* Copy PC + frame links. */ *nmap++ = *omap++; - J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap); + J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap); } typedef struct LoopState { @@ -299,7 +300,8 @@ static void loop_unroll(LoopState *lps) loopmap = &J->cur.snapmap[loopsnap->mapofs]; /* The PC of snapshot #0 and the loop snapshot must match. */ psentinel = &loopmap[loopsnap->nent]; - lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); + lj_assertJ(*psentinel == J->cur.snapmap[J->cur.snap[0].nent], + "mismatched PC for loop snapshot"); *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ /* Start substitution with snapshot #1 (#0 is empty for root traces). */ @@ -352,10 +354,12 @@ static void loop_unroll(LoopState *lps) irr = IR(ref); goto phiconv; } - } else if (ref != REF_DROP && irr->o == IR_CONV && - ref > invar && irr->op1 < invar) { - /* May need an extra PHI for a CONV. */ - ref = irr->op1; + } else if (ref != REF_DROP && ref > invar && + ((irr->o == IR_CONV && irr->op1 < invar) || + (irr->o == IR_ALEN && irr->op2 < invar && + irr->op2 != REF_NIL))) { + /* May need an extra PHI for a CONV or ALEN hint. */ + ref = irr->o == IR_CONV ? irr->op1 : irr->op2; irr = IR(ref); phiconv: if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { @@ -369,8 +373,8 @@ static void loop_unroll(LoopState *lps) } } if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. 
*/ - J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs; - lua_assert(J->cur.nsnapmap <= J->sizesnapmap); + J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs; + lj_assertJ(J->cur.nsnapmap <= J->sizesnapmap, "bad snapshot map index"); *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ loop_emit_phi(J, subst, phi, nphi, onsnap); @@ -383,7 +387,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap) SnapShot *snap = &J->cur.snap[nsnap-1]; SnapEntry *map = J->cur.snapmap; map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */ - J->cur.nsnapmap = (uint16_t)nsnapmap; + J->cur.nsnapmap = (uint32_t)nsnapmap; J->cur.nsnap = nsnap; J->guardemit.irt = 0; lj_ir_rollback(J, ins); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_opt_mem.c b/source/libs/luajit/LuaJIT-src/src/lj_opt_mem.c index cc177d39e8b5184481d6b6ff4851c6d0fdb188f4..8cacfcfef9d475fa5f1eaf2f7d792e58e018c592 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_opt_mem.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_opt_mem.c @@ -3,7 +3,7 @@ ** AA: Alias Analysis using high-level semantic disambiguation. ** FWD: Load Forwarding (L2L) + Store Forwarding (S2L). ** DSE: Dead-Store Elimination. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_mem_c @@ -18,6 +18,7 @@ #include "lj_jit.h" #include "lj_iropt.h" #include "lj_ircall.h" +#include "lj_dispatch.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) @@ -56,8 +57,8 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb) { IRIns *taba = IR(ta), *tabb = IR(tb); int newa, newb; - lua_assert(ta != tb); - lua_assert(irt_istab(taba->t) && irt_istab(tabb->t)); + lj_assertJ(ta != tb, "bad usage"); + lj_assertJ(irt_istab(taba->t) && irt_istab(tabb->t), "bad usage"); /* Disambiguate new allocations. 
*/ newa = (taba->o == IR_TNEW || taba->o == IR_TDUP); newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP); @@ -71,6 +72,34 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb) return aa_escape(J, taba, tabb); } +/* Check whether there's no aliasing table.clear. */ +static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta) +{ + IRRef ref = J->chain[IR_CALLS]; + while (ref > lim) { + IRIns *calls = IR(ref); + if (calls->op2 == IRCALL_lj_tab_clear && + (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO)) + return 0; /* Conflict. */ + ref = calls->prev; + } + return 1; /* No conflict. Can safely FOLD/CSE. */ +} + +/* Check whether there's no aliasing NEWREF/table.clear for the left operand. */ +int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) +{ + IRRef ta = fins->op1; + IRRef ref = J->chain[IR_NEWREF]; + while (ref > lim) { + IRIns *newref = IR(ref); + if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO) + return 0; /* Conflict. */ + ref = newref->prev; + } + return fwd_aa_tab_clear(J, lim, ta); +} + /* Alias analysis for array and hash access using key-based disambiguation. */ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) { @@ -99,7 +128,7 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) /* Disambiguate array references based on index arithmetic. */ int32_t ofsa = 0, ofsb = 0; IRRef basea = ka, baseb = kb; - lua_assert(refb->o == IR_AREF); + lj_assertJ(refb->o == IR_AREF, "expected AREF"); /* Gather base and offset from t[base] or t[base+-ofs]. */ if (keya->o == IR_ADD && irref_isk(keya->op2)) { basea = keya->op1; @@ -117,8 +146,9 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ } else { /* Disambiguate hash references based on the type of their keys. 
*/ - lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && - (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF)); + lj_assertJ((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && + (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF), + "bad xREF IR op %d or %d", refa->o, refb->o); if (!irt_sametype(keya->t, keyb->t)) return ALIAS_NO; /* Different key types. */ } @@ -152,9 +182,11 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr; IRRef tab = ir->op1; ir = IR(tab); - if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) { + if ((ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) && + fwd_aa_tab_clear(J, tab, tab)) { /* A NEWREF with a number key may end up pointing to the array part. ** But it's referenced from HSTORE and not found in the ASTORE chain. + ** Or a NEWREF may rehash the table and move unrelated number keys. ** For now simply consider this a conflict without forwarding anything. */ if (xr->o == IR_AREF) { @@ -165,6 +197,11 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) goto cselim; ref2 = newref->prev; } + } else { + IRIns *key = IR(xr->op2); + if (key->o == IR_KSLOT) key = IR(key->op1); + if (irt_isnum(key->t) && J->chain[IR_NEWREF] > tab) + goto cselim; } /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF. ** But the above search for conflicting stores was limited by xref. @@ -180,23 +217,23 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) } ref = store->prev; } - lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t)); - if (irt_ispri(fins->t)) { - return TREF_PRI(irt_type(fins->t)); - } else if (irt_isnum(fins->t) || (LJ_DUALNUM && irt_isint(fins->t)) || - irt_isstr(fins->t)) { + /* Simplified here: let loop_unroll() figure out any type instability. 
*/ + if (ir->o == IR_TNEW) { + return TREF_NIL; + } else { TValue keyv; cTValue *tv; IRIns *key = IR(xr->op2); if (key->o == IR_KSLOT) key = IR(key->op1); lj_ir_kvalue(J->L, &keyv, key); tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv); - lua_assert(itype2irt(tv) == irt_type(fins->t)); - if (irt_isnum(fins->t)) + if (tvispri(tv)) + return TREF_PRI(itype2irt(tv)); + else if (tvisnum(tv)) return lj_ir_knum_u64(J, tv->u64); - else if (LJ_DUALNUM && irt_isint(fins->t)) + else if (tvisint(tv)) return lj_ir_kint(J, intV(tv)); - else + else if (tvisgcv(tv)) return lj_ir_kstr(J, strV(tv)); } /* Othwerwise: don't intern as a constant. */ @@ -265,7 +302,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J) while (ref > tab) { IRIns *newref = IR(ref); if (tab == newref->op1) { - if (fright->op1 == newref->op2) + if (fright->op1 == newref->op2 && fwd_aa_tab_clear(J, ref, tab)) return ref; /* Forward from NEWREF. */ else goto docse; @@ -275,7 +312,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J) ref = newref->prev; } /* No conflicting NEWREF: key location unchanged for HREFK of TDUP. */ - if (IR(tab)->o == IR_TDUP) + if (IR(tab)->o == IR_TDUP && fwd_aa_tab_clear(J, tab, tab)) fins->t.irt &= ~IRT_GUARD; /* Drop HREFK guard. */ docse: return CSEFOLD; @@ -309,34 +346,6 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J) return 1; /* No conflict. Can fold to niltv. */ } -/* Check whether there's no aliasing table.clear. */ -static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta) -{ - IRRef ref = J->chain[IR_CALLS]; - while (ref > lim) { - IRIns *calls = IR(ref); - if (calls->op2 == IRCALL_lj_tab_clear && - (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO)) - return 0; /* Conflict. */ - ref = calls->prev; - } - return 1; /* No conflict. Can safely FOLD/CSE. */ -} - -/* Check whether there's no aliasing NEWREF/table.clear for the left operand. 
*/ -int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) -{ - IRRef ta = fins->op1; - IRRef ref = J->chain[IR_NEWREF]; - while (ref > lim) { - IRIns *newref = IR(ref); - if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO) - return 0; /* Conflict. */ - ref = newref->prev; - } - return fwd_aa_tab_clear(J, lim, ta); -} - /* ASTORE/HSTORE elimination. */ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) { @@ -360,16 +369,16 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) /* Different value: try to eliminate the redundant store. */ if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ IRIns *ir; - /* Check for any intervening guards (includes conflicting loads). */ + /* Check for any intervening guards (includes conflicting loads). + ** Note that lj_tab_keyindex and lj_vm_next don't need guards, + ** since they are followed by at least one guarded VLOAD. + */ for (ir = IR(J->cur.nins-1); ir > store; ir--) - if (irt_isguard(ir->t) || ir->o == IR_CALLL) + if (irt_isguard(ir->t) || ir->o == IR_ALEN) goto doemit; /* No elimination possible. */ /* Remove redundant store from chain and replace with NOP. */ *refp = store->prev; - store->o = IR_NOP; - store->t.irt = IRT_NIL; - store->op1 = store->op2 = 0; - store->prev = 0; + lj_ir_nop(store); /* Now emit the new store instead. */ } goto doemit; @@ -380,6 +389,67 @@ doemit: return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ } +/* ALEN forwarding. */ +TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J) +{ + IRRef tab = fins->op1; /* Table reference. */ + IRRef lim = tab; /* Search limit. */ + IRRef ref; + + /* Search for conflicting HSTORE with numeric key. */ + ref = J->chain[IR_HSTORE]; + while (ref > lim) { + IRIns *store = IR(ref); + IRIns *href = IR(store->op1); + IRIns *key = IR(href->op2); + if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { + lim = ref; /* Conflicting store found, limits search for ALEN. 
*/ + break; + } + ref = store->prev; + } + + /* Try to find a matching ALEN. */ + ref = J->chain[IR_ALEN]; + while (ref > lim) { + /* CSE for ALEN only depends on the table, not the hint. */ + if (IR(ref)->op1 == tab) { + IRRef sref; + + /* Search for aliasing table.clear. */ + if (!fwd_aa_tab_clear(J, ref, tab)) + break; + + /* Search for hint-forwarding or conflicting store. */ + sref = J->chain[IR_ASTORE]; + while (sref > ref) { + IRIns *store = IR(sref); + IRIns *aref = IR(store->op1); + IRIns *fref = IR(aref->op1); + if (tab == fref->op1) { /* ASTORE to the same table. */ + /* Detect t[#t+1] = x idiom for push. */ + IRIns *idx = IR(aref->op2); + if (!irt_isnil(store->t) && + idx->o == IR_ADD && idx->op1 == ref && + IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) { + /* Note: this requires an extra PHI check in loop unroll. */ + fins->op2 = aref->op2; /* Set ALEN hint. */ + } + goto doemit; /* Conflicting store, possibly giving a hint. */ + } else if (aa_table(J, tab, fref->op1) != ALIAS_NO) { + goto doemit; /* Conflicting store. */ + } + sref = store->prev; + } + + return ref; /* Plain ALEN forwarding. */ + } + ref = IR(ref)->prev; + } +doemit: + return EMITFOLD; +} + /* -- ULOAD forwarding ---------------------------------------------------- */ /* The current alias analysis for upvalues is very simplistic. It only @@ -392,18 +462,23 @@ doemit: */ static AliasRet aa_uref(IRIns *refa, IRIns *refb) { - if (refa->o != refb->o) - return ALIAS_NO; /* Different UREFx type. */ if (refa->op1 == refb->op1) { /* Same function. */ if (refa->op2 == refb->op2) return ALIAS_MUST; /* Same function, same upvalue idx. */ else return ALIAS_NO; /* Same function, different upvalue idx. */ } else { /* Different functions, check disambiguation hash values. */ - if (((refa->op2 ^ refb->op2) & 0xff)) + if (((refa->op2 ^ refb->op2) & 0xff)) { return ALIAS_NO; /* Upvalues with different hash values cannot alias. 
*/ - else - return ALIAS_MAY; /* No conclusion can be drawn for same hash value. */ + } else if (refa->o != refb->o) { + /* Different UREFx type, but need to confirm the UREFO really is open. */ + if (irt_type(refa->t) == IRT_IGC) refa->t.irt += IRT_PGC-IRT_IGC; + else if (irt_type(refb->t) == IRT_IGC) refb->t.irt += IRT_PGC-IRT_IGC; + return ALIAS_NO; + } else { + /* No conclusion can be drawn for same hash value and same UREFx type. */ + return ALIAS_MAY; + } } } @@ -429,7 +504,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) cselim: /* Try to find a matching load. Below the conflicting store, if any. */ - ref = J->chain[IR_ULOAD]; while (ref > lim) { IRIns *ir = IR(ref); @@ -470,10 +544,7 @@ TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J) goto doemit; /* No elimination possible. */ /* Remove redundant store from chain and replace with NOP. */ *refp = store->prev; - store->o = IR_NOP; - store->t.irt = IRT_NIL; - store->op1 = store->op2 = 0; - store->prev = 0; + lj_ir_nop(store); if (ref+1 < J->cur.nins && store[1].o == IR_OBAR && store[1].op1 == xref) { IRRef1 *bp = &J->chain[IR_OBAR]; @@ -482,10 +553,7 @@ TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J) bp = &obar->prev; /* Remove OBAR, too. */ *bp = obar->prev; - obar->o = IR_NOP; - obar->t.irt = IRT_NIL; - obar->op1 = obar->op2 = 0; - obar->prev = 0; + lj_ir_nop(obar); } /* Now emit the new store instead. */ } @@ -565,8 +633,9 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J) goto doemit; break; /* Otherwise continue searching. */ case ALIAS_MUST: - if (store->op2 == val) /* Same value: drop the new store. */ - return DROPFOLD; + if (store->op2 == val && + !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R)) + return DROPFOLD; /* Same value: drop the new store. */ /* Different value: try to eliminate the redundant store. */ if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. 
*/ IRIns *ir; @@ -576,10 +645,7 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J) goto doemit; /* No elimination possible. */ /* Remove redundant store from chain and replace with NOP. */ *refp = store->prev; - store->o = IR_NOP; - store->t.irt = IRT_NIL; - store->op1 = store->op2 = 0; - store->prev = 0; + lj_ir_nop(store); /* Now emit the new store instead. */ } goto doemit; @@ -590,6 +656,29 @@ doemit: return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ } +/* Check whether there's no aliasing buffer op between IRFL_SBUF_*. */ +int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim) +{ + IRRef ref; + if (J->chain[IR_BUFPUT] > lim) + return 0; /* Conflict. */ + ref = J->chain[IR_CALLS]; + while (ref > lim) { + IRIns *ir = IR(ref); + if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr) + return 0; /* Conflict. */ + ref = ir->prev; + } + ref = J->chain[IR_CALLL]; + while (ref > lim) { + IRIns *ir = IR(ref); + if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr) + return 0; /* Conflict. */ + ref = ir->prev; + } + return 1; /* No conflict. Can safely FOLD/CSE. */ +} + /* -- XLOAD forwarding and XSTORE elimination ----------------------------- */ /* Find cdata allocation for a reference (if any). */ @@ -830,10 +919,7 @@ TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J) goto doemit; /* No elimination possible. */ /* Remove redundant store from chain and replace with NOP. */ *refp = store->prev; - store->o = IR_NOP; - store->t.irt = IRT_NIL; - store->op1 = store->op2 = 0; - store->prev = 0; + lj_ir_nop(store); /* Now emit the new store instead. */ } goto doemit; @@ -844,39 +930,6 @@ doemit: return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ } -/* -- Forwarding of lj_tab_len -------------------------------------------- */ - -/* This is rather simplistic right now, but better than nothing. */ -TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J) -{ - IRRef tab = fins->op1; /* Table reference. 
*/ - IRRef lim = tab; /* Search limit. */ - IRRef ref; - - /* Any ASTORE is a conflict and limits the search. */ - if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE]; - - /* Search for conflicting HSTORE with numeric key. */ - ref = J->chain[IR_HSTORE]; - while (ref > lim) { - IRIns *store = IR(ref); - IRIns *href = IR(store->op1); - IRIns *key = IR(href->op2); - if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { - lim = ref; /* Conflicting store found, limits search for TLEN. */ - break; - } - ref = store->prev; - } - - /* Search for aliasing table.clear. */ - if (!fwd_aa_tab_clear(J, lim, tab)) - return lj_ir_emit(J); - - /* Try to find a matching load. Below the conflicting store, if any. */ - return lj_opt_cselim(J, lim); -} - /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ /* Check whether the previous value for a table store is non-nil. @@ -907,6 +960,8 @@ int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref) if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref)) return 0; /* A nil store with same const key or var key MAY alias. */ /* Different const keys CANNOT alias. */ + } else if (irt_isp32(IR(skref)->t) != irt_isp32(IR(xkref)->t)) { + return 0; /* HREF and HREFK MAY alias. */ } /* Different key types CANNOT alias. */ } /* Other non-nil stores MAY alias. */ ref = store->prev; diff --git a/source/libs/luajit/LuaJIT-src/src/lj_opt_narrow.c b/source/libs/luajit/LuaJIT-src/src/lj_opt_narrow.c index cd96ca4b4f94f6ae03e0c7b902d80eba972792d7..01b5833d92e06d01d5d301edb70a60e7247e4075 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_opt_narrow.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_opt_narrow.c @@ -1,7 +1,7 @@ /* ** NARROW: Narrowing of numbers to integers (double to int32_t). ** STRIPOV: Stripping of overflow checks. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #define lj_opt_narrow_c @@ -341,7 +341,8 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) NarrowIns *savesp = nc->sp; int count = narrow_conv_backprop(nc, ir->op1, depth); count += narrow_conv_backprop(nc, ir->op2, depth); - if (count <= 1) { /* Limit total number of conversions. */ + /* Limit total number of conversions. */ + if (count <= 1 && nc->sp < nc->maxsp) { *nc->sp++ = NARROWINS(IRT(ir->o, nc->t), ref); return count; } @@ -372,17 +373,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) } else if (op == NARROW_CONV) { *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ } else if (op == NARROW_SEXT) { - lua_assert(sp >= nc->stack+1); + lj_assertJ(sp >= nc->stack+1, "stack underflow"); sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); } else if (op == NARROW_INT) { - lua_assert(next < last); + lj_assertJ(next < last, "missing arg to NARROW_INT"); *sp++ = nc->t == IRT_I64 ? lj_ir_kint64(J, (int64_t)(int32_t)*next++) : lj_ir_kint(J, *next++); } else { /* Regular IROpT. Pops two operands and pushes one result. */ IRRef mode = nc->mode; - lua_assert(sp >= nc->stack+2); + lj_assertJ(sp >= nc->stack+2, "stack underflow"); sp--; /* Omit some overflow checks for array indexing. See comments above. */ if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { @@ -398,7 +399,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); } } - lua_assert(sp == nc->stack+1); + lj_assertJ(sp == nc->stack+1, "stack misalignment"); return nc->stack[0]; } @@ -452,7 +453,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode) TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) { IRIns *ir; - lua_assert(tref_isnumber(tr)); + lj_assertJ(tref_isnumber(tr), "expected number type"); if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. 
*/ return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); /* Omit some overflow checks for array indexing. See comments above. */ @@ -499,7 +500,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) /* Narrow C array index (overflow undefined). */ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) { - lua_assert(tref_isnumber(tr)); + lj_assertJ(tref_isnumber(tr), "expected number type"); if (tref_isnum(tr)) return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ @@ -551,8 +552,13 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) { rc = conv_str_tonum(J, rc, vc); if (tref_isinteger(rc)) { - if ((uint32_t)numberVint(vc) != 0x80000000u) - return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); + uint32_t k = (uint32_t)numberVint(vc); + if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) { + TRef zero = lj_ir_kint(J, 0); + if (!LJ_DUALNUM) + emitir(IRTGI(IR_NE), rc, zero); + return emitir(IRTGI(IR_SUBOV), zero, rc); + } rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); } return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG)); @@ -579,44 +585,6 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) return emitir(IRTN(IR_SUB), rb, tmp); } -/* Narrowing of power operator or math.pow. */ -TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) -{ - rb = conv_str_tonum(J, rb, vb); - rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */ - rc = conv_str_tonum(J, rc, vc); - /* Narrowing must be unconditional to preserve (-x)^i semantics. */ - if (tvisint(vc) || numisint(numV(vc))) { - int checkrange = 0; - /* Split pow is faster for bigger exponents. But do this only for (+k)^i. 
*/ - if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { - int32_t k = numberVint(vc); - if (!(k >= -65536 && k <= 65536)) goto split_pow; - checkrange = 1; - } - if (!tref_isinteger(rc)) { - /* Guarded conversion to integer! */ - rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK); - } - if (checkrange && !tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */ - TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); - emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); - } - return emitir(IRTN(IR_POW), rb, rc); - } -split_pow: - /* FOLD covers most cases, but some are easier to do here. */ - if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb))))) - return rb; /* 1 ^ x ==> 1 */ - rc = lj_ir_tonum(J, rc); - if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5) - return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */ - /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */ - rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2); - rc = emitir(IRTN(IR_MUL), rb, rc); - return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2); -} - /* -- Predictive narrowing of induction variables ------------------------- */ /* Narrow a single runtime value. */ @@ -630,9 +598,10 @@ static int narrow_forl(jit_State *J, cTValue *o) /* Narrow the FORL index type by looking at the runtime values. */ IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) { - lua_assert(tvisnumber(&tv[FORL_IDX]) && + lj_assertJ(tvisnumber(&tv[FORL_IDX]) && tvisnumber(&tv[FORL_STOP]) && - tvisnumber(&tv[FORL_STEP])); + tvisnumber(&tv[FORL_STEP]), + "expected number types"); /* Narrow only if the runtime values of start/stop/step are all integers. 
*/ if (narrow_forl(J, &tv[FORL_IDX]) && narrow_forl(J, &tv[FORL_STOP]) && diff --git a/source/libs/luajit/LuaJIT-src/src/lj_opt_sink.c b/source/libs/luajit/LuaJIT-src/src/lj_opt_sink.c index 929ccb61877e33d486adce443899e746cf87bf7d..9d51614131e48dfa9527e74c9ec3d0ad14227492 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_opt_sink.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_opt_sink.c @@ -1,6 +1,6 @@ /* ** SINK: Allocation Sinking and Store Sinking. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_opt_sink_c @@ -36,12 +36,14 @@ static IRIns *sink_checkalloc(jit_State *J, IRIns *irs) } /* Recursively check whether a value depends on a PHI. */ -static int sink_phidep(jit_State *J, IRRef ref) +static int sink_phidep(jit_State *J, IRRef ref, int *workp) { IRIns *ir = IR(ref); + if (!*workp) return 1; /* Give up and pretend it does. */ + (*workp)--; if (irt_isphi(ir->t)) return 1; - if (ir->op1 >= REF_FIRST && sink_phidep(J, ir->op1)) return 1; - if (ir->op2 >= REF_FIRST && sink_phidep(J, ir->op2)) return 1; + if (ir->op1 >= REF_FIRST && sink_phidep(J, ir->op1, workp)) return 1; + if (ir->op2 >= REF_FIRST && sink_phidep(J, ir->op2, workp)) return 1; return 0; } @@ -56,7 +58,13 @@ static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref) return 1; /* Sinkable PHI. */ } /* Otherwise the value must be loop-invariant. */ - return ref < J->loopref && !sink_phidep(J, ref); + if (ref < J->loopref) { + /* Check for PHI dependencies, but give up after reasonable effort. */ + int work = 64; + return !sink_phidep(J, ref, &work); + } else { + return 0; /* Loop-variant. */ + } } return 1; /* Constant (non-PHI). */ } @@ -78,8 +86,7 @@ static void sink_mark_ins(jit_State *J) switch (ir->o) { case IR_BASE: return; /* Finished. 
*/ - case IR_CALLL: /* IRCALL_lj_tab_len */ - case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: + case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN: irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ break; case IR_FLOAD: @@ -100,8 +107,8 @@ static void sink_mark_ins(jit_State *J) (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP && !sink_checkphi(J, ir, (ir+1)->op2)))) irt_setmark(ir->t); /* Mark ineligible allocation. */ - /* fallthrough */ #endif + /* fallthrough */ case IR_USTORE: irt_setmark(IR(ir->op2)->t); /* Mark stored value. */ break; @@ -219,6 +226,7 @@ static void sink_sweep_ins(jit_State *J) for (ir = IR(J->cur.nk); ir < irbase; ir++) { irt_clearmark(ir->t); ir->prev = REGSP_INIT; + /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ if (irt_is64(ir->t) && ir->o != IR_KNULL) ir++; } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_opt_split.c b/source/libs/luajit/LuaJIT-src/src/lj_opt_split.c index fc9352042ed3818cd195c5171a9b6741916c25dd..8d0259117bfde5055c69cd56d9d67faa17965d75 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_opt_split.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_opt_split.c @@ -1,6 +1,6 @@ /* ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #define lj_opt_split_c @@ -8,7 +8,7 @@ #include "lj_obj.h" -#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) +#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) #include "lj_err.h" #include "lj_buf.h" @@ -235,7 +235,7 @@ static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, return split_emit(J, IRTI(IR_BOR), t1, t2); } else { IRRef t1 = ir->prev, t2; - lua_assert(op == IR_BSHR || op == IR_BSAR); + lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage"); nir->o = IR_BSHR; t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31))); ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2); @@ -250,7 +250,7 @@ static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, ir->prev = lj_ir_kint(J, 0); return lo; } else { - lua_assert(op == IR_BSHR || op == IR_BSAR); + lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage"); if (k == 32) { J->cur.nins--; ir->prev = hi; @@ -400,35 +400,11 @@ static void split_ir(jit_State *J) hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); break; case IR_POW: - hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); + hi = split_call_ll(J, hisubst, oir, ir, IRCALL_pow); break; case IR_FPMATH: - /* Try to rejoin pow from EXP2, MUL and LOG2. */ - if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { - IRIns *irp = IR(nir->op1); - if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { - IRIns *irm4 = IR(irp->op1); - IRIns *irm3 = IR(irm4->op1); - IRIns *irm12 = IR(irm3->op1); - IRIns *irl1 = IR(irm12->op1); - if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && - irl1->op2 == IRCALL_lj_vm_log2) { - IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. 
*/ - IRRef arg3 = irm3->op2, arg4 = irm4->op2; - J->cur.nins--; - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); - ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); - hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); - break; - } - } - } hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); break; - case IR_ATAN2: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); - break; case IR_LDEXP: hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); break; @@ -453,7 +429,7 @@ static void split_ir(jit_State *J) hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; case IR_FLOAD: - lua_assert(ir->op1 == REF_NIL); + lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State"); hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4)); nir->op2 += LJ_BE*4; break; @@ -489,8 +465,9 @@ static void split_ir(jit_State *J) break; } #endif - lua_assert(st == IRT_INT || - (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); + lj_assertJ(st == IRT_INT || + (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)), + "bad source type for CONV"); nir->o = IR_CALLN; #if LJ_32 && LJ_HASFFI nir->op2 = st == IRT_INT ? 
IRCALL_softfp_i2d : @@ -520,7 +497,8 @@ static void split_ir(jit_State *J) hi = nir->op2; break; default: - lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); + lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX, + "bad IR op %d", ir->o); hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; @@ -577,7 +555,7 @@ static void split_ir(jit_State *J) hi = split_bitshift(J, hisubst, oir, nir, ir); break; case IR_FLOAD: - lua_assert(ir->op2 == IRFL_CDATA_INT64); + lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported"); hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); #if LJ_BE ir->prev = hi; hi = nref; @@ -643,7 +621,7 @@ static void split_ir(jit_State *J) hi = nir->op2; break; default: - lua_assert(ir->o <= IR_NE); /* Comparisons. */ + lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o); /* Comparisons. */ split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); break; } @@ -667,7 +645,7 @@ static void split_ir(jit_State *J) tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); #endif ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); - } else if (ir->o == IR_TOSTR) { + } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) { if (hisubst[ir->op1]) { if (irref_isk(ir->op1)) nir->op1 = ir->op1; @@ -721,7 +699,7 @@ static void split_ir(jit_State *J) #if LJ_SOFTFP if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { if (irt_isguard(ir->t)) { - lua_assert(st == IRT_NUM && irt_isint(ir->t)); + lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types"); J->cur.nins--; ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); } else { @@ -852,7 +830,7 @@ void lj_opt_split(jit_State *J) if (!J->needsplit) J->needsplit = split_needsplit(J); #else - lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. 
*/ + lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state"); #endif if (J->needsplit) { int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_parse.c b/source/libs/luajit/LuaJIT-src/src/lj_parse.c index 08f7cfa6ac5ea35565bdcd0874906073cf82ad6d..70097598083ee43c87eeb7af3294dbf608314d08 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_parse.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_parse.c @@ -1,6 +1,6 @@ /* ** Lua parser (source code -> bytecode). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -163,6 +163,12 @@ LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD); LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD); LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD); +#ifdef LUA_USE_ASSERT +#define lj_assertFS(c, ...) (lj_assertG_(G(fs->L), (c), __VA_ARGS__)) +#else +#define lj_assertFS(c, ...) ((void)fs) +#endif + /* -- Error handling ------------------------------------------------------ */ LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) @@ -200,7 +206,7 @@ static BCReg const_num(FuncState *fs, ExpDesc *e) { lua_State *L = fs->L; TValue *o; - lua_assert(expr_isnumk(e)); + lj_assertFS(expr_isnumk(e), "bad usage"); o = lj_tab_set(L, fs->kt, &e->u.nval); if (tvhaskslot(o)) return tvkslot(o); @@ -225,7 +231,7 @@ static BCReg const_gc(FuncState *fs, GCobj *gc, uint32_t itype) /* Add a string constant. 
*/ static BCReg const_str(FuncState *fs, ExpDesc *e) { - lua_assert(expr_isstrk(e) || e->k == VGLOBAL); + lj_assertFS(expr_isstrk(e) || e->k == VGLOBAL, "bad usage"); return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR); } @@ -313,7 +319,7 @@ static void jmp_patchins(FuncState *fs, BCPos pc, BCPos dest) { BCIns *jmp = &fs->bcbase[pc].ins; BCPos offset = dest-(pc+1)+BCBIAS_J; - lua_assert(dest != NO_JMP); + lj_assertFS(dest != NO_JMP, "uninitialized jump target"); if (offset > BCMAX_D) err_syntax(fs->ls, LJ_ERR_XJUMP); setbc_d(jmp, offset); @@ -362,7 +368,7 @@ static void jmp_patch(FuncState *fs, BCPos list, BCPos target) if (target == fs->pc) { jmp_tohere(fs, list); } else { - lua_assert(target < fs->pc); + lj_assertFS(target < fs->pc, "bad jump target"); jmp_patchval(fs, list, target, NO_REG, target); } } @@ -392,7 +398,7 @@ static void bcreg_free(FuncState *fs, BCReg reg) { if (reg >= fs->nactvar) { fs->freereg--; - lua_assert(reg == fs->freereg); + lj_assertFS(reg == fs->freereg, "bad regfree"); } } @@ -542,7 +548,7 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg) } else if (e->k <= VKTRUE) { ins = BCINS_AD(BC_KPRI, reg, const_pri(e)); } else { - lua_assert(e->k == VVOID || e->k == VJMP); + lj_assertFS(e->k == VVOID || e->k == VJMP, "bad expr type %d", e->k); return; } bcemit_INS(fs, ins); @@ -637,7 +643,7 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) ins = BCINS_AD(BC_GSET, ra, const_str(fs, var)); } else { BCReg ra, rc; - lua_assert(var->k == VINDEXED); + lj_assertFS(var->k == VINDEXED, "bad expr type %d", var->k); ra = expr_toanyreg(fs, e); rc = var->u.s.aux; if ((int32_t)rc < 0) { @@ -645,10 +651,12 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) } else if (rc > BCMAX_C) { ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1)); } else { +#ifdef LUA_USE_ASSERT /* Free late alloced key reg to avoid assert on free of value reg. */ /* This can only happen when called from expr_table(). 
*/ - lua_assert(e->k != VNONRELOC || ra < fs->nactvar || - rc < ra || (bcreg_free(fs, rc),1)); + if (e->k == VNONRELOC && ra >= fs->nactvar && rc >= ra) + bcreg_free(fs, rc); +#endif ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc); } } @@ -659,19 +667,20 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) /* Emit method lookup expression. */ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key) { - BCReg idx, func, obj = expr_toanyreg(fs, e); + BCReg idx, func, fr2, obj = expr_toanyreg(fs, e); expr_free(fs, e); func = fs->freereg; - bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */ - lua_assert(expr_isstrk(key)); + fr2 = fs->ls->fr2; + bcemit_AD(fs, BC_MOV, func+1+fr2, obj); /* Copy object to 1st argument. */ + lj_assertFS(expr_isstrk(key), "bad usage"); idx = const_str(fs, key); if (idx <= BCMAX_C) { - bcreg_reserve(fs, 2+LJ_FR2); + bcreg_reserve(fs, 2+fr2); bcemit_ABC(fs, BC_TGETS, func, obj, idx); } else { - bcreg_reserve(fs, 3+LJ_FR2); - bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx); - bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2); + bcreg_reserve(fs, 3+fr2); + bcemit_AD(fs, BC_KSTR, func+2+fr2, idx); + bcemit_ABC(fs, BC_TGETV, func, obj, func+2+fr2); fs->freereg--; } e->u.s.info = func; @@ -803,7 +812,8 @@ static void bcemit_arith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) else rc = expr_toanyreg(fs, e2); /* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */ - lua_assert(expr_isnumk(e1) || e1->k == VNONRELOC); + lj_assertFS(expr_isnumk(e1) || e1->k == VNONRELOC, + "bad expr type %d", e1->k); expr_toval(fs, e1); /* Avoid two consts to satisfy bytecode constraints. */ if (expr_isnumk(e1) && !expr_isnumk(e2) && @@ -853,9 +863,12 @@ static void bcemit_comp(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) e1 = e2; e2 = eret; /* Swap operands. 
*/ op = ((op-BC_ISLT)^3)+BC_ISLT; expr_toval(fs, e1); + ra = expr_toanyreg(fs, e1); + rd = expr_toanyreg(fs, e2); + } else { + rd = expr_toanyreg(fs, e2); + ra = expr_toanyreg(fs, e1); } - rd = expr_toanyreg(fs, e2); - ra = expr_toanyreg(fs, e1); ins = BCINS_AD(op, ra, rd); } /* Using expr_free might cause asserts if the order is wrong. */ @@ -888,19 +901,20 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) if (op <= OPR_POW) { bcemit_arith(fs, op, e1, e2); } else if (op == OPR_AND) { - lua_assert(e1->t == NO_JMP); /* List must be closed. */ + lj_assertFS(e1->t == NO_JMP, "jump list not closed"); expr_discharge(fs, e2); jmp_append(fs, &e2->f, e1->f); *e1 = *e2; } else if (op == OPR_OR) { - lua_assert(e1->f == NO_JMP); /* List must be closed. */ + lj_assertFS(e1->f == NO_JMP, "jump list not closed"); expr_discharge(fs, e2); jmp_append(fs, &e2->t, e1->t); *e1 = *e2; } else if (op == OPR_CONCAT) { expr_toval(fs, e2); if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) { - lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1); + lj_assertFS(e1->u.s.info == bc_b(*bcptr(fs, e2))-1, + "bad CAT stack layout"); expr_free(fs, e1); setbc_b(bcptr(fs, e2), e1->u.s.info); e1->u.s.info = e2->u.s.info; @@ -912,8 +926,9 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) } e1->k = VRELOCABLE; } else { - lua_assert(op == OPR_NE || op == OPR_EQ || - op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT); + lj_assertFS(op == OPR_NE || op == OPR_EQ || + op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT, + "bad binop %d", op); bcemit_comp(fs, op, e1, e2); } } @@ -942,30 +957,30 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) e->u.s.info = fs->freereg-1; e->k = VNONRELOC; } else { - lua_assert(e->k == VNONRELOC); + lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k); } } else { - lua_assert(op == BC_UNM || op == BC_LEN); + lj_assertFS(op == BC_UNM || op == BC_LEN, "bad unop %d", op); if 
(op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */ #if LJ_HASFFI if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */ GCcdata *cd = cdataV(&e->u.nval); - int64_t *p = (int64_t *)cdataptr(cd); + uint64_t *p = (uint64_t *)cdataptr(cd); if (cd->ctypeid == CTID_COMPLEX_DOUBLE) - p[1] ^= (int64_t)U64x(80000000,00000000); + p[1] ^= U64x(80000000,00000000); else - *p = -*p; + *p = ~*p+1u; return; } else #endif if (expr_isnumk(e) && !expr_numiszero(e)) { /* Avoid folding to -0. */ TValue *o = expr_numtv(e); if (tvisint(o)) { - int32_t k = intV(o); - if (k == -k) + int32_t k = intV(o), negk = (int32_t)(~(uint32_t)k+1u); + if (k == negk) setnumV(o, -(lua_Number)k); else - setintV(o, -k); + setintV(o, negk); return; } else { o->u64 ^= U64x(80000000,00000000); @@ -1040,8 +1055,9 @@ static void var_new(LexState *ls, BCReg n, GCstr *name) lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); } - lua_assert((uintptr_t)name < VARNAME__MAX || - lj_tab_getstr(fs->kt, name) != NULL); + lj_assertFS((uintptr_t)name < VARNAME__MAX || + lj_tab_getstr(fs->kt, name) != NULL, + "unanchored variable name"); /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ setgcref(ls->vstack[vtop].name, obj2gco(name)); fs->varmap[fs->nactvar+n] = (uint16_t)vtop; @@ -1096,7 +1112,7 @@ static MSize var_lookup_uv(FuncState *fs, MSize vidx, ExpDesc *e) return i; /* Already exists. */ /* Otherwise create a new one. */ checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); - lua_assert(e->k == VLOCAL || e->k == VUPVAL); + lj_assertFS(e->k == VLOCAL || e->k == VUPVAL, "bad expr type %d", e->k); fs->uvmap[n] = (uint16_t)vidx; fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? 
vidx : LJ_MAX_VSTACK+e->u.s.info); fs->nuv = n+1; @@ -1147,7 +1163,8 @@ static MSize gola_new(LexState *ls, GCstr *name, uint8_t info, BCPos pc) lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); } - lua_assert(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL); + lj_assertFS(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL, + "unanchored label name"); /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ setgcref(ls->vstack[vtop].name, obj2gco(name)); ls->vstack[vtop].startpc = pc; @@ -1177,8 +1194,9 @@ static void gola_close(LexState *ls, VarInfo *vg) FuncState *fs = ls->fs; BCPos pc = vg->startpc; BCIns *ip = &fs->bcbase[pc].ins; - lua_assert(gola_isgoto(vg)); - lua_assert(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO); + lj_assertFS(gola_isgoto(vg), "expected goto"); + lj_assertFS(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO, + "bad bytecode op %d", bc_op(*ip)); setbc_a(ip, vg->slot); if (bc_op(*ip) == BC_JMP) { BCPos next = jmp_next(fs, pc); @@ -1197,9 +1215,9 @@ static void gola_resolve(LexState *ls, FuncScope *bl, MSize idx) if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) { if (vg->slot < vl->slot) { GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name); - lua_assert((uintptr_t)name >= VARNAME__MAX); + lj_assertLS((uintptr_t)name >= VARNAME__MAX, "expected goto name"); ls->linenumber = ls->fs->bcbase[vg->startpc].line; - lua_assert(strref(vg->name) != NAME_BREAK); + lj_assertLS(strref(vg->name) != NAME_BREAK, "unexpected break"); lj_lex_error(ls, 0, LJ_ERR_XGSCOPE, strdata(strref(vg->name)), strdata(name)); } @@ -1263,7 +1281,7 @@ static void fscope_begin(FuncState *fs, FuncScope *bl, int flags) bl->vstart = fs->ls->vtop; bl->prev = fs->bl; fs->bl = bl; - lua_assert(fs->freereg == fs->nactvar); + lj_assertFS(fs->freereg == fs->nactvar, "bad regalloc"); } /* End a scope. 
*/ @@ -1274,7 +1292,7 @@ static void fscope_end(FuncState *fs) fs->bl = bl->prev; var_remove(ls, bl->nactvar); fs->freereg = fs->nactvar; - lua_assert(bl->nactvar == fs->nactvar); + lj_assertFS(bl->nactvar == fs->nactvar, "bad regalloc"); if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL) bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0); if ((bl->flags & FSCOPE_BREAK)) { @@ -1309,9 +1327,12 @@ static void fs_fixup_bc(FuncState *fs, GCproto *pt, BCIns *bc, MSize n) { BCInsLine *base = fs->bcbase; MSize i; + BCIns op; pt->sizebc = n; - bc[0] = BCINS_AD((fs->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF, - fs->framesize, 0); + if (fs->ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */ + else if ((fs->flags & PROTO_VARARG)) op = BC_FUNCV; + else op = BC_FUNCF; + bc[0] = BCINS_AD(op, fs->framesize, 0); for (i = 1; i < n; i++) bc[i] = base[i].ins; } @@ -1361,13 +1382,13 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr) Node *n = &node[i]; if (tvhaskslot(&n->val)) { ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val); - lua_assert(!tvisint(&n->key)); + lj_assertFS(!tvisint(&n->key), "unexpected integer key"); if (tvisnum(&n->key)) { TValue *tv = &((TValue *)kptr)[kidx]; if (LJ_DUALNUM) { lua_Number nn = numV(&n->key); int32_t k = lj_num2int(nn); - lua_assert(!tvismzero(&n->key)); + lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); if ((lua_Number)k == nn) setintV(tv, k); else @@ -1415,21 +1436,21 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt, uint8_t *li = (uint8_t *)lineinfo; do { BCLine delta = base[i].line - first; - lua_assert(delta >= 0 && delta < 256); + lj_assertFS(delta >= 0 && delta < 256, "bad line delta"); li[i] = (uint8_t)delta; } while (++i < n); } else if (LJ_LIKELY(numline < 65536)) { uint16_t *li = (uint16_t *)lineinfo; do { BCLine delta = base[i].line - first; - lua_assert(delta >= 0 && delta < 65536); + lj_assertFS(delta >= 0 && delta < 65536, "bad line delta"); li[i] = (uint16_t)delta; } while (++i < n); } else { 
uint32_t *li = (uint32_t *)lineinfo; do { BCLine delta = base[i].line - first; - lua_assert(delta >= 0); + lj_assertFS(delta >= 0, "bad line delta"); li[i] = (uint32_t)delta; } while (++i < n); } @@ -1448,7 +1469,7 @@ static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) MSize len = s->len+1; char *p = lj_buf_more(&ls->sb, len); p = lj_buf_wmem(p, strdata(s), len); - setsbufP(&ls->sb, p); + ls->sb.w = p; } *ofsvar = sbuflen(&ls->sb); lastpc = 0; @@ -1469,7 +1490,7 @@ static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) startpc = vs->startpc; p = lj_strfmt_wuleb128(p, startpc-lastpc); p = lj_strfmt_wuleb128(p, vs->endpc-startpc); - setsbufP(&ls->sb, p); + ls->sb.w = p; lastpc = startpc; } } @@ -1482,7 +1503,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar) { setmref(pt->uvinfo, p); setmref(pt->varinfo, (char *)p + ofsvar); - memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb)); /* Copy from temp. buffer. */ + memcpy(p, ls->sb.b, sbuflen(&ls->sb)); /* Copy from temp. buffer. */ } #else @@ -1519,7 +1540,7 @@ static void fs_fixup_ret(FuncState *fs) } fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */ fscope_end(fs); - lua_assert(fs->bl == NULL); + lj_assertFS(fs->bl == NULL, "bad scope nesting"); /* May need to fixup returns encoded before first function was created. */ if (fs->flags & PROTO_FIXUP_RETURN) { BCPos pc; @@ -1537,7 +1558,7 @@ static void fs_fixup_ret(FuncState *fs) /* Replace with UCLO plus branch. */ fs->bcbase[pc].ins = BCINS_AD(BC_UCLO, 0, offset); break; - case BC_UCLO: + case BC_FNEW: return; /* We're done. */ default: break; @@ -1591,7 +1612,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line) L->top--; /* Pop table of constants. */ ls->vtop = fs->vbase; /* Reset variable stack. 
*/ ls->fs = fs->prev; - lua_assert(ls->fs != NULL || ls->tok == TK_eof); + lj_assertL(ls->fs != NULL || ls->tok == TK_eof, "bad parser state"); return pt; } @@ -1685,14 +1706,15 @@ static void expr_bracket(LexState *ls, ExpDesc *v) } /* Get value of constant expression. */ -static void expr_kvalue(TValue *v, ExpDesc *e) +static void expr_kvalue(FuncState *fs, TValue *v, ExpDesc *e) { + UNUSED(fs); if (e->k <= VKTRUE) { setpriV(v, ~(uint32_t)e->k); } else if (e->k == VKSTR) { setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR); } else { - lua_assert(tvisnumber(expr_numtv(e))); + lj_assertFS(tvisnumber(expr_numtv(e)), "bad number constant"); *v = *expr_numtv(e); } } @@ -1742,11 +1764,11 @@ static void expr_table(LexState *ls, ExpDesc *e) fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx); } vcall = 0; - expr_kvalue(&k, &key); + expr_kvalue(fs, &k, &key); v = lj_tab_set(fs->L, t, &k); lj_gc_anybarriert(fs->L, t); if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ - expr_kvalue(v, &val); + expr_kvalue(fs, v, &val); } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ settabV(fs->L, v, t); /* Preserve key with table itself as value. */ fixt = 1; /* Fix this later, after all resizes. */ @@ -1765,8 +1787,9 @@ static void expr_table(LexState *ls, ExpDesc *e) if (vcall) { BCInsLine *ilp = &fs->bcbase[fs->pc-1]; ExpDesc en; - lua_assert(bc_a(ilp->ins) == freg && - bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB)); + lj_assertFS(bc_a(ilp->ins) == freg && + bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB), + "bad CALL code generation"); expr_init(&en, VKNUM, 0); en.u.nval.u32.lo = narr-1; en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. 
*/ @@ -1796,7 +1819,7 @@ static void expr_table(LexState *ls, ExpDesc *e) for (i = 0; i <= hmask; i++) { Node *n = &node[i]; if (tvistab(&n->val)) { - lua_assert(tabV(&n->val) == t); + lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table"); setnilV(&n->val); /* Turn value into nil. */ } } @@ -1827,7 +1850,7 @@ static BCReg parse_params(LexState *ls, int needself) } while (lex_opt(ls, ',')); } var_add(ls, nparams); - lua_assert(fs->nactvar == nparams); + lj_assertFS(fs->nactvar == nparams, "bad regalloc"); bcreg_reserve(fs, nparams); lex_check(ls, ')'); return nparams; @@ -1914,14 +1937,14 @@ static void parse_args(LexState *ls, ExpDesc *e) err_syntax(ls, LJ_ERR_XFUNARG); return; /* Silence compiler. */ } - lua_assert(e->k == VNONRELOC); + lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k); base = e->u.s.info; /* Base register for call. */ if (args.k == VCALL) { - ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2); + ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - ls->fr2); } else { if (args.k != VVOID) expr_tonextreg(fs, &args); - ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2); + ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - ls->fr2); } expr_init(e, VCALL, bcemit_INS(fs, ins)); e->u.s.aux = base; @@ -1961,7 +1984,7 @@ static void expr_primary(LexState *ls, ExpDesc *v) parse_args(ls, v); } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') { expr_tonextreg(fs, v); - if (LJ_FR2) bcreg_reserve(fs, 1); + if (ls->fr2) bcreg_reserve(fs, 1); parse_args(ls, v); } else { break; @@ -2316,11 +2339,15 @@ static void parse_return(LexState *ls) BCReg nret = expr_list(ls, &e); if (nret == 1) { /* Return one result. */ if (e.k == VCALL) { /* Check for tail call. */ +#ifdef LUAJIT_DISABLE_TAILCALL + goto notailcall; +#else BCIns *ip = bcptr(fs, &e); /* It doesn't pay off to add BC_VARGT just for 'return ...'. 
*/ if (bc_op(*ip) == BC_VARG) goto notailcall; fs->pc--; ins = BCINS_AD(bc_op(*ip)-BC_CALL+BC_CALLT, bc_a(*ip), bc_c(*ip)); +#endif } else { /* Can return the result from any register. */ ins = BCINS_AD(BC_RET1, expr_toanyreg(fs, &e), 2); } @@ -2499,6 +2526,7 @@ static int predict_next(LexState *ls, FuncState *fs, BCPos pc) cTValue *o; switch (bc_op(ins)) { case BC_MOV: + if (bc_d(ins) >= fs->nactvar) return 0; name = gco2str(gcref(var_get(ls, fs, bc_d(ins)).name)); break; case BC_UGET: @@ -2543,8 +2571,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname) line = ls->linenumber; assign_adjust(ls, 3, expr_list(ls, &e), &e); /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */ - bcreg_bump(fs, 3+LJ_FR2); - isnext = (nvars <= 5 && predict_next(ls, fs, exprpc)); + bcreg_bump(fs, 3+ls->fr2); + isnext = (nvars <= 5 && fs->pc > exprpc && predict_next(ls, fs, exprpc)); var_add(ls, 3); /* Hidden control variables. */ lex_check(ls, TK_do); loop = bcemit_AJ(fs, isnext ? BC_ISNEXT : BC_JMP, base, NO_JMP); @@ -2667,7 +2695,8 @@ static int parse_stmt(LexState *ls) lj_lex_next(ls); parse_goto(ls); break; - } /* else: fallthrough */ + } + /* fallthrough */ default: parse_call_assign(ls); break; @@ -2683,8 +2712,9 @@ static void parse_chunk(LexState *ls) while (!islast && !parse_isend(ls->tok)) { islast = parse_stmt(ls); lex_opt(ls, ';'); - lua_assert(ls->fs->framesize >= ls->fs->freereg && - ls->fs->freereg >= ls->fs->nactvar); + lj_assertLS(ls->fs->framesize >= ls->fs->freereg && + ls->fs->freereg >= ls->fs->nactvar, + "bad regalloc"); ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */ } synlevel_end(ls); @@ -2719,9 +2749,8 @@ GCproto *lj_parse(LexState *ls) err_token(ls, TK_eof); pt = fs_finish(ls, ls->linenumber); L->top--; /* Drop chunkname. 
*/ - lua_assert(fs.prev == NULL); - lua_assert(ls->fs == NULL); - lua_assert(pt->sizeuv == 0); + lj_assertL(fs.prev == NULL && ls->fs == NULL, "mismatched frame nesting"); + lj_assertL(pt->sizeuv == 0, "toplevel proto has upvalues"); return pt; } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_parse.h b/source/libs/luajit/LuaJIT-src/src/lj_parse.h index ceeab6994ff3cce9cbd9f2daf883b18abc293a3c..3870a32b8ae9b31084828101f68bc45d8d935d4b 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_parse.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_parse.h @@ -1,6 +1,6 @@ /* ** Lua parser (source code -> bytecode). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_PARSE_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_prng.c b/source/libs/luajit/LuaJIT-src/src/lj_prng.c new file mode 100644 index 0000000000000000000000000000000000000000..02146b273ad6d6173232bbc74d18902ae7c05951 --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/lj_prng.c @@ -0,0 +1,259 @@ +/* +** Pseudo-random number generation. +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_prng_c +#define LUA_CORE + +/* To get the syscall prototype. */ +#if defined(__linux__) && !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + +#include "lj_def.h" +#include "lj_arch.h" +#include "lj_prng.h" + +/* -- PRNG step function -------------------------------------------------- */ + +/* This implements a Tausworthe PRNG with period 2^223. Based on: +** Tables of maximally-equidistributed combined LFSR generators, +** Pierre L'Ecuyer, 1991, table 3, 1st entry. +** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. +** +** Important note: This PRNG is NOT suitable for cryptographic use! +** +** But it works fine for math.random(), which has an API that's not +** suitable for cryptography, anyway. 
+** +** When used as a securely seeded global PRNG, it substantially raises +** the difficulty for various attacks on the VM. +*/ + +/* Update generator i and compute a running xor of all states. */ +#define TW223_GEN(rs, z, r, i, k, q, s) \ + z = rs->u[i]; \ + z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ + r ^= z; rs->u[i] = z; + +#define TW223_STEP(rs, z, r) \ + TW223_GEN(rs, z, r, 0, 63, 31, 18) \ + TW223_GEN(rs, z, r, 1, 58, 19, 28) \ + TW223_GEN(rs, z, r, 2, 55, 24, 7) \ + TW223_GEN(rs, z, r, 3, 47, 21, 8) + +/* PRNG step function with uint64_t result. */ +LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs) +{ + uint64_t z, r = 0; + TW223_STEP(rs, z, r) + return r; +} + +/* PRNG step function with double in uint64_t result. */ +LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs) +{ + uint64_t z, r = 0; + TW223_STEP(rs, z, r) + /* Returns a double bit pattern in the range 1.0 <= d < 2.0. */ + return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); +} + +/* Condition seed: ensure k[i] MSB of u[i] are non-zero. */ +static LJ_AINLINE void lj_prng_condition(PRNGState *rs) +{ + if (rs->u[0] < (1u << 1)) rs->u[0] += (1u << 1); + if (rs->u[1] < (1u << 6)) rs->u[1] += (1u << 6); + if (rs->u[2] < (1u << 9)) rs->u[2] += (1u << 9); + if (rs->u[3] < (1u << 17)) rs->u[3] += (1u << 17); +} + +/* -- PRNG seeding from OS ------------------------------------------------ */ + +#if LUAJIT_SECURITY_PRNG == 0 + +/* Nothing to define. 
*/ + +#elif LJ_TARGET_XBOX360 + +extern int XNetRandom(void *buf, unsigned int len); + +#elif LJ_TARGET_PS3 + +extern int sys_get_random_number(void *buf, uint64_t len); + +#elif LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA + +extern int sceRandomGetRandomNumber(void *buf, size_t len); + +#elif LJ_TARGET_NX + +#include <unistd.h> + +#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +#if LJ_TARGET_UWP || LJ_TARGET_XBOXONE +/* Must use BCryptGenRandom. */ +#include <bcrypt.h> +#pragma comment(lib, "bcrypt.lib") +#else +/* If you wonder about this mess, then search online for RtlGenRandom. */ +typedef BOOLEAN (WINAPI *PRGR)(void *buf, ULONG len); +static PRGR libfunc_rgr; +#endif + +#elif LJ_TARGET_POSIX + +#if LJ_TARGET_LINUX +/* Avoid a dependency on glibc 2.25+ and use the getrandom syscall instead. */ +#include <sys/syscall.h> +#else + +#if LJ_TARGET_OSX && !LJ_TARGET_IOS +/* +** In their infinite wisdom Apple decided to disallow getentropy() in the +** iOS App Store. Even though the call is common to all BSD-ish OS, it's +** recommended by Apple in their own security-related docs, and, to top +** off the foolery, /dev/urandom is handled by the same kernel code, +** yet accessing it is actually permitted (but less efficient). +*/ +#include <Availability.h> +#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200 +#define LJ_TARGET_HAS_GETENTROPY 1 +#endif +#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX +#define LJ_TARGET_HAS_GETENTROPY 1 +#endif + +#if LJ_TARGET_HAS_GETENTROPY +extern int getentropy(void *buf, size_t len) +#ifdef __ELF__ + __attribute__((weak)) +#endif +; +#endif + +#endif + +/* For the /dev/urandom fallback. */ +#include <fcntl.h> +#include <unistd.h> + +#endif + +#if LUAJIT_SECURITY_PRNG == 0 + +/* If you really don't care about security, then define +** LUAJIT_SECURITY_PRNG=0. 
This yields a predictable seed +** and provides NO SECURITY against various attacks on the VM. +** +** BTW: This is NOT the way to get predictable table iteration, +** predictable trace generation, predictable bytecode generation, etc. +*/ +int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) +{ + lj_prng_seed_fixed(rs); /* The fixed seed is already conditioned. */ + return 1; +} + +#else + +/* Securely seed PRNG from system entropy. Returns 0 on failure. */ +int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) +{ +#if LJ_TARGET_XBOX360 + + if (XNetRandom(rs->u, (unsigned int)sizeof(rs->u)) == 0) + goto ok; + +#elif LJ_TARGET_PS3 + + if (sys_get_random_number(rs->u, sizeof(rs->u)) == 0) + goto ok; + +#elif LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA + + if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0) + goto ok; + +#elif LJ_TARGET_NX + + if (getentropy(rs->u, sizeof(rs->u)) == 0) + goto ok; + +#elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE + + if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u), + BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0) + goto ok; + +#elif LJ_TARGET_WINDOWS + + /* Keep the library loaded in case multiple VMs are started. */ + if (!libfunc_rgr) { + HMODULE lib = LJ_WIN_LOADLIBA("advapi32.dll"); + if (!lib) return 0; + libfunc_rgr = (PRGR)GetProcAddress(lib, "SystemFunction036"); + if (!libfunc_rgr) return 0; + } + if (libfunc_rgr(rs->u, (ULONG)sizeof(rs->u))) + goto ok; + +#elif LJ_TARGET_POSIX + +#if LJ_TARGET_LINUX && defined(SYS_getrandom) + + if (syscall(SYS_getrandom, rs->u, sizeof(rs->u), 0) == (long)sizeof(rs->u)) + goto ok; + +#elif LJ_TARGET_HAS_GETENTROPY + +#ifdef __ELF__ + if (&getentropy && getentropy(rs->u, sizeof(rs->u)) == 0) + goto ok; +#else + if (getentropy(rs->u, sizeof(rs->u)) == 0) + goto ok; +#endif + +#endif + + /* Fallback to /dev/urandom. This may fail if the device is not + ** existent or accessible in a chroot or container, or if the process + ** or the OS ran out of file descriptors. 
+ */ + { + int fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC); + if (fd != -1) { + ssize_t n = read(fd, rs->u, sizeof(rs->u)); + (void)close(fd); + if (n == (ssize_t)sizeof(rs->u)) + goto ok; + } + } + +#else + + /* Add an elif above for your OS with a secure PRNG seed. + ** Note that fiddling around with rand(), getpid(), time() or coercing + ** ASLR to yield a few bits of randomness is not helpful. + ** If you don't want any security, then don't pretend you have any + ** and simply define LUAJIT_SECURITY_PRNG=0 for the build. + */ +#error "Missing secure PRNG seed for this OS" + +#endif + return 0; /* Fail. */ + +ok: + lj_prng_condition(rs); + (void)lj_prng_u64(rs); + return 1; /* Success. */ +} + +#endif + diff --git a/source/libs/luajit/LuaJIT-src/src/lj_prng.h b/source/libs/luajit/LuaJIT-src/src/lj_prng.h new file mode 100644 index 0000000000000000000000000000000000000000..7dc5eebf2e715adc4daefc0ad512b96d2149f618 --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/lj_prng.h @@ -0,0 +1,24 @@ +/* +** Pseudo-random number generation. +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_PRNG_H +#define _LJ_PRNG_H + +#include "lj_def.h" + +LJ_FUNC int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs); +LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs); +LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs); + +/* This is just the precomputed result of lib_math.c:random_seed(rs, 0.0). 
*/ +static LJ_AINLINE void lj_prng_seed_fixed(PRNGState *rs) +{ + rs->u[0] = U64x(a0d27757,0a345b8c); + rs->u[1] = U64x(764a296c,5d4aa64f); + rs->u[2] = U64x(51220704,070adeaa); + rs->u[3] = U64x(2a2717b5,a7b7b927); +} + +#endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_profile.c b/source/libs/luajit/LuaJIT-src/src/lj_profile.c index 116998e1e8f1f9b8137635cd122fda412e76d255..96acd9f0ac3218a2dedab658b6d6532f78090462 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_profile.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_profile.c @@ -1,6 +1,6 @@ /* ** Low-overhead profiling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_profile_c @@ -153,7 +153,7 @@ static void profile_trigger(ProfileState *ps) profile_lock(ps); ps->samples++; /* Always increment number of samples. */ mask = g->hookmask; - if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT))) { /* Set profile hook. */ + if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT|HOOK_GC))) { /* Set profile hook. */ int st = g->vmstate; ps->vmstate = st >= 0 ? 'N' : st == ~LJ_VMST_INTERP ? 
'I' : @@ -185,7 +185,11 @@ static void profile_timer_start(ProfileState *ps) tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000; tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000; setitimer(ITIMER_PROF, &tm, NULL); +#if LJ_TARGET_QNX + sa.sa_flags = 0; +#else sa.sa_flags = SA_RESTART; +#endif sa.sa_handler = profile_signal; sigemptyset(&sa.sa_mask); sigaction(SIGPROF, &sa, &ps->oldsa); @@ -247,7 +251,7 @@ static DWORD WINAPI profile_thread(void *psx) { ProfileState *ps = (ProfileState *)psx; int interval = ps->interval; -#if LJ_TARGET_WINDOWS +#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP ps->wmm_tbp(interval); #endif while (1) { @@ -255,7 +259,7 @@ static DWORD WINAPI profile_thread(void *psx) if (ps->abort) break; profile_trigger(ps); } -#if LJ_TARGET_WINDOWS +#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP ps->wmm_tep(interval); #endif return 0; @@ -264,9 +268,9 @@ static DWORD WINAPI profile_thread(void *psx) /* Start profiling timer thread. */ static void profile_timer_start(ProfileState *ps) { -#if LJ_TARGET_WINDOWS +#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP if (!ps->wmm) { /* Load WinMM library on-demand. 
*/ - ps->wmm = LoadLibraryExA("winmm.dll", NULL, 0); + ps->wmm = LJ_WIN_LOADLIBA("winmm.dll"); if (ps->wmm) { ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod"); ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod"); @@ -346,8 +350,7 @@ LUA_API void luaJIT_profile_stop(lua_State *L) lj_trace_flushall(L); #endif lj_buf_free(g, &ps->sb); - setmref(ps->sb.b, NULL); - setmref(ps->sb.e, NULL); + ps->sb.w = ps->sb.e = NULL; ps->g = NULL; } } @@ -362,7 +365,7 @@ LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, lj_buf_reset(sb); lj_debug_dumpstack(L, sb, fmt, depth); *len = (size_t)sbuflen(sb); - return sbufB(sb); + return sb->b; } #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_profile.h b/source/libs/luajit/LuaJIT-src/src/lj_profile.h index 0cccfd78cd786256eb2633d3e419f616b808c910..24fbd9dccd23afa822428c5fe0c8254bb6b698e8 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_profile.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_profile.h @@ -1,6 +1,6 @@ /* ** Low-overhead profiling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_PROFILE_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_record.c b/source/libs/luajit/LuaJIT-src/src/lj_record.c index 9d0469c42518ccaa03a4e7b1762929938c6dcb1e..38c180e8f9e1afce95131419e351a628d531b3fe 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_record.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_record.c @@ -1,6 +1,6 @@ /* ** Trace recorder (bytecode -> SSA IR). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_record_c @@ -33,6 +33,7 @@ #include "lj_snap.h" #include "lj_dispatch.h" #include "lj_vm.h" +#include "lj_prng.h" /* Some local macros to save typing. Undef'd at the end. 
*/ #define IR(ref) (&J->cur.ir[(ref)]) @@ -50,34 +51,52 @@ static void rec_check_ir(jit_State *J) { IRRef i, nins = J->cur.nins, nk = J->cur.nk; - lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); + lj_assertJ(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536, + "inconsistent IR layout"); for (i = nk; i < nins; i++) { IRIns *ir = IR(i); uint32_t mode = lj_ir_mode[ir->o]; IRRef op1 = ir->op1; IRRef op2 = ir->op2; + const char *err = NULL; switch (irm_op1(mode)) { - case IRMnone: lua_assert(op1 == 0); break; - case IRMref: lua_assert(op1 >= nk); - lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; + case IRMnone: + if (op1 != 0) err = "IRMnone op1 used"; + break; + case IRMref: + if (op1 < nk || (i >= REF_BIAS ? op1 >= i : op1 <= i)) + err = "IRMref op1 out of range"; + break; case IRMlit: break; - case IRMcst: lua_assert(i < REF_BIAS); + case IRMcst: + if (i >= REF_BIAS) { err = "constant in IR range"; break; } if (irt_is64(ir->t) && ir->o != IR_KNULL) i++; continue; } switch (irm_op2(mode)) { - case IRMnone: lua_assert(op2 == 0); break; - case IRMref: lua_assert(op2 >= nk); - lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break; + case IRMnone: + if (op2) err = "IRMnone op2 used"; + break; + case IRMref: + if (op2 < nk || (i >= REF_BIAS ? op2 >= i : op2 <= i)) + err = "IRMref op2 out of range"; + break; case IRMlit: break; - case IRMcst: lua_assert(0); break; + case IRMcst: err = "IRMcst op2"; break; } - if (ir->prev) { - lua_assert(ir->prev >= nk); - lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i); - lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o); + if (!err && ir->prev) { + if (ir->prev < nk || (i >= REF_BIAS ? ir->prev >= i : ir->prev <= i)) + err = "chain out of range"; + else if (ir->o != IR_NOP && IR(ir->prev)->o != ir->o) + err = "chain to different op"; } + lj_assertJ(!err, "bad IR %04d op %d(%04d,%04d): %s", + i-REF_BIAS, + ir->o, + irm_op1(mode) == IRMref ? op1-REF_BIAS : op1, + irm_op2(mode) == IRMref ? 
op2-REF_BIAS : op2, + err); } } @@ -87,66 +106,80 @@ static void rec_check_slots(jit_State *J) BCReg s, nslots = J->baseslot + J->maxslot; int32_t depth = 0; cTValue *base = J->L->base - J->baseslot; - lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS); - lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); - lua_assert(nslots < LJ_MAX_JSLOTS); + lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot"); + lj_assertJ(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME), + "baseslot does not point to frame"); + lj_assertJ(nslots <= LJ_MAX_JSLOTS, "slot overflow"); for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; if (tr) { cTValue *tv = &base[s]; IRRef ref = tref_ref(tr); IRIns *ir = NULL; /* Silence compiler. */ + lj_assertJ(tv < J->L->top, "slot %d above top of Lua stack", s); if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { - lua_assert(ref >= J->cur.nk && ref < J->cur.nins); + lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins, + "slot %d ref %04d out of range", s, ref - REF_BIAS); ir = IR(ref); - lua_assert(irt_t(ir->t) == tref_t(tr)); + lj_assertJ(irt_t(ir->t) == tref_t(tr), "slot %d IR type mismatch", s); } if (s == 0) { - lua_assert(tref_isfunc(tr)); + lj_assertJ(tref_isfunc(tr), "frame slot 0 is not a function"); #if LJ_FR2 } else if (s == 1) { - lua_assert((tr & ~TREF_FRAME) == 0); + lj_assertJ((tr & ~TREF_FRAME) == 0, "bad frame slot 1"); #endif } else if ((tr & TREF_FRAME)) { GCfunc *fn = gco2func(frame_gc(tv)); BCReg delta = (BCReg)(tv - frame_prev(tv)); #if LJ_FR2 - if (ref) - lua_assert(ir_knum(ir)->u64 == tv->u64); + lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, + "frame slot %d PC mismatch", s); tr = J->slot[s-1]; ir = IR(tref_ref(tr)); #endif - lua_assert(tref_isfunc(tr)); - if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); - lua_assert(s > delta + LJ_FR2 ? 
(J->slot[s-delta] & TREF_FRAME) - : (s == delta + LJ_FR2)); + lj_assertJ(tref_isfunc(tr), + "frame slot %d is not a function", s-LJ_FR2); + lj_assertJ(!tref_isk(tr) || fn == ir_kfunc(ir), + "frame slot %d function mismatch", s-LJ_FR2); + lj_assertJ(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) + : (s == delta + LJ_FR2), + "frame slot %d broken chain", s-LJ_FR2); depth++; } else if ((tr & TREF_CONT)) { #if LJ_FR2 - if (ref) - lua_assert(ir_knum(ir)->u64 == tv->u64); + lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, + "cont slot %d continuation mismatch", s); #else - lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); + lj_assertJ(ir_kptr(ir) == gcrefp(tv->gcr, void), + "cont slot %d continuation mismatch", s); #endif - lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME)); + lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME), + "cont slot %d not followed by frame", s); depth++; + } else if ((tr & TREF_KEYINDEX)) { + lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d", + s, tref_type(tr)); } else { - if (tvisnumber(tv)) - lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ - else - lua_assert(itype2irt(tv) == tref_type(tr)); + /* Number repr. may differ, but other types must be the same. */ + lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) : + itype2irt(tv) == tref_type(tr), + "slot %d type mismatch: stack type %d vs IR type %d", + s, itypemap(tv), tref_type(tr)); if (tref_isk(tr)) { /* Compare constants. */ TValue tvk; lj_ir_kvalue(J->L, &tvk, ir); - if (!(tvisnum(&tvk) && tvisnan(&tvk))) - lua_assert(lj_obj_equal(tv, &tvk)); - else - lua_assert(tvisnum(tv) && tvisnan(tv)); + lj_assertJ((tvisnum(&tvk) && tvisnan(&tvk)) ? 
+ (tvisnum(tv) && tvisnan(tv)) : + lj_obj_equal(tv, &tvk), + "slot %d const mismatch: stack %016llx vs IR %016llx", + s, tv->u64, tvk.u64); } } } } - lua_assert(J->framedepth == depth); + lj_assertJ(J->framedepth == depth, + "frame depth mismatch %d vs %d", J->framedepth, depth); } #endif @@ -182,7 +215,8 @@ static TRef getcurrf(jit_State *J) { if (J->base[-1-LJ_FR2]) return J->base[-1-LJ_FR2]; - lua_assert(J->baseslot == 1+LJ_FR2); + /* Non-base frame functions ought to be loaded already. */ + lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot"); return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); } @@ -229,6 +263,14 @@ TRef lj_record_constify(jit_State *J, cTValue *o) return 0; /* Can't represent lightuserdata (pointless). */ } +/* Emit a VLOAD with the correct type. */ +TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t) +{ + TRef tr = emitir(IRTG(IR_VLOAD, t), ref, idx); + if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ + return tr; +} + /* -- Record loop ops ----------------------------------------------------- */ /* Loop event. 
*/ @@ -245,9 +287,9 @@ static void canonicalize_slots(jit_State *J) if (LJ_DUALNUM) return; for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { TRef tr = J->slot[s]; - if (tref_isinteger(tr)) { + if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) { IRIns *ir = IR(tref_ref(tr)); - if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) + if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY)))) J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); } } @@ -427,7 +469,8 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); int tc, dir = rec_for_direction(&tv[FORL_STEP]); - lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); + lj_assertJ(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI, + "bad bytecode %d instead of FORI/JFORI", bc_op(*fori)); scev->t.irt = t; scev->dir = dir; scev->stop = tref_ref(stop); @@ -483,7 +526,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) IRT_NUM; for (i = FORL_IDX; i <= FORL_STEP; i++) { if (!tr[i]) sload(J, ra+i); - lua_assert(tref_isnumber_str(tr[i])); + lj_assertJ(tref_isnumber_str(tr[i]), "bad FORI argument type"); if (tref_isstr(tr[i])) tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); if (t == IRT_INT) { @@ -540,10 +583,10 @@ static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) } /* Record LOOP/JLOOP. Now, that was easy. */ -static LoopEvent rec_loop(jit_State *J, BCReg ra) +static LoopEvent rec_loop(jit_State *J, BCReg ra, int skip) { if (ra < J->maxslot) J->maxslot = ra; - J->pc++; + J->pc += skip; return LOOPEV_ENTER; } @@ -567,6 +610,7 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) { if (J->parent == 0 && J->exitno == 0) { if (pc == J->startpc && J->framedepth + J->retdepth == 0) { + if (bc_op(J->cur.startins) == BC_ITERN) return; /* See rec_itern(). */ /* Same loop? 
*/ if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ lj_trace_err(J, LJ_TRERR_LLEAVE); @@ -607,6 +651,77 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) } /* Side trace continues across a loop that's left or not entered. */ } +/* Record ITERN. */ +static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb) +{ +#if LJ_BE + /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, + ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. + */ + UNUSED(ra); UNUSED(rb); + setintV(&J->errinfo, (int32_t)BC_ITERN); + lj_trace_err_info(J, LJ_TRERR_NYIBC); +#else + RecordIndex ix; + /* Since ITERN is recorded at the start, we need our own loop detection. */ + if (J->pc == J->startpc && + J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) { + IRRef ref = REF_FIRST + LJ_HASPROFILE; +#ifdef LUAJIT_ENABLE_CHECKHOOK + ref += 3; +#endif + if (J->cur.nins > ref || + (LJ_HASPROFILE && J->cur.nins == ref && J->cur.ir[ref-1].o != IR_PROF)) { + J->instunroll = 0; /* Cannot continue unrolling across an ITERN. */ + lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ + return LOOPEV_ENTER; + } + } + J->maxslot = ra; + lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */ + ix.tab = getslot(J, ra-2); + ix.key = J->base[ra-1] ? J->base[ra-1] : + sloadt(J, (int32_t)(ra-1), IRT_GUARD|IRT_INT, + IRSLOAD_TYPECHECK|IRSLOAD_KEYINDEX); + copyTV(J->L, &ix.tabv, &J->L->base[ra-2]); + copyTV(J->L, &ix.keyv, &J->L->base[ra-1]); + ix.idxchain = (rb < 3); /* Omit value type check, if unused. */ + ix.mobj = 1; /* We need the next index, too. */ + J->maxslot = ra + lj_record_next(J, &ix); + J->needsnap = 1; + if (!tref_isnil(ix.key)) { /* Looping back? */ + J->base[ra-1] = ix.mobj | TREF_KEYINDEX; /* Control var has next index. 
*/ + J->base[ra] = ix.key; + J->base[ra+1] = ix.val; + J->pc += bc_j(J->pc[1])+2; + return LOOPEV_ENTER; + } else { + J->maxslot = ra-3; + J->pc += 2; + return LOOPEV_LEAVE; + } +#endif +} + +/* Record ISNEXT. */ +static void rec_isnext(jit_State *J, BCReg ra) +{ + cTValue *b = &J->L->base[ra-3]; + if (tvisfunc(b) && funcV(b)->c.ffid == FF_next && + tvistab(b+1) && tvisnil(b+2)) { + /* These checks are folded away for a compiled pairs(). */ + TRef func = getslot(J, ra-3); + TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID); + emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next)); + (void)getslot(J, ra-2); /* Type check for table. */ + (void)getslot(J, ra-1); /* Type check for nil key. */ + J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX; + J->maxslot = ra; + } else { /* Abort trace. Interpreter will despecialize bytecode. */ + lj_trace_err(J, LJ_TRERR_RECERR); + } +} + /* -- Record profiler hook checks ----------------------------------------- */ #if LJ_HASPROFILE @@ -615,7 +730,8 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc) { GCproto *ppt; - lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l'); + lj_assertJ(J->prof_mode == 'f' || J->prof_mode == 'l', + "bad profiler mode %c", J->prof_mode); if (!pt) return 0; ppt = J->prev_pt; @@ -676,7 +792,7 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) /* NYI: io_file_iter doesn't have an ffid, yet. */ { /* Specialize to the ffid. */ TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID); - emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid)); + emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid)); } return tr; default: @@ -731,6 +847,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) J->framedepth++; J->base += func+1+LJ_FR2; J->baseslot += func+1+LJ_FR2; + if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) + lj_trace_err(J, LJ_TRERR_STACKOV); } /* Record tail call. 
*/ @@ -785,19 +903,22 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) { TValue *frame = J->L->base - 1; ptrdiff_t i; + BCReg baseadj = 0; for (i = 0; i < gotresults; i++) (void)getslot(J, rbase+i); /* Ensure all results have a reference. */ while (frame_ispcall(frame)) { /* Immediately resolve pcall() returns. */ BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth <= 0) lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1+LJ_FR2); + lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); gotresults++; + baseadj += cbase; rbase += cbase; J->baseslot -= (BCReg)cbase; J->base -= cbase; J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ frame = frame_prevd(frame); + J->needsnap = 1; /* Stop catching on-trace errors. */ } /* Return to lower frame via interpreter for unhandled cases. */ if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && @@ -815,7 +936,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1+LJ_FR2); + lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); + baseadj += cbase; rbase += cbase; J->baseslot -= (BCReg)cbase; J->base -= cbase; @@ -829,7 +951,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) if ((pt->flags & PROTO_NOJIT)) lj_trace_err(J, LJ_TRERR_CJITOFF); if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { - if (check_downrec_unroll(J, pt)) { + if (!J->cur.root && check_downrec_unroll(J, pt)) { J->maxslot = (BCReg)(rbase + gotresults); lj_snap_purge(J); lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */ @@ -842,7 +964,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) J->maxslot = cbase+(BCReg)nresults; if (J->framedepth > 0) { /* Return to a frame that is part of the trace. 
*/ J->framedepth--; - lua_assert(J->baseslot > cbase+1+LJ_FR2); + lj_assertJ(J->baseslot > cbase+1+LJ_FR2, "bad baseslot for return"); J->baseslot -= cbase+1+LJ_FR2; J->base -= cbase+1+LJ_FR2; } else if (J->parent == 0 && J->exitno == 0 && @@ -851,13 +973,16 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_trace_err(J, LJ_TRERR_LLEAVE); } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ lj_trace_err(J, LJ_TRERR_NYIRETL); /* No way to insert snapshot here. */ + } else if (1 + pt->framesize >= LJ_MAX_JSLOTS) { + lj_trace_err(J, LJ_TRERR_STACKOV); } else { /* Return to lower frame. Guard for the target we return to. */ TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); J->retdepth++; J->needsnap = 1; - lua_assert(J->baseslot == 1+LJ_FR2); + J->scev.idx = REF_NIL; + lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return"); /* Shift result slots up and clear the slots of the new frame below. */ memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); @@ -883,7 +1008,11 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCReg bslot = bc_b(*(frame_contpc(frame)-1)); TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; if (bslot != J->maxslot) { /* Concatenate the remainder. */ - TValue *b = J->L->base, save; /* Simulate lower frame and result. */ + /* Simulate lower frame and result. */ + TValue *b = J->L->base - baseadj, save; + /* Can't handle MM_concat + CALLT + fast func side-effects. */ + if (J->postproc != LJ_POST_NONE) + lj_trace_err(J, LJ_TRERR_NYIRETL); J->base[J->maxslot] = tr; copyTV(J->L, &save, b-(2<<LJ_FR2)); if (gotresults) @@ -893,10 +1022,12 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) J->L->base = b - cbase; tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2)); b = J->L->base + cbase; /* Undo. 
*/ - J->L->base = b; + J->L->base = b + baseadj; copyTV(J->L, b-(2<<LJ_FR2), &save); } - if (tr) { /* Store final result. */ + if (tr >= 0xffffff00) { + lj_err_throw(J->L, -(int32_t)tr); /* Propagate errors. */ + } else if (tr) { /* Store final result. */ BCReg dst = bc_a(*(frame_contpc(frame)-1)); J->base[dst] = tr; if (dst >= J->maxslot) { @@ -905,12 +1036,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } /* Otherwise continue with another __concat call. */ } else { /* Result type already specialized. */ - lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); + lj_assertJ(cont == lj_cont_condf || cont == lj_cont_condt, + "bad continuation type"); } } else { lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ } - lua_assert(J->baseslot >= 1+LJ_FR2); + lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot for return"); } /* -- Metamethod handling ------------------------------------------------- */ @@ -942,6 +1074,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) } else if (tref_isudata(ix->tab)) { int udtype = udataV(&ix->tabv)->udtype; mt = tabref(udataV(&ix->tabv)->metatable); + mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META); /* The metatables of special userdata objects are treated as immutable. */ if (udtype != UDTYPE_USERDATA) { cTValue *mo; @@ -955,6 +1088,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) } immutable_mt: mo = lj_tab_getstr(mt, mmname_str(J2G(J), mm)); + ix->mt = mix.tab; + ix->mtv = mt; if (!mo || tvisnil(mo)) return 0; /* No metamethod. */ /* Treat metamethod or index table as immutable, too. */ @@ -962,11 +1097,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) lj_trace_err(J, LJ_TRERR_BADTYPE); copyTV(J->L, &ix->mobjv, mo); ix->mobj = lj_ir_kgc(J, gcV(mo), tvisfunc(mo) ? IRT_FUNC : IRT_TAB); - ix->mtv = mt; - ix->mt = TREF_NIL; /* Dummy value for comparison semantics. */ return 1; /* Got metamethod or index table. 
*/ } - mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META); } else { /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */ mt = tabref(basemt_obj(J2G(J), &ix->tabv)); @@ -976,13 +1108,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) } /* The cdata metatable is treated as immutable. */ if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; -#if LJ_GC64 - /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); -#else - ix->mt = mix.tab = lj_ir_ktab(J, mt); -#endif goto nocheck; } ix->mt = mt ? mix.tab : TREF_NIL; @@ -1056,7 +1183,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) lj_record_call(J, func, 2); } else { if (LJ_52 && tref_istab(tr)) - return lj_ir_call(J, IRCALL_lj_tab_len, tr); + return emitir(IRTI(IR_ALEN), tr, TREF_NIL); lj_trace_err(J, LJ_TRERR_NOMM); } return 0; /* No result yet. */ @@ -1165,7 +1292,7 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) ix->tab = ix->val; copyTV(J->L, &ix->tabv, &ix->valv); } else { - lua_assert(tref_iscdata(ix->key)); + lj_assertJ(tref_iscdata(ix->key), "cdata expected"); ix->tab = ix->key; copyTV(J->L, &ix->tabv, &ix->keyv); } @@ -1262,17 +1389,19 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) /* Got scalar evolution analysis results for this reference? */ if (ref == J->scev.idx) { int32_t stop; - lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); + lj_assertJ(irt_isint(J->scev.t) && ir->o == IR_SLOAD, + "only int SCEV supported"); stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); /* Runtime value for stop of loop is within bounds? */ if ((uint64_t)stop + ofs < (uint64_t)asize) { /* Emit invariant bounds check for stop. */ - emitir(IRTG(IR_ABC, IRT_P32), asizeref, ofs == 0 ? J->scev.stop : + uint32_t abc = IRTG(IR_ABC, tref_isk(asizeref) ? 
IRT_U32 : IRT_P32); + emitir(abc, asizeref, ofs == 0 ? J->scev.stop : emitir(IRTI(IR_ADD), J->scev.stop, ofsref)); /* Emit invariant bounds check for start, if not const or negative. */ if (!(J->scev.dir && J->scev.start && (int64_t)IR(J->scev.start)->i + ofs >= 0)) - emitir(IRTG(IR_ABC, IRT_P32), asizeref, ikey); + emitir(abc, asizeref, ikey); return; } } @@ -1335,16 +1464,16 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); if (tref_isk(key)) { /* Optimize lookup of constant hash keys. */ - MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); - if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && - hslot <= 65535*(MSize)sizeof(Node)) { + GCSize hslot = (GCSize)((char *)ix->oldv-(char *)&noderef(t->node)[0].val); + if (hslot <= t->hmask*(GCSize)sizeof(Node) && + hslot <= 65535*(GCSize)sizeof(Node)) { TRef node, kslot, hm; *rbref = J->cur.nins; /* Mark possible rollback point. */ *rbguard = J->guardemit; hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE); - kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); + kslot = lj_ir_kslot(J, key, (IRRef)(hslot / sizeof(Node))); return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot); } } @@ -1380,7 +1509,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ /* Never call raw lj_record_idx() on non-table. */ - lua_assert(ix->idxchain != 0); + lj_assertJ(ix->idxchain != 0, "bad usage"); if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) lj_trace_err(J, LJ_TRERR_NOMM); handlemm: @@ -1402,6 +1531,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) return 0; /* No result yet. */ } } +#if LJ_HASBUFFER + /* The index table of buffer objects is treated as immutable. 
*/ + if (ix->mt == TREF_NIL && !ix->val && + tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER && + tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) { + cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv)); + TRef tr = lj_record_constify(J, val); + if (tr) return tr; /* Specialize to the value, i.e. a method. */ + } +#endif /* Otherwise retry lookup with metaobject. */ ix->tab = ix->mobj; copyTV(J->L, &ix->tabv, &ix->mobjv); @@ -1464,14 +1603,22 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { - lua_assert(hasmm); + lj_assertJ(hasmm, "inconsistent metamethod handling"); goto handlemm; } - lua_assert(!hasmm); + lj_assertJ(!hasmm, "inconsistent metamethod handling"); if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ TRef key = ix->key; - if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ + if (tref_isinteger(key)) { /* NEWREF needs a TValue as a key. */ key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); + } else if (tref_isnum(key)) { + if (tref_isk(key)) { + if (tvismzero(&ix->keyv)) + key = lj_ir_knum_zero(J); /* Canonicalize -0.0 to +0.0. */ + } else { + emitir(IRTG(IR_EQ, IRT_NUM), key, key); /* Check for !NaN. */ + } + } xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key); keybarrier = 0; /* NEWREF already takes care of the key barrier. */ #ifdef LUAJIT_ENABLE_TABLE_BUMP @@ -1512,6 +1659,47 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) } } +/* Determine result type of table traversal. */ +static IRType rec_next_types(GCtab *t, uint32_t idx) +{ + for (; idx < t->asize; idx++) { + cTValue *a = arrayslot(t, idx); + if (LJ_LIKELY(!tvisnil(a))) + return (LJ_DUALNUM ? 
IRT_INT : IRT_NUM) + (itype2irt(a) << 8); + } + idx -= t->asize; + for (; idx <= t->hmask; idx++) { + Node *n = &noderef(t->node)[idx]; + if (!tvisnil(&n->val)) + return itype2irt(&n->key) + (itype2irt(&n->val) << 8); + } + return IRT_NIL + (IRT_NIL << 8); +} + +/* Record a table traversal step aka next(). */ +int lj_record_next(jit_State *J, RecordIndex *ix) +{ + IRType t, tkey, tval; + TRef trvk; + t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo); + tkey = (t & 0xff); tval = (t >> 8); + trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key); + if (ix->mobj || tkey == IRT_NIL) { + TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk); + /* Always check for invalid key from next() for nil result. */ + if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1)); + ix->mobj = idx; + } + ix->key = lj_record_vload(J, trvk, 1, tkey); + if (tkey == IRT_NIL || ix->idxchain) { /* Omit value type check. */ + ix->val = TREF_NIL; + return 1; + } else { /* Need value. */ + ix->val = lj_record_vload(J, trvk, 0, tval); + return 2; + } +} + static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i) { RecordIndex ix; @@ -1573,7 +1761,7 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) int needbarrier = 0; if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */ TRef tr, kfunc; - lua_assert(val == 0); + lj_assertJ(val == 0, "bad usage"); if (!tref_isk(fn)) { /* Late specialization of current function. */ if (J->pt->flags >= PROTO_CLC_POLY) goto noconstify; @@ -1594,16 +1782,16 @@ noconstify: /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); if (!uvp->closed) { - uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv)); /* In current stack? */ if (uvval(uvp) >= tvref(J->L->stack) && uvval(uvp) < tvref(J->L->maxstack)) { int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); if (slot >= 0) { /* Aliases an SSA slot? 
*/ + uref = tref_ref(emitir(IRT(IR_UREFO, IRT_PGC), fn, uv)); emitir(IRTG(IR_EQ, IRT_PGC), REF_BASE, emitir(IRT(IR_ADD, IRT_PGC), uref, - lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8))); + lj_ir_kintpgc(J, (slot - 1 - LJ_FR2) * -8))); slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ if (val == 0) { return getslot(J, slot); @@ -1614,12 +1802,21 @@ noconstify: } } } + /* IR_UREFO+IRT_IGC is not checked for open-ness at runtime. + ** Always marked as a guard, since it might get promoted to IRT_PGC later. + */ + uref = emitir(IRTG(IR_UREFO, tref_isgcv(val) ? IRT_PGC : IRT_IGC), fn, uv); + uref = tref_ref(uref); emitir(IRTG(IR_UGT, IRT_PGC), emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), - lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); + lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8)); } else { + /* If fn is constant, then so is the GCupval*, and the upvalue cannot + ** transition back to open, so no guard is required in this case. + */ + IRType t = (tref_isk(fn) ? 0 : IRT_GUARD) | IRT_PGC; + uref = tref_ref(emitir(IRT(IR_UREFC, t), fn, uv)); needbarrier = 1; - uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv)); } if (val == 0) { /* Upvalue load */ IRType t = itype2irt(uvval(uvp)); @@ -1667,7 +1864,7 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) if (lnk) { /* Possible tail- or up-recursion. */ lj_trace_flush(J, lnk); /* Flush trace that only returns. */ /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. 
*/ - hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4)); + hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u); } lj_trace_err(J, LJ_TRERR_CUNROLL); } @@ -1695,7 +1892,7 @@ static void rec_func_vararg(jit_State *J) { GCproto *pt = J->pt; BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; - lua_assert((pt->flags & PROTO_VARARG)); + lj_assertJ((pt->flags & PROTO_VARARG), "FUNCV in non-vararg function"); if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ @@ -1764,18 +1961,20 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) { int32_t numparams = J->pt->numparams; ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; - lua_assert(frame_isvarg(J->L->base-1)); + lj_assertJ(frame_isvarg(J->L->base-1), "VARG in non-vararg frame"); if (LJ_FR2 && dst > J->maxslot) J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */ if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ ptrdiff_t i; if (nvararg < 0) nvararg = 0; - if (nresults == -1) { - nresults = nvararg; - J->maxslot = dst + (BCReg)nvararg; - } else if (dst + nresults > J->maxslot) { + if (nresults != 1) { + if (nresults == -1) nresults = nvararg; J->maxslot = dst + (BCReg)nresults; + } else if (dst >= J->maxslot) { + J->maxslot = dst + 1; } + if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) + lj_trace_err(J, LJ_TRERR_STACKOV); for (i = 0; i < nresults; i++) J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL; } else { /* Unknown number of varargs passed to trace. 
*/ @@ -1792,14 +1991,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1))); vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); - vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, + lj_ir_kintpgc(J, frofs-8*(1+LJ_FR2))); for (i = 0; i < nload; i++) { IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); - TRef aref = emitir(IRT(IR_AREF, IRT_PGC), - vbase, lj_ir_kint(J, (int32_t)i)); - TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); - if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ - J->base[dst+i] = tr; + J->base[dst+i] = lj_record_vload(J, vbase, (MSize)i, t); } } else { emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); @@ -1807,15 +2003,19 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) } for (i = nvararg; i < nresults; i++) J->base[dst+i] = TREF_NIL; - if (dst + (BCReg)nresults > J->maxslot) + if (nresults != 1 || dst >= J->maxslot) { J->maxslot = dst + (BCReg)nresults; + } } else if (select_detect(J)) { /* y = select(x, ...) */ - TRef tridx = J->base[dst-1]; + TRef tridx = getslot(J, dst-1); TRef tr = TREF_NIL; ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]); if (idx < 0) goto nyivarg; - if (idx != 0 && !tref_isinteger(tridx)) + if (idx != 0 && !tref_isinteger(tridx)) { + if (tref_isstr(tridx)) + tridx = emitir(IRTG(IR_STRTO, IRT_NUM), tridx, 0); tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX); + } if (idx != 0 && tref_isk(tridx)) { emitir(IRTGI(idx <= nvararg ? 
IR_GE : IR_LT), fr, lj_ir_kint(J, frofs+8*(int32_t)idx)); @@ -1843,11 +2043,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) IRType t; TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, - lj_ir_kint(J, frofs-(8<<LJ_FR2))); + lj_ir_kintpgc(J, frofs-(8<<LJ_FR2))); t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]); aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); - tr = emitir(IRTG(IR_VLOAD, t), aref, 0); - if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ + tr = lj_record_vload(J, aref, 0, t); } J->base[dst-2-LJ_FR2] = tr; J->maxslot = dst-1-LJ_FR2; @@ -1879,13 +2078,22 @@ static TRef rec_tnew(jit_State *J, uint32_t ah) /* -- Concatenation ------------------------------------------------------- */ -static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) +typedef struct RecCatDataCP { + jit_State *J; + BCReg baseslot, topslot; + TRef tr; +} RecCatDataCP; + +static TValue *rec_mm_concat_cp(lua_State *L, lua_CFunction dummy, void *ud) { + RecCatDataCP *rcd = (RecCatDataCP *)ud; + jit_State *J = rcd->J; + BCReg baseslot = rcd->baseslot, topslot = rcd->topslot; TRef *top = &J->base[topslot]; - TValue savetv[5]; BCReg s; RecordIndex ix; - lua_assert(baseslot < topslot); + UNUSED(L); UNUSED(dummy); + lj_assertJ(baseslot < topslot, "bad CAT arg"); for (s = baseslot; s <= topslot; s++) (void)getslot(J, s); /* Ensure all arguments have a reference. 
*/ if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) { @@ -1902,11 +2110,14 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC), lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); do { - tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++); + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, *trp++); } while (trp <= top); - tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); + tr = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); J->maxslot = (BCReg)(xbase - J->base); - if (xbase == base) return tr; /* Return simple concatenation result. */ + if (xbase == base) { + rcd->tr = tr; /* Return simple concatenation result. */ + return NULL; + } /* Pass partial result. */ topslot = J->maxslot--; *xbase = tr; @@ -1919,10 +2130,31 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]); ix.tab = top[-1]; ix.key = top[0]; - memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */ rec_mm_arith(J, &ix, MM_concat); /* Call __concat metamethod. */ - memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */ - return 0; /* No result yet. */ + rcd->tr = 0; /* No result yet. */ + return NULL; +} + +static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) +{ + lua_State *L = J->L; + ptrdiff_t delta = L->top - L->base; + TValue savetv[5+LJ_FR2], errobj; + RecCatDataCP rcd; + int errcode; + rcd.J = J; + rcd.baseslot = baseslot; + rcd.topslot = topslot; + memcpy(savetv, &L->base[topslot-1], sizeof(savetv)); /* Save slots. */ + errcode = lj_vm_cpcall(L, NULL, &rcd, rec_mm_concat_cp); + if (errcode) copyTV(L, &errobj, L->top-1); + memcpy(&L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. 
*/ + if (errcode) { + L->top = L->base + delta; + copyTV(L, L->top++, &errobj); + return (TRef)(-errcode); + } + return rcd.tr; } /* -- Record bytecode ops ------------------------------------------------- */ @@ -2009,7 +2241,7 @@ void lj_record_ins(jit_State *J) if (bc_op(*J->pc) >= BC__MAX) return; break; - default: lua_assert(0); break; + default: lj_assertJ(0, "bad post-processing mode"); break; } J->postproc = LJ_POST_NONE; } @@ -2017,7 +2249,7 @@ void lj_record_ins(jit_State *J) /* Need snapshot before recording next bytecode (e.g. after a store). */ if (J->needsnap) { J->needsnap = 0; - lj_snap_purge(J); + if (J->pt) lj_snap_purge(J); lj_snap_add(J); J->mergesnap = 1; } @@ -2072,6 +2304,7 @@ void lj_record_ins(jit_State *J) case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : + tv->u32.hi == LJ_KEYINDEX ? (lj_ir_kint(J, 0) | TREF_KEYINDEX) : lj_ir_knumint(J, numV(tv)); } break; case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; @@ -2187,7 +2420,7 @@ void lj_record_ins(jit_State *J) if (tref_isstr(rc)) rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); else if (!LJ_52 && tref_istab(rc)) - rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); + rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL); else rc = rec_mm_len(J, rc, rcv); break; @@ -2234,7 +2467,7 @@ void lj_record_ins(jit_State *J) case BC_POW: if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) - rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv); + rc = lj_opt_narrow_arith(J, rb, rc, rbv, rcv, IR_POW); else rc = rec_mm_arith(J, &ix, MM_pow); break; @@ -2243,6 +2476,8 @@ void lj_record_ins(jit_State *J) case BC_CAT: rc = rec_cat(J, rb, rc); + if (rc >= 0xffffff00) + lj_err_throw(J->L, -(int32_t)rc); /* Propagate errors. 
*/ break; /* -- Constant and move ops --------------------------------------------- */ @@ -2308,6 +2543,7 @@ void lj_record_ins(jit_State *J) case BC_TSETM: rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo); + J->maxslot = ra; /* The table slot at ra-1 is the highest used slot. */ break; case BC_TNEW: @@ -2377,7 +2613,8 @@ void lj_record_ins(jit_State *J) J->loopref = J->cur.nins; break; case BC_JFORI: - lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); + lj_assertJ(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL, + "JFORI does not point to JFORL"); if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); /* Continue tracing if the loop is not entered. */ @@ -2389,8 +2626,11 @@ void lj_record_ins(jit_State *J) case BC_ITERL: rec_loop_interp(J, pc, rec_iterl(J, *pc)); break; + case BC_ITERN: + rec_loop_interp(J, pc, rec_itern(J, ra, rb)); + break; case BC_LOOP: - rec_loop_interp(J, pc, rec_loop(J, ra)); + rec_loop_interp(J, pc, rec_loop(J, ra, 1)); break; case BC_JFORL: @@ -2400,7 +2640,9 @@ void lj_record_ins(jit_State *J) rec_loop_jit(J, rc, rec_iterl(J, traceref(J, rc)->startins)); break; case BC_JLOOP: - rec_loop_jit(J, rc, rec_loop(J, ra)); + rec_loop_jit(J, rc, rec_loop(J, ra, + !bc_isret(bc_op(traceref(J, rc)->startins)) && + bc_op(traceref(J, rc)->startins) != BC_ITERN)); break; case BC_IFORL: @@ -2416,6 +2658,10 @@ void lj_record_ins(jit_State *J) J->maxslot = ra; /* Shrink used slots. */ break; + case BC_ISNEXT: + rec_isnext(J, ra); + break; + /* -- Function headers -------------------------------------------------- */ case BC_FUNCF: @@ -2430,7 +2676,8 @@ void lj_record_ins(jit_State *J) rec_func_lua(J); break; case BC_JFUNCV: - lua_assert(0); /* Cannot happen. No hotcall counting for varag funcs. */ + /* Cannot happen. No hotcall counting for varag funcs. 
*/ + lj_assertJ(0, "unsupported vararg hotcall"); break; case BC_FUNCC: @@ -2444,8 +2691,6 @@ void lj_record_ins(jit_State *J) break; } /* fallthrough */ - case BC_ITERN: - case BC_ISNEXT: case BC_UCLO: case BC_FNEW: setintV(&J->errinfo, (int32_t)op); @@ -2468,8 +2713,9 @@ void lj_record_ins(jit_State *J) #undef rbv #undef rcv - /* Limit the number of recorded IR instructions. */ - if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) + /* Limit the number of recorded IR instructions and constants. */ + if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] || + J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst]) lj_trace_err(J, LJ_TRERR_TRACEOV); } @@ -2489,13 +2735,22 @@ static const BCIns *rec_setup_root(jit_State *J) J->bc_min = pc; break; case BC_ITERL: - lua_assert(bc_op(pc[-1]) == BC_ITERC); + if (bc_op(pc[-1]) == BC_JLOOP) + lj_trace_err(J, LJ_TRERR_LINNER); + lj_assertJ(bc_op(pc[-1]) == BC_ITERC, "no ITERC before ITERL"); J->maxslot = ra + bc_b(pc[-1]) - 1; J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); pc += 1+bc_j(ins); - lua_assert(bc_op(pc[-1]) == BC_JMP); + lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1"); J->bc_min = pc; break; + case BC_ITERN: + lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN"); + J->maxslot = ra; + J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns); + J->bc_min = pc+2 + bc_j(pc[1]); + J->state = LJ_TRACE_RECORD_1ST; /* Record the first ITERN, too. */ + break; case BC_LOOP: /* Only check BC range for real loops, but not for "repeat until true". 
*/ pcj = pc + bc_j(ins); @@ -2525,7 +2780,7 @@ static const BCIns *rec_setup_root(jit_State *J) pc++; break; default: - lua_assert(0); + lj_assertJ(0, "bad root trace start bytecode %d", bc_op(ins)); break; } return pc; @@ -2592,9 +2847,14 @@ void lj_record_setup(jit_State *J) } lj_snap_replay(J, T); sidecheck: - if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || - T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + - J->param[JIT_P_tryside]) { + if ((traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || + T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + + J->param[JIT_P_tryside])) { + if (bc_op(*J->pc) == BC_JLOOP) { + BCIns startins = traceref(J, bc_d(*J->pc))->startins; + if (bc_op(startins) == BC_ITERN) + rec_itern(J, bc_a(startins), bc_b(startins)); + } lj_record_stop(J, LJ_TRLINK_INTERP, 0); } } else { /* Root trace. */ @@ -2603,6 +2863,7 @@ void lj_record_setup(jit_State *J) J->pc = rec_setup_root(J); /* Note: the loop instruction itself is recorded at the end and not ** at the start! So snapshot #0 needs to point to the *next* instruction. + ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST. */ lj_snap_add(J); if (bc_op(J->cur.startins) == BC_FORL) diff --git a/source/libs/luajit/LuaJIT-src/src/lj_record.h b/source/libs/luajit/LuaJIT-src/src/lj_record.h index 93d374d2494c6ba8432cc5570f0592852c249422..2c7a575ba068e224179a7d3be7d57f321240892c 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_record.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_record.h @@ -1,6 +1,6 @@ /* ** Trace recorder (bytecode -> SSA IR). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_RECORD_H @@ -30,6 +30,7 @@ LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, cTValue *av, cTValue *bv); LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk); LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); +LJ_FUNC TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t); LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs); @@ -37,6 +38,7 @@ LJ_FUNC void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults); LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm); LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix); +LJ_FUNC int lj_record_next(jit_State *J, RecordIndex *ix); LJ_FUNC void lj_record_ins(jit_State *J); LJ_FUNC void lj_record_setup(jit_State *J); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_serialize.c b/source/libs/luajit/LuaJIT-src/src/lj_serialize.c new file mode 100644 index 0000000000000000000000000000000000000000..175861119bbb22d0d26c36d795256e518b8afc91 --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/lj_serialize.c @@ -0,0 +1,539 @@ +/* +** Object de/serialization. +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_serialize_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASBUFFER +#include "lj_err.h" +#include "lj_buf.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_udata.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#include "lj_cdata.h" +#endif +#if LJ_HASJIT +#include "lj_ir.h" +#endif +#include "lj_serialize.h" + +/* Tags for internal serialization format. 
*/ +enum { + SER_TAG_NIL, /* 0x00 */ + SER_TAG_FALSE, + SER_TAG_TRUE, + SER_TAG_NULL, + SER_TAG_LIGHTUD32, + SER_TAG_LIGHTUD64, + SER_TAG_INT, + SER_TAG_NUM, + SER_TAG_TAB, /* 0x08 */ + SER_TAG_DICT_MT = SER_TAG_TAB+6, + SER_TAG_DICT_STR, + SER_TAG_INT64, /* 0x10 */ + SER_TAG_UINT64, + SER_TAG_COMPLEX, + SER_TAG_0x13, + SER_TAG_0x14, + SER_TAG_0x15, + SER_TAG_0x16, + SER_TAG_0x17, + SER_TAG_0x18, /* 0x18 */ + SER_TAG_0x19, + SER_TAG_0x1a, + SER_TAG_0x1b, + SER_TAG_0x1c, + SER_TAG_0x1d, + SER_TAG_0x1e, + SER_TAG_0x1f, + SER_TAG_STR, /* 0x20 + str->len */ +}; +LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0); + +/* -- Helper functions ---------------------------------------------------- */ + +static LJ_AINLINE char *serialize_more(char *w, SBufExt *sbx, MSize sz) +{ + if (LJ_UNLIKELY(sz > (MSize)(sbx->e - w))) { + sbx->w = w; + w = lj_buf_more2((SBuf *)sbx, sz); + } + return w; +} + +/* Write U124 to buffer. */ +static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v) +{ + if (v < 0x1fe0) { + v -= 0xe0; + *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v; + } else { + *w++ = (char)0xff; +#if LJ_BE + v = lj_bswap(v); +#endif + memcpy(w, &v, 4); w += 4; + } + return w; +} + +static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v) +{ + if (LJ_LIKELY(v < 0xe0)) { + *w++ = (char)v; + return w; + } else { + return serialize_wu124_(w, v); + } +} + +static LJ_NOINLINE char *serialize_ru124_(char *r, char *w, uint32_t *pv) +{ + uint32_t v = *pv; + if (v != 0xff) { + if (r >= w) return NULL; + v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++; + } else { + if (r + 4 > w) return NULL; + v = lj_getu32(r); r += 4; +#if LJ_BE + v = lj_bswap(v); +#endif + } + *pv = v; + return r; +} + +static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv) +{ + if (LJ_LIKELY(r < w)) { + uint32_t v = *(uint8_t *)r; r++; + *pv = v; + if (LJ_UNLIKELY(v >= 0xe0)) { + r = serialize_ru124_(r, w, pv); + } + return r; + } + return NULL; +} + +/* Prepare string dictionary for use (once). 
*/ +void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict) +{ + if (!dict->hmask) { /* No hash part means not prepared, yet. */ + MSize i, len = lj_tab_len(dict); + if (!len) return; + lj_tab_resize(L, dict, dict->asize, hsize2hbits(len)); + for (i = 1; i <= len && i < dict->asize; i++) { + cTValue *o = arrayslot(dict, i); + if (tvisstr(o)) { + if (!lj_tab_getstr(dict, strV(o))) { /* Ignore dups. */ + lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1); + } + } else if (!tvisfalse(o)) { + lj_err_caller(L, LJ_ERR_BUFFER_BADOPT); + } + } + } +} + +/* Prepare metatable dictionary for use (once). */ +void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict) +{ + if (!dict->hmask) { /* No hash part means not prepared, yet. */ + MSize i, len = lj_tab_len(dict); + if (!len) return; + lj_tab_resize(L, dict, dict->asize, hsize2hbits(len)); + for (i = 1; i <= len && i < dict->asize; i++) { + cTValue *o = arrayslot(dict, i); + if (tvistab(o)) { + if (tvisnil(lj_tab_get(L, dict, o))) { /* Ignore dups. */ + lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1); + } + } else if (!tvisfalse(o)) { + lj_err_caller(L, LJ_ERR_BUFFER_BADOPT); + } + } + } +} + +/* -- Internal serializer ------------------------------------------------- */ + +/* Put serialized object into buffer. */ +static char *serialize_put(char *w, SBufExt *sbx, cTValue *o) +{ + if (LJ_LIKELY(tvisstr(o))) { + const GCstr *str = strV(o); + MSize len = str->len; + w = serialize_more(w, sbx, 5+len); + w = serialize_wu124(w, SER_TAG_STR + len); + w = lj_buf_wmem(w, strdata(str), len); + } else if (tvisint(o)) { + uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o); + w = serialize_more(w, sbx, 1+4); + *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4; + } else if (tvisnum(o)) { + uint64_t x = LJ_BE ? 
lj_bswap64(o->u64) : o->u64; + w = serialize_more(w, sbx, 1+sizeof(lua_Number)); + *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8; + } else if (tvispri(o)) { + w = serialize_more(w, sbx, 1); + *w++ = (char)(SER_TAG_NIL + ~itype(o)); + } else if (tvistab(o)) { + const GCtab *t = tabV(o); + uint32_t narray = 0, nhash = 0, one = 2; + if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH); + sbx->depth--; + if (t->asize > 0) { /* Determine max. length of array part. */ + ptrdiff_t i; + TValue *array = tvref(t->array); + for (i = (ptrdiff_t)t->asize-1; i >= 0; i--) + if (!tvisnil(&array[i])) + break; + narray = (uint32_t)(i+1); + if (narray && tvisnil(&array[0])) one = 4; + } + if (t->hmask > 0) { /* Count number of used hash slots. */ + uint32_t i, hmask = t->hmask; + Node *node = noderef(t->node); + for (i = 0; i <= hmask; i++) + nhash += !tvisnil(&node[i].val); + } + /* Write metatable index. */ + if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) { + TValue mto; + Node *n; + settabV(sbufL(sbx), &mto, tabref(t->metatable)); + n = hashgcref(tabref(sbx->dict_mt), mto.gcr); + do { + if (n->key.u64 == mto.u64) { + uint32_t idx = n->val.u32.lo; + w = serialize_more(w, sbx, 1+5); + *w++ = SER_TAG_DICT_MT; + w = serialize_wu124(w, idx); + break; + } + } while ((n = nextnode(n))); + } + /* Write number of array slots and hash slots. */ + w = serialize_more(w, sbx, 1+2*5); + *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0)); + if (narray) w = serialize_wu124(w, narray); + if (nhash) w = serialize_wu124(w, nhash); + if (narray) { /* Write array entries. */ + cTValue *oa = tvref(t->array) + (one >> 2); + cTValue *oe = tvref(t->array) + narray; + while (oa < oe) w = serialize_put(w, sbx, oa++); + } + if (nhash) { /* Write hash entries. 
*/ + const Node *node = noderef(t->node) + t->hmask; + GCtab *dict_str = tabref(sbx->dict_str); + if (LJ_UNLIKELY(dict_str)) { + for (;; node--) + if (!tvisnil(&node->val)) { + if (LJ_LIKELY(tvisstr(&node->key))) { + /* Inlined lj_tab_getstr is 30% faster. */ + const GCstr *str = strV(&node->key); + Node *n = hashstr(dict_str, str); + do { + if (tvisstr(&n->key) && strV(&n->key) == str) { + uint32_t idx = n->val.u32.lo; + w = serialize_more(w, sbx, 1+5); + *w++ = SER_TAG_DICT_STR; + w = serialize_wu124(w, idx); + break; + } + n = nextnode(n); + if (!n) { + MSize len = str->len; + w = serialize_more(w, sbx, 5+len); + w = serialize_wu124(w, SER_TAG_STR + len); + w = lj_buf_wmem(w, strdata(str), len); + break; + } + } while (1); + } else { + w = serialize_put(w, sbx, &node->key); + } + w = serialize_put(w, sbx, &node->val); + if (--nhash == 0) break; + } + } else { + for (;; node--) + if (!tvisnil(&node->val)) { + w = serialize_put(w, sbx, &node->key); + w = serialize_put(w, sbx, &node->val); + if (--nhash == 0) break; + } + } + } + sbx->depth++; +#if LJ_HASFFI + } else if (tviscdata(o)) { + CTState *cts = ctype_cts(sbufL(sbx)); + CType *s = ctype_raw(cts, cdataV(o)->ctypeid); + uint8_t *sp = cdataptr(cdataV(o)); + if (ctype_isinteger(s->info) && s->size == 8) { + w = serialize_more(w, sbx, 1+8); + *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64; +#if LJ_BE + { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); } +#else + memcpy(w, sp, 8); +#endif + w += 8; + } else if (ctype_iscomplex(s->info) && s->size == 16) { + w = serialize_more(w, sbx, 1+16); + *w++ = SER_TAG_COMPLEX; +#if LJ_BE + { /* Only swap the doubles. The re/im order stays the same. 
*/ + uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8); + u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8); + } +#else + memcpy(w, sp, 16); +#endif + w += 16; + } else { + goto badenc; /* NYI other cdata */ + } +#endif + } else if (tvislightud(o)) { + uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbx)), o); + w = serialize_more(w, sbx, 1+sizeof(ud)); + if (ud == 0) { + *w++ = SER_TAG_NULL; + } else if (LJ_32 || checku32(ud)) { +#if LJ_BE && LJ_64 + ud = lj_bswap64(ud); +#elif LJ_BE + ud = lj_bswap(ud); +#endif + *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4; +#if LJ_64 + } else { +#if LJ_BE + ud = lj_bswap64(ud); +#endif + *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8; +#endif + } + } else { + /* NYI userdata */ +#if LJ_HASFFI + badenc: +#endif + lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADENC, lj_typename(o)); + } + return w; +} + +/* Get serialized object from buffer. */ +static char *serialize_get(char *r, SBufExt *sbx, TValue *o) +{ + char *w = sbx->w; + uint32_t tp; + r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob; + if (LJ_LIKELY(tp >= SER_TAG_STR)) { + uint32_t len = tp - SER_TAG_STR; + if (LJ_UNLIKELY(len > (uint32_t)(w - r))) goto eob; + setstrV(sbufL(sbx), o, lj_str_new(sbufL(sbx), r, len)); + r += len; + } else if (tp == SER_TAG_INT) { + if (LJ_UNLIKELY(r + 4 > w)) goto eob; + setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r))); + r += 4; + } else if (tp == SER_TAG_NUM) { + if (LJ_UNLIKELY(r + 8 > w)) goto eob; + memcpy(o, r, 8); r += 8; +#if LJ_BE + o->u64 = lj_bswap64(o->u64); +#endif + if (!tvisnum(o)) setnanV(o); /* Fix non-canonical NaNs. 
*/ + } else if (tp <= SER_TAG_TRUE) { + setpriV(o, ~tp); + } else if (tp == SER_TAG_DICT_STR) { + GCtab *dict_str; + uint32_t idx; + r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob; + idx++; + dict_str = tabref(sbx->dict_str); + if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx))) + copyTV(sbufL(sbx), o, arrayslot(dict_str, idx)); + else + lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx); + } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) { + uint32_t narray = 0, nhash = 0; + GCtab *t, *mt = NULL; + if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH); + sbx->depth--; + if (tp == SER_TAG_DICT_MT) { + GCtab *dict_mt; + uint32_t idx; + r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob; + idx++; + dict_mt = tabref(sbx->dict_mt); + if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx))) + mt = tabV(arrayslot(dict_mt, idx)); + else + lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx); + r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob; + if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag; + } + if (tp >= SER_TAG_TAB+2) { + r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob; + } + if ((tp & 1)) { + r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob; + } + t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash)); + /* NOBARRIER: The table is new (marked white). 
*/ + setgcref(t->metatable, obj2gco(mt)); + settabV(sbufL(sbx), o, t); + if (narray) { + TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4); + TValue *oe = tvref(t->array) + narray; + while (oa < oe) r = serialize_get(r, sbx, oa++); + } + if (nhash) { + do { + TValue k, *v; + r = serialize_get(r, sbx, &k); + v = lj_tab_set(sbufL(sbx), t, &k); + if (LJ_UNLIKELY(!tvisnil(v))) + lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DUPKEY); + r = serialize_get(r, sbx, v); + } while (--nhash); + } + sbx->depth++; +#if LJ_HASFFI + } else if (tp >= SER_TAG_INT64 && tp <= SER_TAG_COMPLEX) { + uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8; + GCcdata *cd; + if (LJ_UNLIKELY(r + sz > w)) goto eob; + if (LJ_UNLIKELY(!ctype_ctsG(G(sbufL(sbx))))) goto badtag; + cd = lj_cdata_new_(sbufL(sbx), + tp == SER_TAG_INT64 ? CTID_INT64 : + tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE, + sz); + memcpy(cdataptr(cd), r, sz); r += sz; +#if LJ_BE + *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd)); + if (sz == 16) + ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]); +#endif + if (sz == 16) { /* Fix non-canonical NaNs. */ + TValue *cdo = (TValue *)cdataptr(cd); + if (!tvisnum(&cdo[0])) setnanV(&cdo[0]); + if (!tvisnum(&cdo[1])) setnanV(&cdo[1]); + } + setcdataV(sbufL(sbx), o, cd); +#endif + } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) { + uintptr_t ud = 0; + if (tp == SER_TAG_LIGHTUD32) { + if (LJ_UNLIKELY(r + 4 > w)) goto eob; + ud = (uintptr_t)(LJ_BE ? 
lj_bswap(lj_getu32(r)) : lj_getu32(r)); + r += 4; + } +#if LJ_64 + else if (tp == SER_TAG_LIGHTUD64) { + if (LJ_UNLIKELY(r + 8 > w)) goto eob; + memcpy(&ud, r, 8); r += 8; +#if LJ_BE + ud = lj_bswap64(ud); +#endif + } + setrawlightudV(o, lj_lightud_intern(sbufL(sbx), (void *)ud)); +#else + setrawlightudV(o, (void *)ud); +#endif + } else { +badtag: + lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp); + } + return r; +eob: + lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_EOB); + return NULL; +} + +/* -- External serialization API ------------------------------------------ */ + +/* Encode to buffer. */ +SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o) +{ + sbx->depth = LJ_SERIALIZE_DEPTH; + sbx->w = serialize_put(sbx->w, sbx, o); + return sbx; +} + +/* Decode from buffer. */ +char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o) +{ + sbx->depth = LJ_SERIALIZE_DEPTH; + return serialize_get(sbx->r, sbx, o); +} + +/* Stand-alone encoding, borrowing from global temporary buffer. */ +GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o) +{ + SBufExt sbx; + char *w; + memset(&sbx, 0, sizeof(SBufExt)); + lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf); + sbx.depth = LJ_SERIALIZE_DEPTH; + w = serialize_put(sbx.w, &sbx, o); + return lj_str_new(L, sbx.b, (size_t)(w - sbx.b)); +} + +/* Stand-alone decoding, copy-on-write from string. */ +void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str) +{ + SBufExt sbx; + char *r; + memset(&sbx, 0, sizeof(SBufExt)); + lj_bufx_set_cow(L, &sbx, strdata(str), str->len); + /* No need to set sbx.cowref here. */ + sbx.depth = LJ_SERIALIZE_DEPTH; + r = serialize_get(sbx.r, &sbx, o); + if (r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV); +} + +#if LJ_HASJIT +/* Peek into buffer to find the result IRType for specialization purposes. 
*/ +LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx) +{ + uint32_t tp; + if (serialize_ru124(sbx->r, sbx->w, &tp)) { + /* This must match the handling of all tags in the decoder above. */ + switch (tp) { + case SER_TAG_NIL: return IRT_NIL; + case SER_TAG_FALSE: return IRT_FALSE; + case SER_TAG_TRUE: return IRT_TRUE; + case SER_TAG_NULL: case SER_TAG_LIGHTUD32: case SER_TAG_LIGHTUD64: + return IRT_LIGHTUD; + case SER_TAG_INT: return LJ_DUALNUM ? IRT_INT : IRT_NUM; + case SER_TAG_NUM: return IRT_NUM; + case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2: + case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5: + case SER_TAG_DICT_MT: + return IRT_TAB; + case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX: + return IRT_CDATA; + case SER_TAG_DICT_STR: + default: + return IRT_STR; + } + } + return IRT_NIL; /* Will fail on actual decode. */ +} +#endif + +#endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_serialize.h b/source/libs/luajit/LuaJIT-src/src/lj_serialize.h new file mode 100644 index 0000000000000000000000000000000000000000..9e082b317904505e59359860069ad605b25d2c7f --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/lj_serialize.h @@ -0,0 +1,28 @@ +/* +** Object de/serialization. +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_SERIALIZE_H +#define _LJ_SERIALIZE_H + +#include "lj_obj.h" +#include "lj_buf.h" + +#if LJ_HASBUFFER + +#define LJ_SERIALIZE_DEPTH 100 /* Default depth. 
*/ + +LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict); +LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict); +LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o); +LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o); +LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o); +LJ_FUNC void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str); +#if LJ_HASJIT +LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx); +#endif + +#endif + +#endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_snap.c b/source/libs/luajit/LuaJIT-src/src/lj_snap.c index bb063c2b65298a146a7a9ac96bc88a8e1d15d111..cb1044392ff1c095d1c3e32d22094c7ee3c605b0 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_snap.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_snap.c @@ -1,6 +1,6 @@ /* ** Snapshot handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_snap_c @@ -85,15 +85,20 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) IRIns *ir = &J->cur.ir[ref]; if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { - /* No need to snapshot unmodified non-inherited slots. */ - if (!(ir->op2 & IRSLOAD_INHERIT)) + /* + ** No need to snapshot unmodified non-inherited slots. + ** But always snapshot the function below a frame in LJ_FR2 mode. + */ + if (!(ir->op2 & IRSLOAD_INHERIT) && + (!LJ_FR2 || s == 0 || s+1 == nslots || + !(J->slot[s+1] & (TREF_CONT|TREF_FRAME)))) continue; /* No need to restore readonly slots and unmodified non-parent slots. 
*/ if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) sn |= SNAP_NORESTORE; } - if (LJ_SOFTFP && irt_isnum(ir->t)) + if (LJ_SOFTFP32 && irt_isnum(ir->t)) sn |= SNAP_SOFTFPNUM; map[n++] = sn; } @@ -110,12 +115,15 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; #if LJ_FR2 uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); - lua_assert(2 <= J->baseslot && J->baseslot <= 257); + lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot"); memcpy(map, &pcbase, sizeof(uint64_t)); #else MSize f = 0; map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ #endif + lj_assertJ(!J->pt || + (J->pc >= proto_bc(J->pt) && + J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC"); while (frame > lim) { /* Backwards traversal of all frames above base. */ if (frame_islua(frame)) { #if !LJ_FR2 @@ -129,7 +137,7 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) #endif frame = frame_prevd(frame); } else { - lua_assert(!frame_isc(frame)); + lj_assertJ(!frame_isc(frame), "broken frame chain"); #if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); #endif @@ -141,10 +149,10 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) } *topslot = (uint8_t)(ftop - lim); #if LJ_FR2 - lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t)); + lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def"); return 2; #else - lua_assert(f == (MSize)(1 + J->framedepth)); + lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size"); return f; #endif } @@ -161,11 +169,12 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) nent = snapshot_slots(J, p, nslots); snap->nent = (uint8_t)nent; nent += snapshot_framelinks(J, p + nent, &snap->topslot); - snap->mapofs = 
(uint16_t)nsnapmap; + snap->mapofs = (uint32_t)nsnapmap; snap->ref = (IRRef1)J->cur.nins; + snap->mcofs = 0; snap->nslots = (uint8_t)nslots; snap->count = 0; - J->cur.nsnapmap = (uint16_t)(nsnapmap + nent); + J->cur.nsnapmap = (uint32_t)(nsnapmap + nent); } /* Add or merge a snapshot. */ @@ -222,7 +231,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, #define DEF_SLOT(s) udf[(s)] *= 3 /* Scan through following bytecode and check for uses/defs. */ - lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); + lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, + "snapshot PC out of range"); for (;;) { BCIns ins = *pc++; BCOp op = bc_op(ins); @@ -233,7 +243,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, switch (bcmode_c(op)) { case BCMvar: USE_SLOT(bc_c(ins)); break; case BCMrbase: - lua_assert(op == BC_CAT); + lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op); for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s); for (; s < maxslot; s++) DEF_SLOT(s); break; @@ -242,7 +252,12 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, BCReg minslot = bc_a(ins); if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT; else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1; - else if (op == BC_UCLO) { pc += bc_j(ins); break; } + else if (op == BC_UCLO) { + ptrdiff_t delta = bc_j(ins); + if (delta < 0) return maxslot; /* Prevent loop. */ + pc += delta; + break; + } for (s = minslot; s < maxslot; s++) DEF_SLOT(s); return minslot < maxslot ? minslot : maxslot; } @@ -266,7 +281,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins)); break; case BCMbase: - if (op >= BC_CALLM && op <= BC_VARG) { + if (op >= BC_CALLM && op <= BC_ITERN) { BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? 
maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2); if (LJ_FR2) DEF_SLOT(bc_a(ins)+1); @@ -277,6 +292,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s); return 0; } + } else if (op == BC_VARG) { + return maxslot; /* NYI: punt. */ } else if (op == BC_KNIL) { for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s); } else if (op == BC_TSETM) { @@ -285,7 +302,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, break; default: break; } - lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); + lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, + "use/def analysis PC out of range"); } #undef USE_SLOT @@ -294,15 +312,45 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, return 0; /* unreachable */ } +/* Mark slots used by upvalues of child prototypes as used. */ +static void snap_useuv(GCproto *pt, uint8_t *udf) +{ + /* This is a coarse check, because it's difficult to correlate the lifetime + ** of slots and closures. But the number of false positives is quite low. + ** A false positive may cause a slot not to be purged, which is just + ** a missed optimization. + */ + if ((pt->flags & PROTO_CHILD)) { + ptrdiff_t i, j, n = pt->sizekgc; + GCRef *kr = mref(pt->k, GCRef) - 1; + for (i = 0; i < n; i++, kr--) { + GCobj *o = gcref(*kr); + if (o->gch.gct == ~LJ_TPROTO) { + for (j = 0; j < gco2pt(o)->sizeuv; j++) { + uint32_t v = proto_uv(gco2pt(o))[j]; + if ((v & PROTO_UV_LOCAL)) { + udf[(v & 0xff)] = 0; + } + } + } + } + } +} + /* Purge dead slots before the next snapshot. */ void lj_snap_purge(jit_State *J) { uint8_t udf[SNAP_USEDEF_SLOTS]; - BCReg maxslot = J->maxslot; - BCReg s = snap_usedef(J, udf, J->pc, maxslot); - for (; s < maxslot; s++) - if (udf[s] != 0) - J->base[s] = 0; /* Purge dead slots. 
*/ + BCReg s, maxslot = J->maxslot; + if (bc_op(*J->pc) == BC_FUNCV && maxslot > J->pt->numparams) + maxslot = J->pt->numparams; + s = snap_usedef(J, udf, J->pc, maxslot); + if (s < maxslot) { + snap_useuv(J->pt, udf); + for (; s < maxslot; s++) + if (udf[s] != 0) + J->base[s] = 0; /* Purge dead slots. */ + } } /* Shrink last snapshot. */ @@ -315,6 +363,7 @@ void lj_snap_shrink(jit_State *J) BCReg maxslot = J->maxslot; BCReg baseslot = J->baseslot; BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot); + if (minslot < maxslot) snap_useuv(J->pt, udf); maxslot += baseslot; minslot += baseslot; snap->nslots = (uint8_t)maxslot; @@ -326,7 +375,7 @@ void lj_snap_shrink(jit_State *J) snap->nent = (uint8_t)m; nlim = J->cur.nsnapmap - snap->mapofs - 1; while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */ - J->cur.nsnapmap = (uint16_t)(snap->mapofs + m); /* Free up space in map. */ + J->cur.nsnapmap = (uint32_t)(snap->mapofs + m); /* Free up space in map. */ } /* -- Snapshot access ----------------------------------------------------- */ @@ -356,25 +405,26 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs) } /* Copy RegSP from parent snapshot to the parent links of the IR. 
*/ -IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) +IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir) { SnapShot *snap = &T->snap[snapno]; SnapEntry *map = &T->snapmap[snap->mapofs]; BloomFilter rfilt = snap_renamefilter(T, snapno); MSize n = 0; IRRef ref = 0; + UNUSED(J); for ( ; ; ir++) { uint32_t rs; if (ir->o == IR_SLOAD) { if (!(ir->op2 & IRSLOAD_PARENT)) break; for ( ; ; n++) { - lua_assert(n < snap->nent); + lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1); if (snap_slot(map[n]) == ir->op1) { ref = snap_ref(map[n++]); break; } } - } else if (LJ_SOFTFP && ir->o == IR_HIOP) { + } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) { ref++; } else if (ir->o == IR_PVAL) { ref = ir->op1 + REF_BIAS; @@ -385,7 +435,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) if (bloomtest(rfilt, ref)) rs = snap_renameref(T, snapno, ref, rs); ir->prev = (uint16_t)rs; - lua_assert(regsp_used(rs)); + lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS); } return ir; } @@ -403,7 +453,8 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) case IR_KNUM: case IR_KINT64: return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64); case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. 
*/ - default: lua_assert(0); return TREF_NIL; break; + case IR_KNULL: return lj_ir_knull(J, irt_type(ir->t)); + default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL; } } @@ -413,7 +464,7 @@ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref) MSize j; for (j = 0; j < nmax; j++) if (snap_ref(map[j]) == ref) - return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME); + return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME); return 0; } @@ -481,17 +532,19 @@ void lj_snap_replay(jit_State *J, GCtrace *T) tr = snap_replay_const(J, ir); } else if (!regsp_used(ir->prev)) { pass23 = 1; - lua_assert(s != 0); + lj_assertJ(s != 0, "unused slot 0 in snapshot"); tr = s; } else { IRType t = irt_type(ir->t); uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; - if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; + if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); + if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX; tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); } setslot: - J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ + /* Same as TREF_* flags. 
*/ + J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME)); J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2)); if ((sn & SNAP_FRAME)) J->baseslot = s+1; @@ -505,12 +558,15 @@ void lj_snap_replay(jit_State *J, GCtrace *T) IRRef refp = snap_ref(sn); IRIns *ir = &T->ir[refp]; if (regsp_reg(ir->r) == RID_SUNK) { + uint8_t m; if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; pass23 = 1; - lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || - ir->o == IR_CNEW || ir->o == IR_CNEWI); - if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); - if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); + lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || + ir->o == IR_CNEW || ir->o == IR_CNEWI, + "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); + m = lj_ir_mode[ir->o]; + if (irm_op1(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op1); + if (irm_op2(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) snap_pref(J, T, map, nent, seen, (ir+1)->op2); @@ -520,13 +576,14 @@ void lj_snap_replay(jit_State *J, GCtrace *T) if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); - else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && + else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && irs+1 < irlast && (irs+1)->o == IR_HIOP) snap_pref(J, T, map, nent, seen, (irs+1)->op2); } } } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { - lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); + lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, + "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); } } @@ -537,14 +594,16 @@ void lj_snap_replay(jit_State *J, GCtrace *T) IRIns *ir = &T->ir[refp]; if (regsp_reg(ir->r) == RID_SUNK) { TRef op1, op2; + 
uint8_t m; if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */ J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]]; continue; } op1 = ir->op1; - if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1); + m = lj_ir_mode[ir->o]; + if (irm_op1(m) == IRMref) op1 = snap_pref(J, T, map, nent, seen, op1); op2 = ir->op2; - if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2); + if (irm_op2(m) == IRMref) op2 = snap_pref(J, T, map, nent, seen, op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) { lj_needsplit(J); /* Emit joining HIOP. */ @@ -570,19 +629,37 @@ void lj_snap_replay(jit_State *J, GCtrace *T) if (irr->o == IR_HREFK || irr->o == IR_AREF) { IRIns *irf = &T->ir[irr->op1]; tmp = emitir(irf->ot, tmp, irf->op2); + } else if (irr->o == IR_NEWREF) { + IRRef allocref = tref_ref(tr); + IRRef keyref = tref_ref(key); + IRRef newref_ref = J->chain[IR_NEWREF]; + IRIns *newref = &J->cur.ir[newref_ref]; + lj_assertJ(irref_isk(keyref), + "sunk store for parent IR %04d with bad key %04d", + refp - REF_BIAS, keyref - REF_BIAS); + if (newref_ref > allocref && newref->op2 == keyref) { + lj_assertJ(newref->op1 == allocref, + "sunk store for parent IR %04d with bad tab %04d", + refp - REF_BIAS, allocref - REF_BIAS); + tmp = newref_ref; + goto skip_newref; + } } } tmp = emitir(irr->ot, tmp, key); + skip_newref: val = snap_pref(J, T, map, nent, seen, irs->op2); if (val == 0) { IRIns *irc = &T->ir[irs->op2]; - lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); + lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT, + "sunk store for parent IR %04d with bad op %d", + refp - REF_BIAS, irc->o); val = snap_pref(J, T, map, nent, seen, irc->op1); val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); - } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && + } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && irs+1 < irlast && (irs+1)->o == IR_HIOP) { IRType t = IRT_I64; - if (LJ_SOFTFP && 
irt_type((irs+1)->t) == IRT_SOFTFP) + if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP) t = IRT_NUM; lj_needsplit(J); if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { @@ -626,7 +703,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, IRType1 t = ir->t; RegSP rs = ir->prev; if (irref_isk(ref)) { /* Restore constant slot. */ - lj_ir_kvalue(J->L, o, ir); + if (ir->o == IR_KPTR) { + o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir); + } else { + lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL), + "restore of const from IR %04d with bad op %d", + ref - REF_BIAS, ir->o); + lj_ir_kvalue(J->L, o, ir); + } return; } if (LJ_UNLIKELY(bloomtest(rfilt, ref))) @@ -635,7 +719,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, int32_t *sps = &ex->spill[regsp_spill(rs)]; if (irt_isinteger(t)) { setintV(o, *sps); -#if !LJ_SOFTFP +#if !LJ_SOFTFP32 } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; #endif @@ -645,13 +729,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, o->u64 = *(uint64_t *)sps; #endif } else { - lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ + lj_assertJ(!irt_ispri(t), "PRI ref with spill slot"); setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); } } else { /* Restore from register. 
*/ Reg r = regsp_reg(rs); if (ra_noreg(r)) { - lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); + lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, + "restore from IR %04d has no reg", ref - REF_BIAS); snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); return; @@ -660,6 +745,9 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, #if !LJ_SOFTFP } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); +#elif LJ_64 /* && LJ_SOFTFP */ + } else if (irt_isnum(t)) { + o->u64 = ex->gpr[r-RID_MIN_GPR]; #endif #if LJ_64 && !LJ_GC64 } else if (irt_is64(t)) { @@ -676,7 +764,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, #if LJ_HASFFI /* Restore raw data from the trace exit state. */ -static void snap_restoredata(GCtrace *T, ExitState *ex, +static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex, SnapNo snapno, BloomFilter rfilt, IRRef ref, void *dst, CTSize sz) { @@ -684,8 +772,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, RegSP rs = ir->prev; int32_t *src; uint64_t tmp; + UNUSED(J); if (irref_isk(ref)) { - if (ir->o == IR_KNUM || ir->o == IR_KINT64) { + if (ir_isk64(ir)) { src = (int32_t *)&ir[1]; } else if (sz == 8) { tmp = (uint64_t)(uint32_t)ir->i; @@ -706,12 +795,12 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, Reg r = regsp_reg(rs); if (ra_noreg(r)) { /* Note: this assumes CNEWI is never used for SOFTFP split numbers. 
*/ - lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); - snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4); + lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, + "restore from IR %04d has no reg", ref - REF_BIAS); + snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4); *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; return; } - src = (int32_t *)&ex->gpr[r-RID_MIN_GPR]; #if !LJ_SOFTFP if (r >= RID_MAX_GPR) { src = (int32_t *)&ex->fpr[r-RID_MIN_FPR]; @@ -725,10 +814,14 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, #endif } else #endif - if (LJ_64 && LJ_BE && sz == 4) src++; + { + src = (int32_t *)&ex->gpr[r-RID_MIN_GPR]; + if (LJ_64 && LJ_BE && sz == 4) src++; + } } } - lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); + lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8, + "restore from IR %04d with bad size %d", ref - REF_BIAS, sz); if (sz == 4) *(int32_t *)dst = *src; else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; else if (sz == 1) *(int8_t *)dst = (int8_t)*src; @@ -741,8 +834,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, SnapNo snapno, BloomFilter rfilt, IRIns *ir, TValue *o) { - lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || - ir->o == IR_CNEW || ir->o == IR_CNEWI); + lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || + ir->o == IR_CNEW || ir->o == IR_CNEWI, + "sunk allocation with bad op %d", ir->o); #if LJ_HASFFI if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { CTState *cts = ctype_cts(J->L); @@ -753,13 +847,14 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, setcdataV(J->L, o, cd); if (ir->o == IR_CNEWI) { uint8_t *p = (uint8_t *)cdataptr(cd); - lua_assert(sz == 4 || sz == 8); + lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz); if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { - snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); + snap_restoredata(J, T, ex, snapno, rfilt, 
(ir+1)->op2, + LJ_LE ? p+4 : p, 4); if (LJ_BE) p += 4; sz = 4; } - snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); + snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz); } else { IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; for (irs = ir+1; irs < irlast; irs++) @@ -767,8 +862,11 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, IRIns *iro = &T->ir[T->ir[irs->op1].op2]; uint8_t *p = (uint8_t *)cd; CTSize szs; - lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD); - lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64); + lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o); + lj_assertJ(T->ir[irs->op1].o == IR_ADD, + "sunk store with bad add op %d", T->ir[irs->op1].o); + lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64, + "sunk store with bad const offset op %d", iro->o); if (irt_is64(irs->t)) szs = 8; else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; @@ -777,14 +875,16 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, p += (int64_t)ir_k64(iro)->u64; else p += iro->i; - lua_assert(p >= (uint8_t *)cdataptr(cd) && - p + szs <= (uint8_t *)cdataptr(cd) + sz); + lj_assertJ(p >= (uint8_t *)cdataptr(cd) && + p + szs <= (uint8_t *)cdataptr(cd) + sz, + "sunk store with offset out of range"); if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { - lua_assert(szs == 4); - snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); + lj_assertJ(szs == 4, "sunk store with bad size %d", szs); + snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2, + LJ_LE ? 
p+4 : p, 4); if (LJ_BE) p += 4; } - snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); + snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs); } } } else @@ -799,13 +899,27 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { IRIns *irk = &T->ir[irs->op1]; TValue tmp, *val; - lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || - irs->o == IR_FSTORE); + lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE || + irs->o == IR_FSTORE, + "sunk store with bad op %d", irs->o); if (irk->o == IR_FREF) { - lua_assert(irk->op2 == IRFL_TAB_META); - snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); - /* NOBARRIER: The table is new (marked white). */ - setgcref(t->metatable, obj2gco(tabV(&tmp))); + switch (irk->op2) { + case IRFL_TAB_META: + if (T->ir[irs->op2].o == IR_KNULL) { + setgcrefnull(t->metatable); + } else { + snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); + /* NOBARRIER: The table is new (marked white). */ + setgcref(t->metatable, obj2gco(tabV(&tmp))); + } + break; + case IRFL_TAB_NOMM: + /* Negative metamethod cache invalidated by lj_tab_set() below. */ + break; + default: + lj_assertJ(0, "sunk store with bad field %d", irk->op2); + break; + } } else { irk = &T->ir[irk->op2]; if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1]; @@ -813,7 +927,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, val = lj_tab_set(J->L, t, &tmp); /* NOBARRIER: The table is new (marked white). */ snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); - if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { + if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); val->u32.hi = tmp.u32.lo; } @@ -843,7 +957,8 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) lua_State *L = J->L; /* Set interpreter PC to the next PC to get correct error messages. 
*/ - setcframe_pc(cframe_raw(L->cframe), pc+1); + setcframe_pc(L->cframe, pc+1); + setcframe_pc(cframe_raw(cframe_prev(L->cframe)), pc); /* Make sure the stack is big enough for the slots from the snapshot. */ if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) { @@ -874,7 +989,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) continue; } snap_restoreval(J, T, ex, snapno, rfilt, ref, o); - if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { + if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { TValue tmp; snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); o->u32.hi = tmp.u32.lo; @@ -884,13 +999,17 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); L->base = o+1; #endif + } else if ((sn & SNAP_KEYINDEX)) { + /* A IRT_INT key index slot is restored as a number. Undo this. */ + o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o))); + o->u32.hi = LJ_KEYINDEX; } } } #if LJ_FR2 L->base += (map[nent+LJ_BE] & 0xff); #endif - lua_assert(map + nent == flinks); + lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot"); /* Compute current stack top. */ switch (bc_op(*pc)) { diff --git a/source/libs/luajit/LuaJIT-src/src/lj_snap.h b/source/libs/luajit/LuaJIT-src/src/lj_snap.h index 2c9ae3d643a7015480b2123a988fafc961460bda..0958c16b371aa8809052dbe679d0037d6742e2a7 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_snap.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_snap.h @@ -1,6 +1,6 @@ /* ** Snapshot handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_SNAP_H @@ -13,7 +13,8 @@ LJ_FUNC void lj_snap_add(jit_State *J); LJ_FUNC void lj_snap_purge(jit_State *J); LJ_FUNC void lj_snap_shrink(jit_State *J); -LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir); +LJ_FUNC IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, + IRIns *ir); LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T); LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_state.c b/source/libs/luajit/LuaJIT-src/src/lj_state.c index 632dd07e5f9c28d9a37b616a02cc853528fc93b1..d8fc545a0de1befcfccea6a888a71d72967c7c69 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_state.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_state.c @@ -1,6 +1,6 @@ /* ** State and stack handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -25,6 +25,7 @@ #include "lj_trace.h" #include "lj_dispatch.h" #include "lj_vm.h" +#include "lj_prng.h" #include "lj_lex.h" #include "lj_alloc.h" #include "luajit.h" @@ -60,7 +61,8 @@ static void resizestack(lua_State *L, MSize n) MSize oldsize = L->stacksize; MSize realsize = n + 1 + LJ_STACK_EXTRA; GCobj *up; - lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); + lj_assertL((MSize)(tvref(L->maxstack)-oldst) == L->stacksize-LJ_STACK_EXTRA-1, + "inconsistent stack size"); st = (TValue *)lj_mem_realloc(L, tvref(L->stack), (MSize)(oldsize*sizeof(TValue)), (MSize)(realsize*sizeof(TValue))); @@ -100,20 +102,49 @@ void lj_state_shrinkstack(lua_State *L, MSize used) /* Try to grow stack. 
*/ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need) { - MSize n; - if (L->stacksize > LJ_STACK_MAXEX) /* Overflow while handling overflow? */ - lj_err_throw(L, LUA_ERRERR); - n = L->stacksize + need; - if (n > LJ_STACK_MAX) { - n += 2*LUA_MINSTACK; - } else if (n < 2*L->stacksize) { - n = 2*L->stacksize; - if (n >= LJ_STACK_MAX) - n = LJ_STACK_MAX; + MSize n = L->stacksize + need; + if (LJ_LIKELY(n < LJ_STACK_MAX)) { /* The stack can grow as requested. */ + if (n < 2 * L->stacksize) { /* Try to double the size. */ + n = 2 * L->stacksize; + if (n > LJ_STACK_MAX) + n = LJ_STACK_MAX; + } + resizestack(L, n); + } else { /* Request would overflow. Raise a stack overflow error. */ + if (LJ_HASJIT) { + TValue *base = tvref(G(L)->jit_base); + if (base) L->base = base; + } + if (curr_funcisL(L)) { + L->top = curr_topL(L); + if (L->top > tvref(L->maxstack)) { + /* The current Lua frame violates the stack, so replace it with a + ** dummy. This can happen when BC_IFUNCF is trying to grow the stack. + */ + L->top = L->base; + setframe_gc(L->base - 1 - LJ_FR2, obj2gco(L), LJ_TTHREAD); + } + } + if (L->stacksize <= LJ_STACK_MAXEX) { + /* An error handler might want to inspect the stack overflow error, but + ** will need some stack space to run in. We give it a stack size beyond + ** the normal limit in order to do so, then rely on lj_state_relimitstack + ** calls during unwinding to bring us back to a convential stack size. + ** The + 1 is space for the error message, and 2 * LUA_MINSTACK is for + ** the lj_state_checkstack() call in lj_err_run(). + */ + resizestack(L, LJ_STACK_MAX + 1 + 2 * LUA_MINSTACK); + lj_err_stkov(L); /* May invoke an error handler. */ + } else { + /* If we're here, then the stack overflow error handler is requesting + ** to grow the stack even further. We have no choice but to abort the + ** error handler. + */ + GCstr *em = lj_err_str(L, LJ_ERR_STKOV); /* Might OOM. */ + setstrV(L, L->top++, em); /* There is always space to push an error. 
*/ + lj_err_throw(L, LUA_ERRERR); /* Does not invoke an error handler. */ + } } - resizestack(L, n); - if (L->stacksize > LJ_STACK_MAXEX) - lj_err_msg(L, LJ_ERR_STKOV); } void LJ_FASTCALL lj_state_growstack1(lua_State *L) @@ -121,6 +152,18 @@ void LJ_FASTCALL lj_state_growstack1(lua_State *L) lj_state_growstack(L, 1); } +static TValue *cpgrowstack(lua_State *co, lua_CFunction dummy, void *ud) +{ + UNUSED(dummy); + lj_state_growstack(co, *(MSize *)ud); + return NULL; +} + +int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need) +{ + return lj_vm_cpcall(L, NULL, &need, cpgrowstack); +} + /* Allocate basic stack for new state. */ static void stack_init(lua_State *L1, lua_State *L) { @@ -148,12 +191,16 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) /* NOBARRIER: State initialization, all objects are white. */ setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL))); settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY)); - lj_str_resize(L, LJ_MIN_STRTAB-1); + lj_str_init(L); lj_meta_init(L); lj_lex_init(L); fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. 
*/ g->gc.threshold = 4*g->gc.total; +#if LJ_HASFFI + lj_ctype_initfin(L); +#endif lj_trace_initstate(g); + lj_err_verify(); return NULL; } @@ -162,16 +209,25 @@ static void close_state(lua_State *L) global_State *g = G(L); lj_func_closeuv(L, tvref(L->stack)); lj_gc_freeall(g); - lua_assert(gcref(g->gc.root) == obj2gco(L)); - lua_assert(g->strnum == 0); + lj_assertG(gcref(g->gc.root) == obj2gco(L), + "main thread is not first GC object"); + lj_assertG(g->str.num == 0, "leaked %d strings", g->str.num); lj_trace_freestate(g); #if LJ_HASFFI lj_ctype_freestate(g); #endif - lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); + lj_str_freetab(g); lj_buf_free(g, &g->tmpbuf); lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); - lua_assert(g->gc.total == sizeof(GG_State)); +#if LJ_64 + if (mref(g->gc.lightudseg, uint32_t)) { + MSize segnum = g->gc.lightudnum ? (2 << lj_fls(g->gc.lightudnum)) : 2; + lj_mem_freevec(g, mref(g->gc.lightudseg, uint32_t), segnum, uint32_t); + } +#endif + lj_assertG(g->gc.total == sizeof(GG_State), + "memory leak of %lld bytes", + (long long)(g->gc.total - sizeof(GG_State))); #ifndef LUAJIT_USE_SYSMALLOC if (g->allocf == lj_alloc_f) lj_alloc_destroy(g->allocd); @@ -181,16 +237,33 @@ static void close_state(lua_State *L) } #if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) -lua_State *lj_state_newstate(lua_Alloc f, void *ud) +lua_State *lj_state_newstate(lua_Alloc allocf, void *allocd) #else -LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) +LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd) #endif { - GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); - lua_State *L = &GG->L; - global_State *g = &GG->g; + PRNGState prng; + GG_State *GG; + lua_State *L; + global_State *g; + /* We need the PRNG for the memory allocator, so initialize this first. 
*/ + if (!lj_prng_seed_secure(&prng)) { + lj_assertX(0, "secure PRNG seeding failed"); + /* Can only return NULL here, so this errors with "not enough memory". */ + return NULL; + } +#ifndef LUAJIT_USE_SYSMALLOC + if (allocf == LJ_ALLOCF_INTERNAL) { + allocd = lj_alloc_create(&prng); + if (!allocd) return NULL; + allocf = lj_alloc_f; + } +#endif + GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State)); if (GG == NULL || !checkptrGC(GG)) return NULL; memset(GG, 0, sizeof(GG_State)); + L = &GG->L; + g = &GG->g; L->gct = ~LJ_TTHREAD; L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ L->dummy_ffid = FF_C; @@ -198,12 +271,18 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED; g->strempty.marked = LJ_GC_WHITE0; g->strempty.gct = ~LJ_TSTR; - g->allocf = f; - g->allocd = ud; + g->allocf = allocf; + g->allocd = allocd; + g->prng = prng; +#ifndef LUAJIT_USE_SYSMALLOC + if (allocf == lj_alloc_f) { + lj_alloc_setprng(allocd, &g->prng); + } +#endif setgcref(g->mainthref, obj2gco(L)); setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); setgcref(g->uvhead.next, obj2gco(&g->uvhead)); - g->strmask = ~(MSize)0; + g->str.mask = ~(MSize)0; setnilV(registry(L)); setnilV(&g->nilnode.val); setnilV(&g->nilnode.key); @@ -283,17 +362,20 @@ lua_State *lj_state_new(lua_State *L) setmrefr(L1->glref, L->glref); setgcrefr(L1->env, L->env); stack_init(L1, L); /* init stack */ - lua_assert(iswhite(obj2gco(L1))); + lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white"); return L1; } void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) { - lua_assert(L != mainthread(g)); + lj_assertG(L != mainthread(g), "free of main thread"); if (obj2gco(L) == gcref(g->cur_L)) setgcrefnull(g->cur_L); - lj_func_closeuv(L, tvref(L->stack)); - lua_assert(gcref(L->openupval) == NULL); + if (gcref(L->openupval) != NULL) { + lj_func_closeuv(L, tvref(L->stack)); + lj_trace_abort(g); /* For aa_uref soundness. 
*/ + lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues"); + } lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); lj_mem_freet(g, L); } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_state.h b/source/libs/luajit/LuaJIT-src/src/lj_state.h index 02a0eafa332a08a26b292f8ed86b1f54c1e4e4ad..15f9fd4776495427e44d6d217c02599aa4ba4b7d 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_state.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_state.h @@ -1,6 +1,6 @@ /* ** State and stack handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_STATE_H @@ -18,6 +18,7 @@ LJ_FUNC void lj_state_relimitstack(lua_State *L); LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need); LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L); +LJ_FUNC int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need); static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) { @@ -32,4 +33,6 @@ LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); #endif +#define LJ_ALLOCF_INTERNAL ((lua_Alloc)(void *)(uintptr_t)(1237<<4)) + #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_str.c b/source/libs/luajit/LuaJIT-src/src/lj_str.c index ec11ab00c9edcedd226f6444e012f11acf2b6d42..2ba83889f6d1daec2fd7a7ce2e6b57544c89c8b6 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_str.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_str.c @@ -1,6 +1,6 @@ /* ** String handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #define lj_str_c @@ -11,6 +11,7 @@ #include "lj_err.h" #include "lj_str.h" #include "lj_char.h" +#include "lj_prng.h" /* -- String helpers ------------------------------------------------------ */ @@ -37,27 +38,6 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) return (int32_t)(a->len - b->len); } -/* Fast string data comparison. Caveat: unaligned access to 1st string! */ -static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) -{ - MSize i = 0; - lua_assert(len > 0); - lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); - do { /* Note: innocuous access up to end of string + 3. */ - uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); - if (v) { - i -= len; -#if LJ_LE - return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; -#else - return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; -#endif - } - i += 4; - } while (i < len); - return 0; -} - /* Find fixed string p inside string s. */ const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) { @@ -90,135 +70,302 @@ int lj_str_haspattern(GCstr *s) return 0; /* No pattern matching chars found. */ } +/* -- String hashing ------------------------------------------------------ */ + +/* Keyed sparse ARX string hash. Constant time. */ +static StrHash hash_sparse(uint64_t seed, const char *str, MSize len) +{ + /* Constants taken from lookup3 hash by Bob Jenkins. */ + StrHash a, b, h = len ^ (StrHash)seed; + if (len >= 4) { /* Caveat: unaligned access! */ + a = lj_getu32(str); + h ^= lj_getu32(str+len-4); + b = lj_getu32(str+(len>>1)-2); + h ^= b; h -= lj_rol(b, 14); + b += lj_getu32(str+(len>>2)-1); + } else { + a = *(const uint8_t *)str; + h ^= *(const uint8_t *)(str+len-1); + b = *(const uint8_t *)(str+(len>>1)); + h ^= b; h -= lj_rol(b, 14); + } + a ^= h; a -= lj_rol(h, 11); + b ^= a; b -= lj_rol(a, 25); + h ^= b; h -= lj_rol(b, 16); + return h; +} + +#if LUAJIT_SECURITY_STRHASH +/* Keyed dense ARX string hash. Linear time. 
*/ +static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h, + const char *str, MSize len) +{ + StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4)); + if (len > 12) { + StrHash a = (StrHash)seed; + const char *pe = str+len-12, *p = pe, *q = str; + do { + a += lj_getu32(p); + b += lj_getu32(p+4); + h += lj_getu32(p+8); + p = q; q += 12; + h ^= b; h -= lj_rol(b, 14); + a ^= h; a -= lj_rol(h, 11); + b ^= a; b -= lj_rol(a, 25); + } while (p < pe); + h ^= b; h -= lj_rol(b, 16); + a ^= h; a -= lj_rol(h, 4); + b ^= a; b -= lj_rol(a, 14); + } + return b; +} +#endif + /* -- String interning ---------------------------------------------------- */ -/* Resize the string hash table (grow and shrink). */ +#define LJ_STR_MAXCOLL 32 + +/* Resize the string interning hash table (grow and shrink). */ void lj_str_resize(lua_State *L, MSize newmask) { global_State *g = G(L); - GCRef *newhash; + GCRef *newtab, *oldtab = g->str.tab; MSize i; + + /* No resizing during GC traversal or if already too big. */ if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) - return; /* No resizing during GC traversal or if already too big. */ - newhash = lj_mem_newvec(L, newmask+1, GCRef); - memset(newhash, 0, (newmask+1)*sizeof(GCRef)); - for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ - GCobj *p = gcref(g->strhash[i]); - while (p) { /* Follow each hash chain and reinsert all strings. */ - MSize h = gco2str(p)->hash & newmask; - GCobj *next = gcnext(p); - /* NOBARRIER: The string table is a GC root. */ - setgcrefr(p->gch.nextgc, newhash[h]); - setgcref(newhash[h], p); - p = next; + return; + + newtab = lj_mem_newvec(L, newmask+1, GCRef); + memset(newtab, 0, (newmask+1)*sizeof(GCRef)); + +#if LUAJIT_SECURITY_STRHASH + /* Check which chains need secondary hashes. */ + if (g->str.second) { + int newsecond = 0; + /* Compute primary chain lengths. 
*/ + for (i = g->str.mask; i != ~(MSize)0; i--) { + GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); + while (o) { + GCstr *s = gco2str(o); + MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) : + s->hash; + hash &= newmask; + setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1); + o = gcnext(o); + } + } + /* Mark secondary chains. */ + for (i = newmask; i != ~(MSize)0; i--) { + int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL; + newsecond |= secondary; + setgcrefp(newtab[i], secondary); } + g->str.second = newsecond; } - lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); - g->strmask = newmask; - g->strhash = newhash; -} - -/* -** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a string to -** compute its hash -*/ -#if !defined(LUAI_HASHLIMIT) -#define LUAI_HASHLIMIT 5 #endif -#define cast(t, exp) ((t)(exp)) -int luajittex_choose_hash_function = 0 ; -/* Intern a string and return string object. */ -GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) -{ - global_State *g; - GCstr *s; - GCobj *o; - MSize len = (MSize)lenx; - MSize a, b, h = len; - size_t step ; - size_t l1 ; - if (lenx >= LJ_MAX_STR) - lj_err_msg(L, LJ_ERR_STROV); - g = G(L); - - if (len==0) - return &g->strempty; - if (luajittex_choose_hash_function==0) { - /* Lua 5.1.5 hash function */ - /* for 5.2 max methods we also need to patch the vm eq */ - step = (len>>LUAI_HASHLIMIT)+1; /* if string is too long, don't hash all its chars */ - for (l1=len; l1>=step; l1-=step) /* compute hash */ - h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1])); - } else { - /* LuaJIT 2.0.2 hash function */ - /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ - if (len >= 4) { /* Caveat: unaligned access! 
*/ - a = lj_getu32(str); - h ^= lj_getu32(str+len-4); - b = lj_getu32(str+(len>>1)-2); - h ^= b; h -= lj_rol(b, 14); - b += lj_getu32(str+(len>>2)-1); - } else if (len > 0) { - a = *(const uint8_t *)str; - h ^= *(const uint8_t *)(str+len-1); - b = *(const uint8_t *)(str+(len>>1)); - h ^= b; h -= lj_rol(b, 14); - } else { - /* Already done, kept for reference */ - return &g->strempty; + /* Reinsert all strings from the old table into the new table. */ + for (i = g->str.mask; i != ~(MSize)0; i--) { + GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); + while (o) { + GCobj *next = gcnext(o); + GCstr *s = gco2str(o); + MSize hash = s->hash; +#if LUAJIT_SECURITY_STRHASH + uintptr_t u; + if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */ + hash &= newmask; + u = gcrefu(newtab[hash]); + if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */ + s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len); + s->hashalg = 1; + hash &= newmask; + u = gcrefu(newtab[hash]); + } + } else { /* String hashed with secondary hash. */ + MSize shash = hash_sparse(g->str.seed, strdata(s), s->len); + u = gcrefu(newtab[shash & newmask]); + if (u & 1) { + hash &= newmask; + u = gcrefu(newtab[hash]); + } else { /* Revert string back to primary hash. */ + s->hash = shash; + s->hashalg = 0; + hash = (shash & newmask); + } + } + /* NOBARRIER: The string table is a GC root. */ + setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); + setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1))); +#else + hash &= newmask; + /* NOBARRIER: The string table is a GC root. */ + setgcrefr(o->gch.nextgc, newtab[hash]); + setgcref(newtab[hash], o); +#endif + o = next; } - a ^= h; a -= lj_rol(h, 11); - b ^= a; b -= lj_rol(a, 25); - h ^= b; h -= lj_rol(b, 16); - } + } + /* Free old table and replace with new table. */ + lj_str_freetab(g); + g->str.tab = newtab; + g->str.mask = newmask; +} - /* Check if the string has already been interned. 
*/ - o = gcref(g->strhash[h & g->strmask]); - if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { - while (o != NULL) { - GCstr *sx = gco2str(o); - if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { - /* Resurrect if dead. Can only happen with fixstring() (keywords). */ - if (isdead(g, o)) flipwhite(o); - return sx; /* Return existing string. */ +#if LUAJIT_SECURITY_STRHASH +/* Rehash and rechain all strings in a chain. */ +static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc, + const char *str, MSize len) +{ + global_State *g = G(L); + int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? */ + GCRef *strtab = g->str.tab; + MSize strmask = g->str.mask; + GCobj *o = gcref(strtab[hashc & strmask]); + setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1)); + g->str.second = 1; + while (o) { + uintptr_t u; + GCobj *next = gcnext(o); + GCstr *s = gco2str(o); + StrHash hash; + if (ow) { /* Must sweep while rechaining. */ + if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */ + lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), + "sweep of undead string"); + makewhite(g, o); + } else { /* Free dead string. */ + lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, + "sweep of unlive string"); + lj_str_free(g, s); + o = next; + continue; } - o = gcnext(o); } - } else { /* Slow path: end of string is too close to a page boundary. */ - while (o != NULL) { - GCstr *sx = gco2str(o); - if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { - /* Resurrect if dead. Can only happen with fixstring() (keywords). */ - if (isdead(g, o)) flipwhite(o); - return sx; /* Return existing string. */ - } - o = gcnext(o); + hash = s->hash; + if (!s->hashalg) { /* Rehash with secondary hash. */ + hash = hash_dense(g->str.seed, hash, strdata(s), s->len); + s->hash = hash; + s->hashalg = 1; } + /* Rechain. 
*/ + hash &= strmask; + u = gcrefu(strtab[hash]); + setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); + setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1))); + o = next; } - /* Nope, create a new string. */ - s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); + /* Try to insert the pending string again. */ + return lj_str_new(L, str, len); +} +#endif + +/* Reseed String ID from PRNG after random interval < 2^bits. */ +#if LUAJIT_SECURITY_STRID == 1 +#define STRID_RESEED_INTERVAL 8 +#elif LUAJIT_SECURITY_STRID == 2 +#define STRID_RESEED_INTERVAL 4 +#elif LUAJIT_SECURITY_STRID >= 3 +#define STRID_RESEED_INTERVAL 0 +#endif + +/* Allocate a new string and add to string interning table. */ +static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len, + StrHash hash, int hashalg) +{ + GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr); + global_State *g = G(L); + uintptr_t u; newwhite(g, s); s->gct = ~LJ_TSTR; s->len = len; - s->hash = h; + s->hash = hash; +#ifndef STRID_RESEED_INTERVAL + s->sid = g->str.id++; +#elif STRID_RESEED_INTERVAL + if (!g->str.idreseed--) { + uint64_t r = lj_prng_u64(&g->prng); + g->str.id = (StrID)r; + g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL)); + } + s->sid = g->str.id++; +#else + s->sid = (StrID)lj_prng_u64(&g->prng); +#endif s->reserved = 0; + s->hashalg = (uint8_t)hashalg; + /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */ + *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0; memcpy(strdatawr(s), str, len); - strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ - /* Add it to string hash table. */ - h &= g->strmask; - s->nextgc = g->strhash[h]; + /* Add to string hash table. */ + hash &= g->str.mask; + u = gcrefu(g->str.tab[hash]); + setgcrefp(s->nextgc, (u & ~(uintptr_t)1)); /* NOBARRIER: The string table is a GC root. */ - setgcref(g->strhash[h], obj2gco(s)); - if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ - lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. 
*/ + setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1))); + if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */ + lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */ return s; /* Return newly interned string. */ } +int luajittex_choose_hash_function = 0 ; +/* Intern a string and return string object. */ +GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) +{ + global_State *g = G(L); + if (lenx-1 < LJ_MAX_STR-1) { + MSize len = (MSize)lenx; + StrHash hash = hash_sparse(g->str.seed, str, len); + MSize coll = 0; + int hashalg = 0; + /* Check if the string has already been interned. */ + GCobj *o = gcref(g->str.tab[hash & g->str.mask]); +#if LUAJIT_SECURITY_STRHASH + if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */ + hashalg = 1; + hash = hash_dense(g->str.seed, hash, str, len); + o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1); + } +#endif + while (o != NULL) { + GCstr *sx = gco2str(o); + if (sx->hash == hash && sx->len == len) { + if (memcmp(str, strdata(sx), len) == 0) { + if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */ + return sx; /* Return existing string. */ + } + coll++; + } + coll++; + o = gcnext(o); + } +#if LUAJIT_SECURITY_STRHASH + /* Rehash chain if there are too many collisions. */ + if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) { + return lj_str_rehash_chain(L, hash, str, len); + } +#endif + /* Otherwise allocate a new string. 
*/ + return lj_str_alloc(L, str, len, hash, hashalg); + } else { + if (lenx) + lj_err_msg(L, LJ_ERR_STROV); + return &g->strempty; + } +} + void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) { - g->strnum--; - lj_mem_free(g, s, sizestring(s)); + g->str.num--; + lj_mem_free(g, s, lj_str_size(s->len)); +} + +void LJ_FASTCALL lj_str_init(lua_State *L) +{ + global_State *g = G(L); + g->str.seed = lj_prng_u64(&g->prng); + lj_str_resize(L, LJ_MIN_STRTAB-1); } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_str.h b/source/libs/luajit/LuaJIT-src/src/lj_str.h index 85c1e405bd7d66746ad2fa23d38aaa83f335dc3d..44341b9f8c6ad7586877cebebec2950642910369 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_str.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_str.h @@ -1,6 +1,6 @@ /* ** String handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_STR_H @@ -20,8 +20,12 @@ LJ_FUNC int lj_str_haspattern(GCstr *s); LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); +LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L); +#define lj_str_freetab(g) \ + (lj_mem_freevec(g, g->str.tab, g->str.mask+1, GCRef)) #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) +#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3)) #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_strfmt.c b/source/libs/luajit/LuaJIT-src/src/lj_strfmt.c index d7893ce981be557a556398d45bcd193eac0653c8..04aebaa47251bc42e0884f4e2fc66cc0361a953e 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_strfmt.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_strfmt.c @@ -1,6 +1,6 @@ /* ** String formatting. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include <stdio.h> @@ -9,11 +9,17 @@ #define LUA_CORE #include "lj_obj.h" +#include "lj_err.h" #include "lj_buf.h" #include "lj_str.h" +#include "lj_meta.h" #include "lj_state.h" #include "lj_char.h" #include "lj_strfmt.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#endif +#include "lj_lib.h" /* -- Format parser ------------------------------------------------------- */ @@ -96,7 +102,7 @@ retlit: char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k) { uint32_t u = (uint32_t)k; - if (k < 0) { u = (uint32_t)-k; *p++ = '-'; } + if (k < 0) { u = ~u+1u; *p++ = '-'; } if (u < 10000) { if (u < 10) goto dig1; if (u < 100) goto dig2; @@ -161,6 +167,10 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) if (tvisstr(o)) { *lenp = strV(o)->len; return strVdata(o); + } else if (tvisbuf(o)) { + SBufExt *sbx = bufV(o); + *lenp = sbufxlen(sbx); + return sbx->r; } else if (tvisint(o)) { sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); } else if (tvisnum(o)) { @@ -169,7 +179,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) return NULL; } *lenp = sbuflen(sb); - return sbufB(sb); + return sb->b; } /* -- Unformatted conversions to buffer ----------------------------------- */ @@ -177,7 +187,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) /* Add integer to buffer. */ SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) { - setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k)); + sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k); return sb; } @@ -191,80 +201,93 @@ SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o) SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v) { - setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v)); + sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v); return sb; } /* Add quoted string to buffer. 
*/ -SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) +static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len) { - const char *s = strdata(str); - MSize len = str->len; lj_buf_putb(sb, '"'); while (len--) { uint32_t c = (uint32_t)(uint8_t)*s++; - char *p = lj_buf_more(sb, 4); + char *w = lj_buf_more(sb, 4); if (c == '"' || c == '\\' || c == '\n') { - *p++ = '\\'; + *w++ = '\\'; } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ uint32_t d; - *p++ = '\\'; + *w++ = '\\'; if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { - *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; + *w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; goto tens; } else if (c >= 10) { tens: - d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d); + d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d); } c += '0'; } - *p++ = (char)c; - setsbufP(sb, p); + *w++ = (char)c; + sb->w = w; } lj_buf_putb(sb, '"'); return sb; } +#if LJ_HASJIT +SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) +{ + return strfmt_putquotedlen(sb, strdata(str), str->len); +} +#endif + /* -- Formatted conversions to buffer ------------------------------------- */ /* Add formatted char to buffer. */ SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c) { MSize width = STRFMT_WIDTH(sf); - char *p = lj_buf_more(sb, width > 1 ? width : 1); - if ((sf & STRFMT_F_LEFT)) *p++ = (char)c; - while (width-- > 1) *p++ = ' '; - if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c; - setsbufP(sb, p); + char *w = lj_buf_more(sb, width > 1 ? width : 1); + if ((sf & STRFMT_F_LEFT)) *w++ = (char)c; + while (width-- > 1) *w++ = ' '; + if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c; + sb->w = w; return sb; } /* Add formatted string to buffer. */ -SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) +static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len) { - MSize len = str->len <= STRFMT_PREC(sf) ? 
str->len : STRFMT_PREC(sf); MSize width = STRFMT_WIDTH(sf); - char *p = lj_buf_more(sb, width > len ? width : len); - if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len); - while (width-- > len) *p++ = ' '; - if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len); - setsbufP(sb, p); + char *w; + if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf); + w = lj_buf_more(sb, width > len ? width : len); + if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); + while (width-- > len) *w++ = ' '; + if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); + sb->w = w; return sb; } +#if LJ_HASJIT +SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) +{ + return strfmt_putfstrlen(sb, sf, strdata(str), str->len); +} +#endif + /* Add formatted signed/unsigned integer to buffer. */ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) { - char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p; + char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w; #ifdef LUA_USE_ASSERT - char *ps; + char *ws; #endif MSize prefix = 0, len, prec, pprec, width, need; /* Figure out signed prefixes. */ if (STRFMT_TYPE(sf) == STRFMT_INT) { if ((int64_t)k < 0) { - k = (uint64_t)-(int64_t)k; + k = ~k+1u; prefix = 256 + '-'; } else if ((sf & STRFMT_F_PLUS)) { prefix = 256 + '+'; @@ -301,27 +324,27 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) width = STRFMT_WIDTH(sf); pprec = prec + (prefix >> 8); need = width > pprec ? width : pprec; - p = lj_buf_more(sb, need); + w = lj_buf_more(sb, need); #ifdef LUA_USE_ASSERT - ps = p; + ws = w; #endif /* Format number with leading/trailing whitespace and zeros. 
*/ if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0) - while (width-- > pprec) *p++ = ' '; + while (width-- > pprec) *w++ = ' '; if (prefix) { - if ((char)prefix >= 'X') *p++ = '0'; - *p++ = (char)prefix; + if ((char)prefix >= 'X') *w++ = '0'; + *w++ = (char)prefix; } if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO) - while (width-- > pprec) *p++ = '0'; - while (prec-- > len) *p++ = '0'; - while (q < buf + sizeof(buf)) *p++ = *q++; /* Add number itself. */ + while (width-- > pprec) *w++ = '0'; + while (prec-- > len) *w++ = '0'; + while (q < buf + sizeof(buf)) *w++ = *q++; /* Add number itself. */ if ((sf & STRFMT_F_LEFT)) - while (width-- > pprec) *p++ = ' '; + while (width-- > pprec) *w++ = ' '; - lua_assert(need == (MSize)(p - ps)); - setsbufP(sb, p); + lj_assertX(need == (MSize)(w - ws), "miscalculated format size"); + sb->w = w; return sb; } @@ -346,6 +369,117 @@ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) return lj_strfmt_putfxint(sb, sf, (uint64_t)k); } +/* Format stack arguments to buffer. */ +int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry) +{ + int narg = (int)(L->top - L->base); + GCstr *fmt = lj_lib_checkstr(L, arg); + FormatState fs; + SFormat sf; + lj_strfmt_init(&fs, strdata(fmt), fmt->len); + while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { + if (sf == STRFMT_LIT) { + lj_buf_putmem(sb, fs.str, fs.len); + } else if (sf == STRFMT_ERR) { + lj_err_callerv(L, LJ_ERR_STRFMT, + strdata(lj_str_new(L, fs.str, fs.len))); + } else { + TValue *o = &L->base[arg++]; + if (arg > narg) + lj_err_arg(L, arg, LJ_ERR_NOVAL); + switch (STRFMT_TYPE(sf)) { + case STRFMT_INT: + if (tvisint(o)) { + int32_t k = intV(o); + if (sf == STRFMT_INT) + lj_strfmt_putint(sb, k); /* Shortcut for plain %d. 
*/ + else + lj_strfmt_putfxint(sb, sf, k); + break; + } +#if LJ_HASFFI + if (tviscdata(o)) { + GCcdata *cd = cdataV(o); + if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) { + lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd)); + break; + } + } +#endif + lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg)); + break; + case STRFMT_UINT: + if (tvisint(o)) { + lj_strfmt_putfxint(sb, sf, intV(o)); + break; + } +#if LJ_HASFFI + if (tviscdata(o)) { + GCcdata *cd = cdataV(o); + if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) { + lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd)); + break; + } + } +#endif + lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg)); + break; + case STRFMT_NUM: + lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg)); + break; + case STRFMT_STR: { + MSize len; + const char *s; + cTValue *mo; + if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 && + !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { + /* Call __tostring metamethod once. */ + copyTV(L, L->top++, mo); + copyTV(L, L->top++, o); + lua_call(L, 1, 1); + o = &L->base[arg-1]; /* Stack may have been reallocated. */ + copyTV(L, o, --L->top); /* Replace inline for retry. */ + if (retry < 2) { /* Global buffer may have been overwritten. */ + retry = 1; + break; + } + } + if (LJ_LIKELY(tvisstr(o))) { + len = strV(o)->len; + s = strVdata(o); +#if LJ_HASBUFFER + } else if (tvisbuf(o)) { + SBufExt *sbx = bufV(o); + if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF); + len = sbufxlen(sbx); + s = sbx->r; +#endif + } else { + GCstr *str = lj_strfmt_obj(L, o); + len = str->len; + s = strdata(str); + } + if ((sf & STRFMT_T_QUOTED)) + strfmt_putquotedlen(sb, s, len); /* No formatting. */ + else + strfmt_putfstrlen(sb, sf, s, len); + break; + } + case STRFMT_CHAR: + lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg)); + break; + case STRFMT_PTR: /* No formatting. 
*/ + lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o)); + break; + default: + lj_assertL(0, "bad string format type"); + break; + } + } + } + return retry; +} + /* -- Conversions to strings ---------------------------------------------- */ /* Convert integer to string. */ @@ -393,7 +527,7 @@ GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o) p = lj_buf_wmem(p, "builtin#", 8); p = lj_strfmt_wint(p, funcV(o)->c.ffid); } else { - p = lj_strfmt_wptr(p, lj_obj_ptr(o)); + p = lj_strfmt_wptr(p, lj_obj_ptr(G(L), o)); } return lj_str_new(L, buf, (size_t)(p - buf)); } @@ -449,7 +583,7 @@ const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp) case STRFMT_ERR: default: lj_buf_putb(sb, '?'); - lua_assert(0); + lj_assertL(0, "bad string format near offset %d", fs.len); break; } } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_strfmt.h b/source/libs/luajit/LuaJIT-src/src/lj_strfmt.h index 6e1d9017e624bce59f7ac81f46c35951206b826b..43cf53739c341128bdb2f44961f2e995575e0480 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_strfmt.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_strfmt.h @@ -1,6 +1,6 @@ /* ** String formatting. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_STRFMT_H @@ -79,7 +79,8 @@ static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len) { fs->p = (const uint8_t *)p; fs->e = (const uint8_t *)p + len; - lua_assert(*fs->e == 0); /* Must be NUL-terminated (may have NULs inside). */ + /* Must be NUL-terminated. May have NULs inside, too. */ + lj_assertX(*fs->e == 0, "format not NUL-terminated"); } /* Raw conversions. 
*/ @@ -94,7 +95,9 @@ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k); LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o); #endif LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v); +#if LJ_HASJIT LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str); +#endif /* Formatted conversions to buffer. */ LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k); @@ -102,7 +105,10 @@ LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n); LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n); LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n); LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c); +#if LJ_HASJIT LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str); +#endif +LJ_FUNC int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry); /* Conversions to strings. */ LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k); @@ -117,7 +123,7 @@ LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o); LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp); LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...) -#ifdef __GNUC__ +#if defined(__GNUC__) || defined(__clang__) __attribute__ ((format (printf, 2, 3))) #endif ; diff --git a/source/libs/luajit/LuaJIT-src/src/lj_strfmt_num.c b/source/libs/luajit/LuaJIT-src/src/lj_strfmt_num.c index 9271f68a117cc04e2d99ee1c6dd55f950a348327..a33fc63ad8fe4b3a34d6c3e7aeb8ec601b5c7fe1 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_strfmt_num.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_strfmt_num.c @@ -1,6 +1,6 @@ /* ** String formatting for floating-point numbers. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** Contributed by Peter Cawley. 
*/ @@ -257,7 +257,7 @@ static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen, } else { prec -= hilen - 9; } - lua_assert(prec < 9); + lj_assertX(prec < 9, "bad precision %d", prec); lj_strfmt_wuint9(nd9, nd[ndhi]); lj_strfmt_wuint9(ref9, *ref); return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5'); @@ -414,14 +414,14 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) ** Rescaling was performed, but this introduced some error, and might ** have pushed us across a rounding boundary. We check whether this ** error affected the result by introducing even more error (2ulp in - ** either direction), and seeing whether a roundary boundary was + ** either direction), and seeing whether a rounding boundary was ** crossed. Having already converted the -2ulp case, we save off its ** most significant digits, convert the +2ulp case, and compare them. */ int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29) + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12)); const int8_t *m_e = four_ulp_m_e + eidx * 2; - lua_assert(0 <= eidx && eidx < 128); + lj_assertG_(G(sbufL(sb)), 0 <= eidx && eidx < 128, "bad eidx %d", eidx); nd[33] = nd[ndhi]; nd[32] = nd[(ndhi - 1) & 0x3f]; nd[31] = nd[(ndhi - 2) & 0x3f]; @@ -454,7 +454,8 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) prec--; if (!i) { if (ndlo == ndhi) { prec = 0; break; } - lj_strfmt_wuint9(tail, nd[++ndlo]); + ndlo = (ndlo + 1) & 0x3f; + lj_strfmt_wuint9(tail, nd[ndlo]); i = 9; } } @@ -576,7 +577,7 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) /* Add formatted floating-point number to buffer. 
*/ SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n) { - setsbufP(sb, lj_strfmt_wfnum(sb, sf, n, NULL)); + sb->w = lj_strfmt_wfnum(sb, sf, n, NULL); return sb; } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_strscan.c b/source/libs/luajit/LuaJIT-src/src/lj_strscan.c index f5f35c96028062ed35992c020c70193b99519de5..502c78e97e145eeb38a4ab11931b2e1388409f74 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_strscan.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_strscan.c @@ -1,6 +1,6 @@ /* ** String scanning. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include <math.h> @@ -63,6 +63,7 @@ #define STRSCAN_MAXDIG 800 /* 772 + extra are sufficient. */ #define STRSCAN_DDIG (STRSCAN_DIG/2) #define STRSCAN_DMASK (STRSCAN_DDIG-1) +#define STRSCAN_MAXEXP (1 << 20) /* Helpers for circular buffer. */ #define DNEXT(a) (((a)+1) & STRSCAN_DMASK) @@ -79,7 +80,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) /* Avoid double rounding for denormals. */ if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) { /* NYI: all of this generates way too much code on 32 bit CPUs. */ -#if defined(__GNUC__) && LJ_64 +#if (defined(__GNUC__) || defined(__clang__)) && LJ_64 int32_t b = (int32_t)(__builtin_clzll(x)^63); #else int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) : @@ -93,7 +94,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) } /* Convert to double using a signed int64_t conversion, then rescale. */ - lua_assert((int64_t)x >= 0); + lj_assertX((int64_t)x >= 0, "bad double conversion"); n = (double)(int64_t)x; if (neg) n = -n; if (ex2) n = ldexp(n, ex2); @@ -121,20 +122,21 @@ static StrScanFmt strscan_hex(const uint8_t *p, TValue *o, /* Format-specific handling. */ switch (fmt) { case STRSCAN_INT: - if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) { - o->i = neg ? 
-(int32_t)x : (int32_t)x; + if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg && + !(x == 0 && neg)) { + o->i = neg ? (int32_t)(~x+1u) : (int32_t)x; return STRSCAN_INT; /* Fast path for 32 bit integers. */ } if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; } /* fallthrough */ case STRSCAN_U32: if (dig > 8) return STRSCAN_ERROR; - o->i = neg ? -(int32_t)x : (int32_t)x; + o->i = neg ? (int32_t)(~x+1u) : (int32_t)x; return STRSCAN_U32; case STRSCAN_I64: case STRSCAN_U64: if (dig > 16) return STRSCAN_ERROR; - o->u64 = neg ? (uint64_t)-(int64_t)x : x; + o->u64 = neg ? ~x+1u : x; return fmt; default: break; @@ -166,12 +168,12 @@ static StrScanFmt strscan_oct(const uint8_t *p, TValue *o, /* fallthrough */ case STRSCAN_U32: if ((x >> 32)) return STRSCAN_ERROR; - o->i = neg ? -(int32_t)x : (int32_t)x; + o->i = neg ? (int32_t)(~(uint32_t)x+1u) : (int32_t)x; break; default: case STRSCAN_I64: case STRSCAN_U64: - o->u64 = neg ? (uint64_t)-(int64_t)x : x; + o->u64 = neg ? ~x+1u : x; break; } return fmt; @@ -227,18 +229,18 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o, switch (fmt) { case STRSCAN_INT: if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) { - o->i = neg ? -(int32_t)x : (int32_t)x; + o->i = neg ? (int32_t)(~x+1u) : (int32_t)x; return STRSCAN_INT; /* Fast path for 32 bit integers. */ } if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; goto plainnumber; } /* fallthrough */ case STRSCAN_U32: if ((x >> 32) != 0) return STRSCAN_ERROR; - o->i = neg ? -(int32_t)x : (int32_t)x; + o->i = neg ? (int32_t)(~x+1u) : (int32_t)x; return STRSCAN_U32; case STRSCAN_I64: case STRSCAN_U64: - o->u64 = neg ? (uint64_t)-(int64_t)x : x; + o->u64 = neg ? ~x+1u : x; return fmt; default: plainnumber: /* Fast path for plain numbers < 2^63. 
*/ @@ -262,7 +264,7 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o, uint32_t hi = 0, lo = (uint32_t)(xip-xi); int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1); - lua_assert(lo > 0 && (ex10 & 1) == 0); + lj_assertX(lo > 0 && (ex10 & 1) == 0, "bad lo %d ex10 %d", lo, ex10); /* Handle simple overflow/underflow. */ if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; } @@ -346,18 +348,18 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o, switch (fmt) { case STRSCAN_INT: if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) { - o->i = neg ? -(int32_t)x : (int32_t)x; + o->i = neg ? (int32_t)(~x+1u) : (int32_t)x; return STRSCAN_INT; /* Fast path for 32 bit integers. */ } if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; } /* fallthrough */ case STRSCAN_U32: if (dig > 32) return STRSCAN_ERROR; - o->i = neg ? -(int32_t)x : (int32_t)x; + o->i = neg ? (int32_t)(~x+1u) : (int32_t)x; return STRSCAN_U32; case STRSCAN_I64: case STRSCAN_U64: - o->u64 = neg ? (uint64_t)-(int64_t)x : x; + o->u64 = neg ? ~x+1u : x; return fmt; default: break; @@ -370,9 +372,11 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o, } /* Scan string containing a number. Returns format. Returns value in o. */ -StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) +StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o, + uint32_t opt) { int32_t neg = 0; + const uint8_t *pe = p + len; /* Remove leading space, parse sign and non-numbers. */ if (LJ_UNLIKELY(!lj_char_isdigit(*p))) { @@ -390,7 +394,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) p += 3; } while (lj_char_isspace(*p)) p++; - if (*p) return STRSCAN_ERROR; + if (*p || p < pe) return STRSCAN_ERROR; o->u64 = tmp.u64; return STRSCAN_NUM; } @@ -441,10 +445,12 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) /* Handle decimal point. 
*/ if (dp) { + if (base == 2) return STRSCAN_ERROR; fmt = STRSCAN_NUM; if (dig) { ex = (int32_t)(dp-(p-1)); dp = p-1; while (ex < 0 && *dp-- == '0') ex++, dig--; /* Skip trailing zeros. */ + if (ex <= -STRSCAN_MAXEXP) return STRSCAN_ERROR; if (base == 16) ex *= 4; } } @@ -458,10 +464,11 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) if (!lj_char_isdigit(*p)) return STRSCAN_ERROR; xx = (*p++ & 15); while (lj_char_isdigit(*p)) { - if (xx < 65536) xx = xx * 10 + (*p & 15); + xx = xx * 10 + (*p & 15); + if (xx >= STRSCAN_MAXEXP) return STRSCAN_ERROR; p++; } - ex += negx ? -(int32_t)xx : (int32_t)xx; + ex += negx ? (int32_t)(~xx+1u) : (int32_t)xx; } /* Parse suffix. */ @@ -488,16 +495,19 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) while (lj_char_isspace(*p)) p++; if (*p) return STRSCAN_ERROR; } + if (p < pe) return STRSCAN_ERROR; /* Fast path for decimal 32 bit integers. */ if (fmt == STRSCAN_INT && base == 10 && (dig < 10 || (dig == 10 && *sp <= '2' && x < 0x80000000u+neg))) { - int32_t y = neg ? -(int32_t)x : (int32_t)x; if ((opt & STRSCAN_OPT_TONUM)) { - o->n = (double)y; + o->n = neg ? -(double)x : (double)x; + return STRSCAN_NUM; + } else if (x == 0 && neg) { + o->n = -0.0; return STRSCAN_NUM; } else { - o->i = y; + o->i = neg ? (int32_t)(~x+1u) : (int32_t)x; return STRSCAN_INT; } } @@ -513,7 +523,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); /* Try to convert number to integer, if requested. 
*/ - if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT)) { + if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT) && !tvismzero(o)) { double n = o->n; int32_t i = lj_num2int(n); if (n == (lua_Number)i) { o->i = i; return STRSCAN_INT; } @@ -524,18 +534,19 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o) { - StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, + StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o, STRSCAN_OPT_TONUM); - lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM); + lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM, "bad scan format"); return (fmt != STRSCAN_ERROR); } #if LJ_DUALNUM int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o) { - StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, + StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o, STRSCAN_OPT_TOINT); - lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT); + lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT, + "bad scan format"); if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM); return (fmt != STRSCAN_ERROR); } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_strscan.h b/source/libs/luajit/LuaJIT-src/src/lj_strscan.h index 6fb0dda08cd3a3e6d40ff7e4b6420b1f67fa7139..397fb76f9c651e8c4fea767132a4397a1c13de57 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_strscan.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_strscan.h @@ -1,6 +1,6 @@ /* ** String scanning. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_STRSCAN_H @@ -22,7 +22,8 @@ typedef enum { STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64, } StrScanFmt; -LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt); +LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o, + uint32_t opt); LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o); #if LJ_DUALNUM LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o); diff --git a/source/libs/luajit/LuaJIT-src/src/lj_tab.c b/source/libs/luajit/LuaJIT-src/src/lj_tab.c index 47c0cfd34af5938e824e6aa00311b444b0fc80f9..2d0805520689b4fe4dfac1a50e53804f7ea202d4 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_tab.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_tab.c @@ -1,6 +1,6 @@ /* ** Table handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -16,29 +16,10 @@ /* -- Object hashing ------------------------------------------------------ */ -/* Hash values are masked with the table hash mask and used as an index. */ -static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) -{ - Node *n = noderef(t->node); - return &n[hash & t->hmask]; -} - -/* String hashes are precomputed when they are interned. */ -#define hashstr(t, s) hashmask(t, (s)->hash) - -#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) -#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) -#if LJ_GC64 -#define hashgcref(t, r) \ - hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) -#else -#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) -#endif - /* Hash an arbitrary key and return its anchor position in the hash table. 
*/ static Node *hashkey(const GCtab *t, cTValue *key) { - lua_assert(!tvisint(key)); + lj_assertX(!tvisint(key), "attempt to hash integer"); if (tvisstr(key)) return hashstr(t, strV(key)); else if (tvisnum(key)) @@ -57,7 +38,7 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits) { uint32_t hsize; Node *node; - lua_assert(hbits != 0); + lj_assertL(hbits != 0, "zero hash size"); if (hbits > LJ_MAX_HBITS) lj_err_msg(L, LJ_ERR_TABOV); hsize = 1u << hbits; @@ -78,7 +59,7 @@ static LJ_AINLINE void clearhpart(GCtab *t) { uint32_t i, hmask = t->hmask; Node *node = noderef(t->node); - lua_assert(t->hmask != 0); + lj_assertX(t->hmask != 0, "empty hash part"); for (i = 0; i <= hmask; i++) { Node *n = &node[i]; setmref(n->next, NULL); @@ -103,7 +84,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) /* First try to colocate the array part. */ if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { Node *nilnode; - lua_assert((sizeof(GCtab) & 7) == 0); + lj_assertL((sizeof(GCtab) & 7) == 0, "bad GCtab size"); t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); t->gct = ~LJ_TTAB; t->nomm = (uint8_t)~0; @@ -185,7 +166,8 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) GCtab *t; uint32_t asize, hmask; t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0); - lua_assert(kt->asize == t->asize && kt->hmask == t->hmask); + lj_assertL(kt->asize == t->asize && kt->hmask == t->hmask, + "mismatched size of table and template"); t->nomm = 0; /* Keys with metamethod names may be present. 
*/ asize = kt->asize; if (asize > 0) { @@ -310,7 +292,7 @@ void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) static uint32_t countint(cTValue *key, uint32_t *bins) { - lua_assert(!tvisint(key)); + lj_assertX(!tvisint(key), "bad integer key"); if (tvisnum(key)) { lua_Number nk = numV(key); int32_t k = lj_num2int(nk); @@ -385,13 +367,6 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek) lj_tab_resize(L, t, asize, hsize2hbits(total)); } -#if LJ_HASFFI -void lj_tab_rehash(lua_State *L, GCtab *t) -{ - rehashtab(L, t, niltv(L)); -} -#endif - void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) { lj_tab_resize(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); @@ -412,7 +387,7 @@ cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key) return NULL; } -cTValue *lj_tab_getstr(GCtab *t, GCstr *key) +cTValue *lj_tab_getstr(GCtab *t, const GCstr *key) { Node *n = hashstr(t, key); do { @@ -463,7 +438,8 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) if (!tvisnil(&n->val) || t->hmask == 0) { Node *nodebase = noderef(t->node); Node *collide, *freenode = getfreetop(t, nodebase); - lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1); + lj_assertL(freenode >= nodebase && freenode <= nodebase+t->hmask+1, + "bad freenode"); do { if (freenode == nodebase) { /* No free node found? */ rehashtab(L, t, key); /* Rehash table. */ @@ -471,7 +447,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) } } while (!tvisnil(&(--freenode)->key)); setfreetop(t, nodebase, freenode); - lua_assert(freenode != &G(L)->nilnode); + lj_assertL(freenode != &G(L)->nilnode, "store to fallback hash"); collide = hashkey(t, &n->key); if (collide != n) { /* Colliding node not the main node? */ while (noderef(collide->next) != n) /* Find predecessor. */ @@ -486,11 +462,33 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) /* Rechain pseudo-resurrected string keys with colliding hashes. 
*/ while (nextnode(freenode)) { Node *nn = nextnode(freenode); - if (tvisstr(&nn->key) && !tvisnil(&nn->val) && - hashstr(t, strV(&nn->key)) == n) { + if (!tvisnil(&nn->val) && hashkey(t, &nn->key) == n) { freenode->next = nn->next; nn->next = n->next; setmref(n->next, nn); + /* + ** Rechaining a resurrected string key creates a new dilemma: + ** Another string key may have originally been resurrected via + ** _any_ of the previous nodes as a chain anchor. Including + ** a node that had to be moved, which makes them unreachable. + ** It's not feasible to check for all previous nodes, so rechain + ** any string key that's currently in a non-main positions. + */ + while ((nn = nextnode(freenode))) { + if (!tvisnil(&nn->val)) { + Node *mn = hashkey(t, &nn->key); + if (mn != freenode && mn != nn) { + freenode->next = nn->next; + nn->next = mn->next; + setmref(mn->next, nn); + } else { + freenode = nn; + } + } else { + freenode = nn; + } + } + break; } else { freenode = nn; } @@ -505,7 +503,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) if (LJ_UNLIKELY(tvismzero(&n->key))) n->key.u64 = 0; lj_gc_anybarriert(L, t); - lua_assert(tvisnil(&n->val)); + lj_assertL(tvisnil(&n->val), "new hash slot is not empty"); return &n->val; } @@ -522,7 +520,7 @@ TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key) return lj_tab_newkey(L, t, &k); } -TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key) +TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key) { TValue k; Node *n = hashstr(t, key); @@ -563,103 +561,126 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key) /* -- Table traversal ----------------------------------------------------- */ -/* Get the traversal index of a key. */ -static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key) +/* Table traversal indexes: +** +** Array key index: [0 .. t->asize-1] +** Hash key index: [t->asize .. t->asize+t->hmask] +** Invalid key: ~0 +*/ + +/* Get the successor traversal index of a key. 
*/ +uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key) { TValue tmp; if (tvisint(key)) { int32_t k = intV(key); if ((uint32_t)k < t->asize) - return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ + return (uint32_t)k + 1; setnumV(&tmp, (lua_Number)k); key = &tmp; } else if (tvisnum(key)) { lua_Number nk = numV(key); int32_t k = lj_num2int(nk); if ((uint32_t)k < t->asize && nk == (lua_Number)k) - return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ + return (uint32_t)k + 1; } if (!tvisnil(key)) { Node *n = hashkey(t, key); do { if (lj_obj_equal(&n->key, key)) - return t->asize + (uint32_t)(n - noderef(t->node)); - /* Hash key indexes: [t->asize..t->asize+t->nmask] */ + return t->asize + (uint32_t)((n+1) - noderef(t->node)); } while ((n = nextnode(n))); - if (key->u32.hi == 0xfffe7fff) /* ITERN was despecialized while running. */ - return key->u32.lo - 1; - lj_err_msg(L, LJ_ERR_NEXTIDX); - return 0; /* unreachable */ + if (key->u32.hi == LJ_KEYINDEX) /* Despecialized ITERN while running. */ + return key->u32.lo; + return ~0u; /* Invalid key to next. */ } - return ~0u; /* A nil key starts the traversal. */ + return 0; /* A nil key starts the traversal. */ } -/* Advance to the next step in a table traversal. */ -int lj_tab_next(lua_State *L, GCtab *t, TValue *key) +/* Get the next key/value pair of a table traversal. */ +int lj_tab_next(GCtab *t, cTValue *key, TValue *o) { - uint32_t i = keyindex(L, t, key); /* Find predecessor key index. */ - for (i++; i < t->asize; i++) /* First traverse the array keys. */ - if (!tvisnil(arrayslot(t, i))) { - setintV(key, i); - copyTV(L, key+1, arrayslot(t, i)); + uint32_t idx = lj_tab_keyindex(t, key); /* Find successor index of key. */ + /* First traverse the array part. */ + for (; idx < t->asize; idx++) { + cTValue *a = arrayslot(t, idx); + if (LJ_LIKELY(!tvisnil(a))) { + setintV(o, idx); + o[1] = *a; return 1; } - for (i -= t->asize; i <= t->hmask; i++) { /* Then traverse the hash keys. 
*/ - Node *n = &noderef(t->node)[i]; + } + idx -= t->asize; + /* Then traverse the hash part. */ + for (; idx <= t->hmask; idx++) { + Node *n = &noderef(t->node)[idx]; if (!tvisnil(&n->val)) { - copyTV(L, key, &n->key); - copyTV(L, key+1, &n->val); + o[0] = n->key; + o[1] = n->val; return 1; } } - return 0; /* End of traversal. */ + return (int32_t)idx < 0 ? -1 : 0; /* Invalid key or end of traversal. */ } /* -- Table length calculation -------------------------------------------- */ -static MSize unbound_search(GCtab *t, MSize j) +/* Compute table length. Slow path with mixed array/hash lookups. */ +LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi) { cTValue *tv; - MSize i = j; /* i is zero or a present index */ - j++; - /* find `i' and `j' such that i is present and j is not */ - while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) { - i = j; - j *= 2; - if (j > (MSize)(INT_MAX-2)) { /* overflow? */ - /* table was built with bad purposes: resort to linear search */ - i = 1; - while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++; - return i - 1; + size_t lo = hi; + hi++; + /* Widening search for an upper bound. */ + while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) { + lo = hi; + hi += hi; + if (hi > (size_t)(0x7fffffff - 2)) { /* Punt and do a linear search. */ + lo = 1; + while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++; + return (MSize)(lo - 1); } } - /* now do a binary search between them */ - while (j - i > 1) { - MSize m = (i+j)/2; - cTValue *tvb = lj_tab_getint(t, (int32_t)m); - if (tvb && !tvisnil(tvb)) i = m; else j = m; + /* Binary search to find a non-nil to nil transition. */ + while (hi - lo > 1) { + size_t mid = (lo+hi) >> 1; + cTValue *tvb = lj_tab_getint(t, (int32_t)mid); + if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid; } - return i; + return (MSize)lo; } -/* -** Try to find a boundary in table `t'. 
A `boundary' is an integer index -** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). -*/ +/* Compute table length. Fast path. */ MSize LJ_FASTCALL lj_tab_len(GCtab *t) { - MSize j = (MSize)t->asize; - if (j > 1 && tvisnil(arrayslot(t, j-1))) { - MSize i = 1; - while (j - i > 1) { - MSize m = (i+j)/2; - if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; + size_t hi = (size_t)t->asize; + if (hi) hi--; + /* In a growing array the last array element is very likely nil. */ + if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) { + /* Binary search to find a non-nil to nil transition in the array. */ + size_t lo = 0; + while (hi - lo > 1) { + size_t mid = (lo+hi) >> 1; + if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid; } - return i-1; + return (MSize)lo; } - if (j) j--; - if (t->hmask <= 0) - return j; - return unbound_search(t, j); + /* Without a hash part, there's an implicit nil after the last element. */ + return t->hmask ? tab_len_slow(t, hi) : (MSize)hi; } +#if LJ_HASJIT +/* Verify hinted table length or compute it. */ +MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint) +{ + size_t asize = (size_t)t->asize; + cTValue *tv = arrayslot(t, hint); + if (LJ_LIKELY(hint+1 < asize)) { + if (LJ_LIKELY(!tvisnil(tv) && tvisnil(tv+1))) return (MSize)hint; + } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) && !tvisnil(tv)) { + return (MSize)hint; + } + return lj_tab_len(t); +} +#endif + diff --git a/source/libs/luajit/LuaJIT-src/src/lj_tab.h b/source/libs/luajit/LuaJIT-src/src/lj_tab.h index 71e34945e8aa1f7a5a5e7f3b1ce98af22ee149a2..4296d101674abab8013debfb4b659fdbcbc3194a 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_tab.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_tab.h @@ -1,6 +1,6 @@ /* ** Table handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_TAB_H @@ -31,6 +31,25 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi) return hi; } +/* Hash values are masked with the table hash mask and used as an index. */ +static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) +{ + Node *n = noderef(t->node); + return &n[hash & t->hmask]; +} + +/* String IDs are generated when a string is interned. */ +#define hashstr(t, s) hashmask(t, (s)->sid) + +#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) +#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) +#if LJ_GC64 +#define hashgcref(t, r) \ + hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) +#else +#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) +#endif + #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); @@ -41,23 +60,20 @@ LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t); LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); -#if LJ_HASFFI -LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); -#endif LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits); LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); /* Caveat: all getters except lj_tab_get() can return NULL! */ LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key); -LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key); +LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, const GCstr *key); LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); /* Caveat: all setters require a write barrier for the stored value. 
*/ LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); -LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); +LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key); LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); #define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize) @@ -67,7 +83,11 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); #define lj_tab_setint(L, t, key) \ (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) -LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); +LJ_FUNC uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key); +LJ_FUNCA int lj_tab_next(GCtab *t, cTValue *key, TValue *o); LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); +#if LJ_HASJIT +LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint); +#endif #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_target.h b/source/libs/luajit/LuaJIT-src/src/lj_target.h index 8dcae957f0efa9e8c5f3d6918c4dbe554861fba4..47592432f0000362289141022015ed907a586f44 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_target.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_target.h @@ -1,6 +1,6 @@ /* ** Definitions for target CPU. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_H @@ -57,8 +57,14 @@ typedef uint32_t RegSP; */ #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 typedef uint64_t RegSet; +#define RSET_BITS 6 +#define rset_picktop_(rs) ((Reg)lj_fls64(rs)) +#define rset_pickbot_(rs) ((Reg)lj_ffs64(rs)) #else typedef uint32_t RegSet; +#define RSET_BITS 5 +#define rset_picktop_(rs) ((Reg)lj_fls(rs)) +#define rset_pickbot_(rs) ((Reg)lj_ffs(rs)) #endif #define RID2RSET(r) (((RegSet)1) << (r)) @@ -69,13 +75,6 @@ typedef uint32_t RegSet; #define rset_set(rs, r) (rs |= RID2RSET(r)) #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 -#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) -#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) -#else -#define rset_picktop(rs) ((Reg)lj_fls(rs)) -#define rset_pickbot(rs) ((Reg)lj_ffs(rs)) -#endif /* -- Register allocation cost -------------------------------------------- */ @@ -152,7 +151,8 @@ typedef uint32_t RegCost; /* Return the address of an exit stub. */ static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno) { - lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL); + lj_assertX(group[exitno / EXITSTUBS_PER_GROUP] != NULL, + "exit stub group for exit %d uninitialized", exitno); return (char *)group[exitno / EXITSTUBS_PER_GROUP] + EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); } diff --git a/source/libs/luajit/LuaJIT-src/src/lj_target_arm.h b/source/libs/luajit/LuaJIT-src/src/lj_target_arm.h index 5551b1f1cef9b63d290d66c5811e8873bb66ac01..d0bbc5a5fbe8211da595a9b95acaa3899da56f38 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_target_arm.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_target_arm.h @@ -1,6 +1,6 @@ /* ** Definitions for ARM CPUs. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_ARM_H @@ -211,6 +211,7 @@ typedef enum ARMIns { /* ARMv6T2 */ ARMI_MOVW = 0xe3000000, ARMI_MOVT = 0xe3400000, + ARMI_BFI = 0xe7c00010, /* VFP */ ARMI_VMOV_D = 0xeeb00b40, diff --git a/source/libs/luajit/LuaJIT-src/src/lj_target_arm64.h b/source/libs/luajit/LuaJIT-src/src/lj_target_arm64.h index 520023ae218c19a6f96f392d9f08dda349d7eeed..92741871178de9f9b1a6877eacb1bcb9794f5f0c 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_target_arm64.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_target_arm64.h @@ -1,6 +1,6 @@ /* ** Definitions for ARM64 CPUs. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_ARM64_H @@ -31,6 +31,8 @@ enum { /* Calling conventions. */ RID_RET = RID_X0, + RID_RETLO = RID_X0, + RID_RETHI = RID_X1, RID_FPRET = RID_D0, /* These definitions must match with the *.dasc file(s): */ @@ -132,9 +134,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) #define A64F_IMMR(x) ((x) << 16) #define A64F_U16(x) ((x) << 5) #define A64F_U12(x) ((x) << 10) -#define A64F_S26(x) (x) +#define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu)) #define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5) -#define A64F_S14(x) ((x) << 5) +#define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5) #define A64F_S9(x) ((x) << 12) #define A64F_BIT(x) ((x) << 19) #define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) @@ -145,6 +147,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) #define A64F_LSL16(x) (((x) / 16) << 21) #define A64F_BSH(sh) ((sh) << 10) +/* Check for valid field range. 
*/ +#define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) + typedef enum A64Ins { A64I_S = 0x20000000, A64I_X = 0x80000000, @@ -207,6 +212,8 @@ typedef enum A64Ins { A64I_EXTRw = 0x13800000, A64I_EXTRx = 0x93c00000, + A64I_BFMw = 0x33000000, + A64I_BFMx = 0xb3400000, A64I_SBFMw = 0x13000000, A64I_SBFMx = 0x93400000, A64I_SXTBw = 0x13001c00, @@ -227,6 +234,8 @@ typedef enum A64Ins { A64I_MOVZx = 0xd2800000, A64I_MOVNw = 0x12800000, A64I_MOVNx = 0x92800000, + A64I_ADR = 0x10000000, + A64I_ADRP = 0x90000000, A64I_LDRB = 0x39400000, A64I_LDRH = 0x79400000, @@ -253,6 +262,9 @@ typedef enum A64Ins { A64I_CBZ = 0x34000000, A64I_CBNZ = 0x35000000, + A64I_BRAAZ = 0xd61f081f, + A64I_BLRAAZ = 0xd63f081f, + A64I_NOP = 0xd503201f, /* FP */ @@ -269,6 +281,7 @@ typedef enum A64Ins { A64I_FSQRTd = 0x1e61c000, A64I_LDRs = 0xbd400000, A64I_LDRd = 0xfd400000, + A64I_LDRLd = 0x5c000000, A64I_STRs = 0xbd000000, A64I_STRd = 0xfd000000, A64I_LDPs = 0x2d400000, @@ -308,8 +321,12 @@ typedef enum A64Ins { A64I_FMOV_R_D = 0x9e660000, A64I_FMOV_D_R = 0x9e670000, A64I_FMOV_DI = 0x1e601000, + A64I_MOVI_DI = 0x2f000400, } A64Ins; +#define A64I_BR_AUTH (LJ_ABI_PAUTH ? A64I_BRAAZ : A64I_BR) +#define A64I_BLR_AUTH (LJ_ABI_PAUTH ? A64I_BLRAAZ : A64I_BLR) + typedef enum A64Shift { A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR } A64Shift; diff --git a/source/libs/luajit/LuaJIT-src/src/lj_target_mips.h b/source/libs/luajit/LuaJIT-src/src/lj_target_mips.h index 740687b355d020a21229806a9d0d13c4c40255fa..38ed226a4dc022e4f13fd5dd504fd12046b27693 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_target_mips.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_target_mips.h @@ -1,6 +1,6 @@ /* ** Definitions for MIPS CPUs. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_MIPS_H @@ -223,6 +223,8 @@ typedef enum MIPSIns { MIPSI_ADDIU = 0x24000000, MIPSI_SUB = 0x00000022, MIPSI_SUBU = 0x00000023, + +#if !LJ_TARGET_MIPSR6 MIPSI_MUL = 0x70000002, MIPSI_DIV = 0x0000001a, MIPSI_DIVU = 0x0000001b, @@ -232,6 +234,15 @@ typedef enum MIPSIns { MIPSI_MFHI = 0x00000010, MIPSI_MFLO = 0x00000012, MIPSI_MULT = 0x00000018, +#else + MIPSI_MUL = 0x00000098, + MIPSI_MUH = 0x000000d8, + MIPSI_DIV = 0x0000009a, + MIPSI_DIVU = 0x0000009b, + + MIPSI_SELEQZ = 0x00000035, + MIPSI_SELNEZ = 0x00000037, +#endif MIPSI_SLL = 0x00000000, MIPSI_SRL = 0x00000002, @@ -245,6 +256,8 @@ typedef enum MIPSIns { MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ MIPSI_DROTRV = 0x00000056, + MIPSI_INS = 0x7c000004, /* MIPSXXR2 */ + MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ @@ -253,8 +266,13 @@ typedef enum MIPSIns { MIPSI_B = 0x10000000, MIPSI_J = 0x08000000, MIPSI_JAL = 0x0c000000, +#if !LJ_TARGET_MIPSR6 MIPSI_JALX = 0x74000000, MIPSI_JR = 0x00000008, +#else + MIPSI_JR = 0x00000009, + MIPSI_BALC = 0xe8000000, +#endif MIPSI_JALR = 0x0000f809, MIPSI_BEQ = 0x10000000, @@ -282,15 +300,23 @@ typedef enum MIPSIns { /* MIPS64 instructions. */ MIPSI_DADD = 0x0000002c, - MIPSI_DADDI = 0x60000000, MIPSI_DADDU = 0x0000002d, MIPSI_DADDIU = 0x64000000, MIPSI_DSUB = 0x0000002e, MIPSI_DSUBU = 0x0000002f, +#if !LJ_TARGET_MIPSR6 MIPSI_DDIV = 0x0000001e, MIPSI_DDIVU = 0x0000001f, MIPSI_DMULT = 0x0000001c, MIPSI_DMULTU = 0x0000001d, +#else + MIPSI_DDIV = 0x0000009e, + MIPSI_DMOD = 0x000000de, + MIPSI_DDIVU = 0x0000009f, + MIPSI_DMODU = 0x000000df, + MIPSI_DMUL = 0x0000009c, + MIPSI_DMUH = 0x000000dc, +#endif MIPSI_DSLL = 0x00000038, MIPSI_DSRL = 0x0000003a, @@ -308,6 +334,11 @@ typedef enum MIPSIns { MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, MIPSI_AS = LJ_32 ? 
MIPSI_SW : MIPSI_SD, +#if LJ_TARGET_MIPSR6 + MIPSI_LSA = 0x00000005, + MIPSI_DLSA = 0x00000015, + MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA, +#endif /* Extract/insert instructions. */ MIPSI_DEXTM = 0x7c000001, @@ -317,18 +348,19 @@ typedef enum MIPSIns { MIPSI_DINSU = 0x7c000006, MIPSI_DINS = 0x7c000007, - MIPSI_RINT_D = 0x4620001a, - MIPSI_RINT_S = 0x4600001a, - MIPSI_RINT = 0x4400001a, MIPSI_FLOOR_D = 0x4620000b, - MIPSI_CEIL_D = 0x4620000a, - MIPSI_ROUND_D = 0x46200008, /* FP instructions. */ MIPSI_MOV_S = 0x46000006, MIPSI_MOV_D = 0x46200006, +#if !LJ_TARGET_MIPSR6 MIPSI_MOVT_D = 0x46210011, MIPSI_MOVF_D = 0x46200011, +#else + MIPSI_MIN_D = 0x4620001C, + MIPSI_MAX_D = 0x4620001E, + MIPSI_SEL_D = 0x46200010, +#endif MIPSI_ABS_D = 0x46200005, MIPSI_NEG_D = 0x46200007, @@ -363,15 +395,23 @@ typedef enum MIPSIns { MIPSI_DMTC1 = 0x44a00000, MIPSI_DMFC1 = 0x44200000, +#if !LJ_TARGET_MIPSR6 MIPSI_BC1F = 0x45000000, MIPSI_BC1T = 0x45010000, - MIPSI_C_EQ_D = 0x46200032, MIPSI_C_OLT_S = 0x46000034, MIPSI_C_OLT_D = 0x46200034, MIPSI_C_ULT_D = 0x46200035, MIPSI_C_OLE_D = 0x46200036, MIPSI_C_ULE_D = 0x46200037, +#else + MIPSI_BC1EQZ = 0x45200000, + MIPSI_BC1NEZ = 0x45a00000, + MIPSI_CMP_EQ_D = 0x46a00002, + MIPSI_CMP_LT_S = 0x46800004, + MIPSI_CMP_LT_D = 0x46a00004, +#endif + } MIPSIns; #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_target_ppc.h b/source/libs/luajit/LuaJIT-src/src/lj_target_ppc.h index c5c991a377af7650de28481af2abdc2ab05322a9..5a1b5a7ccab77808b45056c9998adb181e12e5c3 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_target_ppc.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_target_ppc.h @@ -1,6 +1,6 @@ /* ** Definitions for PPC CPUs. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_PPC_H diff --git a/source/libs/luajit/LuaJIT-src/src/lj_target_x86.h b/source/libs/luajit/LuaJIT-src/src/lj_target_x86.h index 356f792459bfdaf696a844fc909aa6f7cb781342..6a528e82884851376a04ba14d703afe45a0ce33e 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_target_x86.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_target_x86.h @@ -1,6 +1,6 @@ /* ** Definitions for x86 and x64 CPUs. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TARGET_X86_H @@ -38,10 +38,9 @@ enum { RID_RET = RID_EAX, #if LJ_64 RID_FPRET = RID_XMM0, -#else +#endif RID_RETLO = RID_EAX, RID_RETHI = RID_EDX, -#endif /* These definitions must match with the *.dasc file(s): */ RID_BASE = RID_EDX, /* Interpreter BASE. */ @@ -117,8 +116,8 @@ enum { #if LJ_64 /* Prefer the low 8 regs of each type to reduce REX prefixes. */ -#undef rset_picktop -#define rset_picktop(rs) (lj_fls(lj_bswap(rs)) ^ 0x18) +#undef rset_picktop_ +#define rset_picktop_(rs) (lj_fls(lj_bswap(rs)) ^ 0x18) #endif /* -- Spill slots --------------------------------------------------------- */ @@ -165,6 +164,8 @@ typedef struct { #define EXITSTUB_SPACING (2+2) #define EXITSTUBS_PER_GROUP 32 +#define EXITTRACE_VMSTATE 1 /* g->vmstate has traceno on exit. */ + /* -- x86 ModRM operand encoding ------------------------------------------ */ typedef enum { @@ -228,16 +229,10 @@ typedef enum { /* Note: little-endian byte-order! */ XI_FLDZ = 0xeed9, XI_FLD1 = 0xe8d9, - XI_FLDLG2 = 0xecd9, - XI_FLDLN2 = 0xedd9, XI_FDUP = 0xc0d9, /* Really fld st0. */ XI_FPOP = 0xd8dd, /* Really fstp st0. */ XI_FPOP1 = 0xd9dd, /* Really fstp st1. 
*/ XI_FRNDINT = 0xfcd9, - XI_FSIN = 0xfed9, - XI_FCOS = 0xffd9, - XI_FPTAN = 0xf2d9, - XI_FPATAN = 0xf3d9, XI_FSCALE = 0xfdd9, XI_FYL2X = 0xf1d9, diff --git a/source/libs/luajit/LuaJIT-src/src/lj_trace.c b/source/libs/luajit/LuaJIT-src/src/lj_trace.c index d85b47f8013e1529c59944c402e4a37bc0640e87..144461b36f362982f5dc59c926415624b60dae56 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_trace.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_trace.c @@ -1,6 +1,6 @@ /* ** Trace management. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_trace_c @@ -30,6 +30,7 @@ #include "lj_vm.h" #include "lj_vmevent.h" #include "lj_target.h" +#include "lj_prng.h" /* -- Error handling ------------------------------------------------------ */ @@ -104,7 +105,8 @@ static void perftools_addtrace(GCtrace *T) name++; else name = "(string)"; - lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); + lj_assertX(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc, + "trace PC out of range"); lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); if (!fp) { char fname[40]; @@ -151,6 +153,9 @@ static void trace_save(jit_State *J, GCtrace *T) newwhite(J2G(J), T); T->gct = ~LJ_TTRACE; T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */ +#if LJ_ABI_PAUTH + T->mcauth = lj_ptr_sign((ASMFunction)T->mcode, T); +#endif p += szins; TRACE_APPENDVEC(snap, nsnap, SnapShot) TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) @@ -183,7 +188,7 @@ void lj_trace_reenableproto(GCproto *pt) { if ((pt->flags & PROTO_ILOOP)) { BCIns *bc = proto_bc(pt); - BCPos i, sizebc = pt->sizebc;; + BCPos i, sizebc = pt->sizebc; pt->flags &= ~PROTO_ILOOP; if (bc_op(bc[0]) == BC_IFUNCF) setbc_op(&bc[0], BC_FUNCF); @@ -205,27 +210,28 @@ static void trace_unpatch(jit_State *J, GCtrace *T) return; /* No need to unpatch branches in parent traces (yet). 
*/ switch (bc_op(*pc)) { case BC_JFORL: - lua_assert(traceref(J, bc_d(*pc)) == T); + lj_assertJ(traceref(J, bc_d(*pc)) == T, "JFORL references other trace"); *pc = T->startins; pc += bc_j(T->startins); - lua_assert(bc_op(*pc) == BC_JFORI); + lj_assertJ(bc_op(*pc) == BC_JFORI, "FORL does not point to JFORI"); setbc_op(pc, BC_FORI); break; case BC_JITERL: case BC_JLOOP: - lua_assert(op == BC_ITERL || op == BC_LOOP || bc_isret(op)); + lj_assertJ(op == BC_ITERL || op == BC_ITERN || op == BC_LOOP || + bc_isret(op), "bad original bytecode %d", op); *pc = T->startins; break; case BC_JMP: - lua_assert(op == BC_ITERL); + lj_assertJ(op == BC_ITERL, "bad original bytecode %d", op); pc += bc_j(*pc)+2; if (bc_op(*pc) == BC_JITERL) { - lua_assert(traceref(J, bc_d(*pc)) == T); + lj_assertJ(traceref(J, bc_d(*pc)) == T, "JITERL references other trace"); *pc = T->startins; } break; case BC_JFUNCF: - lua_assert(op == BC_FUNCF); + lj_assertJ(op == BC_FUNCF, "bad original bytecode %d", op); *pc = T->startins; break; default: /* Already unpatched. */ @@ -237,7 +243,8 @@ static void trace_unpatch(jit_State *J, GCtrace *T) static void trace_flushroot(jit_State *J, GCtrace *T) { GCproto *pt = &gcref(T->startpt)->pt; - lua_assert(T->root == 0 && pt != NULL); + lj_assertJ(T->root == 0, "not a root trace"); + lj_assertJ(pt != NULL, "trace has no prototype"); /* First unpatch any modified bytecode. */ trace_unpatch(J, T); /* Unlink root trace from chain anchored in prototype. */ @@ -353,7 +360,8 @@ void lj_trace_freestate(global_State *g) { /* This assumes all traces have already been freed. */ ptrdiff_t i; for (i = 1; i < (ptrdiff_t)J->sizetrace; i++) - lua_assert(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL); + lj_assertG(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL, + "trace still allocated"); } #endif lj_mcode_free(J); @@ -368,8 +376,13 @@ void lj_trace_freestate(global_State *g) /* Blacklist a bytecode instruction. 
*/ static void blacklist_pc(GCproto *pt, BCIns *pc) { - setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP); - pt->flags |= PROTO_ILOOP; + if (bc_op(*pc) == BC_ITERN) { + setbc_op(pc, BC_ITERC); + setbc_op(pc+1+bc_j(pc[1]), BC_JMP); + } else { + setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP); + pt->flags |= PROTO_ILOOP; + } } /* Penalize a bytecode instruction. */ @@ -380,7 +393,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */ /* First try to bump its hotcount several times. */ val = ((uint32_t)J->penalty[i].val << 1) + - LJ_PRNG_BITS(J, PENALTY_RNDBITS); + (lj_prng_u64(&J2G(J)->prng) & ((1u<<PENALTY_RNDBITS)-1)); if (val > PENALTY_MAX) { blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */ return; @@ -406,10 +419,11 @@ static void trace_start(jit_State *J) TraceNo traceno; if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ - if (J->parent == 0 && J->exitno == 0) { + if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) { /* Lazy bytecode patching to disable hotcount events. */ - lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || - bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); + lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || + bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF, + "bad hot bytecode %d", bc_op(*J->pc)); setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP); J->pt->flags |= PROTO_ILOOP; } @@ -417,10 +431,17 @@ static void trace_start(jit_State *J) return; } + /* Ensuring forward progress for BC_ITERN can trigger hotcount again. */ + if (!J->parent && bc_op(*J->pc) == BC_JLOOP) { /* Already compiled. */ + J->state = LJ_TRACE_IDLE; /* Silently ignored. */ + return; + } + /* Get a new trace number. */ traceno = trace_findfree(J); if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? 
*/ - lua_assert((J2G(J)->hookmask & HOOK_GC) == 0); + lj_assertJ((J2G(J)->hookmask & HOOK_GC) == 0, + "recorder called from GC hook"); lj_trace_flushall(J->L); J->state = LJ_TRACE_IDLE; /* Silently ignored. */ return; @@ -489,6 +510,7 @@ static void trace_stop(jit_State *J) J->cur.nextroot = pt->trace; pt->trace = (TraceNo1)traceno; break; + case BC_ITERN: case BC_RET: case BC_RET0: case BC_RET1: @@ -496,10 +518,14 @@ static void trace_stop(jit_State *J) goto addroot; case BC_JMP: /* Patch exit branch in parent to side trace entry. */ - lua_assert(J->parent != 0 && J->cur.root != 0); + lj_assertJ(J->parent != 0 && J->cur.root != 0, "not a side trace"); lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode); /* Avoid compiling a side trace twice (stack resizing uses parent exit). */ - traceref(J, J->parent)->snap[J->exitno].count = SNAPCOUNT_DONE; + { + SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; + snap->count = SNAPCOUNT_DONE; + if (J->cur.topslot > snap->topslot) snap->topslot = J->cur.topslot; + } /* Add to side trace chain in root trace. */ { GCtrace *root = traceref(J, J->cur.root); @@ -515,7 +541,7 @@ static void trace_stop(jit_State *J) traceref(J, J->exitno)->link = traceno; break; default: - lua_assert(0); + lj_assertJ(0, "bad stop bytecode %d", op); break; } @@ -536,8 +562,8 @@ static void trace_stop(jit_State *J) static int trace_downrec(jit_State *J) { /* Restart recording at the return instruction. */ - lua_assert(J->pt != NULL); - lua_assert(bc_isret(bc_op(*J->pc))); + lj_assertJ(J->pt != NULL, "no active prototype"); + lj_assertJ(bc_isret(bc_op(*J->pc)), "not at a return bytecode"); if (bc_op(*J->pc) == BC_RETM) return 0; /* NYI: down-recursion with RETM. 
*/ J->parent = 0; @@ -587,21 +613,27 @@ static int trace_abort(jit_State *J) J->cur.link = 0; J->cur.linktype = LJ_TRLINK_NONE; lj_vmevent_send(L, TRACE, - TValue *frame; + cTValue *bot = tvref(L->stack)+LJ_FR2; + cTValue *frame; const BCIns *pc; - GCfunc *fn; + BCPos pos = 0; setstrV(L, L->top++, lj_str_newlit(L, "abort")); setintV(L->top++, traceno); /* Find original Lua function call to generate a better error message. */ - frame = J->L->base-1; - pc = J->pc; - while (!isluafunc(frame_func(frame))) { - pc = (frame_iscont(frame) ? frame_contpc(frame) : frame_pc(frame)) - 1; - frame = frame_prev(frame); + for (frame = J->L->base-1, pc = J->pc; ; frame = frame_prev(frame)) { + if (isluafunc(frame_func(frame))) { + pos = proto_bcpos(funcproto(frame_func(frame)), pc); + break; + } else if (frame_prev(frame) <= bot) { + break; + } else if (frame_iscont(frame)) { + pc = frame_contpc(frame) - 1; + } else { + pc = frame_pc(frame) - 1; + } } - fn = frame_func(frame); - setfuncV(L, L->top++, fn); - setintV(L->top++, proto_bcpos(funcproto(fn), pc)); + setfuncV(L, L->top++, frame_func(frame)); + setintV(L->top++, pos); copyTV(L, L->top++, restorestack(L, errobj)); copyTV(L, L->top++, &J->errinfo); ); @@ -644,15 +676,22 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */ trace_start(J); lj_dispatch_update(J2G(J)); - break; + if (J->state != LJ_TRACE_RECORD_1ST) + break; + /* fallthrough */ + case LJ_TRACE_RECORD_1ST: + J->state = LJ_TRACE_RECORD; + /* fallthrough */ case LJ_TRACE_RECORD: trace_pendpatch(J, 0); setvmstate(J2G(J), RECORD); lj_vmevent_send_(L, RECORD, - /* Save/restore tmptv state for trace recorder. */ + /* Save/restore state for trace recorder. */ TValue savetv = J2G(J)->tmptv; TValue savetv2 = J2G(J)->tmptv2; + TraceNo parent = J->parent; + ExitNo exitno = J->exitno; setintV(L->top++, J->cur.traceno); setfuncV(L, L->top++, J->fn); setintV(L->top++, J->pt ? 
(int32_t)proto_bcpos(J->pt, J->pc) : -1); @@ -660,6 +699,8 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) , J2G(J)->tmptv = savetv; J2G(J)->tmptv2 = savetv2; + J->parent = parent; + J->exitno = exitno; ); lj_record_ins(J); break; @@ -750,7 +791,7 @@ static void trace_hotside(jit_State *J, const BCIns *pc) isluafunc(curr_func(J->L)) && snap->count != SNAPCOUNT_DONE && ++snap->count >= J->param[JIT_P_hotexit]) { - lua_assert(J->state == LJ_TRACE_IDLE); + lj_assertJ(J->state == LJ_TRACE_IDLE, "hot side exit while recording"); /* J->parent is non-zero for a side trace. */ J->state = LJ_TRACE_START; lj_trace_ins(J, pc); @@ -782,7 +823,9 @@ typedef struct ExitDataCP { static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud) { ExitDataCP *exd = (ExitDataCP *)ud; - cframe_errfunc(L->cframe) = -1; /* Inherit error function. */ + /* Always catch error here and don't call error function. */ + cframe_errfunc(L->cframe) = 0; + cframe_nres(L->cframe) = -2*LUAI_MAXSTACK*(int)sizeof(TValue); exd->pc = lj_snap_restore(exd->J, exd->exptr); UNUSED(dummy); return NULL; @@ -812,7 +855,7 @@ static void trace_exit_regs(lua_State *L, ExitState *ex) } #endif -#ifdef EXITSTATE_PCREG +#if defined(EXITSTATE_PCREG) || (LJ_UNWIND_JIT && !EXITTRACE_VMSTATE) /* Determine trace number from pc of exit instruction. 
*/ static TraceNo trace_exit_find(jit_State *J, MCode *pc) { @@ -822,7 +865,7 @@ static TraceNo trace_exit_find(jit_State *J, MCode *pc) if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode)) return traceno; } - lua_assert(0); + lj_assertJ(0, "bad exit pc"); return 0; } #endif @@ -834,29 +877,39 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) lua_State *L = J->L; ExitState *ex = (ExitState *)exptr; ExitDataCP exd; - int errcode; - const BCIns *pc; + int errcode, exitcode = J->exitcode; + TValue exiterr; + const BCIns *pc, *retpc; void *cf; GCtrace *T; + + setnilV(&exiterr); + if (exitcode) { /* Trace unwound with error code. */ + J->exitcode = 0; + copyTV(L, &exiterr, L->top-1); + } + #ifdef EXITSTATE_PCREG J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]); #endif T = traceref(J, J->parent); UNUSED(T); #ifdef EXITSTATE_CHECKEXIT if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */ - lua_assert(T->root != 0); + lj_assertJ(T->root != 0, "stack check in root trace"); J->exitno = T->ir[REF_BASE].op2; J->parent = T->ir[REF_BASE].op1; T = traceref(J, J->parent); } #endif - lua_assert(T != NULL && J->exitno < T->nsnap); + lj_assertJ(T != NULL && J->exitno < T->nsnap, "bad trace or exit number"); exd.J = J; exd.exptr = exptr; errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp); if (errcode) return -errcode; /* Return negated error code. */ + if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. */ + if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE))) lj_vmevent_send(L, TEXIT, lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); @@ -868,29 +921,17 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) pc = exd.pc; cf = cframe_raw(L->cframe); setcframe_pc(cf, pc); - if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) { + if (exitcode) { + return -exitcode; + } else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) { /* Just exit to interpreter. 
*/ } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { if (!(G(L)->hookmask & HOOK_GC)) lj_gc_step(L); /* Exited because of GC: drive GC forward. */ - } else { + } else if ((J->flags & JIT_F_ON)) { trace_hotside(J, pc); } - if (bc_op(*pc) == BC_JLOOP) { - BCIns *retpc = &traceref(J, bc_d(*pc))->startins; - if (bc_isret(bc_op(*retpc))) { - if (J->state == LJ_TRACE_RECORD) { - J->patchins = *pc; - J->patchpc = (BCIns *)pc; - *J->patchpc = *retpc; - J->bcskip = 1; - } else { - pc = retpc; - setcframe_pc(cf, pc); - } - } - } - /* Return MULTRES or 0. */ + /* Return MULTRES or 0 or -17. */ ERRNO_RESTORE switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: @@ -899,6 +940,18 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); case BC_TSETM: return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc)); + case BC_JLOOP: + retpc = &traceref(J, bc_d(*pc))->startins; + if (bc_isret(bc_op(*retpc)) || bc_op(*retpc) == BC_ITERN) { + /* Dispatch to original ins to ensure forward progress. */ + if (J->state != LJ_TRACE_RECORD) return -17; + /* Unpatch bytecode when recording. */ + J->patchins = *pc; + J->patchpc = (BCIns *)pc; + *J->patchpc = *retpc; + J->bcskip = 1; + } + return 0; default: if (bc_op(*pc) >= BC_FUNCF) return (int)((BCReg)(L->top - L->base) + 1); @@ -906,4 +959,41 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) } } +#if LJ_UNWIND_JIT +/* Given an mcode address determine trace exit address for unwinding. 
*/ +uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep) +{ +#if EXITTRACE_VMSTATE + TraceNo traceno = J2G(J)->vmstate; +#else + TraceNo traceno = trace_exit_find(J, (MCode *)addr); +#endif + GCtrace *T = traceref(J, traceno); + if (T +#if EXITTRACE_VMSTATE + && addr >= (uintptr_t)T->mcode && addr < (uintptr_t)T->mcode + T->szmcode +#endif + ) { + SnapShot *snap = T->snap; + SnapNo lo = 0, exitno = T->nsnap; + uintptr_t ofs = (uintptr_t)((MCode *)addr - T->mcode); /* MCode units! */ + /* Rightmost binary search for mcode offset to determine exit number. */ + do { + SnapNo mid = (lo+exitno) >> 1; + if (ofs < snap[mid].mcofs) exitno = mid; else lo = mid + 1; + } while (lo < exitno); + exitno--; + *ep = exitno; +#ifdef EXITSTUBS_PER_GROUP + return (uintptr_t)exitstub_addr(J, exitno); +#else + return (uintptr_t)exitstub_trace_addr(T, exitno); +#endif + } + /* Cannot correlate addr with trace/exit. This will be fatal. */ + lj_assertJ(0, "bad exit pc"); + return 0; +} +#endif + #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_trace.h b/source/libs/luajit/LuaJIT-src/src/lj_trace.h index 22cae741f3f818426eec4186af102b68786c389c..ff2c7616e08c7d5da7b7a3ec49215caf757fc2a9 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_trace.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_trace.h @@ -1,6 +1,6 @@ /* ** Trace management. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_TRACE_H @@ -37,6 +37,9 @@ LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc); LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); +#if LJ_UNWIND_EXT +LJ_FUNC uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep); +#endif /* Signal asynchronous abort of trace or end of trace. 
*/ #define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) diff --git a/source/libs/luajit/LuaJIT-src/src/lj_traceerr.h b/source/libs/luajit/LuaJIT-src/src/lj_traceerr.h index 1363c4f327247d33cf254ea05a42880ec301492e..d283a9de61da7ca489938089e85358e12156582e 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_traceerr.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_traceerr.h @@ -1,6 +1,6 @@ /* ** Trace compiler error messages. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ /* This file may be included multiple times with different TREDEF macros. */ @@ -13,7 +13,7 @@ TREDEF(STACKOV, "trace too deep") TREDEF(SNAPOV, "too many snapshots") TREDEF(BLACKL, "blacklisted") TREDEF(RETRY, "retry recording") -TREDEF(NYIBC, "NYI: bytecode %d") +TREDEF(NYIBC, "NYI: bytecode %s") /* Recording loop ops. */ TREDEF(LLEAVE, "leaving loop in root trace") diff --git a/source/libs/luajit/LuaJIT-src/src/lj_udata.c b/source/libs/luajit/LuaJIT-src/src/lj_udata.c index bd0321b834bc29c556e66867e61a3c411c96b184..1e75be8db92827e24d8484dac44e7fb749f8b9ca 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_udata.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_udata.c @@ -1,6 +1,6 @@ /* ** Userdata handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #define lj_udata_c @@ -8,6 +8,7 @@ #include "lj_obj.h" #include "lj_gc.h" +#include "lj_err.h" #include "lj_udata.h" GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) @@ -32,3 +33,30 @@ void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud) lj_mem_free(g, ud, sizeudata(ud)); } +#if LJ_64 +void *lj_lightud_intern(lua_State *L, void *p) +{ + global_State *g = G(L); + uint64_t u = (uint64_t)p; + uint32_t up = lightudup(u); + uint32_t *segmap = mref(g->gc.lightudseg, uint32_t); + MSize segnum = g->gc.lightudnum; + if (segmap) { + MSize seg; + for (seg = 0; seg <= segnum; seg++) + if (segmap[seg] == up) /* Fast path. */ + return (void *)(((uint64_t)seg << LJ_LIGHTUD_BITS_LO) | lightudlo(u)); + segnum++; + /* Leave last segment unused to avoid clash with ITERN key. */ + if (segnum >= (1 << LJ_LIGHTUD_BITS_SEG)-1) lj_err_msg(L, LJ_ERR_BADLU); + } + if (!((segnum-1) & segnum) && segnum != 1) { + lj_mem_reallocvec(L, segmap, segnum, segnum ? 2*segnum : 2u, uint32_t); + setmref(g->gc.lightudseg, segmap); + } + g->gc.lightudnum = segnum; + segmap[segnum] = up; + return (void *)(((uint64_t)segnum << LJ_LIGHTUD_BITS_LO) | lightudlo(u)); +} +#endif + diff --git a/source/libs/luajit/LuaJIT-src/src/lj_udata.h b/source/libs/luajit/LuaJIT-src/src/lj_udata.h index f271a42d327c4f2112b00e46828b39f0da950758..d6bab237d7f2a15b79eca068dd3986562f9770f6 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_udata.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_udata.h @@ -1,6 +1,6 @@ /* ** Userdata handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LJ_UDATA_H @@ -10,5 +10,8 @@ LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env); LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud); +#if LJ_64 +LJ_FUNC void * LJ_FASTCALL lj_lightud_intern(lua_State *L, void *p); +#endif #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_vm.h b/source/libs/luajit/LuaJIT-src/src/lj_vm.h index 1cc7eed782f0d1d1e98332c51dc2687d2e96091a..9cc42613d318068aa9dda91e8348cb64ff808602 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_vm.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_vm.h @@ -1,6 +1,6 @@ /* ** Assembler VM interface definitions. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_VM_H @@ -26,6 +26,9 @@ LJ_ASMF void lj_vm_unwind_ff_eh(void); #if LJ_TARGET_X86ORX64 LJ_ASMF void lj_vm_unwind_rethrow(void); #endif +#if LJ_TARGET_MIPS +LJ_ASMF void lj_vm_unwind_stub(void); +#endif /* Miscellaneous functions. */ #if LJ_TARGET_X86ORX64 @@ -48,10 +51,11 @@ LJ_ASMF void lj_vm_inshook(void); LJ_ASMF void lj_vm_rethook(void); LJ_ASMF void lj_vm_callhook(void); LJ_ASMF void lj_vm_profhook(void); +LJ_ASMF void lj_vm_IITERN(void); /* Trace exit handling. */ -LJ_ASMF void lj_vm_exit_handler(void); -LJ_ASMF void lj_vm_exit_interp(void); +LJ_ASMF char lj_vm_exit_handler[]; +LJ_ASMF char lj_vm_exit_interp[]; /* Internal math helper functions. 
*/ #if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP) @@ -79,10 +83,6 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); LJ_ASMF void lj_vm_floor_sse(void); LJ_ASMF void lj_vm_ceil_sse(void); LJ_ASMF void lj_vm_trunc_sse(void); -LJ_ASMF void lj_vm_powi_sse(void); -#define lj_vm_powi NULL -#else -LJ_ASMF double lj_vm_powi(double, int32_t); #endif #if LJ_TARGET_PPC || LJ_TARGET_ARM64 #define lj_vm_trunc trunc @@ -92,14 +92,10 @@ LJ_ASMF double lj_vm_trunc(double); LJ_ASMF double lj_vm_trunc_sf(double); #endif #endif -#ifdef LUAJIT_NO_EXP2 -LJ_ASMF double lj_vm_exp2(double); -#else -#define lj_vm_exp2 exp2 -#endif #if LJ_HASFFI LJ_ASMF int lj_vm_errno(void); #endif +LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx); #endif /* Continuations for metamethods. */ @@ -115,6 +111,6 @@ LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */ LJ_ASMF char lj_vm_asm_begin[]; /* Bytecode offsets are relative to lj_vm_asm_begin. */ -#define makeasmfunc(ofs) ((ASMFunction)(lj_vm_asm_begin + (ofs))) +#define makeasmfunc(ofs) lj_ptr_sign((ASMFunction)(lj_vm_asm_begin + (ofs)), 0) #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lj_vmevent.c b/source/libs/luajit/LuaJIT-src/src/lj_vmevent.c index 86640804bed70feb397d7e6c2e9848cd87d7b3be..070c6144aab43c659829f53172c5042d45954206 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_vmevent.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_vmevent.c @@ -1,6 +1,6 @@ /* ** VM event handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #include <stdio.h> diff --git a/source/libs/luajit/LuaJIT-src/src/lj_vmevent.h b/source/libs/luajit/LuaJIT-src/src/lj_vmevent.h index 050fb4dd24a4860e4173002183e2c0eec15cb389..8a99536068fd935cf8049dd6d69a8aebddefa326 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_vmevent.h +++ b/source/libs/luajit/LuaJIT-src/src/lj_vmevent.h @@ -1,6 +1,6 @@ /* ** VM event handling. 
-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef _LJ_VMEVENT_H @@ -24,9 +24,10 @@ /* VM event IDs. */ typedef enum { VMEVENT_DEF(BC, 0x00003883), - VMEVENT_DEF(TRACE, 0xb2d91467), - VMEVENT_DEF(RECORD, 0x9284bf4f), - VMEVENT_DEF(TEXIT, 0xb29df2b0), + VMEVENT_DEF(TRACE, 0x12d91467), + VMEVENT_DEF(RECORD, 0x1284bf4f), + VMEVENT_DEF(TEXIT, 0x129df2b0), + VMEVENT_DEF(ERRFIN, 0x12d93888), LJ_VMEVENT__MAX } VMEvent; diff --git a/source/libs/luajit/LuaJIT-src/src/lj_vmmath.c b/source/libs/luajit/LuaJIT-src/src/lj_vmmath.c index b231d3e811e33a033155f8e2607098cc9d8d3b6b..2c9b96cce4c2d4589ac48208c919a7b1fb91b743 100644 --- a/source/libs/luajit/LuaJIT-src/src/lj_vmmath.c +++ b/source/libs/luajit/LuaJIT-src/src/lj_vmmath.c @@ -1,6 +1,6 @@ /* ** Math helper functions for assembler VM. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define lj_vmmath_c @@ -34,7 +34,18 @@ LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } #endif -/* -- Helper functions for generated machine code ------------------------- */ +/* -- Helper functions ---------------------------------------------------- */ + +/* Required to prevent the C compiler from applying FMA optimizations. +** +** Yes, there's -ffp-contract and the FP_CONTRACT pragma ... in theory. +** But the current state of C compilers is a mess in this regard. +** Also, this function is not performance sensitive at all. 
+*/ +LJ_NOINLINE static double lj_vm_floormul(double x, double y) +{ + return lj_vm_floor(x / y) * y; +} double lj_vm_foldarith(double x, double y, int op) { @@ -43,30 +54,32 @@ double lj_vm_foldarith(double x, double y, int op) case IR_SUB - IR_ADD: return x-y; break; case IR_MUL - IR_ADD: return x*y; break; case IR_DIV - IR_ADD: return x/y; break; - case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break; + case IR_MOD - IR_ADD: return x-lj_vm_floormul(x, y); break; case IR_POW - IR_ADD: return pow(x, y); break; case IR_NEG - IR_ADD: return -x; break; case IR_ABS - IR_ADD: return fabs(x); break; #if LJ_HASJIT - case IR_ATAN2 - IR_ADD: return atan2(x, y); break; case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; - case IR_MIN - IR_ADD: return x > y ? y : x; break; - case IR_MAX - IR_ADD: return x < y ? y : x; break; + case IR_MIN - IR_ADD: return x < y ? x : y; break; + case IR_MAX - IR_ADD: return x > y ? x : y; break; #endif default: return x; } } +/* -- Helper functions for generated machine code ------------------------- */ + #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) { uint32_t y, ua, ub; - lua_assert(b != 0); /* This must be checked before using this function. */ - ua = a < 0 ? (uint32_t)-a : (uint32_t)a; - ub = b < 0 ? (uint32_t)-b : (uint32_t)b; + /* This must be checked before using this function. */ + lj_assertX(b != 0, "modulo with zero divisor"); + ua = a < 0 ? ~(uint32_t)a+1u : (uint32_t)a; + ub = b < 0 ? ~(uint32_t)b+1u : (uint32_t)b; y = ua % ub; if (y != 0 && (a^b) < 0) y = y - ub; - if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y; + if (((int32_t)y^b) < 0) y = ~y+1u; return (int32_t)y; } #endif @@ -80,47 +93,6 @@ double lj_vm_log2(double a) } #endif -#ifdef LUAJIT_NO_EXP2 -double lj_vm_exp2(double a) -{ - return exp(a * 0.6931471805599453); -} -#endif - -#if !LJ_TARGET_X86ORX64 -/* Unsigned x^k. 
*/ -static double lj_vm_powui(double x, uint32_t k) -{ - double y; - lua_assert(k != 0); - for (; (k & 1) == 0; k >>= 1) x *= x; - y = x; - if ((k >>= 1) != 0) { - for (;;) { - x *= x; - if (k == 1) break; - if (k & 1) y *= x; - k >>= 1; - } - y *= x; - } - return y; -} - -/* Signed x^k. */ -double lj_vm_powi(double x, int32_t k) -{ - if (k > 1) - return lj_vm_powui(x, (uint32_t)k); - else if (k == 1) - return x; - else if (k == 0) - return 1.0; - else - return 1.0 / lj_vm_powui(x, (uint32_t)-k); -} -#endif - /* Computes fpm(x) for extended math functions. */ double lj_vm_foldfpm(double x, int fpm) { @@ -129,15 +101,9 @@ double lj_vm_foldfpm(double x, int fpm) case IRFPM_CEIL: return lj_vm_ceil(x); case IRFPM_TRUNC: return lj_vm_trunc(x); case IRFPM_SQRT: return sqrt(x); - case IRFPM_EXP: return exp(x); - case IRFPM_EXP2: return lj_vm_exp2(x); case IRFPM_LOG: return log(x); case IRFPM_LOG2: return lj_vm_log2(x); - case IRFPM_LOG10: return log10(x); - case IRFPM_SIN: return sin(x); - case IRFPM_COS: return cos(x); - case IRFPM_TAN: return tan(x); - default: lua_assert(0); + default: lj_assertX(0, "bad fpm %d", fpm); } return 0; } diff --git a/source/libs/luajit/LuaJIT-src/src/ljamalg.c b/source/libs/luajit/LuaJIT-src/src/ljamalg.c index f1f28623fa9874a251e263f833c689294d5f0ad5..26601d8cd6f8ed533afd590349ae573c144aa174 100644 --- a/source/libs/luajit/LuaJIT-src/src/ljamalg.c +++ b/source/libs/luajit/LuaJIT-src/src/ljamalg.c @@ -1,16 +1,6 @@ /* ** LuaJIT core and libraries amalgamation. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -+--------------------------------------------------------------------------+ -| WARNING: Compiling the amalgamation needs a lot of virtual memory | -| (around 300 MB with GCC 4.x)! If you don't have enough physical memory | -| your machine will start swapping to disk and the compile will not finish | -| within a reasonable amount of time. 
| -| So either compile on a bigger machine or use the non-amalgamated build. | -+--------------------------------------------------------------------------+ +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #define ljamalg_c @@ -28,6 +18,7 @@ #include "lua.h" #include "lauxlib.h" +#include "lj_assert.c" #include "lj_gc.c" #include "lj_err.c" #include "lj_char.c" @@ -40,6 +31,7 @@ #include "lj_udata.c" #include "lj_meta.c" #include "lj_debug.c" +#include "lj_prng.c" #include "lj_state.c" #include "lj_dispatch.c" #include "lj_vmevent.c" @@ -47,6 +39,7 @@ #include "lj_strscan.c" #include "lj_strfmt.c" #include "lj_strfmt_num.c" +#include "lj_serialize.c" #include "lj_api.c" #include "lj_profile.c" #include "lj_lex.c" @@ -93,5 +86,6 @@ #include "lib_bit.c" #include "lib_jit.c" #include "lib_ffi.c" +#include "lib_buffer.c" #include "lib_init.c" diff --git a/source/libs/luajit/LuaJIT-src/src/lua.h b/source/libs/luajit/LuaJIT-src/src/lua.h index b918366cbdf0f034a7abab6a1bfc072cf62f0b7a..ceca315aac48c9df23f815884fff7a27489d9e21 100644 --- a/source/libs/luajit/LuaJIT-src/src/lua.h +++ b/source/libs/luajit/LuaJIT-src/src/lua.h @@ -1,7 +1,7 @@ /* ** $Id: lua.h,v 1.218.1.5 2008/08/06 13:30:12 roberto Exp $ ** Lua - An Extensible Extension Language -** Lua.org, PUC-Rio, Brazil (http://www.lua.org) +** Lua.org, PUC-Rio, Brazil (https://www.lua.org) ** See Copyright Notice at the end of this file */ @@ -103,9 +103,11 @@ typedef LUA_NUMBER lua_Number; /* type for integer functions */ typedef LUA_INTEGER lua_Integer; + /* communication with LuaJiTTeX */ LUA_API int luajittex_choose_hash_function; + /* ** state manipulation */ @@ -353,15 +355,16 @@ LUA_API const lua_Number *lua_version (lua_State *L); LUA_API void lua_copy (lua_State *L, int fromidx, int toidx); LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum); LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum); - + #define LUA_OPEQ 0 #define LUA_OPLT 1 #define 
LUA_OPLE 2 -/* see http://comments.gmane.org/gmane.comp.programming.swig/18673 */ +/* see https://sourceforge.net/p/swig/mailman/swig-user/thread/4FB2AEBE.9010807%40fultondesigns.co.uk/#msg29268198 */ # define lua_rawlen lua_objlen + /* From Lua 5.3. */ LUA_API int lua_isyieldable (lua_State *L); diff --git a/source/libs/luajit/LuaJIT-src/src/luaconf.h b/source/libs/luajit/LuaJIT-src/src/luaconf.h index c2d29d9492203524fcda6c6dadcabe40ce186624..7cd2edb5a4ae9cc57568a97914ec8cca03fd98e4 100644 --- a/source/libs/luajit/LuaJIT-src/src/luaconf.h +++ b/source/libs/luajit/LuaJIT-src/src/luaconf.h @@ -1,6 +1,6 @@ /* ** Configuration header. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h */ #ifndef luaconf_h @@ -9,7 +9,6 @@ #ifndef WINVER #define WINVER 0x0501 #endif -#include <limits.h> #include <stddef.h> /* Default path for loading Lua and C modules with require(). */ @@ -37,7 +36,6 @@ #endif #define LUA_LROOT "/usr/local" #define LUA_LUADIR "/lua/5.1/" -#define LUA_LJDIR "/luajit-2.1.0-beta3/" #ifdef LUA_ROOT #define LUA_JROOT LUA_ROOT @@ -51,7 +49,11 @@ #define LUA_RCPATH #endif -#define LUA_JPATH ";" LUA_JROOT "/share" LUA_LJDIR "?.lua" +#ifndef LUA_LJDIR +#define LUA_LJDIR LUA_JROOT "/share/luajit-2.1" +#endif + +#define LUA_JPATH ";" LUA_LJDIR "/?.lua" #define LUA_LLDIR LUA_LROOT "/share" LUA_LUADIR #define LUA_LCDIR LUA_LROOT "/" LUA_LMULTILIB LUA_LUADIR #define LUA_LLPATH ";" LUA_LLDIR "?.lua;" LUA_LLDIR "?/init.lua" @@ -136,7 +138,7 @@ #define LUALIB_API LUA_API -/* Support for internal assertions. */ +/* Compatibility support for assertions. 
*/ #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) #include <assert.h> #endif diff --git a/source/libs/luajit/LuaJIT-src/src/luajit.c b/source/libs/luajit/LuaJIT-src/src/luajit.c index 1ca243011fbd9e6801f83888b19ef1736c9fe113..a725db1c63b9837459e30072a0d68e28c9c295b8 100644 --- a/source/libs/luajit/LuaJIT-src/src/luajit.c +++ b/source/libs/luajit/LuaJIT-src/src/luajit.c @@ -1,6 +1,6 @@ /* ** LuaJIT frontend. Runs commands, scripts, read-eval-print (REPL) etc. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ** ** Major portions taken verbatim or adapted from the Lua interpreter. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h @@ -39,6 +39,7 @@ static lua_State *globalL = NULL; static const char *progname = LUA_PROGNAME; +static char *empty_argv[2] = { NULL, NULL }; #if !LJ_TARGET_CONSOLE static void lstop(lua_State *L, lua_Debug *ar) @@ -78,9 +79,9 @@ static void print_usage(void) fflush(stderr); } -static void l_message(const char *pname, const char *msg) +static void l_message(const char *msg) { - if (pname) { fputs(pname, stderr); fputc(':', stderr); fputc(' ', stderr); } + if (progname) { fputs(progname, stderr); fputc(':', stderr); fputc(' ', stderr); } fputs(msg, stderr); fputc('\n', stderr); fflush(stderr); } @@ -90,7 +91,7 @@ static int report(lua_State *L, int status) if (status && !lua_isnil(L, -1)) { const char *msg = lua_tostring(L, -1); if (msg == NULL) msg = "(error object is not a string)"; - l_message(progname, msg); + l_message(msg); lua_pop(L, 1); } return status; @@ -150,6 +151,7 @@ static void print_jit_status(lua_State *L) fputs(s, stdout); } putc('\n', stdout); + lua_settop(L, 0); /* clear stack */ } static void createargtable(lua_State *L, char **argv, int argc, int argf) @@ -255,9 +257,8 @@ static void dotty(lua_State *L) lua_getglobal(L, "print"); lua_insert(L, 1); if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) - 
l_message(progname, - lua_pushfstring(L, "error calling " LUA_QL("print") " (%s)", - lua_tostring(L, -1))); + l_message(lua_pushfstring(L, "error calling " LUA_QL("print") " (%s)", + lua_tostring(L, -1))); } } lua_settop(L, 0); /* clear stack */ @@ -309,8 +310,7 @@ static int loadjitmodule(lua_State *L) lua_getfield(L, -1, "start"); if (lua_isnil(L, -1)) { nomodule: - l_message(progname, - "unknown luaJIT command or jit.* modules not installed"); + l_message("unknown luaJIT command or jit.* modules not installed"); return 1; } lua_remove(L, -2); /* Drop module table. */ @@ -421,6 +421,7 @@ static int collectargs(char **argv, int *flags) break; case 'e': *flags |= FLAGS_EXEC; + /* fallthrough */ case 'j': /* LuaJIT extension */ case 'l': *flags |= FLAGS_OPTION; @@ -514,8 +515,6 @@ static int pmain(lua_State *L) int argn; int flags = 0; globalL = L; - if (argv[0] && argv[0][0]) progname = argv[0]; - LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */ argn = collectargs(argv, &flags); @@ -570,9 +569,11 @@ static int pmain(lua_State *L) int main(int argc, char **argv) { int status; - lua_State *L = lua_open(); + lua_State *L; + if (!argv[0]) argv = empty_argv; else if (argv[0][0]) progname = argv[0]; + L = lua_open(); if (L == NULL) { - l_message(argv[0], "cannot create state: not enough memory"); + l_message("cannot create state: not enough memory"); return EXIT_FAILURE; } smain.argc = argc; diff --git a/source/libs/luajit/LuaJIT-src/src/luajit.h b/source/libs/luajit/LuaJIT-src/src/luajit_rolling.h similarity index 82% rename from source/libs/luajit/LuaJIT-src/src/luajit.h rename to source/libs/luajit/LuaJIT-src/src/luajit_rolling.h index 708a5a11fdb0eacca397df32eb0bf6f349c5244d..73639aa8a62898d70c8c9e589d2e5951670d9995 100644 --- a/source/libs/luajit/LuaJIT-src/src/luajit.h +++ b/source/libs/luajit/LuaJIT-src/src/luajit_rolling.h @@ -1,7 +1,7 @@ /* -** LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ +** LuaJIT -- a Just-In-Time Compiler for Lua. 
https://luajit.org/ ** -** Copyright (C) 2005-2017 Mike Pall. All rights reserved. +** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ** ** Permission is hereby granted, free of charge, to any person obtaining ** a copy of this software and associated documentation files (the @@ -22,7 +22,7 @@ ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ** -** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +** [ MIT license: https://www.opensource.org/licenses/mit-license.php ] */ #ifndef _LUAJIT_H @@ -30,11 +30,11 @@ #include "lua.h" -#define LUAJIT_VERSION "LuaJIT 2.1.0-beta3" -#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */ -#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3 -#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2017 Mike Pall" -#define LUAJIT_URL "http://luajit.org/" +#define LUAJIT_VERSION "LuaJIT 2.1.ROLLING" +#define LUAJIT_VERSION_NUM 20199 /* Deprecated. */ +#define LUAJIT_VERSION_SYM luaJIT_version_2_1_ROLLING +#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2025 Mike Pall" +#define LUAJIT_URL "https://luajit.org/" /* Modes for luaJIT_setmode. */ #define LUAJIT_MODE_MASK 0x00ff @@ -76,4 +76,5 @@ LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, /* Enforce (dynamic) linker error for version mismatches. Call from main. */ LUA_API void LUAJIT_VERSION_SYM(void); +#error "DO NOT USE luajit_rolling.h -- only include build-generated luajit.h" #endif diff --git a/source/libs/luajit/LuaJIT-src/src/lualib.h b/source/libs/luajit/LuaJIT-src/src/lualib.h index aed323428eb45b5bc708792b63b6fc25b2b65a2a..0afd5a947df4f6bc0d3fbd749c2282475212be72 100644 --- a/source/libs/luajit/LuaJIT-src/src/lualib.h +++ b/source/libs/luajit/LuaJIT-src/src/lualib.h @@ -1,6 +1,6 @@ /* ** Standard library header. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h */ #ifndef _LUALIB_H @@ -23,7 +23,7 @@ #define LUA_FFILIBNAME "ffi" #define LUA_BITLIBNAME_32 "bit32" - + LUALIB_API int luaopen_base(lua_State *L); LUALIB_API int luaopen_math(lua_State *L); LUALIB_API int luaopen_string(lua_State *L); @@ -35,6 +35,7 @@ LUALIB_API int luaopen_debug(lua_State *L); LUALIB_API int luaopen_bit(lua_State *L); LUALIB_API int luaopen_jit(lua_State *L); LUALIB_API int luaopen_ffi(lua_State *L); +LUALIB_API int luaopen_string_buffer(lua_State *L); LUALIB_API int luaopen_bit32(lua_State *L); diff --git a/source/libs/luajit/LuaJIT-src/src/msvcbuild.bat b/source/libs/luajit/LuaJIT-src/src/msvcbuild.bat index 71bde7598dc4dbd7324b735718d10bf781251567..fa30d11df84497d5dfc546e65c6cc729dd8d40d9 100644 --- a/source/libs/luajit/LuaJIT-src/src/msvcbuild.bat +++ b/source/libs/luajit/LuaJIT-src/src/msvcbuild.bat @@ -1,59 +1,83 @@ @rem Script to build LuaJIT with MSVC. -@rem Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +@rem Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h @rem -@rem Either open a "Visual Studio .NET Command Prompt" -@rem (Note that the Express Edition does not contain an x64 compiler) -@rem -or- -@rem Open a "Windows SDK Command Shell" and set the compiler environment: -@rem setenv /release /x86 -@rem -or- -@rem setenv /release /x64 +@rem Open a "Visual Studio Command Prompt" (either x86 or x64). +@rem Then cd to this directory and run this script. Use the following +@rem options (in order), if needed. The default is a dynamic release build. @rem -@rem Then cd to this directory and run this script. +@rem nogc64 disable LJ_GC64 mode for x64 +@rem debug emit debug symbols +@rem amalg amalgamated build +@rem static static linkage @if not defined INCLUDE goto :FAIL @setlocal +@rem Add more debug flags here, e.g. 
DEBUGCFLAGS=/DLUA_USE_ASSERT +@set DEBUGCFLAGS= @set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline +@set LJDYNBUILD=/DLUA_BUILD_AS_DLL /MD +@set LJDYNBUILD_DEBUG=/DLUA_BUILD_AS_DLL /MDd +@set LJCOMPILETARGET=/Zi +@set LJLINKTYPE=/DEBUG /RELEASE +@set LJLINKTYPE_DEBUG=/DEBUG +@set LJLINKTARGET=/OPT:REF /OPT:ICF /INCREMENTAL:NO @set LJLINK=link /nologo @set LJMT=mt /nologo @set LJLIB=lib /nologo /nodefaultlib @set DASMDIR=..\dynasm @set DASM=%DASMDIR%\dynasm.lua -@set DASC=vm_x86.dasc +@set DASC=vm_x64.dasc @set LJDLLNAME=lua51.dll @set LJLIBNAME=lua51.lib -@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c +@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c +@setlocal +@call :SETHOSTVARS %LJCOMPILE% host\minilua.c @if errorlevel 1 goto :BAD %LJLINK% /out:minilua.exe minilua.obj @if errorlevel 1 goto :BAD if exist minilua.exe.manifest^ %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe +@endlocal -@set DASMFLAGS=-D WIN -D JIT -D FFI -D P64 +@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU -D P64 @set LJARCH=x64 @minilua -@if errorlevel 8 goto :X64 -@set DASMFLAGS=-D WIN -D JIT -D FFI +@if errorlevel 8 goto :NO32 +@set DASC=vm_x86.dasc +@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU @set LJARCH=x86 @set LJCOMPILE=%LJCOMPILE% /arch:SSE2 +@goto :DA +:NO32 +@if "%VSCMD_ARG_TGT_ARCH%" neq "arm64" goto :X64 +@set DASC=vm_arm64.dasc +@set DASMTARGET=-D LUAJIT_TARGET=LUAJIT_ARCH_ARM64 +@set LJARCH=arm64 +@goto :DA :X64 -@if "%1" neq "gc64" goto :NOGC64 +@if "%1" neq "nogc64" goto :DA @shift -@set DASC=vm_x64.dasc -@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_GC64 -:NOGC64 +@set DASC=vm_x86.dasc +@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 +:DA minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% 
@if errorlevel 1 goto :BAD -%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c +if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) +minilua host\genversion.lua + +@setlocal +@call :SETHOSTVARS +%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% host\buildvm*.c @if errorlevel 1 goto :BAD %LJLINK% /out:buildvm.exe buildvm*.obj @if errorlevel 1 goto :BAD if exist buildvm.exe.manifest^ %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe +@endlocal buildvm -m peobj -o lj_vm.obj @if errorlevel 1 goto :BAD @@ -72,14 +96,17 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @if "%1" neq "debug" goto :NODEBUG @shift -@set LJCOMPILE=%LJCOMPILE% /Zi -@set LJLINK=%LJLINK% /debug /opt:ref /opt:icf /incremental:no +@set LJCOMPILE=%LJCOMPILE% %DEBUGCFLAGS% +@set LJDYNBUILD=%LJDYNBUILD_DEBUG% +@set LJLINKTYPE=%LJLINKTYPE_DEBUG% :NODEBUG +@set LJCOMPILE=%LJCOMPILE% %LJCOMPILETARGET% +@set LJLINK=%LJLINK% %LJLINKTYPE% %LJLINKTARGET% @if "%1"=="amalg" goto :AMALGDLL @if "%1"=="static" goto :STATIC -%LJCOMPILE% /MD /DLUA_BUILD_AS_DLL lj_*.c lib_*.c +%LJCOMPILE% %LJDYNBUILD% lj_*.c lib_*.c @if errorlevel 1 goto :BAD -%LJLINK% /DLL /out:%LJDLLNAME% lj_*.obj lib_*.obj +%LJLINK% /DLL /OUT:%LJDLLNAME% lj_*.obj lib_*.obj @if errorlevel 1 goto :BAD @goto :MTDLL :STATIC @@ -89,9 +116,16 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @if errorlevel 1 goto :BAD @goto :MTDLL :AMALGDLL -%LJCOMPILE% /MD /DLUA_BUILD_AS_DLL ljamalg.c +@if "%2"=="static" goto :AMALGSTATIC +%LJCOMPILE% %LJDYNBUILD% ljamalg.c +@if errorlevel 1 goto :BAD +%LJLINK% /DLL /OUT:%LJDLLNAME% ljamalg.obj lj_vm.obj @if errorlevel 1 goto :BAD -%LJLINK% /DLL /out:%LJDLLNAME% ljamalg.obj lj_vm.obj +@goto :MTDLL +:AMALGSTATIC +%LJCOMPILE% ljamalg.c +@if errorlevel 1 goto :BAD +%LJLINK% /OUT:%LJDLLNAME% ljamalg.obj lj_vm.obj @if errorlevel 1 goto :BAD :MTDLL if exist %LJDLLNAME%.manifest^ @@ -99,7 +133,7 @@ if exist %LJDLLNAME%.manifest^ %LJCOMPILE% luajit.c @if 
errorlevel 1 goto :BAD -%LJLINK% /out:luajit.exe luajit.obj %LJLIBNAME% +%LJLINK% /OUT:luajit.exe luajit.obj %LJLIBNAME% @if errorlevel 1 goto :BAD if exist luajit.exe.manifest^ %LJMT% -manifest luajit.exe.manifest -outputresource:luajit.exe @@ -110,6 +144,12 @@ if exist luajit.exe.manifest^ @echo. @echo === Successfully built LuaJIT for Windows/%LJARCH% === +@goto :END +:SETHOSTVARS +@if "%VSCMD_ARG_HOST_ARCH%_%VSCMD_ARG_TGT_ARCH%" equ "x64_arm64" ( + call "%VSINSTALLDIR%Common7\Tools\VsDevCmd.bat" -arch=%VSCMD_ARG_HOST_ARCH% -no_logo + echo on +) @goto :END :BAD @echo. @@ -118,5 +158,5 @@ if exist luajit.exe.manifest^ @echo ******************************************************* @goto :END :FAIL -@echo You must open a "Visual Studio .NET Command Prompt" to run this script +@echo You must open a "Visual Studio Command Prompt" to run this script :END diff --git a/source/libs/luajit/LuaJIT-src/src/nxbuild.bat b/source/libs/luajit/LuaJIT-src/src/nxbuild.bat new file mode 100644 index 0000000000000000000000000000000000000000..9151339773edf514138d696ecff075fbe97f07a6 --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/nxbuild.bat @@ -0,0 +1,165 @@ +@rem Script to build LuaJIT with NintendoSDK + NX Addon. +@rem Donated to the public domain by Swyter. +@rem +@rem To run this script you must open a "Native Tools Command Prompt for VS". +@rem +@rem Either the x86 version for NX32, or x64 for the NX64 target. +@rem This is because the pointer size of the LuaJIT host tools (buildvm.exe) +@rem must match the cross-compiled target (32 or 64 bits). +@rem +@rem Then cd to this directory and run this script. 
+@rem +@rem Recommended invocation: +@rem +@rem nxbuild # release build, amalgamated +@rem nxbuild debug # debug build, amalgamated +@rem +@rem Additional command-line options (not generally recommended): +@rem +@rem noamalg # (after debug) non-amalgamated build + +@if not defined INCLUDE goto :FAIL +@if not defined NINTENDO_SDK_ROOT goto :FAIL +@if not defined PLATFORM goto :FAIL + +@if "%platform%" == "x86" goto :DO_NX32 +@if "%platform%" == "x64" goto :DO_NX64 + +@echo Error: Current host platform is %platform%! +@echo. +@goto :FAIL + +@setlocal + +:DO_NX32 +@set DASC=vm_arm.dasc +@set DASMFLAGS= -D HFABI -D FPU +@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM +@set HOST_PTR_SIZE=4 +goto :BEGIN + +:DO_NX64 +@set DASC=vm_arm64.dasc +@set DASMFLAGS= -D ENDIAN_LE +@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM64 +@set HOST_PTR_SIZE=8 + +:BEGIN +@rem ---- Host compiler ---- +@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /wo4146 /wo4244 /D_CRT_SECURE_NO_DEPRECATE +@set LJLINK=link /nologo +@set LJMT=mt /nologo +@set DASMDIR=..\dynasm +@set DASM=%DASMDIR%\dynasm.lua +@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c + +%LJCOMPILE% host\minilua.c +@if errorlevel 1 goto :BAD +%LJLINK% /out:minilua.exe minilua.obj +@if errorlevel 1 goto :BAD +if exist minilua.exe.manifest^ + %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe + +@rem Check that we have the right 32/64 bit host compiler to generate the right virtual machine files. +@minilua +@if "%ERRORLEVEL%" == "%HOST_PTR_SIZE%" goto :PASSED_PTR_CHECK + +@echo The pointer size of the host in bytes (%HOST_PTR_SIZE%) does not match the expected value (%errorlevel%). +@echo Check that the script is being ran under the correct x86/x64 VS prompt. 
+@goto :BAD + +:PASSED_PTR_CHECK +@set DASMFLAGS=%DASMFLAGS% %DASMTARGET% -D LJ_TARGET_NX -D LUAJIT_OS=LUAJIT_OS_OTHER -D LUAJIT_DISABLE_JIT -D LUAJIT_DISABLE_FFI +minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% +@if errorlevel 1 goto :BAD + +if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) +minilua host\genversion.lua + +%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% -D LJ_TARGET_NX -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI host\buildvm*.c +@if errorlevel 1 goto :BAD +%LJLINK% /out:buildvm.exe buildvm*.obj +@if errorlevel 1 goto :BAD +if exist buildvm.exe.manifest^ + %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe + +buildvm -m elfasm -o lj_vm.s +@if errorlevel 1 goto :BAD +buildvm -m bcdef -o lj_bcdef.h %ALL_LIB% +@if errorlevel 1 goto :BAD +buildvm -m ffdef -o lj_ffdef.h %ALL_LIB% +@if errorlevel 1 goto :BAD +buildvm -m libdef -o lj_libdef.h %ALL_LIB% +@if errorlevel 1 goto :BAD +buildvm -m recdef -o lj_recdef.h %ALL_LIB% +@if errorlevel 1 goto :BAD +buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB% +@if errorlevel 1 goto :BAD +buildvm -m folddef -o lj_folddef.h lj_opt_fold.c +@if errorlevel 1 goto :BAD + +@rem ---- Cross compiler ---- +@set NXCOMPILER_ROOT="%NINTENDO_SDK_ROOT%\Compilers\NintendoClang" +@if "%platform%" neq "x64" goto :NX32_CROSSBUILD +@set LJCOMPILE="%NXCOMPILER_ROOT%\bin\clang" --target=aarch64-nintendo-nx-elf -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c +@set LJLIB="%NXCOMPILER_ROOT%\bin\llvm-ar" rc +@set TARGETLIB_SUFFIX=nx64 + +%NXCOMPILER_ROOT%\bin\clang --target=aarch64-nintendo-nx-elf -o lj_vm.o -c lj_vm.s +goto :DEBUGCHECK + +:NX32_CROSSBUILD +@set LJCOMPILE="%NXCOMPILER_ROOT%\bin\clang" --target=armv7l-nintendo-nx-eabihf -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT 
-DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c +@set LJLIB="%NXCOMPILER_ROOT%\bin\llvm-ar" rc +@set TARGETLIB_SUFFIX=nx32 + +%NXCOMPILER_ROOT%\bin\clang --target=armv7l-nintendo-nx-eabihf -o lj_vm.o -c lj_vm.s +:DEBUGCHECK + +@if "%1" neq "debug" goto :NODEBUG +@shift +@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_DEBUG -g -O0 +@set TARGETLIB=libluajitD_%TARGETLIB_SUFFIX%.a +goto :BUILD +:NODEBUG +@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_RELEASE -O3 +@set TARGETLIB=libluajit_%TARGETLIB_SUFFIX%.a +:BUILD +del %TARGETLIB% +@set LJCOMPILE=%LJCOMPILE% -fPIC +@if "%1" neq "noamalg" goto :AMALG +for %%f in (lj_*.c lib_*.c) do ( + %LJCOMPILE% %%f + @if errorlevel 1 goto :BAD +) + +%LJLIB% %TARGETLIB% lj_*.o lib_*.o +@if errorlevel 1 goto :BAD +@goto :NOAMALG +:AMALG +%LJCOMPILE% ljamalg.c +@if errorlevel 1 goto :BAD +%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o +@if errorlevel 1 goto :BAD +:NOAMALG + +@del *.o *.obj *.manifest minilua.exe buildvm.exe +@echo. +@echo === Successfully built LuaJIT for Nintendo Switch (%TARGETLIB_SUFFIX%) === + +@goto :END +:BAD +@echo. +@echo ******************************************************* +@echo *** Build FAILED -- Please check the error messages *** +@echo ******************************************************* +@goto :END +:FAIL +@echo To run this script you must open a "Native Tools Command Prompt for VS". +@echo. +@echo Either the x86 version for NX32, or x64 for the NX64 target. +@echo This is because the pointer size of the LuaJIT host tools (buildvm.exe) +@echo must match the cross-compiled target (32 or 64 bits). +@echo. +@echo Keep in mind that NintendoSDK + NX Addon must be installed, too. 
+:END diff --git a/source/libs/luajit/LuaJIT-src/src/ps4build.bat b/source/libs/luajit/LuaJIT-src/src/ps4build.bat index e4a7defe7af8e6ce10c48511e341e0549683f254..d6f73e5ab40e8f4d03630119e00e994f6cac8ef4 100644 --- a/source/libs/luajit/LuaJIT-src/src/ps4build.bat +++ b/source/libs/luajit/LuaJIT-src/src/ps4build.bat @@ -26,13 +26,13 @@ @set LJMT=mt /nologo @set DASMDIR=..\dynasm @set DASM=%DASMDIR%\dynasm.lua -@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c -@set GC64=-DLUAJIT_ENABLE_GC64 +@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c +@set GC64= @set DASC=vm_x64.dasc @if "%1" neq "gc32" goto :NOGC32 @shift -@set GC64= +@set GC64=-DLUAJIT_DISABLE_GC64 @set DASC=vm_x86.dasc :NOGC32 @@ -51,7 +51,11 @@ if exist minilua.exe.manifest^ minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% @if errorlevel 1 goto :BAD -%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c +if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) +minilua host\genversion.lua + +%LJCOMPILE% /I "." 
/I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -DLUAJIT_NO_UNWIND host\buildvm*.c + @if errorlevel 1 goto :BAD %LJLINK% /out:buildvm.exe buildvm*.obj @if errorlevel 1 goto :BAD @@ -78,7 +82,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus @set INCLUDE="" -orbis-as -o lj_vm.o lj_vm.s +"%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-as" -o lj_vm.o lj_vm.s @if "%1" neq "debug" goto :NODEBUG @shift diff --git a/source/libs/luajit/LuaJIT-src/src/ps5build.bat b/source/libs/luajit/LuaJIT-src/src/ps5build.bat new file mode 100644 index 0000000000000000000000000000000000000000..30b719d63a4123d6b066c525f41993e87f401fff --- /dev/null +++ b/source/libs/luajit/LuaJIT-src/src/ps5build.bat @@ -0,0 +1,126 @@ +@rem Script to build LuaJIT with the PS5 SDK. +@rem Donated to the public domain. +@rem +@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) +@rem or "VS20xx x64 Native Tools Command Prompt". +@rem +@rem Then cd to this directory and run this script. 
+@rem +@rem Recommended invocation: +@rem +@rem ps5build release build, amalgamated, 64-bit GC +@rem ps5build debug debug build, amalgamated, 64-bit GC +@rem +@rem Additional command-line options (not generally recommended): +@rem +@rem gc32 (before debug) 32-bit GC +@rem noamalg (after debug) non-amalgamated build + +@if not defined INCLUDE goto :FAIL +@if not defined SCE_PROSPERO_SDK_DIR goto :FAIL + +@setlocal +@rem ---- Host compiler ---- +@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE +@set LJLINK=link /nologo +@set LJMT=mt /nologo +@set DASMDIR=..\dynasm +@set DASM=%DASMDIR%\dynasm.lua +@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c +@set GC64= +@set DASC=vm_x64.dasc + +@if "%1" neq "gc32" goto :NOGC32 +@shift +@set GC64=-DLUAJIT_DISABLE_GC64 +@set DASC=vm_x86.dasc +:NOGC32 + +%LJCOMPILE% host\minilua.c +@if errorlevel 1 goto :BAD +%LJLINK% /out:minilua.exe minilua.obj +@if errorlevel 1 goto :BAD +if exist minilua.exe.manifest^ + %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe + +@rem Check for 64 bit host compiler. +@minilua +@if not errorlevel 8 goto :FAIL + +@set DASMFLAGS=-D P64 -D NO_UNWIND +minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% +@if errorlevel 1 goto :BAD + +if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) +minilua host\genversion.lua + +%LJCOMPILE% /I "." 
/I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c +@if errorlevel 1 goto :BAD +%LJLINK% /out:buildvm.exe buildvm*.obj +@if errorlevel 1 goto :BAD +if exist buildvm.exe.manifest^ + %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe + +buildvm -m elfasm -o lj_vm.s +@if errorlevel 1 goto :BAD +buildvm -m bcdef -o lj_bcdef.h %ALL_LIB% +@if errorlevel 1 goto :BAD +buildvm -m ffdef -o lj_ffdef.h %ALL_LIB% +@if errorlevel 1 goto :BAD +buildvm -m libdef -o lj_libdef.h %ALL_LIB% +@if errorlevel 1 goto :BAD +buildvm -m recdef -o lj_recdef.h %ALL_LIB% +@if errorlevel 1 goto :BAD +buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB% +@if errorlevel 1 goto :BAD +buildvm -m folddef -o lj_folddef.h lj_opt_fold.c +@if errorlevel 1 goto :BAD + +@rem ---- Cross compiler ---- +@set LJCOMPILE="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-clang" -c -Wall -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC %GC64% +@set LJLIB="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-llvm-ar" rcus +@set INCLUDE="" + +"%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-clang" -c -o lj_vm.o lj_vm.s + +@if "%1" neq "debug" goto :NODEBUG +@shift +@set LJCOMPILE=%LJCOMPILE% -g -O0 +@set TARGETLIB=libluajitD_ps5.a +goto :BUILD +:NODEBUG +@set LJCOMPILE=%LJCOMPILE% -O2 +@set TARGETLIB=libluajit_ps5.a +:BUILD +del %TARGETLIB% +@if "%1" neq "noamalg" goto :AMALG +for %%f in (lj_*.c lib_*.c) do ( + %LJCOMPILE% %%f + @if errorlevel 1 goto :BAD +) + +%LJLIB% %TARGETLIB% lj_*.o lib_*.o +@if errorlevel 1 goto :BAD +@goto :NOAMALG +:AMALG +%LJCOMPILE% ljamalg.c +@if errorlevel 1 goto :BAD +%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o +@if errorlevel 1 goto :BAD +:NOAMALG + +@del *.o *.obj *.manifest minilua.exe buildvm.exe +@echo. +@echo === Successfully built LuaJIT for PS5 === + +@goto :END +:BAD +@echo. 
+@echo ******************************************************* +@echo *** Build FAILED -- Please check the error messages *** +@echo ******************************************************* +@goto :END +:FAIL +@echo To run this script you must open a "Visual Studio .NET Command Prompt" +@echo (64 bit host compiler). The PS5 Prospero SDK must be installed, too. +:END diff --git a/source/libs/luajit/LuaJIT-src/src/psvitabuild.bat b/source/libs/luajit/LuaJIT-src/src/psvitabuild.bat index 3991dc65353d1a82e3bf084d9febb8bb8f773410..8d176e52fb98d06be66b8750ced8ae493f344388 100644 --- a/source/libs/luajit/LuaJIT-src/src/psvitabuild.bat +++ b/source/libs/luajit/LuaJIT-src/src/psvitabuild.bat @@ -14,7 +14,7 @@ @set LJMT=mt /nologo @set DASMDIR=..\dynasm @set DASM=%DASMDIR%\dynasm.lua -@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c +@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c %LJCOMPILE% host\minilua.c @if errorlevel 1 goto :BAD @@ -31,6 +31,9 @@ if exist minilua.exe.manifest^ minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_arm.dasc @if errorlevel 1 goto :BAD +if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) +minilua host\genversion.lua + %LJCOMPILE% /I "." 
/I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_ARM -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLJ_TARGET_PSVITA=1 host\buildvm*.c @if errorlevel 1 goto :BAD %LJLINK% /out:buildvm.exe buildvm*.obj diff --git a/source/libs/luajit/LuaJIT-src/src/vm_arm.dasc b/source/libs/luajit/LuaJIT-src/src/vm_arm.dasc index 780cc16e6dedac3588f497541ff0c1b77711c157..ca08fc117e98490c3c7fcd834544e1aa230b7692 100644 --- a/source/libs/luajit/LuaJIT-src/src/vm_arm.dasc +++ b/source/libs/luajit/LuaJIT-src/src/vm_arm.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for ARM CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h | |.arch arm |.section code_op, code_sub @@ -539,13 +539,13 @@ static void build_subroutines(BuildCtx *ctx) | cmp CARG1, #1 |.endif | ldr PC, [CARG4, #-12] // Restore PC from [cont|PC]. - | ldr CARG3, LFUNC:CARG3->field_pc | mvn INS, #~LJ_TNIL | add CARG2, RA, RC | str INS, [CARG2, #-4] // Ensure one valid arg. |.if FFI | bls >1 |.endif + | ldr CARG3, LFUNC:CARG3->field_pc | ldr KBASE, [CARG3, #PC2PROTO(k)] | // BASE = base, RA = resultptr, CARG4 = meta base | bx CARG1 @@ -699,6 +699,7 @@ static void build_subroutines(BuildCtx *ctx) |->vmeta_tsetr: | str BASE, L->base | .IOS mov RC, BASE + | mov CARG1, L | str PC, SAVE_PC | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | // Returns TValue *. 
@@ -1011,9 +1012,9 @@ static void build_subroutines(BuildCtx *ctx) | cmp TAB:RB, #0 | beq ->fff_restv | ldr CARG3, TAB:RB->hmask - | ldr CARG4, STR:RC->hash + | ldr CARG4, STR:RC->sid | ldr NODE:INS, TAB:RB->node - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask | add CARG3, CARG3, CARG3, lsl #1 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 |3: // Rearranged logic, because we expect _not_ to find the key. @@ -1110,24 +1111,18 @@ static void build_subroutines(BuildCtx *ctx) | checktab CARG2, ->fff_fallback | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. | ldr PC, [BASE, FRAME_PC] - | mov CARG2, CARG1 - | str BASE, L->base // Add frame since C call can throw. - | mov CARG1, L - | str BASE, L->top // Dummy frame length is ok. - | add CARG3, BASE, #8 - | str PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. + | add CARG2, BASE, #8 + | sub CARG3, BASE, #8 + | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. | .IOS ldr BASE, L->base | cmp CRET1, #0 - | mvneq CRET2, #~LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. - | ldrd CARG12, [BASE, #8] // Copy key and value to results. - | ldrd CARG34, [BASE, #16] - | mov RC, #(2+1)*8 - | strd CARG12, [BASE, #-8] - | strd CARG34, [BASE] - | b ->fff_res + | mov RC, #(2+1)*8 + | bgt ->fff_res // Found key/value. + | bmi ->fff_fallback // Invalid key. + | // End of traversal: return nil. 
+ | mvn CRET2, #~LJ_TNIL + | b ->fff_restv | |.ffunc_1 pairs | checktab CARG2, ->fff_fallback @@ -1200,8 +1195,11 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: catch errors ---------------------------------------- | |.ffunc pcall + | ldr RB, L->maxstack + | add INS, BASE, NARGS8:RC | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] | cmp NARGS8:RC, #8 + | cmphs RB, INS | blo ->fff_fallback | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. | mov RB, BASE @@ -1212,7 +1210,11 @@ static void build_subroutines(BuildCtx *ctx) | b ->vm_call_dispatch | |.ffunc_2 xpcall + | ldr RB, L->maxstack + | add INS, BASE, NARGS8:RC | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] + | cmp RB, INS + | blo ->fff_fallback | checkfunc CARG4, ->fff_fallback // Traceback must be a function. | mov RB, BASE | strd CARG12, [BASE, #8] // Swap function and traceback. @@ -1715,8 +1717,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif |.endmacro | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo + | math_minmax math_min, gt, pl + | math_minmax math_max, lt, le | |//-- String library ----------------------------------------------------- | @@ -1809,7 +1811,7 @@ static void build_subroutines(BuildCtx *ctx) | str BASE, L->base | str PC, SAVE_PC | str L, SBUF:CARG1->L - | str CARG4, SBUF:CARG1->p + | str CARG4, SBUF:CARG1->w | bl extern lj_buf_putstr_ .. name | bl extern lj_buf_tostr | b ->fff_resstr @@ -1821,9 +1823,19 @@ static void build_subroutines(BuildCtx *ctx) | |//-- Bit library -------------------------------------------------------- | - |// FP number to bit conversion for soft-float. Clobbers r0-r3. |->vm_tobit_fb: | bhi ->fff_fallback + |.if FPU + |// FP number to bit conversion for hard-float. Clobbers r0, d0-d1. + | vldr d1, >9 + | vmov d0, CARG1, CARG2 + | vadd.f64 d0, d0, d1 + | vmov CARG1, s0 + | bx lr + |9: + | .long 0, 0x43380000 // (double)(2^52 + 2^51). + |.else + |// FP number to bit conversion for soft-float. Clobbers r0-r3. 
|->vm_tobit: | lsl RB, CARG2, #1 | adds RB, RB, #0x00200000 @@ -1848,6 +1860,7 @@ static void build_subroutines(BuildCtx *ctx) | orr CARG1, CARG4, CARG1, lsl RB | rsblt CARG1, CARG1, #0 | bx lr + |.endif | |.macro .ffunc_bit, name | .ffunc_1 bit_..name @@ -2201,8 +2214,8 @@ static void build_subroutines(BuildCtx *ctx) |.if JIT | ldr L, SAVE_L |1: - | cmp CARG1, #0 - | blt >9 // Check for error from exit. + | cmn CARG1, #LUA_ERRERR + | bhs >9 // Check for error from exit. | lsl RC, CARG1, #3 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | str RC, SAVE_MULTRES @@ -2218,6 +2231,8 @@ static void build_subroutines(BuildCtx *ctx) | ldr INS, [PC], #4 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. | st_vmstate CARG4 + | cmn CARG1, #17 // Static dispatch? + | beq >5 | cmp OP, #BC_FUNCC+2 // Fast function? | bhs >4 |2: @@ -2243,10 +2258,21 @@ static void build_subroutines(BuildCtx *ctx) | ldr KBASE, [CARG3, #PC2PROTO(k)] | b <2 | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] + | decode_RD RC, INS + | ldr TRACE:CARG1, [CARG1, RC, lsl #2] + | ldr INS, TRACE:CARG1->startins + | decode_OP OP, INS + | decode_RA8 RA, INS + | add OP, DISPATCH, OP, lsl #2 + | decode_RD RC, INS + | ldr pc, [OP, #GG_DISP2STATIC] + | |9: // Rethrow error from the right C frame. 
| rsb CARG2, CARG1, #0 | mov CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) + | bl extern lj_err_trace // (lua_State *L, int errcode) |.endif | |//----------------------------------------------------------------------- @@ -2429,6 +2455,64 @@ static void build_subroutines(BuildCtx *ctx) |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_RES, CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_TMP0, CARG3 + |.define NEXT_TMP1, CARG4 + |.define NEXT_LIM, r12 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, [sp] + |.define NEXT_RES_KEY_I, [sp, #8] + |.define NEXT_RES_KEY_IT, [sp, #12] + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT + | ldr NEXT_TMP0, NEXT_TAB->array + | ldr NEXT_LIM, NEXT_TAB->asize + | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3 + |1: // Traverse array part. + | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM + | bhs >5 + | ldr NEXT_TMP1, [NEXT_TMP0, #4] + | str NEXT_IDX, NEXT_RES_KEY_I + | add NEXT_TMP0, NEXT_TMP0, #8 + | add NEXT_IDX, NEXT_IDX, #1 + | checktp NEXT_TMP1, LJ_TNIL + | beq <1 // Skip holes in array part. + | ldr NEXT_TMP0, [NEXT_TMP0, #-8] + | mov NEXT_RES, NEXT_RES_PTR + | strd NEXT_TMP0, NEXT_RES_VAL // Stores NEXT_TMP1, too. + | mvn NEXT_TMP0, #~LJ_TISNUM + | str NEXT_TMP0, NEXT_RES_KEY_IT + | bx lr + | + |5: // Traverse hash part. + | ldr NEXT_TMP0, NEXT_TAB->hmask + | ldr NODE:NEXT_RES, NEXT_TAB->node + | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1 + | add NEXT_LIM, NEXT_LIM, NEXT_TMP0 + | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3 + |6: + | cmp NEXT_IDX, NEXT_LIM + | bhi >9 + | ldr NEXT_TMP1, NODE:NEXT_RES->val.it + | checktp NEXT_TMP1, LJ_TNIL + | add NEXT_IDX, NEXT_IDX, #1 + | bxne lr + | // Skip holes in hash part. + | add NEXT_RES, NEXT_RES, #sizeof(Node) + | b <6 + | + |9: // End of iteration. 
Set the key to nil (not the value). + | mvn NEXT_TMP0, #0 + | mov NEXT_RES, NEXT_RES_PTR + | str NEXT_TMP0, NEXT_RES_KEY_IT + | bx lr + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -2505,16 +2589,16 @@ static void build_subroutines(BuildCtx *ctx) |.endif | mov r11, sp | sub sp, sp, CARG1 // Readjust stack. - | subs CARG2, CARG2, #1 + | subs CARG2, CARG2, #4 |.if HFABI | vldm RB, {d0-d7} |.endif | ldr RB, CCSTATE->func | bmi >2 |1: // Copy stack slots. - | ldr CARG4, [CARG3, CARG2, lsl #2] - | str CARG4, [sp, CARG2, lsl #2] - | subs CARG2, CARG2, #1 + | ldr CARG4, [CARG3, CARG2] + | str CARG4, [sp, CARG2] + | subs CARG2, CARG2, #4 | bpl <1 |2: | ldrd CARG12, CCSTATE->gpr[0] @@ -3499,10 +3583,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash + | ldr CARG4, STR:RC->sid | ldr NODE:INS, TAB:CARG1->node | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask | add CARG3, CARG3, CARG3, lsl #1 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 |1: @@ -3646,10 +3730,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash + | ldr CARG4, STR:RC->sid | ldr NODE:INS, TAB:CARG1->node | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask | add CARG3, CARG3, CARG3, lsl #1 | mov CARG4, #0 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 @@ -3919,10 +4003,11 @@ static void 
build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | hotloop |.endif + |->vm_IITERN: + | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) | add RA, BASE, RA | ldr TAB:RB, [RA, #-16] | ldr CARG1, [RA, #-8] // Get index from control var. @@ -3988,7 +4073,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next1 | ins_next2 | mov CARG1, #0 - | mvn CARG2, #0x00018000 + | mvn CARG2, #~LJ_KEYINDEX | strd CARG1, [RA, #-8] // Initialize control var. |1: | ins_next3 @@ -3997,9 +4082,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov OP, #BC_ITERC | strb CARG1, [PC, #-4] | sub PC, RC, #0x20000 + |.if JIT + | ldrb CARG1, [PC] + | cmp CARG1, #BC_ITERN + | bne >6 + |.endif | strb OP, [PC] // Subsumes ins_next1. | ins_next2 | b <1 + |.if JIT + |6: // Unpatch JLOOP. + | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] + | ldrh CARG2, [PC, #2] + | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] + | // Subsumes ins_next1 and ins_next2. + | ldr INS, TRACE:CARG1->startins + | bfi INS, OP, #0, #8 + | str INS, [PC], #4 + | b <1 + |.endif break; case BC_VARG: diff --git a/source/libs/luajit/LuaJIT-src/src/vm_arm64.dasc b/source/libs/luajit/LuaJIT-src/src/vm_arm64.dasc index 3eaf37638e68d1b7801b74ec1bd95e7f044fc732..58efe400e47633410407edcf625ecebe3a33dfa5 100644 --- a/source/libs/luajit/LuaJIT-src/src/vm_arm64.dasc +++ b/source/libs/luajit/LuaJIT-src/src/vm_arm64.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for ARM64 CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h | |.arch arm64 |.section code_op, code_sub @@ -77,51 +77,94 @@ |.define CRET1, x0 |.define CRET1w, w0 | +|//----------------------------------------------------------------------- +| +|// ARM64e pointer authentication codes (PAC). +|.if PAUTH +|.macro sp_auth; pacibsp; .endmacro +|.macro br_auth, reg; braaz reg; .endmacro +|.macro blr_auth, reg; blraaz reg; .endmacro +|.macro ret_auth; retab; .endmacro +|.else +|.macro sp_auth; .endmacro +|.macro br_auth, reg; br reg; .endmacro +|.macro blr_auth, reg; blr reg; .endmacro +|.macro ret_auth; ret; .endmacro +|.endif +| +|//----------------------------------------------------------------------- +| |// Stack layout while in interpreter. Must match with lj_frame.h. | |.define CFRAME_SPACE, 208 |//----- 16 byte aligned, <-- sp entering interpreter -|// Unused [sp, #204] // 32 bit values -|.define SAVE_NRES, [sp, #200] -|.define SAVE_ERRF, [sp, #196] -|.define SAVE_MULTRES, [sp, #192] -|.define TMPD, [sp, #184] // 64 bit values -|.define SAVE_L, [sp, #176] -|.define SAVE_PC, [sp, #168] -|.define SAVE_CFRAME, [sp, #160] -|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves -|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves -|.define SAVE_LR, [sp, #8] -|.define SAVE_FP, [sp] +|.define SAVE_FP_LR_, 192 +|.define SAVE_GPR_, 112 // 112+10*8: 64 bit GPR saves +|.define SAVE_FPR_, 48 // 48+8*8: 64 bit FPR saves +|// Unused [sp, #44] // 32 bit values +|.define SAVE_NRES, [sp, #40] +|.define SAVE_ERRF, [sp, #36] +|.define SAVE_MULTRES, [sp, #32] +|.define TMPD, [sp, #24] // 64 bit values +|.define SAVE_L, [sp, #16] +|.define SAVE_PC, [sp, #8] +|.define SAVE_CFRAME, [sp, #0] |//----- 16 byte aligned, <-- sp while in interpreter. | -|.define TMPDofs, #184 +|.define TMPDofs, #24 +| +|.if WIN +|// Windows unwind data is suited to r1 stored first. 
+|.macro stp_unwind, r1, r2, where +| stp r1, r2, where +|.endmacro +|.macro ldp_unwind, r1, r2, where +| ldp r1, r2, where +|.endmacro +|.macro ldp_unwind, r1, r2, where, post_index +| ldp r1, r2, where, post_index +|.endmacro +|.else +|// Otherwise store r2 first for compact unwind info (OSX). +|.macro stp_unwind, r1, r2, where +| stp r2, r1, where +|.endmacro +|.macro ldp_unwind, r1, r2, where +| ldp r2, r1, where +|.endmacro +|.macro ldp_unwind, r1, r2, where, post_index +| ldp r2, r1, where, post_index +|.endmacro +|.endif | |.macro save_, gpr1, gpr2, fpr1, fpr2 -| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] -| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] +| stp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8] +| stp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8] |.endmacro |.macro rest_, gpr1, gpr2, fpr1, fpr2 -| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] -| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] +| ldp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8] +| ldp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8] |.endmacro | |.macro saveregs -| stp fp, lr, [sp, #-CFRAME_SPACE]! -| add fp, sp, #0 -| stp x19, x20, [sp, # SAVE_GPR_] +| sp_auth +| sub sp, sp, # CFRAME_SPACE +| stp fp, lr, [sp, # SAVE_FP_LR_] +| add fp, sp, # SAVE_FP_LR_ +| stp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8] | save_ 21, 22, 8, 9 | save_ 23, 24, 10, 11 | save_ 25, 26, 12, 13 | save_ 27, 28, 14, 15 |.endmacro |.macro restoreregs -| ldp x19, x20, [sp, # SAVE_GPR_] +| ldp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8] | rest_ 21, 22, 8, 9 | rest_ 23, 24, 10, 11 | rest_ 25, 26, 12, 13 | rest_ 27, 28, 14, 15 -| ldp fp, lr, [sp], # CFRAME_SPACE +| ldp fp, lr, [sp, # SAVE_FP_LR_] +| add sp, sp, # CFRAME_SPACE |.endmacro | |// Type definitions. Some of these are only used for documentation. 
@@ -179,7 +222,7 @@ | decode_RA RA, INS | ldr TMP0, [TMP1, #GG_G2DISP] | decode_RD RC, INS -| br TMP0 +| br_auth TMP0 |.endmacro | |// Instruction footer. @@ -208,7 +251,7 @@ | decode_RA RA, INS | ldr TMP0, [TMP1, #GG_G2DISP] | add RA, BASE, RA, lsl #3 -| br TMP0 +| br_auth TMP0 |.endmacro | |.macro ins_call @@ -248,8 +291,17 @@ | blo target |.endmacro | +|.macro init_constants +| movn TISNIL, #0 +| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 +| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 +|.endmacro +| |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro +|.macro mov_nil, reg; mov reg, TISNIL; .endmacro +|.macro cmp_nil, reg; cmp reg, TISNIL; .endmacro +|.macro add_TISNUM, dst, src; add dst, src, TISNUM; .endmacro | #define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) | @@ -355,7 +407,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_leave_unw: | restoreregs - | ret + | ret_auth | |6: | bgt >7 // Less results wanted? @@ -387,26 +439,26 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | // (void *cframe, int errcode) + | add fp, CARG1, # SAVE_FP_LR_ | mov sp, CARG1 | mov CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. | ldr L, SAVE_L - | mv_vmstate TMP0w, C | ldr GL, L->glref + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | mv_vmstate TMP0w, C | st_vmstate TMP0w | b ->vm_leave_unw | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | // (void *cframe) - | and sp, CARG1, #CFRAME_RAWMASK - |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | add fp, CARG1, # SAVE_FP_LR_ + | mov sp, CARG1 | ldr L, SAVE_L - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 + | init_constants + | ldr GL, L->glref // Setup pointer to global state. + |->vm_unwind_ff_eh: // Landing pad for external unwinder. 
| mov RC, #16 // 2 results: false + error message. | ldr BASE, L->base - | ldr GL, L->glref // Setup pointer to global state. | mov_false TMP0 | sub RA, BASE, #8 // Results start at BASE-8. | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. @@ -467,11 +519,9 @@ static void build_subroutines(BuildCtx *ctx) | str L, GL->cur_L | mov RA, BASE | ldp BASE, CARG1, L->base - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | init_constants | ldr PC, [BASE, FRAME_PC] | strb wzr, L->status - | movn TISNIL, #0 | sub RC, CARG1, BASE | ands CARG1, PC, #FRAME_TYPE | add RC, RC, #8 @@ -500,16 +550,15 @@ static void build_subroutines(BuildCtx *ctx) | ldr GL, L->glref // Setup pointer to global state. | mov BASE, CARG2 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | str RC, SAVE_CFRAME - | str fp, L->cframe // Add our C frame to cframe chain. + | add TMP0, sp, #0 + | str RC, SAVE_CFRAME + | str TMP0, L->cframe // Add our C frame to cframe chain. | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | str L, GL->cur_L | ldp RB, CARG1, L->base // RB = old base (for vmeta_call). - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | add PC, PC, BASE - | movn TISNIL, #0 + | init_constants | sub PC, PC, RB // PC = frame delta + frame type | sub NARGS8:RC, CARG1, BASE | st_vmstate ST_INTERP @@ -536,10 +585,11 @@ static void build_subroutines(BuildCtx *ctx) | sub RA, RA, RB // Compute -savestack(L, L->top). | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame. | str wzr, SAVE_ERRF // No error function. - | str RC, SAVE_CFRAME - | str fp, L->cframe // Add our C frame to cframe chain. + | add TMP0, sp, #0 + | str RC, SAVE_CFRAME + | str TMP0, L->cframe // Add our C frame to cframe chain. 
| str L, GL->cur_L - | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | blr_auth CARG4 // (lua_State *L, lua_CFunction func, void *ud) | mov BASE, CRET1 | mov PC, #FRAME_CP | cbnz BASE, <3 // Else continue with the call. @@ -562,15 +612,15 @@ static void build_subroutines(BuildCtx *ctx) | cmp CARG1, #1 |.endif | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC]. - | ldr CARG3, LFUNC:CARG3->pc | add TMP0, RA, RC | str TISNIL, [TMP0, #-8] // Ensure one valid arg. |.if FFI | bls >1 |.endif + | ldr CARG3, LFUNC:CARG3->pc | ldr KBASE, [CARG3, #PC2PROTO(k)] | // BASE = base, RA = resultptr, CARG4 = meta base - | br CARG1 + | br_auth CARG1 | |.if FFI |1: @@ -617,7 +667,7 @@ static void build_subroutines(BuildCtx *ctx) | b >1 | |->vmeta_tgetb: // RB = table, RC = index - | add RC, RC, TISNUM + | add_TISNUM RC, RC | add CARG2, BASE, RB, lsl #3 | add CARG3, sp, TMPDofs | str RC, TMPD @@ -652,7 +702,7 @@ static void build_subroutines(BuildCtx *ctx) | sxtw CARG2, TMP1w | bl extern lj_tab_getinth // (GCtab *t, int32_t key) | // Returns cTValue * or NULL. - | mov TMP0, TISNIL + | mov_nil TMP0 | cbz CRET1, ->BC_TGETR_Z | ldr TMP0, [CRET1] | b ->BC_TGETR_Z @@ -675,7 +725,7 @@ static void build_subroutines(BuildCtx *ctx) | b >1 | |->vmeta_tsetb: // RB = table, RC = index - | add RC, RC, TISNUM + | add_TISNUM RC, RC | add CARG2, BASE, RB, lsl #3 | add CARG3, sp, TMPDofs | str RC, TMPD @@ -711,6 +761,7 @@ static void build_subroutines(BuildCtx *ctx) |->vmeta_tsetr: | sxtw CARG3, TMP1w | str BASE, L->base + | mov CARG1, L | str PC, SAVE_PC | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | // Returns TValue *. @@ -988,13 +1039,13 @@ static void build_subroutines(BuildCtx *ctx) |1: // Field metatable must be at same offset for GCtab and GCudata! 
| ldr TAB:RB, TAB:CARG1->metatable |2: - | mov CARG1, TISNIL + | mov_nil CARG1 | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] | cbz TAB:RB, ->fff_restv | ldr TMP1w, TAB:RB->hmask - | ldr TMP2w, STR:RC->hash + | ldr TMP2w, STR:RC->sid | ldr NODE:CARG3, TAB:RB->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask + | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask | add TMP1, TMP1, TMP1, lsl #1 | movn CARG4, #~LJ_TSTR | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 @@ -1010,7 +1061,7 @@ static void build_subroutines(BuildCtx *ctx) | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48 | b ->fff_restv |5: - | cmp TMP0, TISNIL + | cmp_nil TMP0 | bne ->fff_restv | b <4 | @@ -1085,21 +1136,19 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: iterators ------------------------------------------- | |.ffunc_1 next - | checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback + | checktp CARG1, LJ_TTAB, ->fff_fallback | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. | ldr PC, [BASE, FRAME_PC] - | stp BASE, BASE, L->base // Add frame since C call can throw. - | mov CARG1, L - | add CARG3, BASE, #8 - | str PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. + | add CARG2, BASE, #8 + | sub CARG3, BASE, #16 + | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + | mov RC, #(2+1)*8 + | tbnz CRET1w, #31, ->fff_fallback // Invalid key. + | cbnz CRET1, ->fff_res // Found key/value. + | // End of traversal: return nil. | str TISNIL, [BASE, #-16] - | cbz CRET1, ->fff_res1 // End of traversal: return nil. - | ldp CARG1, CARG2, [BASE, #8] // Copy key and value to results. 
- | mov RC, #(2+1)*8 - | stp CARG1, CARG2, [BASE, #-16] - | b ->fff_res + | b ->fff_res1 | |.ffunc_1 pairs | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback @@ -1112,8 +1161,8 @@ static void build_subroutines(BuildCtx *ctx) | cbnz TAB:CARG2, ->fff_fallback #endif | mov RC, #(3+1)*8 - | stp CARG1, TISNIL, [BASE, #-8] - | str CFUNC:CARG4, [BASE, #-16] + | stp CFUNC:CARG4, CARG1, [BASE, #-16] + | str TISNIL, [BASE] | b ->fff_res | |.ffunc_2 ipairs_aux @@ -1125,14 +1174,14 @@ static void build_subroutines(BuildCtx *ctx) | add CARG2w, CARG2w, #1 | cmp CARG2w, TMP1w | ldr PC, [BASE, FRAME_PC] - | add TMP2, CARG2, TISNUM + | add_TISNUM TMP2, CARG2 | mov RC, #(0+1)*8 | str TMP2, [BASE, #-16] | bhs >2 // Not in array part? | ldr TMP0, [CARG3, CARG2, lsl #3] |1: | mov TMP1, #(2+1)*8 - | cmp TMP0, TISNIL + | cmp_nil TMP0 | str TMP0, [BASE, #-8] | csel RC, RC, TMP1, eq | b ->fff_res @@ -1155,16 +1204,21 @@ static void build_subroutines(BuildCtx *ctx) | cbnz TAB:CARG2, ->fff_fallback #endif | mov RC, #(3+1)*8 - | stp CARG1, TISNUM, [BASE, #-8] - | str CFUNC:CARG4, [BASE, #-16] + | stp CFUNC:CARG4, CARG1, [BASE, #-16] + | str TISNUM, [BASE] | b ->fff_res | |//-- Base library: catch errors ---------------------------------------- | |.ffunc pcall + | ldr TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC + | cmp TMP1, TMP2 + | blo ->fff_fallback + | cmp NARGS8:RC, #8 | ldrb TMP0w, GL->hookmask - | subs NARGS8:RC, NARGS8:RC, #8 | blo ->fff_fallback + | sub NARGS8:RC, NARGS8:RC, #8 | mov RB, BASE | add BASE, BASE, #16 | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 @@ -1180,17 +1234,22 @@ static void build_subroutines(BuildCtx *ctx) | b ->vm_call_dispatch | |.ffunc xpcall + | ldr TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC + | cmp TMP1, TMP2 + | blo ->fff_fallback | ldp CARG1, CARG2, [BASE] | ldrb TMP0w, GL->hookmask - | subs NARGS8:RC, NARGS8:RC, #16 + | subs NARGS8:TMP1, NARGS8:RC, #16 | blo ->fff_fallback | mov RB, BASE - | add BASE, BASE, #24 | asr ITYPE, CARG2, #47 | ubfx TMP0w, 
TMP0w, #HOOK_ACTIVE_SHIFT, #1 | cmn ITYPE, #-LJ_TFUNC | add PC, TMP0, #24+FRAME_PCALL | bne ->fff_fallback // Traceback must be a function. + | mov NARGS8:RC, NARGS8:TMP1 + | add BASE, BASE, #24 | stp CARG2, CARG1, [RB] // Swap function and traceback. | cbz NARGS8:RC, ->vm_call_dispatch | b <1 @@ -1344,7 +1403,7 @@ static void build_subroutines(BuildCtx *ctx) | eor CARG2w, CARG1w, CARG1w, asr #31 | movz CARG3, #0x41e0, lsl #48 // 2^31. | subs CARG1w, CARG2w, CARG1w, asr #31 - | add CARG1, CARG1, TISNUM + | add_TISNUM CARG1, CARG1 | csel CARG1, CARG1, CARG3, pl | // Fallthrough. | @@ -1435,7 +1494,7 @@ static void build_subroutines(BuildCtx *ctx) | ldr PC, [BASE, FRAME_PC] | str d0, [BASE, #-16] | mov RC, #(2+1)*8 - | add CARG2, CARG2, TISNUM + | add_TISNUM CARG2, CARG2 | str CARG2, [BASE, #-8] | b ->fff_res | @@ -1487,8 +1546,8 @@ static void build_subroutines(BuildCtx *ctx) | b <6 |.endmacro | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo + | math_minmax math_min, gt, pl + | math_minmax math_max, lt, le | |//-- String library ----------------------------------------------------- | @@ -1501,7 +1560,7 @@ static void build_subroutines(BuildCtx *ctx) | bne ->fff_fallback | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end). | ldr CARG3w, STR:CARG1->len - | add TMP0, TMP0, TISNUM + | add_TISNUM TMP0, TMP0 | str TMP0, [BASE, #-16] | mov RC, #(0+1)*8 | cbz CARG3, ->fff_res @@ -1587,7 +1646,7 @@ static void build_subroutines(BuildCtx *ctx) | str BASE, L->base | str PC, SAVE_PC | str L, GL->tmpbuf.L - | str TMP0, GL->tmpbuf.p + | str TMP0, GL->tmpbuf.w | bl extern lj_buf_putstr_ .. name | bl extern lj_buf_tostr | b ->fff_resstr @@ -1599,22 +1658,14 @@ static void build_subroutines(BuildCtx *ctx) | |//-- Bit library -------------------------------------------------------- | - |// FP number to bit conversion for soft-float. Clobbers CARG1-CARG3 + |// FP number to bit conversion. Clobbers CARG1-CARG2, FARG1-FARG2. 
|->vm_tobit_fb: | bls ->fff_fallback - | add CARG2, CARG1, CARG1 - | mov CARG3, #1076 - | sub CARG3, CARG3, CARG2, lsr #53 - | cmp CARG3, #53 - | bhi >1 - | and CARG2, CARG2, #U64x(001fffff,ffffffff) - | orr CARG2, CARG2, #U64x(00200000,00000000) - | cmp CARG1, #0 - | lsr CARG2, CARG2, CARG3 - | cneg CARG1w, CARG2w, mi - | br lr - |1: - | mov CARG1w, #0 + | fmov FARG1, CARG1 + | movz CARG2, #0x4338, lsl #48 + | fmov FARG2, CARG2 + | fadd FARG1, FARG1, FARG2 + | fmov CARG1w, s0 | br lr | |.macro .ffunc_bit, name @@ -1647,17 +1698,17 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc_bit tobit | mov TMP0w, CARG1w |9: // Label reused by .ffunc_bit_op users. - | add CARG1, TMP0, TISNUM + | add_TISNUM CARG1, TMP0 | b ->fff_restv | |.ffunc_bit bswap | rev TMP0w, CARG1w - | add CARG1, TMP0, TISNUM + | add_TISNUM CARG1, TMP0 | b ->fff_restv | |.ffunc_bit bnot | mvn TMP0w, CARG1w - | add CARG1, TMP0, TISNUM + | add_TISNUM CARG1, TMP0 | b ->fff_restv | |.macro .ffunc_bit_sh, name, ins, shmod @@ -1678,7 +1729,7 @@ static void build_subroutines(BuildCtx *ctx) | checkint CARG1, ->vm_tobit_fb |2: | ins TMP0w, CARG1w, TMP1w - | add CARG1, TMP0, TISNUM + | add_TISNUM CARG1, TMP0 | b ->fff_restv |.endmacro | @@ -1703,7 +1754,7 @@ static void build_subroutines(BuildCtx *ctx) | cmp TMP1, TMP2 | mov CARG1, L | bhi >5 // Need to grow stack. - | blr CARG3 // (lua_State *L) + | blr_auth CARG3 // (lua_State *L) | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | ldr BASE, L->base | cmp CRET1w, #0 @@ -1739,6 +1790,7 @@ static void build_subroutines(BuildCtx *ctx) | |->fff_gcstep: // Call GC step function. | // BASE = new base, RC = nargs*8 + | sp_auth | add CARG2, BASE, NARGS8:RC // Calculate L->top. | mov RA, lr | stp BASE, CARG2, L->base @@ -1750,7 +1802,7 @@ static void build_subroutines(BuildCtx *ctx) | mov lr, RA // Help return address predictor. | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8. 
| and CFUNC:CARG3, CARG3, #LJ_GCVMASK - | ret + | ret_auth | |//----------------------------------------------------------------------- |//-- Special dispatch targets ------------------------------------------- @@ -1777,7 +1829,7 @@ static void build_subroutines(BuildCtx *ctx) | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? |5: // Re-dispatch to static ins. | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] - | br TMP0 + | br_auth TMP0 | |->vm_inshook: // Dispatch target for instr/line hooks. | ldrb TMP2w, GL->hookmask @@ -1803,7 +1855,7 @@ static void build_subroutines(BuildCtx *ctx) | decode_RA RA, INS | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] | decode_RD RC, INS - | br TMP0 + | br_auth TMP0 | |->cont_hook: // Continue from hook yield. | ldr CARG1, [CARG4, #-40] @@ -1853,7 +1905,7 @@ static void build_subroutines(BuildCtx *ctx) | sub NARGS8:RC, TMP1, BASE | ldr INSw, [PC, #-4] | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | br CRET1 + | br_auth CRET1 | |->cont_stitch: // Trace stitching. |.if JIT @@ -1866,8 +1918,7 @@ static void build_subroutines(BuildCtx *ctx) | and CARG3, CARG3, #LJ_GCVMASK | beq >2 |1: // Move results down. - | ldr CARG1, [RA] - | add RA, RA, #8 + | ldr CARG1, [RA], #8 | subs RB, RB, #8 | str CARG1, [BASE, RC, lsl #3] | add RC, RC, #1 @@ -1982,13 +2033,11 @@ static void build_subroutines(BuildCtx *ctx) |.if JIT | ldr L, SAVE_L |1: - | cmp CARG1w, #0 - | blt >9 // Check for error from exit. - | lsl RC, CARG1, #3 + | init_constants + | cmn CARG1w, #LUA_ERRERR + | bhs >9 // Check for error from exit. | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 + | lsl RC, CARG1, #3 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | str RCw, SAVE_MULTRES | str BASE, L->base @@ -2000,6 +2049,8 @@ static void build_subroutines(BuildCtx *ctx) | ldrb RBw, [PC, # OFS_OP] | ldr INSw, [PC], #4 | st_vmstate CARG4w + | cmn CARG1w, #17 // Static dispatch? 
+ | beq >5 | cmp RBw, #BC_FUNCC+2 // Fast function? | add TMP1, GL, INS, uxtb #3 | bhs >4 @@ -2010,13 +2061,13 @@ static void build_subroutines(BuildCtx *ctx) | decode_RA RA, INS | lsr TMP0, INS, #16 | csel RC, TMP0, RC, lo - | blo >5 + | blo >3 | ldr CARG3, [BASE, FRAME_FUNC] | sub RC, RC, #8 | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - |5: - | br RB + |3: + | br_auth RB | |4: // Check frame below fast function. | ldr CARG1, [BASE, FRAME_PC] @@ -2032,10 +2083,21 @@ static void build_subroutines(BuildCtx *ctx) | ldr KBASE, [CARG3, #PC2PROTO(k)] | b <2 | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | ldr RA, [GL, #GL_J(trace)] + | decode_RD RC, INS + | ldr TRACE:RA, [RA, RC, lsl #3] + | ldr INSw, TRACE:RA->startins + | add TMP0, GL, INS, uxtb #3 + | decode_RA RA, INS + | ldr RB, [TMP0, #GG_G2DISP+GG_DISP2STATIC] + | decode_RD RC, INS + | br_auth RB + | |9: // Rethrow error from the right C frame. - | neg CARG2, CARG1 + | neg CARG2w, CARG1w | mov CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) + | bl extern lj_err_trace // (lua_State *L, int errcode) |.endif | |//----------------------------------------------------------------------- @@ -2064,12 +2126,69 @@ static void build_subroutines(BuildCtx *ctx) |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_RES, CARG1 + |.define NEXT_IDX, CARG2w + |.define NEXT_LIM, CARG3w + |.define NEXT_TMP0, TMP0 + |.define NEXT_TMP0w, TMP0w + |.define NEXT_TMP1, TMP1 + |.define NEXT_TMP1w, TMP1w + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, [sp] + |.define NEXT_RES_KEY, [sp, #8] + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2w. 
+ |->vm_next: + |.if JIT + | ldr NEXT_LIM, NEXT_TAB->asize + | ldr NEXT_TMP1, NEXT_TAB->array + |1: // Traverse array part. + | subs NEXT_TMP0w, NEXT_IDX, NEXT_LIM + | bhs >5 // Index points after array part? + | ldr NEXT_TMP0, [NEXT_TMP1, NEXT_IDX, uxtw #3] + | cmn NEXT_TMP0, #-LJ_TNIL + | cinc NEXT_IDX, NEXT_IDX, eq + | beq <1 // Skip holes in array part. + | str NEXT_TMP0, NEXT_RES_VAL + | movz NEXT_TMP0w, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | stp NEXT_IDX, NEXT_TMP0w, NEXT_RES_KEY + | add NEXT_IDX, NEXT_IDX, #1 + | mov NEXT_RES, NEXT_RES_PTR + |4: + | ret + | + |5: // Traverse hash part. + | ldr NEXT_TMP1w, NEXT_TAB->hmask + | ldr NODE:NEXT_RES, NEXT_TAB->node + | add NEXT_TMP0w, NEXT_TMP0w, NEXT_TMP0w, lsl #1 + | add NEXT_LIM, NEXT_LIM, NEXT_TMP1w + | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP0w, uxtw #3 + |6: + | cmp NEXT_IDX, NEXT_LIM + | bhi >9 + | ldr NEXT_TMP0, NODE:NEXT_RES->val + | cmn NEXT_TMP0, #-LJ_TNIL + | add NEXT_IDX, NEXT_IDX, #1 + | bne <4 + | // Skip holes in hash part. + | add NODE:NEXT_RES, NODE:NEXT_RES, #sizeof(Node) + | b <6 + | + |9: // End of iteration. Set the key to nil (not the value). + | movn NEXT_TMP0, #0 + | str NEXT_TMP0, NEXT_RES_KEY + | mov NEXT_RES, NEXT_RES_PTR + | ret + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- | |// Handler for callback functions. - |// Saveregs already performed. Callback slot number in [sp], g in r12. + |// Saveregs already performed. Callback slot number in w9, g in x10. |->vm_ffi_callback: |.if FFI |.type CTSTATE, CTState, PC @@ -2093,9 +2212,7 @@ static void build_subroutines(BuildCtx *ctx) | bl extern lj_ccallback_enter // (CTState *cts, void *cf) | // Returns lua_State *. 
| ldp BASE, RC, L:CRET1->base - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 + | init_constants | mov L, CRET1 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | sub RC, RC, BASE @@ -2121,21 +2238,22 @@ static void build_subroutines(BuildCtx *ctx) | // Caveat: needs special frame unwinding, see below. |.if FFI | .type CCSTATE, CCallState, x19 - | stp fp, lr, [sp, #-32]! - | add fp, sp, #0 - | str CCSTATE, [sp, #16] + | sp_auth + | stp_unwind CCSTATE, x20, [sp, #-32]! + | stp fp, lr, [sp, #16] + | add fp, sp, #16 | mov CCSTATE, x0 | ldr TMP0w, CCSTATE:x0->spadj | ldrb TMP1w, CCSTATE->nsp | add TMP2, CCSTATE, #offsetof(CCallState, stack) - | subs TMP1, TMP1, #1 + | subs TMP1, TMP1, #8 | ldr TMP3, CCSTATE->func - | sub sp, fp, TMP0 + | sub sp, sp, TMP0 | bmi >2 |1: // Copy stack slots - | ldr TMP0, [TMP2, TMP1, lsl #3] - | str TMP0, [sp, TMP1, lsl #3] - | subs TMP1, TMP1, #1 + | ldr TMP0, [TMP2, TMP1] + | str TMP0, [sp, TMP1] + | subs TMP1, TMP1, #8 | bpl <1 |2: | ldp x0, x1, CCSTATE->gpr[0] @@ -2147,14 +2265,14 @@ static void build_subroutines(BuildCtx *ctx) | ldp x6, x7, CCSTATE->gpr[6] | ldp d6, d7, CCSTATE->fpr[6] | ldr x8, CCSTATE->retp - | blr TMP3 - | mov sp, fp + | blr_auth TMP3 + | sub sp, fp, #16 | stp x0, x1, CCSTATE->gpr[0] | stp d0, d1, CCSTATE->fpr[0] | stp d2, d3, CCSTATE->fpr[2] - | ldr CCSTATE, [sp, #16] - | ldp fp, lr, [sp], #32 - | ret + | ldp fp, lr, [sp, #16] + | ldp_unwind CCSTATE, x20, [sp], #32 + | ret_auth |.endif |// Note: vm_ffi_call must be the last function in this object file! | @@ -2473,7 +2591,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bne >5 | negs TMP0w, TMP0w | movz CARG3, #0x41e0, lsl #48 // 2^31. 
- | add TMP0, TMP0, TISNUM + | add_TISNUM TMP0, TMP0 | csel TMP0, TMP0, CARG3, vc |5: | str TMP0, [BASE, RA, lsl #3] @@ -2488,7 +2606,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bne >2 | ldr CARG1w, STR:CARG1->len |1: - | add CARG1, CARG1, TISNUM + | add_TISNUM CARG1, CARG1 | str CARG1, [BASE, RA, lsl #3] | ins_next | @@ -2575,7 +2693,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.macro ins_arithmod, res, reg1, reg2 | fdiv d2, reg1, reg2 | frintm d2, d2 - | fmsub res, d2, reg2, reg1 + | // Cannot use fmsub, because FMA is not enabled by default. + | fmul d2, d2, reg2 + | fsub res, reg1, d2 |.endmacro | |.macro ins_arithdn, intins, fpins @@ -2594,7 +2714,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | intins CARG1w, CARG1w, CARG2w | ins_arithfallback bvs |.endif - | add CARG1, CARG1, TISNUM + | add_TISNUM CARG1, CARG1 | str CARG1, [BASE, RA, lsl #3] |4: | ins_next @@ -2687,7 +2807,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_KSHORT: | // RA = dst, RC = int16_literal | sxth RCw, RCw - | add TMP0, RC, TISNUM + | add_TISNUM TMP0, RC | str TMP0, [BASE, RA, lsl #3] | ins_next break; @@ -2779,7 +2899,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |2: // Check if string is white and ensure upvalue is closed. | ldrb TMP0w, UPVAL:CARG1->closed | tst TMP1w, #LJ_GC_WHITES // iswhite(str) - | ccmp TMP0w, #0, #0, ne + | ccmp TMP0w, #0, #4, ne | beq <1 | // Crossed a write barrier. Move the barrier forward. | mov CARG1, GL @@ -2883,7 +3003,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_GGET: | // RA = dst, RC = str_const (~) case BC_GSET: - | // RA = dst, RC = str_const (~) + | // RA = src, RC = str_const (~) | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] | mvn RC, RC | and LFUNC:CARG1, CARG1, #LJ_GCVMASK @@ -2910,7 +3030,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmp TMP1w, CARG1w // In array part? 
| bhs ->vmeta_tgetv | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL + | cmp_nil TMP0 | beq >5 |1: | str TMP0, [BASE, RA, lsl #3] @@ -2941,9 +3061,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst | ldr TMP1w, TAB:CARG2->hmask - | ldr TMP2w, STR:RC->hash + | ldr TMP2w, STR:RC->sid | ldr NODE:CARG3, TAB:CARG2->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask + | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask | add TMP1, TMP1, TMP1, lsl #1 | movn CARG4, #~LJ_TSTR | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 @@ -2953,7 +3073,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ldr NODE:CARG3, NODE:CARG3->next | cmp CARG1, CARG4 | bne >4 - | cmp TMP0, TISNIL + | cmp_nil TMP0 | beq >5 |3: | str TMP0, [BASE, RA, lsl #3] @@ -2962,7 +3082,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |4: // Follow hash chain. | cbnz NODE:CARG3, <1 | // End of hash chain: key not found, nil result. - | mov TMP0, TISNIL + | mov_nil TMP0 | |5: // Check for __index if table value is nil. | ldr TAB:CARG1, TAB:CARG2->metatable @@ -2983,7 +3103,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmp RCw, CARG1w // In array part? | bhs ->vmeta_tgetb | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL + | cmp_nil TMP0 | beq >5 |1: | str TMP0, [BASE, RA, lsl #3] @@ -3030,7 +3150,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ldr TMP1, [CARG3] | ldr TMP0, [BASE, RA, lsl #3] | ldrb TMP2w, TAB:CARG2->marked - | cmp TMP1, TISNIL // Previous value is nil? + | cmp_nil TMP1 // Previous value is nil? 
| beq >5 |1: | str TMP0, [CARG3] @@ -3067,9 +3187,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src | ldr TMP1w, TAB:CARG2->hmask - | ldr TMP2w, STR:RC->hash + | ldr TMP2w, STR:RC->sid | ldr NODE:CARG3, TAB:CARG2->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask + | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask | add TMP1, TMP1, TMP1, lsl #1 | movn CARG4, #~LJ_TSTR | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 @@ -3082,7 +3202,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmp CARG1, CARG4 | bne >5 | ldr TMP0, [BASE, RA, lsl #3] - | cmp TMP1, TISNIL // Previous value is nil? + | cmp_nil TMP1 // Previous value is nil? | beq >4 |2: | str TMP0, NODE:CARG3->val @@ -3141,7 +3261,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ldr TMP1, [CARG3] | ldr TMP0, [BASE, RA, lsl #3] | ldrb TMP2w, TAB:CARG2->marked - | cmp TMP1, TISNIL // Previous value is nil? + | cmp_nil TMP1 // Previous value is nil? | beq >5 |1: | str TMP0, [CARG3] @@ -3240,9 +3360,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_CALL_Z: | mov RB, BASE // Save old BASE for vmeta_call. | add BASE, BASE, RA, lsl #3 - | ldr CARG3, [BASE] + | ldr CARG3, [BASE], #16 | sub NARGS8:RC, NARGS8:RC, #8 - | add BASE, BASE, #16 | checkfunc CARG3, ->vmeta_call | ins_call break; @@ -3258,9 +3377,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = base, (RB = 0,) RC = (nargs+1)*8 |->BC_CALLT1_Z: | add RA, BASE, RA, lsl #3 - | ldr TMP1, [RA] + | ldr TMP1, [RA], #16 | sub NARGS8:RC, NARGS8:RC, #8 - | add RA, RA, #16 | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt | ldr PC, [BASE, FRAME_PC] |->BC_CALLT2_Z: @@ -3320,10 +3438,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) |.if JIT - | // NYI: add hotloop, record BC_ITERN. 
+ | hotloop |.endif + |->vm_IITERN: + | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | add RA, BASE, RA, lsl #3 | ldr TAB:RB, [RA, #-16] | ldrh TMP3w, [PC, # OFS_RD] @@ -3339,10 +3458,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add CARG3, CARG2, CARG1, lsl #3 | bhs >5 // Index points after array part? | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL + | cmp_nil TMP0 | cinc CARG1, CARG1, eq // Skip holes in array part. | beq <1 - | add CARG1, CARG1, TISNUM + | add_TISNUM CARG1, CARG1 | stp CARG1, TMP0, [RA] | add CARG1, CARG1, #1 |3: @@ -3360,7 +3479,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 | bhi <4 | ldp TMP0, CARG1, NODE:CARG3->val - | cmp TMP0, TISNIL + | cmp_nil TMP0 | add RC, RC, #1 | beq <6 // Skip holes in hash part. | stp CARG1, TMP0, [RA] @@ -3378,11 +3497,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checkfunc CFUNC:CARG1, >5 | asr TMP0, TAB:CARG3, #47 | ldrb TMP1w, CFUNC:CARG1->ffid - | cmn TMP0, #-LJ_TTAB - | ccmp CARG4, TISNIL, #0, eq + | cmp_nil CARG4 + | ccmn TMP0, #-LJ_TTAB, #0, eq | ccmp TMP1w, #FF_next_N, #0, eq | bne >5 - | mov TMP0w, #0xfffe7fff + | mov TMP0w, #0xfffe7fff // LJ_KEYINDEX | lsl TMP0, TMP0, #32 | str TMP0, [RA, #-8] // Initialize control var. |1: @@ -3390,11 +3509,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next | |5: // Despecialize bytecode if any of the checks fail. + |.if JIT + | ldrb TMP2w, [RC, # OFS_OP] + |.endif | mov TMP0, #BC_JMP | mov TMP1, #BC_ITERC | strb TMP0w, [PC, #-4+OFS_OP] + |.if JIT + | cmp TMP2w, #BC_ITERN + | bne >6 + |.endif | strb TMP1w, [RC, # OFS_OP] | b <1 + |.if JIT + |6: // Unpatch JLOOP. 
+ | ldr RA, [GL, #GL_J(trace)] + | ldrh TMP2w, [RC, # OFS_RD] + | ldr TRACE:RA, [RA, TMP2, lsl #3] + | ldr TMP2w, TRACE:RA->startins + | bfxil TMP2w, TMP1w, #0, #8 + | str TMP2w, [RC] + | b <1 + |.endif break; case BC_VARG: @@ -3402,51 +3538,51 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | and RC, RC, #255 | // RA = base, RB = (nresults+1), RC = numparams | ldr TMP1, [BASE, FRAME_PC] - | add RC, BASE, RC, lsl #3 - | add RA, BASE, RA, lsl #3 - | add RC, RC, #FRAME_VARG - | add TMP2, RA, RB, lsl #3 - | sub RC, RC, TMP1 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. + | add TMP0, BASE, RC, lsl #3 + | add RC, BASE, RA, lsl #3 // RC = destination + | add TMP0, TMP0, #FRAME_VARG + | add TMP2, RC, RB, lsl #3 + | sub RA, TMP0, TMP1 // RA = vbase + | // Note: RA may now be even _above_ BASE if nargs was < numparams. | sub TMP3, BASE, #16 // TMP3 = vtop | cbz RB, >5 | sub TMP2, TMP2, #16 |1: // Copy vararg slots to destination slots. - | cmp RC, TMP3 - | ldr TMP0, [RC], #8 - | csel TMP0, TMP0, TISNIL, lo - | cmp RA, TMP2 - | str TMP0, [RA], #8 + | cmp RA, TMP3 + | ldr TMP0, [RA], #8 + | csinv TMP0, TMP0, xzr, lo // TISNIL = ~xzr + | cmp RC, TMP2 + | str TMP0, [RC], #8 | blo <1 |2: | ins_next | |5: // Copy all varargs. | ldr TMP0, L->maxstack - | subs TMP2, TMP3, RC + | subs TMP2, TMP3, RA | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8 | add RB, RB, #8 - | add TMP1, RA, TMP2 + | add TMP1, RC, TMP2 | str RBw, SAVE_MULTRES | ble <2 // Nothing to copy. | cmp TMP1, TMP0 | bhi >7 |6: - | ldr TMP0, [RC], #8 - | str TMP0, [RA], #8 - | cmp RC, TMP3 + | ldr TMP0, [RA], #8 + | str TMP0, [RC], #8 + | cmp RA, TMP3 | blo <6 | b <2 | |7: // Grow stack for varargs. | lsr CARG2, TMP2, #3 - | stp BASE, RA, L->base + | stp BASE, RC, L->base | mov CARG1, L - | sub RC, RC, BASE // Need delta, because BASE may change. + | sub RA, RA, BASE // Need delta, because BASE may change. 
| str PC, SAVE_PC | bl extern lj_state_growstack // (lua_State *L, int n) - | ldp BASE, RA, L->base - | add RC, BASE, RC + | ldp BASE, RC, L->base + | add RA, BASE, RA | sub TMP3, BASE, #16 | b <6 break; @@ -3590,7 +3726,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } else { | adds CARG1w, CARG1w, CARG3w | bvs >2 - | add TMP0, CARG1, TISNUM + | add_TISNUM TMP0, CARG1 | tbnz CARG3w, #31, >4 | cmp CARG1w, CARG2w } @@ -3669,7 +3805,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = base, RC = target | ldr CARG1, [BASE, RA, lsl #3] | add TMP1, BASE, RA, lsl #3 - | cmp CARG1, TISNIL + | cmp_nil CARG1 | beq >1 // Stop if iterator returned nil. if (op == BC_JITERL) { | str CARG1, [TMP1, #-8] @@ -3702,15 +3838,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.if JIT | // RA = base (ignored), RC = traceno | ldr CARG1, [GL, #GL_J(trace)] - | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0. + | st_vmstate wzr // Traces on ARM64 don't store the trace #, so use 0. | ldr TRACE:RC, [CARG1, RC, lsl #3] - | st_vmstate CARG2w + |.if PAUTH + | ldr RA, TRACE:RC->mcauth + |.else | ldr RA, TRACE:RC->mcode + |.endif | str BASE, GL->jit_base | str L, GL->tmpbuf.L | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. + |.if PAUTH + | braa RA, RC + |.else | br RA |.endif + |.endif break; case BC_JMP: @@ -3771,6 +3914,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add TMP2, BASE, RC | add LFUNC:CARG3, CARG3, TMP0, lsl #47 | add RA, RA, RC + | sub CARG1, CARG1, #8 | add TMP0, RC, #16+FRAME_VARG | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. | ldr KBASE, [PC, #-4+PC2PROTO(k)] @@ -3820,7 +3964,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov CARG1, L | bhi ->vm_growstack_c // Need to grow stack. | st_vmstate TMP0w - | blr CARG4 // (lua_State *L [, lua_CFunction f]) + | blr_auth CARG4 // (lua_State *L [, lua_CFunction f]) | // Returns nresults. 
| ldp BASE, TMP1, L->base | str L, GL->cur_L @@ -3859,7 +4003,7 @@ static int build_backend(BuildCtx *ctx) static void emit_asm_debug(BuildCtx *ctx) { int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i, cf = CFRAME_SIZE >> 3; + int i; switch (ctx->mode) { case BUILD_elfasm: fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); @@ -3873,7 +4017,7 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.uleb128 0x1\n" "\t.sleb128 -8\n" "\t.byte 30\n" /* Return address is in lr. */ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ "\t.align 3\n" ".LECIE0:\n\n"); fprintf(ctx->fp, @@ -3883,15 +4027,14 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long .Lframe0\n" "\t.quad .Lbegin\n" "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ - fcofs, CFRAME_SIZE, cf, cf-1); + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ + fcofs); for (i = 19; i <= 28; i++) /* offset x19-x28 */ - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); for (i = 8; i <= 15; i++) /* offset d8-d15 */ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", - 64+i, cf-i-4); + 64+i, i+(3+(28-19+1)-8)); fprintf(ctx->fp, "\t.align 3\n" ".LEFDE0:\n\n"); @@ -3903,13 +4046,14 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long .Lframe0\n" "\t.quad lj_vm_ffi_call\n" "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ + "\t.byte 
0x94\n\t.uleb128 4\n" /* offset x20 */ "\t.align 3\n" ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); #endif +#if !LJ_NO_UNWIND fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n"); fprintf(ctx->fp, ".Lframe1:\n" @@ -3925,7 +4069,7 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.byte 0x1b\n" /* pcrel|sdata4 */ "\t.long lj_err_unwind_dwarf-.\n" "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ "\t.align 3\n" ".LECIE1:\n\n"); fprintf(ctx->fp, @@ -3936,15 +4080,14 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long .Lbegin-.\n" "\t.long %d\n" "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ - fcofs, CFRAME_SIZE, cf, cf-1); + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ + fcofs); for (i = 19; i <= 28; i++) /* offset x19-x28 */ - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); for (i = 8; i <= 15; i++) /* offset d8-d15 */ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", - 64+i, cf-i-4); + 64+i, i+(3+(28-19+1)-8)); fprintf(ctx->fp, "\t.align 3\n" ".LEFDE2:\n\n"); @@ -3961,7 +4104,7 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.byte 30\n" /* Return address is in lr. 
*/ "\t.uleb128 1\n" /* augmentation length */ "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ "\t.align 3\n" ".LECIE2:\n\n"); fprintf(ctx->fp, @@ -3972,14 +4115,106 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long lj_vm_ffi_call-.\n" "\t.long %d\n" "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ + "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ "\t.align 3\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); #endif +#endif + break; +#if !LJ_NO_UNWIND + case BUILD_machasm: { +#if LJ_HASFFI + int fcsize = 0; +#endif + int j; + fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); + fprintf(ctx->fp, + "EH_frame1:\n" + "\t.set L$set$x,LECIEX-LSCIEX\n" + "\t.long L$set$x\n" + "LSCIEX:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.ascii \"zPR\\0\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 30\n" /* Return address is in lr. 
*/ + "\t.uleb128 6\n" /* augmentation length */ + "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ + "\t.long _lj_err_unwind_dwarf@GOT-.\n" + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ + "\t.align 3\n" + "LECIEX:\n\n"); + for (j = 0; j < ctx->nsym; j++) { + const char *name = ctx->sym[j].name; + int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs; + if (size == 0) continue; +#if LJ_HASFFI + if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } +#endif + fprintf(ctx->fp, + "LSFDE%d:\n" + "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" + "\t.long L$set$%d\n" + "LASFDE%d:\n" + "\t.long LASFDE%d-EH_frame1\n" + "\t.long %s-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ + j, j, j, j, j, j, j, name, size); + for (i = 19; i <= 28; i++) /* offset x19-x28 */ + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); + for (i = 8; i <= 15; i++) /* offset d8-d15 */ + fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", + 64+i, i+(3+(28-19+1)-8)); + fprintf(ctx->fp, + "\t.align 3\n" + "LEFDE%d:\n\n", j); + } +#if LJ_HASFFI + if (fcsize) { + fprintf(ctx->fp, + "EH_frame2:\n" + "\t.set L$set$y,LECIEY-LSCIEY\n" + "\t.long L$set$y\n" + "LSCIEY:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.ascii \"zR\\0\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 30\n" /* Return address is in lr. 
*/ + "\t.uleb128 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ + "\t.align 3\n" + "LECIEY:\n\n"); + fprintf(ctx->fp, + "LSFDEY:\n" + "\t.set L$set$yy,LEFDEY-LASFDEY\n" + "\t.long L$set$yy\n" + "LASFDEY:\n" + "\t.long LASFDEY-EH_frame2\n" + "\t.long _lj_vm_ffi_call-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ + "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ + "\t.align 3\n" + "LEFDEY:\n\n", fcsize); + } +#endif + } break; +#endif default: break; } diff --git a/source/libs/luajit/LuaJIT-src/src/vm_mips.dasc b/source/libs/luajit/LuaJIT-src/src/vm_mips.dasc index 1afd61187a75e47ab52b3eadd29582b4c06817ae..02e588eebdc8ee0d622ca05a72a1ce62a34e01ac 100644 --- a/source/libs/luajit/LuaJIT-src/src/vm_mips.dasc +++ b/source/libs/luajit/LuaJIT-src/src/vm_mips.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for MIPS CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h |// |// MIPS soft-float support contributed by Djordje Kovacevic and |// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc. @@ -190,7 +190,7 @@ |//----------------------------------------------------------------------- | |// Trap for not-yet-implemented parts. -|.macro NYI; .long 0xf0f0f0f0; .endmacro +|.macro NYI; .long 0xec1cf0f0; .endmacro | |// Macros to mark delay slots. |.macro ., a; a; .endmacro @@ -399,7 +399,7 @@ static void build_subroutines(BuildCtx *ctx) | xori AT, TMP0, FRAME_C | and TMP2, PC, TMP2 | bnez AT, ->vm_returnp - | subu TMP2, BASE, TMP2 // TMP2 = previous base. + |. subu TMP2, BASE, TMP2 // TMP2 = previous base. 
| | addiu TMP1, RD, -8 | sw TMP2, L->base @@ -501,6 +501,10 @@ static void build_subroutines(BuildCtx *ctx) | b ->vm_returnc |. li RD, 16 // 2 results: false + error message. | + |->vm_unwind_stub: // Jump to exit stub from unwinder. + | jr CARG1 + |. move ra, CARG2 + | |//----------------------------------------------------------------------- |//-- Grow stack for calls ----------------------------------------------- |//----------------------------------------------------------------------- @@ -669,11 +673,11 @@ static void build_subroutines(BuildCtx *ctx) |.endif | lw PC, -16+HI(RB) // Restore PC from [cont|PC]. | addu TMP2, RA, RD - | lw TMP1, LFUNC:TMP1->pc |.if FFI | bnez AT, >1 |.endif |. sw TISNIL, -8+HI(TMP2) // Ensure one valid arg. + | lw TMP1, LFUNC:TMP1->pc | // BASE = base, RA = resultptr, RB = meta base | jr TMP0 // Jump to continuation. |. lw KBASE, PC2PROTO(k)(TMP1) @@ -1152,9 +1156,9 @@ static void build_subroutines(BuildCtx *ctx) |. li SFARG1HI, LJ_TNIL | lw TMP0, TAB:SFARG1LO->hmask | li SFARG1HI, LJ_TTAB // Use metatable as default result. - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | lw NODE:TMP2, TAB:SFARG1LO->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 @@ -1258,35 +1262,27 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: iterators ------------------------------------------- | |.ffunc next - | lw CARG1, HI(BASE) - | lw TAB:CARG2, LO(BASE) + | lw CARG2, HI(BASE) + | lw TAB:CARG1, LO(BASE) | beqz NARGS8:RC, ->fff_fallback |. addu TMP2, BASE, NARGS8:RC | li AT, LJ_TTAB | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil. - | bne CARG1, AT, ->fff_fallback + | bne CARG2, AT, ->fff_fallback |. lw PC, FRAME_PC(BASE) | load_got lj_tab_next - | sw BASE, L->base // Add frame since C call can throw. - | sw BASE, L->top // Dummy frame length is ok. 
- | addiu CARG3, BASE, 8 - | sw PC, SAVE_PC - | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - |. move CARG1, L - | // Returns 0 at end of traversal. + | addiu CARG2, BASE, 8 + | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + |. addiu CARG3, BASE, -8 + | // Returns 1=found, 0=end, -1=error. + | addiu RA, BASE, -8 + | bgtz CRET1, ->fff_res // Found key/value. + |. li RD, (2+1)*8 | beqz CRET1, ->fff_restv // End of traversal: return nil. |. li SFARG1HI, LJ_TNIL - | lw TMP0, 8+HI(BASE) - | lw TMP1, 8+LO(BASE) - | addiu RA, BASE, -8 - | lw TMP2, 16+HI(BASE) - | lw TMP3, 16+LO(BASE) - | sw TMP0, HI(RA) - | sw TMP1, LO(RA) - | sw TMP2, 8+HI(RA) - | sw TMP3, 8+LO(RA) - | b ->fff_res - |. li RD, (2+1)*8 + | lw CFUNC:RB, FRAME_FUNC(BASE) + | b ->fff_fallback // Invalid key. + |. li RC, 2*8 | |.ffunc_1 pairs | li AT, LJ_TTAB @@ -1378,9 +1374,13 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: catch errors ---------------------------------------- | |.ffunc pcall + | lw TMP1, L->maxstack + | addu TMP2, BASE, NARGS8:RC | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) | beqz NARGS8:RC, ->fff_fallback - | move TMP2, BASE + |. sltu AT, TMP1, TMP2 + | bnez AT, ->fff_fallback + |. move TMP2, BASE | addiu BASE, BASE, 8 | // Remember active hook before pcall. | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT @@ -1390,8 +1390,12 @@ static void build_subroutines(BuildCtx *ctx) |. addiu NARGS8:RC, NARGS8:RC, -8 | |.ffunc xpcall + | lw TMP1, L->maxstack + | addu TMP2, BASE, NARGS8:RC | sltiu AT, NARGS8:RC, 16 | lw CARG4, 8+HI(BASE) + | sltu TMP1, TMP1, TMP2 + | or AT, AT, TMP1 | bnez AT, ->fff_fallback |. lw CARG3, 8+LO(BASE) | lw CARG1, LO(BASE) @@ -1768,7 +1772,7 @@ static void build_subroutines(BuildCtx *ctx) | b ->fff_res |. 
li RD, (2+1)*8 | - |.macro math_minmax, name, intins, fpins + |.macro math_minmax, name, intins, ismax | .ffunc_1 name | addu TMP3, BASE, NARGS8:RC | bne SFARG1HI, TISNUM, >5 @@ -1822,13 +1826,21 @@ static void build_subroutines(BuildCtx *ctx) |.endif |7: |.if FPU + |.if ismax + | c.olt.d FARG1, FRET1 + |.else | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 + |.endif + | movf.d FRET1, FARG1 + |.else + |.if ismax + | bal ->vm_sfcmpogt |.else | bal ->vm_sfcmpolt + |.endif |. nop - | intins SFARG1LO, SFARG2LO, CRET1 - | intins SFARG1HI, SFARG2HI, CRET1 + | movz SFARG1LO, SFARG2LO, CRET1 + | movz SFARG1HI, SFARG2HI, CRET1 |.endif | b <6 |. addiu TMP2, TMP2, 8 @@ -1849,8 +1861,8 @@ static void build_subroutines(BuildCtx *ctx) | |.endmacro | - | math_minmax math_min, movz, movf.d - | math_minmax math_max, movn, movt.d + | math_minmax math_min, movz, 0 + | math_minmax math_max, movn, 1 | |//-- String library ----------------------------------------------------- | @@ -1959,7 +1971,7 @@ static void build_subroutines(BuildCtx *ctx) | lw TMP0, SBUF:CARG1->b | sw L, SBUF:CARG1->L | sw BASE, L->base - | sw TMP0, SBUF:CARG1->p + | sw TMP0, SBUF:CARG1->w | call_intern extern lj_buf_putstr_ .. name |. sw PC, SAVE_PC | load_got lj_buf_tostr @@ -2462,7 +2474,8 @@ static void build_subroutines(BuildCtx *ctx) | addiu DISPATCH, JGL, -GG_DISP2G-32768 | sw BASE, L->base |1: - | bltz CRET1, >9 // Check for error from exit. + | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit. + | beqz TMP0, >9 |. lw LFUNC:RB, FRAME_FUNC(BASE) | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | sll MULTRES, CRET1, 3 @@ -2476,14 +2489,16 @@ static void build_subroutines(BuildCtx *ctx) | .FPU cvt.d.s TOBIT, TOBIT | // Modified copy of ins_next which handles function header dispatch, too. | lw INS, 0(PC) - | addiu PC, PC, 4 + | addiu CRET1, CRET1, 17 // Static dispatch? 
| // Assumes TISNIL == ~LJ_VMST_INTERP == -1 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) + | decode_RD8a RD, INS + | beqz CRET1, >5 + |. addiu PC, PC, 4 | decode_OP4a TMP1, INS | decode_OP4b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*4 | addu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS + | sltiu TMP2, TMP1, BC_FUNCF*4 | lw AT, 0(TMP0) | decode_RA8a RA, INS | beqz TMP2, >2 @@ -2511,10 +2526,26 @@ static void build_subroutines(BuildCtx *ctx) | jr AT |. addu RA, RA, BASE | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | lw TMP0, DISPATCH_J(trace)(DISPATCH) + | decode_RD4b RD + | addu TMP0, TMP0, RD + | lw TRACE:TMP2, 0(TMP0) + | lw INS, TRACE:TMP2->startins + | decode_OP4a TMP1, INS + | decode_OP4b TMP1 + | addu TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | lw AT, GG_DISP2STATIC(TMP0) + | decode_RA8a RA, INS + | decode_RD8b RD + | jr AT + |. decode_RA8b RA + | |9: // Rethrow error from the right C frame. - | load_got lj_err_throw - | negu CARG2, CRET1 - | call_intern lj_err_throw // (lua_State *L, int errcode) + | load_got lj_err_trace + | sub CARG2, r0, CRET1 + | call_intern lj_err_trace // (lua_State *L, int errcode) |. move CARG1, L |.endif | @@ -2692,6 +2723,43 @@ static void build_subroutines(BuildCtx *ctx) |. move CRET1, CRET2 |.endif | + |->vm_sfcmpogt: + |.if not FPU + | sll AT, SFARG2HI, 1 + | sll TMP0, SFARG1HI, 1 + | or CRET1, SFARG2LO, SFARG1LO + | or TMP1, AT, TMP0 + | or TMP1, TMP1, CRET1 + | beqz TMP1, >8 // Both args +-0: return 0. + |. sltu CRET1, r0, SFARG2LO + | lui TMP1, 0xffe0 + | addu AT, AT, CRET1 + | sltu CRET1, r0, SFARG1LO + | sltu AT, TMP1, AT + | addu TMP0, TMP0, CRET1 + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, SFARG2HI, SFARG1HI + | bltz AT, >5 // Both args negative? + |. nop + | beq SFARG2HI, SFARG1HI, >8 + |. sltu CRET1, SFARG2LO, SFARG1LO + | jr ra + |. slt CRET1, SFARG2HI, SFARG1HI + |5: // Swap conditions if both operands are negative. 
+ | beq SFARG2HI, SFARG1HI, >8 + |. sltu CRET1, SFARG1LO, SFARG2LO + | jr ra + |. slt CRET1, SFARG1HI, SFARG2HI + |8: + | jr ra + |. nop + |9: + | jr ra + |. li CRET1, 0 + |.endif + | |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. |->vm_sfcmpolex: @@ -2734,29 +2802,96 @@ static void build_subroutines(BuildCtx *ctx) |. li CRET1, 0 |.endif | - |.macro sfmin_max, name, intins + |.macro sfmin_max, name, fpcall |->vm_sf .. name: |.if JIT and not FPU | move TMP2, ra - | bal ->vm_sfcmpolt + | bal ->fpcall |. nop | move TMP0, CRET1 | move SFRETHI, SFARG1HI | move SFRETLO, SFARG1LO | move ra, TMP2 - | intins SFRETHI, SFARG2HI, TMP0 + | movz SFRETHI, SFARG2HI, TMP0 | jr ra - |. intins SFRETLO, SFARG2LO, TMP0 + |. movz SFRETLO, SFARG2LO, TMP0 |.endif |.endmacro | - | sfmin_max min, movz - | sfmin_max max, movn + | sfmin_max min, vm_sfcmpolt + | sfmin_max max, vm_sfcmpogt | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_ASIZE, CARG3 + |.define NEXT_NIL, CARG4 + |.define NEXT_TMP0, r12 + |.define NEXT_TMP1, r13 + |.define NEXT_TMP2, r14 + |.define NEXT_RES_VK, CRET1 + |.define NEXT_RES_IDX, CRET2 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL_I, 0(sp) + |.define NEXT_RES_VAL_IT, 4(sp) + |.define NEXT_RES_KEY_I, 8(sp) + |.define NEXT_RES_KEY_IT, 12(sp) + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT and ENDIAN_LE + | lw NEXT_ASIZE, NEXT_TAB->asize + | lw NEXT_TMP0, NEXT_TAB->array + | li NEXT_NIL, LJ_TNIL + |1: // Traverse array part. + | sltu AT, NEXT_IDX, NEXT_ASIZE + | sll NEXT_TMP1, NEXT_IDX, 3 + | beqz AT, >5 + |. 
addu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 + | lw NEXT_TMP2, 4(NEXT_TMP1) + | sw NEXT_IDX, NEXT_RES_KEY_I + | beq NEXT_TMP2, NEXT_NIL, <1 + |. addiu NEXT_IDX, NEXT_IDX, 1 + | lw NEXT_TMP0, 0(NEXT_TMP1) + | li AT, LJ_TISNUM + | sw NEXT_TMP2, NEXT_RES_VAL_IT + | sw AT, NEXT_RES_KEY_IT + | sw NEXT_TMP0, NEXT_RES_VAL_I + | move NEXT_RES_VK, NEXT_RES_PTR + | jr ra + |. move NEXT_RES_IDX, NEXT_IDX + | + |5: // Traverse hash part. + | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE + | lw NODE:NEXT_RES_VK, NEXT_TAB->node + | sll NEXT_TMP2, NEXT_RES_IDX, 5 + | lw NEXT_TMP0, NEXT_TAB->hmask + | sll AT, NEXT_RES_IDX, 3 + | subu AT, NEXT_TMP2, AT + | addu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT + |6: + | sltu AT, NEXT_TMP0, NEXT_RES_IDX + | bnez AT, >8 + |. nop + | lw NEXT_TMP2, NODE:NEXT_RES_VK->val.it + | bne NEXT_TMP2, NEXT_NIL, >9 + |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 + | // Skip holes in hash part. + | b <6 + |. addiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) + | + |8: // End of iteration. Set the key to nil (not the value). + | sw NEXT_NIL, NEXT_RES_KEY_IT + | move NEXT_RES_VK, NEXT_RES_PTR + |9: + | jr ra + |. 
addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -2824,7 +2959,6 @@ static void build_subroutines(BuildCtx *ctx) | move TMP2, sp | subu sp, sp, TMP1 | sw ra, -4(TMP2) - | sll CARG2, CARG2, 2 | sw r16, -8(TMP2) | sw CCSTATE, -12(TMP2) | move r16, TMP2 @@ -3984,9 +4118,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | lw NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 @@ -4158,10 +4292,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | lw NODE:TMP2, TAB:RB->node | sb r0, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 @@ -4317,7 +4451,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next2 | |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <2 + | barrierback TAB:CARG2, TMP3, CRET1, <2 break; case BC_TSETM: @@ -4480,10 +4614,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. 
+ |.if JIT and ENDIAN_LE + | hotloop |.endif + |->vm_IITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | addu RA, BASE, RA | lw TAB:RB, -16+LO(RA) | lw RC, -8+LO(RA) // Get index from control var. @@ -4562,9 +4697,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addiu CARG2, CARG2, -FF_next_N | or CARG2, CARG2, CARG3 | bnez CARG2, >5 - |. lui TMP1, 0xfffe + |. lui TMP1, (LJ_KEYINDEX >> 16) | addu PC, TMP0, TMP2 - | ori TMP1, TMP1, 0x7fff + | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) | sw r0, -8+LO(RA) // Initialize control var. | sw TMP1, -8+HI(RA) |1: @@ -4573,9 +4708,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP3, BC_JMP | li TMP1, BC_ITERC | sb TMP3, -4+OFS_OP(PC) - | addu PC, TMP0, TMP2 + | addu PC, TMP0, TMP2 + |.if JIT + | lb TMP0, OFS_OP(PC) + | li AT, BC_ITERN + | bne TMP0, AT, >6 + |. lhu TMP2, OFS_RD(PC) + |.endif | b <1 |. sb TMP1, OFS_OP(PC) + |.if JIT + |6: // Unpatch JLOOP. + | lw TMP0, DISPATCH_J(trace)(DISPATCH) + | sll TMP2, TMP2, 2 + | addu TMP0, TMP0, TMP2 + | lw TRACE:TMP2, 0(TMP0) + | lw TMP0, TRACE:TMP2->startins + | li AT, -256 + | and TMP0, TMP0, AT + | or TMP0, TMP0, TMP1 + | b <1 + |. sw TMP0, 0(PC) + |.endif break; case BC_VARG: diff --git a/source/libs/luajit/LuaJIT-src/src/vm_mips64.dasc b/source/libs/luajit/LuaJIT-src/src/vm_mips64.dasc index c06270a0afc4f188d4165594811be872e967405b..859c0aee84825b213349dad9cefcb3f11192d062 100644 --- a/source/libs/luajit/LuaJIT-src/src/vm_mips64.dasc +++ b/source/libs/luajit/LuaJIT-src/src/vm_mips64.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for MIPS64 CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h |// |// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. |// Sponsored by Cisco Systems, Inc. 
@@ -83,6 +83,10 @@ | |.define FRET1, f0 |.define FRET2, f2 +| +|.define FTMP0, f20 +|.define FTMP1, f21 +|.define FTMP2, f22 |.endif | |// Stack layout while in interpreter. Must match with lj_frame.h. @@ -189,7 +193,7 @@ |//----------------------------------------------------------------------- | |// Trap for not-yet-implemented parts. -|.macro NYI; .long 0xf0f0f0f0; .endmacro +|.macro NYI; .long 0xec1cf0f0; .endmacro | |// Macros to mark delay slots. |.macro ., a; a; .endmacro @@ -310,10 +314,10 @@ |.endmacro | |// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) -#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) +#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | @@ -492,8 +496,15 @@ static void build_subroutines(BuildCtx *ctx) |7: // Less results wanted. | subu TMP0, RD, TMP2 | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. + |.if MIPSR6 + | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case? + | seleqz BASE, BASE, TMP2 + | b <3 + |. or BASE, BASE, TMP0 + |.else | b <3 |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? + |.endif | |8: // Corner case: need to grow stack for filling up results. | // This can happen if: @@ -545,6 +556,10 @@ static void build_subroutines(BuildCtx *ctx) | b ->vm_returnc |. li RD, 16 // 2 results: false + error message. | + |->vm_unwind_stub: // Jump to exit stub from unwinder. + | jr CARG1 + |. 
move ra, CARG2 + | |//----------------------------------------------------------------------- |//-- Grow stack for calls ----------------------------------------------- |//----------------------------------------------------------------------- @@ -713,11 +728,11 @@ static void build_subroutines(BuildCtx *ctx) | ld PC, -24(RB) // Restore PC from [cont|PC]. | cleartp LFUNC:TMP1 | daddu TMP2, RA, RD - | ld TMP1, LFUNC:TMP1->pc |.if FFI | bnez AT, >1 |.endif |. sd TISNIL, -8(TMP2) // Ensure one valid arg. + | ld TMP1, LFUNC:TMP1->pc | // BASE = base, RA = resultptr, RB = meta base | jr TMP0 // Jump to continuation. |. ld KBASE, PC2PROTO(k)(TMP1) @@ -1121,11 +1136,16 @@ static void build_subroutines(BuildCtx *ctx) |.endmacro | |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! + |// MIPSR6: no delay slot, but a forbidden slot. |.macro ffgccheck | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) | dsubu AT, TMP0, TMP1 + |.if MIPSR6 + | bgezalc AT, ->fff_gcstep + |.else | bgezal AT, ->fff_gcstep + |.endif |.endmacro | |//-- Base library: checks ----------------------------------------------- @@ -1153,7 +1173,13 @@ static void build_subroutines(BuildCtx *ctx) | sltu TMP1, TISNUM, TMP0 | not TMP2, TMP0 | li TMP3, ~LJ_TISNUM + |.if MIPSR6 + | selnez TMP2, TMP2, TMP1 + | seleqz TMP3, TMP3, TMP1 + | or TMP2, TMP2, TMP3 + |.else | movz TMP2, TMP3, TMP1 + |.endif | dsll TMP2, TMP2, 3 | daddu TMP2, CFUNC:RB, TMP2 | b ->fff_restv @@ -1165,7 +1191,11 @@ static void build_subroutines(BuildCtx *ctx) | gettp TMP2, CARG1 | daddiu TMP0, TMP2, -LJ_TTAB | daddiu TMP1, TMP2, -LJ_TUDATA + |.if MIPSR6 + | selnez TMP0, TMP1, TMP0 + |.else | movn TMP0, TMP1, TMP0 + |.endif | bnez TMP0, >6 |. cleartp TAB:CARG1 |1: // Field metatable must be at same offset for GCtab and GCudata! @@ -1175,9 +1205,9 @@ static void build_subroutines(BuildCtx *ctx) | beqz TAB:RB, ->fff_restv |. 
li CARG1, LJ_TNIL | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | ld NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | dsll TMP0, TMP1, 5 | dsll TMP1, TMP1, 3 | dsubu TMP1, TMP0, TMP1 @@ -1204,7 +1234,13 @@ static void build_subroutines(BuildCtx *ctx) | |6: | sltiu AT, TMP2, LJ_TISNUM + |.if MIPSR6 + | selnez TMP0, TISNUM, AT + | seleqz AT, TMP2, AT + | or TMP2, TMP0, AT + |.else | movn TMP2, TISNUM, AT + |.endif | dsll TMP2, TMP2, 3 | dsubu TMP0, DISPATCH, TMP2 | b <2 @@ -1266,8 +1302,13 @@ static void build_subroutines(BuildCtx *ctx) | or TMP0, TMP0, TMP1 | bnez TMP0, ->fff_fallback |. sd BASE, L->base // Add frame since C call can throw. + |.if MIPSR6 + | sd PC, SAVE_PC // Redundant (but a defined value). + | ffgccheck + |.else | ffgccheck |. sd PC, SAVE_PC // Redundant (but a defined value). + |.endif | load_got lj_strfmt_number | move CARG1, L | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) @@ -1281,27 +1322,24 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: iterators ------------------------------------------- | |.ffunc_1 next - | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback + | checktp CARG1, -LJ_TTAB, ->fff_fallback | daddu TMP2, BASE, NARGS8:RC | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil. - | ld PC, FRAME_PC(BASE) | load_got lj_tab_next - | sd BASE, L->base // Add frame since C call can throw. - | sd BASE, L->top // Dummy frame length is ok. - | daddiu CARG3, BASE, 8 - | sd PC, SAVE_PC - | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - |. move CARG1, L - | // Returns 0 at end of traversal. + | ld PC, FRAME_PC(BASE) + | daddiu CARG2, BASE, 8 + | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + |. daddiu CARG3, BASE, -16 + | // Returns 1=found, 0=end, -1=error. + | daddiu RA, BASE, -16 + | bgtz CRET1, ->fff_res // Found key/value. + |. 
li RD, (2+1)*8 | beqz CRET1, ->fff_restv // End of traversal: return nil. |. move CARG1, TISNIL - | ld TMP0, 8(BASE) - | daddiu RA, BASE, -16 - | ld TMP2, 16(BASE) - | sd TMP0, 0(RA) - | sd TMP2, 8(RA) - | b ->fff_res - |. li RD, (2+1)*8 + | ld CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | b ->fff_fallback // Invalid key. + |. li RC, 2*8 | |.ffunc_1 pairs | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback @@ -1377,10 +1415,15 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: catch errors ---------------------------------------- | |.ffunc pcall - | daddiu NARGS8:RC, NARGS8:RC, -8 - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | bltz NARGS8:RC, ->fff_fallback + | ld TMP1, L->maxstack + | daddu TMP2, BASE, NARGS8:RC + | sltu AT, TMP1, TMP2 + | bnez AT, ->fff_fallback + |. lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | daddiu NARGS8:TMP0, NARGS8:RC, -8 + | bltz NARGS8:TMP0, ->fff_fallback |. move TMP2, BASE + | move NARGS8:RC, NARGS8:TMP0 | daddiu BASE, BASE, 16 | // Remember active hook before pcall. | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT @@ -1399,15 +1442,20 @@ static void build_subroutines(BuildCtx *ctx) |. nop | |.ffunc xpcall - | daddiu NARGS8:RC, NARGS8:RC, -16 - | ld CARG1, 0(BASE) + | ld TMP1, L->maxstack + | daddu TMP2, BASE, NARGS8:RC + | sltu AT, TMP1, TMP2 + | bnez AT, ->fff_fallback + |. ld CARG1, 0(BASE) + | daddiu NARGS8:TMP0, NARGS8:RC, -16 | ld CARG2, 8(BASE) - | bltz NARGS8:RC, ->fff_fallback + | bltz NARGS8:TMP0, ->fff_fallback |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) | gettp AT, CARG2 | daddiu AT, AT, -LJ_TFUNC | bnez AT, ->fff_fallback // Traceback must be a function. |. move TMP2, BASE + | move NARGS8:RC, NARGS8:TMP0 | daddiu BASE, BASE, 24 | // Remember active hook before pcall. 
| srl TMP3, TMP3, HOOK_ACTIVE_SHIFT @@ -1437,8 +1485,15 @@ static void build_subroutines(BuildCtx *ctx) | addiu AT, TMP0, -LUA_YIELD | daddu CARG3, CARG2, TMP0 | daddiu TMP3, CARG2, 8 + |.if MIPSR6 + | seleqz CARG2, CARG2, AT + | selnez TMP3, TMP3, AT + | bgtz AT, ->fff_fallback // st > LUA_YIELD? + |. or CARG2, TMP3, CARG2 + |.else | bgtz AT, ->fff_fallback // st > LUA_YIELD? |. movn CARG2, TMP3, AT + |.endif | xor TMP2, TMP2, CARG3 | bnez TMP1, ->fff_fallback // cframe != 0? |. or AT, TMP2, TMP0 @@ -1750,7 +1805,7 @@ static void build_subroutines(BuildCtx *ctx) | b ->fff_res |. li RD, (2+1)*8 | - |.macro math_minmax, name, intins, fpins + |.macro math_minmax, name, intins, intinsc, fpins | .ffunc_1 name | daddu TMP3, BASE, NARGS8:RC | checkint CARG1, >5 @@ -1762,7 +1817,13 @@ static void build_subroutines(BuildCtx *ctx) |. sextw CARG1, CARG1 | lw CARG2, LO(TMP2) |. slt AT, CARG1, CARG2 + |.if MIPSR6 + | intins TMP1, CARG2, AT + | intinsc CARG1, CARG1, AT + | or CARG1, CARG1, TMP1 + |.else | intins CARG1, CARG2, AT + |.endif | daddiu TMP2, TMP2, 8 | zextw CARG1, CARG1 | b <1 @@ -1798,12 +1859,30 @@ static void build_subroutines(BuildCtx *ctx) |. nop |7: |.if FPU + |.if MIPSR6 + | fpins FRET1, FRET1, FARG1 + |.else + |.if fpins // ismax + | c.olt.d FARG1, FRET1 + |.else | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 + |.endif + | movf.d FRET1, FARG1 + |.endif + |.else + |.if fpins // ismax + | bal ->vm_sfcmpogt |.else | bal ->vm_sfcmpolt + |.endif |. nop - | intins CARG1, CARG2, CRET1 + |.if MIPSR6 + | seleqz AT, CARG2, CRET1 + | selnez CARG1, CARG1, CRET1 + | or CARG1, CARG1, AT + |.else + | movz CARG1, CARG2, CRET1 + |.endif |.endif | b <6 |. 
daddiu TMP2, TMP2, 8 @@ -1824,8 +1903,13 @@ static void build_subroutines(BuildCtx *ctx) | |.endmacro | - | math_minmax math_min, movz, movf.d - | math_minmax math_max, movn, movt.d + |.if MIPSR6 + | math_minmax math_min, seleqz, selnez, min.d + | math_minmax math_max, selnez, seleqz, max.d + |.else + | math_minmax math_min, movz, _, 0 + | math_minmax math_max, movn, _, 1 + |.endif | |//-- String library ----------------------------------------------------- | @@ -1850,7 +1934,9 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc string_char // Only handle the 1-arg case here. | ffgccheck + |.if not MIPSR6 |. nop + |.endif | ld CARG1, 0(BASE) | gettp TMP0, CARG1 | xori AT, NARGS8:RC, 8 // Exactly 1 argument. @@ -1880,7 +1966,9 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc string_sub | ffgccheck + |.if not MIPSR6 |. nop + |.endif | addiu AT, NARGS8:RC, -16 | ld TMP0, 0(BASE) | bltz AT, ->fff_fallback @@ -1903,8 +1991,30 @@ static void build_subroutines(BuildCtx *ctx) | addiu TMP0, CARG2, 1 | addu TMP1, CARG4, TMP0 | slt TMP3, CARG3, r0 + |.if MIPSR6 + | seleqz CARG4, CARG4, AT + | selnez TMP1, TMP1, AT + | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1 + |.else | movn CARG4, TMP1, AT // if (end < 0) end += len+1 + |.endif | addu TMP1, CARG3, TMP0 + |.if MIPSR6 + | selnez TMP1, TMP1, TMP3 + | seleqz CARG3, CARG3, TMP3 + | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1 + | li TMP2, 1 + | slt AT, CARG4, r0 + | slt TMP3, r0, CARG3 + | seleqz CARG4, CARG4, AT // if (end < 0) end = 0 + | selnez CARG3, CARG3, TMP3 + | seleqz TMP2, TMP2, TMP3 + | or CARG3, TMP2, CARG3 // if (start < 1) start = 1 + | slt AT, CARG2, CARG4 + | seleqz CARG4, CARG4, AT + | selnez CARG2, CARG2, AT + | or CARG4, CARG2, CARG4 // if (end > len) end = len + |.else | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 | li TMP2, 1 | slt AT, CARG4, r0 @@ -1913,6 +2023,7 @@ static void build_subroutines(BuildCtx *ctx) | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 
| slt AT, CARG2, CARG4 | movn CARG4, CARG2, AT // if (end > len) end = len + |.endif | daddu CARG2, STR:CARG1, CARG3 | subu CARG3, CARG4, CARG3 // len = end - start | daddiu CARG2, CARG2, sizeof(GCstr)-1 @@ -1936,7 +2047,7 @@ static void build_subroutines(BuildCtx *ctx) | ld TMP0, SBUF:CARG1->b | sd L, SBUF:CARG1->L | sd BASE, L->base - | sd TMP0, SBUF:CARG1->p + | sd TMP0, SBUF:CARG1->w | call_intern extern lj_buf_putstr_ .. name |. sd PC, SAVE_PC | load_got lj_buf_tostr @@ -1974,12 +2085,63 @@ static void build_subroutines(BuildCtx *ctx) | slt AT, CARG1, r0 | dsrlv CRET1, TMP0, CARG3 | dsubu TMP0, r0, CRET1 + |.if MIPSR6 + | selnez TMP0, TMP0, AT + | seleqz CRET1, CRET1, AT + | or CRET1, CRET1, TMP0 + |.else | movn CRET1, TMP0, AT + |.endif | jr ra |. zextw CRET1, CRET1 |1: | jr ra |. move CRET1, r0 + | + |// FP number to int conversion with a check for soft-float. + |// Modifies CARG1, CRET1, CRET2, TMP0, AT. + |->vm_tointg: + |.if JIT + | dsll CRET2, CARG1, 1 + | beqz CRET2, >2 + |. li TMP0, 1076 + | dsrl AT, CRET2, 53 + | dsubu TMP0, TMP0, AT + | sltiu AT, TMP0, 54 + | beqz AT, >1 + |. dextm CRET2, CRET2, 0, 20 + | dinsu CRET2, AT, 21, 21 + | slt AT, CARG1, r0 + | dsrlv CRET1, CRET2, TMP0 + | dsubu CARG1, r0, CRET1 + |.if MIPSR6 + | seleqz CRET1, CRET1, AT + | selnez CARG1, CARG1, AT + | or CRET1, CRET1, CARG1 + |.else + | movn CRET1, CARG1, AT + |.endif + | li CARG1, 64 + | subu TMP0, CARG1, TMP0 + | dsllv CRET2, CRET2, TMP0 // Integer check. + | sextw AT, CRET1 + | xor AT, CRET1, AT // Range check. + |.if MIPSR6 + | seleqz AT, AT, CRET2 + | selnez CRET2, CRET2, CRET2 + | jr ra + |. or CRET2, AT, CRET2 + |.else + | jr ra + |. movz CRET2, AT, CRET2 + |.endif + |1: + | jr ra + |. li CRET2, 1 + |2: + | jr ra + |. move CRET1, r0 + |.endif |.endif | |.macro .ffunc_bit, name @@ -2418,7 +2580,8 @@ static void build_subroutines(BuildCtx *ctx) | daddiu DISPATCH, JGL, -GG_DISP2G-32768 | sd BASE, L->base |1: - | bltz CRET1, >9 // Check for error from exit. 
+ | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit. + | beqz TMP0, >9 |. ld LFUNC:RB, FRAME_FUNC(BASE) | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | dsll MULTRES, CRET1, 3 @@ -2433,14 +2596,16 @@ static void build_subroutines(BuildCtx *ctx) | .FPU cvt.d.s TOBIT, TOBIT | // Modified copy of ins_next which handles function header dispatch, too. | lw INS, 0(PC) - | daddiu PC, PC, 4 + | addiu CRET1, CRET1, 17 // Static dispatch? | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) + | decode_RD8a RD, INS + | beqz CRET1, >5 + |. daddiu PC, PC, 4 | decode_OP8a TMP1, INS | decode_OP8b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*8 | daddu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS + | sltiu TMP2, TMP1, BC_FUNCF*8 | ld AT, 0(TMP0) | decode_RA8a RA, INS | beqz TMP2, >2 @@ -2469,10 +2634,26 @@ static void build_subroutines(BuildCtx *ctx) | jr AT |. daddu RA, RA, BASE | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | ld TMP0, DISPATCH_J(trace)(DISPATCH) + | decode_RD8b RD + | daddu TMP0, TMP0, RD + | ld TRACE:TMP2, 0(TMP0) + | lw INS, TRACE:TMP2->startins + | decode_OP8a TMP1, INS + | decode_OP8b TMP1 + | daddu TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld AT, GG_DISP2STATIC(TMP0) + | decode_RA8a RA, INS + | decode_RD8b RD + | jr AT + |. decode_RA8b RA + | |9: // Rethrow error from the right C frame. - | load_got lj_err_throw - | negu CARG2, CRET1 - | call_intern lj_err_throw // (lua_State *L, int errcode) + | load_got lj_err_trace + | sub CARG2, r0, CRET1 + | call_intern lj_err_trace // (lua_State *L, int errcode) |. move CARG1, L |.endif | @@ -2482,15 +2663,22 @@ static void build_subroutines(BuildCtx *ctx) | |// Hard-float round to integer. |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. + |// MIPSR6: Modifies FTMP1, too. |.macro vm_round_hf, func | lui TMP0, 0x4330 // Hiword of 2^52 (double). 
| dsll TMP0, TMP0, 32 | dmtc1 TMP0, f4 | abs.d FRET2, FARG1 // |x| | dmfc1 AT, FARG1 + |.if MIPSR6 + | cmp.lt.d FTMP1, FRET2, f4 + | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 + | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52. + |.else | c.olt.d 0, FRET2, f4 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 | bc1f 0, >1 // Truncate only if |x| < 2^52. + |.endif |. sub.d FRET1, FRET1, f4 | slt AT, AT, r0 |.if "func" == "ceil" @@ -2501,16 +2689,38 @@ static void build_subroutines(BuildCtx *ctx) |.if "func" == "trunc" | dsll TMP0, TMP0, 32 | dmtc1 TMP0, f4 + |.if MIPSR6 + | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result? + | sub.d FRET2, FRET1, f4 + | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1. + | dmtc1 AT, FRET1 + | neg.d FRET2, FTMP1 + | jr ra + |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in. + |.else | c.olt.d 0, FRET2, FRET1 // |x| < result? | sub.d FRET2, FRET1, f4 | movt.d FRET1, FRET2, 0 // If yes, subtract +1. | neg.d FRET2, FRET1 | jr ra |. movn.d FRET1, FRET2, AT // Merge sign bit back in. + |.endif |.else | neg.d FRET2, FRET1 | dsll TMP0, TMP0, 32 | dmtc1 TMP0, f4 + |.if MIPSR6 + | dmtc1 AT, FTMP1 + | sel.d FTMP1, FRET1, FRET2 + |.if "func" == "ceil" + | cmp.lt.d FRET1, FTMP1, FARG1 // x > result? + |.else + | cmp.lt.d FRET1, FARG1, FTMP1 // x < result? + |.endif + | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1. + | jr ra + |. sel.d FRET1, FTMP1, FRET2 + |.else | movn.d FRET1, FRET2, AT // Merge sign bit back in. |.if "func" == "ceil" | c.olt.d 0, FRET1, FARG1 // x > result? @@ -2521,6 +2731,7 @@ static void build_subroutines(BuildCtx *ctx) | jr ra |. movt.d FRET1, FRET2, 0 |.endif + |.endif |1: | jr ra |. mov.d FRET1, FARG1 @@ -2628,12 +2839,40 @@ static void build_subroutines(BuildCtx *ctx) |. slt CRET1, CARG2, CARG1 |8: | jr ra - |. nop + |. li CRET1, 0 |9: | jr ra |. 
move CRET1, CRET2 |.endif | + |->vm_sfcmpogt: + |.if not FPU + | dsll AT, CARG2, 1 + | dsll TMP0, CARG1, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 0. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, CARG2, CARG1 + | bltz AT, >5 // Both args negative? + |. nop + | jr ra + |. slt CRET1, CARG2, CARG1 + |5: // Swap conditions if both operands are negative. + | jr ra + |. slt CRET1, CARG1, CARG2 + |8: + | jr ra + |. li CRET1, 0 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. |->vm_sfcmpolex: @@ -2665,10 +2904,98 @@ static void build_subroutines(BuildCtx *ctx) |. li CRET1, 0 |.endif | + |.macro sfmin_max, name, fpcall + |->vm_sf .. name: + |.if JIT and not FPU + | move TMP2, ra + | bal ->fpcall + |. nop + | move ra, TMP2 + | move TMP0, CRET1 + | move CRET1, CARG1 + |.if MIPSR6 + | selnez CRET1, CRET1, TMP0 + | seleqz TMP0, CARG2, TMP0 + | jr ra + |. or CRET1, CRET1, TMP0 + |.else + | jr ra + |. movz CRET1, CARG2, TMP0 + |.endif + |.endif + |.endmacro + | + | sfmin_max min, vm_sfcmpolt + | sfmin_max max, vm_sfcmpogt + | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_ASIZE, CARG3 + |.define NEXT_NIL, CARG4 + |.define NEXT_TMP0, r12 + |.define NEXT_TMP1, r13 + |.define NEXT_TMP2, r14 + |.define NEXT_RES_VK, CRET1 + |.define NEXT_RES_IDX, CRET2 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, 0(sp) + |.define NEXT_RES_KEY, 8(sp) + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. 
+ |->vm_next: + |.if JIT and ENDIAN_LE + | lw NEXT_ASIZE, NEXT_TAB->asize + | ld NEXT_TMP0, NEXT_TAB->array + | li NEXT_NIL, LJ_TNIL + |1: // Traverse array part. + | sltu AT, NEXT_IDX, NEXT_ASIZE + | sll NEXT_TMP1, NEXT_IDX, 3 + | beqz AT, >5 + |. daddu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 + | li AT, LJ_TISNUM + | ld NEXT_TMP2, 0(NEXT_TMP1) + | dsll AT, AT, 47 + | or NEXT_TMP1, NEXT_IDX, AT + | beq NEXT_TMP2, NEXT_NIL, <1 + |. addiu NEXT_IDX, NEXT_IDX, 1 + | sd NEXT_TMP2, NEXT_RES_VAL + | sd NEXT_TMP1, NEXT_RES_KEY + | move NEXT_RES_VK, NEXT_RES_PTR + | jr ra + |. move NEXT_RES_IDX, NEXT_IDX + | + |5: // Traverse hash part. + | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE + | ld NODE:NEXT_RES_VK, NEXT_TAB->node + | sll NEXT_TMP2, NEXT_RES_IDX, 5 + | lw NEXT_TMP0, NEXT_TAB->hmask + | sll AT, NEXT_RES_IDX, 3 + | subu AT, NEXT_TMP2, AT + | daddu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT + |6: + | sltu AT, NEXT_TMP0, NEXT_RES_IDX + | bnez AT, >8 + |. nop + | ld NEXT_TMP2, NODE:NEXT_RES_VK->val + | bne NEXT_TMP2, NEXT_NIL, >9 + |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 + | // Skip holes in hash part. + | b <6 + |. daddiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) + | + |8: // End of iteration. Set the key to nil (not the value). + | sd NEXT_NIL, NEXT_RES_KEY + | move NEXT_RES_VK, NEXT_RES_PTR + |9: + | jr ra + |. 
addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -2747,7 +3074,6 @@ static void build_subroutines(BuildCtx *ctx) | move TMP2, sp | dsubu sp, sp, TMP1 | sd ra, -8(TMP2) - | sll CARG2, CARG2, 3 | sd r16, -16(TMP2) | sd CCSTATE, -24(TMP2) | move r16, TMP2 @@ -2832,7 +3158,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | slt AT, CARG1, CARG2 | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + | movop TMP2, TMP2, AT + |.else | movop TMP2, r0, AT + |.endif |1: | daddu PC, PC, TMP2 | ins_next @@ -2850,16 +3180,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif |3: // RA and RD are both numbers. |.if FPU - | fcomp f20, f22 + |.if MIPSR6 + | fcomp FTMP0, FTMP0, FTMP2 + | addu TMP2, TMP2, TMP3 + | mfc1 TMP3, FTMP0 + | b <1 + |. fmovop TMP2, TMP2, TMP3 + |.else + | fcomp FTMP0, FTMP2 | addu TMP2, TMP2, TMP3 | b <1 |. fmovop TMP2, r0 + |.endif |.else | bal sfcomp |. addu TMP2, TMP2, TMP3 | b <1 + |.if MIPSR6 + |. movop TMP2, TMP2, CRET1 + |.else |. movop TMP2, r0, CRET1 |.endif + |.endif | |4: // RA is a number, RD is not a number. 
| bne CARG4, TISNUM, ->vmeta_comp @@ -2906,15 +3248,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif |.endmacro | + |.if MIPSR6 + if (op == BC_ISLT) { + | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt + } else if (op == BC_ISGE) { + | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt + } else if (op == BC_ISLE) { + | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult + } else { + | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult + } + |.else if (op == BC_ISLT) { - | bc_comp f20, f22, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt + | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt } else if (op == BC_ISGE) { - | bc_comp f20, f22, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt + | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt } else if (op == BC_ISLE) { - | bc_comp f22, f20, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult + | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult } else { - | bc_comp f22, f20, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult + | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult } + |.endif break; case BC_ISEQV: case BC_ISNEV: @@ -2960,7 +3314,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |2: // Check if the tags are the same and it's a table or userdata. | xor AT, CARG3, CARG4 // Same type? | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? 
+ |.if MIPSR6 + | seleqz TMP0, TMP0, AT + |.else | movn TMP0, r0, AT + |.endif if (vk) { | beqz TMP0, <1 } else { @@ -3010,11 +3368,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | xor TMP1, CARG1, CARG2 | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + if (vk) { + | seleqz TMP2, TMP2, TMP1 + } else { + | selnez TMP2, TMP2, TMP1 + } + |.else if (vk) { | movn TMP2, r0, TMP1 } else { | movz TMP2, r0, TMP1 } + |.endif | daddu PC, PC, TMP2 | ins_next break; @@ -3041,6 +3407,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bne CARG4, TISNUM, >6 |. addu TMP2, TMP2, TMP3 | xor AT, CARG1, CARG2 + |.if MIPSR6 + if (vk) { + | seleqz TMP2, TMP2, AT + |1: + | daddu PC, PC, TMP2 + |2: + } else { + | selnez TMP2, TMP2, AT + |1: + |2: + | daddu PC, PC, TMP2 + } + |.else if (vk) { | movn TMP2, r0, AT |1: @@ -3052,6 +3431,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |2: | daddu PC, PC, TMP2 } + |.endif | ins_next | |3: // RA is not an integer. @@ -3064,30 +3444,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |. addu TMP2, TMP2, TMP3 | sltu AT, CARG4, TISNUM |.if FPU - | ldc1 f20, 0(RA) - | ldc1 f22, 0(RD) + | ldc1 FTMP0, 0(RA) + | ldc1 FTMP2, 0(RD) |.endif | beqz AT, >5 |. nop |4: // RA and RD are both numbers. |.if FPU - | c.eq.d f20, f22 + |.if MIPSR6 + | cmp.eq.d FTMP0, FTMP0, FTMP2 + | dmfc1 TMP1, FTMP0 + | b <1 + if (vk) { + |. selnez TMP2, TMP2, TMP1 + } else { + |. seleqz TMP2, TMP2, TMP1 + } + |.else + | c.eq.d FTMP0, FTMP2 | b <1 if (vk) { |. movf TMP2, r0 } else { |. movt TMP2, r0 } + |.endif |.else | bal ->vm_sfcmpeq |. nop | b <1 + |.if MIPSR6 + if (vk) { + |. selnez TMP2, TMP2, CRET1 + } else { + |. seleqz TMP2, TMP2, CRET1 + } + |.else if (vk) { |. movz TMP2, r0, CRET1 } else { |. movn TMP2, r0, CRET1 } |.endif + |.endif | |5: // RA is a number, RD is not a number. 
|.if FFI @@ -3097,9 +3496,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | // RA is a number, RD is an integer. Convert RD to a number. |.if FPU - |. lwc1 f22, LO(RD) + |. lwc1 FTMP2, LO(RD) | b <4 - |. cvt.d.w f22, f22 + |. cvt.d.w FTMP2, FTMP2 |.else |. sextw CARG2, CARG2 | bal ->vm_sfi2d_2 @@ -3117,10 +3516,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | // RA is an integer, RD is a number. Convert RA to a number. |.if FPU - |. lwc1 f20, LO(RA) - | ldc1 f22, 0(RD) + |. lwc1 FTMP0, LO(RA) + | ldc1 FTMP2, 0(RD) | b <4 - | cvt.d.w f20, f20 + | cvt.d.w FTMP0, FTMP0 |.else |. sextw CARG1, CARG1 | bal ->vm_sfi2d_1 @@ -3163,11 +3562,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | decode_RD4b TMP2 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + if (vk) { + | seleqz TMP2, TMP2, TMP0 + } else { + | selnez TMP2, TMP2, TMP0 + } + |.else if (vk) { | movn TMP2, r0, TMP0 } else { | movz TMP2, r0, TMP0 } + |.endif | daddu PC, PC, TMP2 | ins_next break; @@ -3186,11 +3593,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | decode_RD4b TMP2 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + if (op == BC_IST) { + | selnez TMP2, TMP2, TMP0; + } else { + | seleqz TMP2, TMP2, TMP0; + } + |.else if (op == BC_IST) { | movz TMP2, r0, TMP0 } else { | movn TMP2, r0, TMP0 } + |.endif | daddu PC, PC, TMP2 } else { | ld CRET1, 0(RD) @@ -3433,9 +3848,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bltz TMP1, ->vmeta_arith |. daddu RA, BASE, RA |.elif "intins" == "mult" + |.if MIPSR6 + |. nop + | mul CRET1, CARG3, CARG4 + | muh TMP2, CARG3, CARG4 + |.else |. intins CARG3, CARG4 | mflo CRET1 | mfhi TMP2 + |.endif | sra TMP1, CRET1, 31 | bne TMP1, TMP2, ->vmeta_arith |. daddu RA, BASE, RA @@ -3458,16 +3879,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | |5: // Check for two numbers. 
- | .FPU ldc1 f20, 0(RB) + | .FPU ldc1 FTMP0, 0(RB) | sltu AT, TMP0, TISNUM | sltu TMP0, TMP1, TISNUM - | .FPU ldc1 f22, 0(RC) + | .FPU ldc1 FTMP2, 0(RC) | and AT, AT, TMP0 | beqz AT, ->vmeta_arith |. daddu RA, BASE, RA | |.if FPU - | fpins FRET1, f20, f22 + | fpins FRET1, FTMP0, FTMP2 |.elif "fpcall" == "sfpmod" | sfpmod |.else @@ -3797,7 +4218,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP0, 0x801 | addiu AT, CARG2, -0x7ff | srl CARG3, RD, 14 + |.if MIPSR6 + | seleqz TMP0, TMP0, AT + | selnez CARG2, CARG2, AT + | or CARG2, CARG2, TMP0 + |.else | movz CARG2, TMP0, AT + |.endif | // (lua_State *L, int32_t asize, uint32_t hbits) | call_intern lj_tab_new |. move CARG1, L @@ -3904,9 +4331,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | ld NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 @@ -4067,10 +4494,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | ld NODE:TMP2, TAB:RB->node | sb r0, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 @@ -4078,7 +4505,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | settp STR:RC, TMP3 // Tagged key to look for. 
|.if FPU - | ldc1 f20, 0(RA) + | ldc1 FTMP0, 0(RA) |.else | ld CRET1, 0(RA) |.endif @@ -4094,7 +4521,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | bnez AT, >7 |.if FPU - |. sdc1 f20, NODE:TMP2->val + |. sdc1 FTMP0, NODE:TMP2->val |.else |. sd CRET1, NODE:TMP2->val |.endif @@ -4135,7 +4562,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ld BASE, L->base |.if FPU | b <3 // No 2nd write barrier needed. - |. sdc1 f20, 0(CRET1) + |. sdc1 FTMP0, 0(CRET1) |.else | ld CARG1, 0(RA) | b <3 // No 2nd write barrier needed. @@ -4213,7 +4640,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next2 | |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <2 + | barrierback TAB:CARG2, TMP3, CRET1, <2 break; case BC_TSETM: @@ -4364,10 +4791,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. + |.if JIT and ENDIAN_LE + | hotloop |.endif + |->vm_IITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | daddu RA, BASE, RA | ld TAB:RB, -16(RA) | lw RC, -8+LO(RA) // Get index from control var. @@ -4388,11 +4816,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |. addiu RC, RC, 1 | sd TMP2, 0(RA) | sd CARG1, 8(RA) - | or TMP0, RC, CARG3 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | decode_RD4b RD | daddu RD, RD, TMP3 - | sw TMP0, -8+LO(RA) // Update control var. + | sw RC, -8+LO(RA) // Update control var. | daddu PC, PC, RD |3: | ins_next @@ -4442,9 +4869,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | daddiu TMP1, TMP1, -FF_next_N | or AT, AT, TMP1 | bnez AT, >5 - |. lui TMP1, 0xfffe + |. 
lui TMP1, (LJ_KEYINDEX >> 16) | daddu PC, TMP0, TMP2 - | ori TMP1, TMP1, 0x7fff + | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) | dsll TMP1, TMP1, 32 | sd TMP1, -8(RA) |1: @@ -4454,8 +4881,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP1, BC_ITERC | sb TMP3, -4+OFS_OP(PC) | daddu PC, TMP0, TMP2 + |.if JIT + | lb TMP0, OFS_OP(PC) + | li AT, BC_ITERN + | bne TMP0, AT, >6 + |. lhu TMP2, OFS_RD(PC) + |.endif | b <1 |. sb TMP1, OFS_OP(PC) + |.if JIT + |6: // Unpatch JLOOP. + | ld TMP0, DISPATCH_J(trace)(DISPATCH) + | sll TMP2, TMP2, 3 + | daddu TMP0, TMP0, TMP2 + | ld TRACE:TMP2, 0(TMP0) + | lw TMP0, TRACE:TMP2->startins + | li AT, -256 + | and TMP0, TMP0, AT + | or TMP0, TMP0, TMP1 + | b <1 + |. sw TMP0, 0(PC) + |.endif break; case BC_VARG: @@ -4478,7 +4924,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ld CARG1, 0(RC) | sltu AT, RC, TMP3 | daddiu RC, RC, 8 + |.if MIPSR6 + | selnez CARG1, CARG1, AT + | seleqz AT, TISNIL, AT + | or CARG1, CARG1, AT + |.else | movz CARG1, TISNIL, AT + |.endif | sd CARG1, 0(RA) | sltu AT, RA, TMP2 | bnez AT, <1 @@ -4667,7 +5119,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | dext AT, CRET1, 31, 0 | slt CRET1, CARG2, CARG3 | slt TMP1, CARG3, CARG2 + |.if MIPSR6 + | selnez TMP1, TMP1, AT + | seleqz CRET1, CRET1, AT + | or CRET1, CRET1, TMP1 + |.else | movn CRET1, TMP1, AT + |.endif } else { | bne CARG3, TISNUM, >5 |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type @@ -4683,20 +5141,34 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | slt CRET1, CRET1, CARG1 | slt AT, CARG2, r0 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. 
+ |.if MIPSR6 + | selnez TMP1, TMP1, AT + | seleqz CRET1, CRET1, AT + | or CRET1, CRET1, TMP1 + |.else | movn CRET1, TMP1, AT + |.endif | or CRET1, CRET1, TMP0 | zextw CARG1, CARG1 | settp CARG1, TISNUM } |1: if (op == BC_FORI) { + |.if MIPSR6 + | selnez TMP2, TMP2, CRET1 + |.else | movz TMP2, r0, CRET1 + |.endif | daddu PC, PC, TMP2 } else if (op == BC_JFORI) { | daddu PC, PC, TMP2 | lhu RD, -4+OFS_RD(PC) } else if (op == BC_IFORL) { + |.if MIPSR6 + | seleqz TMP2, TMP2, CRET1 + |.else | movn TMP2, r0, CRET1 + |.endif | daddu PC, PC, TMP2 } if (vk) { @@ -4726,6 +5198,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | and AT, AT, TMP0 | beqz AT, ->vmeta_for |. slt TMP3, TMP3, r0 + |.if MIPSR6 + | dmtc1 TMP3, FTMP2 + | cmp.lt.d FTMP0, f0, f2 + | cmp.lt.d FTMP1, f2, f0 + | sel.d FTMP2, FTMP1, FTMP0 + | b <1 + |. dmfc1 CRET1, FTMP2 + |.else | c.ole.d 0, f0, f2 | c.ole.d 1, f2, f0 | li CRET1, 1 @@ -4733,12 +5213,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | movt AT, r0, 1 | b <1 |. movn CRET1, AT, TMP3 + |.endif } else { | ldc1 f0, FORL_IDX*8(RA) | ldc1 f4, FORL_STEP*8(RA) | ldc1 f2, FORL_STOP*8(RA) | ld TMP3, FORL_STEP*8(RA) | add.d f0, f0, f4 + |.if MIPSR6 + | slt TMP3, TMP3, r0 + | dmtc1 TMP3, FTMP2 + | cmp.lt.d FTMP0, f0, f2 + | cmp.lt.d FTMP1, f2, f0 + | sel.d FTMP2, FTMP1, FTMP0 + | dmfc1 CRET1, FTMP2 + if (op == BC_IFORL) { + | seleqz TMP2, TMP2, CRET1 + | daddu PC, PC, TMP2 + } + |.else | c.ole.d 0, f0, f2 | c.ole.d 1, f2, f0 | slt TMP3, TMP3, r0 @@ -4751,6 +5244,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | movn TMP2, r0, CRET1 | daddu PC, PC, TMP2 } + |.endif | sdc1 f0, FORL_IDX*8(RA) | ins_next1 | b <2 @@ -4911,6 +5405,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | settp LFUNC:RB, TMP0 | daddu TMP0, RA, RC | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. 
+ | daddiu TMP2, TMP2, -8 | daddiu TMP3, RC, 16+FRAME_VARG | sltu AT, TMP0, TMP2 | ld KBASE, -4+PC2PROTO(k)(PC) @@ -4926,8 +5421,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ld TMP0, 0(RA) | sltu AT, RA, RC // Less args than parameters? | move CARG1, TMP0 + |.if MIPSR6 + | selnez TMP0, TMP0, AT + | seleqz TMP3, TISNIL, AT + | or TMP0, TMP0, TMP3 + | seleqz TMP3, CARG1, AT + | selnez CARG1, TISNIL, AT + | or CARG1, CARG1, TMP3 + |.else | movz TMP0, TISNIL, AT // Clear missing parameters. | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). + |.endif | addiu TMP2, TMP2, -1 | sd TMP0, 16(TMP1) | daddiu TMP1, TMP1, 8 diff --git a/source/libs/luajit/LuaJIT-src/src/vm_ppc.dasc b/source/libs/luajit/LuaJIT-src/src/vm_ppc.dasc index b4260ebc10726059f2b577bfdf4230a36a22d539..2ddeefbfea70d7cc116f7412c23b66937116a4d9 100644 --- a/source/libs/luajit/LuaJIT-src/src/vm_ppc.dasc +++ b/source/libs/luajit/LuaJIT-src/src/vm_ppc.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for PowerPC 32 bit or 32on64 bit mode. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h | |.arch ppc |.section code_op, code_sub @@ -18,7 +18,6 @@ |// DynASM defines used by the PPC port: |// |// P64 64 bit pointers (only for GPR64 testing). -|// Note: see vm_ppc64.dasc for a full PPC64 _LP64 port. |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). |// Affects reg saves, stack layout, carry/overflow/dot flags etc. |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). @@ -103,6 +102,18 @@ |// Fixed register assignments for the interpreter. 
|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) | +|.macro .FPU, a, b +|.if FPU +| a, b +|.endif +|.endmacro +| +|.macro .FPU, a, b, c +|.if FPU +| a, b, c +|.endif +|.endmacro +| |// The following must be C callee-save (but BASE is often refetched). |.define BASE, r14 // Base of current Lua stack frame. |.define KBASE, r15 // Constants of current Lua function. @@ -116,8 +127,10 @@ |.define TISNUM, r22 |.define TISNIL, r23 |.define ZERO, r24 +|.if FPU |.define TOBIT, f30 // 2^52 + 2^51. |.define TONUM, f31 // 2^52 + 2^51 + 2^31. +|.endif | |// The following temporaries are not saved across C calls, except for RA. |.define RA, r20 // Callee-save. @@ -133,6 +146,7 @@ | |// Saved temporaries. |.define SAVE0, r21 +|.define SAVE1, r25 | |// Calling conventions. |.define CARG1, r3 @@ -141,8 +155,10 @@ |.define CARG4, r6 // Overlaps TMP3. |.define CARG5, r7 // Overlaps INS. | +|.if FPU |.define FARG1, f1 |.define FARG2, f2 +|.endif | |.define CRET1, r3 |.define CRET2, r4 @@ -213,10 +229,16 @@ |.endif |.else | +|.if FPU |.define SAVE_LR, 276(sp) |.define CFRAME_SPACE, 272 // Delta for sp. |// Back chain for sp: 272(sp) <-- sp entering interpreter |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. +|.else +|.define SAVE_LR, 132(sp) +|.define CFRAME_SPACE, 128 // Delta for sp. +|// Back chain for sp: 128(sp) <-- sp entering interpreter +|.endif |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. |.define SAVE_CR, 52(sp) // 32 bit CR save. |.define SAVE_ERRF, 48(sp) // 32 bit C frame info. 
@@ -226,16 +248,25 @@ |.define SAVE_PC, 32(sp) |.define SAVE_MULTRES, 28(sp) |.define UNUSED1, 24(sp) +|.if FPU |.define TMPD_LO, 20(sp) |.define TMPD_HI, 16(sp) |.define TONUM_LO, 12(sp) |.define TONUM_HI, 8(sp) +|.else +|.define SFSAVE_4, 20(sp) +|.define SFSAVE_3, 16(sp) +|.define SFSAVE_2, 12(sp) +|.define SFSAVE_1, 8(sp) +|.endif |// Next frame lr: 4(sp) |// Back chain for sp: 0(sp) <-- sp while in interpreter | +|.if FPU |.define TMPD_BLO, 23(sp) |.define TMPD, TMPD_HI |.define TONUM_D, TONUM_HI +|.endif | |.endif | @@ -245,7 +276,7 @@ |.else | stw r..reg, SAVE_GPR_+(reg-14)*4(sp) |.endif -| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) +| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) |.endmacro |.macro rest_, reg |.if GPR64 @@ -253,7 +284,7 @@ |.else | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) |.endif -| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) +| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) |.endmacro | |.macro saveregs @@ -323,6 +354,7 @@ |// Trap for not-yet-implemented parts. |.macro NYI; tw 4, sp, sp; .endmacro | +|.if FPU |// int/FP conversions. |.macro tonum_i, freg, reg | xoris reg, reg, 0x8000 @@ -346,6 +378,7 @@ |.macro toint, reg, freg | toint reg, freg, freg |.endmacro +|.endif | |//----------------------------------------------------------------------- | @@ -533,9 +566,19 @@ static void build_subroutines(BuildCtx *ctx) | beq >2 |1: | addic. TMP1, TMP1, -8 + |.if FPU | lfd f0, 0(RA) + |.else + | lwz CARG1, 0(RA) + | lwz CARG2, 4(RA) + |.endif | addi RA, RA, 8 + |.if FPU | stfd f0, 0(BASE) + |.else + | stw CARG1, 0(BASE) + | stw CARG2, 4(BASE) + |.endif | addi BASE, BASE, 8 | bney <1 | @@ -613,23 +656,23 @@ static void build_subroutines(BuildCtx *ctx) | .toc ld TOCREG, SAVE_TOC | li TISNUM, LJ_TISNUM // Setup type comparison constants. | lp BASE, L->base - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 
| li ZERO, 0 - | stw TMP3, TMPD + | .FPU stw TMP3, TMPD | li TMP1, LJ_TFALSE - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | li TISNIL, LJ_TNIL | li_vmstate INTERP - | lfs TOBIT, TMPD + | .FPU lfs TOBIT, TMPD | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. | la RA, -8(BASE) // Results start at BASE-8. - | stw TMP3, TMPD + | .FPU stw TMP3, TMPD | addi DISPATCH, DISPATCH, GG_G2DISP | stw TMP1, 0(RA) // Prepend false to error message. | li RD, 16 // 2 results: false + error message. | st_vmstate - | lfs TONUM, TMPD + | .FPU lfs TONUM, TMPD | b ->vm_returnc | |//----------------------------------------------------------------------- @@ -690,22 +733,22 @@ static void build_subroutines(BuildCtx *ctx) | li TISNUM, LJ_TISNUM // Setup type comparison constants. | lp TMP1, L->top | lwz PC, FRAME_PC(BASE) - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | stb CARG3, L->status - | stw TMP3, TMPD - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | lfs TOBIT, TMPD + | .FPU stw TMP3, TMPD + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). + | .FPU lfs TOBIT, TMPD | sub RD, TMP1, BASE - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | .FPU stw TMP3, TMPD + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | addi RD, RD, 8 - | stw TMP0, TONUM_HI + | .FPU stw TMP0, TONUM_HI | li_vmstate INTERP | li ZERO, 0 | st_vmstate | andix. TMP0, PC, FRAME_TYPE | mr MULTRES, RD - | lfs TONUM, TMPD + | .FPU lfs TONUM, TMPD | li TISNIL, LJ_TNIL | beq ->BC_RET_Z | b ->vm_return @@ -739,19 +782,19 @@ static void build_subroutines(BuildCtx *ctx) | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). | li TISNUM, LJ_TISNUM // Setup type comparison constants. | lp TMP1, L->top - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | add PC, PC, BASE - | stw TMP3, TMPD + | .FPU stw TMP3, TMPD | li ZERO, 0 - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | lfs TOBIT, TMPD + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). + | .FPU lfs TOBIT, TMPD | sub PC, PC, TMP2 // PC = frame delta + frame type - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | .FPU stw TMP3, TMPD + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | sub NARGS8:RC, TMP1, BASE - | stw TMP0, TONUM_HI + | .FPU stw TMP0, TONUM_HI | li_vmstate INTERP - | lfs TONUM, TMPD + | .FPU lfs TONUM, TMPD | li TISNIL, LJ_TNIL | st_vmstate | @@ -816,11 +859,11 @@ static void build_subroutines(BuildCtx *ctx) |.endif | lwz PC, -16(RB) // Restore PC from [cont|PC]. | subi TMP2, RD, 8 - | lwz TMP1, LFUNC:TMP1->pc | stwx TISNIL, RA, TMP2 // Ensure one valid arg. |.if FFI | ble >1 |.endif + | lwz TMP1, LFUNC:TMP1->pc | lwz KBASE, PC2PROTO(k)(TMP1) | // BASE = base, RA = resultptr, RB = meta base | mtctr TMP0 @@ -839,15 +882,30 @@ static void build_subroutines(BuildCtx *ctx) | lwz INS, -4(PC) | subi CARG2, RB, 16 | decode_RB8 SAVE0, INS + |.if FPU | lfd f0, 0(RA) + |.else + | lwz TMP2, 0(RA) + | lwz TMP3, 4(RA) + |.endif | add TMP1, BASE, SAVE0 | stp BASE, L->base | cmplw TMP1, CARG2 | sub CARG3, CARG2, TMP1 | decode_RA8 RA, INS + |.if FPU | stfd f0, 0(CARG2) + |.else + | stw TMP2, 0(CARG2) + | stw TMP3, 4(CARG2) + |.endif | bney ->BC_CAT_Z + |.if FPU | stfdx f0, BASE, RA + |.else + | stwux TMP2, RA, BASE + | stw TMP3, 4(RA) + |.endif | b ->cont_nop | |//-- Table indexing metamethods ----------------------------------------- @@ -900,9 +958,19 @@ static void build_subroutines(BuildCtx *ctx) | // Returns TValue * (finished) or NULL (metamethod). 
| cmplwi CRET1, 0 | beq >3 + |.if FPU | lfd f0, 0(CRET1) + |.else + | lwz TMP0, 0(CRET1) + | lwz TMP1, 4(CRET1) + |.endif | ins_next1 + |.if FPU | stfdx f0, BASE, RA + |.else + | stwux TMP0, RA, BASE + | stw TMP1, 4(RA) + |.endif | ins_next2 | |3: // Call __index metamethod. @@ -920,7 +988,12 @@ static void build_subroutines(BuildCtx *ctx) | // Returns cTValue * or NULL. | cmplwi CRET1, 0 | beq >1 + |.if FPU | lfd f14, 0(CRET1) + |.else + | lwz SAVE0, 0(CRET1) + | lwz SAVE1, 4(CRET1) + |.endif | b ->BC_TGETR_Z |1: | stwx TISNIL, BASE, RA @@ -975,11 +1048,21 @@ static void build_subroutines(BuildCtx *ctx) | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | // Returns TValue * (finished) or NULL (metamethod). | cmplwi CRET1, 0 + |.if FPU | lfdx f0, BASE, RA + |.else + | lwzux TMP2, RA, BASE + | lwz TMP3, 4(RA) + |.endif | beq >3 | // NOBARRIER: lj_meta_tset ensures the table is not black. | ins_next1 + |.if FPU | stfd f0, 0(CRET1) + |.else + | stw TMP2, 0(CRET1) + | stw TMP3, 4(CRET1) + |.endif | ins_next2 | |3: // Call __newindex metamethod. @@ -990,15 +1073,26 @@ static void build_subroutines(BuildCtx *ctx) | add PC, TMP1, BASE | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | li NARGS8:RC, 24 // 3 args for func(t, k, v) + |.if FPU | stfd f0, 16(BASE) // Copy value to third argument. + |.else + | stw TMP2, 16(BASE) + | stw TMP3, 20(BASE) + |.endif | b ->vm_call_dispatch_f | |->vmeta_tsetr: | stp BASE, L->base + | mr CARG1, L | stw PC, SAVE_PC | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | // Returns TValue *. 
+ |.if FPU | stfd f14, 0(CRET1) + |.else + | stw SAVE0, 0(CRET1) + | stw SAVE1, 4(CRET1) + |.endif | b ->cont_nop | |//-- Comparison metamethods --------------------------------------------- @@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_ra: // RA = resultptr | lwz INS, -4(PC) + |.if FPU | lfd f0, 0(RA) + |.else + | lwz CARG1, 0(RA) + | lwz CARG2, 4(RA) + |.endif | decode_RA8 TMP1, INS + |.if FPU | stfdx f0, BASE, TMP1 + |.else + | stwux CARG1, TMP1, BASE + | stw CARG2, 4(TMP1) + |.endif | b ->cont_nop | |->cont_condt: // RA = resultptr @@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx) |.macro .ffunc_n, name |->ff_ .. name: | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) + | lwz CARG1, 0(BASE) + |.if FPU | lfd FARG1, 0(BASE) + |.else + | lwz CARG2, 4(BASE) + |.endif | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback + | checknum CARG1; bge ->fff_fallback |.endmacro | |.macro .ffunc_nn, name |->ff_ .. name: | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) + | lwz CARG1, 0(BASE) + |.if FPU | lfd FARG1, 0(BASE) - | lwz CARG4, 8(BASE) + | lwz CARG3, 8(BASE) | lfd FARG2, 8(BASE) + |.else + | lwz CARG2, 4(BASE) + | lwz CARG3, 8(BASE) + | lwz CARG4, 12(BASE) + |.endif | blt ->fff_fallback + | checknum CARG1; bge ->fff_fallback | checknum CARG3; bge ->fff_fallback - | checknum CARG4; bge ->fff_fallback |.endmacro | |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. @@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx) | bge cr1, ->fff_fallback | stw CARG3, 0(RA) | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. + | addi TMP1, BASE, 8 + | add TMP2, RA, NARGS8:RC | stw CARG1, 4(RA) | beq ->fff_res // Done if exactly 1 argument. 
- | li TMP1, 8 - | subi RC, RC, 8 |1: - | cmplw TMP1, RC - | lfdx f0, BASE, TMP1 - | stfdx f0, RA, TMP1 + | cmplw TMP1, TMP2 + |.if FPU + | lfd f0, 0(TMP1) + | stfd f0, 0(TMP1) + |.else + | lwz CARG1, 0(TMP1) + | lwz CARG2, 4(TMP1) + | stw CARG1, -8(TMP1) + | stw CARG2, -4(TMP1) + |.endif | addi TMP1, TMP1, 8 | bney <1 | b ->fff_res @@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx) | orc TMP1, TMP2, TMP0 | addi TMP1, TMP1, ~LJ_TISNUM+1 | slwi TMP1, TMP1, 3 + |.if FPU | la TMP2, CFUNC:RB->upvalue | lfdx FARG1, TMP2, TMP1 + |.else + | add TMP1, CFUNC:RB, TMP1 + | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi + | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo + |.endif | b ->fff_resn | |//-- Base library: getters and setters --------------------------------- @@ -1320,9 +1447,9 @@ static void build_subroutines(BuildCtx *ctx) | beq ->fff_restv | lwz TMP0, TAB:CARG1->hmask | li CARG3, LJ_TTAB // Use metatable as default result. - | lwz TMP1, STR:RC->hash + | lwz TMP1, STR:RC->sid | lwz NODE:TMP2, TAB:CARG1->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | slwi TMP0, TMP1, 5 | slwi TMP1, TMP1, 3 | sub TMP1, TMP0, TMP1 @@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx) | mr CARG1, L | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | // Returns cTValue *. + |.if FPU | lfd FARG1, 0(CRET1) + |.else + | lwz CARG2, 4(CRET1) + | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1. + |.endif | b ->fff_resn | |//-- Base library: conversions ------------------------------------------ @@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx) | // Only handles the number case inline (without a base argument). | cmplwi NARGS8:RC, 8 | lwz CARG1, 0(BASE) + |.if FPU | lfd FARG1, 0(BASE) + |.else + | lwz CARG2, 4(BASE) + |.endif | bne ->fff_fallback // Exactly one argument. 
| checknum CARG1; bgt ->fff_fallback | b ->fff_resn @@ -1423,32 +1559,24 @@ static void build_subroutines(BuildCtx *ctx) | |//-- Base library: iterators ------------------------------------------- | - |.ffunc next - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | lwz TAB:CARG2, 4(BASE) - | blt ->fff_fallback + |.ffunc_1 next | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. - | checktab CARG1 + | checktab CARG3 | lwz PC, FRAME_PC(BASE) | bne ->fff_fallback - | stp BASE, L->base // Add frame since C call can throw. - | mr CARG1, L - | stp BASE, L->top // Dummy frame length is ok. - | la CARG3, 8(BASE) - | stw PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | cmplwi CRET1, 0 - | li CARG3, LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. - | lfd f0, 8(BASE) // Copy key and value to results. + | la CARG2, 8(BASE) + | la CARG3, -8(BASE) + | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + | cmpwi CRET1, 0 | la RA, -8(BASE) - | lfd f1, 16(BASE) - | stfd f0, 0(RA) | li RD, (2+1)*8 - | stfd f1, 8(RA) - | b ->fff_res + | bgt ->fff_res // Found key/value. + | li CARG3, LJ_TNIL + | beq ->fff_restv // End of traversal: return nil. + | lwz CFUNC:RB, FRAME_FUNC(BASE) + | li NARGS8:RC, 2*8 + | b ->fff_fallback // Invalid key. 
| |.ffunc_1 pairs | checktab CARG3 @@ -1456,17 +1584,32 @@ static void build_subroutines(BuildCtx *ctx) | bne ->fff_fallback #if LJ_52 | lwz TAB:TMP2, TAB:CARG1->metatable + |.if FPU | lfd f0, CFUNC:RB->upvalue[0] + |.else + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo + |.endif | cmplwi TAB:TMP2, 0 | la RA, -8(BASE) | bne ->fff_fallback #else + |.if FPU | lfd f0, CFUNC:RB->upvalue[0] + |.else + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo + |.endif | la RA, -8(BASE) #endif | stw TISNIL, 8(BASE) | li RD, (3+1)*8 + |.if FPU | stfd f0, 0(RA) + |.else + | stw TMP0, 0(RA) + | stw TMP1, 4(RA) + |.endif | b ->fff_res | |.ffunc ipairs_aux @@ -1512,14 +1655,24 @@ static void build_subroutines(BuildCtx *ctx) | stfd FARG2, 0(RA) |.endif | ble >2 // Not in array part? + |.if FPU | lwzx TMP2, TMP1, TMP3 | lfdx f0, TMP1, TMP3 + |.else + | lwzux TMP2, TMP1, TMP3 + | lwz TMP3, 4(TMP1) + |.endif |1: | checknil TMP2 | li RD, (0+1)*8 | beq ->fff_res // End of iteration, return 0 results. | li RD, (2+1)*8 + |.if FPU | stfd f0, 8(RA) + |.else + | stw TMP2, 8(RA) + | stw TMP3, 12(RA) + |.endif | b ->fff_res |2: // Check for empty hash part first. Otherwise call C function. 
| lwz TMP0, TAB:CARG1->hmask @@ -1533,7 +1686,11 @@ static void build_subroutines(BuildCtx *ctx) | li RD, (0+1)*8 | beq ->fff_res | lwz TMP2, 0(CRET1) + |.if FPU | lfd f0, 0(CRET1) + |.else + | lwz TMP3, 4(CRET1) + |.endif | b <1 | |.ffunc_1 ipairs @@ -1542,12 +1699,22 @@ static void build_subroutines(BuildCtx *ctx) | bne ->fff_fallback #if LJ_52 | lwz TAB:TMP2, TAB:CARG1->metatable + |.if FPU | lfd f0, CFUNC:RB->upvalue[0] + |.else + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo + |.endif | cmplwi TAB:TMP2, 0 | la RA, -8(BASE) | bne ->fff_fallback #else + |.if FPU | lfd f0, CFUNC:RB->upvalue[0] + |.else + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo + |.endif | la RA, -8(BASE) #endif |.if DUALNUM @@ -1557,14 +1724,23 @@ static void build_subroutines(BuildCtx *ctx) |.endif | stw ZERO, 12(BASE) | li RD, (3+1)*8 + |.if FPU | stfd f0, 0(RA) + |.else + | stw TMP0, 0(RA) + | stw TMP1, 4(RA) + |.endif | b ->fff_res | |//-- Base library: catch errors ---------------------------------------- | |.ffunc pcall + | lwz TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC | cmplwi NARGS8:RC, 8 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | cmplw cr1, TMP1, TMP2 + | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | blt ->fff_fallback | mr TMP2, BASE | la BASE, 8(BASE) @@ -1575,20 +1751,38 @@ static void build_subroutines(BuildCtx *ctx) | b ->vm_call_dispatch | |.ffunc xpcall + | lwz TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC | cmplwi NARGS8:RC, 16 - | lwz CARG4, 8(BASE) + | lwz CARG3, 8(BASE) + | cmplw cr1, TMP1, TMP2 + |.if FPU | lfd FARG2, 8(BASE) + | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | lfd FARG1, 0(BASE) + |.else + | lwz CARG1, 0(BASE) + | lwz CARG2, 4(BASE) + | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | lwz CARG4, 12(BASE) + |.endif | blt ->fff_fallback | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) | mr TMP2, BASE - | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. 
+ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function. | la BASE, 16(BASE) | // Remember active hook before pcall. | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 + |.if FPU | stfd FARG2, 0(TMP2) // Swap function and traceback. - | subi NARGS8:RC, NARGS8:RC, 16 | stfd FARG1, 8(TMP2) + |.else + | stw CARG3, 0(TMP2) + | stw CARG4, 4(TMP2) + | stw CARG1, 8(TMP2) + | stw CARG2, 12(TMP2) + |.endif + | subi NARGS8:RC, NARGS8:RC, 16 | addi PC, TMP1, 16+FRAME_PCALL | b ->vm_call_dispatch | @@ -1631,9 +1825,21 @@ static void build_subroutines(BuildCtx *ctx) | stp BASE, L->top |2: // Move args to coroutine. | cmpw TMP1, NARGS8:RC + |.if FPU | lfdx f0, BASE, TMP1 + |.else + | add CARG3, BASE, TMP1 + | lwz TMP2, 0(CARG3) + | lwz TMP3, 4(CARG3) + |.endif | beq >3 + |.if FPU | stfdx f0, CARG2, TMP1 + |.else + | add CARG3, CARG2, TMP1 + | stw TMP2, 0(CARG3) + | stw TMP3, 4(CARG3) + |.endif | addi TMP1, TMP1, 8 | b <2 |3: @@ -1664,8 +1870,17 @@ static void build_subroutines(BuildCtx *ctx) | stp TMP2, L:SAVE0->top // Clear coroutine stack. |5: // Move results from coroutine. | cmplw TMP1, TMP3 + |.if FPU | lfdx f0, TMP2, TMP1 | stfdx f0, BASE, TMP1 + |.else + | add CARG3, TMP2, TMP1 + | lwz CARG1, 0(CARG3) + | lwz CARG2, 4(CARG3) + | add CARG3, BASE, TMP1 + | stw CARG1, 0(CARG3) + | stw CARG2, 4(CARG3) + |.endif | addi TMP1, TMP1, 8 | bne <5 |6: @@ -1690,12 +1905,22 @@ static void build_subroutines(BuildCtx *ctx) | andix. TMP0, PC, FRAME_TYPE | la TMP3, -8(TMP3) | li TMP1, LJ_TFALSE + |.if FPU | lfd f0, 0(TMP3) + |.else + | lwz CARG1, 0(TMP3) + | lwz CARG2, 4(TMP3) + |.endif | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. | li RD, (2+1)*8 | stw TMP1, -8(BASE) // Prepend false to results. | la RA, -8(BASE) + |.if FPU | stfd f0, 0(BASE) // Copy error message. + |.else + | stw CARG1, 0(BASE) // Copy error message. 
+ | stw CARG2, 4(BASE) + |.endif | b <7 |.else | mr CARG1, L @@ -1874,7 +2099,12 @@ static void build_subroutines(BuildCtx *ctx) | lus CARG1, 0x8000 // -(2^31). | beqy ->fff_resi |5: + |.if FPU | lfd FARG1, 0(BASE) + |.else + | lwz CARG1, 0(BASE) + | lwz CARG2, 4(BASE) + |.endif | blex func | b ->fff_resn |.endmacro @@ -1898,10 +2128,14 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc math_log | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) + | lwz CARG1, 0(BASE) | bne ->fff_fallback // Need exactly 1 argument. - | checknum CARG3; bge ->fff_fallback + | checknum CARG1; bge ->fff_fallback + |.if FPU + | lfd FARG1, 0(BASE) + |.else + | lwz CARG2, 4(BASE) + |.endif | blex log | b ->fff_resn | @@ -1923,17 +2157,24 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM |.ffunc math_ldexp | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) + | lwz TMP0, 0(BASE) + |.if FPU | lfd FARG1, 0(BASE) - | lwz CARG4, 8(BASE) + |.else + | lwz CARG1, 0(BASE) + | lwz CARG2, 4(BASE) + |.endif + | lwz TMP1, 8(BASE) |.if GPR64 | lwz CARG2, 12(BASE) - |.else + |.elif FPU | lwz CARG1, 12(BASE) + |.else + | lwz CARG3, 12(BASE) |.endif | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback - | checknum CARG4; bne ->fff_fallback + | checknum TMP0; bge ->fff_fallback + | checknum TMP1; bne ->fff_fallback |.else |.ffunc_nn math_ldexp |.if GPR64 @@ -1948,8 +2189,10 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc_n math_frexp |.if GPR64 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - |.else + |.elif FPU | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) + |.else + | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) |.endif | lwz PC, FRAME_PC(BASE) | blex frexp @@ -1958,7 +2201,12 @@ static void build_subroutines(BuildCtx *ctx) |.if not DUALNUM | tonum_i FARG2, TMP1 |.endif + |.if FPU | stfd FARG1, 0(RA) + |.else + | stw CRET1, 0(RA) + | stw CRET2, 4(RA) + |.endif | li RD, (2+1)*8 |.if DUALNUM | stw TISNUM, 8(RA) @@ -1971,13 +2219,20 @@ static void build_subroutines(BuildCtx *ctx) 
|.ffunc_n math_modf |.if GPR64 | la CARG2, -8(BASE) - |.else + |.elif FPU | la CARG1, -8(BASE) + |.else + | la CARG3, -8(BASE) |.endif | lwz PC, FRAME_PC(BASE) | blex modf | la RA, -8(BASE) + |.if FPU | stfd FARG1, 0(BASE) + |.else + | stw CRET1, 0(BASE) + | stw CRET2, 4(BASE) + |.endif | li RD, (2+1)*8 | b ->fff_res | @@ -1985,13 +2240,13 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM | .ffunc_1 name | checknum CARG3 - | addi TMP1, BASE, 8 - | add TMP2, BASE, NARGS8:RC + | addi SAVE0, BASE, 8 + | add SAVE1, BASE, NARGS8:RC | bne >4 |1: // Handle integers. - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - | lwz CARG2, 4(TMP1) + | lwz CARG4, 0(SAVE0) + | cmplw cr1, SAVE0, SAVE1 + | lwz CARG2, 4(SAVE0) | bge cr1, ->fff_resi | checknum CARG4 | xoris TMP0, CARG1, 0x8000 @@ -2008,36 +2263,76 @@ static void build_subroutines(BuildCtx *ctx) |.if GPR64 | rldicl CARG1, CARG1, 0, 32 |.endif - | addi TMP1, TMP1, 8 + | addi SAVE0, SAVE0, 8 | b <1 |3: | bge ->fff_fallback | // Convert intermediate result to number and continue below. + |.if FPU | tonum_i FARG1, CARG1 - | lfd FARG2, 0(TMP1) + | lfd FARG2, 0(SAVE0) + |.else + | mr CARG2, CARG1 + | bl ->vm_sfi2d_1 + | lwz CARG3, 0(SAVE0) + | lwz CARG4, 4(SAVE0) + |.endif | b >6 |4: + |.if FPU | lfd FARG1, 0(BASE) + |.else + | lwz CARG1, 0(BASE) + | lwz CARG2, 4(BASE) + |.endif | bge ->fff_fallback |5: // Handle numbers. 
- | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - | lfd FARG2, 0(TMP1) + | lwz CARG3, 0(SAVE0) + | cmplw cr1, SAVE0, SAVE1 + |.if FPU + | lfd FARG2, 0(SAVE0) + |.else + | lwz CARG4, 4(SAVE0) + |.endif | bge cr1, ->fff_resn - | checknum CARG4; bge >7 + | checknum CARG3; bge >7 |6: - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 + | addi SAVE0, SAVE0, 8 + |.if FPU |.if ismax + | fsub f0, FARG1, FARG2 + |.else + | fsub f0, FARG2, FARG1 + |.endif | fsel FARG1, f0, FARG1, FARG2 |.else - | fsel FARG1, f0, FARG2, FARG1 + | stw CARG1, SFSAVE_1 + | stw CARG2, SFSAVE_2 + | stw CARG3, SFSAVE_3 + | stw CARG4, SFSAVE_4 + | blex __ledf2 + | cmpwi CRET1, 0 + |.if ismax + | blt >8 + |.else + | bge >8 + |.endif + | lwz CARG1, SFSAVE_1 + | lwz CARG2, SFSAVE_2 + | b <5 + |8: + | lwz CARG1, SFSAVE_3 + | lwz CARG2, SFSAVE_4 |.endif | b <5 |7: // Convert integer to number and continue above. - | lwz CARG2, 4(TMP1) + | lwz CARG3, 4(SAVE0) | bne ->fff_fallback - | tonum_i FARG2, CARG2 + |.if FPU + | tonum_i FARG2, CARG3 + |.else + | bl ->vm_sfi2d_2 + |.endif | b <6 |.else | .ffunc_n name @@ -2049,13 +2344,13 @@ static void build_subroutines(BuildCtx *ctx) | checknum CARG2 | bge cr1, ->fff_resn | bge ->fff_fallback - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 |.if ismax - | fsel FARG1, f0, FARG1, FARG2 + | fsub f0, FARG1, FARG2 |.else - | fsel FARG1, f0, FARG2, FARG1 + | fsub f0, FARG2, FARG1 |.endif + | addi TMP1, TMP1, 8 + | fsel FARG1, f0, FARG1, FARG2 | b <1 |.endif |.endmacro @@ -2211,7 +2506,7 @@ static void build_subroutines(BuildCtx *ctx) | stw L, SBUF:CARG1->L | stp BASE, L->base | stw PC, SAVE_PC - | stw TMP0, SBUF:CARG1->p + | stw TMP0, SBUF:CARG1->w | bl extern lj_buf_putstr_ .. 
name | bl extern lj_buf_tostr | b ->fff_resstr @@ -2237,28 +2532,37 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc_bit_op, name, ins | .ffunc_bit name - | addi TMP1, BASE, 8 - | add TMP2, BASE, NARGS8:RC + | addi SAVE0, BASE, 8 + | add SAVE1, BASE, NARGS8:RC |1: - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 + | lwz CARG4, 0(SAVE0) + | cmplw cr1, SAVE0, SAVE1 |.if DUALNUM - | lwz CARG2, 4(TMP1) + | lwz CARG2, 4(SAVE0) |.else - | lfd FARG1, 0(TMP1) + | lfd FARG1, 0(SAVE0) |.endif | bgey cr1, ->fff_resi | checknum CARG4 |.if DUALNUM + |.if FPU | bnel ->fff_bitop_fb |.else + | beq >3 + | stw CARG1, SFSAVE_1 + | bl ->fff_bitop_fb + | mr CARG2, CARG1 + | lwz CARG1, SFSAVE_1 + |3: + |.endif + |.else | fadd FARG1, FARG1, TOBIT | bge ->fff_fallback | stfd FARG1, TMPD | lwz CARG2, TMPD_LO |.endif | ins CARG1, CARG1, CARG2 - | addi TMP1, TMP1, 8 + | addi SAVE0, SAVE0, 8 | b <1 |.endmacro | @@ -2280,7 +2584,14 @@ static void build_subroutines(BuildCtx *ctx) |.macro .ffunc_bit_sh, name, ins, shmod |.if DUALNUM | .ffunc_2 bit_..name + |.if FPU | checknum CARG3; bnel ->fff_tobit_fb + |.else + | checknum CARG3; beq >1 + | bl ->fff_tobit_fb + | lwz CARG2, 12(BASE) // Conversion polluted CARG2. + |1: + |.endif | // Note: no inline conversion from number for 2nd argument! | checknum CARG4; bne ->fff_fallback |.else @@ -2317,27 +2628,77 @@ static void build_subroutines(BuildCtx *ctx) |->fff_resn: | lwz PC, FRAME_PC(BASE) | la RA, -8(BASE) + |.if FPU | stfd FARG1, -8(BASE) + |.else + | stw CARG1, -8(BASE) + | stw CARG2, -4(BASE) + |.endif | b ->fff_res1 | |// Fallback FP number to bit conversion. |->fff_tobit_fb: |.if DUALNUM + |.if FPU | lfd FARG1, 0(BASE) | bgt ->fff_fallback | fadd FARG1, FARG1, TOBIT | stfd FARG1, TMPD | lwz CARG1, TMPD_LO | blr + |.else + | bgt ->fff_fallback + | mr CARG2, CARG1 + | mr CARG1, CARG3 + |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2. 
+ |->vm_tobit: + | slwi TMP2, CARG1, 1 + | addis TMP2, TMP2, 0x0020 + | cmpwi TMP2, 0 + | bge >2 + | li TMP1, 0x3e0 + | srawi TMP2, TMP2, 21 + | not TMP1, TMP1 + | sub. TMP2, TMP1, TMP2 + | cmpwi cr7, CARG1, 0 + | blt >1 + | slwi TMP1, CARG1, 11 + | srwi TMP0, CARG2, 21 + | oris TMP1, TMP1, 0x8000 + | or TMP1, TMP1, TMP0 + | srw CARG1, TMP1, TMP2 + | bclr 4, 28 // Return if cr7[lt] == 0, no hint. + | neg CARG1, CARG1 + | blr + |1: + | addi TMP2, TMP2, 21 + | srw TMP1, CARG2, TMP2 + | slwi CARG2, CARG1, 12 + | subfic TMP2, TMP2, 20 + | slw TMP0, CARG2, TMP2 + | or CARG1, TMP1, TMP0 + | bclr 4, 28 // Return if cr7[lt] == 0, no hint. + | neg CARG1, CARG1 + | blr + |2: + | li CARG1, 0 + | blr + |.endif |.endif |->fff_bitop_fb: |.if DUALNUM - | lfd FARG1, 0(TMP1) + |.if FPU + | lfd FARG1, 0(SAVE0) | bgt ->fff_fallback | fadd FARG1, FARG1, TOBIT | stfd FARG1, TMPD | lwz CARG2, TMPD_LO | blr + |.else + | bgt ->fff_fallback + | mr CARG1, CARG4 + | b ->vm_tobit + |.endif |.endif | |//----------------------------------------------------------------------- @@ -2530,10 +2891,21 @@ static void build_subroutines(BuildCtx *ctx) | decode_RA8 RC, INS // Call base. | beq >2 |1: // Move results down. + |.if FPU | lfd f0, 0(RA) + |.else + | lwz CARG1, 0(RA) + | lwz CARG2, 4(RA) + |.endif | addic. TMP1, TMP1, -8 | addi RA, RA, 8 + |.if FPU | stfdx f0, BASE, RC + |.else + | add CARG3, BASE, RC + | stw CARG1, 0(CARG3) + | stw CARG2, 4(CARG3) + |.endif | addi RC, RC, 8 | bne <1 |2: @@ -2586,10 +2958,12 @@ static void build_subroutines(BuildCtx *ctx) |//----------------------------------------------------------------------- | |.macro savex_, a, b, c, d + |.if FPU | stfd f..a, 16+a*8(sp) | stfd f..b, 16+b*8(sp) | stfd f..c, 16+c*8(sp) | stfd f..d, 16+d*8(sp) + |.endif |.endmacro | |->vm_exit_handler: @@ -2650,8 +3024,9 @@ static void build_subroutines(BuildCtx *ctx) | addi DISPATCH, JGL, -GG_DISP2G-32768 | stp BASE, L->base |1: - | cmpwi CARG1, 0 - | blt >9 // Check for error from exit. 
+ | li TMP2, -LUA_ERRERR + | cmplw CARG1, TMP2 + | bge >9 // Check for error from exit. | lwz LFUNC:RB, FRAME_FUNC(BASE) | slwi MULTRES, CARG1, 3 | li TMP2, 0 @@ -2661,21 +3036,23 @@ static void build_subroutines(BuildCtx *ctx) | lwz KBASE, PC2PROTO(k)(TMP1) | // Setup type comparison constants. | li TISNUM, LJ_TISNUM - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | stw TMP3, TMPD + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU stw TMP3, TMPD | li ZERO, 0 - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | lfs TOBIT, TMPD - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). + | .FPU lfs TOBIT, TMPD + | .FPU stw TMP3, TMPD + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | li TISNIL, LJ_TNIL - | stw TMP0, TONUM_HI - | lfs TONUM, TMPD + | .FPU stw TMP0, TONUM_HI + | .FPU lfs TONUM, TMPD | // Modified copy of ins_next which handles function header dispatch, too. | lwz INS, 0(PC) | addi PC, PC, 4 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1. | stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) + | cmpwi CARG1, -17 // Static dispatch? + | beq >5 | decode_OPP TMP1, INS | decode_RA8 RA, INS | lpx TMP0, DISPATCH, TMP1 @@ -2705,17 +3082,60 @@ static void build_subroutines(BuildCtx *ctx) | add RA, RA, BASE | bctr | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | lwz TMP1, DISPATCH_J(trace)(DISPATCH) + | decode_RD4 RD, INS + | lwzx TRACE:TMP1, TMP1, RD + | lwz INS, TRACE:TMP1->startins + | decode_OPP TMP1, INS + | addi TMP1, TMP1, GG_DISP2STATIC + | lpx TMP0, DISPATCH, TMP1 + | mtctr TMP0 + | decode_RB8 RB, INS + | decode_RD8 RD, INS + | decode_RA8 RA, INS + | decode_RC8 RC, INS + | bctr + | |9: // Rethrow error from the right C frame. 
| neg CARG2, CARG1 | mr CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) + | bl extern lj_err_trace // (lua_State *L, int errcode) |.endif | |//----------------------------------------------------------------------- |//-- Math helper functions ---------------------------------------------- |//----------------------------------------------------------------------- | - |// NYI: Use internal implementations of floor, ceil, trunc. + |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp. + | + |.macro sfi2d, AHI, ALO + |.if not FPU + | mr. AHI, ALO + | bclr 12, 2 // Handle zero first. + | srawi TMP0, ALO, 31 + | xor TMP1, ALO, TMP0 + | sub TMP1, TMP1, TMP0 // Absolute value in TMP1. + | cntlzw AHI, TMP1 + | andix. TMP0, TMP0, 0x800 // Mask sign bit. + | slw TMP1, TMP1, AHI // Align mantissa left with leading 1. + | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI. + | slwi ALO, TMP1, 21 + | or AHI, AHI, TMP0 // Sign | Exponent. + | srwi TMP1, TMP1, 11 + | slwi AHI, AHI, 20 // Align left. + | add AHI, AHI, TMP1 // Add mantissa, increment exponent. + | blr + |.endif + |.endmacro + | + |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1. + |->vm_sfi2d_1: + | sfi2d CARG1, CARG2 + | + |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1. + |->vm_sfi2d_2: + | sfi2d CARG3, CARG4 | |->vm_modi: | divwo. TMP0, CARG1, CARG2 @@ -2770,6 +3190,11 @@ static void build_subroutines(BuildCtx *ctx) | blr |.endif | + |->vm_next: + |.if JIT + | NYI // On big-endian. 
+ |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -2783,21 +3208,21 @@ static void build_subroutines(BuildCtx *ctx) | addi DISPATCH, r12, GG_G2DISP | stw r11, CTSTATE->cb.slot | stw r3, CTSTATE->cb.gpr[0] - | stfd f1, CTSTATE->cb.fpr[0] + | .FPU stfd f1, CTSTATE->cb.fpr[0] | stw r4, CTSTATE->cb.gpr[1] - | stfd f2, CTSTATE->cb.fpr[1] + | .FPU stfd f2, CTSTATE->cb.fpr[1] | stw r5, CTSTATE->cb.gpr[2] - | stfd f3, CTSTATE->cb.fpr[2] + | .FPU stfd f3, CTSTATE->cb.fpr[2] | stw r6, CTSTATE->cb.gpr[3] - | stfd f4, CTSTATE->cb.fpr[3] + | .FPU stfd f4, CTSTATE->cb.fpr[3] | stw r7, CTSTATE->cb.gpr[4] - | stfd f5, CTSTATE->cb.fpr[4] + | .FPU stfd f5, CTSTATE->cb.fpr[4] | stw r8, CTSTATE->cb.gpr[5] - | stfd f6, CTSTATE->cb.fpr[5] + | .FPU stfd f6, CTSTATE->cb.fpr[5] | stw r9, CTSTATE->cb.gpr[6] - | stfd f7, CTSTATE->cb.fpr[6] + | .FPU stfd f7, CTSTATE->cb.fpr[6] | stw r10, CTSTATE->cb.gpr[7] - | stfd f8, CTSTATE->cb.fpr[7] + | .FPU stfd f8, CTSTATE->cb.fpr[7] | addi TMP0, sp, CFRAME_SPACE+8 | stw TMP0, CTSTATE->cb.stack | mr CARG1, CTSTATE @@ -2808,21 +3233,21 @@ static void build_subroutines(BuildCtx *ctx) | lp BASE, L:CRET1->base | li TISNUM, LJ_TISNUM // Setup type comparison constants. | lp RC, L:CRET1->top - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | li ZERO, 0 | mr L, CRET1 - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | .FPU stw TMP3, TMPD + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | lwz LFUNC:RB, FRAME_FUNC(BASE) - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | stw TMP0, TONUM_HI + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
+ | .FPU stw TMP0, TONUM_HI | li TISNIL, LJ_TNIL | li_vmstate INTERP - | lfs TOBIT, TMPD - | stw TMP3, TMPD + | .FPU lfs TOBIT, TMPD + | .FPU stw TMP3, TMPD | sub RC, RC, BASE | st_vmstate - | lfs TONUM, TMPD + | .FPU lfs TONUM, TMPD | ins_callt |.endif | @@ -2836,7 +3261,7 @@ static void build_subroutines(BuildCtx *ctx) | mr CARG2, RA | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) | lwz CRET1, CTSTATE->cb.gpr[0] - | lfd FARG1, CTSTATE->cb.fpr[0] + | .FPU lfd FARG1, CTSTATE->cb.fpr[0] | lwz CRET2, CTSTATE->cb.gpr[1] | b ->vm_leave_unw |.endif @@ -2853,14 +3278,13 @@ static void build_subroutines(BuildCtx *ctx) | stw TMP0, 4(sp) | cmpwi cr1, CARG3, 0 | mr TMP2, sp - | addic. CARG2, CARG2, -1 + | addic. CARG2, CARG2, -4 | stwux sp, sp, TMP1 | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. | stw r14, -4(TMP2) | stw CCSTATE, -8(TMP2) | mr r14, TMP2 | la TMP1, CCSTATE->stack - | slwi CARG2, CARG2, 2 | blty >2 | la TMP2, 8(sp) |1: @@ -2870,14 +3294,14 @@ static void build_subroutines(BuildCtx *ctx) | bge <1 |2: | bney cr1, >3 - | lfd f1, CCSTATE->fpr[0] - | lfd f2, CCSTATE->fpr[1] - | lfd f3, CCSTATE->fpr[2] - | lfd f4, CCSTATE->fpr[3] - | lfd f5, CCSTATE->fpr[4] - | lfd f6, CCSTATE->fpr[5] - | lfd f7, CCSTATE->fpr[6] - | lfd f8, CCSTATE->fpr[7] + | .FPU lfd f1, CCSTATE->fpr[0] + | .FPU lfd f2, CCSTATE->fpr[1] + | .FPU lfd f3, CCSTATE->fpr[2] + | .FPU lfd f4, CCSTATE->fpr[3] + | .FPU lfd f5, CCSTATE->fpr[4] + | .FPU lfd f6, CCSTATE->fpr[5] + | .FPU lfd f7, CCSTATE->fpr[6] + | .FPU lfd f8, CCSTATE->fpr[7] |3: | lp TMP0, CCSTATE->func | lwz CARG2, CCSTATE->gpr[1] @@ -2894,7 +3318,7 @@ static void build_subroutines(BuildCtx *ctx) | lwz TMP2, -4(r14) | lwz TMP0, 4(r14) | stw CARG1, CCSTATE:TMP1->gpr[0] - | stfd FARG1, CCSTATE:TMP1->fpr[0] + | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0] | stw CARG2, CCSTATE:TMP1->gpr[1] | mtlr TMP0 | stw CARG3, CCSTATE:TMP1->gpr[2] @@ -2923,19 +3347,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISLT: case 
BC_ISGE: case BC_ISLE: case BC_ISGT: | // RA = src1*8, RD = src2*8, JMP with RD = target |.if DUALNUM - | lwzux TMP0, RA, BASE + | lwzux CARG1, RA, BASE | addi PC, PC, 4 | lwz CARG2, 4(RA) - | lwzux TMP1, RD, BASE + | lwzux CARG3, RD, BASE | lwz TMP2, -4(PC) - | checknum cr0, TMP0 - | lwz CARG3, 4(RD) + | checknum cr0, CARG1 + | lwz CARG4, 4(RD) | decode_RD4 TMP2, TMP2 - | checknum cr1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + | checknum cr1, CARG3 + | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16) | bne cr0, >7 | bne cr1, >8 - | cmpw CARG2, CARG3 + | cmpw CARG2, CARG4 if (op == BC_ISLT) { | bge >2 } else if (op == BC_ISGE) { @@ -2946,28 +3370,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ble >2 } |1: - | add PC, PC, TMP2 + | add PC, PC, SAVE0 |2: | ins_next | |7: // RA is not an integer. | bgt cr0, ->vmeta_comp | // RA is a number. - | lfd f0, 0(RA) + | .FPU lfd f0, 0(RA) | bgt cr1, ->vmeta_comp | blt cr1, >4 | // RA is a number, RD is an integer. - | tonum_i f1, CARG3 + |.if FPU + | tonum_i f1, CARG4 + |.else + | bl ->vm_sfi2d_2 + |.endif | b >5 | |8: // RA is an integer, RD is not an integer. | bgt cr1, ->vmeta_comp | // RA is an integer, RD is a number. 
+ |.if FPU | tonum_i f0, CARG2 + |.else + | bl ->vm_sfi2d_1 + |.endif |4: - | lfd f1, 0(RD) + | .FPU lfd f1, 0(RD) |5: + |.if FPU | fcmpu cr0, f0, f1 + |.else + | blex __ledf2 + | cmpwi CRET1, 0 + |.endif if (op == BC_ISLT) { | bge <2 } else if (op == BC_ISGE) { @@ -3015,42 +3452,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) vk = op == BC_ISEQV; | // RA = src1*8, RD = src2*8, JMP with RD = target |.if DUALNUM - | lwzux TMP0, RA, BASE + | lwzux CARG1, RA, BASE | addi PC, PC, 4 | lwz CARG2, 4(RA) - | lwzux TMP1, RD, BASE - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | lwz CARG3, 4(RD) + | lwzux CARG3, RD, BASE + | checknum cr0, CARG1 + | lwz SAVE0, -4(PC) + | checknum cr1, CARG3 + | decode_RD4 SAVE0, SAVE0 + | lwz CARG4, 4(RD) | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) if (vk) { | ble cr7, ->BC_ISEQN_Z } else { | ble cr7, ->BC_ISNEN_Z } |.else - | lwzux TMP0, RA, BASE - | lwz TMP2, 0(PC) + | lwzux CARG1, RA, BASE + | lwz SAVE0, 0(PC) | lfd f0, 0(RA) | addi PC, PC, 4 - | lwzux TMP1, RD, BASE - | checknum cr0, TMP0 - | decode_RD4 TMP2, TMP2 + | lwzux CARG3, RD, BASE + | checknum cr0, CARG1 + | decode_RD4 SAVE0, SAVE0 | lfd f1, 0(RD) - | checknum cr1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + | checknum cr1, CARG3 + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) | bge cr0, >5 | bge cr1, >5 | fcmpu cr0, f0, f1 if (vk) { | bne >1 - | add PC, PC, TMP2 + | add PC, PC, SAVE0 } else { | beq >1 - | add PC, PC, TMP2 + | add PC, PC, SAVE0 } |1: | ins_next @@ -3058,36 +3495,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: // Either or both types are not numbers. 
|.if not DUALNUM | lwz CARG2, 4(RA) - | lwz CARG3, 4(RD) + | lwz CARG4, 4(RD) |.endif |.if FFI - | cmpwi cr7, TMP0, LJ_TCDATA - | cmpwi cr5, TMP1, LJ_TCDATA + | cmpwi cr7, CARG1, LJ_TCDATA + | cmpwi cr5, CARG3, LJ_TCDATA |.endif - | not TMP3, TMP0 - | cmplw TMP0, TMP1 - | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? + | not TMP2, CARG1 + | cmplw CARG1, CARG3 + | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive? |.if FFI | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq |.endif - | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? + | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata? |.if FFI | beq cr7, ->vmeta_equal_cd |.endif - | cmplw cr5, CARG2, CARG3 + | cmplw cr5, CARG2, CARG4 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. - | mr SAVE0, PC + | mr SAVE1, PC | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. if (vk) { | bne cr0, >6 - | add PC, PC, TMP2 + | add PC, PC, SAVE0 |6: } else { | beq cr0, >6 - | add PC, PC, TMP2 + | add PC, PC, SAVE0 |6: } |.if DUALNUM @@ -3102,6 +3539,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | | // Different tables or userdatas. Need to check __eq metamethod. | // Field metatable must be at same offset for GCtab and GCudata! + | mr CARG3, CARG4 | lwz TAB:TMP2, TAB:CARG2->metatable | li CARG4, 1-vk // ne = 0 or 1. | cmplwi TAB:TMP2, 0 @@ -3109,7 +3547,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lbz TMP2, TAB:TMP2->nomm | andix. TMP2, TMP2, 1<<MM_eq | bne <1 // Or 'no __eq' flag set? - | mr PC, SAVE0 // Restore old PC. + | mr PC, SAVE1 // Restore old PC. | b ->vmeta_equal // Handle __eq metamethod. 
break; @@ -3150,16 +3588,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) vk = op == BC_ISEQN; | // RA = src*8, RD = num_const*8, JMP with RD = target |.if DUALNUM - | lwzux TMP0, RA, BASE + | lwzux CARG1, RA, BASE | addi PC, PC, 4 | lwz CARG2, 4(RA) - | lwzux TMP1, RD, KBASE - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | lwz CARG3, 4(RD) - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + | lwzux CARG3, RD, KBASE + | checknum cr0, CARG1 + | lwz SAVE0, -4(PC) + | checknum cr1, CARG3 + | decode_RD4 SAVE0, SAVE0 + | lwz CARG4, 4(RD) + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) if (vk) { |->BC_ISEQN_Z: } else { @@ -3167,7 +3605,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | bne cr0, >7 | bne cr1, >8 - | cmpw CARG2, CARG3 + | cmpw CARG2, CARG4 |4: |.else if (vk) { @@ -3175,20 +3613,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } else { |->BC_ISNEN_Z: // Dummy label. } - | lwzx TMP0, BASE, RA + | lwzx CARG1, BASE, RA | addi PC, PC, 4 | lfdx f0, BASE, RA - | lwz TMP2, -4(PC) + | lwz SAVE0, -4(PC) | lfdx f1, KBASE, RD - | decode_RD4 TMP2, TMP2 - | checknum TMP0 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + | decode_RD4 SAVE0, SAVE0 + | checknum CARG1 + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) | bge >3 | fcmpu cr0, f0, f1 |.endif if (vk) { | bne >1 - | add PC, PC, TMP2 + | add PC, PC, SAVE0 |1: |.if not FFI |3: @@ -3199,13 +3637,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.if not FFI |3: |.endif - | add PC, PC, TMP2 + | add PC, PC, SAVE0 |2: } | ins_next |.if FFI |3: - | cmpwi TMP0, LJ_TCDATA + | cmpwi CARG1, LJ_TCDATA | beq ->vmeta_equal_cd | b <1 |.endif @@ -3213,18 +3651,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |7: // RA is not an integer. | bge cr0, <3 | // RA is a number. - | lfd f0, 0(RA) + | .FPU lfd f0, 0(RA) | blt cr1, >1 | // RA is a number, RD is an integer. 
- | tonum_i f1, CARG3 + |.if FPU + | tonum_i f1, CARG4 + |.else + | bl ->vm_sfi2d_2 + |.endif | b >2 | |8: // RA is an integer, RD is a number. + |.if FPU | tonum_i f0, CARG2 + |.else + | bl ->vm_sfi2d_1 + |.endif |1: - | lfd f1, 0(RD) + | .FPU lfd f1, 0(RD) |2: + |.if FPU | fcmpu cr0, f0, f1 + |.else + | blex __ledf2 + | cmpwi CRET1, 0 + |.endif | b <4 |.endif break; @@ -3279,7 +3730,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add PC, PC, TMP2 } else { | li TMP1, LJ_TFALSE + |.if FPU | lfdx f0, BASE, RD + |.else + | lwzux CARG1, RD, BASE + | lwz CARG2, 4(RD) + |.endif | cmplw TMP0, TMP1 if (op == BC_ISTC) { | bge >1 @@ -3288,7 +3744,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | addis PC, PC, -(BCBIAS_J*4 >> 16) | decode_RD4 TMP2, INS + |.if FPU | stfdx f0, BASE, RA + |.else + | stwux CARG1, RA, BASE + | stw CARG2, 4(RA) + |.endif | add PC, PC, TMP2 |1: } @@ -3323,8 +3784,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_MOV: | // RA = dst*8, RD = src*8 | ins_next1 + |.if FPU | lfdx f0, BASE, RD | stfdx f0, BASE, RA + |.else + | lwzux TMP0, RD, BASE + | lwz TMP1, 4(RD) + | stwux TMP0, RA, BASE + | stw TMP1, 4(RA) + |.endif | ins_next2 break; case BC_NOT: @@ -3426,44 +3894,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||switch (vk) { ||case 0: - | lwzx TMP1, BASE, RB + | lwzx CARG1, BASE, RB | .if DUALNUM - | lwzx TMP2, KBASE, RC + | lwzx CARG3, KBASE, RC | .endif + | .if FPU | lfdx f14, BASE, RB | lfdx f15, KBASE, RC + | .else + | add TMP1, BASE, RB + | add TMP2, KBASE, RC + | lwz CARG2, 4(TMP1) + | lwz CARG4, 4(TMP2) + | .endif | .if DUALNUM - | checknum cr0, TMP1 - | checknum cr1, TMP2 + | checknum cr0, CARG1 + | checknum cr1, CARG3 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | bge ->vmeta_arith_vn | .else - | checknum TMP1; bge ->vmeta_arith_vn + | checknum CARG1; bge ->vmeta_arith_vn | .endif || break; ||case 1: - | lwzx TMP1, BASE, RB + | 
lwzx CARG1, BASE, RB | .if DUALNUM - | lwzx TMP2, KBASE, RC + | lwzx CARG3, KBASE, RC | .endif + | .if FPU | lfdx f15, BASE, RB | lfdx f14, KBASE, RC + | .else + | add TMP1, BASE, RB + | add TMP2, KBASE, RC + | lwz CARG2, 4(TMP1) + | lwz CARG4, 4(TMP2) + | .endif | .if DUALNUM - | checknum cr0, TMP1 - | checknum cr1, TMP2 + | checknum cr0, CARG1 + | checknum cr1, CARG3 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | bge ->vmeta_arith_nv | .else - | checknum TMP1; bge ->vmeta_arith_nv + | checknum CARG1; bge ->vmeta_arith_nv | .endif || break; ||default: - | lwzx TMP1, BASE, RB - | lwzx TMP2, BASE, RC + | lwzx CARG1, BASE, RB + | lwzx CARG3, BASE, RC + | .if FPU | lfdx f14, BASE, RB | lfdx f15, BASE, RC - | checknum cr0, TMP1 - | checknum cr1, TMP2 + | .else + | add TMP1, BASE, RB + | add TMP2, BASE, RC + | lwz CARG2, 4(TMP1) + | lwz CARG4, 4(TMP2) + | .endif + | checknum cr0, CARG1 + | checknum cr1, CARG3 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | bge ->vmeta_arith_vv || break; @@ -3497,48 +3986,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | fsub a, b, a // b - floor(b/c)*c |.endmacro | + |.macro sfpmod + |->BC_MODVN_Z: + | stw CARG1, SFSAVE_1 + | stw CARG2, SFSAVE_2 + | mr SAVE0, CARG3 + | mr SAVE1, CARG4 + | blex __divdf3 + | blex floor + | mr CARG3, SAVE0 + | mr CARG4, SAVE1 + | blex __muldf3 + | mr CARG3, CRET1 + | mr CARG4, CRET2 + | lwz CARG1, SFSAVE_1 + | lwz CARG2, SFSAVE_2 + | blex __subdf3 + |.endmacro + | |.macro ins_arithfp, fpins | ins_arithpre |.if "fpins" == "fpmod_" | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |.else + |.elif FPU | fpins f0, f14, f15 | ins_next1 | stfdx f0, BASE, RA | ins_next2 + |.else + | blex __divdf3 // Only soft-float div uses this macro. 
+ | ins_next1 + | stwux CRET1, RA, BASE + | stw CRET2, 4(RA) + | ins_next2 |.endif |.endmacro | - |.macro ins_arithdn, intins, fpins + |.macro ins_arithdn, intins, fpins, fpcall | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||switch (vk) { ||case 0: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, KBASE - | lwz CARG1, 4(RB) - | checknum cr0, TMP1 - | lwz CARG2, 4(RC) + | lwzux CARG1, RB, BASE + | lwzux CARG3, RC, KBASE + | lwz CARG2, 4(RB) + | checknum cr0, CARG1 + | lwz CARG4, 4(RC) + | checknum cr1, CARG3 || break; ||case 1: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, KBASE - | lwz CARG2, 4(RB) - | checknum cr0, TMP1 - | lwz CARG1, 4(RC) + | lwzux CARG3, RB, BASE + | lwzux CARG1, RC, KBASE + | lwz CARG4, 4(RB) + | checknum cr0, CARG3 + | lwz CARG2, 4(RC) + | checknum cr1, CARG1 || break; ||default: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, BASE - | lwz CARG1, 4(RB) - | checknum cr0, TMP1 - | lwz CARG2, 4(RC) + | lwzux CARG1, RB, BASE + | lwzux CARG3, RC, BASE + | lwz CARG2, 4(RB) + | checknum cr0, CARG1 + | lwz CARG4, 4(RC) + | checknum cr1, CARG3 || break; ||} - | checknum cr1, TMP2 | bne >5 | bne cr1, >5 - | intins CARG1, CARG1, CARG2 + |.if "intins" == "intmod" + | mr CARG1, CARG2 + | mr CARG2, CARG4 + |.endif + | intins CARG1, CARG2, CARG4 | bso >4 |1: | ins_next1 @@ -3550,29 +4069,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checkov TMP0, <1 // Ignore unrelated overflow. | ins_arithfallback b |5: // FP variant. + |.if FPU ||if (vk == 1) { | lfd f15, 0(RB) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | lfd f14, 0(RC) ||} else { | lfd f14, 0(RB) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | lfd f15, 0(RC) ||} + |.endif + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ins_arithfallback bge |.if "fpins" == "fpmod_" | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 
|.else + |.if FPU | fpins f0, f14, f15 - | ins_next1 | stfdx f0, BASE, RA + |.else + |.if "fpcall" == "sfpmod" + | sfpmod + |.else + | blex fpcall + |.endif + | stwux CRET1, RA, BASE + | stw CRET2, 4(RA) + |.endif + | ins_next1 | b <2 |.endif |.endmacro | - |.macro ins_arith, intins, fpins + |.macro ins_arith, intins, fpins, fpcall |.if DUALNUM - | ins_arithdn intins, fpins + | ins_arithdn intins, fpins, fpcall |.else | ins_arithfp fpins |.endif @@ -3583,13 +4113,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.macro addo32., y, a, b | // Need to check overflow for (a<<32) + (b<<32). | rldicr TMP0, a, 32, 31 - | rldicr TMP3, b, 32, 31 - | addo. TMP0, TMP0, TMP3 + | rldicr TMP1, b, 32, 31 + | addo. TMP0, TMP0, TMP1 | add y, a, b |.endmacro - | ins_arith addo32., fadd + | ins_arith addo32., fadd, __adddf3 |.else - | ins_arith addo., fadd + | ins_arith addo., fadd, __adddf3 |.endif break; case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: @@ -3597,40 +4127,52 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.macro subo32., y, a, b | // Need to check overflow for (a<<32) - (b<<32). | rldicr TMP0, a, 32, 31 - | rldicr TMP3, b, 32, 31 - | subo. TMP0, TMP0, TMP3 + | rldicr TMP1, b, 32, 31 + | subo. TMP0, TMP0, TMP1 | sub y, a, b |.endmacro - | ins_arith subo32., fsub + | ins_arith subo32., fsub, __subdf3 |.else - | ins_arith subo., fsub + | ins_arith subo., fsub, __subdf3 |.endif break; case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mullwo., fmul + | ins_arith mullwo., fmul, __muldf3 break; case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | ins_arithfp fdiv break; case BC_MODVN: - | ins_arith intmod, fpmod + | ins_arith intmod, fpmod, sfpmod break; case BC_MODNV: case BC_MODVV: - | ins_arith intmod, fpmod_ + | ins_arith intmod, fpmod_, sfpmod break; case BC_POW: | // NYI: (partial) integer arithmetic. 
- | lwzx TMP1, BASE, RB + | lwzx CARG1, BASE, RB + | lwzx CARG3, BASE, RC + |.if FPU | lfdx FARG1, BASE, RB - | lwzx TMP2, BASE, RC | lfdx FARG2, BASE, RC - | checknum cr0, TMP1 - | checknum cr1, TMP2 + |.else + | add TMP1, BASE, RB + | add TMP2, BASE, RC + | lwz CARG2, 4(TMP1) + | lwz CARG4, 4(TMP2) + |.endif + | checknum cr0, CARG1 + | checknum cr1, CARG3 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | bge ->vmeta_arith_vv | blex pow | ins_next1 + |.if FPU | stfdx FARG1, BASE, RA + |.else + | stwux CARG1, RA, BASE + | stw CARG2, 4(RA) + |.endif | ins_next2 break; @@ -3650,8 +4192,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lp BASE, L->base | bne ->vmeta_binop | ins_next1 + |.if FPU | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. | stfdx f0, BASE, RA + |.else + | lwzux TMP0, SAVE0, BASE + | lwz TMP1, 4(SAVE0) + | stwux TMP0, RA, BASE + | stw TMP1, 4(RA) + |.endif | ins_next2 break; @@ -3714,8 +4263,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_KNUM: | // RA = dst*8, RD = num_const*8 | ins_next1 + |.if FPU | lfdx f0, KBASE, RD | stfdx f0, BASE, RA + |.else + | lwzux TMP0, RD, KBASE + | lwz TMP1, 4(RD) + | stwux TMP0, RA, BASE + | stw TMP1, 4(RA) + |.endif | ins_next2 break; case BC_KPRI: @@ -3748,8 +4304,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwzx UPVAL:RB, LFUNC:RB, RD | ins_next1 | lwz TMP1, UPVAL:RB->v + |.if FPU | lfd f0, 0(TMP1) | stfdx f0, BASE, RA + |.else + | lwz TMP2, 0(TMP1) + | lwz TMP3, 4(TMP1) + | stwux TMP2, RA, BASE + | stw TMP3, 4(RA) + |.endif | ins_next2 break; case BC_USETV: @@ -3757,14 +4320,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz LFUNC:RB, FRAME_FUNC(BASE) | srwi RA, RA, 1 | addi RA, RA, offsetof(GCfuncL, uvptr) + |.if FPU | lfdux f0, RD, BASE + |.else + | lwzux CARG1, RD, BASE + | lwz CARG3, 4(RD) + |.endif | lwzx UPVAL:RB, LFUNC:RB, RA | lbz TMP3, UPVAL:RB->marked | lwz CARG2, UPVAL:RB->v | andix. 
TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | lbz TMP0, UPVAL:RB->closed | lwz TMP2, 0(RD) + |.if FPU | stfd f0, 0(CARG2) + |.else + | stw CARG1, 0(CARG2) + | stw CARG3, 4(CARG2) + |.endif | cmplwi cr1, TMP0, 0 | lwz TMP1, 4(RD) | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq @@ -3820,11 +4393,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz LFUNC:RB, FRAME_FUNC(BASE) | srwi RA, RA, 1 | addi RA, RA, offsetof(GCfuncL, uvptr) + |.if FPU | lfdx f0, KBASE, RD + |.else + | lwzux TMP2, RD, KBASE + | lwz TMP3, 4(RD) + |.endif | lwzx UPVAL:RB, LFUNC:RB, RA | ins_next1 | lwz TMP1, UPVAL:RB->v + |.if FPU | stfd f0, 0(TMP1) + |.else + | stw TMP2, 0(TMP1) + | stw TMP3, 4(TMP1) + |.endif | ins_next2 break; case BC_USETP: @@ -3972,11 +4555,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | ble ->vmeta_tgetv // Integer key and in array part? | lwzx TMP0, TMP1, TMP2 + |.if FPU | lfdx f14, TMP1, TMP2 + |.else + | lwzux SAVE0, TMP1, TMP2 + | lwz SAVE1, 4(TMP1) + |.endif | checknil TMP0; beq >2 |1: | ins_next1 + |.if FPU | stfdx f14, BASE, RA + |.else + | stwux SAVE0, RA, BASE + | stw SAVE1, 4(RA) + |.endif | ins_next2 | |2: // Check for __index if table value is nil. 
@@ -4007,9 +4600,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash + | lwz TMP1, STR:RC->sid | lwz NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | slwi TMP0, TMP1, 5 | slwi TMP1, TMP1, 3 | sub TMP1, TMP0, TMP1 @@ -4052,12 +4645,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz TMP1, TAB:RB->asize | lwz TMP2, TAB:RB->array | cmplw TMP0, TMP1; bge ->vmeta_tgetb + |.if FPU | lwzx TMP1, TMP2, RC | lfdx f0, TMP2, RC + |.else + | lwzux TMP1, TMP2, RC + | lwz TMP3, 4(TMP2) + |.endif | checknil TMP1; beq >5 |1: | ins_next1 + |.if FPU | stfdx f0, BASE, RA + |.else + | stwux TMP1, RA, BASE + | stw TMP3, 4(RA) + |.endif | ins_next2 | |5: // Check for __index if table value is nil. @@ -4087,10 +4690,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmplw TMP0, CARG2 | slwi TMP2, CARG2, 3 | ble ->vmeta_tgetr // In array part? + |.if FPU | lfdx f14, TMP1, TMP2 + |.else + | lwzux SAVE0, TMP2, TMP1 + | lwz SAVE1, 4(TMP2) + |.endif |->BC_TGETR_Z: | ins_next1 + |.if FPU | stfdx f14, BASE, RA + |.else + | stwux SAVE0, RA, BASE + | stw SAVE1, 4(RA) + |.endif | ins_next2 break; @@ -4131,11 +4744,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ble ->vmeta_tsetv // Integer key and in array part? | lwzx TMP2, TMP1, TMP0 | lbz TMP3, TAB:RB->marked + |.if FPU | lfdx f14, BASE, RA + |.else + | add SAVE1, BASE, RA + | lwz SAVE0, 0(SAVE1) + | lwz SAVE1, 4(SAVE1) + |.endif | checknil TMP2; beq >3 |1: | andix. 
TMP2, TMP3, LJ_GC_BLACK // isblack(table) + |.if FPU | stfdx f14, TMP1, TMP0 + |.else + | stwux SAVE0, TMP1, TMP0 + | stw SAVE1, 4(TMP1) + |.endif | bne >7 |2: | ins_next @@ -4172,11 +4796,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash + | lwz TMP1, STR:RC->sid | lwz NODE:TMP2, TAB:RB->node | stb ZERO, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + |.if FPU | lfdx f14, BASE, RA + |.else + | add CARG2, BASE, RA + | lwz SAVE0, 0(CARG2) + | lwz SAVE1, 4(CARG2) + |.endif | slwi TMP0, TMP1, 5 | slwi TMP1, TMP1, 3 | sub TMP1, TMP0, TMP1 @@ -4192,7 +4822,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checknil CARG2; beq >4 // Key found, but nil value? |2: | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |.if FPU | stfd f14, NODE:TMP2->val + |.else + | stw SAVE0, NODE:TMP2->val.u32.hi + | stw SAVE1, NODE:TMP2->val.u32.lo + |.endif | bne >7 |3: | ins_next @@ -4231,7 +4866,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | // Returns TValue *. | lp BASE, L->base + |.if FPU | stfd f14, 0(CRET1) + |.else + | stw SAVE0, 0(CRET1) + | stw SAVE1, 4(CRET1) + |.endif | b <3 // No 2nd write barrier needed. | |7: // Possible table write barrier for the value. Skip valiswhite check. @@ -4248,13 +4888,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz TMP2, TAB:RB->array | lbz TMP3, TAB:RB->marked | cmplw TMP0, TMP1 + |.if FPU | lfdx f14, BASE, RA + |.else + | add CARG2, BASE, RA + | lwz SAVE0, 0(CARG2) + | lwz SAVE1, 4(CARG2) + |.endif | bge ->vmeta_tsetb | lwzx TMP1, TMP2, RC | checknil TMP1; beq >5 |1: | andix. 
TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |.if FPU | stfdx f14, TMP2, RC + |.else + | stwux SAVE0, RC, TMP2 + | stw SAVE1, 4(RC) + |.endif | bne >7 |2: | ins_next @@ -4294,10 +4945,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |2: | cmplw TMP0, CARG3 | slwi TMP2, CARG3, 3 + |.if FPU | lfdx f14, BASE, RA + |.else + | lwzux SAVE0, RA, BASE + | lwz SAVE1, 4(RA) + |.endif | ble ->vmeta_tsetr // In array part? | ins_next1 + |.if FPU | stfdx f14, TMP1, TMP2 + |.else + | stwux SAVE0, TMP1, TMP2 + | stw SAVE1, 4(TMP1) + |.endif | ins_next2 | |7: // Possible table write barrier for the value. Skip valiswhite check. @@ -4327,10 +4988,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add TMP1, TMP1, TMP0 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) |3: // Copy result slots to table. + |.if FPU | lfd f0, 0(RA) + |.else + | lwz SAVE0, 0(RA) + | lwz SAVE1, 4(RA) + |.endif | addi RA, RA, 8 | cmpw cr1, RA, TMP2 + |.if FPU | stfd f0, 0(TMP1) + |.else + | stw SAVE0, 0(TMP1) + | stw SAVE1, 4(TMP1) + |.endif | addi TMP1, TMP1, 8 | blt cr1, <3 | bne >7 @@ -4397,9 +5068,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beq cr1, >3 |2: | addi TMP3, TMP2, 8 + |.if FPU | lfdx f0, RA, TMP2 + |.else + | add CARG3, RA, TMP2 + | lwz CARG1, 0(CARG3) + | lwz CARG2, 4(CARG3) + |.endif | cmplw cr1, TMP3, NARGS8:RC + |.if FPU | stfdx f0, BASE, TMP2 + |.else + | stwux CARG1, TMP2, BASE + | stw CARG2, 4(TMP2) + |.endif | mr TMP2, TMP3 | bne cr1, <2 |3: @@ -4432,14 +5114,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add BASE, BASE, RA | lwz TMP1, -24(BASE) | lwz LFUNC:RB, -20(BASE) + |.if FPU | lfd f1, -8(BASE) | lfd f0, -16(BASE) + |.else + | lwz CARG1, -8(BASE) + | lwz CARG2, -4(BASE) + | lwz CARG3, -16(BASE) + | lwz CARG4, -12(BASE) + |.endif | stw TMP1, 0(BASE) // Copy callable. | stw LFUNC:RB, 4(BASE) | checkfunc TMP1 - | stfd f1, 16(BASE) // Copy control var. | li NARGS8:RC, 16 // Iterators get 2 arguments. 
+ |.if FPU + | stfd f1, 16(BASE) // Copy control var. | stfdu f0, 8(BASE) // Copy state. + |.else + | stw CARG1, 16(BASE) // Copy control var. + | stw CARG2, 20(BASE) + | stwu CARG3, 8(BASE) // Copy state. + | stw CARG4, 4(BASE) + |.endif | bne ->vmeta_call | ins_call break; @@ -4447,8 +5143,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERN: | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | // NYI on big-endian |.endif + |->vm_IITERN: | add RA, BASE, RA | lwz TAB:RB, -12(RA) | lwz RC, -4(RA) // Get index from control var. @@ -4460,7 +5157,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | slwi TMP3, RC, 3 | bge >5 // Index points after array part? | lwzx TMP2, TMP1, TMP3 + |.if FPU | lfdx f0, TMP1, TMP3 + |.else + | lwzux CARG1, TMP3, TMP1 + | lwz CARG2, 4(TMP3) + |.endif | checknil TMP2 | lwz INS, -4(PC) | beq >4 @@ -4472,7 +5174,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | addi RC, RC, 1 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) + |.if FPU | stfd f0, 8(RA) + |.else + | stw CARG1, 8(RA) + | stw CARG2, 12(RA) + |.endif | decode_RD4 TMP1, INS | stw RC, -4(RA) // Update control var. 
| add PC, TMP1, TMP3 @@ -4497,17 +5204,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | slwi RB, RC, 3 | sub TMP3, TMP3, RB | lwzx RB, TMP2, TMP3 + |.if FPU | lfdx f0, TMP2, TMP3 + |.else + | add CARG3, TMP2, TMP3 + | lwz CARG1, 0(CARG3) + | lwz CARG2, 4(CARG3) + |.endif | add NODE:TMP3, TMP2, TMP3 | checknil RB | lwz INS, -4(PC) | beq >7 + |.if FPU | lfd f1, NODE:TMP3->key + |.else + | lwz CARG3, NODE:TMP3->key.u32.hi + | lwz CARG4, NODE:TMP3->key.u32.lo + |.endif | addis TMP2, PC, -(BCBIAS_J*4 >> 16) + |.if FPU | stfd f0, 8(RA) + |.else + | stw CARG1, 8(RA) + | stw CARG2, 12(RA) + |.endif | add RC, RC, TMP0 | decode_RD4 TMP1, INS + |.if FPU | stfd f1, 0(RA) + |.else + | stw CARG3, 0(RA) + | stw CARG4, 4(RA) + |.endif | addi RC, RC, 1 | add PC, TMP1, TMP2 | stw RC, -4(RA) // Update control var. @@ -4536,8 +5264,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq | add TMP3, PC, TMP0 | bne cr0, >5 - | lus TMP1, 0xfffe - | ori TMP1, TMP1, 0x7fff + | lus TMP1, (LJ_KEYINDEX >> 16) + | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) | stw ZERO, -4(RA) // Initialize control var. | stw TMP1, -8(RA) | addis PC, TMP3, -(BCBIAS_J*4 >> 16) @@ -4548,6 +5276,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP1, BC_ITERC | stb TMP0, -1(PC) | addis PC, TMP3, -(BCBIAS_J*4 >> 16) + | // NYI on big-endian: unpatch JLOOP. | stb TMP1, 3(PC) | b <1 break; @@ -4573,9 +5302,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | subi TMP2, TMP2, 16 | ble >2 // No vararg slots? |1: // Copy vararg slots to destination slots. + |.if FPU | lfd f0, 0(RC) + |.else + | lwz CARG1, 0(RC) + | lwz CARG2, 4(RC) + |.endif | addi RC, RC, 8 + |.if FPU | stfd f0, 0(RA) + |.else + | stw CARG1, 0(RA) + | stw CARG2, 4(RA) + |.endif | cmplw RA, TMP2 | cmplw cr1, RC, TMP3 | bge >3 // All destination slots filled? 
@@ -4598,9 +5337,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addi MULTRES, TMP1, 8 | bgt >7 |6: + |.if FPU | lfd f0, 0(RC) + |.else + | lwz CARG1, 0(RC) + | lwz CARG2, 4(RC) + |.endif | addi RC, RC, 8 + |.if FPU | stfd f0, 0(RA) + |.else + | stw CARG1, 0(RA) + | stw CARG2, 4(RA) + |.endif | cmplw RC, TMP3 | addi RA, RA, 8 | blt <6 // More vararg slots? @@ -4651,14 +5400,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP1, 0 |2: | addi TMP3, TMP1, 8 + |.if FPU | lfdx f0, RA, TMP1 + |.else + | add CARG3, RA, TMP1 + | lwz CARG1, 0(CARG3) + | lwz CARG2, 4(CARG3) + |.endif | cmpw TMP3, RC + |.if FPU | stfdx f0, TMP2, TMP1 + |.else + | add CARG3, TMP2, TMP1 + | stw CARG1, 0(CARG3) + | stw CARG2, 4(CARG3) + |.endif | beq >3 | addi TMP1, TMP3, 8 + |.if FPU | lfdx f1, RA, TMP3 + |.else + | add CARG3, RA, TMP3 + | lwz CARG1, 0(CARG3) + | lwz CARG2, 4(CARG3) + |.endif | cmpw TMP1, RC + |.if FPU | stfdx f1, TMP2, TMP3 + |.else + | add CARG3, TMP2, TMP3 + | stw CARG1, 0(CARG3) + | stw CARG2, 4(CARG3) + |.endif | bne <2 |3: |5: @@ -4700,8 +5473,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | subi TMP2, BASE, 8 | decode_RB8 RB, INS if (op == BC_RET1) { + |.if FPU | lfd f0, 0(RA) | stfd f0, 0(TMP2) + |.else + | lwz CARG1, 0(RA) + | lwz CARG2, 4(RA) + | stw CARG1, 0(TMP2) + | stw CARG2, 4(TMP2) + |.endif } |5: | cmplw RB, RD @@ -4762,11 +5542,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |4: | stw CARG1, FORL_IDX*8+4(RA) } else { - | lwz TMP3, FORL_STEP*8(RA) + | lwz SAVE0, FORL_STEP*8(RA) | lwz CARG3, FORL_STEP*8+4(RA) | lwz TMP2, FORL_STOP*8(RA) | lwz CARG2, FORL_STOP*8+4(RA) - | cmplw cr7, TMP3, TISNUM + | cmplw cr7, SAVE0, TISNUM | cmplw cr1, TMP2, TISNUM | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq @@ -4809,41 +5589,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (vk) { |.if DUALNUM |9: // FP loop. 
+ |.if FPU | lfd f1, FORL_IDX*8(RA) |.else + | lwz CARG1, FORL_IDX*8(RA) + | lwz CARG2, FORL_IDX*8+4(RA) + |.endif + |.else | lfdux f1, RA, BASE |.endif + |.if FPU | lfd f3, FORL_STEP*8(RA) | lfd f2, FORL_STOP*8(RA) - | lwz TMP3, FORL_STEP*8(RA) | fadd f1, f1, f3 | stfd f1, FORL_IDX*8(RA) + |.else + | lwz CARG3, FORL_STEP*8(RA) + | lwz CARG4, FORL_STEP*8+4(RA) + | mr SAVE1, RD + | blex __adddf3 + | mr RD, SAVE1 + | stw CRET1, FORL_IDX*8(RA) + | stw CRET2, FORL_IDX*8+4(RA) + | lwz CARG3, FORL_STOP*8(RA) + | lwz CARG4, FORL_STOP*8+4(RA) + |.endif + | lwz SAVE0, FORL_STEP*8(RA) } else { |.if DUALNUM |9: // FP loop. |.else | lwzux TMP1, RA, BASE - | lwz TMP3, FORL_STEP*8(RA) + | lwz SAVE0, FORL_STEP*8(RA) | lwz TMP2, FORL_STOP*8(RA) | cmplw cr0, TMP1, TISNUM - | cmplw cr7, TMP3, TISNUM + | cmplw cr7, SAVE0, TISNUM | cmplw cr1, TMP2, TISNUM |.endif + |.if FPU | lfd f1, FORL_IDX*8(RA) + |.else + | lwz CARG1, FORL_IDX*8(RA) + | lwz CARG2, FORL_IDX*8+4(RA) + |.endif | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + |.if FPU | lfd f2, FORL_STOP*8(RA) + |.else + | lwz CARG3, FORL_STOP*8(RA) + | lwz CARG4, FORL_STOP*8+4(RA) + |.endif | bge ->vmeta_for } - | cmpwi cr6, TMP3, 0 + | cmpwi cr6, SAVE0, 0 if (op != BC_JFORL) { | srwi RD, RD, 1 } + |.if FPU | stfd f1, FORL_EXT*8(RA) + |.else + | stw CARG1, FORL_EXT*8(RA) + | stw CARG2, FORL_EXT*8+4(RA) + |.endif if (op != BC_JFORL) { | add RD, PC, RD } + |.if FPU | fcmpu cr0, f1, f2 + |.else + | mr SAVE1, RD + | blex __ledf2 + | cmpwi CRET1, 0 + | mr RD, SAVE1 + |.endif if (op == BC_JFORI) { | addis PC, RD, -(BCBIAS_J*4 >> 16) } diff --git a/source/libs/luajit/LuaJIT-src/src/vm_x64.dasc b/source/libs/luajit/LuaJIT-src/src/vm_x64.dasc index a003fb4f6bf5c9d0d25695e3695e466d476d4625..f501495b1170c9b35733640c083ee08a2d21384f 100644 --- a/source/libs/luajit/LuaJIT-src/src/vm_x64.dasc +++ b/source/libs/luajit/LuaJIT-src/src/vm_x64.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for x64 CPUs in LJ_GC64 mode. 
|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h | |.arch x64 |.section code_op, code_sub @@ -359,9 +359,6 @@ |.macro sseconst_1, reg, tmp // Synthesize 1.0. | sseconst_hi reg, tmp, 3ff00000 |.endmacro -|.macro sseconst_m1, reg, tmp // Synthesize -1.0. -| sseconst_hi reg, tmp, bff00000 -|.endmacro |.macro sseconst_2p52, reg, tmp // Synthesize 2^52. | sseconst_hi reg, tmp, 43300000 |.endmacro @@ -1230,7 +1227,7 @@ static void build_subroutines(BuildCtx *ctx) | mov [BASE-16], TAB:RC // Store metatable as default result. | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)] | mov RAd, TAB:RB->hmask - | and RAd, STR:RC->hash + | and RAd, STR:RC->sid | settp STR:RC, LJ_TSTR | imul RAd, #NODE | add NODE:RA, TAB:RB->node @@ -1346,44 +1343,28 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc_1 next | je >2 // Missing 2nd arg? |1: - |.if X64WIN - | mov RA, [BASE] - | checktab RA, ->fff_fallback - |.else - | mov CARG2, [BASE] - | checktab CARG2, ->fff_fallback - |.endif - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Add frame since C call can throw. - | mov L:RB->top, BASE // Dummy frame length is ok. + | mov CARG1, [BASE] | mov PC, [BASE-8] + | checktab CARG1, ->fff_fallback + | mov RB, BASE // Save BASE. |.if X64WIN - | lea CARG3, [BASE+8] - | mov CARG2, RA // Caveat: CARG2 == BASE. - | mov CARG1, L:RB + | lea CARG3, [BASE-16] + | lea CARG2, [BASE+8] // Caveat: CARG2 == BASE. |.else - | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. - | mov CARG1, L:RB + | lea CARG2, [BASE+8] + | lea CARG3, [BASE-16] // Caveat: CARG3 == BASE. |.endif - | mov SAVE_PC, PC // Needed for ITERN fallback. - | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Flag returned in eax (RD). - | mov BASE, L:RB->base - | test RDd, RDd; jz >3 // End of traversal? 
- | // Copy key and value to results. - | mov RB, [BASE+8] - | mov RD, [BASE+16] - | mov [BASE-16], RB - | mov [BASE-8], RD - |->fff_res2: - | mov RDd, 1+2 - | jmp ->fff_res + | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // 1=found, 0=end, -1=error returned in eax (RD). + | mov BASE, RB // Restore BASE. + | test RDd, RDd; jg ->fff_res2 // Found key/value. + | js ->fff_fallback_2 // Invalid key. + | // End of traversal: return nil. + | mov aword [BASE-16], LJ_TNIL + | jmp ->fff_res1 |2: // Set missing 2nd arg to nil. | mov aword [BASE+8], LJ_TNIL | jmp <1 - |3: // End of traversal: return nil. - | mov aword [BASE-16], LJ_TNIL - | jmp ->fff_res1 | |.ffunc_1 pairs | mov TAB:RB, [BASE] @@ -1432,7 +1413,9 @@ static void build_subroutines(BuildCtx *ctx) | // Copy array slot. | mov RB, [RD] | mov [BASE-8], RB - | jmp ->fff_res2 + |->fff_res2: + | mov RDd, 1+2 + | jmp ->fff_res |2: // Check for empty hash part first. Otherwise call C function. | cmp dword TAB:RB->hmask, 0; je ->fff_res0 |.if X64WIN @@ -1480,6 +1463,9 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: catch errors ---------------------------------------- | |.ffunc_1 pcall + | mov L:RB, SAVE_L + | lea RA, [BASE+NARGS:RD*8] + | cmp RA, L:RB->maxstack; ja ->fff_fallback | lea RA, [BASE+16] | sub NARGS:RDd, 1 | mov PCd, 16+FRAME_PCALL @@ -1498,6 +1484,9 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->vm_call_dispatch | |.ffunc_2 xpcall + | mov L:RB, SAVE_L + | lea RA, [BASE+NARGS:RD*8] + | cmp RA, L:RB->maxstack; ja ->fff_fallback | mov LFUNC:RA, [BASE+8] | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback | mov LFUNC:RB, [BASE] // Swap function and traceback. 
@@ -1840,7 +1829,7 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->fff_res | |.macro math_minmax, name, cmovop, sseop - | .ffunc name + | .ffunc_1 name | mov RAd, 2 |.if DUALNUM | mov RB, [BASE] @@ -2011,7 +2000,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | mov RC, SBUF:CARG1->b | mov SBUF:CARG1->L, L:RB - | mov SBUF:CARG1->p, RC + | mov SBUF:CARG1->w, RC | mov SAVE_PC, PC | call extern lj_buf_putstr_ .. name | mov CARG1, rax @@ -2470,7 +2459,7 @@ static void build_subroutines(BuildCtx *ctx) | mov r12, [RA] | mov rsp, RA // Reposition stack to C frame. |.endif - | test RDd, RDd; js >9 // Check for error from exit. + | cmp RDd, -LUA_ERRERR; jae >9 // Check for error from exit. | mov L:RB, SAVE_L | mov MULTRES, RDd | mov LFUNC:KBASE, [BASE-16] @@ -2486,6 +2475,8 @@ static void build_subroutines(BuildCtx *ctx) | movzx OP, RCL | add PC, 4 | shr RCd, 16 + | cmp MULTRES, -17 // Static dispatch? + | je >5 | cmp OP, BC_FUNCF // Function header? | jb >3 | cmp OP, BC_FUNCC+2 // Fast function? @@ -2508,11 +2499,20 @@ static void build_subroutines(BuildCtx *ctx) | mov KBASE, [KBASE+PC2PROTO(k)] | jmp <2 | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | mov RA, [DISPATCH+DISPATCH_J(trace)] + | mov TRACE:RA, [RA+RD*8] + | mov RCd, TRACE:RA->startins + | movzx RAd, RCH + | movzx OP, RCL + | shr RCd, 16 + | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] + | |9: // Rethrow error from the right C frame. - | neg RD + | mov CARG2d, RDd | mov CARG1, L:RB - | mov CARG2, RD - | call extern lj_err_throw // (lua_State *L, int errcode) + | neg CARG2d + | call extern lj_err_trace // (lua_State *L, int errcode) |.endif | |//----------------------------------------------------------------------- @@ -2544,15 +2544,17 @@ static void build_subroutines(BuildCtx *ctx) | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 | subsd xmm1, xmm3 | orpd xmm1, xmm2 // Merge sign bit back in. + | sseconst_1 xmm3, RD | .if mode == 1 // ceil(x)? 
- | sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0. | cmpsd xmm0, xmm1, 6 // x > result? + | andpd xmm0, xmm3 + | addsd xmm1, xmm0 // If yes, add 1. + | orpd xmm1, xmm2 // Merge sign bit back in (again). | .else // floor(x)? - | sseconst_1 xmm2, RD | cmpsd xmm0, xmm1, 1 // x < result? + | andpd xmm0, xmm3 + | subsd xmm1, xmm0 // If yes, subtract 1. | .endif - | andpd xmm0, xmm2 - | subsd xmm1, xmm0 // If yes, subtract +-1. |.endif | movaps xmm0, xmm1 |1: @@ -2593,41 +2595,6 @@ static void build_subroutines(BuildCtx *ctx) | subsd xmm0, xmm1 | ret | - |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. - |->vm_powi_sse: - | cmp eax, 1; jle >6 // i<=1? - | // Now 1 < (unsigned)i <= 0x80000000. - |1: // Handle leading zeros. - | test eax, 1; jnz >2 - | mulsd xmm0, xmm0 - | shr eax, 1 - | jmp <1 - |2: - | shr eax, 1; jz >5 - | movaps xmm1, xmm0 - |3: // Handle trailing bits. - | mulsd xmm0, xmm0 - | shr eax, 1; jz >4 - | jnc <3 - | mulsd xmm1, xmm0 - | jmp <3 - |4: - | mulsd xmm0, xmm1 - |5: - | ret - |6: - | je <5 // x^1 ==> x - | jb >7 // x^0 ==> 1 - | neg eax - | call <1 - | sseconst_1 xmm1, RD - | divsd xmm1, xmm0 - | movaps xmm0, xmm1 - | ret - |7: - | sseconst_1 xmm0, RD - | ret - | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- @@ -2647,6 +2614,67 @@ static void build_subroutines(BuildCtx *ctx) | .if X64WIN; pop rsi; .endif | ret | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2d + |.define NEXT_IDXa, CARG2 + |.define NEXT_PTR, RC + |.define NEXT_PTRd, RCd + |.define NEXT_TMP, CARG3 + |.define NEXT_ASIZE, CARG4d + |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.if X64WIN + |.define NEXT_RES_PTR, [rsp+aword*5] + |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro + |.else + |.define NEXT_RES_PTR, [rsp+aword*1] + |.macro NEXT_RES_IDX, 
op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.endif + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in edx. + |->vm_next: + |.if JIT + | mov NEXT_ASIZE, NEXT_TAB->asize + |1: // Traverse array part. + | cmp NEXT_IDX, NEXT_ASIZE; jae >5 + | mov NEXT_TMP, NEXT_TAB->array + | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8] + | cmp NEXT_TMP, LJ_TNIL; je >2 + | lea NEXT_PTR, NEXT_RES_PTR + | mov qword [NEXT_PTR], NEXT_TMP + |.if DUALNUM + | setint NEXT_TMP, NEXT_IDXa + | mov qword [NEXT_PTR+qword*1], NEXT_TMP + |.else + | cvtsi2sd xmm0, NEXT_IDX + | movsd qword [NEXT_PTR+qword*1], xmm0 + |.endif + | NEXT_RES_IDX 1 + | ret + |2: // Skip holes in array part. + | add NEXT_IDX, 1 + | jmp <1 + | + |5: // Traverse hash part. + | sub NEXT_IDX, NEXT_ASIZE + |6: + | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 + | imul NEXT_PTRd, NEXT_IDX, #NODE + | add NODE:NEXT_PTR, NEXT_TAB->node + | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7 + | NEXT_RES_IDXL NEXT_ASIZE+1 + | ret + |7: // Skip holes in hash part. + | add NEXT_IDX, 1 + | jmp <6 + | + |9: // End of iteration. Set the key to nil (not the value). + | NEXT_RES_IDX NEXT_ASIZE + | lea NEXT_PTR, NEXT_RES_PTR + | mov qword [NEXT_PTR+qword*1], LJ_TNIL + | ret + |.endif + | |//----------------------------------------------------------------------- |//-- Assertions --------------------------------------------------------- |//----------------------------------------------------------------------- @@ -2733,12 +2761,12 @@ static void build_subroutines(BuildCtx *ctx) | | // Copy stack slots. 
| movzx ecx, byte CCSTATE->nsp - | sub ecx, 1 + | sub ecx, 8 | js >2 |1: - | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] - | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax - | sub ecx, 1 + | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)] + | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax + | sub ecx, 8 | jns <1 |2: | @@ -3674,7 +3702,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checktab TAB:RB, ->vmeta_tgets |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * | mov TMPRd, TAB:RB->hmask - | and TMPRd, STR:RC->hash + | and TMPRd, STR:RC->sid | imul TMPRd, #NODE | add NODE:TMPR, TAB:RB->node | settp ITYPE, STR:RC, LJ_TSTR @@ -3806,7 +3834,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checktab TAB:RB, ->vmeta_tsets |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr * | mov TMPRd, TAB:RB->hmask - | and TMPRd, STR:RC->hash + | and TMPRd, STR:RC->sid | imul TMPRd, #NODE | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. | add NODE:TMPR, TAB:RB->node @@ -4058,10 +4086,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | hotloop RBd |.endif + |->vm_IITERN: + | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | mov TAB:RB, [BASE+RA*8-16] | cleartp TAB:RB | mov RCd, [BASE+RA*8-8] // Get index from control var. @@ -4125,15 +4154,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 | branchPC RD - | mov64 TMPR, U64x(fffe7fff, 00000000) + | mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32) | mov [BASE+RA*8-8], TMPR // Initialize control var. |1: | ins_next |5: // Despecialize bytecode if any of the checks fail. | mov PC_OP, BC_JMP | branchPC RD + |.if JIT + | cmp byte [PC], BC_ITERN + | jne >6 + |.endif | mov byte [PC], BC_ITERC | jmp <1 + |.if JIT + |6: // Unpatch JLOOP. 
+ | mov RA, [DISPATCH+DISPATCH_J(trace)] + | movzx RCd, word [PC+2] + | mov TRACE:RA, [RA+RC*8] + | mov eax, TRACE:RA->startins + | mov al, BC_ITERC + | mov dword [PC], eax + | jmp <1 + |.endif break; case BC_VARG: @@ -4734,7 +4777,7 @@ static void emit_asm_debug(BuildCtx *ctx) ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); #endif #if !LJ_NO_UNWIND -#if (defined(__sun__) && defined(__svr4__)) +#if LJ_TARGET_SOLARIS fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); #else fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); @@ -4898,7 +4941,6 @@ static void emit_asm_debug(BuildCtx *ctx) "LEFDEY:\n\n", fcsize); } #endif - fprintf(ctx->fp, ".subsections_via_symbols\n"); } break; #endif diff --git a/source/libs/luajit/LuaJIT-src/src/vm_x86.dasc b/source/libs/luajit/LuaJIT-src/src/vm_x86.dasc index 211ae7b922ad4cb0d12ef9d09f012b6b5c25b3ec..77c4069d451819b34b7b6d4631f39954a5dc3648 100644 --- a/source/libs/luajit/LuaJIT-src/src/vm_x86.dasc +++ b/source/libs/luajit/LuaJIT-src/src/vm_x86.dasc @@ -1,6 +1,6 @@ |// Low-level VM code for x86 CPUs. |// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h | |.if P64 |.arch x64 @@ -464,9 +464,6 @@ |.macro sseconst_1, reg, tmp // Synthesize 1.0. | sseconst_hi reg, tmp, 3ff00000 |.endmacro -|.macro sseconst_m1, reg, tmp // Synthesize -1.0. -| sseconst_hi reg, tmp, bff00000 -|.endmacro |.macro sseconst_2p52, reg, tmp // Synthesize 2^52. | sseconst_hi reg, tmp, 43300000 |.endmacro @@ -1372,7 +1369,11 @@ static void build_subroutines(BuildCtx *ctx) | mov LFUNC:RB, [RA-8] | add NARGS:RD, 1 | // This is fragile. L->base must not move, KBASE must always be defined. + |.if X64 + | cmp KBASEa, rdx // Continue with CALLT if flag set. + |.else | cmp KBASE, BASE // Continue with CALLT if flag set. 
+ |.endif | je ->BC_CALLT_Z | mov BASE, RA | ins_call // Otherwise call resolved metamethod. @@ -1522,7 +1523,7 @@ static void build_subroutines(BuildCtx *ctx) | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. | mov [BASE-8], TAB:RB | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash + | and RA, STR:RC->sid | imul RA, #NODE | add NODE:RA, TAB:RB->node |3: // Rearranged logic, because we expect _not_ to find the key. @@ -1669,55 +1670,35 @@ static void build_subroutines(BuildCtx *ctx) | je >2 // Missing 2nd arg? |1: | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Add frame since C call can throw. - | mov L:RB->top, BASE // Dummy frame length is ok. | mov PC, [BASE-4] + | mov RB, BASE // Save BASE. |.if X64WIN - | lea CARG3d, [BASE+8] - | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. - | mov CARG1d, L:RB + | mov CARG1d, [BASE] + | lea CARG3d, [BASE-8] + | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE. |.elif X64 - | mov CARG2d, [BASE] - | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. - | mov CARG1d, L:RB + | mov CARG1d, [BASE] + | lea CARG2d, [BASE+8] + | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE. |.else | mov TAB:RD, [BASE] - | mov ARG2, TAB:RD - | mov ARG1, L:RB + | mov ARG1, TAB:RD | add BASE, 8 + | mov ARG2, BASE + | sub BASE, 8+8 | mov ARG3, BASE |.endif - | mov SAVE_PC, PC // Needed for ITERN fallback. - | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Flag returned in eax (RD). - | mov BASE, L:RB->base - | test RD, RD; jz >3 // End of traversal? - | // Copy key and value to results. 
- |.if X64 - | mov RBa, [BASE+8] - | mov RDa, [BASE+16] - | mov [BASE-8], RBa - | mov [BASE], RDa - |.else - | mov RB, [BASE+8] - | mov RD, [BASE+12] - | mov [BASE-8], RB - | mov [BASE-4], RD - | mov RB, [BASE+16] - | mov RD, [BASE+20] - | mov [BASE], RB - | mov [BASE+4], RD - |.endif - |->fff_res2: - | mov RD, 1+2 - | jmp ->fff_res + | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // 1=found, 0=end, -1=error returned in eax (RD). + | mov BASE, RB // Restore BASE. + | test RD, RD; jg ->fff_res2 // Found key/value. + | js ->fff_fallback_2 // Invalid key. + | // End of traversal: return nil. + | mov dword [BASE-4], LJ_TNIL + | jmp ->fff_res1 |2: // Set missing 2nd arg to nil. | mov dword [BASE+12], LJ_TNIL | jmp <1 - |3: // End of traversal: return nil. - | mov dword [BASE-4], LJ_TNIL - | jmp ->fff_res1 | |.ffunc_1 pairs | mov TAB:RB, [BASE] @@ -1771,7 +1752,9 @@ static void build_subroutines(BuildCtx *ctx) | mov [BASE], RB | mov [BASE+4], RD |.endif - | jmp ->fff_res2 + |->fff_res2: + | mov RD, 1+2 + | jmp ->fff_res |2: // Check for empty hash part first. Otherwise call C function. | cmp dword TAB:RB->hmask, 0; je ->fff_res0 | mov FCARG1, TAB:RB @@ -1810,6 +1793,9 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: catch errors ---------------------------------------- | |.ffunc_1 pcall + | mov L:RB, SAVE_L + | lea RA, [BASE+NARGS:RD*8] + | cmp RA, L:RB->maxstack; ja ->fff_fallback | lea RA, [BASE+8] | sub NARGS:RD, 1 | mov PC, 8+FRAME_PCALL @@ -1821,6 +1807,9 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->vm_call_dispatch | |.ffunc_2 xpcall + | mov L:RB, SAVE_L + | lea RA, [BASE+NARGS:RD*8] + | cmp RA, L:RB->maxstack; ja ->fff_fallback | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback | mov RB, [BASE+4] // Swap function and traceback. | mov [BASE+12], RB @@ -2233,7 +2222,7 @@ static void build_subroutines(BuildCtx *ctx) | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 
| |.macro math_minmax, name, cmovop, sseop - | .ffunc name + | .ffunc_1 name | mov RA, 2 | cmp dword [BASE+4], LJ_TISNUM |.if DUALNUM @@ -2419,9 +2408,9 @@ static void build_subroutines(BuildCtx *ctx) | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] | mov L:RB->base, BASE | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE - | mov RC, SBUF:FCARG1->b + | mov RCa, SBUF:FCARG1->b | mov SBUF:FCARG1->L, L:RB - | mov SBUF:FCARG1->p, RC + | mov SBUF:FCARG1->w, RCa | mov SAVE_PC, PC | call extern lj_buf_putstr_ .. name .. @8 | mov FCARG1, eax @@ -2919,7 +2908,7 @@ static void build_subroutines(BuildCtx *ctx) | mov r13, TMPa | mov r12, TMPQ |.endif - | test RD, RD; js >9 // Check for error from exit. + | cmp RD, -LUA_ERRERR; jae >9 // Check for error from exit. | mov L:RB, SAVE_L | mov MULTRES, RD | mov LFUNC:KBASE, [BASE-8] @@ -2934,6 +2923,8 @@ static void build_subroutines(BuildCtx *ctx) | movzx OP, RCL | add PC, 4 | shr RC, 16 + | cmp MULTRES, -17 // Static dispatch? + | je >5 | cmp OP, BC_FUNCF // Function header? | jb >3 | cmp OP, BC_FUNCC+2 // Fast function? @@ -2959,11 +2950,24 @@ static void build_subroutines(BuildCtx *ctx) | mov KBASE, [KBASE+PC2PROTO(k)] | jmp <2 | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | mov RA, [DISPATCH+DISPATCH_J(trace)] + | mov TRACE:RA, [RA+RD*4] + | mov RC, TRACE:RA->startins + | movzx RA, RCH + | movzx OP, RCL + | shr RC, 16 + |.if X64 + | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] + |.else + | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] + |.endif + | |9: // Rethrow error from the right C frame. 
- | neg RD - | mov FCARG1, L:RB | mov FCARG2, RD - | call extern lj_err_throw@8 // (lua_State *L, int errcode) + | mov FCARG1, L:RB + | neg FCARG2 + | call extern lj_err_trace@8 // (lua_State *L, int errcode) |.endif | |//----------------------------------------------------------------------- @@ -3003,15 +3007,17 @@ static void build_subroutines(BuildCtx *ctx) | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 | subsd xmm1, xmm3 | orpd xmm1, xmm2 // Merge sign bit back in. + | sseconst_1 xmm3, RDa | .if mode == 1 // ceil(x)? - | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0. | cmpsd xmm0, xmm1, 6 // x > result? + | andpd xmm0, xmm3 + | addsd xmm1, xmm0 // If yes, add 1. + | orpd xmm1, xmm2 // Merge sign bit back in (again). | .else // floor(x)? - | sseconst_1 xmm2, RDa | cmpsd xmm0, xmm1, 1 // x < result? + | andpd xmm0, xmm3 + | subsd xmm1, xmm0 // If yes, subtract 1. | .endif - | andpd xmm0, xmm2 - | subsd xmm1, xmm0 // If yes, subtract +-1. |.endif | movaps xmm0, xmm1 |1: @@ -3052,41 +3058,6 @@ static void build_subroutines(BuildCtx *ctx) | subsd xmm0, xmm1 | ret | - |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. - |->vm_powi_sse: - | cmp eax, 1; jle >6 // i<=1? - | // Now 1 < (unsigned)i <= 0x80000000. - |1: // Handle leading zeros. - | test eax, 1; jnz >2 - | mulsd xmm0, xmm0 - | shr eax, 1 - | jmp <1 - |2: - | shr eax, 1; jz >5 - | movaps xmm1, xmm0 - |3: // Handle trailing bits. 
- | mulsd xmm0, xmm0 - | shr eax, 1; jz >4 - | jnc <3 - | mulsd xmm1, xmm0 - | jmp <3 - |4: - | mulsd xmm0, xmm1 - |5: - | ret - |6: - | je <5 // x^1 ==> x - | jb >7 // x^0 ==> 1 - | neg eax - | call <1 - | sseconst_1 xmm1, RDa - | divsd xmm1, xmm0 - | movaps xmm0, xmm1 - | ret - |7: - | sseconst_1 xmm0, RDa - | ret - | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- @@ -3134,6 +3105,86 @@ static void build_subroutines(BuildCtx *ctx) | ret |.endif | + |.define NEXT_TAB, TAB:FCARG1 + |.define NEXT_IDX, FCARG2 + |.define NEXT_PTR, RCa + |.define NEXT_PTRd, RC + |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.if X64 + |.define NEXT_TMP, CARG3d + |.define NEXT_TMPq, CARG3 + |.define NEXT_ASIZE, CARG4d + |.macro NEXT_ENTER; .endmacro + |.macro NEXT_LEAVE; ret; .endmacro + |.if X64WIN + |.define NEXT_RES_PTR, [rsp+aword*5] + |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro + |.else + |.define NEXT_RES_PTR, [rsp+aword*1] + |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.endif + |.else + |.define NEXT_ASIZE, esi + |.define NEXT_TMP, edi + |.macro NEXT_ENTER; push esi; push edi; .endmacro + |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro + |.define NEXT_RES_PTR, [esp+dword*3] + |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro + |.endif + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in edx. + |->vm_next: + |.if JIT + | NEXT_ENTER + | mov NEXT_ASIZE, NEXT_TAB->asize + |1: // Traverse array part. 
+ | cmp NEXT_IDX, NEXT_ASIZE; jae >5 + | mov NEXT_TMP, NEXT_TAB->array + | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 + | lea NEXT_PTR, NEXT_RES_PTR + |.if X64 + | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] + | mov qword [NEXT_PTR], NEXT_TMPq + |.else + | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] + | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8] + | mov dword [NEXT_PTR+4], NEXT_ASIZE + | mov dword [NEXT_PTR], NEXT_TMP + |.endif + |.if DUALNUM + | mov dword [NEXT_PTR+dword*3], LJ_TISNUM + | mov dword [NEXT_PTR+dword*2], NEXT_IDX + |.else + | cvtsi2sd xmm0, NEXT_IDX + | movsd qword [NEXT_PTR+dword*2], xmm0 + |.endif + | NEXT_RES_IDX 1 + | NEXT_LEAVE + |2: // Skip holes in array part. + | add NEXT_IDX, 1 + | jmp <1 + | + |5: // Traverse hash part. + | sub NEXT_IDX, NEXT_ASIZE + |6: + | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 + | imul NEXT_PTRd, NEXT_IDX, #NODE + | add NODE:NEXT_PTRd, dword NEXT_TAB->node + | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7 + | NEXT_RES_IDXL NEXT_ASIZE+1 + | NEXT_LEAVE + |7: // Skip holes in hash part. + | add NEXT_IDX, 1 + | jmp <6 + | + |9: // End of iteration. Set the key to nil (not the value). + | NEXT_RES_IDX NEXT_ASIZE + | lea NEXT_PTR, NEXT_RES_PTR + | mov dword [NEXT_PTR+dword*3], LJ_TNIL + | NEXT_LEAVE + |.endif + | |//----------------------------------------------------------------------- |//-- Assertions --------------------------------------------------------- |//----------------------------------------------------------------------- @@ -3269,19 +3320,25 @@ static void build_subroutines(BuildCtx *ctx) | | // Copy stack slots. 
| movzx ecx, byte CCSTATE->nsp - | sub ecx, 1 + |.if X64 + | sub ecx, 8 | js >2 |1: - |.if X64 - | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] - | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax + | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)] + | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax + | sub ecx, 8 + | jns <1 + |2: |.else - | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)] - | mov [esp+ecx*4], eax - |.endif - | sub ecx, 1 + | sub ecx, 4 + | js >2 + |1: + | mov eax, [CCSTATE+ecx+offsetof(CCallState, stack)] + | mov [esp+ecx], eax + | sub ecx, 4 | jns <1 |2: + |.endif | |.if X64 | movzx eax, byte CCSTATE->nfpr @@ -4286,7 +4343,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov TAB:RB, [BASE+RB*8] |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash + | and RA, STR:RC->sid | imul RA, #NODE | add NODE:RA, TAB:RB->node |1: @@ -4457,7 +4514,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov TAB:RB, [BASE+RB*8] |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash + | and RA, STR:RC->sid | imul RA, #NODE | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. | add NODE:RA, TAB:RB->node @@ -4785,10 +4842,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | hotloop RB |.endif + |->vm_IITERN: + | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | mov TMP1, KBASE // Need two more free registers. | mov TMP2, DISPATCH | mov TAB:RB, [BASE+RA*8-16] @@ -4876,14 +4934,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 | branchPC RD | mov dword [BASE+RA*8-8], 0 // Initialize control var. 
- | mov dword [BASE+RA*8-4], 0xfffe7fff + | mov dword [BASE+RA*8-4], LJ_KEYINDEX |1: | ins_next |5: // Despecialize bytecode if any of the checks fail. | mov PC_OP, BC_JMP | branchPC RD + |.if JIT + | cmp byte [PC], BC_ITERN + | jne >6 + |.endif | mov byte [PC], BC_ITERC | jmp <1 + |.if JIT + |6: // Unpatch JLOOP. + | mov RA, [DISPATCH+DISPATCH_J(trace)] + | movzx RC, word [PC+2] + | mov TRACE:RA, [RA+RC*4] + | mov eax, TRACE:RA->startins + | mov al, BC_ITERC + | mov dword [PC], eax + | jmp <1 + |.endif break; case BC_VARG: @@ -5548,7 +5620,7 @@ static void emit_asm_debug(BuildCtx *ctx) ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); #endif #if !LJ_NO_UNWIND -#if (defined(__sun__) && defined(__svr4__)) +#if LJ_TARGET_SOLARIS #if LJ_64 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); #else @@ -5769,7 +5841,6 @@ static void emit_asm_debug(BuildCtx *ctx) fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn); } #endif - fprintf(ctx->fp, ".subsections_via_symbols\n"); } break; #endif diff --git a/source/libs/luajit/LuaJIT-src/src/xb1build.bat b/source/libs/luajit/LuaJIT-src/src/xb1build.bat index 847e84a555778ad59c4ec6156c5b0f2a5c20a79d..019d6ebe7d8055511de44cc3edb452d8cc1bdfd0 100644 --- a/source/libs/luajit/LuaJIT-src/src/xb1build.bat +++ b/source/libs/luajit/LuaJIT-src/src/xb1build.bat @@ -9,12 +9,12 @@ @setlocal @echo ---- Host compiler ---- -@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /DLUAJIT_ENABLE_GC64 +@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE @set LJLINK=link /nologo @set LJMT=mt /nologo @set DASMDIR=..\dynasm @set DASM=%DASMDIR%\dynasm.lua -@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c +@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c %LJCOMPILE% host\minilua.c @if 
errorlevel 1 goto :BAD @@ -31,6 +31,9 @@ if exist minilua.exe.manifest^ minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x64.dasc @if errorlevel 1 goto :BAD +if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) +minilua host\genversion.lua + %LJCOMPILE% /I "." /I %DASMDIR% /D_DURANGO host\buildvm*.c @if errorlevel 1 goto :BAD %LJLINK% /out:buildvm.exe buildvm*.obj diff --git a/source/libs/luajit/LuaJIT-src/src/xedkbuild.bat b/source/libs/luajit/LuaJIT-src/src/xedkbuild.bat index 240ec878daf8eb4656ea6922eec7568fea2c3004..5444024eeae8243a2c657ceb5b76338010f72495 100644 --- a/source/libs/luajit/LuaJIT-src/src/xedkbuild.bat +++ b/source/libs/luajit/LuaJIT-src/src/xedkbuild.bat @@ -14,7 +14,7 @@ @set LJMT=mt /nologo @set DASMDIR=..\dynasm @set DASM=%DASMDIR%\dynasm.lua -@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c +@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c %LJCOMPILE% host\minilua.c @if errorlevel 1 goto :BAD @@ -31,6 +31,9 @@ if exist minilua.exe.manifest^ minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_ppc.dasc @if errorlevel 1 goto :BAD +if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) +minilua host\genversion.lua + %LJCOMPILE% /I "." 
/I %DASMDIR% /D_XBOX_VER=200 /DLUAJIT_TARGET=LUAJIT_ARCH_PPC host\buildvm*.c @if errorlevel 1 goto :BAD %LJLINK% /out:buildvm.exe buildvm*.obj diff --git a/source/libs/luajit/Makefile.am b/source/libs/luajit/Makefile.am index e431b30d49a90f446199c58fc9b9a86b07e45f9a..825676f1bd4508704ca882c5ebc52de93b6821d2 100644 --- a/source/libs/luajit/Makefile.am +++ b/source/libs/luajit/Makefile.am @@ -14,6 +14,8 @@ EXTRA_DIST = $(LUAJIT_TREE) ## EXTRA_DIST += TLpatches + + # Files not to be distributed include $(srcdir)/../../am/dist_hook.am NEVER_NAMES += $(NEVER_NAMES_SUB) $(NEVER_NAMES_LT) @@ -22,6 +24,7 @@ AM_CPPFLAGS = -I$(srcdir)/$(LUAJIT_TREE)/src $(LUAJIT_DEFINES) -U_FORTIFY_SOURCE AM_CFLAGS = $(LUAJIT_CFLAGS) $(LUAJIT_HAS_NOT_LOG2) -Wall AM_CCASFLAGS = $(LUAJIT_CFLAGS) $(LUAJIT_HAS_NOT_LOG2) + SUBDIRS = . native include lib_LTLIBRARIES = libtexluajit.la @@ -31,20 +34,34 @@ libtexluajit_la_LDFLAGS = -bindir @bindir@ -no-undefined -version-info $(LUAJIT_ dist_libtexluajit_la_SOURCES = \ lbitlib.c + nodist_libtexluajit_la_SOURCES = \ $(ljlib_sources) \ @LUAJIT_TREE@/src/lib_aux.c \ + @LUAJIT_TREE@/src/lib_base.c \ + @LUAJIT_TREE@/src/lib_bit.c \ + @LUAJIT_TREE@/src/lib_buffer.c \ + @LUAJIT_TREE@/src/lib_debug.c \ + @LUAJIT_TREE@/src/lib_ffi.c \ @LUAJIT_TREE@/src/lib_init.c \ + @LUAJIT_TREE@/src/lib_io.c \ + @LUAJIT_TREE@/src/lib_jit.c \ + @LUAJIT_TREE@/src/lib_math.c \ + @LUAJIT_TREE@/src/lib_os.c \ + @LUAJIT_TREE@/src/lib_package.c \ + @LUAJIT_TREE@/src/lib_string.c \ + @LUAJIT_TREE@/src/lib_table.c \ @LUAJIT_TREE@/src/lj_alloc.c \ @LUAJIT_TREE@/src/lj_api.c \ @LUAJIT_TREE@/src/lj_asm.c \ + @LUAJIT_TREE@/src/lj_assert.c \ @LUAJIT_TREE@/src/lj_bc.c \ @LUAJIT_TREE@/src/lj_bcread.c \ @LUAJIT_TREE@/src/lj_bcwrite.c \ @LUAJIT_TREE@/src/lj_buf.c \ @LUAJIT_TREE@/src/lj_carith.c \ - @LUAJIT_TREE@/src/lj_ccall.c \ @LUAJIT_TREE@/src/lj_ccallback.c \ + @LUAJIT_TREE@/src/lj_ccall.c \ @LUAJIT_TREE@/src/lj_cconv.c \ @LUAJIT_TREE@/src/lj_cdata.c \ @LUAJIT_TREE@/src/lj_char.c \ @@ 
-74,8 +91,10 @@ nodist_libtexluajit_la_SOURCES = \ @LUAJIT_TREE@/src/lj_opt_sink.c \ @LUAJIT_TREE@/src/lj_opt_split.c \ @LUAJIT_TREE@/src/lj_parse.c \ + @LUAJIT_TREE@/src/lj_prng.c \ @LUAJIT_TREE@/src/lj_profile.c \ @LUAJIT_TREE@/src/lj_record.c \ + @LUAJIT_TREE@/src/lj_serialize.c \ @LUAJIT_TREE@/src/lj_snap.c \ @LUAJIT_TREE@/src/lj_state.c \ @LUAJIT_TREE@/src/lj_str.c \ @@ -86,7 +105,16 @@ nodist_libtexluajit_la_SOURCES = \ @LUAJIT_TREE@/src/lj_trace.c \ @LUAJIT_TREE@/src/lj_udata.c \ @LUAJIT_TREE@/src/lj_vmevent.c \ - @LUAJIT_TREE@/src/lj_vmmath.c + @LUAJIT_TREE@/src/lj_vmmath.c \ + @LUAJIT_TREE@/src/luajit.c \ + @LUAJIT_TREE@/src/vm_arm64.dasc \ + @LUAJIT_TREE@/src/vm_arm.dasc \ + @LUAJIT_TREE@/src/vm_mips64.dasc \ + @LUAJIT_TREE@/src/vm_mips.dasc \ + @LUAJIT_TREE@/src/vm_ppc.dasc \ + @LUAJIT_TREE@/src/vm_x64.dasc \ + @LUAJIT_TREE@/src/vm_x86.dasc + if PEOBJ libtexluajit_la_LIBADD = lj_vm_obj.lo @@ -95,17 +123,21 @@ nodist_libtexluajit_la_SOURCES += lj_vm_asm.S endif !PEOBJ ljlib_sources = \ + lib_aux.c \ lib_base.c \ - lib_math.c \ lib_bit.c \ - lib_string.c \ - lib_table.c \ + lib_buffer.c \ + lib_debug.c \ + lib_ffi.c \ + lib_init.c \ lib_io.c \ + lib_jit.c \ + lib_math.c \ lib_os.c \ lib_package.c \ - lib_debug.c \ - lib_jit.c \ - lib_ffi.c + lib_string.c \ + lib_table.c + $(libtexluajit_la_OBJECTS): $(HDRGEN) @@ -114,7 +146,15 @@ $(libtexluajit_la_OBJECTS): $(HDRGEN) native/buildvm-stamp: $(ljlib_sources) lj_opt_fold.c cd native && $(MAKE) $(AM_MAKEFLAGS) buildvm-stamp -HDRGEN = lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h +native/luajit.h: + cd native && $(MAKE) $(AM_MAKEFLAGS) luajit.h + + +HDRGEN = lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h luajit.h + +luajit.h: native/luajit.h + $(LN_S) native/luajit.h + lj_bcdef.h: native/buildvm-stamp $(AM_V_GEN)native/buildvm -m bcdef -o $@ $(ljlib_sources) @@ -154,8 +194,8 @@ luajitincludedir = ${includedir}/texluajit luajitinclude_HEADERS = \ $(LUAJIT_TREE)/src/lauxlib.h \ 
$(LUAJIT_TREE)/src/luaconf.h \ - $(LUAJIT_TREE)/src/luajit.h \ $(LUAJIT_TREE)/src/lua.h \ + $(LUAJIT_TREE)/src/luajit_rolling.h \ $(LUAJIT_TREE)/src/lua.hpp \ $(LUAJIT_TREE)/src/lualib.h diff --git a/source/libs/luajit/Makefile.in b/source/libs/luajit/Makefile.in index df7d4986bee6b05cbe2b9f781e63b2331b5f4fd4..57d9a5d40d68a0e64ca95e89cee3b1b8d3b815cd 100644 --- a/source/libs/luajit/Makefile.in +++ b/source/libs/luajit/Makefile.in @@ -150,45 +150,54 @@ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgconfigdir)" \ LTLIBRARIES = $(lib_LTLIBRARIES) @PEOBJ_TRUE@libtexluajit_la_DEPENDENCIES = lj_vm_obj.lo dist_libtexluajit_la_OBJECTS = lbitlib.lo -am__objects_1 = lib_base.lo lib_math.lo lib_bit.lo lib_string.lo \ - lib_table.lo lib_io.lo lib_os.lo lib_package.lo lib_debug.lo \ - lib_jit.lo lib_ffi.lo +am__objects_1 = lib_aux.lo lib_base.lo lib_bit.lo lib_buffer.lo \ + lib_debug.lo lib_ffi.lo lib_init.lo lib_io.lo lib_jit.lo \ + lib_math.lo lib_os.lo lib_package.lo lib_string.lo \ + lib_table.lo am__dirstamp = $(am__leading_dot)dirstamp @PEOBJ_FALSE@am__objects_2 = lj_vm_asm.lo nodist_libtexluajit_la_OBJECTS = $(am__objects_1) \ - @LUAJIT_TREE@/src/lib_aux.lo @LUAJIT_TREE@/src/lib_init.lo \ + @LUAJIT_TREE@/src/lib_aux.lo @LUAJIT_TREE@/src/lib_base.lo \ + @LUAJIT_TREE@/src/lib_bit.lo @LUAJIT_TREE@/src/lib_buffer.lo \ + @LUAJIT_TREE@/src/lib_debug.lo @LUAJIT_TREE@/src/lib_ffi.lo \ + @LUAJIT_TREE@/src/lib_init.lo @LUAJIT_TREE@/src/lib_io.lo \ + @LUAJIT_TREE@/src/lib_jit.lo @LUAJIT_TREE@/src/lib_math.lo \ + @LUAJIT_TREE@/src/lib_os.lo @LUAJIT_TREE@/src/lib_package.lo \ + @LUAJIT_TREE@/src/lib_string.lo @LUAJIT_TREE@/src/lib_table.lo \ @LUAJIT_TREE@/src/lj_alloc.lo @LUAJIT_TREE@/src/lj_api.lo \ - @LUAJIT_TREE@/src/lj_asm.lo @LUAJIT_TREE@/src/lj_bc.lo \ - @LUAJIT_TREE@/src/lj_bcread.lo @LUAJIT_TREE@/src/lj_bcwrite.lo \ - @LUAJIT_TREE@/src/lj_buf.lo @LUAJIT_TREE@/src/lj_carith.lo \ - @LUAJIT_TREE@/src/lj_ccall.lo \ + @LUAJIT_TREE@/src/lj_asm.lo 
@LUAJIT_TREE@/src/lj_assert.lo \ + @LUAJIT_TREE@/src/lj_bc.lo @LUAJIT_TREE@/src/lj_bcread.lo \ + @LUAJIT_TREE@/src/lj_bcwrite.lo @LUAJIT_TREE@/src/lj_buf.lo \ + @LUAJIT_TREE@/src/lj_carith.lo \ @LUAJIT_TREE@/src/lj_ccallback.lo \ - @LUAJIT_TREE@/src/lj_cconv.lo @LUAJIT_TREE@/src/lj_cdata.lo \ - @LUAJIT_TREE@/src/lj_char.lo @LUAJIT_TREE@/src/lj_clib.lo \ - @LUAJIT_TREE@/src/lj_cparse.lo @LUAJIT_TREE@/src/lj_crecord.lo \ - @LUAJIT_TREE@/src/lj_ctype.lo @LUAJIT_TREE@/src/lj_debug.lo \ - @LUAJIT_TREE@/src/lj_dispatch.lo @LUAJIT_TREE@/src/lj_err.lo \ - @LUAJIT_TREE@/src/lj_ffrecord.lo @LUAJIT_TREE@/src/lj_func.lo \ - @LUAJIT_TREE@/src/lj_gc.lo @LUAJIT_TREE@/src/lj_gdbjit.lo \ - @LUAJIT_TREE@/src/lj_ir.lo @LUAJIT_TREE@/src/lj_lex.lo \ - @LUAJIT_TREE@/src/lj_lib.lo @LUAJIT_TREE@/src/lj_load.lo \ - @LUAJIT_TREE@/src/lj_mcode.lo @LUAJIT_TREE@/src/lj_meta.lo \ - @LUAJIT_TREE@/src/lj_obj.lo @LUAJIT_TREE@/src/lj_opt_dce.lo \ + @LUAJIT_TREE@/src/lj_ccall.lo @LUAJIT_TREE@/src/lj_cconv.lo \ + @LUAJIT_TREE@/src/lj_cdata.lo @LUAJIT_TREE@/src/lj_char.lo \ + @LUAJIT_TREE@/src/lj_clib.lo @LUAJIT_TREE@/src/lj_cparse.lo \ + @LUAJIT_TREE@/src/lj_crecord.lo @LUAJIT_TREE@/src/lj_ctype.lo \ + @LUAJIT_TREE@/src/lj_debug.lo @LUAJIT_TREE@/src/lj_dispatch.lo \ + @LUAJIT_TREE@/src/lj_err.lo @LUAJIT_TREE@/src/lj_ffrecord.lo \ + @LUAJIT_TREE@/src/lj_func.lo @LUAJIT_TREE@/src/lj_gc.lo \ + @LUAJIT_TREE@/src/lj_gdbjit.lo @LUAJIT_TREE@/src/lj_ir.lo \ + @LUAJIT_TREE@/src/lj_lex.lo @LUAJIT_TREE@/src/lj_lib.lo \ + @LUAJIT_TREE@/src/lj_load.lo @LUAJIT_TREE@/src/lj_mcode.lo \ + @LUAJIT_TREE@/src/lj_meta.lo @LUAJIT_TREE@/src/lj_obj.lo \ + @LUAJIT_TREE@/src/lj_opt_dce.lo \ @LUAJIT_TREE@/src/lj_opt_fold.lo \ @LUAJIT_TREE@/src/lj_opt_loop.lo \ @LUAJIT_TREE@/src/lj_opt_mem.lo \ @LUAJIT_TREE@/src/lj_opt_narrow.lo \ @LUAJIT_TREE@/src/lj_opt_sink.lo \ @LUAJIT_TREE@/src/lj_opt_split.lo \ - @LUAJIT_TREE@/src/lj_parse.lo @LUAJIT_TREE@/src/lj_profile.lo \ - @LUAJIT_TREE@/src/lj_record.lo @LUAJIT_TREE@/src/lj_snap.lo \ 
+ @LUAJIT_TREE@/src/lj_parse.lo @LUAJIT_TREE@/src/lj_prng.lo \ + @LUAJIT_TREE@/src/lj_profile.lo @LUAJIT_TREE@/src/lj_record.lo \ + @LUAJIT_TREE@/src/lj_serialize.lo @LUAJIT_TREE@/src/lj_snap.lo \ @LUAJIT_TREE@/src/lj_state.lo @LUAJIT_TREE@/src/lj_str.lo \ @LUAJIT_TREE@/src/lj_strfmt.lo \ @LUAJIT_TREE@/src/lj_strfmt_num.lo \ @LUAJIT_TREE@/src/lj_strscan.lo @LUAJIT_TREE@/src/lj_tab.lo \ @LUAJIT_TREE@/src/lj_trace.lo @LUAJIT_TREE@/src/lj_udata.lo \ @LUAJIT_TREE@/src/lj_vmevent.lo @LUAJIT_TREE@/src/lj_vmmath.lo \ - $(am__objects_2) + @LUAJIT_TREE@/src/luajit.lo $(am__objects_2) libtexluajit_la_OBJECTS = $(dist_libtexluajit_la_OBJECTS) \ $(nodist_libtexluajit_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) @@ -228,18 +237,32 @@ DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/../../build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/jittest-jittest.Po \ - ./$(DEPDIR)/lbitlib.Plo ./$(DEPDIR)/lib_base.Plo \ - ./$(DEPDIR)/lib_bit.Plo ./$(DEPDIR)/lib_debug.Plo \ - ./$(DEPDIR)/lib_ffi.Plo ./$(DEPDIR)/lib_io.Plo \ - ./$(DEPDIR)/lib_jit.Plo ./$(DEPDIR)/lib_math.Plo \ - ./$(DEPDIR)/lib_os.Plo ./$(DEPDIR)/lib_package.Plo \ - ./$(DEPDIR)/lib_string.Plo ./$(DEPDIR)/lib_table.Plo \ - ./$(DEPDIR)/lj_vm_asm.Plo \ + ./$(DEPDIR)/lbitlib.Plo ./$(DEPDIR)/lib_aux.Plo \ + ./$(DEPDIR)/lib_base.Plo ./$(DEPDIR)/lib_bit.Plo \ + ./$(DEPDIR)/lib_buffer.Plo ./$(DEPDIR)/lib_debug.Plo \ + ./$(DEPDIR)/lib_ffi.Plo ./$(DEPDIR)/lib_init.Plo \ + ./$(DEPDIR)/lib_io.Plo ./$(DEPDIR)/lib_jit.Plo \ + ./$(DEPDIR)/lib_math.Plo ./$(DEPDIR)/lib_os.Plo \ + ./$(DEPDIR)/lib_package.Plo ./$(DEPDIR)/lib_string.Plo \ + ./$(DEPDIR)/lib_table.Plo ./$(DEPDIR)/lj_vm_asm.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lib_aux.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_base.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_bit.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_buffer.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_debug.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_ffi.Plo \ 
@LUAJIT_TREE@/src/$(DEPDIR)/lib_init.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_io.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_jit.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_math.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_os.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_package.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_string.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lib_table.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_alloc.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_api.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_asm.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lj_assert.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_bc.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_bcread.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_bcwrite.Plo \ @@ -276,8 +299,10 @@ am__depfiles_remade = ./$(DEPDIR)/jittest-jittest.Po \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_opt_sink.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_opt_split.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_parse.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lj_prng.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_profile.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_record.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/lj_serialize.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_snap.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_state.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_str.Plo \ @@ -289,6 +314,7 @@ am__depfiles_remade = ./$(DEPDIR)/jittest-jittest.Po \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_udata.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_vmevent.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/lj_vmmath.Plo \ + @LUAJIT_TREE@/src/$(DEPDIR)/luajit.Plo \ @LUAJIT_TREE@/src/$(DEPDIR)/luajittry-luajit.Po am__mv = mv -f CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ @@ -768,51 +794,65 @@ dist_libtexluajit_la_SOURCES = \ lbitlib.c nodist_libtexluajit_la_SOURCES = $(ljlib_sources) \ - @LUAJIT_TREE@/src/lib_aux.c @LUAJIT_TREE@/src/lib_init.c \ + @LUAJIT_TREE@/src/lib_aux.c @LUAJIT_TREE@/src/lib_base.c \ + @LUAJIT_TREE@/src/lib_bit.c @LUAJIT_TREE@/src/lib_buffer.c \ + @LUAJIT_TREE@/src/lib_debug.c @LUAJIT_TREE@/src/lib_ffi.c \ + @LUAJIT_TREE@/src/lib_init.c @LUAJIT_TREE@/src/lib_io.c \ + 
@LUAJIT_TREE@/src/lib_jit.c @LUAJIT_TREE@/src/lib_math.c \ + @LUAJIT_TREE@/src/lib_os.c @LUAJIT_TREE@/src/lib_package.c \ + @LUAJIT_TREE@/src/lib_string.c @LUAJIT_TREE@/src/lib_table.c \ @LUAJIT_TREE@/src/lj_alloc.c @LUAJIT_TREE@/src/lj_api.c \ - @LUAJIT_TREE@/src/lj_asm.c @LUAJIT_TREE@/src/lj_bc.c \ - @LUAJIT_TREE@/src/lj_bcread.c @LUAJIT_TREE@/src/lj_bcwrite.c \ - @LUAJIT_TREE@/src/lj_buf.c @LUAJIT_TREE@/src/lj_carith.c \ - @LUAJIT_TREE@/src/lj_ccall.c @LUAJIT_TREE@/src/lj_ccallback.c \ - @LUAJIT_TREE@/src/lj_cconv.c @LUAJIT_TREE@/src/lj_cdata.c \ - @LUAJIT_TREE@/src/lj_char.c @LUAJIT_TREE@/src/lj_clib.c \ - @LUAJIT_TREE@/src/lj_cparse.c @LUAJIT_TREE@/src/lj_crecord.c \ - @LUAJIT_TREE@/src/lj_ctype.c @LUAJIT_TREE@/src/lj_debug.c \ - @LUAJIT_TREE@/src/lj_dispatch.c @LUAJIT_TREE@/src/lj_err.c \ - @LUAJIT_TREE@/src/lj_ffrecord.c @LUAJIT_TREE@/src/lj_func.c \ - @LUAJIT_TREE@/src/lj_gc.c @LUAJIT_TREE@/src/lj_gdbjit.c \ - @LUAJIT_TREE@/src/lj_ir.c @LUAJIT_TREE@/src/lj_lex.c \ - @LUAJIT_TREE@/src/lj_lib.c @LUAJIT_TREE@/src/lj_load.c \ - @LUAJIT_TREE@/src/lj_mcode.c @LUAJIT_TREE@/src/lj_meta.c \ - @LUAJIT_TREE@/src/lj_obj.c @LUAJIT_TREE@/src/lj_opt_dce.c \ - @LUAJIT_TREE@/src/lj_opt_fold.c \ + @LUAJIT_TREE@/src/lj_asm.c @LUAJIT_TREE@/src/lj_assert.c \ + @LUAJIT_TREE@/src/lj_bc.c @LUAJIT_TREE@/src/lj_bcread.c \ + @LUAJIT_TREE@/src/lj_bcwrite.c @LUAJIT_TREE@/src/lj_buf.c \ + @LUAJIT_TREE@/src/lj_carith.c @LUAJIT_TREE@/src/lj_ccallback.c \ + @LUAJIT_TREE@/src/lj_ccall.c @LUAJIT_TREE@/src/lj_cconv.c \ + @LUAJIT_TREE@/src/lj_cdata.c @LUAJIT_TREE@/src/lj_char.c \ + @LUAJIT_TREE@/src/lj_clib.c @LUAJIT_TREE@/src/lj_cparse.c \ + @LUAJIT_TREE@/src/lj_crecord.c @LUAJIT_TREE@/src/lj_ctype.c \ + @LUAJIT_TREE@/src/lj_debug.c @LUAJIT_TREE@/src/lj_dispatch.c \ + @LUAJIT_TREE@/src/lj_err.c @LUAJIT_TREE@/src/lj_ffrecord.c \ + @LUAJIT_TREE@/src/lj_func.c @LUAJIT_TREE@/src/lj_gc.c \ + @LUAJIT_TREE@/src/lj_gdbjit.c @LUAJIT_TREE@/src/lj_ir.c \ + @LUAJIT_TREE@/src/lj_lex.c 
@LUAJIT_TREE@/src/lj_lib.c \ + @LUAJIT_TREE@/src/lj_load.c @LUAJIT_TREE@/src/lj_mcode.c \ + @LUAJIT_TREE@/src/lj_meta.c @LUAJIT_TREE@/src/lj_obj.c \ + @LUAJIT_TREE@/src/lj_opt_dce.c @LUAJIT_TREE@/src/lj_opt_fold.c \ @LUAJIT_TREE@/src/lj_opt_loop.c @LUAJIT_TREE@/src/lj_opt_mem.c \ @LUAJIT_TREE@/src/lj_opt_narrow.c \ @LUAJIT_TREE@/src/lj_opt_sink.c \ @LUAJIT_TREE@/src/lj_opt_split.c @LUAJIT_TREE@/src/lj_parse.c \ - @LUAJIT_TREE@/src/lj_profile.c @LUAJIT_TREE@/src/lj_record.c \ + @LUAJIT_TREE@/src/lj_prng.c @LUAJIT_TREE@/src/lj_profile.c \ + @LUAJIT_TREE@/src/lj_record.c @LUAJIT_TREE@/src/lj_serialize.c \ @LUAJIT_TREE@/src/lj_snap.c @LUAJIT_TREE@/src/lj_state.c \ @LUAJIT_TREE@/src/lj_str.c @LUAJIT_TREE@/src/lj_strfmt.c \ @LUAJIT_TREE@/src/lj_strfmt_num.c \ @LUAJIT_TREE@/src/lj_strscan.c @LUAJIT_TREE@/src/lj_tab.c \ @LUAJIT_TREE@/src/lj_trace.c @LUAJIT_TREE@/src/lj_udata.c \ @LUAJIT_TREE@/src/lj_vmevent.c @LUAJIT_TREE@/src/lj_vmmath.c \ + @LUAJIT_TREE@/src/luajit.c @LUAJIT_TREE@/src/vm_arm64.dasc \ + @LUAJIT_TREE@/src/vm_arm.dasc @LUAJIT_TREE@/src/vm_mips64.dasc \ + @LUAJIT_TREE@/src/vm_mips.dasc @LUAJIT_TREE@/src/vm_ppc.dasc \ + @LUAJIT_TREE@/src/vm_x64.dasc @LUAJIT_TREE@/src/vm_x86.dasc \ $(am__append_1) @PEOBJ_TRUE@libtexluajit_la_LIBADD = lj_vm_obj.lo ljlib_sources = \ + lib_aux.c \ lib_base.c \ - lib_math.c \ lib_bit.c \ - lib_string.c \ - lib_table.c \ + lib_buffer.c \ + lib_debug.c \ + lib_ffi.c \ + lib_init.c \ lib_io.c \ + lib_jit.c \ + lib_math.c \ lib_os.c \ lib_package.c \ - lib_debug.c \ - lib_jit.c \ - lib_ffi.c + lib_string.c \ + lib_table.c -HDRGEN = lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h +HDRGEN = lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h luajit.h CLEANFILES = $(HDRGEN) $(ljlib_sources) lj_opt_fold.c lj_vm_asm.S \ jiterr.out jiterr.tmp rebuild.stamp CONFIG_CLEAN_FILES = dynasm_flags native_flags system_flags @@ -820,8 +860,8 @@ luajitincludedir = ${includedir}/texluajit luajitinclude_HEADERS = \ 
$(LUAJIT_TREE)/src/lauxlib.h \ $(LUAJIT_TREE)/src/luaconf.h \ - $(LUAJIT_TREE)/src/luajit.h \ $(LUAJIT_TREE)/src/lua.h \ + $(LUAJIT_TREE)/src/luajit_rolling.h \ $(LUAJIT_TREE)/src/lua.hpp \ $(LUAJIT_TREE)/src/lualib.h @@ -944,14 +984,40 @@ clean-libLTLIBRARIES: @: >>@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lib_aux.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_base.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_bit.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_buffer.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_debug.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_ffi.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lib_init.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_io.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_jit.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_math.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_os.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_package.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_string.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lib_table.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_alloc.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ 
@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_api.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_asm.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lj_assert.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_bc.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_bcread.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @@ -962,10 +1028,10 @@ clean-libLTLIBRARIES: @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_carith.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) -@LUAJIT_TREE@/src/lj_ccall.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ - @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_ccallback.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lj_ccall.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_cconv.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_cdata.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @@ -1024,10 +1090,14 @@ clean-libLTLIBRARIES: @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_parse.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lj_prng.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_profile.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_record.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/lj_serialize.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_snap.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ 
@LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_state.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @@ -1050,6 +1120,8 @@ clean-libLTLIBRARIES: @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) @LUAJIT_TREE@/src/lj_vmmath.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) +@LUAJIT_TREE@/src/luajit.lo: @LUAJIT_TREE@/src/$(am__dirstamp) \ + @LUAJIT_TREE@/src/$(DEPDIR)/$(am__dirstamp) libtexluajit.la: $(libtexluajit_la_OBJECTS) $(libtexluajit_la_DEPENDENCIES) $(EXTRA_libtexluajit_la_DEPENDENCIES) $(AM_V_CCLD)$(libtexluajit_la_LINK) -rpath $(libdir) $(libtexluajit_la_OBJECTS) $(libtexluajit_la_LIBADD) $(LIBS) @@ -1075,10 +1147,13 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/jittest-jittest.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lbitlib.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_aux.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_base.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_bit.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_buffer.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_debug.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_ffi.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_init.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_io.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_jit.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_math.Plo@am__quote@ # am--include-marker @@ -1088,10 +1163,23 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib_table.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/lj_vm_asm.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_aux.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_base.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_bit.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_buffer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_debug.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_ffi.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_init.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_io.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_jit.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_math.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_os.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_package.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_string.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lib_table.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_alloc.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_api.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_assert.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_bc.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_bcread.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_bcwrite.Plo@am__quote@ # am--include-marker @@ -1128,8 +1216,10 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_opt_sink.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_opt_split.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_parse.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_prng.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_profile.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_record.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_serialize.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_snap.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_state.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_str.Plo@am__quote@ # am--include-marker @@ -1141,6 +1231,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_udata.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_vmevent.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/lj_vmmath.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/luajit.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@@LUAJIT_TREE@/src/$(DEPDIR)/luajittry-luajit.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @@ -1822,10 +1913,13 @@ distclean: distclean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -f ./$(DEPDIR)/jittest-jittest.Po -rm -f ./$(DEPDIR)/lbitlib.Plo + -rm -f ./$(DEPDIR)/lib_aux.Plo -rm -f ./$(DEPDIR)/lib_base.Plo -rm -f ./$(DEPDIR)/lib_bit.Plo + -rm -f ./$(DEPDIR)/lib_buffer.Plo -rm -f ./$(DEPDIR)/lib_debug.Plo -rm -f ./$(DEPDIR)/lib_ffi.Plo + -rm -f ./$(DEPDIR)/lib_init.Plo -rm -f ./$(DEPDIR)/lib_io.Plo -rm -f ./$(DEPDIR)/lib_jit.Plo -rm -f ./$(DEPDIR)/lib_math.Plo @@ -1835,10 +1929,23 @@ distclean: distclean-recursive -rm -f ./$(DEPDIR)/lib_table.Plo -rm -f ./$(DEPDIR)/lj_vm_asm.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_aux.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_base.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_bit.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_buffer.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_debug.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_ffi.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_init.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_io.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_jit.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_math.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_os.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_package.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_string.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_table.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_alloc.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_api.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_asm.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_assert.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_bc.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_bcread.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_bcwrite.Plo @@ -1875,8 +1982,10 @@ distclean: distclean-recursive -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_opt_sink.Plo -rm -f 
@LUAJIT_TREE@/src/$(DEPDIR)/lj_opt_split.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_parse.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_prng.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_profile.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_record.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_serialize.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_snap.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_state.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_str.Plo @@ -1888,6 +1997,7 @@ distclean: distclean-recursive -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_udata.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_vmevent.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_vmmath.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/luajit.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/luajittry-luajit.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ @@ -1938,10 +2048,13 @@ maintainer-clean: maintainer-clean-recursive -rm -rf $(top_srcdir)/autom4te.cache -rm -f ./$(DEPDIR)/jittest-jittest.Po -rm -f ./$(DEPDIR)/lbitlib.Plo + -rm -f ./$(DEPDIR)/lib_aux.Plo -rm -f ./$(DEPDIR)/lib_base.Plo -rm -f ./$(DEPDIR)/lib_bit.Plo + -rm -f ./$(DEPDIR)/lib_buffer.Plo -rm -f ./$(DEPDIR)/lib_debug.Plo -rm -f ./$(DEPDIR)/lib_ffi.Plo + -rm -f ./$(DEPDIR)/lib_init.Plo -rm -f ./$(DEPDIR)/lib_io.Plo -rm -f ./$(DEPDIR)/lib_jit.Plo -rm -f ./$(DEPDIR)/lib_math.Plo @@ -1951,10 +2064,23 @@ maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/lib_table.Plo -rm -f ./$(DEPDIR)/lj_vm_asm.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_aux.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_base.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_bit.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_buffer.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_debug.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_ffi.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_init.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_io.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_jit.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_math.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_os.Plo + -rm -f 
@LUAJIT_TREE@/src/$(DEPDIR)/lib_package.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_string.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lib_table.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_alloc.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_api.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_asm.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_assert.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_bc.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_bcread.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_bcwrite.Plo @@ -1991,8 +2117,10 @@ maintainer-clean: maintainer-clean-recursive -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_opt_sink.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_opt_split.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_parse.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_prng.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_profile.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_record.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_serialize.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_snap.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_state.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_str.Plo @@ -2004,6 +2132,7 @@ maintainer-clean: maintainer-clean-recursive -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_udata.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_vmevent.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/lj_vmmath.Plo + -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/luajit.Plo -rm -f @LUAJIT_TREE@/src/$(DEPDIR)/luajittry-luajit.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic @@ -2060,6 +2189,12 @@ $(libtexluajit_la_OBJECTS): $(HDRGEN) native/buildvm-stamp: $(ljlib_sources) lj_opt_fold.c cd native && $(MAKE) $(AM_MAKEFLAGS) buildvm-stamp +native/luajit.h: + cd native && $(MAKE) $(AM_MAKEFLAGS) luajit.h + +luajit.h: native/luajit.h + $(LN_S) native/luajit.h + lj_bcdef.h: native/buildvm-stamp $(AM_V_GEN)native/buildvm -m bcdef -o $@ $(ljlib_sources) diff --git a/source/libs/luajit/TLpatches/ChangeLog b/source/libs/luajit/TLpatches/ChangeLog index 2e29cb46ab0ffd038e6dc2b72d4574703f8202cf..4be65139b1c233448c5adebc45eda40b786e9ba5 
100644 --- a/source/libs/luajit/TLpatches/ChangeLog +++ b/source/libs/luajit/TLpatches/ChangeLog @@ -1,3 +1,7 @@ +2025-02-02 Luigi Scarso <luigi.scarso@gmail.com> + * LuaJIT 2.1.1736781742 + + 2017-20-06 Luigi Scarso <luigi.scarso@gmail.com> * Luajit-2.1.0-beta3 * Removed lj_bcdef.h, lj_ffdef.h, lj_folddef.h, lj_libdef.h diff --git a/source/libs/luajit/TLpatches/patch-01 b/source/libs/luajit/TLpatches/patch-01 deleted file mode 100644 index 7fcb2cadf0b211a7be9ee9adde0351f04ce4bddc..0000000000000000000000000000000000000000 --- a/source/libs/luajit/TLpatches/patch-01 +++ /dev/null @@ -1,21 +0,0 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/Makefile LuaJIT-2.1.0-beta3/src/Makefile ---- LuaJIT-2.1.0-beta3-orig/src/Makefile 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/Makefile 2017-06-16 17:14:56.792102569 +0200 -@@ -99,7 +99,7 @@ - # enabled by default. Some other features that *might* break some existing - # code (e.g. __pairs or os.execute() return values) can be enabled here. - # Note: this does not provide full compatibility with Lua 5.2 at this time. --#XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT -+XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT - # - # Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter. 
- #XCFLAGS+= -DLUAJIT_DISABLE_JIT -@@ -475,7 +475,7 @@ - LJVM_BOUT= $(LJVM_S) - LJVM_MODE= elfasm - --LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ -+LJLIB_O= lib_base.o lib_math.o lbitlib.o lib_bit.o lib_string.o lib_table.o \ - lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o - LJLIB_C= $(LJLIB_O:.o=.c) - diff --git a/source/libs/luajit/TLpatches/patch-02 b/source/libs/luajit/TLpatches/patch-02 deleted file mode 100644 index 2f75f8b84f536ef13f4eca28b9a2131a3ede4456..0000000000000000000000000000000000000000 --- a/source/libs/luajit/TLpatches/patch-02 +++ /dev/null @@ -1,11 +0,0 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/Makefile.dep LuaJIT-2.1.0-beta3/src/Makefile.dep ---- LuaJIT-2.1.0-beta3-orig/src/Makefile.dep 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/Makefile.dep 2017-06-16 18:35:00.582631351 +0200 -@@ -6,6 +6,7 @@ - lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \ - lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ - lj_strfmt.h lj_lib.h lj_libdef.h -+lbitlib.o: lbitlib.c lua.h luaconf.h lauxlib.h lualib.h - lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ - lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \ diff --git a/source/libs/luajit/TLpatches/patch-03 b/source/libs/luajit/TLpatches/patch-03 deleted file mode 100644 index 0caddf8bfd40e1e57939c77e7f480411e762533d..0000000000000000000000000000000000000000 --- a/source/libs/luajit/TLpatches/patch-03 +++ /dev/null @@ -1,12 +0,0 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/host/buildvm.c LuaJIT-2.1.0-beta3/src/host/buildvm.c ---- LuaJIT-2.1.0-beta3-orig/src/host/buildvm.c 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/host/buildvm.c 2017-06-16 18:56:14.538560987 +0200 -@@ -113,7 +113,7 @@ - name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. 
*/ - else - *p = '\0'; --#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE -+#elif LJ_TARGET_PPC && !LJ_TARGET_OSX && !LJ_TARGET_CONSOLE - /* Keep @plt etc. */ - #else - *p = '\0'; diff --git a/source/libs/luajit/TLpatches/patch-05 b/source/libs/luajit/TLpatches/patch-05 deleted file mode 100644 index 6014f5597c12d4517ecd0d21f1132999c2f36a24..0000000000000000000000000000000000000000 --- a/source/libs/luajit/TLpatches/patch-05 +++ /dev/null @@ -1,11 +0,0 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/lib_init.c LuaJIT-2.1.0-beta3/src/lib_init.c ---- LuaJIT-2.1.0-beta3-orig/src/lib_init.c 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/lib_init.c 2017-06-16 19:19:00.352961298 +0200 -@@ -26,6 +26,7 @@ - { LUA_DBLIBNAME, luaopen_debug }, - { LUA_BITLIBNAME, luaopen_bit }, - { LUA_JITLIBNAME, luaopen_jit }, -+ { LUA_BITLIBNAME_32, luaopen_bit32 }, - { NULL, NULL } - }; - diff --git a/source/libs/luajit/TLpatches/patch-07 b/source/libs/luajit/TLpatches/patch-07 deleted file mode 100644 index f37d1ded06d1039bb0213d59e0ce276756d2e4c4..0000000000000000000000000000000000000000 --- a/source/libs/luajit/TLpatches/patch-07 +++ /dev/null @@ -1,12 +0,0 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/lj_arch.h LuaJIT-2.1.0-beta3/src/lj_arch.h ---- LuaJIT-2.1.0-beta3-orig/src/lj_arch.h 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/lj_arch.h 2017-06-19 17:51:41.683624004 +0200 -@@ -376,7 +376,7 @@ - #if __GNUC__ < 4 - #error "Need at least GCC 4.0 or newer" - #endif --#elif LJ_TARGET_ARM -+#elif LJ_TARGET_ARM || LJ_TARGET_PPC - #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) - #error "Need at least GCC 4.2 or newer" - #endif diff --git a/source/libs/luajit/TLpatches/patch-10 b/source/libs/luajit/TLpatches/patch-10 deleted file mode 100644 index 840f154b1dd09be4762f1ce9b37e5110ae431c2e..0000000000000000000000000000000000000000 --- a/source/libs/luajit/TLpatches/patch-10 +++ /dev/null @@ -1,59 +0,0 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/lj_str.c 
LuaJIT-2.1.0-beta3/src/lj_str.c ---- LuaJIT-2.1.0-beta3-orig/src/lj_str.c 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/lj_str.c 2017-06-19 18:20:09.668443066 +0200 -@@ -118,6 +118,16 @@ - g->strhash = newhash; - } - -+/* -+** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a string to -+** compute its hash -+*/ -+#if !defined(LUAI_HASHLIMIT) -+#define LUAI_HASHLIMIT 5 -+#endif -+ -+#define cast(t, exp) ((t)(exp)) -+int luajittex_choose_hash_function = 0 ; - /* Intern a string and return string object. */ - GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) - { -@@ -126,9 +136,22 @@ - GCobj *o; - MSize len = (MSize)lenx; - MSize a, b, h = len; -+ size_t step ; -+ size_t l1 ; - if (lenx >= LJ_MAX_STR) - lj_err_msg(L, LJ_ERR_STROV); - g = G(L); -+ -+ if (len==0) -+ return &g->strempty; -+ if (luajittex_choose_hash_function==0) { -+ /* Lua 5.1.5 hash function */ -+ /* for 5.2 max methods we also need to patch the vm eq */ -+ step = (len>>LUAI_HASHLIMIT)+1; /* if string is too long, don't hash all its chars */ -+ for (l1=len; l1>=step; l1-=step) /* compute hash */ -+ h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1])); -+ } else { -+ /* LuaJIT 2.0.2 hash function */ - /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ - if (len >= 4) { /* Caveat: unaligned access! */ - a = lj_getu32(str); -@@ -142,11 +165,15 @@ - b = *(const uint8_t *)(str+(len>>1)); - h ^= b; h -= lj_rol(b, 14); - } else { -+ /* Already done, kept for reference */ - return &g->strempty; - } - a ^= h; a -= lj_rol(h, 11); - b ^= a; b -= lj_rol(a, 25); - h ^= b; h -= lj_rol(b, 16); -+ } -+ -+ - /* Check if the string has already been interned. 
*/ - o = gcref(g->strhash[h & g->strmask]); - if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { diff --git a/source/libs/luajit/TLpatches/patch-13 b/source/libs/luajit/TLpatches/patch-13 deleted file mode 100644 index a4aaeedbe5dcc235b88ff601203f9b9c15cbfa76..0000000000000000000000000000000000000000 --- a/source/libs/luajit/TLpatches/patch-13 +++ /dev/null @@ -1,13 +0,0 @@ -diff -bur lj_alloc.c.orig lj_alloc.c ---- lj_alloc.c.orig 2018-10-27 20:24:05.499136144 +0200 -+++ lj_alloc.c 2018-10-27 20:24:28.790707009 +0200 -@@ -343,7 +343,7 @@ - } - #endif - --#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 -+#if ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 - - #include <sys/resource.h> - - diff --git a/source/libs/luajit/TLpatches/patch-04 b/source/libs/luajit/TLpatches/patch-lauxlib.h similarity index 78% rename from source/libs/luajit/TLpatches/patch-04 rename to source/libs/luajit/TLpatches/patch-lauxlib.h index c382ca119931d45d60d47337d971a239c6c93053..bc7a642fbe3202b4124a0de4dffd5ad540731a50 100644 --- a/source/libs/luajit/TLpatches/patch-04 +++ b/source/libs/luajit/TLpatches/patch-lauxlib.h @@ -1,12 +1,9 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/lauxlib.h LuaJIT-2.1.0-beta3/src/lauxlib.h ---- LuaJIT-2.1.0-beta3-orig/src/lauxlib.h 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/lauxlib.h 2017-06-16 19:12:18.325405265 +0200 -@@ -92,6 +92,31 @@ - LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname); +--- a/source/libs/luajit/LuaJIT-src/src/lauxlib.h ++++ b/source/libs/luajit/LuaJIT-src/src/lauxlib.h +@@ -93,6 +93,31 @@ -+ -+/* + /* +** {====================================================== +** File handles for IO library +** ======================================================= @@ -30,14 +27,16 @@ diff -bur LuaJIT-2.1.0-beta3-orig/src/lauxlib.h LuaJIT-2.1.0-beta3/src/lauxlib.h + + + - /* ++ ++/* ** 
=============================================================== ** some useful macros + ** =============================================================== @@ -155,6 +180,12 @@ LUALIB_API void (luaL_addvalue) (luaL_Buffer *B); LUALIB_API void (luaL_pushresult) (luaL_Buffer *B); -+/* -- Luajittex needs this one because it's faster than make it Lua -- */ ++/* -- Luajittex needs this one because it's faster than make it with Lua -- */ +LUA_API int (RESERVED_lua_dump) (lua_State *L, lua_Writer writer, void *data, int strip); +/* Luajittex needs this one because it overloads loadfile */ +LUALIB_API int (RESERVED_load_aux_JIT) (lua_State *L, int status, int envarg); diff --git a/source/libs/luajit/TLpatches/patch-lib_init.c b/source/libs/luajit/TLpatches/patch-lib_init.c new file mode 100644 index 0000000000000000000000000000000000000000..5890d4cd9283de0698a57d4784b7b8c6fedb98b2 --- /dev/null +++ b/source/libs/luajit/TLpatches/patch-lib_init.c @@ -0,0 +1,10 @@ +--- a/source/libs/luajit/LuaJIT-src/src/lib_init.c ++++ b/source/libs/luajit/LuaJIT-src/src/lib_init.c +@@ -26,6 +26,7 @@ + { LUA_DBLIBNAME, luaopen_debug }, + { LUA_BITLIBNAME, luaopen_bit }, + { LUA_JITLIBNAME, luaopen_jit }, ++ { LUA_BITLIBNAME_32, luaopen_bit32 }, + { NULL, NULL } + }; + diff --git a/source/libs/luajit/TLpatches/patch-06 b/source/libs/luajit/TLpatches/patch-lib_package.c similarity index 79% rename from source/libs/luajit/TLpatches/patch-06 rename to source/libs/luajit/TLpatches/patch-lib_package.c index 8786dd4d621c4d5395690e70dbec195397f1342e..18752fc32ad5417eabe3bafb2c5e5ee15df9e34b 100644 --- a/source/libs/luajit/TLpatches/patch-06 +++ b/source/libs/luajit/TLpatches/patch-lib_package.c @@ -1,7 +1,6 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/lib_package.c LuaJIT-2.1.0-beta3/src/lib_package.c ---- LuaJIT-2.1.0-beta3-orig/src/lib_package.c 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/lib_package.c 2017-06-19 16:54:20.092831408 +0200 -@@ -361,6 +361,28 @@ +--- 
a/source/libs/luajit/LuaJIT-src/src/lib_package.c ++++ b/source/libs/luajit/LuaJIT-src/src/lib_package.c +@@ -378,6 +378,29 @@ return 1; /* library loaded successfully */ } @@ -27,10 +26,11 @@ diff -bur LuaJIT-2.1.0-beta3-orig/src/lib_package.c LuaJIT-2.1.0-beta3/src/lib_p + return 1; /* library loaded successfully */ +} + - static int lj_cf_package_loader_croot(lua_State *L) ++ + static int lj_cf_package_loader_c(lua_State *L) { - const char *filename; -@@ -380,6 +402,20 @@ + const char *name = luaL_checkstring(L, 1); +@@ -407,6 +430,20 @@ return 1; } diff --git a/source/libs/luajit/TLpatches/patch-lj_arch.h b/source/libs/luajit/TLpatches/patch-lj_arch.h new file mode 100644 index 0000000000000000000000000000000000000000..e50fb9883993daf8fe526e04e7218dd3b20565b9 --- /dev/null +++ b/source/libs/luajit/TLpatches/patch-lj_arch.h @@ -0,0 +1,11 @@ +--- a/source/libs/luajit/LuaJIT-src/src/lj_arch.h ++++ b/source/libs/luajit/LuaJIT-src/src/lj_arch.h +@@ -455,7 +455,7 @@ + #if __GNUC__ < 4 + #error "Need at least GCC 4.0 or newer" + #endif +-#elif LJ_TARGET_ARM ++#elif LJ_TARGET_ARM || LJ_TARGET_PPC + #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) + #error "Need at least GCC 4.2 or newer" + #endif diff --git a/source/libs/luajit/TLpatches/patch-08 b/source/libs/luajit/TLpatches/patch-lj_def.h similarity index 57% rename from source/libs/luajit/TLpatches/patch-08 rename to source/libs/luajit/TLpatches/patch-lj_def.h index 80a214fb39d3b5d78bd75a3c5a4489e444f05b2c..5e06ff87b0f55a8adbf97077c28e537f193b1771 100644 --- a/source/libs/luajit/TLpatches/patch-08 +++ b/source/libs/luajit/TLpatches/patch-lj_def.h @@ -1,6 +1,5 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/lj_def.h LuaJIT-2.1.0-beta3/src/lj_def.h ---- LuaJIT-2.1.0-beta3-orig/src/lj_def.h 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/lj_def.h 2017-06-19 17:52:54.085933629 +0200 +--- a/source/libs/luajit/LuaJIT-src/src/lj_def.h ++++ b/source/libs/luajit/LuaJIT-src/src/lj_def.h @@ -66,7 +66,7 
@@ #define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ #define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ @@ -9,4 +8,4 @@ diff -bur LuaJIT-2.1.0-beta3-orig/src/lj_def.h LuaJIT-2.1.0-beta3/src/lj_def.h +#define LJ_MAX_UPVAL 249 /* Max. # of upvalues. */ #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ - #define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ + #define LJ_STACK_EXTRA (5+3*LJ_FR2) /* Extra stack space (metamethods). */ diff --git a/source/libs/luajit/TLpatches/patch-09 b/source/libs/luajit/TLpatches/patch-lj_load.c similarity index 72% rename from source/libs/luajit/TLpatches/patch-09 rename to source/libs/luajit/TLpatches/patch-lj_load.c index d0fcf9b2946164103c88bbfa775611b9c7a4be91..b302b205e07cc389802a3392860941cc3a3bf413 100644 --- a/source/libs/luajit/TLpatches/patch-09 +++ b/source/libs/luajit/TLpatches/patch-lj_load.c @@ -1,15 +1,14 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/lj_load.c LuaJIT-2.1.0-beta3/src/lj_load.c ---- LuaJIT-2.1.0-beta3-orig/src/lj_load.c 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/lj_load.c 2017-06-19 18:05:35.864190001 +0200 -@@ -166,3 +166,31 @@ +--- a/source/libs/luajit/LuaJIT-src/src/lj_load.c ++++ b/source/libs/luajit/LuaJIT-src/src/lj_load.c +@@ -182,3 +182,32 @@ return 1; } -+/* -- Luajittex needs this one because it's faster than make it Lua -- */ ++/* -- Luajittex needs this one because it's faster than make it with Lua -- */ +LUA_API int RESERVED_lua_dump(lua_State *L, lua_Writer writer, void *data, int strip) +{ + cTValue *o = L->top-1; -+ api_check(L, L->top > L->base); ++ lj_checkapi(L->top > L->base, "top slot empty"); + if (tvisfunc(o) && isluafunc(funcV(o))) + return lj_bcwrite(L, funcproto(funcV(o)), writer, data, strip); + else @@ -33,3 +32,4 @@ diff -bur LuaJIT-2.1.0-beta3-orig/src/lj_load.c LuaJIT-2.1.0-beta3/src/lj_load.c + } +} + ++ diff --git a/source/libs/luajit/TLpatches/patch-lj_str.c 
b/source/libs/luajit/TLpatches/patch-lj_str.c new file mode 100644 index 0000000000000000000000000000000000000000..2b241b04dc08e95d08e671be98826d13db8a3763 --- /dev/null +++ b/source/libs/luajit/TLpatches/patch-lj_str.c @@ -0,0 +1,10 @@ +--- a/source/libs/luajit/LuaJIT-src/src/lj_str.c ++++ b/source/libs/luajit/LuaJIT-src/src/lj_str.c +@@ -310,6 +310,7 @@ + return s; /* Return newly interned string. */ + } + ++int luajittex_choose_hash_function = 0 ; + /* Intern a string and return string object. */ + GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) + { diff --git a/source/libs/luajit/TLpatches/patch-11 b/source/libs/luajit/TLpatches/patch-lua.h similarity index 55% rename from source/libs/luajit/TLpatches/patch-11 rename to source/libs/luajit/TLpatches/patch-lua.h index b4b187813fc3d29aeb69f797b04d516072dbb08a..48111225b319fb6d43450cbbee4152939c35ab7b 100644 --- a/source/libs/luajit/TLpatches/patch-11 +++ b/source/libs/luajit/TLpatches/patch-lua.h @@ -1,17 +1,16 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/lua.h LuaJIT-2.1.0-beta3/src/lua.h ---- LuaJIT-2.1.0-beta3-orig/src/lua.h 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/lua.h 2017-06-19 18:26:51.143299683 +0200 -@@ -103,7 +103,8 @@ - /* type for integer functions */ +--- a/source/libs/luajit/LuaJIT-src/src/lua.h ++++ b/source/libs/luajit/LuaJIT-src/src/lua.h +@@ -104,6 +104,9 @@ typedef LUA_INTEGER lua_Integer; -- + +/* communication with LuaJiTTeX */ +LUA_API int luajittex_choose_hash_function; ++ /* ** state manipulation -@@ -353,6 +354,14 @@ +@@ -353,6 +356,15 @@ LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum); LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum); @@ -20,8 +19,9 @@ diff -bur LuaJIT-2.1.0-beta3-orig/src/lua.h LuaJIT-2.1.0-beta3/src/lua.h +#define LUA_OPLT 1 +#define LUA_OPLE 2 + -+/* see http://comments.gmane.org/gmane.comp.programming.swig/18673 */ ++/* see 
https://sourceforge.net/p/swig/mailman/swig-user/thread/4FB2AEBE.9010807%40fultondesigns.co.uk/#msg29268198 */ +# define lua_rawlen lua_objlen ++ + /* From Lua 5.3. */ LUA_API int lua_isyieldable (lua_State *L); diff --git a/source/libs/luajit/TLpatches/patch-12 b/source/libs/luajit/TLpatches/patch-lualib.h similarity index 58% rename from source/libs/luajit/TLpatches/patch-12 rename to source/libs/luajit/TLpatches/patch-lualib.h index 27be2c8cdaaaa893b4b6ac6e83ad26433dc98beb..b935ac1fe8861f0d457dbfc1f2b12d05f1fef470 100644 --- a/source/libs/luajit/TLpatches/patch-12 +++ b/source/libs/luajit/TLpatches/patch-lualib.h @@ -1,18 +1,17 @@ -diff -bur LuaJIT-2.1.0-beta3-orig/src/lualib.h LuaJIT-2.1.0-beta3/src/lualib.h ---- LuaJIT-2.1.0-beta3-orig/src/lualib.h 2017-05-01 21:05:00.000000000 +0200 -+++ LuaJIT-2.1.0-beta3/src/lualib.h 2017-06-19 18:27:55.065836605 +0200 +--- a/source/libs/luajit/LuaJIT-src/src/lualib.h ++++ b/source/libs/luajit/LuaJIT-src/src/lualib.h @@ -22,6 +22,8 @@ #define LUA_JITLIBNAME "jit" #define LUA_FFILIBNAME "ffi" +#define LUA_BITLIBNAME_32 "bit32" -+ ++ LUALIB_API int luaopen_base(lua_State *L); LUALIB_API int luaopen_math(lua_State *L); LUALIB_API int luaopen_string(lua_State *L); -@@ -34,6 +36,8 @@ - LUALIB_API int luaopen_jit(lua_State *L); +@@ -35,6 +37,8 @@ LUALIB_API int luaopen_ffi(lua_State *L); + LUALIB_API int luaopen_string_buffer(lua_State *L); +LUALIB_API int luaopen_bit32(lua_State *L); + diff --git a/source/libs/luajit/configure b/source/libs/luajit/configure index 35283cb67b07866db0dd4e5495306a51db51819b..e17160a46ce81029568e202cd5254ff1de9f7a36 100755 --- a/source/libs/luajit/configure +++ b/source/libs/luajit/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.72 for luajit for TeX Live 2.1.0-beta3. +# Generated by GNU Autoconf 2.72 for luajit for TeX Live 2.1.81742. # # Report bugs to <tex-k@tug.org>. 
# @@ -614,12 +614,12 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='luajit for TeX Live' PACKAGE_TARNAME='luajit-for-tex-live' -PACKAGE_VERSION='2.1.0-beta3' -PACKAGE_STRING='luajit for TeX Live 2.1.0-beta3' +PACKAGE_VERSION='2.1.81742' +PACKAGE_STRING='luajit for TeX Live 2.1.81742' PACKAGE_BUGREPORT='tex-k@tug.org' PACKAGE_URL='' -ac_unique_file="LuaJIT-src/src/luajit.h" +ac_unique_file="LuaJIT-src/src/luajit_rolling.h" # Factoring default headers for most tests. ac_includes_default="\ #include <stddef.h> @@ -1385,7 +1385,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -'configure' configures luajit for TeX Live 2.1.0-beta3 to adapt to many kinds of systems. +'configure' configures luajit for TeX Live 2.1.81742 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1457,7 +1457,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of luajit for TeX Live 2.1.0-beta3:";; + short | recursive ) echo "Configuration of luajit for TeX Live 2.1.81742:";; esac cat <<\_ACEOF @@ -1578,7 +1578,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -luajit for TeX Live configure 2.1.0-beta3 +luajit for TeX Live configure 2.1.81742 generated by GNU Autoconf 2.72 Copyright (C) 2023 Free Software Foundation, Inc. @@ -2134,7 +2134,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by luajit for TeX Live $as_me 2.1.0-beta3, which was +It was created by luajit for TeX Live $as_me 2.1.81742, which was generated by GNU Autoconf 2.72. Invocation command line was $ $0$ac_configure_args_raw @@ -5095,7 +5095,7 @@ fi # Define the identity of the package. 
PACKAGE='luajit-for-tex-live' - VERSION='2.1.0-beta3' + VERSION='2.1.81742' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -6569,10 +6569,10 @@ printf "%s\n" "no, using $LN_S" >&6; } fi -LUAJITVERSION=2.1.0-beta3 +LUAJITVERSION=2.1.81742 -LUAJIT_LT_VERSINFO=3:0:1 +LUAJIT_LT_VERSINFO=3:81742:1 case `pwd` in @@ -16329,10 +16329,14 @@ fi elif grep 'LJ_TARGET_MIPS ' conftest.i >/dev/null 2>&1 then : LJARCH=mips +elif grep 'LJ_TARGET_MIPS64 ' conftest.i >/dev/null 2>&1 +then : + LJARCH=mips64 if grep 'MIPSEL ' conftest.i >/dev/null 2>&1 then : echo '-D__MIPSEL__=1' >>native_flags fi + else case e in #( e) as_fn_error $? "Sorry, unsupported architecture" "$LINENO" 5 ;; esac @@ -16392,6 +16396,11 @@ else case e in #( echo '-DLJ_ABI_SOFTFP=0' >>native_flags ;; esac fi +if grep 'LJ_ABI_PAUTH 1' conftest.i >/dev/null 2>&1 +then : + echo '-D PAUTH' >>dynasm_flags + echo '-DLJ_ABI_PAUTH=1' >>native_flags +fi echo '-D VER='`grep 'LJ_ARCH_VERSION ' conftest.i 2>&1 | \ sed 's/^.*LJ_ARCH_VERSION //'` >>dynasm_flags if test "x$LJHOST" = xWindows @@ -16442,7 +16451,7 @@ if test "x$build" != "x$host" then : case $LJHOST in #( Windows) : - echo '-DLUAJIT_OS=LUAJIT_OS_WINDOWS' >>native_flags ;; #( + echo '-malign-double -DLUAJIT_OS=LUAJIT_OS_WINDOWS' >>native_flags ;; #( Darwin | iOS) : echo '-DLUAJIT_OS=LUAJIT_OS_OSX' >>native_flags ;; #( Linux) : @@ -16469,6 +16478,15 @@ case $LJHOST in #( *) : LJVM_MODE=elfasm ;; esac + +if echo 'extern void b(void);int a(void){b();return 0;}' | $CC -c -x c - -o tmpunwind.o && { sed -n /eh_frame/p tmpunwind.o >tmpunwind_test || sed -n /__unwind_info/p tmpunwind.o >>tmpunwind_test && test -s tmpunwind_test; } +then : + LUAJIT_CFLAGS="$LUAJIT_CFLAGS -DLUAJIT_UNWIND_EXTERNAL" + rm tmpunwind.o tmpunwind_test +else case e in #( + e) rm tmpunwind.o tmpunwind_test ;; +esac +fi lj_save_CFLAGS=$CFLAGS CFLAGS="$CFLAGS -fno-stack-protector" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -17225,7 +17243,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || 
ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by luajit for TeX Live $as_me 2.1.0-beta3, which was +This file was extended by luajit for TeX Live $as_me 2.1.81742, which was generated by GNU Autoconf 2.72. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -17293,7 +17311,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -luajit for TeX Live config.status 2.1.0-beta3 +luajit for TeX Live config.status 2.1.81742 configured by $0, generated by GNU Autoconf 2.72, with options \\"\$ac_cs_config\\" diff --git a/source/libs/luajit/configure.ac b/source/libs/luajit/configure.ac index 04bd56136477c762acc09fa37648a3b00c703516..21c31bc10e310ea6a96e7e37f3353414c3424843 100644 --- a/source/libs/luajit/configure.ac +++ b/source/libs/luajit/configure.ac @@ -10,7 +10,7 @@ dnl m4_include([version.ac])[] dnl define luajit_version AC_INIT([luajit for TeX Live], luajit_version, [tex-k@tug.org]) AC_PREREQ([2.65]) -AC_CONFIG_SRCDIR([LuaJIT-src/src/luajit.h]) +AC_CONFIG_SRCDIR([LuaJIT-src/src/luajit_rolling.h]) AC_CONFIG_AUX_DIR([../../build-aux]) AC_CONFIG_MACRO_DIRS([../../m4 m4]) @@ -53,6 +53,13 @@ AS_CASE([$LJHOST], LUAJIT_CFLAGS="$LUAJIT_CFLAGS -malign-double"], [Darwin | iOS], [LJVM_MODE=machasm], [LJVM_MODE=elfasm]) + +dnl Original test, adapted. 
+dnl AS_IF([ echo 'extern void b(void);int a(void){b();return 0;}' | $CC -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; }], +AS_IF([ echo 'extern void b(void);int a(void){b();return 0;}' | $CC -c -x c - -o tmpunwind.o && { sed -n /eh_frame/p tmpunwind.o >tmpunwind_test || sed -n /__unwind_info/p tmpunwind.o >>tmpunwind_test && test -s tmpunwind_test; }], + [LUAJIT_CFLAGS="$LUAJIT_CFLAGS -DLUAJIT_UNWIND_EXTERNAL" + rm tmpunwind.o tmpunwind_test], + [rm tmpunwind.o tmpunwind_test]) lj_save_CFLAGS=$CFLAGS CFLAGS="$CFLAGS -fno-stack-protector" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]],[[]])], diff --git a/source/libs/luajit/include/Makefile.am b/source/libs/luajit/include/Makefile.am index 6634fd88b19d93c4081b6eea695ca8b28268dd55..938ca6279f83de5acacaa823358b1d54dcd8bb23 100644 --- a/source/libs/luajit/include/Makefile.am +++ b/source/libs/luajit/include/Makefile.am @@ -8,6 +8,9 @@ ## LUAJIT_SRC = $(top_srcdir)/$(LUAJIT_TREE)/src +#luajit.h: ../native/luajit.h +# $(LN_S) ../native/luajit.h + hdr_links = \ $(LUAJIT_SRC)/lauxlib.h \ $(LUAJIT_SRC)/lj_arch.h \ @@ -17,7 +20,8 @@ hdr_links = \ $(LUAJIT_SRC)/lua.h \ $(LUAJIT_SRC)/luaconf.h \ $(LUAJIT_SRC)/luajit.h \ - $(LUAJIT_SRC)/lualib.h + $(LUAJIT_SRC)/lualib.h \ + ../native/luajit.h include $(top_srcdir)/../../am/hdr_links.am diff --git a/source/libs/luajit/include/Makefile.in b/source/libs/luajit/include/Makefile.in index c8c9a36cc51a1e947462531ef53520a0629f181f..1fa7b697ed4cf395cdf95e6b9dea32e26504bd90 100644 --- a/source/libs/luajit/include/Makefile.in +++ b/source/libs/luajit/include/Makefile.in @@ -276,6 +276,9 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LUAJIT_SRC = $(top_srcdir)/$(LUAJIT_TREE)/src + +#luajit.h: ../native/luajit.h +# $(LN_S) ../native/luajit.h hdr_links = \ $(LUAJIT_SRC)/lauxlib.h \ $(LUAJIT_SRC)/lj_arch.h \ @@ -285,7 +288,8 @@ hdr_links = \ $(LUAJIT_SRC)/lua.h \ 
$(LUAJIT_SRC)/luaconf.h \ $(LUAJIT_SRC)/luajit.h \ - $(LUAJIT_SRC)/lualib.h + $(LUAJIT_SRC)/lualib.h \ + ../native/luajit.h all: all-am diff --git a/source/libs/luajit/lbitlib.c b/source/libs/luajit/lbitlib.c index 5187bbff0dc4815d13d38ecb29bd52807e8f953b..1c1c9f281069dc774f596940d718570aa03e5efb 100644 --- a/source/libs/luajit/lbitlib.c +++ b/source/libs/luajit/lbitlib.c @@ -7,6 +7,7 @@ #define lbitlib_c #define LUA_LIB +#include <limits.h> #include "lua.h" #include "lauxlib.h" diff --git a/source/libs/luajit/m4/lj-system.m4 b/source/libs/luajit/m4/lj-system.m4 index 73ba282af845c90d102873c6550f1bbff8bdb7da..8ed5fdd64e88e17bbbe2000a2d3afa4b8db9405f 100644 --- a/source/libs/luajit/m4/lj-system.m4 +++ b/source/libs/luajit/m4/lj-system.m4 @@ -35,9 +35,12 @@ AS_IF([grep 'LJ_TARGET_X64 ' conftest.i >/dev/null 2>&1], [echo '-DLJ_ARCH_ENDIAN=LUAJIT_LE' >>native_flags], [echo '-DLJ_ARCH_ENDIAN=LUAJIT_BE' >>native_flags])], [grep 'LJ_TARGET_MIPS ' conftest.i >/dev/null 2>&1], - [LJARCH=mips + [LJARCH=mips], + [grep 'LJ_TARGET_MIPS64 ' conftest.i >/dev/null 2>&1], + [LJARCH=mips64 AS_IF([grep 'MIPSEL ' conftest.i >/dev/null 2>&1], - [echo '-D__MIPSEL__=1' >>native_flags])], + [echo '-D__MIPSEL__=1' >>native_flags]) + ], [AC_MSG_ERROR([Sorry, unsupported architecture])]) AS_IF([grep 'LJ_TARGET_PS3 1'conftest.i >/dev/null 2>&1], [LJHOST='PS3' @@ -68,6 +71,9 @@ AS_IF([grep 'LJ_ABI_SOFTFP 1' conftest.i >/dev/null 2>&1], [echo '-DDLJ_ABI_SOFTFP=1' >>native_flags], [echo '-D HFABI' >>dynasm_flags echo '-DLJ_ABI_SOFTFP=0' >>native_flags]) +AS_IF([grep 'LJ_ABI_PAUTH 1' conftest.i >/dev/null 2>&1], + [echo '-D PAUTH' >>dynasm_flags + echo '-DLJ_ABI_PAUTH=1' >>native_flags]) echo '-D VER='`grep 'LJ_ARCH_VERSION ' conftest.i 2>&1 | \ sed 's/^.*LJ_ARCH_VERSION //'` >>dynasm_flags AS_IF([test "x$LJHOST" = xWindows], @@ -91,7 +97,7 @@ AS_CASE([$LJARCH], AS_IF([test "x$build" != "x$host"], [AS_CASE([$LJHOST], - [Windows], [echo '-DLUAJIT_OS=LUAJIT_OS_WINDOWS' >>native_flags], + [Windows], 
[echo '-malign-double -DLUAJIT_OS=LUAJIT_OS_WINDOWS' >>native_flags], [Darwin | iOS], [echo '-DLUAJIT_OS=LUAJIT_OS_OSX' >>native_flags], [Linux], [echo '-DLUAJIT_OS=LUAJIT_OS_LINUX' >>native_flags], [echo '-DLUAJIT_OS=LUAJIT_OS_OTHER' >>native_flags])]) diff --git a/source/libs/luajit/native/Makefile.am b/source/libs/luajit/native/Makefile.am index c9ed9d7a1148365160205ecaf7b33d6480b33441..5c066a20684336285809bcb98a090c98ac388072 100644 --- a/source/libs/luajit/native/Makefile.am +++ b/source/libs/luajit/native/Makefile.am @@ -8,7 +8,7 @@ AM_CFLAGS = -Wall EXTRA_PROGRAMS = buildvm minilua -all-local: buildvm-stamp +all-local: buildvm-stamp luajit.h buildvm-stamp: buildvm$(EXEEXT) $(AM_V_at)echo timestamp >$@ @@ -21,7 +21,13 @@ nodist_buildvm_SOURCES = \ @LUAJIT_TREE@/src/host/buildvm_fold.c \ @LUAJIT_TREE@/src/host/buildvm_lib.c \ @LUAJIT_TREE@/src/host/buildvm_peobj.c -$(buildvm_OBJECTS): buildvm_arch.h +$(buildvm_OBJECTS): buildvm_arch.h luajit.h + + +luajit.h: minilua$(EXEEXT) + $(AM_V_GEN)./minilua$(EXEEXT) $(srcdir)/$(LUAJIT_TREE)/src/host/genversion.lua $(srcdir)/$(LUAJIT_TREE)/src/luajit_rolling.h $(srcdir)/$(LUAJIT_TREE)/.relver +# $(LN_S) luajit.h ../ + buildvm_arch.h: minilua$(EXEEXT) $(LUAJIT_TREE)/dynasm/dynasm.lua $(AM_V_GEN)./minilua $(srcdir)/$(LUAJIT_TREE)/dynasm/dynasm.lua \ @@ -33,5 +39,5 @@ nodist_minilua_SOURCES = \ @LUAJIT_TREE@/src/host/minilua.c minilua_LDADD = $(MATH_LIB) -CLEANFILES = buildvm-stamp buildvm_arch.h +CLEANFILES = buildvm-stamp buildvm_arch.h luajit.h diff --git a/source/libs/luajit/native/Makefile.in b/source/libs/luajit/native/Makefile.in index 7abe9db4b037520633153a801d78ecf73c6a615e..09270aeb4252b8d775bb3be1561cd10574ef23b6 100644 --- a/source/libs/luajit/native/Makefile.in +++ b/source/libs/luajit/native/Makefile.in @@ -341,7 +341,7 @@ nodist_minilua_SOURCES = \ @LUAJIT_TREE@/src/host/minilua.c minilua_LDADD = $(MATH_LIB) -CLEANFILES = buildvm-stamp buildvm_arch.h +CLEANFILES = buildvm-stamp buildvm_arch.h luajit.h all: 
config.h $(MAKE) $(AM_MAKEFLAGS) all-am @@ -918,11 +918,15 @@ uninstall-am: .PRECIOUS: Makefile -all-local: buildvm-stamp +all-local: buildvm-stamp luajit.h buildvm-stamp: buildvm$(EXEEXT) $(AM_V_at)echo timestamp >$@ -$(buildvm_OBJECTS): buildvm_arch.h +$(buildvm_OBJECTS): buildvm_arch.h luajit.h + +luajit.h: minilua$(EXEEXT) + $(AM_V_GEN)./minilua$(EXEEXT) $(srcdir)/$(LUAJIT_TREE)/src/host/genversion.lua $(srcdir)/$(LUAJIT_TREE)/src/luajit_rolling.h $(srcdir)/$(LUAJIT_TREE)/.relver +# $(LN_S) luajit.h ../ buildvm_arch.h: minilua$(EXEEXT) $(LUAJIT_TREE)/dynasm/dynasm.lua $(AM_V_GEN)./minilua $(srcdir)/$(LUAJIT_TREE)/dynasm/dynasm.lua \ diff --git a/source/libs/luajit/native/configure b/source/libs/luajit/native/configure index b3a12edcde849b9beb56471552097c66b0d4d7c1..ef11432ba33ec5c1123f29766e07201c8c6c6cbf 100755 --- a/source/libs/luajit/native/configure +++ b/source/libs/luajit/native/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.72 for luajit native 2.1.0-beta3. +# Generated by GNU Autoconf 2.72 for luajit native 2.1.81742. # # Report bugs to <tex-k@tug.org>. # @@ -604,12 +604,12 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='luajit native' PACKAGE_TARNAME='luajit-native' -PACKAGE_VERSION='2.1.0-beta3' -PACKAGE_STRING='luajit native 2.1.0-beta3' +PACKAGE_VERSION='2.1.81742' +PACKAGE_STRING='luajit native 2.1.81742' PACKAGE_BUGREPORT='tex-k@tug.org' PACKAGE_URL='' -ac_unique_file="../LuaJIT-src/src/luajit.h" +ac_unique_file="../LuaJIT-src/src/luajit_rolling.h" # Factoring default headers for most tests. ac_includes_default="\ #include <stddef.h> @@ -1316,7 +1316,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -'configure' configures luajit native 2.1.0-beta3 to adapt to many kinds of systems. 
+'configure' configures luajit native 2.1.81742 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1387,7 +1387,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of luajit native 2.1.0-beta3:";; + short | recursive ) echo "Configuration of luajit native 2.1.81742:";; esac cat <<\_ACEOF @@ -1484,7 +1484,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -luajit native configure 2.1.0-beta3 +luajit native configure 2.1.81742 generated by GNU Autoconf 2.72 Copyright (C) 2023 Free Software Foundation, Inc. @@ -1883,7 +1883,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by luajit native $as_me 2.1.0-beta3, which was +It was created by luajit native $as_me 2.1.81742, which was generated by GNU Autoconf 2.72. Invocation command line was $ $0$ac_configure_args_raw @@ -4844,7 +4844,7 @@ fi # Define the identity of the package. PACKAGE='luajit-native' - VERSION='2.1.0-beta3' + VERSION='2.1.81742' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -6771,7 +6771,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by luajit native $as_me 2.1.0-beta3, which was +This file was extended by luajit native $as_me 2.1.81742, which was generated by GNU Autoconf 2.72. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6839,7 +6839,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -luajit native config.status 2.1.0-beta3 +luajit native config.status 2.1.81742 configured by $0, generated by GNU Autoconf 2.72, with options \\"\$ac_cs_config\\" diff --git a/source/libs/luajit/native/configure.ac b/source/libs/luajit/native/configure.ac index 4d7940412e3d9ee3ccb167d1e947d0443534d750..332618905de54d5afc75465c1b7551be0ffe1426 100644 --- a/source/libs/luajit/native/configure.ac +++ b/source/libs/luajit/native/configure.ac @@ -9,7 +9,7 @@ dnl m4_include([../version.ac])[] dnl define luajit_version AC_INIT([luajit native], luajit_version, [tex-k@tug.org]) AC_PREREQ([2.63]) -AC_CONFIG_SRCDIR([../LuaJIT-src/src/luajit.h]) +AC_CONFIG_SRCDIR([../LuaJIT-src/src/luajit_rolling.h]) AC_CONFIG_AUX_DIR([../../../build-aux]) AC_CONFIG_MACRO_DIR([../../../m4]) diff --git a/source/libs/luajit/version.ac b/source/libs/luajit/version.ac index 4c6b5b3b67ce3b8305702c32c25353565ff4b1c6..4aac6497c13067c06212e195ee69875c1d051516 100644 --- a/source/libs/luajit/version.ac +++ b/source/libs/luajit/version.ac @@ -8,4 +8,7 @@ dnl dnl -------------------------------------------------------- dnl dnl m4-include this file to define the current luajit version -m4_define([luajit_version], [2.1.0-beta3]) +dnl m4_define([luajit_version], [2.1.1736781742]) +dnl libtool: error: REVISION '1736781742' must be a nonnegative integer +dnl libtool: error: '3:1736781742:1' is not valid version information +m4_define([luajit_version], [2.1.81742]) diff --git a/source/texk/web2c/luatexdir/ChangeLog b/source/texk/web2c/luatexdir/ChangeLog index 677b18e4c6e2dfe92a283994d95596b7af85675a..fda39ba269100797c433e381194f981e401f9c0e 100644 --- a/source/texk/web2c/luatexdir/ChangeLog +++ b/source/texk/web2c/luatexdir/ChangeLog @@ -1,3 +1,8 @@ 
+2025-02-01 Luigi Scarso <luigi.scarso@gmail.com> + * LuaJIT 2.1.1736781742 (work in progress) + * LuaTeX 1.21.0 + + 2025-02-01 Luigi Scarso <luigi.scarso@gmail.com> * Again on missing displacement when leqno is used in rtl paragraph (thanks to udifoglle@gmail.com) * LuaTeX 1.20.8 diff --git a/source/texk/web2c/luatexdir/luatex.c b/source/texk/web2c/luatexdir/luatex.c index ae9df921c752e1877a69fab8013cc7be2d2d4e48..d64947b916220ac9a35994dc4f1612b8feeb4bdd 100644 --- a/source/texk/web2c/luatexdir/luatex.c +++ b/source/texk/web2c/luatexdir/luatex.c @@ -32,9 +32,9 @@ stick to "0" upto "9" so users can expect a number represented as string. */ -int luatex_version = 120; -int luatex_revision = '8'; -const char *luatex_version_string = "1.20.8"; +int luatex_version = 121; +int luatex_revision = '0'; +const char *luatex_version_string = "1.21.0"; const char *engine_name = my_name; #include <kpathsea/c-ctype.h> diff --git a/source/texk/web2c/luatexdir/luatex_svnversion.h b/source/texk/web2c/luatexdir/luatex_svnversion.h index e5c0fba46eeb64ef407295d59d0ac9d5225f655d..59d787ee2806faaa97d1094a4ce757a186cad421 100644 --- a/source/texk/web2c/luatexdir/luatex_svnversion.h +++ b/source/texk/web2c/luatexdir/luatex_svnversion.h @@ -1,4 +1,4 @@ #ifndef luatex_svn_revision_h #define luatex_svn_revision_h -#define luatex_svn_revision 7651 +#define luatex_svn_revision 7652 #endif