From a9eb58d0da1220b0ccc6d604981c03830a81598f Mon Sep 17 00:00:00 2001
From: Luigi Scarso <luigi.scarso@gmail.com>
Date: Sat, 18 Feb 2023 18:27:44 +0100
Subject: [PATCH] =?UTF-8?q?Fixed=20the=20"Invalid=20unicode=20ranges=20in?=
 =?UTF-8?q?=20CMap=20beginbfrange=20operator=20bug"=20as=20in=20pdfTeX=20(?=
 =?UTF-8?q?H=C3=A0n=20Th=E1=BA=BF=20Th=C3=A0nh)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 source/texk/web2c/luatexdir/ChangeLog          |  4 +++-
 source/texk/web2c/luatexdir/font/tounicode.c   | 18 +++++++++++++++++-
 .../texk/web2c/luatexdir/luatex_svnversion.h   |  2 +-
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/source/texk/web2c/luatexdir/ChangeLog b/source/texk/web2c/luatexdir/ChangeLog
index 15bb53b73..ba163208a 100644
--- a/source/texk/web2c/luatexdir/ChangeLog
+++ b/source/texk/web2c/luatexdir/ChangeLog
@@ -1,8 +1,10 @@
+2023-02-18  Luigi Scarso <luigi.scarso@gmail.com> 
+    * Fixed the "Invalid unicode ranges in CMap beginbfrange operator bug" as in pdfTeX (Hàn Thế Thành)
+
 2023-02-18  Luigi Scarso <luigi.scarso@gmail.com> 
     * new "late special" feature by Phelype Oleinik,
       heavily adapted by H.Hagen.
 
-
 2023-02-05  Luigi Scarso <luigi.scarso@gmail.com> 
     *  Fix typos (thanks to J. Friedrich) 
 
diff --git a/source/texk/web2c/luatexdir/font/tounicode.c b/source/texk/web2c/luatexdir/font/tounicode.c
index b1e08e7ce..68b6e01fd 100644
--- a/source/texk/web2c/luatexdir/font/tounicode.c
+++ b/source/texk/web2c/luatexdir/font/tounicode.c
@@ -305,6 +305,21 @@ static void set_cid_glyph_unicode(long index, glyph_unicode_entry * gp, internal
 }
 */
 
+static boolean is_last_byte_valid(int srcCode1, int srcCode2, long code)
+{
+    /*tex
+       Following pdfTeX: when defining ranges of this type, the value of the last byte in the
+       string shall be less than or equal to 255 − (srcCode2 − srcCode1). This ensures that
+       the last byte is not incremented past 255; otherwise, the result of mapping is undefined.
+       We parse the last two hex digits of the string; note that the test below is a strict |<|.
+    */
+    char *s = strend(utf16be_str(code)) - 2;
+    long l = strtol(s, NULL, 16);
+    return l < 255 - (srcCode2 - srcCode1);
+}
+
+
+
 static int do_write_tounicode(PDF pdf, char **glyph_names, char *name, internal_font_number f)
 {
     char buf[SMALL_BUF_SIZE], *p, *s;
@@ -403,7 +418,8 @@ static int do_write_tounicode(PDF pdf, char **glyph_names, char *name, internal_
         } else {
             /*tex |gtab[i].code >= 0| */
             j = i;
-            while (i < 256 && gtab[i + 1].code >= 0 && gtab[i].code + 1 == gtab[i + 1].code)
+            while (i < 256 && gtab[i + 1].code >= 0 && gtab[i].code + 1 == gtab[i + 1].code && is_last_byte_valid(j, i, gtab[i].code)
+)
                 i++;
             /*tex
                 At this point |i| is the last entry of the subrange so we move |i| to
diff --git a/source/texk/web2c/luatexdir/luatex_svnversion.h b/source/texk/web2c/luatexdir/luatex_svnversion.h
index a8e86fb40..d900e530e 100644
--- a/source/texk/web2c/luatexdir/luatex_svnversion.h
+++ b/source/texk/web2c/luatexdir/luatex_svnversion.h
@@ -1,4 +1,4 @@
 #ifndef luatex_svn_revision_h
 #define luatex_svn_revision_h
-#define luatex_svn_revision 7559
+#define luatex_svn_revision 7560
 #endif
-- 
GitLab