From a9eb58d0da1220b0ccc6d604981c03830a81598f Mon Sep 17 00:00:00 2001 From: Luigi Scarso <luigi.scarso@gmail.com> Date: Sat, 18 Feb 2023 18:27:44 +0100 Subject: [PATCH] =?UTF-8?q?Fixed=20the=20"Invalid=20unicode=20ranges=20in?= =?UTF-8?q?=20CMap=20beginbfrange=20operator=20bug"=20as=20in=20pdfTeX=20(?= =?UTF-8?q?H=C3=A0n=20Th=E1=BA=BF=20Th=C3=A0nh)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- source/texk/web2c/luatexdir/ChangeLog | 4 +++- source/texk/web2c/luatexdir/font/tounicode.c | 18 +++++++++++++++++- .../texk/web2c/luatexdir/luatex_svnversion.h | 2 +- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/source/texk/web2c/luatexdir/ChangeLog b/source/texk/web2c/luatexdir/ChangeLog index 15bb53b73..ba163208a 100644 --- a/source/texk/web2c/luatexdir/ChangeLog +++ b/source/texk/web2c/luatexdir/ChangeLog @@ -1,8 +1,10 @@ +2023-02-18 Luigi Scarso <luigi.scarso@gmail.com> + * Fixed the "Invalid unicode ranges in CMap beginbfrange operator bug" as in pdfTeX (Hàn Thế Thành) + 2023-02-18 Luigi Scarso <luigi.scarso@gmail.com> * new "late special" feature by Phelype Oleinik, heavily adapted by H.Hagen. - 2023-02-05 Luigi Scarso <luigi.scarso@gmail.com> * Fix typos (thanks to J. Friedrich) diff --git a/source/texk/web2c/luatexdir/font/tounicode.c b/source/texk/web2c/luatexdir/font/tounicode.c index b1e08e7ce..68b6e01fd 100644 --- a/source/texk/web2c/luatexdir/font/tounicode.c +++ b/source/texk/web2c/luatexdir/font/tounicode.c @@ -305,6 +305,21 @@ static void set_cid_glyph_unicode(long index, glyph_unicode_entry * gp, internal } */ +static boolean is_last_byte_valid(int srcCode1, int srcCode2, long code) +{ + /*tex + Following pdfTeX, when defining ranges of this type, the value of the last byte in the + string shall be less than or equal to 255 − (srcCode2 − srcCode1). This + ensures that the last byte of the string shall not be incremented past + 255; otherwise, the result of mapping is undefined. 
+ */ + char *s = strend(utf16be_str(code)) - 2; + long l = strtol(s, NULL, 16); + return l < 255 - (srcCode2 - srcCode1); +} + + + static int do_write_tounicode(PDF pdf, char **glyph_names, char *name, internal_font_number f) { char buf[SMALL_BUF_SIZE], *p, *s; @@ -403,7 +418,8 @@ static int do_write_tounicode(PDF pdf, char **glyph_names, char *name, internal_ } else { /*tex |gtab[i].code >= 0| */ j = i; - while (i < 256 && gtab[i + 1].code >= 0 && gtab[i].code + 1 == gtab[i + 1].code) + while (i < 256 && gtab[i + 1].code >= 0 && gtab[i].code + 1 == gtab[i + 1].code && is_last_byte_valid(j, i, gtab[i].code) +) i++; /*tex At this point |i| is the last entry of the subrange so we move |i| to diff --git a/source/texk/web2c/luatexdir/luatex_svnversion.h b/source/texk/web2c/luatexdir/luatex_svnversion.h index a8e86fb40..d900e530e 100644 --- a/source/texk/web2c/luatexdir/luatex_svnversion.h +++ b/source/texk/web2c/luatexdir/luatex_svnversion.h @@ -1,4 +1,4 @@ #ifndef luatex_svn_revision_h #define luatex_svn_revision_h -#define luatex_svn_revision 7559 +#define luatex_svn_revision 7560 #endif -- GitLab