代码拉取完成,页面将自动刷新
From 45dcb3de900b77583f4e9daa663004c55fad4794 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Wed, 22 Nov 2023 10:22:59 +0000
Subject: [PATCH] Fix \X matching in 32 bit mode without UTF in JIT
---
src/pcre2_jit_compile.c | 6 +++---
testdata/testinput12 | 4 ++++
testdata/testoutput12-16 | 9 +++++++++
testdata/testoutput12-32 | 5 +++++
4 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 510c392..8d64e1c 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -8718,7 +8718,7 @@ c = *cc++;
#if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x110000)
- return NULL;
+ return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
lgb = UCD_GRAPHBREAK(c);
@@ -8958,7 +8958,7 @@ switch(type)
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
- if (!common->utf || common->invalid_utf)
+ if (common->invalid_utf)
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif
@@ -12044,7 +12044,7 @@ switch(opcode)
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf)
+ if (type == OP_EXTUNI || common->utf)
{
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
detect_partial_match(common, &no_match);
diff --git a/testdata/testinput12 b/testdata/testinput12
index 5a2d8d2..a6678bb 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -569,4 +569,8 @@
/\x{802a0000}*/
\x{802a0000}\x{802a0000}
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+ a\x{110000}\x{ffffffff}
+
# End of testinput12
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 9ac403e..f3b40a3 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1814,4 +1814,13 @@ No match
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
\x{802a0000}\x{802a0000}
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+ a\x{110000}\x{ffffffff}
+** Character \x{110000} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+ 0: a\x00\x{ffff}
+
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 9396305..dd42f86 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1812,4 +1812,9 @@ No match
\x{802a0000}\x{802a0000}
0: \x{802a0000}\x{802a0000}
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+ a\x{110000}\x{ffffffff}
+ 0: a\x{110000}\x{ffffffff}
+
# End of testinput12
--
2.33.0
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违反国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。