From ee56bcafb4b98e60323963a0447f4cb747d3f5c7 Mon Sep 17 00:00:00 2001 From: Minwoo Kang <10624086+mwkang@users.noreply.github.com> Date: Wed, 20 Jul 2022 01:10:38 +0900 Subject: [PATCH] HBASE-27219 Change JONI encoding in RegexStringComparator (#4632) Signed-off-by: Andrew Purtell --- .../apache/hadoop/hbase/filter/RegexStringComparator.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java index 6841c80194a..4bdc057bd9e 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java @@ -26,7 +26,7 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; import org.jcodings.Encoding; import org.jcodings.EncodingDB; -import org.jcodings.specific.UTF8Encoding; +import org.jcodings.specific.NonStrictUTF8Encoding; import org.joni.Matcher; import org.joni.Option; import org.joni.Regex; @@ -312,7 +312,9 @@ public class RegexStringComparator extends ByteArrayComparable { * NOTE: Only the {@link Pattern} flags CASE_INSENSITIVE, DOTALL, and MULTILINE are supported. */ static class JoniRegexEngine implements Engine { - private Encoding encoding = UTF8Encoding.INSTANCE; + // When using UTF8Encoding, an infinite loop can occur if an invalid UTF8 is encountered. + // Use NonStrictUTF8Encoding instead of UTF8Encoding to avoid the issue. + private Encoding encoding = NonStrictUTF8Encoding.INSTANCE; private String regex; private Regex pattern;