diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java index 6841c80194a..4bdc057bd9e 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java @@ -26,7 +26,7 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; import org.jcodings.Encoding; import org.jcodings.EncodingDB; -import org.jcodings.specific.UTF8Encoding; +import org.jcodings.specific.NonStrictUTF8Encoding; import org.joni.Matcher; import org.joni.Option; import org.joni.Regex; @@ -312,7 +312,9 @@ public class RegexStringComparator extends ByteArrayComparable { * NOTE: Only the {@link Pattern} flags CASE_INSENSITIVE, DOTALL, and MULTILINE are supported. */ static class JoniRegexEngine implements Engine { - private Encoding encoding = UTF8Encoding.INSTANCE; + // With UTF8Encoding, an infinite loop can occur on an invalid UTF-8 byte sequence. + // Use NonStrictUTF8Encoding instead of UTF8Encoding to avoid the issue. + private Encoding encoding = NonStrictUTF8Encoding.INSTANCE; private String regex; private Regex pattern;