HBASE-27219 Change JONI encoding in RegexStringComparator (#4632)

Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
Minwoo Kang 2022-07-20 01:10:38 +09:00 committed by Andrew Purtell
parent 25c375e186
commit ee56bcafb4
1 changed file with 4 additions and 2 deletions

View File

@ -26,7 +26,7 @@ import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
import org.jcodings.Encoding;
import org.jcodings.EncodingDB;
import org.jcodings.specific.UTF8Encoding;
import org.jcodings.specific.NonStrictUTF8Encoding;
import org.joni.Matcher;
import org.joni.Option;
import org.joni.Regex;
@ -312,7 +312,9 @@ public class RegexStringComparator extends ByteArrayComparable {
* NOTE: Only the {@link Pattern} flags CASE_INSENSITIVE, DOTALL, and MULTILINE are supported.
*/
static class JoniRegexEngine implements Engine {
private Encoding encoding = UTF8Encoding.INSTANCE;
// With UTF8Encoding, an infinite loop can occur when an invalid UTF-8 byte sequence is
// encountered. Use NonStrictUTF8Encoding instead of UTF8Encoding to avoid the issue.
private Encoding encoding = NonStrictUTF8Encoding.INSTANCE;
private String regex;
private Regex pattern;