LUCENE-9098 Use multibyte code-points for complex fuzzy query (#1194)

This commit is contained in:
Mike 2020-01-21 12:16:42 -06:00 committed by GitHub
parent 8894babd4a
commit ec6a9aab09
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 14 additions and 4 deletions

View File

@@ -499,21 +499,31 @@ public class TestFuzzyQuery extends LuceneTestCase {
assertTrue(expected.getMessage().contains("maxExpansions must be positive"));
}
/**
 * Produces a random "realistic" unicode string whose first {@code char} lies above
 * {@link Byte#MAX_VALUE}, i.e. outside the single-byte range.
 *
 * <p>Candidates are drawn from
 * {@link RandomizedTest#randomRealisticUnicodeOfCodepointLength(int)} and rejected until one
 * starts with such a character.
 *
 * @param length the code point length passed to the random string generator
 * @return the first generated string whose leading {@code char} is greater than
 *     {@code Byte.MAX_VALUE}
 */
private String randomRealisticMultiByteUnicode(int length) {
  // There is 1 single-byte unicode block, and 194 multi-byte blocks, so a retry is rarely needed.
  // NOTE(review): only the first char is inspected; presumably the generator draws every
  // character of a string from the same block -- confirm against RandomizedTest.
  String candidate;
  do {
    candidate = RandomizedTest.randomRealisticUnicodeOfCodepointLength(length);
  } while (candidate.charAt(0) <= Byte.MAX_VALUE);
  return candidate;
}
public void testErrorMessage() {
// 45 states per vector from Lev2TParametricDescription
int length = (Operations.DEFAULT_MAX_DETERMINIZED_STATES / 45) + 10;
final int length = (Operations.DEFAULT_MAX_DETERMINIZED_STATES / 45) + 10;
final String value = randomRealisticMultiByteUnicode(length);
String value = RandomizedTest.randomRealisticUnicodeOfCodepointLength(length);
FuzzyTermsEnum.FuzzyTermsException expected = expectThrows(FuzzyTermsEnum.FuzzyTermsException.class, () -> {
new FuzzyQuery(new Term("field", value)).getTermsEnum(new Terms() {
@Override
public TermsEnum iterator() {
throw new UnsupportedOperationException();
return TermsEnum.EMPTY;
}
@Override
public long size() {
throw new UnsupportedOperationException();
return 0;
}
@Override