Fix a TestAutomaton.testRandomFinite test bug: the test was calling StringsToAutomaton.build on a Collection of BytesRefs containing a too-massive (> 1000 UTF-8 bytes) term. Also corrected the exception message to make it clear that the limit is in UTF-8 bytes, not Java (UTF-16) characters.

This commit is contained in:
Mike McCandless 2023-10-29 11:41:40 -04:00
parent a8c52e2e19
commit 11436a848c
3 changed files with 12 additions and 3 deletions

View File

@ -269,7 +269,7 @@ final class StringsToAutomaton {
throw new IllegalArgumentException(
"This builder doesn't allow terms that are larger than "
+ Automata.MAX_STRING_UNION_TERM_LENGTH
+ " characters, got "
+ " UTF-8 bytes, got "
+ current);
}
assert stateRegistry != null : "Automaton already built.";

View File

@ -790,9 +790,18 @@ public class TestAutomaton extends LuceneTestCase {
return null;
}
/**
 * Reports whether the given collection contains at least one term whose {@code length} exceeds
 * {@link Automata#MAX_STRING_UNION_TERM_LENGTH}.
 *
 * @param terms the terms to inspect
 * @return {@code true} if any term is longer than the limit, {@code false} otherwise (including
 *     when the collection is empty)
 */
private static boolean hasMassiveTerm(Collection<BytesRef> terms) {
  // Short-circuits on the first over-long term, just like an early return from a loop would.
  return terms.stream()
      .anyMatch(candidate -> candidate.length > Automata.MAX_STRING_UNION_TERM_LENGTH);
}
private Automaton unionTerms(Collection<BytesRef> terms) {
Automaton a;
if (random().nextBoolean()) {
if (random().nextBoolean() || hasMassiveTerm(terms)) {
if (VERBOSE) {
System.out.println("TEST: unionTerms: use union");
}

View File

@ -103,7 +103,7 @@ public class TestStringsToAutomaton extends LuceneTestCase {
.startsWith(
"This builder doesn't allow terms that are larger than "
+ Automata.MAX_STRING_UNION_TERM_LENGTH
+ " characters"));
+ " UTF-8 bytes"));
byte[] b1k = ArrayUtil.copyOfSubArray(b10k, 0, 1000);
build(Collections.singleton(new BytesRef(b1k)), false); // no exception