mirror of https://github.com/apache/lucene.git
GITHUB#12451: Update TestStringsToAutomaton validation to work around GH#12458 (#12461)
This commit is contained in:
parent
20e97fbd00
commit
2b3b028734
|
@ -172,6 +172,9 @@ Bug Fixes
|
|||
|
||||
* GITHUB#12423: Respect timeouts in ExitableDirectoryReader when searching with byte[] vectors (Ben Trent).
|
||||
|
||||
* GITHUB#12451: Change TestStringsToAutomaton validation to avoid the automaton conversion bug discovered in GH#12458
|
||||
(Greg Miller).
|
||||
|
||||
Other
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
public class TestStringsToAutomaton extends LuceneTestCase {
|
||||
|
@ -141,11 +142,22 @@ public class TestStringsToAutomaton extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// Make sure every term produced by the automaton is expected
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
FiniteStringsIterator it = new FiniteStringsIterator(c.automaton);
|
||||
for (IntsRef r = it.next(); r != null; r = it.next()) {
|
||||
BytesRef t = Util.toBytesRef(r, scratch);
|
||||
assertTrue(expected.contains(t));
|
||||
FiniteStringsIterator it = new FiniteStringsIterator(a);
|
||||
if (isBinary) {
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
for (IntsRef r = it.next(); r != null; r = it.next()) {
|
||||
BytesRef t = Util.toBytesRef(r, scratch);
|
||||
assertTrue(t + " unexpectedly produced by automaton", expected.contains(t));
|
||||
}
|
||||
} else {
|
||||
// Note that we validate against the original automaton, not the compiled one, as the compiled
|
||||
// automaton can incorrectly produce invalid/overlong utf8 terms (see: GH#12458). This means
|
||||
// we need slightly different logic here since the automaton "speaks" code points and not
|
||||
// utf8 bytes.
|
||||
for (IntsRef r = it.next(); r != null; r = it.next()) {
|
||||
BytesRef t = newBytesRef(UnicodeUtil.newString(r.ints, r.offset, r.length));
|
||||
assertTrue(t + " unexpectedly produced by automaton", expected.contains(t));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue