mirror of https://github.com/apache/lucene.git
GITHUB#12451: Update TestStringsToAutomaton validation to work around GH#12458 (#12461)
This commit is contained in:
parent
20e97fbd00
commit
2b3b028734
|
@ -172,6 +172,9 @@ Bug Fixes
|
||||||
|
|
||||||
* GITHUB#12423: Respect timeouts in ExitableDirectoryReader when searching with byte[] vectors (Ben Trent).
|
* GITHUB#12423: Respect timeouts in ExitableDirectoryReader when searching with byte[] vectors (Ben Trent).
|
||||||
|
|
||||||
|
* GITHUB#12451: Change TestStringsToAutomaton validation to avoid automaton conversion bug discovered in GH#12458
|
||||||
|
(Greg Miller).
|
||||||
|
|
||||||
Other
|
Other
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
import org.apache.lucene.util.BytesRefBuilder;
|
||||||
import org.apache.lucene.util.BytesRefIterator;
|
import org.apache.lucene.util.BytesRefIterator;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
import org.apache.lucene.util.fst.Util;
|
import org.apache.lucene.util.fst.Util;
|
||||||
|
|
||||||
public class TestStringsToAutomaton extends LuceneTestCase {
|
public class TestStringsToAutomaton extends LuceneTestCase {
|
||||||
|
@ -141,11 +142,22 @@ public class TestStringsToAutomaton extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure every term produced by the automaton is expected
|
// Make sure every term produced by the automaton is expected
|
||||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
FiniteStringsIterator it = new FiniteStringsIterator(a);
|
||||||
FiniteStringsIterator it = new FiniteStringsIterator(c.automaton);
|
if (isBinary) {
|
||||||
for (IntsRef r = it.next(); r != null; r = it.next()) {
|
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||||
BytesRef t = Util.toBytesRef(r, scratch);
|
for (IntsRef r = it.next(); r != null; r = it.next()) {
|
||||||
assertTrue(expected.contains(t));
|
BytesRef t = Util.toBytesRef(r, scratch);
|
||||||
|
assertTrue(t + " unexpectedly produced by automaton", expected.contains(t));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Note that we validate against the original automaton, not the compiled one, as the compiled
|
||||||
|
// automaton can incorrectly produce invalid/overlong utf8 terms (see: GH#12458). This means
|
||||||
|
// we need slightly different logic here since the automaton "speaks" code points and not
|
||||||
|
// utf8 bytes.
|
||||||
|
for (IntsRef r = it.next(); r != null; r = it.next()) {
|
||||||
|
BytesRef t = newBytesRef(UnicodeUtil.newString(r.ints, r.offset, r.length));
|
||||||
|
assertTrue(t + " unexpectedly produced by automaton", expected.contains(t));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue