mirror of https://github.com/apache/lucene.git
Beef up `Terms#intersect` checks in `CheckIndex`. (#12926)
Now also testing what happens with a non-null `startTerm`. This found bugs in `DirectPostingsFormat`.
This commit is contained in:
parent
5d6086e199
commit
bf45ab79ec
|
@ -994,7 +994,10 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
|
||||
while (label > states[i].transitionMax) {
|
||||
states[i].transitionUpto++;
|
||||
assert states[i].transitionUpto < states[i].transitionCount;
|
||||
if (states[i].transitionUpto >= states[i].transitionCount) {
|
||||
// All transitions compare less than the required label
|
||||
break;
|
||||
}
|
||||
transitionAccessor.getNextTransition(states[i].transition);
|
||||
states[i].transitionMin = states[i].transition.min;
|
||||
states[i].transitionMax = states[i].transition.max;
|
||||
|
@ -1119,12 +1122,14 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
}
|
||||
|
||||
final int termOffset = termOffsets[termOrd];
|
||||
final int termLen = termOffsets[1 + termOrd] - termOffset;
|
||||
if (termOrd >= 0) {
|
||||
final int termOffset = termOffsets[termOrd];
|
||||
final int termLen = termOffsets[1 + termOrd] - termOffset;
|
||||
|
||||
if (termOrd >= 0 && !startTerm.equals(new BytesRef(termBytes, termOffset, termLen))) {
|
||||
stateUpto -= skipUpto;
|
||||
termOrd--;
|
||||
if (!startTerm.equals(new BytesRef(termBytes, termOffset, termLen))) {
|
||||
stateUpto -= skipUpto;
|
||||
termOrd--;
|
||||
}
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" loop end; return termOrd=" + termOrd + " stateUpto=" +
|
||||
|
|
|
@ -2305,31 +2305,26 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
|
||||
// Test Terms#intersect
|
||||
TermsEnum allTerms = terms.iterator();
|
||||
// An automaton that should match a good number of terms
|
||||
Automaton a =
|
||||
Automaton automaton =
|
||||
Operations.concatenate(
|
||||
Arrays.asList(
|
||||
Automata.makeAnyBinary(),
|
||||
Automata.makeCharRange('a', 'e'),
|
||||
Automata.makeAnyBinary()));
|
||||
a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a);
|
||||
ByteRunAutomaton runAutomaton = new ByteRunAutomaton(a);
|
||||
TermsEnum filteredTerms = terms.intersect(ca, null);
|
||||
for (BytesRef term = allTerms.next(); term != null; term = allTerms.next()) {
|
||||
if (runAutomaton.run(term.bytes, term.offset, term.length)) {
|
||||
BytesRef filteredTerm = filteredTerms.next();
|
||||
if (Objects.equals(term, filteredTerm) == false) {
|
||||
throw new CheckIndexException(
|
||||
"Expected next filtered term: " + term + ", but got " + filteredTerm);
|
||||
}
|
||||
}
|
||||
}
|
||||
BytesRef filteredTerm = filteredTerms.next();
|
||||
if (filteredTerm != null) {
|
||||
throw new CheckIndexException("Expected exhausted TermsEnum, but got " + filteredTerm);
|
||||
}
|
||||
BytesRef startTerm = null;
|
||||
checkTermsIntersect(terms, automaton, startTerm);
|
||||
|
||||
startTerm = new BytesRef();
|
||||
checkTermsIntersect(terms, automaton, startTerm);
|
||||
|
||||
automaton = Automata.makeAnyBinary();
|
||||
startTerm = new BytesRef(new byte[] {'l'});
|
||||
checkTermsIntersect(terms, automaton, startTerm);
|
||||
|
||||
// a term that likely compares greater than every other term in the dictionary
|
||||
startTerm = new BytesRef(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF});
|
||||
checkTermsIntersect(terms, automaton, startTerm);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2370,6 +2365,45 @@ public final class CheckIndex implements Closeable {
|
|||
return status;
|
||||
}
|
||||
|
||||
private static void checkTermsIntersect(Terms terms, Automaton automaton, BytesRef startTerm)
|
||||
throws IOException {
|
||||
TermsEnum allTerms = terms.iterator();
|
||||
automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||
CompiledAutomaton compiledAutomaton = new CompiledAutomaton(automaton);
|
||||
ByteRunAutomaton runAutomaton = new ByteRunAutomaton(automaton);
|
||||
TermsEnum filteredTerms = terms.intersect(compiledAutomaton, startTerm);
|
||||
BytesRef term;
|
||||
if (startTerm != null) {
|
||||
switch (allTerms.seekCeil(startTerm)) {
|
||||
case FOUND:
|
||||
term = allTerms.next();
|
||||
break;
|
||||
case NOT_FOUND:
|
||||
term = allTerms.term();
|
||||
break;
|
||||
case END:
|
||||
default:
|
||||
term = null;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
term = allTerms.next();
|
||||
}
|
||||
for (; term != null; term = allTerms.next()) {
|
||||
if (runAutomaton.run(term.bytes, term.offset, term.length)) {
|
||||
BytesRef filteredTerm = filteredTerms.next();
|
||||
if (Objects.equals(term, filteredTerm) == false) {
|
||||
throw new CheckIndexException(
|
||||
"Expected next filtered term: " + term + ", but got " + filteredTerm);
|
||||
}
|
||||
}
|
||||
}
|
||||
BytesRef filteredTerm = filteredTerms.next();
|
||||
if (filteredTerm != null) {
|
||||
throw new CheckIndexException("Expected exhausted TermsEnum, but got " + filteredTerm);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For use in tests only.
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue