Beef up `Terms#intersect` checks in `CheckIndex`. (#12926)

Now also testing what happens with a non-null `startTerm`. This found bugs in
`DirectPostingsFormat`.
This commit is contained in:
Adrien Grand 2023-12-19 11:17:38 +01:00 committed by GitHub
parent 5d6086e199
commit bf45ab79ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 64 additions and 25 deletions

View File

@ -994,7 +994,10 @@ public final class DirectPostingsFormat extends PostingsFormat {
while (label > states[i].transitionMax) { while (label > states[i].transitionMax) {
states[i].transitionUpto++; states[i].transitionUpto++;
assert states[i].transitionUpto < states[i].transitionCount; if (states[i].transitionUpto >= states[i].transitionCount) {
// All transitions compare less than the required label
break;
}
transitionAccessor.getNextTransition(states[i].transition); transitionAccessor.getNextTransition(states[i].transition);
states[i].transitionMin = states[i].transition.min; states[i].transitionMin = states[i].transition.min;
states[i].transitionMax = states[i].transition.max; states[i].transitionMax = states[i].transition.max;
@ -1119,12 +1122,14 @@ public final class DirectPostingsFormat extends PostingsFormat {
} }
} }
final int termOffset = termOffsets[termOrd]; if (termOrd >= 0) {
final int termLen = termOffsets[1 + termOrd] - termOffset; final int termOffset = termOffsets[termOrd];
final int termLen = termOffsets[1 + termOrd] - termOffset;
if (termOrd >= 0 && !startTerm.equals(new BytesRef(termBytes, termOffset, termLen))) { if (!startTerm.equals(new BytesRef(termBytes, termOffset, termLen))) {
stateUpto -= skipUpto; stateUpto -= skipUpto;
termOrd--; termOrd--;
}
} }
// if (DEBUG) { // if (DEBUG) {
// System.out.println(" loop end; return termOrd=" + termOrd + " stateUpto=" + // System.out.println(" loop end; return termOrd=" + termOrd + " stateUpto=" +

View File

@ -2305,31 +2305,26 @@ public final class CheckIndex implements Closeable {
} }
// Test Terms#intersect // Test Terms#intersect
TermsEnum allTerms = terms.iterator();
// An automaton that should match a good number of terms // An automaton that should match a good number of terms
Automaton a = Automaton automaton =
Operations.concatenate( Operations.concatenate(
Arrays.asList( Arrays.asList(
Automata.makeAnyBinary(), Automata.makeAnyBinary(),
Automata.makeCharRange('a', 'e'), Automata.makeCharRange('a', 'e'),
Automata.makeAnyBinary())); Automata.makeAnyBinary()));
a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); BytesRef startTerm = null;
CompiledAutomaton ca = new CompiledAutomaton(a); checkTermsIntersect(terms, automaton, startTerm);
ByteRunAutomaton runAutomaton = new ByteRunAutomaton(a);
TermsEnum filteredTerms = terms.intersect(ca, null); startTerm = new BytesRef();
for (BytesRef term = allTerms.next(); term != null; term = allTerms.next()) { checkTermsIntersect(terms, automaton, startTerm);
if (runAutomaton.run(term.bytes, term.offset, term.length)) {
BytesRef filteredTerm = filteredTerms.next(); automaton = Automata.makeAnyBinary();
if (Objects.equals(term, filteredTerm) == false) { startTerm = new BytesRef(new byte[] {'l'});
throw new CheckIndexException( checkTermsIntersect(terms, automaton, startTerm);
"Expected next filtered term: " + term + ", but got " + filteredTerm);
} // a term that likely compares greater than every other term in the dictionary
} startTerm = new BytesRef(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF});
} checkTermsIntersect(terms, automaton, startTerm);
BytesRef filteredTerm = filteredTerms.next();
if (filteredTerm != null) {
throw new CheckIndexException("Expected exhausted TermsEnum, but got " + filteredTerm);
}
} }
} }
@ -2370,6 +2365,45 @@ public final class CheckIndex implements Closeable {
return status; return status;
} }
/**
 * Cross-checks {@code terms.intersect(...)} against a plain iteration over {@code terms}.
 *
 * <p>The automaton is determinized, then used both to build the {@link CompiledAutomaton} handed
 * to {@code intersect} and to build a {@link ByteRunAutomaton} that is run against every term
 * produced by {@code terms.iterator()}. Each term accepted by the run automaton must be the next
 * term returned by the intersected enum, and the intersected enum must be exhausted once the
 * plain iteration is.
 *
 * @param terms the terms dictionary under test
 * @param automaton the automaton to intersect with; it is determinized here before use
 * @param startTerm optional start term passed through to {@code intersect}; when non-null the
 *     reference iteration is positioned with {@code seekCeil(startTerm)} and an exact match is
 *     skipped, so comparison begins at the first term after {@code startTerm}
 * @throws IOException if reading the terms fails
 * @throws CheckIndexException if the intersected enum returns a different term than expected, or
 *     returns extra terms after the reference iteration is exhausted
 */
private static void checkTermsIntersect(Terms terms, Automaton automaton, BytesRef startTerm)
    throws IOException {
  final TermsEnum reference = terms.iterator();
  final Automaton deterministic =
      Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
  final CompiledAutomaton compiled = new CompiledAutomaton(deterministic);
  final ByteRunAutomaton matcher = new ByteRunAutomaton(deterministic);
  final TermsEnum intersected = terms.intersect(compiled, startTerm);

  // Position the reference enum on the first term that the intersected enum may return.
  BytesRef expected;
  if (startTerm == null) {
    expected = reference.next();
  } else {
    switch (reference.seekCeil(startTerm)) {
      case NOT_FOUND:
        // Already positioned on a term other than startTerm: start from the current term.
        expected = reference.term();
        break;
      case FOUND:
        // Positioned exactly on startTerm: step past it.
        expected = reference.next();
        break;
      case END:
      default:
        // Nothing left to compare against.
        expected = null;
        break;
    }
  }

  while (expected != null) {
    if (matcher.run(expected.bytes, expected.offset, expected.length)) {
      final BytesRef actual = intersected.next();
      if (Objects.equals(expected, actual) == false) {
        throw new CheckIndexException(
            "Expected next filtered term: " + expected + ", but got " + actual);
      }
    }
    expected = reference.next();
  }

  // Every matching term has been consumed, so the intersected enum must be done too.
  final BytesRef leftover = intersected.next();
  if (leftover != null) {
    throw new CheckIndexException("Expected exhausted TermsEnum, but got " + leftover);
  }
}
/** /**
* For use in tests only. * For use in tests only.
* *