Beef up `Terms#intersect` checks in `CheckIndex`. (#12926)

Now also testing what happens with a non-null `startTerm`. This found bugs in
`DirectPostingsFormat`.
This commit is contained in:
Adrien Grand 2023-12-19 11:17:38 +01:00 committed by GitHub
parent 5d6086e199
commit bf45ab79ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 64 additions and 25 deletions

View File

@ -994,7 +994,10 @@ public final class DirectPostingsFormat extends PostingsFormat {
while (label > states[i].transitionMax) {
states[i].transitionUpto++;
assert states[i].transitionUpto < states[i].transitionCount;
if (states[i].transitionUpto >= states[i].transitionCount) {
// All transitions compare less than the required label
break;
}
transitionAccessor.getNextTransition(states[i].transition);
states[i].transitionMin = states[i].transition.min;
states[i].transitionMax = states[i].transition.max;
@ -1119,12 +1122,14 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
}
final int termOffset = termOffsets[termOrd];
final int termLen = termOffsets[1 + termOrd] - termOffset;
if (termOrd >= 0) {
final int termOffset = termOffsets[termOrd];
final int termLen = termOffsets[1 + termOrd] - termOffset;
if (termOrd >= 0 && !startTerm.equals(new BytesRef(termBytes, termOffset, termLen))) {
stateUpto -= skipUpto;
termOrd--;
if (!startTerm.equals(new BytesRef(termBytes, termOffset, termLen))) {
stateUpto -= skipUpto;
termOrd--;
}
}
// if (DEBUG) {
// System.out.println(" loop end; return termOrd=" + termOrd + " stateUpto=" +

View File

@ -2305,31 +2305,26 @@ public final class CheckIndex implements Closeable {
}
// Test Terms#intersect
TermsEnum allTerms = terms.iterator();
// An automaton that should match a good number of terms
Automaton a =
Automaton automaton =
Operations.concatenate(
Arrays.asList(
Automata.makeAnyBinary(),
Automata.makeCharRange('a', 'e'),
Automata.makeAnyBinary()));
a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
CompiledAutomaton ca = new CompiledAutomaton(a);
ByteRunAutomaton runAutomaton = new ByteRunAutomaton(a);
TermsEnum filteredTerms = terms.intersect(ca, null);
for (BytesRef term = allTerms.next(); term != null; term = allTerms.next()) {
if (runAutomaton.run(term.bytes, term.offset, term.length)) {
BytesRef filteredTerm = filteredTerms.next();
if (Objects.equals(term, filteredTerm) == false) {
throw new CheckIndexException(
"Expected next filtered term: " + term + ", but got " + filteredTerm);
}
}
}
BytesRef filteredTerm = filteredTerms.next();
if (filteredTerm != null) {
throw new CheckIndexException("Expected exhausted TermsEnum, but got " + filteredTerm);
}
BytesRef startTerm = null;
checkTermsIntersect(terms, automaton, startTerm);
startTerm = new BytesRef();
checkTermsIntersect(terms, automaton, startTerm);
automaton = Automata.makeAnyBinary();
startTerm = new BytesRef(new byte[] {'l'});
checkTermsIntersect(terms, automaton, startTerm);
// a term that likely compares greater than every other term in the dictionary
startTerm = new BytesRef(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF});
checkTermsIntersect(terms, automaton, startTerm);
}
}
@ -2370,6 +2365,45 @@ public final class CheckIndex implements Closeable {
return status;
}
private static void checkTermsIntersect(Terms terms, Automaton automaton, BytesRef startTerm)
throws IOException {
TermsEnum allTerms = terms.iterator();
automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
CompiledAutomaton compiledAutomaton = new CompiledAutomaton(automaton);
ByteRunAutomaton runAutomaton = new ByteRunAutomaton(automaton);
TermsEnum filteredTerms = terms.intersect(compiledAutomaton, startTerm);
BytesRef term;
if (startTerm != null) {
switch (allTerms.seekCeil(startTerm)) {
case FOUND:
term = allTerms.next();
break;
case NOT_FOUND:
term = allTerms.term();
break;
case END:
default:
term = null;
break;
}
} else {
term = allTerms.next();
}
for (; term != null; term = allTerms.next()) {
if (runAutomaton.run(term.bytes, term.offset, term.length)) {
BytesRef filteredTerm = filteredTerms.next();
if (Objects.equals(term, filteredTerm) == false) {
throw new CheckIndexException(
"Expected next filtered term: " + term + ", but got " + filteredTerm);
}
}
}
BytesRef filteredTerm = filteredTerms.next();
if (filteredTerm != null) {
throw new CheckIndexException("Expected exhausted TermsEnum, but got " + filteredTerm);
}
}
/**
* For use in tests only.
*