mirror of https://github.com/apache/lucene.git
Beef up `Terms#intersect` checks in `CheckIndex`. (#12926)
`CheckIndex` now also exercises `Terms#intersect` with a non-null `startTerm`. This uncovered bugs in `DirectPostingsFormat`, which this commit also fixes.
This commit is contained in:
parent
5d6086e199
commit
bf45ab79ec
|
@ -994,7 +994,10 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
||||||
|
|
||||||
while (label > states[i].transitionMax) {
|
while (label > states[i].transitionMax) {
|
||||||
states[i].transitionUpto++;
|
states[i].transitionUpto++;
|
||||||
assert states[i].transitionUpto < states[i].transitionCount;
|
if (states[i].transitionUpto >= states[i].transitionCount) {
|
||||||
|
// All transitions compare less than the required label
|
||||||
|
break;
|
||||||
|
}
|
||||||
transitionAccessor.getNextTransition(states[i].transition);
|
transitionAccessor.getNextTransition(states[i].transition);
|
||||||
states[i].transitionMin = states[i].transition.min;
|
states[i].transitionMin = states[i].transition.min;
|
||||||
states[i].transitionMax = states[i].transition.max;
|
states[i].transitionMax = states[i].transition.max;
|
||||||
|
@ -1119,12 +1122,14 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final int termOffset = termOffsets[termOrd];
|
if (termOrd >= 0) {
|
||||||
final int termLen = termOffsets[1 + termOrd] - termOffset;
|
final int termOffset = termOffsets[termOrd];
|
||||||
|
final int termLen = termOffsets[1 + termOrd] - termOffset;
|
||||||
|
|
||||||
if (termOrd >= 0 && !startTerm.equals(new BytesRef(termBytes, termOffset, termLen))) {
|
if (!startTerm.equals(new BytesRef(termBytes, termOffset, termLen))) {
|
||||||
stateUpto -= skipUpto;
|
stateUpto -= skipUpto;
|
||||||
termOrd--;
|
termOrd--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// System.out.println(" loop end; return termOrd=" + termOrd + " stateUpto=" +
|
// System.out.println(" loop end; return termOrd=" + termOrd + " stateUpto=" +
|
||||||
|
|
|
@ -2305,31 +2305,26 @@ public final class CheckIndex implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test Terms#intersect
|
// Test Terms#intersect
|
||||||
TermsEnum allTerms = terms.iterator();
|
|
||||||
// An automaton that should match a good number of terms
|
// An automaton that should match a good number of terms
|
||||||
Automaton a =
|
Automaton automaton =
|
||||||
Operations.concatenate(
|
Operations.concatenate(
|
||||||
Arrays.asList(
|
Arrays.asList(
|
||||||
Automata.makeAnyBinary(),
|
Automata.makeAnyBinary(),
|
||||||
Automata.makeCharRange('a', 'e'),
|
Automata.makeCharRange('a', 'e'),
|
||||||
Automata.makeAnyBinary()));
|
Automata.makeAnyBinary()));
|
||||||
a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
BytesRef startTerm = null;
|
||||||
CompiledAutomaton ca = new CompiledAutomaton(a);
|
checkTermsIntersect(terms, automaton, startTerm);
|
||||||
ByteRunAutomaton runAutomaton = new ByteRunAutomaton(a);
|
|
||||||
TermsEnum filteredTerms = terms.intersect(ca, null);
|
startTerm = new BytesRef();
|
||||||
for (BytesRef term = allTerms.next(); term != null; term = allTerms.next()) {
|
checkTermsIntersect(terms, automaton, startTerm);
|
||||||
if (runAutomaton.run(term.bytes, term.offset, term.length)) {
|
|
||||||
BytesRef filteredTerm = filteredTerms.next();
|
automaton = Automata.makeAnyBinary();
|
||||||
if (Objects.equals(term, filteredTerm) == false) {
|
startTerm = new BytesRef(new byte[] {'l'});
|
||||||
throw new CheckIndexException(
|
checkTermsIntersect(terms, automaton, startTerm);
|
||||||
"Expected next filtered term: " + term + ", but got " + filteredTerm);
|
|
||||||
}
|
// a term that likely compares greater than every other term in the dictionary
|
||||||
}
|
startTerm = new BytesRef(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF});
|
||||||
}
|
checkTermsIntersect(terms, automaton, startTerm);
|
||||||
BytesRef filteredTerm = filteredTerms.next();
|
|
||||||
if (filteredTerm != null) {
|
|
||||||
throw new CheckIndexException("Expected exhausted TermsEnum, but got " + filteredTerm);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2370,6 +2365,45 @@ public final class CheckIndex implements Closeable {
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Cross-checks {@code terms.intersect(...)} against a brute-force scan of the same terms.
 *
 * <p>The automaton is determinized, then used twice: compiled and handed to
 * {@code terms.intersect} together with {@code startTerm}, and wrapped in a
 * {@code ByteRunAutomaton} that is run against every term produced by {@code terms.iterator()}.
 * Each term accepted by the run automaton must be the next term returned by the intersected
 * enum, and the intersected enum must be exhausted once the scan finishes.
 *
 * @param terms the terms dictionary to verify
 * @param automaton the automaton to intersect with; it is determinized here before use
 * @param startTerm term the scan is positioned after, or {@code null} to scan from the first term
 * @throws IOException declared for the underlying terms enumeration calls
 * @throws CheckIndexException if the intersected enum returns a different term than the
 *     brute-force scan expects, or still has terms left after the scan ends
 */
private static void checkTermsIntersect(Terms terms, Automaton automaton, BytesRef startTerm)
|
||||||
|
throws IOException {
|
||||||
|
TermsEnum allTerms = terms.iterator();
|
||||||
|
automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||||
|
CompiledAutomaton compiledAutomaton = new CompiledAutomaton(automaton);
|
||||||
|
ByteRunAutomaton runAutomaton = new ByteRunAutomaton(automaton);
|
||||||
|
// Enum under test: the implementation's own intersection of the dictionary with the automaton.
TermsEnum filteredTerms = terms.intersect(compiledAutomaton, startTerm);
|
||||||
|
// First reference term to compare against; null means there is nothing to scan.
BytesRef term;
|
||||||
|
if (startTerm != null) {
|
||||||
|
switch (allTerms.seekCeil(startTerm)) {
|
||||||
|
case FOUND:
|
||||||
|
// startTerm itself exists in the dictionary: step past it.
// NOTE(review): presumably because intersect only returns terms strictly after startTerm
// -- confirm against the Terms#intersect contract.
term = allTerms.next();
|
||||||
|
break;
|
||||||
|
case NOT_FOUND:
|
||||||
|
// NOTE(review): assumes seekCeil left the enum on the first term after startTerm, so the
// current term is the first candidate -- confirm against the TermsEnum#seekCeil contract.
term = allTerms.term();
|
||||||
|
break;
|
||||||
|
case END:
|
||||||
|
default:
|
||||||
|
// No candidate term: the loop below is skipped and the intersected enum must be empty.
term = null;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
term = allTerms.next();
|
||||||
|
}
|
||||||
|
// Brute-force scan: every term the run automaton accepts must be the next filtered term.
for (; term != null; term = allTerms.next()) {
|
||||||
|
if (runAutomaton.run(term.bytes, term.offset, term.length)) {
|
||||||
|
BytesRef filteredTerm = filteredTerms.next();
|
||||||
|
// Objects.equals also covers filteredTerm == null, i.e. the intersected enum ended early.
if (Objects.equals(term, filteredTerm) == false) {
|
||||||
|
throw new CheckIndexException(
|
||||||
|
"Expected next filtered term: " + term + ", but got " + filteredTerm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// The scan is done, so the intersected enum must not return any further term.
BytesRef filteredTerm = filteredTerms.next();
|
||||||
|
if (filteredTerm != null) {
|
||||||
|
throw new CheckIndexException("Expected exhausted TermsEnum, but got " + filteredTerm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For use in tests only.
|
* For use in tests only.
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue