mirror of https://github.com/apache/lucene.git
Check `Terms#intersect` in CheckIndex. (#12925)
This commit adds coverage of `Terms#intersect` to `CheckIndex` and indexes `LineFileDocs` in `BasePostingsFormatTestCase` to get some coverage with real-world data. With this change, `TestLucene90PostingsFormat` now exhibits #12895.
This commit is contained in:
parent
05b14e23b1
commit
e0f4321b40
|
@ -83,6 +83,11 @@ import org.apache.lucene.util.NamedThreadFactory;
|
|||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.SuppressForbidden;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
|
||||
/**
|
||||
* Basic tool and API to check the health of an index and write a new segments file that removes
|
||||
|
@ -2298,6 +2303,33 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test Terms#intersect
|
||||
TermsEnum allTerms = terms.iterator();
|
||||
// An automaton that should match a good number of terms
|
||||
Automaton a =
|
||||
Operations.concatenate(
|
||||
Arrays.asList(
|
||||
Automata.makeAnyBinary(),
|
||||
Automata.makeCharRange('a', 'e'),
|
||||
Automata.makeAnyBinary()));
|
||||
a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||
CompiledAutomaton ca = new CompiledAutomaton(a);
|
||||
ByteRunAutomaton runAutomaton = new ByteRunAutomaton(a);
|
||||
TermsEnum filteredTerms = terms.intersect(ca, null);
|
||||
for (BytesRef term = allTerms.next(); term != null; term = allTerms.next()) {
|
||||
if (runAutomaton.run(term.bytes, term.offset, term.length)) {
|
||||
BytesRef filteredTerm = filteredTerms.next();
|
||||
if (Objects.equals(term, filteredTerm) == false) {
|
||||
throw new CheckIndexException(
|
||||
"Expected next filtered term: " + term + ", but got " + filteredTerm);
|
||||
}
|
||||
}
|
||||
}
|
||||
BytesRef filteredTerm = filteredTerms.next();
|
||||
if (filteredTerm != null) {
|
||||
throw new CheckIndexException("Expected exhausted TermsEnum, but got " + filteredTerm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.lucene.codecs.NormsProducer;
|
|||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
|
@ -47,6 +48,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.MultiTerms;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
|
@ -1610,4 +1612,29 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Test realistic data, which is often better at uncovering real bugs. */
|
||||
@Nightly // this test takes a few seconds
|
||||
public void testLineFileDocs() throws IOException {
|
||||
// Use a FS dir and a non-randomized IWC to not slow down indexing
|
||||
try (Directory dir = newFSDirectory(createTempDir())) {
|
||||
try (LineFileDocs docs = new LineFileDocs(random());
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
|
||||
final int numDocs = atLeast(10_000);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
// Only keep the body field, and don't index term vectors on it, we only care about
|
||||
// postings
|
||||
Document doc = docs.nextDoc();
|
||||
IndexableField body = doc.getField("body");
|
||||
assertNotNull(body);
|
||||
assertNotNull(body.stringValue());
|
||||
assertNotEquals(IndexOptions.NONE, body.fieldType().indexOptions());
|
||||
body = new TextField("body", body.stringValue(), Store.NO);
|
||||
w.addDocument(Collections.singletonList(body));
|
||||
}
|
||||
w.forceMerge(1);
|
||||
}
|
||||
TestUtil.checkIndex(dir);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue