mirror of https://github.com/apache/lucene.git
LUCENE-4752: Preliminaries:
- move useful assert*Equals from TestDuelingCodecs to LuceneTestCase, - rename sort to wrap in SortingAtomicReader to better suggest that the returned reader is a view. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1459037 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
05a1bb2cb9
commit
affb12e11b
|
@ -18,32 +18,18 @@ package org.apache.lucene.index;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.automaton.AutomatonTestUtil;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
|
||||
/**
|
||||
* Compares one codec against another
|
||||
|
@ -157,646 +143,8 @@ public class TestDuelingCodecs extends LuceneTestCase {
|
|||
/**
|
||||
* checks the two indexes are equivalent
|
||||
*/
|
||||
public void testEquals() throws Exception {
|
||||
assertReaderStatistics(leftReader, rightReader);
|
||||
assertFields(MultiFields.getFields(leftReader), MultiFields.getFields(rightReader), true);
|
||||
assertNorms(leftReader, rightReader);
|
||||
assertStoredFields(leftReader, rightReader);
|
||||
assertTermVectors(leftReader, rightReader);
|
||||
assertDocValues(leftReader, rightReader);
|
||||
assertDeletedDocs(leftReader, rightReader);
|
||||
assertFieldInfos(leftReader, rightReader);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that reader-level statistics are the same
|
||||
*/
|
||||
public void assertReaderStatistics(IndexReader leftReader, IndexReader rightReader) throws Exception {
|
||||
// Somewhat redundant: we never delete docs
|
||||
assertEquals(info, leftReader.maxDoc(), rightReader.maxDoc());
|
||||
assertEquals(info, leftReader.numDocs(), rightReader.numDocs());
|
||||
assertEquals(info, leftReader.numDeletedDocs(), rightReader.numDeletedDocs());
|
||||
assertEquals(info, leftReader.hasDeletions(), rightReader.hasDeletions());
|
||||
}
|
||||
|
||||
/**
|
||||
* Fields api equivalency
|
||||
*/
|
||||
public void assertFields(Fields leftFields, Fields rightFields, boolean deep) throws Exception {
|
||||
// Fields could be null if there are no postings,
|
||||
// but then it must be null for both
|
||||
if (leftFields == null || rightFields == null) {
|
||||
assertNull(info, leftFields);
|
||||
assertNull(info, rightFields);
|
||||
return;
|
||||
}
|
||||
assertFieldStatistics(leftFields, rightFields);
|
||||
|
||||
Iterator<String> leftEnum = leftFields.iterator();
|
||||
Iterator<String> rightEnum = rightFields.iterator();
|
||||
|
||||
while (leftEnum.hasNext()) {
|
||||
String field = leftEnum.next();
|
||||
assertEquals(info, field, rightEnum.next());
|
||||
assertTerms(leftFields.terms(field), rightFields.terms(field), deep);
|
||||
}
|
||||
assertFalse(rightEnum.hasNext());
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that top-level statistics on Fields are the same
|
||||
*/
|
||||
public void assertFieldStatistics(Fields leftFields, Fields rightFields) throws Exception {
|
||||
if (leftFields.size() != -1 && rightFields.size() != -1) {
|
||||
assertEquals(info, leftFields.size(), rightFields.size());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Terms api equivalency
|
||||
*/
|
||||
public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception {
|
||||
if (leftTerms == null || rightTerms == null) {
|
||||
assertNull(info, leftTerms);
|
||||
assertNull(info, rightTerms);
|
||||
return;
|
||||
}
|
||||
assertTermsStatistics(leftTerms, rightTerms);
|
||||
assertEquals(leftTerms.hasOffsets(), rightTerms.hasOffsets());
|
||||
assertEquals(leftTerms.hasPositions(), rightTerms.hasPositions());
|
||||
assertEquals(leftTerms.hasPayloads(), rightTerms.hasPayloads());
|
||||
|
||||
TermsEnum leftTermsEnum = leftTerms.iterator(null);
|
||||
TermsEnum rightTermsEnum = rightTerms.iterator(null);
|
||||
assertTermsEnum(leftTermsEnum, rightTermsEnum, true);
|
||||
|
||||
assertTermsSeeking(leftTerms, rightTerms);
|
||||
|
||||
if (deep) {
|
||||
int numIntersections = atLeast(3);
|
||||
for (int i = 0; i < numIntersections; i++) {
|
||||
String re = AutomatonTestUtil.randomRegexp(random());
|
||||
CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
|
||||
if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
|
||||
// TODO: test start term too
|
||||
TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
|
||||
TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
|
||||
assertTermsEnum(leftIntersection, rightIntersection, rarely());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception {
|
||||
TermsEnum leftEnum = null;
|
||||
TermsEnum rightEnum = null;
|
||||
|
||||
// just an upper bound
|
||||
int numTests = atLeast(20);
|
||||
Random random = random();
|
||||
|
||||
// collect this number of terms from the left side
|
||||
HashSet<BytesRef> tests = new HashSet<BytesRef>();
|
||||
int numPasses = 0;
|
||||
while (numPasses < 10 && tests.size() < numTests) {
|
||||
leftEnum = leftTerms.iterator(leftEnum);
|
||||
BytesRef term = null;
|
||||
while ((term = leftEnum.next()) != null) {
|
||||
int code = random.nextInt(10);
|
||||
if (code == 0) {
|
||||
// the term
|
||||
tests.add(BytesRef.deepCopyOf(term));
|
||||
} else if (code == 1) {
|
||||
// truncated subsequence of term
|
||||
term = BytesRef.deepCopyOf(term);
|
||||
if (term.length > 0) {
|
||||
// truncate it
|
||||
term.length = random.nextInt(term.length);
|
||||
}
|
||||
} else if (code == 2) {
|
||||
// term, but ensure a non-zero offset
|
||||
byte newbytes[] = new byte[term.length+5];
|
||||
System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length);
|
||||
tests.add(new BytesRef(newbytes, 5, term.length));
|
||||
} else if (code == 3) {
|
||||
switch (random().nextInt(3)) {
|
||||
case 0:
|
||||
tests.add(new BytesRef()); // before the first term
|
||||
break;
|
||||
case 1:
|
||||
tests.add(new BytesRef(new byte[] {(byte) 0xFF, (byte) 0xFF})); // past the last term
|
||||
break;
|
||||
case 2:
|
||||
tests.add(new BytesRef(_TestUtil.randomSimpleString(random()))); // random term
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
}
|
||||
numPasses++;
|
||||
}
|
||||
|
||||
rightEnum = rightTerms.iterator(rightEnum);
|
||||
|
||||
ArrayList<BytesRef> shuffledTests = new ArrayList<BytesRef>(tests);
|
||||
Collections.shuffle(shuffledTests, random);
|
||||
|
||||
for (BytesRef b : shuffledTests) {
|
||||
if (rarely()) {
|
||||
// reuse the enums
|
||||
leftEnum = leftTerms.iterator(leftEnum);
|
||||
rightEnum = rightTerms.iterator(rightEnum);
|
||||
}
|
||||
|
||||
final boolean useCache = random().nextBoolean();
|
||||
final boolean seekExact = random().nextBoolean();
|
||||
|
||||
if (seekExact) {
|
||||
assertEquals(info, leftEnum.seekExact(b, useCache), rightEnum.seekExact(b, useCache));
|
||||
} else {
|
||||
SeekStatus leftStatus = leftEnum.seekCeil(b, useCache);
|
||||
SeekStatus rightStatus = rightEnum.seekCeil(b, useCache);
|
||||
assertEquals(info, leftStatus, rightStatus);
|
||||
if (leftStatus != SeekStatus.END) {
|
||||
assertEquals(info, leftEnum.term(), rightEnum.term());
|
||||
assertTermStats(leftEnum, rightEnum);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks collection-level statistics on Terms
|
||||
*/
|
||||
public void assertTermsStatistics(Terms leftTerms, Terms rightTerms) throws Exception {
|
||||
assert leftTerms.getComparator() == rightTerms.getComparator();
|
||||
if (leftTerms.getDocCount() != -1 && rightTerms.getDocCount() != -1) {
|
||||
assertEquals(info, leftTerms.getDocCount(), rightTerms.getDocCount());
|
||||
}
|
||||
if (leftTerms.getSumDocFreq() != -1 && rightTerms.getSumDocFreq() != -1) {
|
||||
assertEquals(info, leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq());
|
||||
}
|
||||
if (leftTerms.getSumTotalTermFreq() != -1 && rightTerms.getSumTotalTermFreq() != -1) {
|
||||
assertEquals(info, leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq());
|
||||
}
|
||||
if (leftTerms.size() != -1 && rightTerms.size() != -1) {
|
||||
assertEquals(info, leftTerms.size(), rightTerms.size());
|
||||
}
|
||||
public void testEquals() throws IOException {
|
||||
assertReaderEquals(info, leftReader, rightReader);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks the terms enum sequentially
|
||||
* if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
|
||||
*/
|
||||
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep) throws Exception {
|
||||
BytesRef term;
|
||||
Bits randomBits = new RandomBits(leftReader.maxDoc(), random().nextDouble(), random());
|
||||
DocsAndPositionsEnum leftPositions = null;
|
||||
DocsAndPositionsEnum rightPositions = null;
|
||||
DocsEnum leftDocs = null;
|
||||
DocsEnum rightDocs = null;
|
||||
|
||||
while ((term = leftTermsEnum.next()) != null) {
|
||||
assertEquals(info, term, rightTermsEnum.next());
|
||||
assertTermStats(leftTermsEnum, rightTermsEnum);
|
||||
if (deep) {
|
||||
assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions),
|
||||
rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions));
|
||||
assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions),
|
||||
rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions));
|
||||
|
||||
assertPositionsSkipping(leftTermsEnum.docFreq(),
|
||||
leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions),
|
||||
rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions));
|
||||
assertPositionsSkipping(leftTermsEnum.docFreq(),
|
||||
leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions),
|
||||
rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions));
|
||||
|
||||
// with freqs:
|
||||
assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs),
|
||||
true);
|
||||
assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs),
|
||||
true);
|
||||
|
||||
// w/o freqs:
|
||||
assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs, DocsEnum.FLAG_NONE),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs, DocsEnum.FLAG_NONE),
|
||||
false);
|
||||
assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs, DocsEnum.FLAG_NONE),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, DocsEnum.FLAG_NONE),
|
||||
false);
|
||||
|
||||
// with freqs:
|
||||
assertDocsSkipping(leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(null, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs),
|
||||
true);
|
||||
assertDocsSkipping(leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(randomBits, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs),
|
||||
true);
|
||||
|
||||
// w/o freqs:
|
||||
assertDocsSkipping(leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(null, leftDocs, DocsEnum.FLAG_NONE),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs, DocsEnum.FLAG_NONE),
|
||||
false);
|
||||
assertDocsSkipping(leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(randomBits, leftDocs, DocsEnum.FLAG_NONE),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, DocsEnum.FLAG_NONE),
|
||||
false);
|
||||
}
|
||||
}
|
||||
assertNull(info, rightTermsEnum.next());
|
||||
}
|
||||
|
||||
/**
|
||||
* checks term-level statistics
|
||||
*/
|
||||
public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum) throws Exception {
|
||||
assertEquals(info, leftTermsEnum.docFreq(), rightTermsEnum.docFreq());
|
||||
if (leftTermsEnum.totalTermFreq() != -1 && rightTermsEnum.totalTermFreq() != -1) {
|
||||
assertEquals(info, leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks docs + freqs + positions + payloads, sequentially
|
||||
*/
|
||||
public void assertDocsAndPositionsEnum(DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) throws Exception {
|
||||
if (leftDocs == null || rightDocs == null) {
|
||||
assertNull(leftDocs);
|
||||
assertNull(rightDocs);
|
||||
return;
|
||||
}
|
||||
assertTrue(info, leftDocs.docID() == -1 || leftDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(info, rightDocs.docID() == -1 || rightDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
int docid;
|
||||
while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
assertEquals(info, docid, rightDocs.nextDoc());
|
||||
int freq = leftDocs.freq();
|
||||
assertEquals(info, freq, rightDocs.freq());
|
||||
for (int i = 0; i < freq; i++) {
|
||||
assertEquals(info, leftDocs.nextPosition(), rightDocs.nextPosition());
|
||||
assertEquals(info, leftDocs.getPayload(), rightDocs.getPayload());
|
||||
assertEquals(info, leftDocs.startOffset(), rightDocs.startOffset());
|
||||
assertEquals(info, leftDocs.endOffset(), rightDocs.endOffset());
|
||||
}
|
||||
}
|
||||
assertEquals(info, DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
|
||||
}
|
||||
|
||||
/**
|
||||
* checks docs + freqs, sequentially
|
||||
*/
|
||||
public void assertDocsEnum(DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws Exception {
|
||||
if (leftDocs == null) {
|
||||
assertNull(rightDocs);
|
||||
return;
|
||||
}
|
||||
assertTrue(info, leftDocs.docID() == -1 || leftDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(info, rightDocs.docID() == -1 || rightDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
int docid;
|
||||
while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
assertEquals(info, docid, rightDocs.nextDoc());
|
||||
if (hasFreqs) {
|
||||
assertEquals(info, leftDocs.freq(), rightDocs.freq());
|
||||
}
|
||||
}
|
||||
assertEquals(info, DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
|
||||
}
|
||||
|
||||
/**
|
||||
* checks advancing docs
|
||||
*/
|
||||
public void assertDocsSkipping(int docFreq, DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws Exception {
|
||||
if (leftDocs == null) {
|
||||
assertNull(rightDocs);
|
||||
return;
|
||||
}
|
||||
int docid = -1;
|
||||
int averageGap = leftReader.maxDoc() / (1+docFreq);
|
||||
int skipInterval = 16;
|
||||
|
||||
while (true) {
|
||||
if (random().nextBoolean()) {
|
||||
// nextDoc()
|
||||
docid = leftDocs.nextDoc();
|
||||
assertEquals(info, docid, rightDocs.nextDoc());
|
||||
} else {
|
||||
// advance()
|
||||
int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
|
||||
docid = leftDocs.advance(skip);
|
||||
assertEquals(info, docid, rightDocs.advance(skip));
|
||||
}
|
||||
|
||||
if (docid == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return;
|
||||
}
|
||||
if (hasFreqs) {
|
||||
assertEquals(info, leftDocs.freq(), rightDocs.freq());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks advancing docs + positions
|
||||
*/
|
||||
public void assertPositionsSkipping(int docFreq, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) throws Exception {
|
||||
if (leftDocs == null || rightDocs == null) {
|
||||
assertNull(leftDocs);
|
||||
assertNull(rightDocs);
|
||||
return;
|
||||
}
|
||||
|
||||
int docid = -1;
|
||||
int averageGap = leftReader.maxDoc() / (1+docFreq);
|
||||
int skipInterval = 16;
|
||||
|
||||
while (true) {
|
||||
if (random().nextBoolean()) {
|
||||
// nextDoc()
|
||||
docid = leftDocs.nextDoc();
|
||||
assertEquals(info, docid, rightDocs.nextDoc());
|
||||
} else {
|
||||
// advance()
|
||||
int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
|
||||
docid = leftDocs.advance(skip);
|
||||
assertEquals(info, docid, rightDocs.advance(skip));
|
||||
}
|
||||
|
||||
if (docid == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return;
|
||||
}
|
||||
int freq = leftDocs.freq();
|
||||
assertEquals(info, freq, rightDocs.freq());
|
||||
for (int i = 0; i < freq; i++) {
|
||||
assertEquals(info, leftDocs.nextPosition(), rightDocs.nextPosition());
|
||||
assertEquals(info, leftDocs.getPayload(), rightDocs.getPayload());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that norms are the same across all fields
|
||||
*/
|
||||
public void assertNorms(IndexReader leftReader, IndexReader rightReader) throws Exception {
|
||||
Fields leftFields = MultiFields.getFields(leftReader);
|
||||
Fields rightFields = MultiFields.getFields(rightReader);
|
||||
// Fields could be null if there are no postings,
|
||||
// but then it must be null for both
|
||||
if (leftFields == null || rightFields == null) {
|
||||
assertNull(info, leftFields);
|
||||
assertNull(info, rightFields);
|
||||
return;
|
||||
}
|
||||
|
||||
for (String field : leftFields) {
|
||||
NumericDocValues leftNorms = MultiDocValues.getNormValues(leftReader, field);
|
||||
NumericDocValues rightNorms = MultiDocValues.getNormValues(rightReader, field);
|
||||
if (leftNorms != null && rightNorms != null) {
|
||||
assertDocValues(leftReader.maxDoc(), leftNorms, rightNorms);
|
||||
} else {
|
||||
assertNull(info, leftNorms);
|
||||
assertNull(info, rightNorms);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that stored fields of all documents are the same
|
||||
*/
|
||||
public void assertStoredFields(IndexReader leftReader, IndexReader rightReader) throws Exception {
|
||||
assert leftReader.maxDoc() == rightReader.maxDoc();
|
||||
for (int i = 0; i < leftReader.maxDoc(); i++) {
|
||||
StoredDocument leftDoc = leftReader.document(i);
|
||||
StoredDocument rightDoc = rightReader.document(i);
|
||||
|
||||
// TODO: I think this is bogus because we don't document what the order should be
|
||||
// from these iterators, etc. I think the codec/IndexReader should be free to order this stuff
|
||||
// in whatever way it wants (e.g. maybe it packs related fields together or something)
|
||||
// To fix this, we sort the fields in both documents by name, but
|
||||
// we still assume that all instances with same name are in order:
|
||||
Comparator<StorableField> comp = new Comparator<StorableField>() {
|
||||
@Override
|
||||
public int compare(StorableField arg0, StorableField arg1) {
|
||||
return arg0.name().compareTo(arg1.name());
|
||||
}
|
||||
};
|
||||
Collections.sort(leftDoc.getFields(), comp);
|
||||
Collections.sort(rightDoc.getFields(), comp);
|
||||
|
||||
Iterator<StorableField> leftIterator = leftDoc.iterator();
|
||||
Iterator<StorableField> rightIterator = rightDoc.iterator();
|
||||
while (leftIterator.hasNext()) {
|
||||
assertTrue(info, rightIterator.hasNext());
|
||||
assertStoredField(leftIterator.next(), rightIterator.next());
|
||||
}
|
||||
assertFalse(info, rightIterator.hasNext());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that two stored fields are equivalent
|
||||
*/
|
||||
public void assertStoredField(StorableField leftField, StorableField rightField) {
|
||||
assertEquals(info, leftField.name(), rightField.name());
|
||||
assertEquals(info, leftField.binaryValue(), rightField.binaryValue());
|
||||
assertEquals(info, leftField.stringValue(), rightField.stringValue());
|
||||
assertEquals(info, leftField.numericValue(), rightField.numericValue());
|
||||
// TODO: should we check the FT at all?
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that term vectors across all fields are equivalent
|
||||
*/
|
||||
public void assertTermVectors(IndexReader leftReader, IndexReader rightReader) throws Exception {
|
||||
assert leftReader.maxDoc() == rightReader.maxDoc();
|
||||
for (int i = 0; i < leftReader.maxDoc(); i++) {
|
||||
Fields leftFields = leftReader.getTermVectors(i);
|
||||
Fields rightFields = rightReader.getTermVectors(i);
|
||||
assertFields(leftFields, rightFields, rarely());
|
||||
}
|
||||
}
|
||||
|
||||
private static Set<String> getDVFields(IndexReader reader) {
|
||||
Set<String> fields = new HashSet<String>();
|
||||
for(FieldInfo fi : MultiFields.getMergedFieldInfos(reader)) {
|
||||
if (fi.hasDocValues()) {
|
||||
fields.add(fi.name);
|
||||
}
|
||||
}
|
||||
|
||||
return fields;
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that docvalues across all fields are equivalent
|
||||
*/
|
||||
public void assertDocValues(IndexReader leftReader, IndexReader rightReader) throws Exception {
|
||||
Set<String> leftFields = getDVFields(leftReader);
|
||||
Set<String> rightFields = getDVFields(rightReader);
|
||||
assertEquals(info, leftFields, rightFields);
|
||||
|
||||
for (String field : leftFields) {
|
||||
// TODO: clean this up... very messy
|
||||
{
|
||||
NumericDocValues leftValues = MultiDocValues.getNumericValues(leftReader, field);
|
||||
NumericDocValues rightValues = MultiDocValues.getNumericValues(rightReader, field);
|
||||
if (leftValues != null && rightValues != null) {
|
||||
assertDocValues(leftReader.maxDoc(), leftValues, rightValues);
|
||||
} else {
|
||||
assertNull(info, leftValues);
|
||||
assertNull(info, rightValues);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
BinaryDocValues leftValues = MultiDocValues.getBinaryValues(leftReader, field);
|
||||
BinaryDocValues rightValues = MultiDocValues.getBinaryValues(rightReader, field);
|
||||
if (leftValues != null && rightValues != null) {
|
||||
BytesRef scratchLeft = new BytesRef();
|
||||
BytesRef scratchRight = new BytesRef();
|
||||
for(int docID=0;docID<leftReader.maxDoc();docID++) {
|
||||
leftValues.get(docID, scratchLeft);
|
||||
rightValues.get(docID, scratchRight);
|
||||
assertEquals(info, scratchLeft, scratchRight);
|
||||
}
|
||||
} else {
|
||||
assertNull(info, leftValues);
|
||||
assertNull(info, rightValues);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
SortedDocValues leftValues = MultiDocValues.getSortedValues(leftReader, field);
|
||||
SortedDocValues rightValues = MultiDocValues.getSortedValues(rightReader, field);
|
||||
if (leftValues != null && rightValues != null) {
|
||||
// numOrds
|
||||
assertEquals(info, leftValues.getValueCount(), rightValues.getValueCount());
|
||||
// ords
|
||||
BytesRef scratchLeft = new BytesRef();
|
||||
BytesRef scratchRight = new BytesRef();
|
||||
for (int i = 0; i < leftValues.getValueCount(); i++) {
|
||||
leftValues.lookupOrd(i, scratchLeft);
|
||||
rightValues.lookupOrd(i, scratchRight);
|
||||
assertEquals(info, scratchLeft, scratchRight);
|
||||
}
|
||||
// bytes
|
||||
for(int docID=0;docID<leftReader.maxDoc();docID++) {
|
||||
leftValues.get(docID, scratchLeft);
|
||||
rightValues.get(docID, scratchRight);
|
||||
assertEquals(info, scratchLeft, scratchRight);
|
||||
}
|
||||
} else {
|
||||
assertNull(info, leftValues);
|
||||
assertNull(info, rightValues);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
SortedSetDocValues leftValues = MultiDocValues.getSortedSetValues(leftReader, field);
|
||||
SortedSetDocValues rightValues = MultiDocValues.getSortedSetValues(rightReader, field);
|
||||
if (leftValues != null && rightValues != null) {
|
||||
// numOrds
|
||||
assertEquals(info, leftValues.getValueCount(), rightValues.getValueCount());
|
||||
// ords
|
||||
BytesRef scratchLeft = new BytesRef();
|
||||
BytesRef scratchRight = new BytesRef();
|
||||
for (int i = 0; i < leftValues.getValueCount(); i++) {
|
||||
leftValues.lookupOrd(i, scratchLeft);
|
||||
rightValues.lookupOrd(i, scratchRight);
|
||||
assertEquals(info, scratchLeft, scratchRight);
|
||||
}
|
||||
// ord lists
|
||||
for(int docID=0;docID<leftReader.maxDoc();docID++) {
|
||||
leftValues.setDocument(docID);
|
||||
rightValues.setDocument(docID);
|
||||
long ord;
|
||||
while ((ord = leftValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||
assertEquals(info, ord, rightValues.nextOrd());
|
||||
}
|
||||
assertEquals(info, SortedSetDocValues.NO_MORE_ORDS, rightValues.nextOrd());
|
||||
}
|
||||
} else {
|
||||
assertNull(info, leftValues);
|
||||
assertNull(info, rightValues);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void assertDocValues(int num, NumericDocValues leftDocValues, NumericDocValues rightDocValues) throws Exception {
|
||||
assertNotNull(info, leftDocValues);
|
||||
assertNotNull(info, rightDocValues);
|
||||
for(int docID=0;docID<num;docID++) {
|
||||
assertEquals(leftDocValues.get(docID),
|
||||
rightDocValues.get(docID));
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: this is kinda stupid, we don't delete documents in the test.
|
||||
public void assertDeletedDocs(IndexReader leftReader, IndexReader rightReader) throws Exception {
|
||||
assert leftReader.numDeletedDocs() == rightReader.numDeletedDocs();
|
||||
Bits leftBits = MultiFields.getLiveDocs(leftReader);
|
||||
Bits rightBits = MultiFields.getLiveDocs(rightReader);
|
||||
|
||||
if (leftBits == null || rightBits == null) {
|
||||
assertNull(info, leftBits);
|
||||
assertNull(info, rightBits);
|
||||
return;
|
||||
}
|
||||
|
||||
assert leftReader.maxDoc() == rightReader.maxDoc();
|
||||
assertEquals(info, leftBits.length(), rightBits.length());
|
||||
for (int i = 0; i < leftReader.maxDoc(); i++) {
|
||||
assertEquals(info, leftBits.get(i), rightBits.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
public void assertFieldInfos(IndexReader leftReader, IndexReader rightReader) throws Exception {
|
||||
FieldInfos leftInfos = MultiFields.getMergedFieldInfos(leftReader);
|
||||
FieldInfos rightInfos = MultiFields.getMergedFieldInfos(rightReader);
|
||||
|
||||
// TODO: would be great to verify more than just the names of the fields!
|
||||
TreeSet<String> left = new TreeSet<String>();
|
||||
TreeSet<String> right = new TreeSet<String>();
|
||||
|
||||
for (FieldInfo fi : leftInfos) {
|
||||
left.add(fi.name);
|
||||
}
|
||||
|
||||
for (FieldInfo fi : rightInfos) {
|
||||
right.add(fi.name);
|
||||
}
|
||||
|
||||
assertEquals(info, left, right);
|
||||
}
|
||||
|
||||
|
||||
private static class RandomBits implements Bits {
|
||||
FixedBitSet bits;
|
||||
|
||||
RandomBits(int maxDoc, double pctLive, Random random) {
|
||||
bits = new FixedBitSet(maxDoc);
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
if (random.nextDouble() <= pctLive) {
|
||||
bits.set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean get(int index) {
|
||||
return bits.get(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return bits.length();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -393,7 +393,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
|
||||
@Override
|
||||
public int docID() {
|
||||
return docIt >= upto ? NO_MORE_DOCS : docs[docIt];
|
||||
return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt];
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -605,7 +605,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
/** Return a sorted view of <code>reader</code> according to the order
|
||||
* defined by <code>sorter</code>. If the reader is already sorted, this
|
||||
* method might return the reader as-is. */
|
||||
public static AtomicReader sort(AtomicReader reader, Sorter sorter) throws IOException {
|
||||
public static AtomicReader wrap(AtomicReader reader, Sorter sorter) throws IOException {
|
||||
final Sorter.DocMap docMap = sorter.sort(reader);
|
||||
if (docMap == null) {
|
||||
// the reader is already sorted
|
||||
|
|
|
@ -61,7 +61,7 @@ public class IndexSortingTest extends SorterTestBase {
|
|||
|
||||
Directory target = newDirectory();
|
||||
IndexWriter writer = new IndexWriter(target, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
|
||||
reader = SortingAtomicReader.sort(reader, sorter);
|
||||
reader = SortingAtomicReader.wrap(reader, sorter);
|
||||
writer.addIndexes(reader);
|
||||
writer.close();
|
||||
reader.close();
|
||||
|
|
|
@ -47,7 +47,6 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
|
|||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
|
|
|
@ -56,7 +56,7 @@ public class SortingAtomicReaderTest extends SorterTestBase {
|
|||
System.out.println("sortedValues: " + Arrays.toString(sortedValues));
|
||||
}
|
||||
|
||||
reader = SortingAtomicReader.sort(reader, new Sorter() {
|
||||
reader = SortingAtomicReader.wrap(reader, new Sorter() {
|
||||
@Override
|
||||
public Sorter.DocMap sort(AtomicReader reader) throws IOException {
|
||||
return docMap;
|
||||
|
@ -76,5 +76,5 @@ public class SortingAtomicReaderTest extends SorterTestBase {
|
|||
|
||||
_TestUtil.checkReader(reader);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.lucene.document.StringField;
|
|||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.IndexReader.ReaderClosedListener;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.FieldCache.CacheEntry;
|
||||
import org.apache.lucene.search.QueryUtils.FCInvisibleMultiReader;
|
||||
|
@ -41,6 +42,9 @@ import org.apache.lucene.store.*;
|
|||
import org.apache.lucene.store.IOContext.Context;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
|
||||
import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
|
||||
import org.apache.lucene.util.automaton.AutomatonTestUtil;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.junit.*;
|
||||
import org.junit.rules.RuleChain;
|
||||
import org.junit.rules.TestRule;
|
||||
|
@ -1311,4 +1315,648 @@ public abstract class LuceneTestCase extends Assert {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
|
||||
assertReaderStatisticsEquals(info, leftReader, rightReader);
|
||||
assertFieldsEquals(info, leftReader, MultiFields.getFields(leftReader), MultiFields.getFields(rightReader), true);
|
||||
assertNormsEquals(info, leftReader, rightReader);
|
||||
assertStoredFieldsEquals(info, leftReader, rightReader);
|
||||
assertTermVectorsEquals(info, leftReader, rightReader);
|
||||
assertDocValuesEquals(info, leftReader, rightReader);
|
||||
assertDeletedDocsEquals(info, leftReader, rightReader);
|
||||
assertFieldInfosEquals(info, leftReader, rightReader);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that reader-level statistics are the same
|
||||
*/
|
||||
public void assertReaderStatisticsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
|
||||
// Somewhat redundant: we never delete docs
|
||||
assertEquals(info, leftReader.maxDoc(), rightReader.maxDoc());
|
||||
assertEquals(info, leftReader.numDocs(), rightReader.numDocs());
|
||||
assertEquals(info, leftReader.numDeletedDocs(), rightReader.numDeletedDocs());
|
||||
assertEquals(info, leftReader.hasDeletions(), rightReader.hasDeletions());
|
||||
}
|
||||
|
||||
/**
|
||||
* Fields api equivalency
|
||||
*/
|
||||
public void assertFieldsEquals(String info, IndexReader leftReader, Fields leftFields, Fields rightFields, boolean deep) throws IOException {
|
||||
// Fields could be null if there are no postings,
|
||||
// but then it must be null for both
|
||||
if (leftFields == null || rightFields == null) {
|
||||
assertNull(info, leftFields);
|
||||
assertNull(info, rightFields);
|
||||
return;
|
||||
}
|
||||
assertFieldStatisticsEquals(info, leftFields, rightFields);
|
||||
|
||||
Iterator<String> leftEnum = leftFields.iterator();
|
||||
Iterator<String> rightEnum = rightFields.iterator();
|
||||
|
||||
while (leftEnum.hasNext()) {
|
||||
String field = leftEnum.next();
|
||||
assertEquals(info, field, rightEnum.next());
|
||||
assertTermsEquals(info, leftReader, leftFields.terms(field), rightFields.terms(field), deep);
|
||||
}
|
||||
assertFalse(rightEnum.hasNext());
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that top-level statistics on Fields are the same
|
||||
*/
|
||||
public void assertFieldStatisticsEquals(String info, Fields leftFields, Fields rightFields) throws IOException {
|
||||
if (leftFields.size() != -1 && rightFields.size() != -1) {
|
||||
assertEquals(info, leftFields.size(), rightFields.size());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Terms api equivalency
|
||||
*/
|
||||
public void assertTermsEquals(String info, IndexReader leftReader, Terms leftTerms, Terms rightTerms, boolean deep) throws IOException {
|
||||
if (leftTerms == null || rightTerms == null) {
|
||||
assertNull(info, leftTerms);
|
||||
assertNull(info, rightTerms);
|
||||
return;
|
||||
}
|
||||
assertTermsStatisticsEquals(info, leftTerms, rightTerms);
|
||||
assertEquals(leftTerms.hasOffsets(), rightTerms.hasOffsets());
|
||||
assertEquals(leftTerms.hasPositions(), rightTerms.hasPositions());
|
||||
assertEquals(leftTerms.hasPayloads(), rightTerms.hasPayloads());
|
||||
|
||||
TermsEnum leftTermsEnum = leftTerms.iterator(null);
|
||||
TermsEnum rightTermsEnum = rightTerms.iterator(null);
|
||||
assertTermsEnumEquals(info, leftReader, leftTermsEnum, rightTermsEnum, true);
|
||||
|
||||
assertTermsSeekingEquals(info, leftTerms, rightTerms);
|
||||
|
||||
if (deep) {
|
||||
int numIntersections = atLeast(3);
|
||||
for (int i = 0; i < numIntersections; i++) {
|
||||
String re = AutomatonTestUtil.randomRegexp(random());
|
||||
CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
|
||||
if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
|
||||
// TODO: test start term too
|
||||
TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
|
||||
TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
|
||||
assertTermsEnumEquals(info, leftReader, leftIntersection, rightIntersection, rarely());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks collection-level statistics on Terms
|
||||
*/
|
||||
public void assertTermsStatisticsEquals(String info, Terms leftTerms, Terms rightTerms) throws IOException {
|
||||
assert leftTerms.getComparator() == rightTerms.getComparator();
|
||||
if (leftTerms.getDocCount() != -1 && rightTerms.getDocCount() != -1) {
|
||||
assertEquals(info, leftTerms.getDocCount(), rightTerms.getDocCount());
|
||||
}
|
||||
if (leftTerms.getSumDocFreq() != -1 && rightTerms.getSumDocFreq() != -1) {
|
||||
assertEquals(info, leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq());
|
||||
}
|
||||
if (leftTerms.getSumTotalTermFreq() != -1 && rightTerms.getSumTotalTermFreq() != -1) {
|
||||
assertEquals(info, leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq());
|
||||
}
|
||||
if (leftTerms.size() != -1 && rightTerms.size() != -1) {
|
||||
assertEquals(info, leftTerms.size(), rightTerms.size());
|
||||
}
|
||||
}
|
||||
|
||||
private static class RandomBits implements Bits {
|
||||
FixedBitSet bits;
|
||||
|
||||
RandomBits(int maxDoc, double pctLive, Random random) {
|
||||
bits = new FixedBitSet(maxDoc);
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
if (random.nextDouble() <= pctLive) {
|
||||
bits.set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean get(int index) {
|
||||
return bits.get(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return bits.length();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks the terms enum sequentially
|
||||
* if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
|
||||
*/
|
||||
public void assertTermsEnumEquals(String info, IndexReader leftReader, TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep) throws IOException {
|
||||
BytesRef term;
|
||||
Bits randomBits = new RandomBits(leftReader.maxDoc(), random().nextDouble(), random());
|
||||
DocsAndPositionsEnum leftPositions = null;
|
||||
DocsAndPositionsEnum rightPositions = null;
|
||||
DocsEnum leftDocs = null;
|
||||
DocsEnum rightDocs = null;
|
||||
|
||||
while ((term = leftTermsEnum.next()) != null) {
|
||||
assertEquals(info, term, rightTermsEnum.next());
|
||||
assertTermStatsEquals(info, leftTermsEnum, rightTermsEnum);
|
||||
if (deep) {
|
||||
assertDocsAndPositionsEnumEquals(info, leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions),
|
||||
rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions));
|
||||
assertDocsAndPositionsEnumEquals(info, leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions),
|
||||
rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions));
|
||||
|
||||
assertPositionsSkippingEquals(info, leftReader, leftTermsEnum.docFreq(),
|
||||
leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions),
|
||||
rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions));
|
||||
assertPositionsSkippingEquals(info, leftReader, leftTermsEnum.docFreq(),
|
||||
leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions),
|
||||
rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions));
|
||||
|
||||
// with freqs:
|
||||
assertDocsEnumEquals(info, leftDocs = leftTermsEnum.docs(null, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs),
|
||||
true);
|
||||
assertDocsEnumEquals(info, leftDocs = leftTermsEnum.docs(randomBits, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs),
|
||||
true);
|
||||
|
||||
// w/o freqs:
|
||||
assertDocsEnumEquals(info, leftDocs = leftTermsEnum.docs(null, leftDocs, DocsEnum.FLAG_NONE),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs, DocsEnum.FLAG_NONE),
|
||||
false);
|
||||
assertDocsEnumEquals(info, leftDocs = leftTermsEnum.docs(randomBits, leftDocs, DocsEnum.FLAG_NONE),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, DocsEnum.FLAG_NONE),
|
||||
false);
|
||||
|
||||
// with freqs:
|
||||
assertDocsSkippingEquals(info, leftReader, leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(null, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs),
|
||||
true);
|
||||
assertDocsSkippingEquals(info, leftReader, leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(randomBits, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs),
|
||||
true);
|
||||
|
||||
// w/o freqs:
|
||||
assertDocsSkippingEquals(info, leftReader, leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(null, leftDocs, DocsEnum.FLAG_NONE),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs, DocsEnum.FLAG_NONE),
|
||||
false);
|
||||
assertDocsSkippingEquals(info, leftReader, leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(randomBits, leftDocs, DocsEnum.FLAG_NONE),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, DocsEnum.FLAG_NONE),
|
||||
false);
|
||||
}
|
||||
}
|
||||
assertNull(info, rightTermsEnum.next());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* checks docs + freqs + positions + payloads, sequentially
|
||||
*/
|
||||
public void assertDocsAndPositionsEnumEquals(String info, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) throws IOException {
|
||||
if (leftDocs == null || rightDocs == null) {
|
||||
assertNull(leftDocs);
|
||||
assertNull(rightDocs);
|
||||
return;
|
||||
}
|
||||
assertTrue(info, leftDocs.docID() == -1 || leftDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(info, rightDocs.docID() == -1 || rightDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
int docid;
|
||||
while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
assertEquals(info, docid, rightDocs.nextDoc());
|
||||
int freq = leftDocs.freq();
|
||||
assertEquals(info, freq, rightDocs.freq());
|
||||
for (int i = 0; i < freq; i++) {
|
||||
assertEquals(info, leftDocs.nextPosition(), rightDocs.nextPosition());
|
||||
assertEquals(info, leftDocs.getPayload(), rightDocs.getPayload());
|
||||
assertEquals(info, leftDocs.startOffset(), rightDocs.startOffset());
|
||||
assertEquals(info, leftDocs.endOffset(), rightDocs.endOffset());
|
||||
}
|
||||
}
|
||||
assertEquals(info, DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
|
||||
}
|
||||
|
||||
/**
|
||||
* checks docs + freqs, sequentially
|
||||
*/
|
||||
public void assertDocsEnumEquals(String info, DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws IOException {
|
||||
if (leftDocs == null) {
|
||||
assertNull(rightDocs);
|
||||
return;
|
||||
}
|
||||
assertTrue(info, leftDocs.docID() == -1 || leftDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(info, rightDocs.docID() == -1 || rightDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
int docid;
|
||||
while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
assertEquals(info, docid, rightDocs.nextDoc());
|
||||
if (hasFreqs) {
|
||||
assertEquals(info, leftDocs.freq(), rightDocs.freq());
|
||||
}
|
||||
}
|
||||
assertEquals(info, DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
|
||||
}
|
||||
|
||||
/**
|
||||
* checks advancing docs
|
||||
*/
|
||||
public void assertDocsSkippingEquals(String info, IndexReader leftReader, int docFreq, DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws IOException {
|
||||
if (leftDocs == null) {
|
||||
assertNull(rightDocs);
|
||||
return;
|
||||
}
|
||||
int docid = -1;
|
||||
int averageGap = leftReader.maxDoc() / (1+docFreq);
|
||||
int skipInterval = 16;
|
||||
|
||||
while (true) {
|
||||
if (random().nextBoolean()) {
|
||||
// nextDoc()
|
||||
docid = leftDocs.nextDoc();
|
||||
assertEquals(info, docid, rightDocs.nextDoc());
|
||||
} else {
|
||||
// advance()
|
||||
int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
|
||||
docid = leftDocs.advance(skip);
|
||||
assertEquals(info, docid, rightDocs.advance(skip));
|
||||
}
|
||||
|
||||
if (docid == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return;
|
||||
}
|
||||
if (hasFreqs) {
|
||||
assertEquals(info, leftDocs.freq(), rightDocs.freq());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks advancing docs + positions
|
||||
*/
|
||||
public void assertPositionsSkippingEquals(String info, IndexReader leftReader, int docFreq, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) throws IOException {
|
||||
if (leftDocs == null || rightDocs == null) {
|
||||
assertNull(leftDocs);
|
||||
assertNull(rightDocs);
|
||||
return;
|
||||
}
|
||||
|
||||
int docid = -1;
|
||||
int averageGap = leftReader.maxDoc() / (1+docFreq);
|
||||
int skipInterval = 16;
|
||||
|
||||
while (true) {
|
||||
if (random().nextBoolean()) {
|
||||
// nextDoc()
|
||||
docid = leftDocs.nextDoc();
|
||||
assertEquals(info, docid, rightDocs.nextDoc());
|
||||
} else {
|
||||
// advance()
|
||||
int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
|
||||
docid = leftDocs.advance(skip);
|
||||
assertEquals(info, docid, rightDocs.advance(skip));
|
||||
}
|
||||
|
||||
if (docid == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return;
|
||||
}
|
||||
int freq = leftDocs.freq();
|
||||
assertEquals(info, freq, rightDocs.freq());
|
||||
for (int i = 0; i < freq; i++) {
|
||||
assertEquals(info, leftDocs.nextPosition(), rightDocs.nextPosition());
|
||||
assertEquals(info, leftDocs.getPayload(), rightDocs.getPayload());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void assertTermsSeekingEquals(String info, Terms leftTerms, Terms rightTerms) throws IOException {
|
||||
TermsEnum leftEnum = null;
|
||||
TermsEnum rightEnum = null;
|
||||
|
||||
// just an upper bound
|
||||
int numTests = atLeast(20);
|
||||
Random random = random();
|
||||
|
||||
// collect this number of terms from the left side
|
||||
HashSet<BytesRef> tests = new HashSet<BytesRef>();
|
||||
int numPasses = 0;
|
||||
while (numPasses < 10 && tests.size() < numTests) {
|
||||
leftEnum = leftTerms.iterator(leftEnum);
|
||||
BytesRef term = null;
|
||||
while ((term = leftEnum.next()) != null) {
|
||||
int code = random.nextInt(10);
|
||||
if (code == 0) {
|
||||
// the term
|
||||
tests.add(BytesRef.deepCopyOf(term));
|
||||
} else if (code == 1) {
|
||||
// truncated subsequence of term
|
||||
term = BytesRef.deepCopyOf(term);
|
||||
if (term.length > 0) {
|
||||
// truncate it
|
||||
term.length = random.nextInt(term.length);
|
||||
}
|
||||
} else if (code == 2) {
|
||||
// term, but ensure a non-zero offset
|
||||
byte newbytes[] = new byte[term.length+5];
|
||||
System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length);
|
||||
tests.add(new BytesRef(newbytes, 5, term.length));
|
||||
} else if (code == 3) {
|
||||
switch (random().nextInt(3)) {
|
||||
case 0:
|
||||
tests.add(new BytesRef()); // before the first term
|
||||
break;
|
||||
case 1:
|
||||
tests.add(new BytesRef(new byte[] {(byte) 0xFF, (byte) 0xFF})); // past the last term
|
||||
break;
|
||||
case 2:
|
||||
tests.add(new BytesRef(_TestUtil.randomSimpleString(random()))); // random term
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
}
|
||||
numPasses++;
|
||||
}
|
||||
|
||||
rightEnum = rightTerms.iterator(rightEnum);
|
||||
|
||||
ArrayList<BytesRef> shuffledTests = new ArrayList<BytesRef>(tests);
|
||||
Collections.shuffle(shuffledTests, random);
|
||||
|
||||
for (BytesRef b : shuffledTests) {
|
||||
if (rarely()) {
|
||||
// reuse the enums
|
||||
leftEnum = leftTerms.iterator(leftEnum);
|
||||
rightEnum = rightTerms.iterator(rightEnum);
|
||||
}
|
||||
|
||||
final boolean useCache = random().nextBoolean();
|
||||
final boolean seekExact = random().nextBoolean();
|
||||
|
||||
if (seekExact) {
|
||||
assertEquals(info, leftEnum.seekExact(b, useCache), rightEnum.seekExact(b, useCache));
|
||||
} else {
|
||||
SeekStatus leftStatus = leftEnum.seekCeil(b, useCache);
|
||||
SeekStatus rightStatus = rightEnum.seekCeil(b, useCache);
|
||||
assertEquals(info, leftStatus, rightStatus);
|
||||
if (leftStatus != SeekStatus.END) {
|
||||
assertEquals(info, leftEnum.term(), rightEnum.term());
|
||||
assertTermStatsEquals(info, leftEnum, rightEnum);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks term-level statistics
|
||||
*/
|
||||
public void assertTermStatsEquals(String info, TermsEnum leftTermsEnum, TermsEnum rightTermsEnum) throws IOException {
|
||||
assertEquals(info, leftTermsEnum.docFreq(), rightTermsEnum.docFreq());
|
||||
if (leftTermsEnum.totalTermFreq() != -1 && rightTermsEnum.totalTermFreq() != -1) {
|
||||
assertEquals(info, leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that norms are the same across all fields
|
||||
*/
|
||||
public void assertNormsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
|
||||
Fields leftFields = MultiFields.getFields(leftReader);
|
||||
Fields rightFields = MultiFields.getFields(rightReader);
|
||||
// Fields could be null if there are no postings,
|
||||
// but then it must be null for both
|
||||
if (leftFields == null || rightFields == null) {
|
||||
assertNull(info, leftFields);
|
||||
assertNull(info, rightFields);
|
||||
return;
|
||||
}
|
||||
|
||||
for (String field : leftFields) {
|
||||
NumericDocValues leftNorms = MultiDocValues.getNormValues(leftReader, field);
|
||||
NumericDocValues rightNorms = MultiDocValues.getNormValues(rightReader, field);
|
||||
if (leftNorms != null && rightNorms != null) {
|
||||
assertDocValuesEquals(info, leftReader.maxDoc(), leftNorms, rightNorms);
|
||||
} else {
|
||||
assertNull(info, leftNorms);
|
||||
assertNull(info, rightNorms);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that stored fields of all documents are the same
|
||||
*/
|
||||
public void assertStoredFieldsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
|
||||
assert leftReader.maxDoc() == rightReader.maxDoc();
|
||||
for (int i = 0; i < leftReader.maxDoc(); i++) {
|
||||
StoredDocument leftDoc = leftReader.document(i);
|
||||
StoredDocument rightDoc = rightReader.document(i);
|
||||
|
||||
// TODO: I think this is bogus because we don't document what the order should be
|
||||
// from these iterators, etc. I think the codec/IndexReader should be free to order this stuff
|
||||
// in whatever way it wants (e.g. maybe it packs related fields together or something)
|
||||
// To fix this, we sort the fields in both documents by name, but
|
||||
// we still assume that all instances with same name are in order:
|
||||
Comparator<StorableField> comp = new Comparator<StorableField>() {
|
||||
@Override
|
||||
public int compare(StorableField arg0, StorableField arg1) {
|
||||
return arg0.name().compareTo(arg1.name());
|
||||
}
|
||||
};
|
||||
Collections.sort(leftDoc.getFields(), comp);
|
||||
Collections.sort(rightDoc.getFields(), comp);
|
||||
|
||||
Iterator<StorableField> leftIterator = leftDoc.iterator();
|
||||
Iterator<StorableField> rightIterator = rightDoc.iterator();
|
||||
while (leftIterator.hasNext()) {
|
||||
assertTrue(info, rightIterator.hasNext());
|
||||
assertStoredFieldEquals(info, leftIterator.next(), rightIterator.next());
|
||||
}
|
||||
assertFalse(info, rightIterator.hasNext());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that two stored fields are equivalent
|
||||
*/
|
||||
public void assertStoredFieldEquals(String info, StorableField leftField, StorableField rightField) {
|
||||
assertEquals(info, leftField.name(), rightField.name());
|
||||
assertEquals(info, leftField.binaryValue(), rightField.binaryValue());
|
||||
assertEquals(info, leftField.stringValue(), rightField.stringValue());
|
||||
assertEquals(info, leftField.numericValue(), rightField.numericValue());
|
||||
// TODO: should we check the FT at all?
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that term vectors across all fields are equivalent
|
||||
*/
|
||||
public void assertTermVectorsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
|
||||
assert leftReader.maxDoc() == rightReader.maxDoc();
|
||||
for (int i = 0; i < leftReader.maxDoc(); i++) {
|
||||
Fields leftFields = leftReader.getTermVectors(i);
|
||||
Fields rightFields = rightReader.getTermVectors(i);
|
||||
assertFieldsEquals(info, leftReader, leftFields, rightFields, rarely());
|
||||
}
|
||||
}
|
||||
|
||||
private static Set<String> getDVFields(IndexReader reader) {
|
||||
Set<String> fields = new HashSet<String>();
|
||||
for(FieldInfo fi : MultiFields.getMergedFieldInfos(reader)) {
|
||||
if (fi.hasDocValues()) {
|
||||
fields.add(fi.name);
|
||||
}
|
||||
}
|
||||
|
||||
return fields;
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that docvalues across all fields are equivalent
|
||||
*/
|
||||
public void assertDocValuesEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
|
||||
Set<String> leftFields = getDVFields(leftReader);
|
||||
Set<String> rightFields = getDVFields(rightReader);
|
||||
assertEquals(info, leftFields, rightFields);
|
||||
|
||||
for (String field : leftFields) {
|
||||
// TODO: clean this up... very messy
|
||||
{
|
||||
NumericDocValues leftValues = MultiDocValues.getNumericValues(leftReader, field);
|
||||
NumericDocValues rightValues = MultiDocValues.getNumericValues(rightReader, field);
|
||||
if (leftValues != null && rightValues != null) {
|
||||
assertDocValuesEquals(info, leftReader.maxDoc(), leftValues, rightValues);
|
||||
} else {
|
||||
assertNull(info, leftValues);
|
||||
assertNull(info, rightValues);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
BinaryDocValues leftValues = MultiDocValues.getBinaryValues(leftReader, field);
|
||||
BinaryDocValues rightValues = MultiDocValues.getBinaryValues(rightReader, field);
|
||||
if (leftValues != null && rightValues != null) {
|
||||
BytesRef scratchLeft = new BytesRef();
|
||||
BytesRef scratchRight = new BytesRef();
|
||||
for(int docID=0;docID<leftReader.maxDoc();docID++) {
|
||||
leftValues.get(docID, scratchLeft);
|
||||
rightValues.get(docID, scratchRight);
|
||||
assertEquals(info, scratchLeft, scratchRight);
|
||||
}
|
||||
} else {
|
||||
assertNull(info, leftValues);
|
||||
assertNull(info, rightValues);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
SortedDocValues leftValues = MultiDocValues.getSortedValues(leftReader, field);
|
||||
SortedDocValues rightValues = MultiDocValues.getSortedValues(rightReader, field);
|
||||
if (leftValues != null && rightValues != null) {
|
||||
// numOrds
|
||||
assertEquals(info, leftValues.getValueCount(), rightValues.getValueCount());
|
||||
// ords
|
||||
BytesRef scratchLeft = new BytesRef();
|
||||
BytesRef scratchRight = new BytesRef();
|
||||
for (int i = 0; i < leftValues.getValueCount(); i++) {
|
||||
leftValues.lookupOrd(i, scratchLeft);
|
||||
rightValues.lookupOrd(i, scratchRight);
|
||||
assertEquals(info, scratchLeft, scratchRight);
|
||||
}
|
||||
// bytes
|
||||
for(int docID=0;docID<leftReader.maxDoc();docID++) {
|
||||
leftValues.get(docID, scratchLeft);
|
||||
rightValues.get(docID, scratchRight);
|
||||
assertEquals(info, scratchLeft, scratchRight);
|
||||
}
|
||||
} else {
|
||||
assertNull(info, leftValues);
|
||||
assertNull(info, rightValues);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
SortedSetDocValues leftValues = MultiDocValues.getSortedSetValues(leftReader, field);
|
||||
SortedSetDocValues rightValues = MultiDocValues.getSortedSetValues(rightReader, field);
|
||||
if (leftValues != null && rightValues != null) {
|
||||
// numOrds
|
||||
assertEquals(info, leftValues.getValueCount(), rightValues.getValueCount());
|
||||
// ords
|
||||
BytesRef scratchLeft = new BytesRef();
|
||||
BytesRef scratchRight = new BytesRef();
|
||||
for (int i = 0; i < leftValues.getValueCount(); i++) {
|
||||
leftValues.lookupOrd(i, scratchLeft);
|
||||
rightValues.lookupOrd(i, scratchRight);
|
||||
assertEquals(info, scratchLeft, scratchRight);
|
||||
}
|
||||
// ord lists
|
||||
for(int docID=0;docID<leftReader.maxDoc();docID++) {
|
||||
leftValues.setDocument(docID);
|
||||
rightValues.setDocument(docID);
|
||||
long ord;
|
||||
while ((ord = leftValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||
assertEquals(info, ord, rightValues.nextOrd());
|
||||
}
|
||||
assertEquals(info, SortedSetDocValues.NO_MORE_ORDS, rightValues.nextOrd());
|
||||
}
|
||||
} else {
|
||||
assertNull(info, leftValues);
|
||||
assertNull(info, rightValues);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void assertDocValuesEquals(String info, int num, NumericDocValues leftDocValues, NumericDocValues rightDocValues) throws IOException {
|
||||
assertNotNull(info, leftDocValues);
|
||||
assertNotNull(info, rightDocValues);
|
||||
for(int docID=0;docID<num;docID++) {
|
||||
assertEquals(leftDocValues.get(docID),
|
||||
rightDocValues.get(docID));
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: this is kinda stupid, we don't delete documents in the test.
|
||||
public void assertDeletedDocsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
|
||||
assert leftReader.numDeletedDocs() == rightReader.numDeletedDocs();
|
||||
Bits leftBits = MultiFields.getLiveDocs(leftReader);
|
||||
Bits rightBits = MultiFields.getLiveDocs(rightReader);
|
||||
|
||||
if (leftBits == null || rightBits == null) {
|
||||
assertNull(info, leftBits);
|
||||
assertNull(info, rightBits);
|
||||
return;
|
||||
}
|
||||
|
||||
assert leftReader.maxDoc() == rightReader.maxDoc();
|
||||
assertEquals(info, leftBits.length(), rightBits.length());
|
||||
for (int i = 0; i < leftReader.maxDoc(); i++) {
|
||||
assertEquals(info, leftBits.get(i), rightBits.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
public void assertFieldInfosEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
|
||||
FieldInfos leftInfos = MultiFields.getMergedFieldInfos(leftReader);
|
||||
FieldInfos rightInfos = MultiFields.getMergedFieldInfos(rightReader);
|
||||
|
||||
// TODO: would be great to verify more than just the names of the fields!
|
||||
TreeSet<String> left = new TreeSet<String>();
|
||||
TreeSet<String> right = new TreeSet<String>();
|
||||
|
||||
for (FieldInfo fi : leftInfos) {
|
||||
left.add(fi.name);
|
||||
}
|
||||
|
||||
for (FieldInfo fi : rightInfos) {
|
||||
right.add(fi.name);
|
||||
}
|
||||
|
||||
assertEquals(info, left, right);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue