Improve backwards compatibility tests for sorted indexes. (#2276)

This commit also cleans up some old checks that only applied to pre-6.0 indices.
This commit is contained in:
Julie Tibshirani 2021-02-03 09:27:40 -08:00 committed by GitHub
parent 8a0c1f5a0e
commit 902ce0809d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 186 additions and 172 deletions

View File

@ -60,7 +60,6 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields; import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexFormatTooOldException; import org.apache.lucene.index.IndexFormatTooOldException;
@ -93,11 +92,16 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.BaseDirectoryWrapper; import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
@ -1057,21 +1061,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
TestUtil.checkIndex(dir); TestUtil.checkIndex(dir);
// true if this is a 4.0+ index
final boolean is40Index = FieldInfos.getMergedFieldInfos(reader).fieldInfo("content5") != null;
// true if this is a 4.2+ index
final boolean is42Index =
FieldInfos.getMergedFieldInfos(reader).fieldInfo("dvSortedSet") != null;
// true if this is a 4.9+ index
final boolean is49Index =
FieldInfos.getMergedFieldInfos(reader).fieldInfo("dvSortedNumeric") != null;
// true if this index has points (>= 6.0)
final boolean hasPoints =
FieldInfos.getMergedFieldInfos(reader).fieldInfo("intPoint1d") != null;
assert is40Index;
final Bits liveDocs = MultiBits.getLiveDocs(reader); final Bits liveDocs = MultiBits.getLiveDocs(reader);
assertNotNull(liveDocs);
for (int i = 0; i < 35; i++) { for (int i = 0; i < 35; i++) {
if (liveDocs.get(i)) { if (liveDocs.get(i)) {
@ -1079,8 +1070,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
List<IndexableField> fields = d.getFields(); List<IndexableField> fields = d.getFields();
boolean isProxDoc = d.getField("content3") == null; boolean isProxDoc = d.getField("content3") == null;
if (isProxDoc) { if (isProxDoc) {
final int numFields = is40Index ? 7 : 5; assertEquals(7, fields.size());
assertEquals(numFields, fields.size());
IndexableField f = d.getField("id"); IndexableField f = d.getField("id");
assertEquals("" + i, f.stringValue()); assertEquals("" + i, f.stringValue());
@ -1109,87 +1099,78 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
} }
} }
if (is40Index) { // check docvalues fields
// check docvalues fields NumericDocValues dvByte = MultiDocValues.getNumericValues(reader, "dvByte");
NumericDocValues dvByte = MultiDocValues.getNumericValues(reader, "dvByte"); BinaryDocValues dvBytesDerefFixed = MultiDocValues.getBinaryValues(reader, "dvBytesDerefFixed");
BinaryDocValues dvBytesDerefFixed = BinaryDocValues dvBytesDerefVar = MultiDocValues.getBinaryValues(reader, "dvBytesDerefVar");
MultiDocValues.getBinaryValues(reader, "dvBytesDerefFixed"); SortedDocValues dvBytesSortedFixed =
BinaryDocValues dvBytesDerefVar = MultiDocValues.getBinaryValues(reader, "dvBytesDerefVar"); MultiDocValues.getSortedValues(reader, "dvBytesSortedFixed");
SortedDocValues dvBytesSortedFixed = SortedDocValues dvBytesSortedVar = MultiDocValues.getSortedValues(reader, "dvBytesSortedVar");
MultiDocValues.getSortedValues(reader, "dvBytesSortedFixed"); BinaryDocValues dvBytesStraightFixed =
SortedDocValues dvBytesSortedVar = MultiDocValues.getSortedValues(reader, "dvBytesSortedVar"); MultiDocValues.getBinaryValues(reader, "dvBytesStraightFixed");
BinaryDocValues dvBytesStraightFixed = BinaryDocValues dvBytesStraightVar =
MultiDocValues.getBinaryValues(reader, "dvBytesStraightFixed"); MultiDocValues.getBinaryValues(reader, "dvBytesStraightVar");
BinaryDocValues dvBytesStraightVar = NumericDocValues dvDouble = MultiDocValues.getNumericValues(reader, "dvDouble");
MultiDocValues.getBinaryValues(reader, "dvBytesStraightVar"); NumericDocValues dvFloat = MultiDocValues.getNumericValues(reader, "dvFloat");
NumericDocValues dvDouble = MultiDocValues.getNumericValues(reader, "dvDouble"); NumericDocValues dvInt = MultiDocValues.getNumericValues(reader, "dvInt");
NumericDocValues dvFloat = MultiDocValues.getNumericValues(reader, "dvFloat"); NumericDocValues dvLong = MultiDocValues.getNumericValues(reader, "dvLong");
NumericDocValues dvInt = MultiDocValues.getNumericValues(reader, "dvInt"); NumericDocValues dvPacked = MultiDocValues.getNumericValues(reader, "dvPacked");
NumericDocValues dvLong = MultiDocValues.getNumericValues(reader, "dvLong"); NumericDocValues dvShort = MultiDocValues.getNumericValues(reader, "dvShort");
NumericDocValues dvPacked = MultiDocValues.getNumericValues(reader, "dvPacked");
NumericDocValues dvShort = MultiDocValues.getNumericValues(reader, "dvShort");
SortedSetDocValues dvSortedSet = null;
if (is42Index) {
dvSortedSet = MultiDocValues.getSortedSetValues(reader, "dvSortedSet");
}
SortedNumericDocValues dvSortedNumeric = null;
if (is49Index) {
dvSortedNumeric = MultiDocValues.getSortedNumericValues(reader, "dvSortedNumeric");
}
for (int i = 0; i < 35; i++) { SortedSetDocValues dvSortedSet = MultiDocValues.getSortedSetValues(reader, "dvSortedSet");
int id = Integer.parseInt(reader.document(i).get("id")); SortedNumericDocValues dvSortedNumeric =
assertEquals(i, dvByte.nextDoc()); MultiDocValues.getSortedNumericValues(reader, "dvSortedNumeric");
assertEquals(id, dvByte.longValue());
byte bytes[] = for (int i = 0; i < 35; i++) {
new byte[] {(byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id}; int id = Integer.parseInt(reader.document(i).get("id"));
BytesRef expectedRef = new BytesRef(bytes); assertEquals(i, dvByte.nextDoc());
assertEquals(id, dvByte.longValue());
assertEquals(i, dvBytesDerefFixed.nextDoc()); byte bytes[] =
BytesRef term = dvBytesDerefFixed.binaryValue(); new byte[] {(byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id};
assertEquals(expectedRef, term); BytesRef expectedRef = new BytesRef(bytes);
assertEquals(i, dvBytesDerefVar.nextDoc());
term = dvBytesDerefVar.binaryValue();
assertEquals(expectedRef, term);
assertEquals(i, dvBytesSortedFixed.nextDoc());
term = dvBytesSortedFixed.binaryValue();
assertEquals(expectedRef, term);
assertEquals(i, dvBytesSortedVar.nextDoc());
term = dvBytesSortedVar.binaryValue();
assertEquals(expectedRef, term);
assertEquals(i, dvBytesStraightFixed.nextDoc());
term = dvBytesStraightFixed.binaryValue();
assertEquals(expectedRef, term);
assertEquals(i, dvBytesStraightVar.nextDoc());
term = dvBytesStraightVar.binaryValue();
assertEquals(expectedRef, term);
assertEquals(i, dvDouble.nextDoc()); assertEquals(i, dvBytesDerefFixed.nextDoc());
assertEquals((double) id, Double.longBitsToDouble(dvDouble.longValue()), 0D); BytesRef term = dvBytesDerefFixed.binaryValue();
assertEquals(i, dvFloat.nextDoc()); assertEquals(expectedRef, term);
assertEquals((float) id, Float.intBitsToFloat((int) dvFloat.longValue()), 0F); assertEquals(i, dvBytesDerefVar.nextDoc());
assertEquals(i, dvInt.nextDoc()); term = dvBytesDerefVar.binaryValue();
assertEquals(id, dvInt.longValue()); assertEquals(expectedRef, term);
assertEquals(i, dvLong.nextDoc()); assertEquals(i, dvBytesSortedFixed.nextDoc());
assertEquals(id, dvLong.longValue()); term = dvBytesSortedFixed.binaryValue();
assertEquals(i, dvPacked.nextDoc()); assertEquals(expectedRef, term);
assertEquals(id, dvPacked.longValue()); assertEquals(i, dvBytesSortedVar.nextDoc());
assertEquals(i, dvShort.nextDoc()); term = dvBytesSortedVar.binaryValue();
assertEquals(id, dvShort.longValue()); assertEquals(expectedRef, term);
if (is42Index) { assertEquals(i, dvBytesStraightFixed.nextDoc());
assertEquals(i, dvSortedSet.nextDoc()); term = dvBytesStraightFixed.binaryValue();
long ord = dvSortedSet.nextOrd(); assertEquals(expectedRef, term);
assertEquals(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.nextOrd()); assertEquals(i, dvBytesStraightVar.nextDoc());
term = dvSortedSet.lookupOrd(ord); term = dvBytesStraightVar.binaryValue();
assertEquals(expectedRef, term); assertEquals(expectedRef, term);
}
if (is49Index) { assertEquals(i, dvDouble.nextDoc());
assertEquals(i, dvSortedNumeric.nextDoc()); assertEquals((double) id, Double.longBitsToDouble(dvDouble.longValue()), 0D);
assertEquals(1, dvSortedNumeric.docValueCount()); assertEquals(i, dvFloat.nextDoc());
assertEquals(id, dvSortedNumeric.nextValue()); assertEquals((float) id, Float.intBitsToFloat((int) dvFloat.longValue()), 0F);
} assertEquals(i, dvInt.nextDoc());
} assertEquals(id, dvInt.longValue());
assertEquals(i, dvLong.nextDoc());
assertEquals(id, dvLong.longValue());
assertEquals(i, dvPacked.nextDoc());
assertEquals(id, dvPacked.longValue());
assertEquals(i, dvShort.nextDoc());
assertEquals(id, dvShort.longValue());
assertEquals(i, dvSortedSet.nextDoc());
long ord = dvSortedSet.nextOrd();
assertEquals(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.nextOrd());
term = dvSortedSet.lookupOrd(ord);
assertEquals(expectedRef, term);
assertEquals(i, dvSortedNumeric.nextDoc());
assertEquals(1, dvSortedNumeric.docValueCount());
assertEquals(id, dvSortedNumeric.nextValue());
} }
ScoreDoc[] hits = ScoreDoc[] hits =
@ -1201,17 +1182,13 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
doTestHits(hits, 34, searcher.getIndexReader()); doTestHits(hits, 34, searcher.getIndexReader());
if (is40Index) { hits = searcher.search(new TermQuery(new Term(new String("content5"), "aaa")), 1000).scoreDocs;
hits =
searcher.search(new TermQuery(new Term(new String("content5"), "aaa")), 1000).scoreDocs;
doTestHits(hits, 34, searcher.getIndexReader()); doTestHits(hits, 34, searcher.getIndexReader());
hits = hits = searcher.search(new TermQuery(new Term(new String("content6"), "aaa")), 1000).scoreDocs;
searcher.search(new TermQuery(new Term(new String("content6"), "aaa")), 1000).scoreDocs;
doTestHits(hits, 34, searcher.getIndexReader()); doTestHits(hits, 34, searcher.getIndexReader());
}
hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), 1000).scoreDocs; hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), 1000).scoreDocs;
assertEquals(34, hits.length); assertEquals(34, hits.length);
@ -1223,72 +1200,67 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), 1000).scoreDocs; hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), 1000).scoreDocs;
assertEquals(34, hits.length); assertEquals(34, hits.length);
if (hasPoints) { doTestHits(
doTestHits( searcher.search(IntPoint.newRangeQuery("intPoint1d", 0, 34), 1000).scoreDocs,
searcher.search(IntPoint.newRangeQuery("intPoint1d", 0, 34), 1000).scoreDocs, 34,
34, searcher.getIndexReader());
searcher.getIndexReader()); doTestHits(
doTestHits( searcher.search(
searcher.search( IntPoint.newRangeQuery("intPoint2d", new int[] {0, 0}, new int[] {34, 68}), 1000)
IntPoint.newRangeQuery("intPoint2d", new int[] {0, 0}, new int[] {34, 68}), 1000) .scoreDocs,
.scoreDocs, 34,
34, searcher.getIndexReader());
searcher.getIndexReader()); doTestHits(
doTestHits( searcher.search(FloatPoint.newRangeQuery("floatPoint1d", 0f, 34f), 1000).scoreDocs,
searcher.search(FloatPoint.newRangeQuery("floatPoint1d", 0f, 34f), 1000).scoreDocs, 34,
34, searcher.getIndexReader());
searcher.getIndexReader()); doTestHits(
doTestHits( searcher.search(
searcher.search( FloatPoint.newRangeQuery(
FloatPoint.newRangeQuery( "floatPoint2d", new float[] {0f, 0f}, new float[] {34f, 68f}),
"floatPoint2d", new float[] {0f, 0f}, new float[] {34f, 68f}), 1000)
1000) .scoreDocs,
.scoreDocs, 34,
34, searcher.getIndexReader());
searcher.getIndexReader()); doTestHits(
doTestHits( searcher.search(LongPoint.newRangeQuery("longPoint1d", 0, 34), 1000).scoreDocs,
searcher.search(LongPoint.newRangeQuery("longPoint1d", 0, 34), 1000).scoreDocs, 34,
34, searcher.getIndexReader());
searcher.getIndexReader()); doTestHits(
doTestHits( searcher.search(
searcher.search( LongPoint.newRangeQuery("longPoint2d", new long[] {0, 0}, new long[] {34, 68}),
LongPoint.newRangeQuery("longPoint2d", new long[] {0, 0}, new long[] {34, 68}), 1000)
1000) .scoreDocs,
.scoreDocs, 34,
34, searcher.getIndexReader());
searcher.getIndexReader()); doTestHits(
doTestHits( searcher.search(DoublePoint.newRangeQuery("doublePoint1d", 0.0, 34.0), 1000).scoreDocs,
searcher.search(DoublePoint.newRangeQuery("doublePoint1d", 0.0, 34.0), 1000).scoreDocs, 34,
34, searcher.getIndexReader());
searcher.getIndexReader()); doTestHits(
doTestHits( searcher.search(
searcher.search( DoublePoint.newRangeQuery(
DoublePoint.newRangeQuery( "doublePoint2d", new double[] {0.0, 0.0}, new double[] {34.0, 68.0}),
"doublePoint2d", new double[] {0.0, 0.0}, new double[] {34.0, 68.0}), 1000)
1000) .scoreDocs,
.scoreDocs, 34,
34, searcher.getIndexReader());
searcher.getIndexReader());
byte[] bytes1 = new byte[4]; byte[] bytes1 = new byte[4];
byte[] bytes2 = new byte[] {0, 0, 0, (byte) 34}; byte[] bytes2 = new byte[] {0, 0, 0, (byte) 34};
doTestHits( doTestHits(
searcher.search(BinaryPoint.newRangeQuery("binaryPoint1d", bytes1, bytes2), 1000) searcher.search(BinaryPoint.newRangeQuery("binaryPoint1d", bytes1, bytes2), 1000).scoreDocs,
.scoreDocs, 34,
34, searcher.getIndexReader());
searcher.getIndexReader()); byte[] bytes3 = new byte[] {0, 0, 0, (byte) 68};
byte[] bytes3 = new byte[] {0, 0, 0, (byte) 68}; doTestHits(
doTestHits( searcher.search(
searcher.search( BinaryPoint.newRangeQuery(
BinaryPoint.newRangeQuery( "binaryPoint2d", new byte[][] {bytes1, bytes1}, new byte[][] {bytes2, bytes3}),
"binaryPoint2d", 1000)
new byte[][] {bytes1, bytes1}, .scoreDocs,
new byte[][] {bytes2, bytes3}), 34,
1000) searcher.getIndexReader());
.scoreDocs,
34,
searcher.getIndexReader());
}
reader.close(); reader.close();
} }
@ -1791,9 +1763,13 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
Path oldIndexDir = createTempDir("moreterms"); Path oldIndexDir = createTempDir("moreterms");
TestUtil.unzip(getDataInputStream(moreTermsIndex), oldIndexDir); TestUtil.unzip(getDataInputStream(moreTermsIndex), oldIndexDir);
Directory dir = newFSDirectory(oldIndexDir); Directory dir = newFSDirectory(oldIndexDir);
DirectoryReader reader = DirectoryReader.open(dir);
verifyUsesDefaultCodec(dir, moreTermsIndex); verifyUsesDefaultCodec(dir, moreTermsIndex);
// TODO: more tests
TestUtil.checkIndex(dir); TestUtil.checkIndex(dir);
searchExampleIndex(reader);
reader.close();
dir.close(); dir.close();
} }
@ -1978,22 +1954,60 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
assertNotNull("Sorted index index " + name + " not found", resource); assertNotNull("Sorted index index " + name + " not found", resource);
TestUtil.unzip(resource, path); TestUtil.unzip(resource, path);
// TODO: more tests
Directory dir = newFSDirectory(path); Directory dir = newFSDirectory(path);
DirectoryReader reader = DirectoryReader.open(dir); DirectoryReader reader = DirectoryReader.open(dir);
assertEquals(1, reader.leaves().size()); assertEquals(1, reader.leaves().size());
Sort sort = reader.leaves().get(0).reader().getMetaData().getSort(); Sort sort = reader.leaves().get(0).reader().getMetaData().getSort();
assertNotNull(sort); assertNotNull(sort);
assertEquals("<long: \"dateDV\">!", sort.toString()); assertEquals("<long: \"dateDV\">!", sort.toString());
reader.close();
// this will confirm the docs really are sorted: // This will confirm the docs are really sorted
TestUtil.checkIndex(dir); TestUtil.checkIndex(dir);
searchExampleIndex(reader);
reader.close();
dir.close(); dir.close();
} }
} }
private void searchExampleIndex(DirectoryReader reader) throws IOException {
IndexSearcher searcher = newSearcher(reader);
TopDocs topDocs = searcher.search(new NormsFieldExistsQuery("titleTokenized"), 10);
assertEquals(50, topDocs.totalHits.value);
topDocs = searcher.search(new DocValuesFieldExistsQuery("titleDV"), 10);
assertEquals(50, topDocs.totalHits.value);
topDocs = searcher.search(new TermQuery(new Term("body", "ja")), 10);
assertTrue(topDocs.totalHits.value > 0);
topDocs =
searcher.search(
IntPoint.newRangeQuery("docid_int", 42, 44),
10,
new Sort(new SortField("docid_intDV", SortField.Type.INT)));
assertEquals(3, topDocs.totalHits.value);
assertEquals(3, topDocs.scoreDocs.length);
assertEquals(42, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]);
assertEquals(43, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]);
assertEquals(44, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]);
topDocs = searcher.search(new TermQuery(new Term("body", "the")), 5);
assertTrue(topDocs.totalHits.value > 0);
topDocs =
searcher.search(
new MatchAllDocsQuery(), 5, new Sort(new SortField("dateDV", SortField.Type.LONG)));
assertEquals(50, topDocs.totalHits.value);
assertEquals(5, topDocs.scoreDocs.length);
long firstDate = (Long) ((FieldDoc) topDocs.scoreDocs[0]).fields[0];
long lastDate = (Long) ((FieldDoc) topDocs.scoreDocs[4]).fields[0];
assertTrue(firstDate <= lastDate);
}
static long getValue(BinaryDocValues bdv) throws IOException { static long getValue(BinaryDocValues bdv) throws IOException {
BytesRef term = bdv.binaryValue(); BytesRef term = bdv.binaryValue();
int idx = term.offset; int idx = term.offset;