Improve backwards compatibility tests for sorted indexes. (#2276)

This commit also cleans up some old checks that only applied to pre-6.0 indices.
This commit is contained in:
Julie Tibshirani 2021-02-03 09:27:40 -08:00 committed by GitHub
parent 8a0c1f5a0e
commit 902ce0809d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 186 additions and 172 deletions

View File

@ -60,7 +60,6 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields; import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexFormatTooOldException; import org.apache.lucene.index.IndexFormatTooOldException;
@ -93,11 +92,16 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.BaseDirectoryWrapper; import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
@ -1057,21 +1061,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
TestUtil.checkIndex(dir); TestUtil.checkIndex(dir);
// true if this is a 4.0+ index
final boolean is40Index = FieldInfos.getMergedFieldInfos(reader).fieldInfo("content5") != null;
// true if this is a 4.2+ index
final boolean is42Index =
FieldInfos.getMergedFieldInfos(reader).fieldInfo("dvSortedSet") != null;
// true if this is a 4.9+ index
final boolean is49Index =
FieldInfos.getMergedFieldInfos(reader).fieldInfo("dvSortedNumeric") != null;
// true if this index has points (>= 6.0)
final boolean hasPoints =
FieldInfos.getMergedFieldInfos(reader).fieldInfo("intPoint1d") != null;
assert is40Index;
final Bits liveDocs = MultiBits.getLiveDocs(reader); final Bits liveDocs = MultiBits.getLiveDocs(reader);
assertNotNull(liveDocs);
for (int i = 0; i < 35; i++) { for (int i = 0; i < 35; i++) {
if (liveDocs.get(i)) { if (liveDocs.get(i)) {
@ -1079,8 +1070,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
List<IndexableField> fields = d.getFields(); List<IndexableField> fields = d.getFields();
boolean isProxDoc = d.getField("content3") == null; boolean isProxDoc = d.getField("content3") == null;
if (isProxDoc) { if (isProxDoc) {
final int numFields = is40Index ? 7 : 5; assertEquals(7, fields.size());
assertEquals(numFields, fields.size());
IndexableField f = d.getField("id"); IndexableField f = d.getField("id");
assertEquals("" + i, f.stringValue()); assertEquals("" + i, f.stringValue());
@ -1109,11 +1099,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
} }
} }
if (is40Index) {
// check docvalues fields // check docvalues fields
NumericDocValues dvByte = MultiDocValues.getNumericValues(reader, "dvByte"); NumericDocValues dvByte = MultiDocValues.getNumericValues(reader, "dvByte");
BinaryDocValues dvBytesDerefFixed = BinaryDocValues dvBytesDerefFixed = MultiDocValues.getBinaryValues(reader, "dvBytesDerefFixed");
MultiDocValues.getBinaryValues(reader, "dvBytesDerefFixed");
BinaryDocValues dvBytesDerefVar = MultiDocValues.getBinaryValues(reader, "dvBytesDerefVar"); BinaryDocValues dvBytesDerefVar = MultiDocValues.getBinaryValues(reader, "dvBytesDerefVar");
SortedDocValues dvBytesSortedFixed = SortedDocValues dvBytesSortedFixed =
MultiDocValues.getSortedValues(reader, "dvBytesSortedFixed"); MultiDocValues.getSortedValues(reader, "dvBytesSortedFixed");
@ -1128,14 +1116,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
NumericDocValues dvLong = MultiDocValues.getNumericValues(reader, "dvLong"); NumericDocValues dvLong = MultiDocValues.getNumericValues(reader, "dvLong");
NumericDocValues dvPacked = MultiDocValues.getNumericValues(reader, "dvPacked"); NumericDocValues dvPacked = MultiDocValues.getNumericValues(reader, "dvPacked");
NumericDocValues dvShort = MultiDocValues.getNumericValues(reader, "dvShort"); NumericDocValues dvShort = MultiDocValues.getNumericValues(reader, "dvShort");
SortedSetDocValues dvSortedSet = null;
if (is42Index) { SortedSetDocValues dvSortedSet = MultiDocValues.getSortedSetValues(reader, "dvSortedSet");
dvSortedSet = MultiDocValues.getSortedSetValues(reader, "dvSortedSet"); SortedNumericDocValues dvSortedNumeric =
} MultiDocValues.getSortedNumericValues(reader, "dvSortedNumeric");
SortedNumericDocValues dvSortedNumeric = null;
if (is49Index) {
dvSortedNumeric = MultiDocValues.getSortedNumericValues(reader, "dvSortedNumeric");
}
for (int i = 0; i < 35; i++) { for (int i = 0; i < 35; i++) {
int id = Integer.parseInt(reader.document(i).get("id")); int id = Integer.parseInt(reader.document(i).get("id"));
@ -1177,20 +1161,17 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
assertEquals(id, dvPacked.longValue()); assertEquals(id, dvPacked.longValue());
assertEquals(i, dvShort.nextDoc()); assertEquals(i, dvShort.nextDoc());
assertEquals(id, dvShort.longValue()); assertEquals(id, dvShort.longValue());
if (is42Index) {
assertEquals(i, dvSortedSet.nextDoc()); assertEquals(i, dvSortedSet.nextDoc());
long ord = dvSortedSet.nextOrd(); long ord = dvSortedSet.nextOrd();
assertEquals(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.nextOrd()); assertEquals(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.nextOrd());
term = dvSortedSet.lookupOrd(ord); term = dvSortedSet.lookupOrd(ord);
assertEquals(expectedRef, term); assertEquals(expectedRef, term);
}
if (is49Index) {
assertEquals(i, dvSortedNumeric.nextDoc()); assertEquals(i, dvSortedNumeric.nextDoc());
assertEquals(1, dvSortedNumeric.docValueCount()); assertEquals(1, dvSortedNumeric.docValueCount());
assertEquals(id, dvSortedNumeric.nextValue()); assertEquals(id, dvSortedNumeric.nextValue());
} }
}
}
ScoreDoc[] hits = ScoreDoc[] hits =
searcher.search(new TermQuery(new Term(new String("content"), "aaa")), 1000).scoreDocs; searcher.search(new TermQuery(new Term(new String("content"), "aaa")), 1000).scoreDocs;
@ -1201,17 +1182,13 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
doTestHits(hits, 34, searcher.getIndexReader()); doTestHits(hits, 34, searcher.getIndexReader());
if (is40Index) { hits = searcher.search(new TermQuery(new Term(new String("content5"), "aaa")), 1000).scoreDocs;
hits =
searcher.search(new TermQuery(new Term(new String("content5"), "aaa")), 1000).scoreDocs;
doTestHits(hits, 34, searcher.getIndexReader()); doTestHits(hits, 34, searcher.getIndexReader());
hits = hits = searcher.search(new TermQuery(new Term(new String("content6"), "aaa")), 1000).scoreDocs;
searcher.search(new TermQuery(new Term(new String("content6"), "aaa")), 1000).scoreDocs;
doTestHits(hits, 34, searcher.getIndexReader()); doTestHits(hits, 34, searcher.getIndexReader());
}
hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), 1000).scoreDocs; hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), 1000).scoreDocs;
assertEquals(34, hits.length); assertEquals(34, hits.length);
@ -1223,7 +1200,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), 1000).scoreDocs; hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), 1000).scoreDocs;
assertEquals(34, hits.length); assertEquals(34, hits.length);
if (hasPoints) {
doTestHits( doTestHits(
searcher.search(IntPoint.newRangeQuery("intPoint1d", 0, 34), 1000).scoreDocs, searcher.search(IntPoint.newRangeQuery("intPoint1d", 0, 34), 1000).scoreDocs,
34, 34,
@ -1273,22 +1249,18 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
byte[] bytes1 = new byte[4]; byte[] bytes1 = new byte[4];
byte[] bytes2 = new byte[] {0, 0, 0, (byte) 34}; byte[] bytes2 = new byte[] {0, 0, 0, (byte) 34};
doTestHits( doTestHits(
searcher.search(BinaryPoint.newRangeQuery("binaryPoint1d", bytes1, bytes2), 1000) searcher.search(BinaryPoint.newRangeQuery("binaryPoint1d", bytes1, bytes2), 1000).scoreDocs,
.scoreDocs,
34, 34,
searcher.getIndexReader()); searcher.getIndexReader());
byte[] bytes3 = new byte[] {0, 0, 0, (byte) 68}; byte[] bytes3 = new byte[] {0, 0, 0, (byte) 68};
doTestHits( doTestHits(
searcher.search( searcher.search(
BinaryPoint.newRangeQuery( BinaryPoint.newRangeQuery(
"binaryPoint2d", "binaryPoint2d", new byte[][] {bytes1, bytes1}, new byte[][] {bytes2, bytes3}),
new byte[][] {bytes1, bytes1},
new byte[][] {bytes2, bytes3}),
1000) 1000)
.scoreDocs, .scoreDocs,
34, 34,
searcher.getIndexReader()); searcher.getIndexReader());
}
reader.close(); reader.close();
} }
@ -1791,9 +1763,13 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
Path oldIndexDir = createTempDir("moreterms"); Path oldIndexDir = createTempDir("moreterms");
TestUtil.unzip(getDataInputStream(moreTermsIndex), oldIndexDir); TestUtil.unzip(getDataInputStream(moreTermsIndex), oldIndexDir);
Directory dir = newFSDirectory(oldIndexDir); Directory dir = newFSDirectory(oldIndexDir);
DirectoryReader reader = DirectoryReader.open(dir);
verifyUsesDefaultCodec(dir, moreTermsIndex); verifyUsesDefaultCodec(dir, moreTermsIndex);
// TODO: more tests
TestUtil.checkIndex(dir); TestUtil.checkIndex(dir);
searchExampleIndex(reader);
reader.close();
dir.close(); dir.close();
} }
@ -1978,22 +1954,60 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
assertNotNull("Sorted index index " + name + " not found", resource); assertNotNull("Sorted index index " + name + " not found", resource);
TestUtil.unzip(resource, path); TestUtil.unzip(resource, path);
// TODO: more tests
Directory dir = newFSDirectory(path); Directory dir = newFSDirectory(path);
DirectoryReader reader = DirectoryReader.open(dir); DirectoryReader reader = DirectoryReader.open(dir);
assertEquals(1, reader.leaves().size()); assertEquals(1, reader.leaves().size());
Sort sort = reader.leaves().get(0).reader().getMetaData().getSort(); Sort sort = reader.leaves().get(0).reader().getMetaData().getSort();
assertNotNull(sort); assertNotNull(sort);
assertEquals("<long: \"dateDV\">!", sort.toString()); assertEquals("<long: \"dateDV\">!", sort.toString());
reader.close();
// this will confirm the docs really are sorted: // This will confirm the docs are really sorted
TestUtil.checkIndex(dir); TestUtil.checkIndex(dir);
searchExampleIndex(reader);
reader.close();
dir.close(); dir.close();
} }
} }
/**
 * Runs a fixed set of smoke-test searches against an already-open example index and asserts on
 * the results.
 *
 * <p>The searches cover a norms existence query, a doc-values existence query, two term queries
 * on {@code body}, a point range query sorted by {@code docid_intDV}, and a match-all query
 * sorted by {@code dateDV}.
 *
 * @param reader open reader over the index under test; it is not closed by this method
 * @throws IOException declared by the underlying search calls
 */
private void searchExampleIndex(DirectoryReader reader) throws IOException {
  IndexSearcher searcher = newSearcher(reader);

  // Both existence queries are asserted to hit 50 documents, the same count the match-all
  // query below is asserted to return.
  TopDocs topDocs = searcher.search(new NormsFieldExistsQuery("titleTokenized"), 10);
  assertEquals(50, topDocs.totalHits.value);

  topDocs = searcher.search(new DocValuesFieldExistsQuery("titleDV"), 10);
  assertEquals(50, topDocs.totalHits.value);

  topDocs = searcher.search(new TermQuery(new Term("body", "ja")), 10);
  assertTrue(topDocs.totalHits.value > 0);

  // Range [42, 44] on the point field, sorted on the doc-values field: exactly three hits whose
  // sort values are 42, 43 and 44, in that order.
  topDocs =
      searcher.search(
          IntPoint.newRangeQuery("docid_int", 42, 44),
          10,
          new Sort(new SortField("docid_intDV", SortField.Type.INT)));
  assertEquals(3, topDocs.totalHits.value);
  assertEquals(3, topDocs.scoreDocs.length);
  assertEquals(42, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]);
  assertEquals(43, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]);
  assertEquals(44, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]);

  topDocs = searcher.search(new TermQuery(new Term("body", "the")), 5);
  assertTrue(topDocs.totalHits.value > 0);

  topDocs =
      searcher.search(
          new MatchAllDocsQuery(), 5, new Sort(new SortField("dateDV", SortField.Type.LONG)));
  assertEquals(50, topDocs.totalHits.value);
  assertEquals(5, topDocs.scoreDocs.length);

  // Check every adjacent pair of hits, not just the first and last, so that a mis-ordered hit
  // in the middle of the page is detected as well.
  long previousDate = (Long) ((FieldDoc) topDocs.scoreDocs[0]).fields[0];
  for (int i = 1; i < topDocs.scoreDocs.length; i++) {
    long currentDate = (Long) ((FieldDoc) topDocs.scoreDocs[i]).fields[0];
    assertTrue(
        "hits not in ascending dateDV order at position "
            + i
            + ": "
            + previousDate
            + " > "
            + currentDate,
        previousDate <= currentDate);
    previousDate = currentDate;
  }
}
static long getValue(BinaryDocValues bdv) throws IOException { static long getValue(BinaryDocValues bdv) throws IOException {
BytesRef term = bdv.binaryValue(); BytesRef term = bdv.binaryValue();
int idx = term.offset; int idx = term.offset;