From da1f954601c284574b53863de3bc96f1c46d91c8 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 20 Sep 2024 14:37:45 +0200 Subject: [PATCH] Improve testing of mismatched field numbers. (#13812) This improves testing of mismatched field numbers by - improving `AssertingDocValuesProducer` to detect mismatched field numbers, - introducing a `MismatchedCodecReader` to actually test mismatched field numbers on `DocValuesProducer` (a `MismatchedLeafReader` wrapping a `SlowCodecReaderWrapper` doesn't work since `SlowCodecReaderWrapper` implicitly resolves the correct `FieldInfo` object), - introducing an explicit test for mismatched field numbers for doc values, points, postings and knn vectors. These new tests uncovered a bug when merging sorted doc values, which would call the underlying doc values producer with the merged field info. Closes #13805 --- .../TestLucene90HnswVectorsFormat.java | 5 + .../TestLucene91HnswVectorsFormat.java | 5 + .../TestLucene92HnswVectorsFormat.java | 5 + .../lucene/codecs/DocValuesConsumer.java | 2 +- .../codecs/lucene90/Lucene90PointsReader.java | 6 +- .../codecs/lucene90/Lucene90PointsWriter.java | 2 +- .../asserting/AssertingDocValuesFormat.java | 17 +- .../index/BaseDocValuesFormatTestCase.java | 75 ++++++ .../index/BaseKnnVectorsFormatTestCase.java | 49 ++++ .../tests/index/BasePointsFormatTestCase.java | 78 +++++++ .../index/BasePostingsFormatTestCase.java | 40 ++++ .../tests/index/MismatchedCodecReader.java | 216 ++++++++++++++++++ .../tests/index/MismatchedLeafReader.java | 22 +- .../tests/index/MockRandomMergePolicy.java | 3 +- .../lucene/tests/util/LuceneTestCase.java | 5 +- 15 files changed, 502 insertions(+), 28 deletions(-) create mode 100644 lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedCodecReader.java diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java 
b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java index 2c689d5c0e5..37e2745c247 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java @@ -83,4 +83,9 @@ public class TestLucene90HnswVectorsFormat extends BaseKnnVectorsFormatTestCase public void testMergingWithDifferentByteKnnFields() { // unimplemented } + + @Override + public void testMismatchedFields() throws Exception { + // requires byte support + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java index df79316db0a..7bf2d426eac 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java @@ -82,4 +82,9 @@ public class TestLucene91HnswVectorsFormat extends BaseKnnVectorsFormatTestCase public void testMergingWithDifferentByteKnnFields() { // unimplemented } + + @Override + public void testMismatchedFields() throws Exception { + // requires byte support + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java index 0e003dafc3b..192f70a6397 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java @@ -72,4 +72,9 @@ public class 
TestLucene92HnswVectorsFormat extends BaseKnnVectorsFormatTestCase public void testMergingWithDifferentByteKnnFields() { // unimplemented } + + @Override + public void testMismatchedFields() throws Exception { + // requires byte support + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java index cbb906788e5..08c08ec5075 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java @@ -613,7 +613,7 @@ public abstract class DocValuesConsumer implements Closeable { if (docValuesProducer != null) { FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name); if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) { - values = docValuesProducer.getSorted(fieldInfo); + values = docValuesProducer.getSorted(readerFieldInfo); } } if (values == null) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java index d3f256cbf00..82910e23ab9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java @@ -33,9 +33,9 @@ import org.apache.lucene.util.bkd.BKDReader; /** Reads point values previously written with {@link Lucene90PointsWriter} */ public class Lucene90PointsReader extends PointsReader { - final IndexInput indexIn, dataIn; - final SegmentReadState readState; - final IntObjectHashMap readers = new IntObjectHashMap<>(); + private final IndexInput indexIn, dataIn; + private final SegmentReadState readState; + private final IntObjectHashMap readers = new IntObjectHashMap<>(); /** Sole constructor */ public Lucene90PointsReader(SegmentReadState readState) throws IOException { diff --git 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java index e50d6a0fdb5..45a946e8ac4 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java @@ -253,7 +253,7 @@ public class Lucene90PointsWriter extends PointsWriter { FieldInfos readerFieldInfos = mergeState.fieldInfos[i]; FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name); if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) { - PointValues aPointValues = reader90.readers.get(readerFieldInfo.number); + PointValues aPointValues = reader90.getValues(readerFieldInfo.name); if (aPointValues != null) { pointValues.add(aPointValues); docMaps.add(mergeState.docMaps[i]); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingDocValuesFormat.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingDocValuesFormat.java index 619b5b02b80..046fd850304 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingDocValuesFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingDocValuesFormat.java @@ -27,6 +27,7 @@ import org.apache.lucene.index.DocValuesSkipIndexType; import org.apache.lucene.index.DocValuesSkipper; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; @@ -59,7 +60,8 @@ public class AssertingDocValuesFormat extends DocValuesFormat { assert state.fieldInfos.hasDocValues(); DocValuesProducer producer = in.fieldsProducer(state); assert producer != null; - return new 
AssertingDocValuesProducer(producer, state.segmentInfo.maxDoc(), false); + return new AssertingDocValuesProducer( + producer, state.fieldInfos, state.segmentInfo.maxDoc(), false); } static class AssertingDocValuesConsumer extends DocValuesConsumer { @@ -214,12 +216,15 @@ public class AssertingDocValuesFormat extends DocValuesFormat { static class AssertingDocValuesProducer extends DocValuesProducer { private final DocValuesProducer in; + private final FieldInfos fieldInfos; private final int maxDoc; private final boolean merging; private final Thread creationThread; - AssertingDocValuesProducer(DocValuesProducer in, int maxDoc, boolean merging) { + AssertingDocValuesProducer( + DocValuesProducer in, FieldInfos fieldInfos, int maxDoc, boolean merging) { this.in = in; + this.fieldInfos = fieldInfos; this.maxDoc = maxDoc; this.merging = merging; this.creationThread = Thread.currentThread(); @@ -229,6 +234,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { + assert fieldInfos.fieldInfo(field.name).number == field.number; if (merging) { AssertingCodec.assertThread("DocValuesProducer", creationThread); } @@ -240,6 +246,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat { @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { + assert fieldInfos.fieldInfo(field.name).number == field.number; if (merging) { AssertingCodec.assertThread("DocValuesProducer", creationThread); } @@ -251,6 +258,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat { @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { + assert fieldInfos.fieldInfo(field.name).number == field.number; if (merging) { AssertingCodec.assertThread("DocValuesProducer", creationThread); } @@ -262,6 +270,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat { @Override public SortedNumericDocValues getSortedNumeric(FieldInfo 
field) throws IOException { + assert fieldInfos.fieldInfo(field.name).number == field.number; if (merging) { AssertingCodec.assertThread("DocValuesProducer", creationThread); } @@ -273,6 +282,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat { @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { + assert fieldInfos.fieldInfo(field.name).number == field.number; if (merging) { AssertingCodec.assertThread("DocValuesProducer", creationThread); } @@ -284,6 +294,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat { @Override public DocValuesSkipper getSkipper(FieldInfo field) throws IOException { + assert fieldInfos.fieldInfo(field.name).number == field.number; assert field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE; DocValuesSkipper skipper = in.getSkipper(field); assert skipper != null; @@ -303,7 +314,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat { @Override public DocValuesProducer getMergeInstance() { - return new AssertingDocValuesProducer(in.getMergeInstance(), maxDoc, true); + return new AssertingDocValuesProducer(in.getMergeInstance(), fieldInfos, maxDoc, true); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java index a312b42a910..9b99aeecba6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.io.PrintStream; import java.util.function.Supplier; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; @@ -31,22 
+32,26 @@ import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; +import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus; import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValuesSkipper; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.util.TestUtil; @@ -832,4 +837,74 @@ public abstract class BaseDocValuesFormatTestCase extends LegacyBaseDocValuesFor int docID(); } + + public void testMismatchedFields() throws Exception { + Directory dir1 = newDirectory(); + IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig()); + Document doc = new Document(); + doc.add(new BinaryDocValuesField("binary", new BytesRef("lucene"))); + doc.add(new NumericDocValuesField("numeric", 0L)); + doc.add(new SortedDocValuesField("sorted", new BytesRef("search"))); + doc.add(new SortedNumericDocValuesField("sorted_numeric", 1L)); + doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("engine"))); + 
w1.addDocument(doc); + + Directory dir2 = newDirectory(); + IndexWriter w2 = + new IndexWriter(dir2, newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler())); + w2.addDocument(doc); + w2.commit(); + + DirectoryReader reader = DirectoryReader.open(w1); + w1.close(); + w2.addIndexes(new MismatchedCodecReader((CodecReader) getOnlyLeafReader(reader), random())); + reader.close(); + w2.forceMerge(1); + reader = DirectoryReader.open(w2); + w2.close(); + + LeafReader leafReader = getOnlyLeafReader(reader); + + BinaryDocValues bdv = leafReader.getBinaryDocValues("binary"); + assertNotNull(bdv); + assertEquals(0, bdv.nextDoc()); + assertEquals(new BytesRef("lucene"), bdv.binaryValue()); + assertEquals(1, bdv.nextDoc()); + assertEquals(new BytesRef("lucene"), bdv.binaryValue()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, bdv.nextDoc()); + + NumericDocValues ndv = leafReader.getNumericDocValues("numeric"); + assertNotNull(ndv); + assertEquals(0, ndv.nextDoc()); + assertEquals(0, ndv.longValue()); + assertEquals(1, ndv.nextDoc()); + assertEquals(0, ndv.longValue()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, ndv.nextDoc()); + + SortedDocValues sdv = leafReader.getSortedDocValues("sorted"); + assertNotNull(sdv); + assertEquals(0, sdv.nextDoc()); + assertEquals(new BytesRef("search"), sdv.lookupOrd(sdv.ordValue())); + assertEquals(1, sdv.nextDoc()); + assertEquals(new BytesRef("search"), sdv.lookupOrd(sdv.ordValue())); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, sdv.nextDoc()); + + SortedNumericDocValues sndv = leafReader.getSortedNumericDocValues("sorted_numeric"); + assertNotNull(sndv); + assertEquals(0, sndv.nextDoc()); + assertEquals(1, sndv.nextValue()); + assertEquals(1, sndv.nextDoc()); + assertEquals(1, sndv.nextValue()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, sndv.nextDoc()); + + SortedSetDocValues ssdv = leafReader.getSortedSetDocValues("sorted_set"); + assertNotNull(ssdv); + assertEquals(0, ssdv.nextDoc()); + assertEquals(new 
BytesRef("engine"), ssdv.lookupOrd(ssdv.nextOrd())); + assertEquals(1, ssdv.nextDoc()); + assertEquals(new BytesRef("engine"), ssdv.lookupOrd(ssdv.nextOrd())); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, ssdv.nextDoc()); + + IOUtils.close(reader, w2, dir1, dir2); + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java index 4c9165e1a10..fd8259e2636 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java @@ -63,6 +63,7 @@ import org.apache.lucene.index.MergeTrigger; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.StoredFields; import org.apache.lucene.index.Term; import org.apache.lucene.index.VectorEncoding; @@ -78,6 +79,7 @@ import org.apache.lucene.store.FSDirectory; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.VectorUtil; @@ -1810,4 +1812,51 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe } } } + + public void testMismatchedFields() throws Exception { + Directory dir1 = newDirectory(); + IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig()); + Document doc = new Document(); + doc.add(new KnnFloatVectorField("float", new float[] {1f})); + doc.add(new KnnByteVectorField("byte", new byte[] {42})); + w1.addDocument(doc); + + Directory dir2 = newDirectory(); + IndexWriter w2 = + new IndexWriter(dir2, 
newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler())); + w2.addDocument(doc); + w2.commit(); + + DirectoryReader reader = DirectoryReader.open(w1); + w1.close(); + w2.addIndexes(new MismatchedCodecReader((CodecReader) getOnlyLeafReader(reader), random())); + reader.close(); + w2.forceMerge(1); + reader = DirectoryReader.open(w2); + w2.close(); + + LeafReader leafReader = getOnlyLeafReader(reader); + + ByteVectorValues byteVectors = leafReader.getByteVectorValues("byte"); + assertNotNull(byteVectors); + assertEquals(0, byteVectors.nextDoc()); + assertArrayEquals(new byte[] {42}, byteVectors.vectorValue()); + assertEquals(1, byteVectors.nextDoc()); + assertArrayEquals(new byte[] {42}, byteVectors.vectorValue()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, byteVectors.nextDoc()); + + FloatVectorValues floatVectors = leafReader.getFloatVectorValues("float"); + assertNotNull(floatVectors); + assertEquals(0, floatVectors.nextDoc()); + float[] vector = floatVectors.vectorValue(); + assertEquals(1, vector.length); + assertEquals(1f, vector[0], 0f); + assertEquals(1, floatVectors.nextDoc()); + vector = floatVectors.vectorValue(); + assertEquals(1, vector.length); + assertEquals(1f, vector[0], 0f); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, floatVectors.nextDoc()); + + IOUtils.close(reader, w2, dir1, dir2); + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePointsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePointsFormatTestCase.java index 265e3f073be..a15dd07a79e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePointsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePointsFormatTestCase.java @@ -30,6 +30,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.IntPoint; +import 
org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.CodecReader; @@ -46,6 +47,7 @@ import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; +import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.Term; import org.apache.lucene.internal.tests.ConcurrentMergeSchedulerAccess; import org.apache.lucene.internal.tests.TestSecrets; @@ -1408,4 +1410,80 @@ public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCa } }; } + + public void testMismatchedFields() throws Exception { + Directory dir1 = newDirectory(); + IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig()); + Document doc = new Document(); + doc.add(new LongPoint("f", 1L)); + doc.add(new LongPoint("g", 42L, 43L)); + w1.addDocument(doc); + + Directory dir2 = newDirectory(); + IndexWriter w2 = + new IndexWriter(dir2, newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler())); + w2.addDocument(doc); + w2.commit(); + + DirectoryReader reader = DirectoryReader.open(w1); + w1.close(); + w2.addIndexes(new MismatchedCodecReader((CodecReader) getOnlyLeafReader(reader), random())); + reader.close(); + w2.forceMerge(1); + reader = DirectoryReader.open(w2); + w2.close(); + + LeafReader leafReader = getOnlyLeafReader(reader); + assertEquals(2, leafReader.maxDoc()); + + PointValues fPoints = leafReader.getPointValues("f"); + assertEquals(2, fPoints.size()); + fPoints.intersect( + new IntersectVisitor() { + + int expectedDoc = 0; + + @Override + public void visit(int docID, byte[] packedValue) throws IOException { + assertEquals(LongPoint.pack(1L), new BytesRef(packedValue)); + assertEquals(expectedDoc++, docID); + } + + @Override + public void visit(int docID) throws IOException { + throw 
new UnsupportedOperationException(); + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + return Relation.CELL_CROSSES_QUERY; + } + }); + + PointValues gPoints = leafReader.getPointValues("g"); + assertEquals(2, gPoints.size()); + gPoints.intersect( + new IntersectVisitor() { + + int expectedDoc = 0; + + @Override + public void visit(int docID, byte[] packedValue) throws IOException { + assertEquals(LongPoint.pack(42L, 43L), new BytesRef(packedValue)); + assertEquals(expectedDoc++, docID); + } + + @Override + public void visit(int docID) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + return Relation.CELL_CROSSES_QUERY; + } + }); + + IOUtils.close(reader, w2, dir1, dir2); + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePostingsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePostingsFormatTestCase.java index 8f8233ee680..8e0292b3f8d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePostingsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePostingsFormatTestCase.java @@ -42,6 +42,7 @@ import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexOptions; @@ -54,6 +55,7 @@ import org.apache.lucene.index.MultiTerms; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import 
org.apache.lucene.index.TermsEnum; @@ -71,6 +73,7 @@ import org.apache.lucene.tests.util.LineFileDocs; import org.apache.lucene.tests.util.RamUsageTester; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -1728,4 +1731,41 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest TestUtil.checkIndex(dir); } } + + public void testMismatchedFields() throws Exception { + Directory dir1 = newDirectory(); + IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig()); + Document doc = new Document(); + doc.add(new StringField("f", "a", Store.NO)); + doc.add(new StringField("g", "b", Store.NO)); + w1.addDocument(doc); + + Directory dir2 = newDirectory(); + IndexWriter w2 = + new IndexWriter(dir2, newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler())); + w2.addDocument(doc); + w2.commit(); + + DirectoryReader reader = DirectoryReader.open(w1); + w1.close(); + w2.addIndexes(new MismatchedCodecReader((CodecReader) getOnlyLeafReader(reader), random())); + reader.close(); + w2.forceMerge(1); + reader = DirectoryReader.open(w2); + w2.close(); + + LeafReader leafReader = getOnlyLeafReader(reader); + + TermsEnum te = leafReader.terms("f").iterator(); + assertEquals("a", te.next().utf8ToString()); + assertEquals(2, te.docFreq()); + assertNull(te.next()); + + te = leafReader.terms("g").iterator(); + assertEquals("b", te.next().utf8ToString()); + assertEquals(2, te.docFreq()); + assertNull(te.next()); + + IOUtils.close(reader, w2, dir1, dir2); + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedCodecReader.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedCodecReader.java new file mode 100644 index 00000000000..8c856aafcba --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedCodecReader.java @@ -0,0 
+1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.tests.index; + +import java.io.IOException; +import java.util.Objects; +import java.util.Random; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.NormsProducer; +import org.apache.lucene.codecs.StoredFieldsReader; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DocValuesSkipper; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FilterCodecReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.StoredFieldVisitor; + +/** + * Shuffles field numbers around to try to trip bugs where field numbers are assumed to always be + * consistent across segments. + */ +public class MismatchedCodecReader extends FilterCodecReader { + + private final FieldInfos shuffled; + + /** Sole constructor. 
*/ + public MismatchedCodecReader(CodecReader in, Random random) { + super(in); + shuffled = MismatchedLeafReader.shuffleInfos(in.getFieldInfos(), random); + } + + @Override + public FieldInfos getFieldInfos() { + return shuffled; + } + + @Override + public CacheHelper getCoreCacheHelper() { + return in.getCoreCacheHelper(); + } + + @Override + public CacheHelper getReaderCacheHelper() { + return in.getReaderCacheHelper(); + } + + @Override + public StoredFieldsReader getFieldsReader() { + StoredFieldsReader in = super.getFieldsReader(); + if (in == null) { + return null; + } + return new MismatchedStoredFieldsReader(in, shuffled); + } + + private static class MismatchedStoredFieldsReader extends StoredFieldsReader { + + private final StoredFieldsReader in; + private final FieldInfos shuffled; + + MismatchedStoredFieldsReader(StoredFieldsReader in, FieldInfos shuffled) { + this.in = Objects.requireNonNull(in); + this.shuffled = shuffled; + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public StoredFieldsReader clone() { + return new MismatchedStoredFieldsReader(in.clone(), shuffled); + } + + @Override + public void checkIntegrity() throws IOException { + in.checkIntegrity(); + } + + @Override + public void document(int docID, StoredFieldVisitor visitor) throws IOException { + in.document(docID, new MismatchedLeafReader.MismatchedVisitor(visitor, shuffled)); + } + } + + @Override + public DocValuesProducer getDocValuesReader() { + DocValuesProducer in = super.getDocValuesReader(); + if (in == null) { + return null; + } + return new MismatchedDocValuesProducer(in, shuffled, super.getFieldInfos()); + } + + private static class MismatchedDocValuesProducer extends DocValuesProducer { + + private final DocValuesProducer in; + private final FieldInfos shuffled; + private final FieldInfos orig; + + MismatchedDocValuesProducer(DocValuesProducer in, FieldInfos shuffled, FieldInfos orig) { + this.in = Objects.requireNonNull(in); 
+ this.shuffled = shuffled; + this.orig = orig; + } + + @Override + public void close() throws IOException { + in.close(); + } + + private FieldInfo remapFieldInfo(FieldInfo field) { + FieldInfo fi = shuffled.fieldInfo(field.name); + assert fi != null && fi.number == field.number; + return orig.fieldInfo(field.name); + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) throws IOException { + return in.getNumeric(remapFieldInfo(field)); + } + + @Override + public BinaryDocValues getBinary(FieldInfo field) throws IOException { + return in.getBinary(remapFieldInfo(field)); + } + + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + return in.getSorted(remapFieldInfo(field)); + } + + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { + return in.getSortedNumeric(remapFieldInfo(field)); + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { + return in.getSortedSet(remapFieldInfo(field)); + } + + @Override + public DocValuesSkipper getSkipper(FieldInfo field) throws IOException { + return in.getSkipper(remapFieldInfo(field)); + } + + @Override + public void checkIntegrity() throws IOException { + in.checkIntegrity(); + } + } + + @Override + public NormsProducer getNormsReader() { + NormsProducer in = super.getNormsReader(); + if (in == null) { + return null; + } + return new MismatchedNormsProducer(in, shuffled, super.getFieldInfos()); + } + + private static class MismatchedNormsProducer extends NormsProducer { + + private final NormsProducer in; + private final FieldInfos shuffled; + private final FieldInfos orig; + + MismatchedNormsProducer(NormsProducer in, FieldInfos shuffled, FieldInfos orig) { + this.in = Objects.requireNonNull(in); + this.shuffled = shuffled; + this.orig = orig; + } + + @Override + public void close() throws IOException { + in.close(); + } + + private FieldInfo remapFieldInfo(FieldInfo field) { + FieldInfo fi 
= shuffled.fieldInfo(field.name); + assert fi != null && fi.number == field.number; + return orig.fieldInfo(field.name); + } + + @Override + public NumericDocValues getNorms(FieldInfo field) throws IOException { + return in.getNorms(remapFieldInfo(field)); + } + + @Override + public void checkIntegrity() throws IOException { + in.checkIntegrity(); + } + } +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedLeafReader.java index eddee35240f..46404f514c6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedLeafReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedLeafReader.java @@ -28,8 +28,6 @@ import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.StoredFields; -import org.apache.lucene.search.KnnCollector; -import org.apache.lucene.util.Bits; /** * Shuffles field numbers around to try to trip bugs where field numbers are assumed to always be @@ -55,7 +53,7 @@ public class MismatchedLeafReader extends FilterLeafReader { return new StoredFields() { @Override public void document(int docID, StoredFieldVisitor visitor) throws IOException { - inStoredFields.document(docID, new MismatchedVisitor(visitor)); + inStoredFields.document(docID, new MismatchedVisitor(visitor, shuffled)); } }; } @@ -70,18 +68,6 @@ public class MismatchedLeafReader extends FilterLeafReader { return in.getReaderCacheHelper(); } - @Override - public void searchNearestVectors( - String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { - in.searchNearestVectors(field, target, knnCollector, acceptDocs); - } - - @Override - public void searchNearestVectors( - String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { - 
in.searchNearestVectors(field, target, knnCollector, acceptDocs); - } - static FieldInfos shuffleInfos(FieldInfos infos, Random random) { // first, shuffle the order List shuffled = new ArrayList<>(); @@ -124,11 +110,13 @@ public class MismatchedLeafReader extends FilterLeafReader { /** StoredFieldsVisitor that remaps actual field numbers to our new shuffled ones. */ // TODO: its strange this part of our IR api exposes FieldInfo, // no other "user-accessible" codec apis do this? - class MismatchedVisitor extends StoredFieldVisitor { + static class MismatchedVisitor extends StoredFieldVisitor { final StoredFieldVisitor in; + final FieldInfos shuffled; - MismatchedVisitor(StoredFieldVisitor in) { + MismatchedVisitor(StoredFieldVisitor in, FieldInfos shuffled) { this.in = in; + this.shuffled = shuffled; } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/MockRandomMergePolicy.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/MockRandomMergePolicy.java index 74f3b87ed5d..d3f202ad9dc 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/MockRandomMergePolicy.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/MockRandomMergePolicy.java @@ -237,8 +237,7 @@ public class MockRandomMergePolicy extends MergePolicy { "NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader=" + reader); } - return SlowCodecReaderWrapper.wrap( - new MismatchedLeafReader(new MergeReaderWrapper(reader), r)); + return new MismatchedCodecReader(reader, r); } else { // otherwise, reader is unchanged return reader; diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java index 26dd29e27b9..84fa120b88b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java +++ 
b/lucene/test-framework/src/java/org/apache/lucene/tests/util/LuceneTestCase.java
@@ -182,6 +182,7 @@ import org.apache.lucene.tests.index.AssertingLeafReader;
 import org.apache.lucene.tests.index.FieldFilterLeafReader;
 import org.apache.lucene.tests.index.MergingCodecReader;
 import org.apache.lucene.tests.index.MergingDirectoryReaderWrapper;
+import org.apache.lucene.tests.index.MismatchedCodecReader;
 import org.apache.lucene.tests.index.MismatchedDirectoryReader;
 import org.apache.lucene.tests.index.MismatchedLeafReader;
 import org.apache.lucene.tests.index.MockIndexWriterEventListener;
@@ -1746,12 +1747,15 @@ public abstract class LuceneTestCase extends Assert {
             System.out.println(
                 "NOTE: LuceneTestCase.wrapReader: wrapping previous reader="
                     + r
-                    + " with MismatchedLeaf/DirectoryReader");
+                    + " with MismatchedLeaf/Directory/CodecReader");
           }
-          if (r instanceof LeafReader) {
+          // CodecReader extends LeafReader: test it first, otherwise its branch is unreachable.
+          if (r instanceof CodecReader) {
+            r = new MismatchedCodecReader((CodecReader) r, random);
+          } else if (r instanceof LeafReader) {
             r = new MismatchedLeafReader((LeafReader) r, random);
           } else if (r instanceof DirectoryReader) {
             r = new MismatchedDirectoryReader((DirectoryReader) r, random);
           }
           break;
         case 4: