Improve testing of mismatched field numbers. (#13812)

This improves testing of mismatched field numbers by
 - improving `AssertingDocValuesProducer` to detect mismatched field numbers,
 - introducing a `MismatchedCodecReader` to actually test mismatched field
   numbers on `DocValuesProducer` (a `MismatchedLeafReader` wrapped in a
   `SlowCodecReaderWrapper` doesn't work, since `SlowCodecReaderWrapper`
   implicitly resolves the correct `FieldInfo` object; see the sketch after
   this list),
 - introducing an explicit test for mismatched field numbers for doc values, points,
postings and knn vectors.
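
For context, the core trick can be condensed as follows (the helper below is
illustrative only; the real implementation is the `MismatchedDocValuesProducer`
inside `MismatchedCodecReader` further down). Roughly, `SlowCodecReaderWrapper`
exposes a producer that delegates back through the name-based `LeafReader` API,
so the `FieldInfo` passed in by merge code never reaches the underlying codec;
`MismatchedCodecReader` instead advertises shuffled `FieldInfos` but resolves
the segment's original `FieldInfo` by name before delegating:

import java.io.IOException;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.NumericDocValues;

// Illustrative fragment only, not part of the change itself.
final class RemapByNameSketch {
  private RemapByNameSketch() {}

  static NumericDocValues getNumeric(
      DocValuesProducer producer, FieldInfos originalInfos, FieldInfo shuffledField)
      throws IOException {
    // The caller hands in a FieldInfo carrying a shuffled field number. Resolving the
    // segment's own FieldInfo by name lets the producer see the number it actually wrote,
    // while the calling merge code has been exercised with a mismatched number.
    FieldInfo original = originalInfos.fieldInfo(shuffledField.name);
    return producer.getNumeric(original);
  }
}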

These new tests uncovered a bug when merging sorted doc values: the merge code
called the underlying doc values producer with the merged field info instead of
the per-reader field info, whose field number may differ (sketched below).
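
Condensed into a standalone sketch, the faulty pattern and the fix look like
this (the actual change is the one-line `DocValuesConsumer` hunk below; the
class and method names here are only for illustration):

import java.io.IOException;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SortedDocValues;

// Sketch of the per-reader step inside the sorted-field merge loop.
final class SortedMergeFixSketch {
  private SortedMergeFixSketch() {}

  static SortedDocValues sortedValuesFor(
      DocValuesProducer docValuesProducer, FieldInfos readerFieldInfos, FieldInfo mergedFieldInfo)
      throws IOException {
    // Field numbers are assigned per segment, so the merged FieldInfo may carry a
    // different number than this segment uses for the same field name.
    FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(mergedFieldInfo.name);
    if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
      // Before the fix: docValuesProducer.getSorted(mergedFieldInfo) -- wrong field number.
      return docValuesProducer.getSorted(readerFieldInfo);
    }
    return null;
  }
}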

Closes #13805
Adrien Grand 2024-09-20 14:37:45 +02:00 committed by GitHub
parent 7ef7122eba
commit da1f954601
15 changed files with 502 additions and 28 deletions


@ -83,4 +83,9 @@ public class TestLucene90HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
public void testMergingWithDifferentByteKnnFields() {
// unimplemented
}
@Override
public void testMismatchedFields() throws Exception {
// requires byte support
}
}


@ -82,4 +82,9 @@ public class TestLucene91HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
public void testMergingWithDifferentByteKnnFields() {
// unimplemented
}
@Override
public void testMismatchedFields() throws Exception {
// requires byte support
}
}


@ -72,4 +72,9 @@ public class TestLucene92HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
public void testMergingWithDifferentByteKnnFields() {
// unimplemented
}
@Override
public void testMismatchedFields() throws Exception {
// requires byte support
}
}


@ -613,7 +613,7 @@ public abstract class DocValuesConsumer implements Closeable {
if (docValuesProducer != null) {
FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
values = docValuesProducer.getSorted(fieldInfo);
values = docValuesProducer.getSorted(readerFieldInfo);
}
}
if (values == null) {


@ -33,9 +33,9 @@ import org.apache.lucene.util.bkd.BKDReader;
/** Reads point values previously written with {@link Lucene90PointsWriter} */
public class Lucene90PointsReader extends PointsReader {
final IndexInput indexIn, dataIn;
final SegmentReadState readState;
final IntObjectHashMap<PointValues> readers = new IntObjectHashMap<>();
private final IndexInput indexIn, dataIn;
private final SegmentReadState readState;
private final IntObjectHashMap<PointValues> readers = new IntObjectHashMap<>();
/** Sole constructor */
public Lucene90PointsReader(SegmentReadState readState) throws IOException {


@ -253,7 +253,7 @@ public class Lucene90PointsWriter extends PointsWriter {
FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
PointValues aPointValues = reader90.readers.get(readerFieldInfo.number);
PointValues aPointValues = reader90.getValues(readerFieldInfo.name);
if (aPointValues != null) {
pointValues.add(aPointValues);
docMaps.add(mergeState.docMaps[i]);


@ -27,6 +27,7 @@ import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@ -59,7 +60,8 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
assert state.fieldInfos.hasDocValues();
DocValuesProducer producer = in.fieldsProducer(state);
assert producer != null;
return new AssertingDocValuesProducer(producer, state.segmentInfo.maxDoc(), false);
return new AssertingDocValuesProducer(
producer, state.fieldInfos, state.segmentInfo.maxDoc(), false);
}
static class AssertingDocValuesConsumer extends DocValuesConsumer {
@ -214,12 +216,15 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
static class AssertingDocValuesProducer extends DocValuesProducer {
private final DocValuesProducer in;
private final FieldInfos fieldInfos;
private final int maxDoc;
private final boolean merging;
private final Thread creationThread;
AssertingDocValuesProducer(DocValuesProducer in, int maxDoc, boolean merging) {
AssertingDocValuesProducer(
DocValuesProducer in, FieldInfos fieldInfos, int maxDoc, boolean merging) {
this.in = in;
this.fieldInfos = fieldInfos;
this.maxDoc = maxDoc;
this.merging = merging;
this.creationThread = Thread.currentThread();
@ -229,6 +234,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
assert fieldInfos.fieldInfo(field.name).number == field.number;
if (merging) {
AssertingCodec.assertThread("DocValuesProducer", creationThread);
}
@ -240,6 +246,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
assert fieldInfos.fieldInfo(field.name).number == field.number;
if (merging) {
AssertingCodec.assertThread("DocValuesProducer", creationThread);
}
@ -251,6 +258,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
assert fieldInfos.fieldInfo(field.name).number == field.number;
if (merging) {
AssertingCodec.assertThread("DocValuesProducer", creationThread);
}
@ -262,6 +270,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
assert fieldInfos.fieldInfo(field.name).number == field.number;
if (merging) {
AssertingCodec.assertThread("DocValuesProducer", creationThread);
}
@ -273,6 +282,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
assert fieldInfos.fieldInfo(field.name).number == field.number;
if (merging) {
AssertingCodec.assertThread("DocValuesProducer", creationThread);
}
@ -284,6 +294,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
assert fieldInfos.fieldInfo(field.name).number == field.number;
assert field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE;
DocValuesSkipper skipper = in.getSkipper(field);
assert skipper != null;
@ -303,7 +314,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
@Override
public DocValuesProducer getMergeInstance() {
return new AssertingDocValuesProducer(in.getMergeInstance(), maxDoc, true);
return new AssertingDocValuesProducer(in.getMergeInstance(), fieldInfos, maxDoc, true);
}
@Override


@ -24,6 +24,7 @@ import java.io.IOException;
import java.io.PrintStream;
import java.util.function.Supplier;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
@ -31,22 +32,26 @@ import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.util.TestUtil;
@ -832,4 +837,74 @@ public abstract class BaseDocValuesFormatTestCase extends LegacyBaseDocValuesFor
int docID();
}
public void testMismatchedFields() throws Exception {
Directory dir1 = newDirectory();
IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig());
Document doc = new Document();
doc.add(new BinaryDocValuesField("binary", new BytesRef("lucene")));
doc.add(new NumericDocValuesField("numeric", 0L));
doc.add(new SortedDocValuesField("sorted", new BytesRef("search")));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 1L));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("engine")));
w1.addDocument(doc);
Directory dir2 = newDirectory();
IndexWriter w2 =
new IndexWriter(dir2, newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler()));
w2.addDocument(doc);
w2.commit();
DirectoryReader reader = DirectoryReader.open(w1);
w1.close();
w2.addIndexes(new MismatchedCodecReader((CodecReader) getOnlyLeafReader(reader), random()));
reader.close();
w2.forceMerge(1);
reader = DirectoryReader.open(w2);
w2.close();
LeafReader leafReader = getOnlyLeafReader(reader);
BinaryDocValues bdv = leafReader.getBinaryDocValues("binary");
assertNotNull(bdv);
assertEquals(0, bdv.nextDoc());
assertEquals(new BytesRef("lucene"), bdv.binaryValue());
assertEquals(1, bdv.nextDoc());
assertEquals(new BytesRef("lucene"), bdv.binaryValue());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, bdv.nextDoc());
NumericDocValues ndv = leafReader.getNumericDocValues("numeric");
assertNotNull(ndv);
assertEquals(0, ndv.nextDoc());
assertEquals(0, ndv.longValue());
assertEquals(1, ndv.nextDoc());
assertEquals(0, ndv.longValue());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, ndv.nextDoc());
SortedDocValues sdv = leafReader.getSortedDocValues("sorted");
assertNotNull(sdv);
assertEquals(0, sdv.nextDoc());
assertEquals(new BytesRef("search"), sdv.lookupOrd(sdv.ordValue()));
assertEquals(1, sdv.nextDoc());
assertEquals(new BytesRef("search"), sdv.lookupOrd(sdv.ordValue()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, sdv.nextDoc());
SortedNumericDocValues sndv = leafReader.getSortedNumericDocValues("sorted_numeric");
assertNotNull(sndv);
assertEquals(0, sndv.nextDoc());
assertEquals(1, sndv.nextValue());
assertEquals(1, sndv.nextDoc());
assertEquals(1, sndv.nextValue());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, sndv.nextDoc());
SortedSetDocValues ssdv = leafReader.getSortedSetDocValues("sorted_set");
assertNotNull(ssdv);
assertEquals(0, ssdv.nextDoc());
assertEquals(new BytesRef("engine"), ssdv.lookupOrd(ssdv.nextOrd()));
assertEquals(1, ssdv.nextDoc());
assertEquals(new BytesRef("engine"), ssdv.lookupOrd(ssdv.nextOrd()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, ssdv.nextDoc());
IOUtils.close(reader, w2, dir1, dir2);
}
}


@ -63,6 +63,7 @@ import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.VectorEncoding;
@ -78,6 +79,7 @@ import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.VectorUtil;
@ -1810,4 +1812,51 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe
}
}
}
public void testMismatchedFields() throws Exception {
Directory dir1 = newDirectory();
IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig());
Document doc = new Document();
doc.add(new KnnFloatVectorField("float", new float[] {1f}));
doc.add(new KnnByteVectorField("byte", new byte[] {42}));
w1.addDocument(doc);
Directory dir2 = newDirectory();
IndexWriter w2 =
new IndexWriter(dir2, newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler()));
w2.addDocument(doc);
w2.commit();
DirectoryReader reader = DirectoryReader.open(w1);
w1.close();
w2.addIndexes(new MismatchedCodecReader((CodecReader) getOnlyLeafReader(reader), random()));
reader.close();
w2.forceMerge(1);
reader = DirectoryReader.open(w2);
w2.close();
LeafReader leafReader = getOnlyLeafReader(reader);
ByteVectorValues byteVectors = leafReader.getByteVectorValues("byte");
assertNotNull(byteVectors);
assertEquals(0, byteVectors.nextDoc());
assertArrayEquals(new byte[] {42}, byteVectors.vectorValue());
assertEquals(1, byteVectors.nextDoc());
assertArrayEquals(new byte[] {42}, byteVectors.vectorValue());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, byteVectors.nextDoc());
FloatVectorValues floatVectors = leafReader.getFloatVectorValues("float");
assertNotNull(floatVectors);
assertEquals(0, floatVectors.nextDoc());
float[] vector = floatVectors.vectorValue();
assertEquals(1, vector.length);
assertEquals(1f, vector[0], 0f);
assertEquals(1, floatVectors.nextDoc());
vector = floatVectors.vectorValue();
assertEquals(1, vector.length);
assertEquals(1f, vector[0], 0f);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, floatVectors.nextDoc());
IOUtils.close(reader, w2, dir1, dir2);
}
}


@ -30,6 +30,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CodecReader;
@ -46,6 +47,7 @@ import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.internal.tests.ConcurrentMergeSchedulerAccess;
import org.apache.lucene.internal.tests.TestSecrets;
@ -1408,4 +1410,80 @@ public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCa
}
};
}
public void testMismatchedFields() throws Exception {
Directory dir1 = newDirectory();
IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig());
Document doc = new Document();
doc.add(new LongPoint("f", 1L));
doc.add(new LongPoint("g", 42L, 43L));
w1.addDocument(doc);
Directory dir2 = newDirectory();
IndexWriter w2 =
new IndexWriter(dir2, newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler()));
w2.addDocument(doc);
w2.commit();
DirectoryReader reader = DirectoryReader.open(w1);
w1.close();
w2.addIndexes(new MismatchedCodecReader((CodecReader) getOnlyLeafReader(reader), random()));
reader.close();
w2.forceMerge(1);
reader = DirectoryReader.open(w2);
w2.close();
LeafReader leafReader = getOnlyLeafReader(reader);
assertEquals(2, leafReader.maxDoc());
PointValues fPoints = leafReader.getPointValues("f");
assertEquals(2, fPoints.size());
fPoints.intersect(
new IntersectVisitor() {
int expectedDoc = 0;
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
assertEquals(LongPoint.pack(1L), new BytesRef(packedValue));
assertEquals(expectedDoc++, docID);
}
@Override
public void visit(int docID) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_CROSSES_QUERY;
}
});
PointValues gPoints = leafReader.getPointValues("g");
assertEquals(2, gPoints.size());
gPoints.intersect(
new IntersectVisitor() {
int expectedDoc = 0;
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
assertEquals(LongPoint.pack(42L, 43L), new BytesRef(packedValue));
assertEquals(expectedDoc++, docID);
}
@Override
public void visit(int docID) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_CROSSES_QUERY;
}
});
IOUtils.close(reader, w2, dir1, dir2);
}
}


@ -42,6 +42,7 @@ import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexOptions;
@ -54,6 +55,7 @@ import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@ -71,6 +73,7 @@ import org.apache.lucene.tests.util.LineFileDocs;
import org.apache.lucene.tests.util.RamUsageTester;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -1728,4 +1731,41 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
TestUtil.checkIndex(dir);
}
}
public void testMismatchedFields() throws Exception {
Directory dir1 = newDirectory();
IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig());
Document doc = new Document();
doc.add(new StringField("f", "a", Store.NO));
doc.add(new StringField("g", "b", Store.NO));
w1.addDocument(doc);
Directory dir2 = newDirectory();
IndexWriter w2 =
new IndexWriter(dir2, newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler()));
w2.addDocument(doc);
w2.commit();
DirectoryReader reader = DirectoryReader.open(w1);
w1.close();
w2.addIndexes(new MismatchedCodecReader((CodecReader) getOnlyLeafReader(reader), random()));
reader.close();
w2.forceMerge(1);
reader = DirectoryReader.open(w2);
w2.close();
LeafReader leafReader = getOnlyLeafReader(reader);
TermsEnum te = leafReader.terms("f").iterator();
assertEquals("a", te.next().utf8ToString());
assertEquals(2, te.docFreq());
assertNull(te.next());
te = leafReader.terms("g").iterator();
assertEquals("b", te.next().utf8ToString());
assertEquals(2, te.docFreq());
assertNull(te.next());
IOUtils.close(reader, w2, dir1, dir2);
}
}


@ -0,0 +1,216 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.tests.index;
import java.io.IOException;
import java.util.Objects;
import java.util.Random;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterCodecReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
/**
* Shuffles field numbers around to try to trip bugs where field numbers are assumed to always be
* consistent across segments.
*/
public class MismatchedCodecReader extends FilterCodecReader {
private final FieldInfos shuffled;
/** Sole constructor. */
public MismatchedCodecReader(CodecReader in, Random random) {
super(in);
shuffled = MismatchedLeafReader.shuffleInfos(in.getFieldInfos(), random);
}
@Override
public FieldInfos getFieldInfos() {
return shuffled;
}
@Override
public CacheHelper getCoreCacheHelper() {
return in.getCoreCacheHelper();
}
@Override
public CacheHelper getReaderCacheHelper() {
return in.getReaderCacheHelper();
}
@Override
public StoredFieldsReader getFieldsReader() {
StoredFieldsReader in = super.getFieldsReader();
if (in == null) {
return null;
}
return new MismatchedStoredFieldsReader(in, shuffled);
}
private static class MismatchedStoredFieldsReader extends StoredFieldsReader {
private final StoredFieldsReader in;
private final FieldInfos shuffled;
MismatchedStoredFieldsReader(StoredFieldsReader in, FieldInfos shuffled) {
this.in = Objects.requireNonNull(in);
this.shuffled = shuffled;
}
@Override
public void close() throws IOException {
in.close();
}
@Override
public StoredFieldsReader clone() {
return new MismatchedStoredFieldsReader(in.clone(), shuffled);
}
@Override
public void checkIntegrity() throws IOException {
in.checkIntegrity();
}
@Override
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
in.document(docID, new MismatchedLeafReader.MismatchedVisitor(visitor, shuffled));
}
}
@Override
public DocValuesProducer getDocValuesReader() {
DocValuesProducer in = super.getDocValuesReader();
if (in == null) {
return null;
}
return new MismatchedDocValuesProducer(in, shuffled, super.getFieldInfos());
}
private static class MismatchedDocValuesProducer extends DocValuesProducer {
private final DocValuesProducer in;
private final FieldInfos shuffled;
private final FieldInfos orig;
MismatchedDocValuesProducer(DocValuesProducer in, FieldInfos shuffled, FieldInfos orig) {
this.in = Objects.requireNonNull(in);
this.shuffled = shuffled;
this.orig = orig;
}
@Override
public void close() throws IOException {
in.close();
}
private FieldInfo remapFieldInfo(FieldInfo field) {
FieldInfo fi = shuffled.fieldInfo(field.name);
assert fi != null && fi.number == field.number;
return orig.fieldInfo(field.name);
}
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
return in.getNumeric(remapFieldInfo(field));
}
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
return in.getBinary(remapFieldInfo(field));
}
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
return in.getSorted(remapFieldInfo(field));
}
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
return in.getSortedNumeric(remapFieldInfo(field));
}
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
return in.getSortedSet(remapFieldInfo(field));
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
return in.getSkipper(remapFieldInfo(field));
}
@Override
public void checkIntegrity() throws IOException {
in.checkIntegrity();
}
}
@Override
public NormsProducer getNormsReader() {
NormsProducer in = super.getNormsReader();
if (in == null) {
return null;
}
return new MismatchedNormsProducer(in, shuffled, super.getFieldInfos());
}
private static class MismatchedNormsProducer extends NormsProducer {
private final NormsProducer in;
private final FieldInfos shuffled;
private final FieldInfos orig;
MismatchedNormsProducer(NormsProducer in, FieldInfos shuffled, FieldInfos orig) {
this.in = Objects.requireNonNull(in);
this.shuffled = shuffled;
this.orig = orig;
}
@Override
public void close() throws IOException {
in.close();
}
private FieldInfo remapFieldInfo(FieldInfo field) {
FieldInfo fi = shuffled.fieldInfo(field.name);
assert fi != null && fi.number == field.number;
return orig.fieldInfo(field.name);
}
@Override
public NumericDocValues getNorms(FieldInfo field) throws IOException {
return in.getNorms(remapFieldInfo(field));
}
@Override
public void checkIntegrity() throws IOException {
in.checkIntegrity();
}
}
}


@ -28,8 +28,6 @@ import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.util.Bits;
/**
* Shuffles field numbers around to try to trip bugs where field numbers are assumed to always be
@ -55,7 +53,7 @@ public class MismatchedLeafReader extends FilterLeafReader {
return new StoredFields() {
@Override
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
inStoredFields.document(docID, new MismatchedVisitor(visitor));
inStoredFields.document(docID, new MismatchedVisitor(visitor, shuffled));
}
};
}
@ -70,18 +68,6 @@ public class MismatchedLeafReader extends FilterLeafReader {
return in.getReaderCacheHelper();
}
@Override
public void searchNearestVectors(
String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
in.searchNearestVectors(field, target, knnCollector, acceptDocs);
}
@Override
public void searchNearestVectors(
String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
in.searchNearestVectors(field, target, knnCollector, acceptDocs);
}
static FieldInfos shuffleInfos(FieldInfos infos, Random random) {
// first, shuffle the order
List<FieldInfo> shuffled = new ArrayList<>();
@ -124,11 +110,13 @@ public class MismatchedLeafReader extends FilterLeafReader {
/** StoredFieldsVisitor that remaps actual field numbers to our new shuffled ones. */
// TODO: its strange this part of our IR api exposes FieldInfo,
// no other "user-accessible" codec apis do this?
class MismatchedVisitor extends StoredFieldVisitor {
static class MismatchedVisitor extends StoredFieldVisitor {
final StoredFieldVisitor in;
final FieldInfos shuffled;
MismatchedVisitor(StoredFieldVisitor in) {
MismatchedVisitor(StoredFieldVisitor in, FieldInfos shuffled) {
this.in = in;
this.shuffled = shuffled;
}
@Override


@ -237,8 +237,7 @@ public class MockRandomMergePolicy extends MergePolicy {
"NOTE: MockRandomMergePolicy now swaps in a MismatchedLeafReader for merging reader="
+ reader);
}
return SlowCodecReaderWrapper.wrap(
new MismatchedLeafReader(new MergeReaderWrapper(reader), r));
return new MismatchedCodecReader(reader, r);
} else {
// otherwise, reader is unchanged
return reader;


@ -182,6 +182,7 @@ import org.apache.lucene.tests.index.AssertingLeafReader;
import org.apache.lucene.tests.index.FieldFilterLeafReader;
import org.apache.lucene.tests.index.MergingCodecReader;
import org.apache.lucene.tests.index.MergingDirectoryReaderWrapper;
import org.apache.lucene.tests.index.MismatchedCodecReader;
import org.apache.lucene.tests.index.MismatchedDirectoryReader;
import org.apache.lucene.tests.index.MismatchedLeafReader;
import org.apache.lucene.tests.index.MockIndexWriterEventListener;
@ -1746,12 +1747,14 @@ public abstract class LuceneTestCase extends Assert {
System.out.println(
"NOTE: LuceneTestCase.wrapReader: wrapping previous reader="
+ r
+ " with MismatchedLeaf/DirectoryReader");
+ " with MismatchedLeaf/Directory/CodecReader");
}
if (r instanceof LeafReader) {
r = new MismatchedLeafReader((LeafReader) r, random);
} else if (r instanceof DirectoryReader) {
r = new MismatchedDirectoryReader((DirectoryReader) r, random);
} else if (r instanceof CodecReader) {
r = new MismatchedCodecReader((CodecReader) r, random);
}
break;
case 4: