LUCENE-10354: Clarify contract of codec APIs with missing/disabled fields. (#583)

This commit is contained in:
Adrien Grand 2022-01-05 18:47:35 +01:00 committed by GitHub
parent 7fdba36941
commit c8651afde7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 57 additions and 32 deletions

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
@ -37,31 +38,38 @@ public abstract class DocValuesProducer implements Closeable {
/**
* Returns {@link NumericDocValues} for this field. The returned instance need not be thread-safe:
* it will only be used by a single thread.
* it will only be used by a single thread. The behavior is undefined if the doc values type of
* the given field is not {@link DocValuesType#NUMERIC}. The return value is never {@code null}.
*/
public abstract NumericDocValues getNumeric(FieldInfo field) throws IOException;
/**
* Returns {@link BinaryDocValues} for this field. The returned instance need not be thread-safe:
* it will only be used by a single thread.
* it will only be used by a single thread. The behavior is undefined if the doc values type of
* the given field is not {@link DocValuesType#BINARY}. The return value is never {@code null}.
*/
public abstract BinaryDocValues getBinary(FieldInfo field) throws IOException;
/**
* Returns {@link SortedDocValues} for this field. The returned instance need not be thread-safe:
* it will only be used by a single thread.
* it will only be used by a single thread. The behavior is undefined if the doc values type of
* the given field is not {@link DocValuesType#SORTED}. The return value is never {@code null}.
*/
public abstract SortedDocValues getSorted(FieldInfo field) throws IOException;
/**
* Returns {@link SortedNumericDocValues} for this field. The returned instance need not be
* thread-safe: it will only be used by a single thread.
* thread-safe: it will only be used by a single thread. The behavior is undefined if the doc
* values type of the given field is not {@link DocValuesType#SORTED_NUMERIC}. The return value is
* never {@code null}.
*/
public abstract SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException;
/**
* Returns {@link SortedSetDocValues} for this field. The returned instance need not be
* thread-safe: it will only be used by a single thread.
* thread-safe: it will only be used by a single thread. The behavior is undefined if the doc
* values type of the given field is not {@link DocValuesType#SORTED_SET}. The return value is
* never {@code null}.
*/
public abstract SortedSetDocValues getSortedSet(FieldInfo field) throws IOException;

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Accountable;
@ -40,7 +41,11 @@ public abstract class KnnVectorsReader implements Closeable, Accountable {
*/
public abstract void checkIntegrity() throws IOException;
/** Returns the {@link VectorValues} for the given {@code field} */
/**
* Returns the {@link VectorValues} for the given {@code field}. The behavior is undefined if the
* given field doesn't have KNN vectors enabled on its {@link FieldInfo}. The return value is
* never {@code null}.
*/
public abstract VectorValues getVectorValues(String field) throws IOException;
/**
@ -53,6 +58,9 @@ public abstract class KnnVectorsReader implements Closeable, Accountable {
* true k closest neighbors. For large values of k (for example when k is close to the total
* number of documents), the search may also retrieve fewer than k documents.
*
* <p>The behavior is undefined if the given field doesn't have KNN vectors enabled on its {@link
* FieldInfo}. The return value is never {@code null}.
*
* @param field the vector field to search
* @param target the vector-valued query
* @param k the number of docs to return

View File

@ -33,7 +33,8 @@ public abstract class NormsProducer implements Closeable {
/**
* Returns {@link NumericDocValues} for this field. The returned instance need not be thread-safe:
* it will only be used by a single thread.
* it will only be used by a single thread. The behavior is undefined if the given field doesn't
* have norms enabled on its {@link FieldInfo}. The return value is never {@code null}.
*/
public abstract NumericDocValues getNorms(FieldInfo field) throws IOException;

View File

@ -18,6 +18,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.PointValues;
/**
@ -40,7 +41,10 @@ public abstract class PointsReader implements Closeable {
*/
public abstract void checkIntegrity() throws IOException;
/** Return {@link PointValues} for the given {@code field}. */
/**
* Return {@link PointValues} for the given {@code field}. The behavior is undefined if the given
* field doesn't have points enabled on its {@link FieldInfo}.
*/
public abstract PointValues getValues(String field) throws IOException;
/**

View File

@ -226,19 +226,13 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
@Override
public VectorValues getVectorValues(String field) throws IOException {
FieldEntry fieldEntry = fields.get(field);
if (fieldEntry == null || fieldEntry.dimension == 0) {
return null;
}
return getOffHeapVectorValues(fieldEntry);
}
@Override
public TopDocs search(String field, float[] target, int k, Bits acceptDocs) throws IOException {
FieldEntry fieldEntry = fields.get(field);
if (fieldEntry == null || fieldEntry.dimension == 0) {
return null;
}
if (fieldEntry.size() == 0) {
return new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
}

View File

@ -105,15 +105,13 @@ public abstract class CodecReader extends LeafReader {
@Override
public final Terms terms(String field) throws IOException {
// ensureOpen(); no; getPostingsReader calls this
FieldsProducer fieldsProducer = getPostingsReader();
if (fieldsProducer == null) {
ensureOpen();
FieldInfo fi = getFieldInfos().fieldInfo(field);
if (fi == null || fi.getIndexOptions() == IndexOptions.NONE) {
// Field does not exist or does not index postings
return null;
}
// We could check the FieldInfo IndexOptions but there's no point since
// PostingsReader will simply return null for fields that don't exist or that have no terms
// index.
return fieldsProducer.terms(field);
return getPostingsReader().terms(field);
}
// returns the FieldInfo that corresponds to the given field and type, or

View File

@ -22,6 +22,7 @@ import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.VectorValues;
@ -45,7 +46,7 @@ public class AssertingKnnVectorsFormat extends KnnVectorsFormat {
@Override
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
return new AssertingKnnVectorsReader(delegate.fieldsReader(state));
return new AssertingKnnVectorsReader(delegate.fieldsReader(state), state.fieldInfos);
}
static class AssertingKnnVectorsWriter extends KnnVectorsWriter {
@ -76,10 +77,12 @@ public class AssertingKnnVectorsFormat extends KnnVectorsFormat {
static class AssertingKnnVectorsReader extends KnnVectorsReader {
final KnnVectorsReader delegate;
final FieldInfos fis;
AssertingKnnVectorsReader(KnnVectorsReader delegate) {
AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis) {
assert delegate != null;
this.delegate = delegate;
this.fis = fis;
}
@Override
@ -89,17 +92,20 @@ public class AssertingKnnVectorsFormat extends KnnVectorsFormat {
@Override
public VectorValues getVectorValues(String field) throws IOException {
FieldInfo fi = fis.fieldInfo(field);
assert fi != null && fi.getVectorDimension() > 0;
VectorValues values = delegate.getVectorValues(field);
if (values != null) {
assert values.docID() == -1;
assert values.size() >= 0;
assert values.dimension() > 0;
}
assert values != null;
assert values.docID() == -1;
assert values.size() >= 0;
assert values.dimension() > 0;
return values;
}
@Override
public TopDocs search(String field, float[] target, int k, Bits acceptDocs) throws IOException {
FieldInfo fi = fis.fieldInfo(field);
assert fi != null && fi.getVectorDimension() > 0;
TopDocs hits = delegate.search(field, target, k, acceptDocs);
assert hits != null;
assert hits.scoreDocs.length <= k;

View File

@ -21,6 +21,7 @@ import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SegmentReadState;
@ -54,18 +55,21 @@ public final class AssertingPointsFormat extends PointsFormat {
@Override
public PointsReader fieldsReader(SegmentReadState state) throws IOException {
return new AssertingPointsReader(state.segmentInfo.maxDoc(), in.fieldsReader(state), false);
return new AssertingPointsReader(
state.segmentInfo.maxDoc(), in.fieldsReader(state), state.fieldInfos, false);
}
static class AssertingPointsReader extends PointsReader {
private final PointsReader in;
private final int maxDoc;
private final FieldInfos fis;
private final boolean merging;
private final Thread creationThread;
AssertingPointsReader(int maxDoc, PointsReader in, boolean merging) {
AssertingPointsReader(int maxDoc, PointsReader in, FieldInfos fis, boolean merging) {
this.in = in;
this.maxDoc = maxDoc;
this.fis = fis;
this.merging = merging;
this.creationThread = Thread.currentThread();
// do a few simple checks on init
@ -80,6 +84,8 @@ public final class AssertingPointsFormat extends PointsFormat {
@Override
public PointValues getValues(String field) throws IOException {
FieldInfo fi = fis.fieldInfo(field);
assert fi != null && fi.getPointDimensionCount() > 0;
if (merging) {
AssertingCodec.assertThread("PointsReader", creationThread);
}
@ -97,7 +103,7 @@ public final class AssertingPointsFormat extends PointsFormat {
@Override
public PointsReader getMergeInstance() {
return new AssertingPointsReader(maxDoc, in.getMergeInstance(), true);
return new AssertingPointsReader(maxDoc, in.getMergeInstance(), fis, true);
}
@Override