LUCENE-9855: Rename knn search vector format (#218)

Tomoko Uchida, 2021-07-24 12:03:15 +09:00, committed by GitHub
parent ad7746d6e3
commit df807dbe8f
49 changed files with 423 additions and 409 deletions

View File

@ -7,9 +7,9 @@ http://s.apache.org/luceneversions
New Features
* LUCENE-9322: Vector-valued fields, Lucene90 Codec (Mike Sokolov, Julie Tibshirani, Tomoko Uchida)
* LUCENE-9322 LUCENE-9855: Vector-valued fields, Lucene90 Codec (Mike Sokolov, Julie Tibshirani, Tomoko Uchida)
* LUCENE-9004: Approximate nearest vector search via NSW graphs
* LUCENE-9004: Approximate nearest vector search via NSW graphs (Mike Sokolov, Tomoko Uchida et al.)
* LUCENE-9659: SpanPayloadCheckQuery now supports inequalities. (Kevin Watters, Gus Heck)

View File

@ -28,6 +28,7 @@ import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
@ -35,7 +36,6 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
@ -122,8 +122,8 @@ public class Lucene70Codec extends Codec {
}
@Override
public VectorFormat vectorFormat() {
return VectorFormat.EMPTY;
public KnnVectorsFormat knnVectorsFormat() {
return KnnVectorsFormat.EMPTY;
}
@Override

View File

@ -27,6 +27,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
@ -34,7 +35,6 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
@ -129,7 +129,7 @@ public class Lucene80Codec extends Codec {
}
@Override
public final VectorFormat vectorFormat() {
return VectorFormat.EMPTY;
public final KnnVectorsFormat knnVectorsFormat() {
return KnnVectorsFormat.EMPTY;
}
}

View File

@ -31,6 +31,7 @@ import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
@ -38,7 +39,6 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
@ -134,8 +134,8 @@ public class Lucene84Codec extends Codec {
}
@Override
public VectorFormat vectorFormat() {
return VectorFormat.EMPTY;
public KnnVectorsFormat knnVectorsFormat() {
return KnnVectorsFormat.EMPTY;
}
/**

View File

@ -30,6 +30,7 @@ import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
@ -37,7 +38,6 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
@ -133,8 +133,8 @@ public class Lucene86Codec extends Codec {
}
@Override
public final VectorFormat vectorFormat() {
return VectorFormat.EMPTY;
public final KnnVectorsFormat knnVectorsFormat() {
return KnnVectorsFormat.EMPTY;
}
/**

View File

@ -32,6 +32,7 @@ import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
@ -39,7 +40,6 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
@ -157,8 +157,8 @@ public class Lucene87Codec extends Codec {
}
@Override
public final VectorFormat vectorFormat() {
return VectorFormat.EMPTY;
public final KnnVectorsFormat knnVectorsFormat() {
return KnnVectorsFormat.EMPTY;
}
/**

View File

@ -20,6 +20,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
@ -27,7 +28,6 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
/**
* plain text index format.
@ -47,7 +47,7 @@ public final class SimpleTextCodec extends Codec {
private final DocValuesFormat dvFormat = new SimpleTextDocValuesFormat();
private final CompoundFormat compoundFormat = new SimpleTextCompoundFormat();
private final PointsFormat pointsFormat = new SimpleTextPointsFormat();
private final VectorFormat vectorFormat = new SimpleTextVectorFormat();
private final KnnVectorsFormat knnVectorsFormat = new SimpleTextKnnVectorsFormat();
public SimpleTextCodec() {
super("SimpleText");
@ -104,7 +104,7 @@ public final class SimpleTextCodec extends Codec {
}
@Override
public VectorFormat vectorFormat() {
return vectorFormat;
public KnnVectorsFormat knnVectorsFormat() {
return knnVectorsFormat;
}
}

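Since the plain-text codec now routes its vector support through knnVectorsFormat(), it remains a handy way to dump vector data in human-readable form while debugging. A minimal sketch, assuming an in-memory directory and an illustrative field name (KnnVectorField appears later in this same commit); SimpleTextCodec is for debugging only, never production indexes:

import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class SimpleTextVectorDebugExample {
  public static void main(String[] args) throws Exception {
    IndexWriterConfig iwc = new IndexWriterConfig();
    // SimpleTextCodec writes everything, including the vector data, as plain text
    iwc.setCodec(new SimpleTextCodec());
    try (Directory dir = new ByteBuffersDirectory();
        IndexWriter writer = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new KnnVectorField("vec", new float[] {1f, 2f, 3f}));
      writer.addDocument(doc);
    }
  }
}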
View File

@ -17,9 +17,9 @@
package org.apache.lucene.codecs.simpletext;
import java.io.IOException;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@ -31,20 +31,20 @@ import org.apache.lucene.index.SegmentWriteState;
*
* @lucene.experimental
*/
public final class SimpleTextVectorFormat extends VectorFormat {
public final class SimpleTextKnnVectorsFormat extends KnnVectorsFormat {
public SimpleTextVectorFormat() {
super("SimpleTextVectorFormat");
public SimpleTextKnnVectorsFormat() {
super("SimpleTextKnnVectorsFormat");
}
@Override
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new SimpleTextVectorWriter(state);
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new SimpleTextKnnVectorsWriter(state);
}
@Override
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
return new SimpleTextVectorReader(state);
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
return new SimpleTextKnnVectorsReader(state);
}
/** Extension of vectors data file */

View File

@ -17,14 +17,14 @@
package org.apache.lucene.codecs.simpletext;
import static org.apache.lucene.codecs.simpletext.SimpleTextVectorWriter.*;
import static org.apache.lucene.codecs.simpletext.SimpleTextKnnVectorsWriter.*;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
@ -49,10 +49,10 @@ import org.apache.lucene.util.StringHelper;
*
* @lucene.experimental
*/
public class SimpleTextVectorReader extends VectorReader {
public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
// shallowSizeOfInstance for fieldEntries map is included in ramBytesUsed() calculation
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(SimpleTextVectorReader.class)
RamUsageEstimator.shallowSizeOfInstance(SimpleTextKnnVectorsReader.class)
+ RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
private static final BytesRef EMPTY = new BytesRef("");
@ -62,18 +62,18 @@ public class SimpleTextVectorReader extends VectorReader {
private final BytesRefBuilder scratch = new BytesRefBuilder();
private final Map<String, FieldEntry> fieldEntries = new HashMap<>();
SimpleTextVectorReader(SegmentReadState readState) throws IOException {
SimpleTextKnnVectorsReader(SegmentReadState readState) throws IOException {
this.readState = readState;
String metaFileName =
IndexFileNames.segmentFileName(
readState.segmentInfo.name,
readState.segmentSuffix,
SimpleTextVectorFormat.META_EXTENSION);
SimpleTextKnnVectorsFormat.META_EXTENSION);
String vectorFileName =
IndexFileNames.segmentFileName(
readState.segmentInfo.name,
readState.segmentSuffix,
SimpleTextVectorFormat.VECTOR_EXTENSION);
SimpleTextKnnVectorsFormat.VECTOR_EXTENSION);
boolean success = false;
try (ChecksumIndexInput in =
@ -110,7 +110,7 @@ public class SimpleTextVectorReader extends VectorReader {
FieldInfo info = readState.fieldInfos.fieldInfo(field);
if (info == null) {
// mirror the handling in Lucene90VectorReader#getVectorValues
// needed to pass TestSimpleTextVectorFormat#testDeleteAllVectorDocs
// needed to pass TestSimpleTextKnnVectorsFormat#testDeleteAllVectorDocs
return null;
}
int dimension = info.getVectorDimension();
@ -120,7 +120,7 @@ public class SimpleTextVectorReader extends VectorReader {
FieldEntry fieldEntry = fieldEntries.get(field);
if (fieldEntry == null) {
// mirror the handling in Lucene90VectorReader#getVectorValues
// needed to pass TestSimpleTextVectorFormat#testDeleteAllVectorDocs
// needed to pass TestSimpleTextKnnVectorsFormat#testDeleteAllVectorDocs
return null;
}
if (dimension != fieldEntry.dimension) {
@ -153,7 +153,7 @@ public class SimpleTextVectorReader extends VectorReader {
ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
// when there's no actual vector data written (e.g. tested in
// TestSimpleTextVectorFormat#testDeleteAllVectorDocs)
// TestSimpleTextKnnVectorsFormat#testDeleteAllVectorDocs)
// the first line in dataInput will be, checksum 00000000000000000000
if (footerStartPos == 0) {
SimpleTextUtil.checkFooter(input);
@ -271,7 +271,7 @@ public class SimpleTextVectorReader extends VectorReader {
} else if (curOrd >= entry.size()) {
// when call to advance / nextDoc below already returns NO_MORE_DOCS, calling docID
// immediately afterward should also return NO_MORE_DOCS
// this is needed for TestSimpleTextVectorFormat.testAdvance test case
// this is needed for TestSimpleTextKnnVectorsFormat.testAdvance test case
return NO_MORE_DOCS;
}

View File

@ -23,7 +23,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@ -34,7 +34,7 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
/** Writes vector-valued fields in a plain text format */
public class SimpleTextVectorWriter extends VectorWriter {
public class SimpleTextKnnVectorsWriter extends KnnVectorsWriter {
static final BytesRef FIELD_NUMBER = new BytesRef("field-number ");
static final BytesRef FIELD_NAME = new BytesRef("field-name ");
@ -46,20 +46,24 @@ public class SimpleTextVectorWriter extends VectorWriter {
private final IndexOutput meta, vectorData;
private final BytesRefBuilder scratch = new BytesRefBuilder();
SimpleTextVectorWriter(SegmentWriteState state) throws IOException {
SimpleTextKnnVectorsWriter(SegmentWriteState state) throws IOException {
assert state.fieldInfos.hasVectorValues();
boolean success = false;
// exception handling to pass TestSimpleTextVectorFormat#testRandomExceptions
// exception handling to pass TestSimpleTextKnnVectorsFormat#testRandomExceptions
try {
String metaFileName =
IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, SimpleTextVectorFormat.META_EXTENSION);
state.segmentInfo.name,
state.segmentSuffix,
SimpleTextKnnVectorsFormat.META_EXTENSION);
meta = state.directory.createOutput(metaFileName, state.context);
String vectorDataFileName =
IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, SimpleTextVectorFormat.VECTOR_EXTENSION);
state.segmentInfo.name,
state.segmentSuffix,
SimpleTextKnnVectorsFormat.VECTOR_EXTENSION);
vectorData = state.directory.createOutput(vectorDataFileName, state.context);
success = true;
} finally {

View File

@ -17,9 +17,9 @@
package org.apache.lucene.codecs.simpletext;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseVectorFormatTestCase;
import org.apache.lucene.index.BaseKnnVectorsFormatTestCase;
public class TestSimpleTextVectorFormat extends BaseVectorFormatTestCase {
public class TestSimpleTextKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
@Override
protected Codec getCodec() {
return new SimpleTextCodec();

View File

@ -111,7 +111,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
public abstract PointsFormat pointsFormat();
/** Encodes/decodes numeric vector fields */
public abstract VectorFormat vectorFormat();
public abstract KnnVectorsFormat knnVectorsFormat();
/** looks up a codec by name */
public static Codec forName(String name) {

View File

@ -108,7 +108,7 @@ public abstract class FilterCodec extends Codec {
}
@Override
public VectorFormat vectorFormat() {
return delegate.vectorFormat();
public KnnVectorsFormat knnVectorsFormat() {
return delegate.knnVectorsFormat();
}
}

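For delegating codecs, the rename means overriding knnVectorsFormat() rather than vectorFormat(). A minimal sketch of a FilterCodec subclass that keeps the default codec but swaps in a tuned HNSW format; the codec name and the maxConn/beamWidth values are illustrative, and such a codec still needs the usual SPI registration to be resolvable by name when the index is read back:

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;

public class CustomVectorsCodec extends FilterCodec {
  // illustrative maxConn/beamWidth values, not defaults from this commit
  private final KnnVectorsFormat knnVectorsFormat = new Lucene90HnswVectorsFormat(32, 200);

  public CustomVectorsCodec() {
    super("CustomVectorsCodec", Codec.getDefault());
  }

  @Override
  public KnnVectorsFormat knnVectorsFormat() {
    return knnVectorsFormat;
  }
}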
View File

@ -29,23 +29,23 @@ import org.apache.lucene.util.NamedSPILoader;
* Encodes/decodes per-document vector and any associated indexing structures required to support
* nearest-neighbor search
*/
public abstract class VectorFormat implements NamedSPILoader.NamedSPI {
public abstract class KnnVectorsFormat implements NamedSPILoader.NamedSPI {
/**
* This static holder class prevents classloading deadlock by delaying init of doc values formats
* until needed.
*/
private static final class Holder {
private static final NamedSPILoader<VectorFormat> LOADER =
new NamedSPILoader<>(VectorFormat.class);
private static final NamedSPILoader<KnnVectorsFormat> LOADER =
new NamedSPILoader<>(KnnVectorsFormat.class);
private Holder() {}
static NamedSPILoader<VectorFormat> getLoader() {
static NamedSPILoader<KnnVectorsFormat> getLoader() {
if (LOADER == null) {
throw new IllegalStateException(
"You tried to lookup a VectorFormat name before all formats could be initialized. "
+ "This likely happens if you call VectorFormat#forName from a VectorFormat's ctor.");
"You tried to lookup a KnnVectorsFormat name before all formats could be initialized. "
+ "This likely happens if you call KnnVectorsFormat#forName from a KnnVectorsFormat's ctor.");
}
return LOADER;
}
@ -54,7 +54,7 @@ public abstract class VectorFormat implements NamedSPILoader.NamedSPI {
private final String name;
/** Sole constructor */
protected VectorFormat(String name) {
protected KnnVectorsFormat(String name) {
NamedSPILoader.checkServiceName(name);
this.name = name;
}
@ -65,31 +65,31 @@ public abstract class VectorFormat implements NamedSPILoader.NamedSPI {
}
/** looks up a format by name */
public static VectorFormat forName(String name) {
public static KnnVectorsFormat forName(String name) {
return Holder.getLoader().lookup(name);
}
/** Returns a {@link VectorWriter} to write the vectors to the index. */
public abstract VectorWriter fieldsWriter(SegmentWriteState state) throws IOException;
/** Returns a {@link KnnVectorsWriter} to write the vectors to the index. */
public abstract KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException;
/** Returns a {@link VectorReader} to read the vectors from the index. */
public abstract VectorReader fieldsReader(SegmentReadState state) throws IOException;
/** Returns a {@link KnnVectorsReader} to read the vectors from the index. */
public abstract KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException;
/**
* EMPTY throws an exception when written. It acts as a sentinel indicating a Codec that does not
* support vectors.
*/
public static final VectorFormat EMPTY =
new VectorFormat("EMPTY") {
public static final KnnVectorsFormat EMPTY =
new KnnVectorsFormat("EMPTY") {
@Override
public VectorWriter fieldsWriter(SegmentWriteState state) {
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) {
throw new UnsupportedOperationException(
"Attempt to write EMPTY VectorValues: maybe you forgot to use codec=Lucene90");
}
@Override
public VectorReader fieldsReader(SegmentReadState state) {
return new VectorReader() {
public KnnVectorsReader fieldsReader(SegmentReadState state) {
return new KnnVectorsReader() {
@Override
public void checkIntegrity() {}

View File
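The SPI surface is renamed along with the class, so format lookup now goes through KnnVectorsFormat.forName with the SPI name a concrete format passes to its constructor. A small sketch of the lookup and of the EMPTY sentinel (assuming lucene-core with its service registrations is on the classpath):

import org.apache.lucene.codecs.KnnVectorsFormat;

public class KnnVectorsFormatLookupExample {
  public static void main(String[] args) {
    // the SPI name is the string passed to the constructor, not the Java class name
    KnnVectorsFormat hnsw = KnnVectorsFormat.forName("Lucene90HnswVectorsFormat");
    System.out.println(hnsw.getName());

    // EMPTY marks codecs without vector support: it reads nothing and
    // throws UnsupportedOperationException on any attempt to write
    KnnVectorsFormat empty = KnnVectorsFormat.EMPTY;
    System.out.println(empty.getName());
  }
}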

@ -24,10 +24,10 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Accountable;
/** Reads vectors from an index. */
public abstract class VectorReader implements Closeable, Accountable {
public abstract class KnnVectorsReader implements Closeable, Accountable {
/** Sole constructor */
protected VectorReader() {}
protected KnnVectorsReader() {}
/**
* Checks consistency of this reader.
@ -61,7 +61,7 @@ public abstract class VectorReader implements Closeable, Accountable {
*
* <p>The default implementation returns {@code this}
*/
public VectorReader getMergeInstance() {
public KnnVectorsReader getMergeInstance() {
return this;
}
}

View File

@ -34,10 +34,10 @@ import org.apache.lucene.index.VectorValues;
import org.apache.lucene.util.BytesRef;
/** Writes vectors to an index. */
public abstract class VectorWriter implements Closeable {
public abstract class KnnVectorsWriter implements Closeable {
/** Sole constructor */
protected VectorWriter() {}
protected KnnVectorsWriter() {}
/** Write all values contained in the provided reader */
public abstract void writeField(FieldInfo fieldInfo, VectorValues values) throws IOException;
@ -48,7 +48,7 @@ public abstract class VectorWriter implements Closeable {
/** Merge the vector values from multiple segments, for all fields */
public void merge(MergeState mergeState) throws IOException {
for (int i = 0; i < mergeState.fieldInfos.length; i++) {
VectorReader reader = mergeState.vectorReaders[i];
KnnVectorsReader reader = mergeState.knnVectorsReaders[i];
assert reader != null || mergeState.fieldInfos[i].hasVectorValues() == false;
if (reader != null) {
reader.checkIntegrity();
@ -71,9 +71,9 @@ public abstract class VectorWriter implements Closeable {
int dimension = -1;
VectorSimilarityFunction similarityFunction = null;
int nonEmptySegmentIndex = 0;
for (int i = 0; i < mergeState.vectorReaders.length; i++) {
VectorReader vectorReader = mergeState.vectorReaders[i];
if (vectorReader != null) {
for (int i = 0; i < mergeState.knnVectorsReaders.length; i++) {
KnnVectorsReader knnVectorsReader = mergeState.knnVectorsReaders[i];
if (knnVectorsReader != null) {
if (mergeFieldInfo != null && mergeFieldInfo.hasVectorValues()) {
int segmentDimension = mergeFieldInfo.getVectorDimension();
VectorSimilarityFunction segmentSimilarityFunction =
@ -98,7 +98,7 @@ public abstract class VectorWriter implements Closeable {
+ "!="
+ segmentSimilarityFunction);
}
VectorValues values = vectorReader.getVectorValues(mergeFieldInfo.name);
VectorValues values = knnVectorsReader.getVectorValues(mergeFieldInfo.name);
if (values != null) {
subs.add(new VectorValuesSub(nonEmptySegmentIndex++, mergeState.docMaps[i], values));
}

View File

@ -22,6 +22,7 @@ import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
@ -29,10 +30,9 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
/**
* Implements the Lucene 9.0 index format
@ -83,12 +83,12 @@ public class Lucene90Codec extends Codec {
}
};
private final VectorFormat defaultVectorFormat;
private final VectorFormat vectorFormat =
new PerFieldVectorFormat() {
private final KnnVectorsFormat defaultKnnVectorsFormat;
private final KnnVectorsFormat knnVectorsFormat =
new PerFieldKnnVectorsFormat() {
@Override
public VectorFormat getVectorFormatForField(String field) {
return Lucene90Codec.this.getVectorFormatForField(field);
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return Lucene90Codec.this.getKnnVectorsFormatForField(field);
}
};
@ -110,7 +110,7 @@ public class Lucene90Codec extends Codec {
new Lucene90StoredFieldsFormat(Objects.requireNonNull(mode).storedMode);
this.defaultPostingsFormat = new Lucene90PostingsFormat();
this.defaultDVFormat = new Lucene90DocValuesFormat();
this.defaultVectorFormat = new Lucene90HnswVectorFormat();
this.defaultKnnVectorsFormat = new Lucene90HnswVectorsFormat();
}
@Override
@ -154,8 +154,8 @@ public class Lucene90Codec extends Codec {
}
@Override
public final VectorFormat vectorFormat() {
return vectorFormat;
public final KnnVectorsFormat knnVectorsFormat() {
return knnVectorsFormat;
}
/**
@ -191,8 +191,8 @@ public class Lucene90Codec extends Codec {
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future version of Lucene are only guaranteed to be able to read the default implementation.
*/
public VectorFormat getVectorFormatForField(String field) {
return defaultVectorFormat;
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return defaultKnnVectorsFormat;
}
@Override

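With the renamed hook, per-field vector format selection is done by overriding getKnnVectorsFormatForField. A minimal sketch using an anonymous Lucene90Codec subclass; the field name "title_vector" and the HNSW parameters are illustrative only, and the backwards-compatibility warning above applies to any subclass:

import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
import org.apache.lucene.index.IndexWriterConfig;

public class PerFieldKnnConfigExample {
  public static IndexWriterConfig newConfig() {
    IndexWriterConfig iwc = new IndexWriterConfig();
    iwc.setCodec(
        new Lucene90Codec() {
          @Override
          public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
            if ("title_vector".equals(field)) {
              // a denser, more thoroughly searched graph for this one field
              return new Lucene90HnswVectorsFormat(32, 200);
            }
            return new Lucene90HnswVectorsFormat();
          }
        });
    return iwc;
  }
}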
View File

@ -18,9 +18,9 @@
package org.apache.lucene.codecs.lucene90;
import java.io.IOException;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.hnsw.HnswGraph;
@ -65,11 +65,11 @@ import org.apache.lucene.util.hnsw.HnswGraph;
*
* @lucene.experimental
*/
public final class Lucene90HnswVectorFormat extends VectorFormat {
public final class Lucene90HnswVectorsFormat extends KnnVectorsFormat {
static final String META_CODEC_NAME = "Lucene90HnswVectorFormatMeta";
static final String VECTOR_DATA_CODEC_NAME = "Lucene90HnswVectorFormatData";
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90HnswVectorFormatIndex";
static final String META_CODEC_NAME = "Lucene90HnswVectorsFormatMeta";
static final String VECTOR_DATA_CODEC_NAME = "Lucene90HnswVectorsFormatData";
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90HnswVectorsFormatIndex";
static final String META_EXTENSION = "vem";
static final String VECTOR_DATA_EXTENSION = "vec";
static final String VECTOR_INDEX_EXTENSION = "vex";
@ -82,36 +82,34 @@ public final class Lucene90HnswVectorFormat extends VectorFormat {
/**
* Controls how many of the nearest neighbor candidates are connected to the new node. Defaults to
* {@link Lucene90HnswVectorFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details.
* {@link Lucene90HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details.
*/
private final int maxConn;
/**
* The number of candidate neighbors to track while searching the graph for each newly inserted
* node. Defaults to to {@link Lucene90HnswVectorFormat#DEFAULT_BEAM_WIDTH}. See {@link HnswGraph}
* for details.
* node. Defaults to to {@link Lucene90HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link
* HnswGraph} for details.
*/
private final int beamWidth;
public Lucene90HnswVectorFormat() {
super("Lucene90HnswVectorFormat");
this.maxConn = DEFAULT_MAX_CONN;
this.beamWidth = DEFAULT_BEAM_WIDTH;
public Lucene90HnswVectorsFormat() {
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH);
}
public Lucene90HnswVectorFormat(int maxConn, int beamWidth) {
super("Lucene90HnswVectorFormat");
public Lucene90HnswVectorsFormat(int maxConn, int beamWidth) {
super("Lucene90HnswVectorsFormat");
this.maxConn = maxConn;
this.beamWidth = beamWidth;
}
@Override
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new Lucene90HnswVectorWriter(state, maxConn, beamWidth);
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new Lucene90HnswVectorsWriter(state, maxConn, beamWidth);
}
@Override
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
return new Lucene90HnswVectorReader(state);
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
return new Lucene90HnswVectorsReader(state);
}
}

View File

@ -26,7 +26,7 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -54,7 +54,7 @@ import org.apache.lucene.util.hnsw.NeighborQueue;
*
* @lucene.experimental
*/
public final class Lucene90HnswVectorReader extends VectorReader {
public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
private final FieldInfos fieldInfos;
private final Map<String, FieldEntry> fields = new HashMap<>();
@ -62,10 +62,10 @@ public final class Lucene90HnswVectorReader extends VectorReader {
private final IndexInput vectorIndex;
private final long checksumSeed;
Lucene90HnswVectorReader(SegmentReadState state) throws IOException {
Lucene90HnswVectorsReader(SegmentReadState state) throws IOException {
this.fieldInfos = state.fieldInfos;
int versionMeta = readMetadata(state, Lucene90HnswVectorFormat.META_EXTENSION);
int versionMeta = readMetadata(state, Lucene90HnswVectorsFormat.META_EXTENSION);
long[] checksumRef = new long[1];
boolean success = false;
try {
@ -73,15 +73,15 @@ public final class Lucene90HnswVectorReader extends VectorReader {
openDataInput(
state,
versionMeta,
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION,
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
Lucene90HnswVectorsFormat.VECTOR_DATA_EXTENSION,
Lucene90HnswVectorsFormat.VECTOR_DATA_CODEC_NAME,
checksumRef);
vectorIndex =
openDataInput(
state,
versionMeta,
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION,
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
Lucene90HnswVectorsFormat.VECTOR_INDEX_EXTENSION,
Lucene90HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME,
checksumRef);
success = true;
} finally {
@ -102,9 +102,9 @@ public final class Lucene90HnswVectorReader extends VectorReader {
versionMeta =
CodecUtil.checkIndexHeader(
meta,
Lucene90HnswVectorFormat.META_CODEC_NAME,
Lucene90HnswVectorFormat.VERSION_START,
Lucene90HnswVectorFormat.VERSION_CURRENT,
Lucene90HnswVectorsFormat.META_CODEC_NAME,
Lucene90HnswVectorsFormat.VERSION_START,
Lucene90HnswVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
readFields(meta, state.fieldInfos);
@ -131,8 +131,8 @@ public final class Lucene90HnswVectorReader extends VectorReader {
CodecUtil.checkIndexHeader(
in,
codecName,
Lucene90HnswVectorFormat.VERSION_START,
Lucene90HnswVectorFormat.VERSION_CURRENT,
Lucene90HnswVectorsFormat.VERSION_START,
Lucene90HnswVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
if (versionMeta != versionVectorData) {
@ -205,7 +205,7 @@ public final class Lucene90HnswVectorReader extends VectorReader {
@Override
public long ramBytesUsed() {
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90HnswVectorReader.class);
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90HnswVectorsReader.class);
totalBytes +=
RamUsageEstimator.sizeOfMap(
fields, RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class));

View File

@ -22,7 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
@ -41,7 +41,7 @@ import org.apache.lucene.util.hnsw.NeighborArray;
*
* @lucene.experimental
*/
public final class Lucene90HnswVectorWriter extends VectorWriter {
public final class Lucene90HnswVectorsWriter extends KnnVectorsWriter {
private final SegmentWriteState segmentWriteState;
private final IndexOutput meta, vectorData, vectorIndex;
@ -50,7 +50,8 @@ public final class Lucene90HnswVectorWriter extends VectorWriter {
private final int beamWidth;
private boolean finished;
Lucene90HnswVectorWriter(SegmentWriteState state, int maxConn, int beamWidth) throws IOException {
Lucene90HnswVectorsWriter(SegmentWriteState state, int maxConn, int beamWidth)
throws IOException {
this.maxConn = maxConn;
this.beamWidth = beamWidth;
@ -59,19 +60,19 @@ public final class Lucene90HnswVectorWriter extends VectorWriter {
String metaFileName =
IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, Lucene90HnswVectorFormat.META_EXTENSION);
state.segmentInfo.name, state.segmentSuffix, Lucene90HnswVectorsFormat.META_EXTENSION);
String vectorDataFileName =
IndexFileNames.segmentFileName(
state.segmentInfo.name,
state.segmentSuffix,
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION);
Lucene90HnswVectorsFormat.VECTOR_DATA_EXTENSION);
String indexDataFileName =
IndexFileNames.segmentFileName(
state.segmentInfo.name,
state.segmentSuffix,
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION);
Lucene90HnswVectorsFormat.VECTOR_INDEX_EXTENSION);
boolean success = false;
try {
@ -81,20 +82,20 @@ public final class Lucene90HnswVectorWriter extends VectorWriter {
CodecUtil.writeIndexHeader(
meta,
Lucene90HnswVectorFormat.META_CODEC_NAME,
Lucene90HnswVectorFormat.VERSION_CURRENT,
Lucene90HnswVectorsFormat.META_CODEC_NAME,
Lucene90HnswVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
CodecUtil.writeIndexHeader(
vectorData,
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
Lucene90HnswVectorFormat.VERSION_CURRENT,
Lucene90HnswVectorsFormat.VECTOR_DATA_CODEC_NAME,
Lucene90HnswVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
CodecUtil.writeIndexHeader(
vectorIndex,
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
Lucene90HnswVectorFormat.VERSION_CURRENT,
Lucene90HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME,
Lucene90HnswVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
success = true;

View File

@ -180,7 +180,7 @@
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
* and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
* intersection (2D, 3D).
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}. The
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat Vector values}. The
* vector format stores numeric vectors in a format optimized for random access and
* computation, supporting high-dimensional nearest-neighbor search.
* </ul>
@ -310,7 +310,7 @@
* <td>Holds indexed points</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}</td>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat Vector values}</td>
* <td>.vec, .vem</td>
* <td>Holds indexed vectors; <code>.vec</code> files contain the raw vector data, and
* <code>.vem</code> the vector metadata</td>

View File

@ -23,9 +23,9 @@ import java.util.HashMap;
import java.util.Map;
import java.util.ServiceLoader;
import java.util.TreeMap;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@ -50,30 +50,30 @@ import org.apache.lucene.util.IOUtils;
* @see ServiceLoader
* @lucene.experimental
*/
public abstract class PerFieldVectorFormat extends VectorFormat {
/** Name of this {@link VectorFormat}. */
public abstract class PerFieldKnnVectorsFormat extends KnnVectorsFormat {
/** Name of this {@link KnnVectorsFormat}. */
public static final String PER_FIELD_NAME = "PerFieldVectors90";
/** {@link FieldInfo} attribute name used to store the format name for each field. */
public static final String PER_FIELD_FORMAT_KEY =
PerFieldVectorFormat.class.getSimpleName() + ".format";
PerFieldKnnVectorsFormat.class.getSimpleName() + ".format";
/** {@link FieldInfo} attribute name used to store the segment suffix name for each field. */
public static final String PER_FIELD_SUFFIX_KEY =
PerFieldVectorFormat.class.getSimpleName() + ".suffix";
PerFieldKnnVectorsFormat.class.getSimpleName() + ".suffix";
/** Sole constructor. */
protected PerFieldVectorFormat() {
protected PerFieldKnnVectorsFormat() {
super(PER_FIELD_NAME);
}
@Override
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new FieldsWriter(state);
}
@Override
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
return new FieldsReader(state);
}
@ -84,10 +84,10 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
* <p>The field to format mapping is written to the index, so this method is only invoked when
* writing, not when reading.
*/
public abstract VectorFormat getVectorFormatForField(String field);
public abstract KnnVectorsFormat getKnnVectorsFormatForField(String field);
private class FieldsWriter extends VectorWriter {
private final Map<VectorFormat, WriterAndSuffix> formats;
private class FieldsWriter extends KnnVectorsWriter {
private final Map<KnnVectorsFormat, WriterAndSuffix> formats;
private final Map<String, Integer> suffixes = new HashMap<>();
private final SegmentWriteState segmentWriteState;
@ -113,11 +113,11 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
IOUtils.close(formats.values());
}
private VectorWriter getInstance(FieldInfo field) throws IOException {
VectorFormat format = getVectorFormatForField(field.name);
private KnnVectorsWriter getInstance(FieldInfo field) throws IOException {
KnnVectorsFormat format = getKnnVectorsFormatForField(field.name);
if (format == null) {
throw new IllegalStateException(
"invalid null VectorFormat for field=\"" + field.name + "\"");
"invalid null KnnVectorsFormat for field=\"" + field.name + "\"");
}
final String formatName = format.getName();
@ -164,13 +164,13 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
}
/** VectorReader that can wrap multiple delegate readers, selected by field. */
public static class FieldsReader extends VectorReader {
public static class FieldsReader extends KnnVectorsReader {
private final Map<String, VectorReader> fields = new TreeMap<>();
private final Map<String, KnnVectorsReader> fields = new TreeMap<>();
/**
* Create a FieldsReader over a segment, opening VectorReaders for each VectorFormat specified
* by the indexed numeric vector fields.
* Create a FieldsReader over a segment, opening VectorReaders for each KnnVectorsFormat
* specified by the indexed numeric vector fields.
*
* @param readState defines the fields
* @throws IOException if one of the delegate readers throws
@ -179,7 +179,7 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
// Init each unique format:
boolean success = false;
Map<String, VectorReader> formats = new HashMap<>();
Map<String, KnnVectorsReader> formats = new HashMap<>();
try {
// Read field name -> format name
for (FieldInfo fi : readState.fieldInfos) {
@ -193,7 +193,7 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
throw new IllegalStateException(
"missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName);
}
VectorFormat format = VectorFormat.forName(formatName);
KnnVectorsFormat format = KnnVectorsFormat.forName(formatName);
String segmentSuffix =
getFullSegmentSuffix(readState.segmentSuffix, getSuffix(formatName, suffix));
if (!formats.containsKey(segmentSuffix)) {
@ -218,34 +218,34 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
*
* @param field the name of a numeric vector field
*/
public VectorReader getFieldReader(String field) {
public KnnVectorsReader getFieldReader(String field) {
return fields.get(field);
}
@Override
public void checkIntegrity() throws IOException {
for (VectorReader reader : fields.values()) {
for (KnnVectorsReader reader : fields.values()) {
reader.checkIntegrity();
}
}
@Override
public VectorValues getVectorValues(String field) throws IOException {
VectorReader vectorReader = fields.get(field);
if (vectorReader == null) {
KnnVectorsReader knnVectorsReader = fields.get(field);
if (knnVectorsReader == null) {
return null;
} else {
return vectorReader.getVectorValues(field);
return knnVectorsReader.getVectorValues(field);
}
}
@Override
public TopDocs search(String field, float[] target, int k) throws IOException {
VectorReader vectorReader = fields.get(field);
if (vectorReader == null) {
KnnVectorsReader knnVectorsReader = fields.get(field);
if (knnVectorsReader == null) {
return new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
} else {
return vectorReader.search(field, target, k);
return knnVectorsReader.search(field, target, k);
}
}
@ -257,7 +257,7 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
@Override
public long ramBytesUsed() {
long total = 0;
for (VectorReader reader : fields.values()) {
for (KnnVectorsReader reader : fields.values()) {
total += reader.ramBytesUsed();
}
return total;
@ -277,10 +277,10 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
}
private static class WriterAndSuffix implements Closeable {
final VectorWriter writer;
final KnnVectorsWriter writer;
final int suffix;
WriterAndSuffix(VectorWriter writer, int suffix) {
WriterAndSuffix(KnnVectorsWriter writer, int suffix) {
this.writer = writer;
this.suffix = suffix;
}

View File

@ -25,14 +25,14 @@ import org.apache.lucene.index.VectorValues;
* are dense - that is, every dimension of a vector contains an explicit value, stored packed into
* an array (of type float[]) whose length is the vector dimension. Values can be retrieved using
* {@link VectorValues}, which is a forward-only docID-based iterator and also offers random-access
* by dense ordinal (not docId). VectorValues.SearchSimlarity may be used to compare vectors at
* query time (for example as part of result ranking). A VectorField may be associated with a search
* similarity function defining the metric used for nearest-neighbor search among vectors of that
* field.
* by dense ordinal (not docId). {@link VectorSimilarityFunction} may be used to compare vectors at
* query time (for example as part of result ranking). A KnnVectorField may be associated with a
* search similarity function defining the metric used for nearest-neighbor search among vectors of
* that field.
*
* @lucene.experimental
*/
public class VectorField extends Field {
public class KnnVectorField extends Field {
private static FieldType createType(float[] v, VectorSimilarityFunction similarityFunction) {
if (v == null) {
@ -82,7 +82,7 @@ public class VectorField extends Field {
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
* dimension &gt; 1024.
*/
public VectorField(String name, float[] vector, VectorSimilarityFunction similarityFunction) {
public KnnVectorField(String name, float[] vector, VectorSimilarityFunction similarityFunction) {
super(name, createType(vector, similarityFunction));
fieldsData = vector;
}
@ -97,7 +97,7 @@ public class VectorField extends Field {
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
* dimension &gt; 1024.
*/
public VectorField(String name, float[] vector) {
public KnnVectorField(String name, float[] vector) {
this(name, vector, VectorSimilarityFunction.EUCLIDEAN);
}
@ -111,7 +111,7 @@ public class VectorField extends Field {
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
* dimension &gt; 1024.
*/
public VectorField(String name, float[] vector, FieldType fieldType) {
public KnnVectorField(String name, float[] vector, FieldType fieldType) {
super(name, fieldType);
fieldsData = vector;
}

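The renamed field class is used exactly like the old VectorField at indexing time. A minimal indexing sketch, assuming an in-memory directory, an illustrative field name, and the EUCLIDEAN similarity shown in the constructors above:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class KnnVectorFieldIndexingExample {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      Document doc = new Document();
      // every vector in a field must have the same dimension (here 3, at most 1024)
      doc.add(new KnnVectorField("embedding", new float[] {0.1f, 0.2f, 0.3f},
          VectorSimilarityFunction.EUCLIDEAN));
      writer.addDocument(doc);
    }
  }
}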
View File

@ -359,7 +359,7 @@ public final class CheckIndex implements Closeable {
public long totalVectorValues;
/** Total number of fields with vectors. */
public int totalVectorFields;
public int totalKnnVectorFields;
/** Exception thrown during vector values test (null on success) */
public Throwable error = null;
@ -2310,7 +2310,7 @@ public final class CheckIndex implements Closeable {
continue;
}
status.totalVectorFields++;
status.totalKnnVectorFields++;
int docCount = 0;
while (values.nextDoc() != NO_MORE_DOCS) {
@ -2346,7 +2346,7 @@ public final class CheckIndex implements Closeable {
String.format(
Locale.ROOT,
"OK [%d fields, %d vectors] [took %.3f sec]",
status.totalVectorFields,
status.totalKnnVectorFields,
status.totalVectorValues,
nsToSec(System.nanoTime() - startNS)));

View File

@ -20,11 +20,11 @@ import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.search.TopDocs;
/** LeafReader implemented by codec APIs. */
@ -81,7 +81,7 @@ public abstract class CodecReader extends LeafReader {
*
* @lucene.internal
*/
public abstract VectorReader getVectorReader();
public abstract KnnVectorsReader getVectorReader();
@Override
public final void document(int docID, StoredFieldVisitor visitor) throws IOException {

View File

@ -20,11 +20,11 @@ import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.util.Bits;
/**
@ -100,7 +100,7 @@ public abstract class FilterCodecReader extends CodecReader {
}
@Override
public VectorReader getVectorReader() {
public KnnVectorsReader getVectorReader() {
return in.getVectorReader();
}

View File

@ -30,15 +30,15 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.VectorField;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
@ -430,7 +430,7 @@ final class IndexingChain implements Accountable {
/** Writes all buffered vectors. */
private void writeVectors(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
VectorWriter vectorWriter = null;
KnnVectorsWriter knnVectorsWriter = null;
boolean success = false;
try {
for (int i = 0; i < fieldHash.length; i++) {
@ -446,19 +446,19 @@ final class IndexingChain implements Accountable {
+ perField.fieldInfo.name
+ "\" has no vectors but wrote them");
}
if (vectorWriter == null) {
if (knnVectorsWriter == null) {
// lazy init
VectorFormat fmt = state.segmentInfo.getCodec().vectorFormat();
KnnVectorsFormat fmt = state.segmentInfo.getCodec().knnVectorsFormat();
if (fmt == null) {
throw new IllegalStateException(
"field=\""
+ perField.fieldInfo.name
+ "\" was indexed as vectors but codec does not support vectors");
}
vectorWriter = fmt.fieldsWriter(state);
knnVectorsWriter = fmt.fieldsWriter(state);
}
perField.vectorValuesWriter.flush(sortMap, vectorWriter);
perField.vectorValuesWriter.flush(sortMap, knnVectorsWriter);
perField.vectorValuesWriter = null;
} else if (perField.fieldInfo != null && perField.fieldInfo.getVectorDimension() != 0) {
// BUG
@ -472,15 +472,15 @@ final class IndexingChain implements Accountable {
perField = perField.next;
}
}
if (vectorWriter != null) {
vectorWriter.finish();
if (knnVectorsWriter != null) {
knnVectorsWriter.finish();
}
success = true;
} finally {
if (success) {
IOUtils.close(vectorWriter);
IOUtils.close(knnVectorsWriter);
} else {
IOUtils.closeWhileHandlingException(vectorWriter);
IOUtils.closeWhileHandlingException(knnVectorsWriter);
}
}
}
@ -761,7 +761,7 @@ final class IndexingChain implements Accountable {
pf.pointValuesWriter.addPackedValue(docID, field.binaryValue());
}
if (fieldType.vectorDimension() != 0) {
pf.vectorValuesWriter.addValue(docID, ((VectorField) field).vectorValue());
pf.vectorValuesWriter.addValue(docID, ((KnnVectorField) field).vectorValue());
}
return indexedField;
}

View File

@ -24,11 +24,11 @@ import java.util.List;
import java.util.Locale;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
@ -80,7 +80,7 @@ public class MergeState {
public final PointsReader[] pointsReaders;
/** Vector readers to merge */
public final VectorReader[] vectorReaders;
public final KnnVectorsReader[] knnVectorsReaders;
/** Max docs per reader */
public final int[] maxDocs;
@ -109,7 +109,7 @@ public class MergeState {
termVectorsReaders = new TermVectorsReader[numReaders];
docValuesProducers = new DocValuesProducer[numReaders];
pointsReaders = new PointsReader[numReaders];
vectorReaders = new VectorReader[numReaders];
knnVectorsReaders = new KnnVectorsReader[numReaders];
fieldInfos = new FieldInfos[numReaders];
liveDocs = new Bits[numReaders];
@ -147,9 +147,9 @@ public class MergeState {
pointsReaders[i] = pointsReaders[i].getMergeInstance();
}
vectorReaders[i] = reader.getVectorReader();
if (vectorReaders[i] != null) {
vectorReaders[i] = vectorReaders[i].getMergeInstance();
knnVectorsReaders[i] = reader.getVectorReader();
if (knnVectorsReaders[i] != null) {
knnVectorsReaders[i] = knnVectorsReaders[i].getMergeInstance();
}
numDocs += reader.numDocs();

View File

@ -28,12 +28,12 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundDirectory;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.index.IndexReader.CacheKey;
import org.apache.lucene.index.IndexReader.ClosedListener;
import org.apache.lucene.store.AlreadyClosedException;
@ -59,7 +59,7 @@ final class SegmentCoreReaders {
final StoredFieldsReader fieldsReaderOrig;
final TermVectorsReader termVectorsReader;
final PointsReader pointsReader;
final VectorReader vectorReader;
final KnnVectorsReader knnVectorsReader;
final CompoundDirectory cfsReader;
final String segment;
/**
@ -142,9 +142,9 @@ final class SegmentCoreReaders {
}
if (coreFieldInfos.hasVectorValues()) {
vectorReader = codec.vectorFormat().fieldsReader(segmentReadState);
knnVectorsReader = codec.knnVectorsFormat().fieldsReader(segmentReadState);
} else {
vectorReader = null;
knnVectorsReader = null;
}
success = true;
@ -185,7 +185,7 @@ final class SegmentCoreReaders {
cfsReader,
normsProducer,
pointsReader,
vectorReader);
knnVectorsReader);
}
}
}

View File

@ -21,12 +21,12 @@ import java.util.List;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.InfoStream;
@ -236,7 +236,7 @@ final class SegmentMerger {
}
private void mergeVectorValues(SegmentWriteState segmentWriteState) throws IOException {
try (VectorWriter writer = codec.vectorFormat().fieldsWriter(segmentWriteState)) {
try (KnnVectorsWriter writer = codec.knnVectorsFormat().fieldsWriter(segmentWriteState)) {
writer.merge(mergeState);
}
}

View File

@ -24,11 +24,11 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
@ -267,8 +267,8 @@ public final class SegmentReader extends CodecReader {
}
@Override
public VectorReader getVectorReader() {
return core.vectorReader;
public KnnVectorsReader getVectorReader() {
return core.knnVectorsReader;
}
@Override

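For completeness, a sketch of how the renamed reader is reached at search time. Note that getVectorReader() is marked @lucene.internal on CodecReader, so this only illustrates the renamed plumbing, not a recommended public search path; the cast assumes a regular segment whose leaves are SegmentReader instances:

import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

public class SegmentKnnSearchSketch {
  // returns the approximate top-k nearest vectors for the first segment only
  public static TopDocs searchFirstSegment(Directory dir, String field, float[] query, int k)
      throws Exception {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      CodecReader leaf = (CodecReader) reader.leaves().get(0).reader();
      // may be null if the segment has no vector fields
      KnnVectorsReader vectors = leaf.getVectorReader();
      return vectors.search(field, query, k);
    }
  }
}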
View File

@ -22,11 +22,11 @@ import java.util.Collections;
import java.util.Iterator;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
@ -78,7 +78,7 @@ public final class SlowCodecReaderWrapper {
}
@Override
public VectorReader getVectorReader() {
public KnnVectorsReader getVectorReader() {
reader.ensureOpen();
return readerToVectorReader(reader);
}
@ -159,8 +159,8 @@ public final class SlowCodecReaderWrapper {
};
}
private static VectorReader readerToVectorReader(LeafReader reader) {
return new VectorReader() {
private static KnnVectorsReader readerToVectorReader(LeafReader reader) {
return new KnnVectorsReader() {
@Override
public VectorValues getVectorValues(String field) throws IOException {
return reader.getVectorValues(field);

View File

@ -26,11 +26,11 @@ import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
@ -301,9 +301,9 @@ public final class SortingCodecReader extends FilterCodecReader {
}
@Override
public VectorReader getVectorReader() {
VectorReader delegate = in.getVectorReader();
return new VectorReader() {
public KnnVectorsReader getVectorReader() {
KnnVectorsReader delegate = in.getVectorReader();
return new KnnVectorsReader() {
@Override
public void checkIntegrity() throws IOException {
delegate.checkIntegrity();

View File

@ -17,12 +17,13 @@
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
/**
* This class provides access to per-document floating point vector values indexed as {@link
* org.apache.lucene.document.VectorField}.
* KnnVectorField}.
*
* @lucene.experimental
*/

View File

@ -22,7 +22,7 @@ import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@ -103,16 +103,16 @@ class VectorValuesWriter {
*
* @param sortMap specifies the order of documents being flushed, or null if they are to be
* flushed in docid order
* @param vectorWriter the Codec's vector writer that handles the actual encoding and I/O
* @param knnVectorsWriter the Codec's vector writer that handles the actual encoding and I/O
* @throws IOException if there is an error writing the field and its values
*/
public void flush(Sorter.DocMap sortMap, VectorWriter vectorWriter) throws IOException {
public void flush(Sorter.DocMap sortMap, KnnVectorsWriter knnVectorsWriter) throws IOException {
VectorValues vectorValues =
new BufferedVectorValues(docsWithField, vectors, fieldInfo.getVectorDimension());
if (sortMap != null) {
vectorWriter.writeField(fieldInfo, new SortingVectorValues(vectorValues, sortMap));
knnVectorsWriter.writeField(fieldInfo, new SortingVectorValues(vectorValues, sortMap));
} else {
vectorWriter.writeField(fieldInfo, vectorValues);
knnVectorsWriter.writeField(fieldInfo, vectorValues);
}
}
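The sortMap branch above is exercised whenever the writer is configured with an index sort: the buffered vectors are then handed to the codec's KnnVectorsWriter in sorted document order rather than insertion order. A small sketch of such a configuration (field names and the sort key are illustrative):

    import java.io.IOException;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.KnnVectorField;
    import org.apache.lucene.document.NumericDocValuesField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.store.ByteBuffersDirectory;
    import org.apache.lucene.store.Directory;

    public class SortedFlushSketch {
      public static void main(String[] args) throws IOException {
        IndexWriterConfig iwc = new IndexWriterConfig();
        // an index sort makes flush() receive a non-null sortMap, so vector values
        // are written in the sorted order rather than docid (insertion) order
        iwc.setIndexSort(new Sort(new SortField("sortkey", SortField.Type.LONG)));
        try (Directory dir = new ByteBuffersDirectory();
            IndexWriter writer = new IndexWriter(dir, iwc)) {
          Document doc = new Document();
          doc.add(new NumericDocValuesField("sortkey", 42L));
          doc.add(new KnnVectorField("vector", new float[] {0.1f, 0.2f}));
          writer.addDocument(doc);
        }
      }
    }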

View File

@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat
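This entry registers the renamed format with Java's ServiceLoader so an index can resolve the format by name at read time. A project shipping its own format would add an analogous line; the class name below is hypothetical, and the service file name is assumed to follow the usual convention of the service interface's fully qualified name, since the path itself is not shown in this diff:

    # META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
    com.example.MyHnswVectorsFormat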

View File

@ -17,10 +17,10 @@
package org.apache.lucene.codecs.lucene90;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseVectorFormatTestCase;
import org.apache.lucene.index.BaseKnnVectorsFormatTestCase;
import org.apache.lucene.util.TestUtil;
public class TestLucene90HnswVectorFormat extends BaseVectorFormatTestCase {
public class TestLucene90HnswVectorsFormat extends BaseKnnVectorsFormatTestCase {
@Override
protected Codec getCodec() {
return TestUtil.getDefaultCodec();

View File

@ -25,14 +25,14 @@ import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.VectorField;
import org.apache.lucene.index.BaseVectorFormatTestCase;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.index.BaseKnnVectorsFormatTestCase;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
@ -49,7 +49,7 @@ import org.apache.lucene.util.TestUtil;
import org.hamcrest.MatcherAssert;
/** Basic tests of PerFieldKnnVectorsFormat */
public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
private Codec codec;
@Override
@ -67,14 +67,14 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
try (Directory directory = newDirectory()) {
// we don't use RandomIndexWriter because it might add more values than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
WriteRecordingVectorFormat format1 =
new WriteRecordingVectorFormat(TestUtil.getDefaultVectorFormat());
WriteRecordingVectorFormat format2 =
new WriteRecordingVectorFormat(TestUtil.getDefaultVectorFormat());
WriteRecordingKnnVectorsFormat format1 =
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
WriteRecordingKnnVectorsFormat format2 =
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
iwc.setCodec(
new AssertingCodec() {
@Override
public VectorFormat getVectorFormatForField(String field) {
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
if ("field1".equals(field)) {
return format1;
} else {
@ -86,12 +86,12 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
try (IndexWriter iwriter = new IndexWriter(directory, iwc)) {
Document doc = new Document();
doc.add(newTextField("id", "1", Field.Store.YES));
doc.add(new VectorField("field1", new float[] {1, 2, 3}));
doc.add(new KnnVectorField("field1", new float[] {1, 2, 3}));
iwriter.addDocument(doc);
doc.clear();
doc.add(newTextField("id", "2", Field.Store.YES));
doc.add(new VectorField("field2", new float[] {4, 5, 6}));
doc.add(new KnnVectorField("field2", new float[] {4, 5, 6}));
iwriter.addDocument(doc);
}
@ -128,19 +128,19 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
for (int i = 0; i < 3; i++) {
Document doc = new Document();
doc.add(newTextField("id", "1", Field.Store.YES));
doc.add(new VectorField("field", new float[] {1, 2, 3}));
doc.add(new KnnVectorField("field", new float[] {1, 2, 3}));
iw.addDocument(doc);
iw.commit();
}
}
IndexWriterConfig newConfig = newIndexWriterConfig(new MockAnalyzer(random()));
WriteRecordingVectorFormat newFormat =
new WriteRecordingVectorFormat(TestUtil.getDefaultVectorFormat());
WriteRecordingKnnVectorsFormat newFormat =
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
newConfig.setCodec(
new AssertingCodec() {
@Override
public VectorFormat getVectorFormatForField(String field) {
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return newFormat;
}
});
@ -154,20 +154,20 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
}
}
private static class WriteRecordingVectorFormat extends VectorFormat {
private final VectorFormat delegate;
private static class WriteRecordingKnnVectorsFormat extends KnnVectorsFormat {
private final KnnVectorsFormat delegate;
private final Set<String> fieldsWritten;
public WriteRecordingVectorFormat(VectorFormat delegate) {
public WriteRecordingKnnVectorsFormat(KnnVectorsFormat delegate) {
super(delegate.getName());
this.delegate = delegate;
this.fieldsWritten = new HashSet<>();
}
@Override
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
VectorWriter writer = delegate.fieldsWriter(state);
return new VectorWriter() {
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
KnnVectorsWriter writer = delegate.fieldsWriter(state);
return new KnnVectorsWriter() {
@Override
public void writeField(FieldInfo fieldInfo, VectorValues values) throws IOException {
fieldsWritten.add(fieldInfo.name);
@ -187,7 +187,7 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
}
@Override
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
return delegate.fieldsReader(state);
}
}
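Outside of tests, the same per-field hook is how an application tunes HNSW parameters field by field: override getKnnVectorsFormatForField on the codec installed via IndexWriterConfig. A sketch using only the APIs shown in this commit (field name and parameter values are illustrative):

    import org.apache.lucene.codecs.KnnVectorsFormat;
    import org.apache.lucene.codecs.lucene90.Lucene90Codec;
    import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
    import org.apache.lucene.index.IndexWriterConfig;

    public class PerFieldHnswConfig {
      public static IndexWriterConfig newConfig() {
        IndexWriterConfig iwc = new IndexWriterConfig();
        iwc.setCodec(
            new Lucene90Codec() {
              @Override
              public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
                if ("title_embedding".equals(field)) {
                  // a denser graph for the field where recall matters most
                  return new Lucene90HnswVectorsFormat(32, 200);
                }
                return new Lucene90HnswVectorsFormat(
                    Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN,
                    Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH);
              }
            });
        return iwc;
      }
    }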

View File

@ -87,21 +87,21 @@ public class TestPerFieldConsistency extends LuceneTestCase {
}
}
private static Field randomVectorField(Random random, String fieldName) {
private static Field randomKnnVectorField(Random random, String fieldName) {
VectorSimilarityFunction similarityFunction =
RandomPicks.randomFrom(random, VectorSimilarityFunction.values());
float[] values = new float[randomIntBetween(1, 10)];
for (int i = 0; i < values.length; i++) {
values[i] = randomFloat();
}
return new VectorField(fieldName, values, similarityFunction);
return new KnnVectorField(fieldName, values, similarityFunction);
}
private static Field[] randomFieldsWithTheSameName(String fieldName) {
final Field textField = randomIndexedField(random(), fieldName);
final Field docValuesField = randomDocValuesField(random(), fieldName);
final Field pointField = randomPointField(random(), fieldName);
final Field vectorField = randomVectorField(random(), fieldName);
final Field vectorField = randomKnnVectorField(random(), fieldName);
return new Field[] {textField, docValuesField, pointField, vectorField};
}

View File

@ -33,6 +33,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
@ -40,7 +41,6 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.VectorField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeSource;
@ -381,6 +381,7 @@ public class TestDocumentWriter extends LuceneTestCase {
public void testRAMUsageVector() throws IOException {
doTestRAMUsage(
field ->
new VectorField(field, new float[] {1, 2, 3, 4}, VectorSimilarityFunction.EUCLIDEAN));
new KnnVectorField(
field, new float[] {1, 2, 3, 4}, VectorSimilarityFunction.EUCLIDEAN));
}
}

View File

@ -27,17 +27,17 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsReader;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.VectorField;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
@ -54,7 +54,7 @@ public class TestKnnGraph extends LuceneTestCase {
private static final String KNN_GRAPH_FIELD = "vector";
private static int maxConn = Lucene90HnswVectorFormat.DEFAULT_MAX_CONN;
private static int maxConn = Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
private Codec codec;
private VectorSimilarityFunction similarityFunction;
@ -69,9 +69,9 @@ public class TestKnnGraph extends LuceneTestCase {
codec =
new Lucene90Codec() {
@Override
public VectorFormat getVectorFormatForField(String field) {
return new Lucene90HnswVectorFormat(
maxConn, Lucene90HnswVectorFormat.DEFAULT_BEAM_WIDTH);
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new Lucene90HnswVectorsFormat(
maxConn, Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH);
}
};
@ -81,7 +81,7 @@ public class TestKnnGraph extends LuceneTestCase {
@After
public void cleanup() {
maxConn = Lucene90HnswVectorFormat.DEFAULT_MAX_CONN;
maxConn = Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
}
/** Basic test of creating documents in a graph */
@ -182,11 +182,11 @@ public class TestKnnGraph extends LuceneTestCase {
iw.forceMerge(1);
}
try (IndexReader reader = DirectoryReader.open(dir)) {
PerFieldVectorFormat.FieldsReader perFieldReader =
(PerFieldVectorFormat.FieldsReader)
PerFieldKnnVectorsFormat.FieldsReader perFieldReader =
(PerFieldKnnVectorsFormat.FieldsReader)
((CodecReader) getOnlyLeafReader(reader)).getVectorReader();
Lucene90HnswVectorReader vectorReader =
(Lucene90HnswVectorReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
Lucene90HnswVectorsReader vectorReader =
(Lucene90HnswVectorsReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
graph = copyGraph(vectorReader.getGraphValues(KNN_GRAPH_FIELD));
}
}
@ -323,13 +323,13 @@ public class TestKnnGraph extends LuceneTestCase {
for (LeafReaderContext ctx : dr.leaves()) {
LeafReader reader = ctx.reader();
VectorValues vectorValues = reader.getVectorValues(KNN_GRAPH_FIELD);
PerFieldVectorFormat.FieldsReader perFieldReader =
(PerFieldVectorFormat.FieldsReader) ((CodecReader) reader).getVectorReader();
PerFieldKnnVectorsFormat.FieldsReader perFieldReader =
(PerFieldKnnVectorsFormat.FieldsReader) ((CodecReader) reader).getVectorReader();
if (perFieldReader == null) {
continue;
}
Lucene90HnswVectorReader vectorReader =
(Lucene90HnswVectorReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
Lucene90HnswVectorsReader vectorReader =
(Lucene90HnswVectorsReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
KnnGraphValues graphValues = vectorReader.getGraphValues(KNN_GRAPH_FIELD);
assertEquals((vectorValues == null), (graphValues == null));
if (vectorValues == null) {
@ -458,8 +458,8 @@ public class TestKnnGraph extends LuceneTestCase {
throws IOException {
Document doc = new Document();
if (vector != null) {
FieldType fieldType = VectorField.createFieldType(vector.length, similarityFunction);
doc.add(new VectorField(KNN_GRAPH_FIELD, vector, fieldType));
FieldType fieldType = KnnVectorField.createFieldType(vector.length, similarityFunction);
doc.add(new KnnVectorField(KNN_GRAPH_FIELD, vector, fieldType));
}
String idString = Integer.toString(id);
doc.add(new StringField("id", idString, Field.Store.YES));

View File

@ -31,6 +31,7 @@ import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
@ -38,7 +39,6 @@ import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.VectorField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
@ -127,7 +127,7 @@ public class TestSortingCodecReader extends LuceneTestCase {
doc.add(new BinaryDocValuesField("binary_dv", new BytesRef(Integer.toString(docId))));
doc.add(
new SortedSetDocValuesField("sorted_set_dv", new BytesRef(Integer.toString(docId))));
doc.add(new VectorField("vector", new float[] {(float) docId}));
doc.add(new KnnVectorField("vector", new float[] {(float) docId}));
doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);

View File

@ -35,14 +35,14 @@ import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.VectorField;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
@ -240,7 +240,7 @@ public class KnnGraphTester {
for (LeafReaderContext context : reader.leaves()) {
LeafReader leafReader = context.reader();
KnnGraphValues knnValues =
((Lucene90HnswVectorReader) ((CodecReader) leafReader).getVectorReader())
((Lucene90HnswVectorsReader) ((CodecReader) leafReader).getVectorReader())
.getGraphValues(KNN_FIELD);
System.out.printf("Leaf %d has %d documents\n", context.ord, leafReader.maxDoc());
printGraphFanout(knnValues, leafReader.maxDoc());
@ -573,15 +573,15 @@ public class KnnGraphTester {
iwc.setCodec(
new Lucene90Codec() {
@Override
public VectorFormat getVectorFormatForField(String field) {
return new Lucene90HnswVectorFormat(maxConn, beamWidth);
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new Lucene90HnswVectorsFormat(maxConn, beamWidth);
}
});
// iwc.setMergePolicy(NoMergePolicy.INSTANCE);
iwc.setRAMBufferSizeMB(1994d);
// iwc.setMaxBufferedDocs(10000);
FieldType fieldType = VectorField.createFieldType(dim, VectorSimilarityFunction.DOT_PRODUCT);
FieldType fieldType = KnnVectorField.createFieldType(dim, VectorSimilarityFunction.DOT_PRODUCT);
if (quiet == false) {
iwc.setInfoStream(new PrintStreamInfoStream(System.out));
System.out.println("creating index in " + indexPath);
@ -606,7 +606,7 @@ public class KnnGraphTester {
vectors.get(vector);
Document doc = new Document();
// System.out.println("vector=" + vector[0] + "," + vector[1] + "...");
doc.add(new VectorField(KNN_FIELD, vector, fieldType));
doc.add(new KnnVectorField(KNN_FIELD, vector, fieldType));
doc.add(new StoredField(ID_FIELD, i));
iw.addDocument(doc);
}

View File

@ -24,14 +24,14 @@ import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsReader;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.VectorField;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
@ -80,8 +80,8 @@ public class TestHnsw extends LuceneTestCase {
.setCodec(
new Lucene90Codec() {
@Override
public VectorFormat getVectorFormatForField(String field) {
return new Lucene90HnswVectorFormat(maxConn, beamWidth);
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new Lucene90HnswVectorsFormat(maxConn, beamWidth);
}
});
try (IndexWriter iw = new IndexWriter(dir, iwc)) {
@ -92,7 +92,7 @@ public class TestHnsw extends LuceneTestCase {
indexedDoc++;
}
Document doc = new Document();
doc.add(new VectorField("field", v2.vectorValue()));
doc.add(new KnnVectorField("field", v2.vectorValue()));
doc.add(new StoredField("id", v2.docID()));
iw.addDocument(doc);
nVec++;
@ -108,8 +108,8 @@ public class TestHnsw extends LuceneTestCase {
assertEquals(indexedDoc, ctx.reader().numDocs());
assertVectorsEqual(v3, values);
KnnGraphValues graphValues =
((Lucene90HnswVectorReader)
((PerFieldVectorFormat.FieldsReader)
((Lucene90HnswVectorsReader)
((PerFieldKnnVectorsFormat.FieldsReader)
((CodecReader) ctx.reader()).getVectorReader())
.getFieldReader("field"))
.getGraphValues("field");

View File

@ -18,16 +18,16 @@ package org.apache.lucene.codecs.asserting;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
import org.apache.lucene.util.TestUtil;
/** Acts like the default codec but with additional asserts. */
@ -62,11 +62,11 @@ public class AssertingCodec extends FilterCodec {
}
};
private final VectorFormat vectorFormat =
new PerFieldVectorFormat() {
private final KnnVectorsFormat knnVectorsFormat =
new PerFieldKnnVectorsFormat() {
@Override
public VectorFormat getVectorFormatForField(String field) {
return AssertingCodec.this.getVectorFormatForField(field);
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return AssertingCodec.this.getKnnVectorsFormatForField(field);
}
};
@ -77,7 +77,7 @@ public class AssertingCodec extends FilterCodec {
private final PostingsFormat defaultFormat = new AssertingPostingsFormat();
private final DocValuesFormat defaultDVFormat = new AssertingDocValuesFormat();
private final PointsFormat pointsFormat = new AssertingPointsFormat();
private final VectorFormat defaultVectorFormat = new AssertingVectorFormat();
private final KnnVectorsFormat defaultKnnVectorsFormat = new AssertingKnnVectorsFormat();
public AssertingCodec() {
super("Asserting", TestUtil.getDefaultCodec());
@ -119,8 +119,8 @@ public class AssertingCodec extends FilterCodec {
}
@Override
public VectorFormat vectorFormat() {
return vectorFormat;
public KnnVectorsFormat knnVectorsFormat() {
return knnVectorsFormat;
}
@Override
@ -152,7 +152,7 @@ public class AssertingCodec extends FilterCodec {
*
* <p>The default implementation always returns "Asserting"
*/
public VectorFormat getVectorFormatForField(String field) {
return defaultVectorFormat;
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return defaultKnnVectorsFormat;
}
}

View File

@ -18,9 +18,9 @@
package org.apache.lucene.codecs.asserting;
import java.io.IOException;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@ -28,29 +28,29 @@ import org.apache.lucene.index.VectorValues;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.TestUtil;
/** Wraps the default VectorFormat and provides additional assertions. */
public class AssertingVectorFormat extends VectorFormat {
/** Wraps the default KnnVectorsFormat and provides additional assertions. */
public class AssertingKnnVectorsFormat extends KnnVectorsFormat {
private final VectorFormat delegate = TestUtil.getDefaultVectorFormat();
private final KnnVectorsFormat delegate = TestUtil.getDefaultKnnVectorsFormat();
public AssertingVectorFormat() {
public AssertingKnnVectorsFormat() {
super("Asserting");
}
@Override
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new AssertingVectorWriter(delegate.fieldsWriter(state));
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new AssertingKnnVectorsWriter(delegate.fieldsWriter(state));
}
@Override
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
return new AssertingVectorReader(delegate.fieldsReader(state));
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
return new AssertingKnnVectorsReader(delegate.fieldsReader(state));
}
static class AssertingVectorWriter extends VectorWriter {
final VectorWriter delegate;
static class AssertingKnnVectorsWriter extends KnnVectorsWriter {
final KnnVectorsWriter delegate;
AssertingVectorWriter(VectorWriter delegate) {
AssertingKnnVectorsWriter(KnnVectorsWriter delegate) {
assert delegate != null;
this.delegate = delegate;
}
@ -73,10 +73,10 @@ public class AssertingVectorFormat extends VectorFormat {
}
}
static class AssertingVectorReader extends VectorReader {
final VectorReader delegate;
static class AssertingKnnVectorsReader extends KnnVectorsReader {
final KnnVectorsReader delegate;
AssertingVectorReader(VectorReader delegate) {
AssertingKnnVectorsReader(KnnVectorsReader delegate) {
assert delegate != null;
this.delegate = delegate;
}
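Both wrappers in this file follow the same pattern a custom format would: give the format its own name and delegate reading and writing to an existing implementation. A hypothetical sketch that simply re-labels the stock HNSW format (class name and parameters are illustrative; such a format would also need a service-provider entry like the one shown earlier):

    import java.io.IOException;
    import org.apache.lucene.codecs.KnnVectorsFormat;
    import org.apache.lucene.codecs.KnnVectorsReader;
    import org.apache.lucene.codecs.KnnVectorsWriter;
    import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
    import org.apache.lucene.index.SegmentReadState;
    import org.apache.lucene.index.SegmentWriteState;

    public class MyHnswVectorsFormat extends KnnVectorsFormat {
      private final KnnVectorsFormat delegate = new Lucene90HnswVectorsFormat(16, 100);

      public MyHnswVectorsFormat() {
        super("MyHnswVectorsFormat"); // the format's name, as recorded per field
      }

      @Override
      public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
        return delegate.fieldsWriter(state);
      }

      @Override
      public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
        return delegate.fieldsReader(state);
      }
    }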

View File

@ -22,13 +22,13 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.VectorField;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
@ -39,44 +39,45 @@ import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.VectorUtil;
/**
* Base class aiming at testing {@link VectorFormat vectors formats}. To test a new format, all you
* need is to register a new {@link Codec} which uses it and extend this class and override {@link
* #getCodec()}.
* Base class aiming at testing {@link KnnVectorsFormat vectors formats}. To test a new format, all
* you need is to register a new {@link Codec} which uses it and extend this class and override
* {@link #getCodec()}.
*
* @lucene.experimental
*/
public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCase {
public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTestCase {
@Override
protected void addRandomFields(Document doc) {
doc.add(new VectorField("v2", randomVector(30), VectorSimilarityFunction.EUCLIDEAN));
doc.add(new KnnVectorField("v2", randomVector(30), VectorSimilarityFunction.EUCLIDEAN));
}
public void testFieldConstructor() {
float[] v = new float[1];
VectorField field = new VectorField("f", v);
KnnVectorField field = new KnnVectorField("f", v);
assertEquals(1, field.fieldType().vectorDimension());
assertEquals(VectorSimilarityFunction.EUCLIDEAN, field.fieldType().vectorSimilarityFunction());
assertSame(v, field.vectorValue());
}
public void testFieldConstructorExceptions() {
expectThrows(IllegalArgumentException.class, () -> new VectorField(null, new float[1]));
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", null));
expectThrows(IllegalArgumentException.class, () -> new KnnVectorField(null, new float[1]));
expectThrows(IllegalArgumentException.class, () -> new KnnVectorField("f", null));
expectThrows(
IllegalArgumentException.class,
() -> new VectorField("f", new float[1], (VectorSimilarityFunction) null));
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", new float[0]));
() -> new KnnVectorField("f", new float[1], (VectorSimilarityFunction) null));
expectThrows(IllegalArgumentException.class, () -> new KnnVectorField("f", new float[0]));
expectThrows(
IllegalArgumentException.class,
() -> new VectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1]));
() -> new KnnVectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1]));
expectThrows(
IllegalArgumentException.class,
() -> new VectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1], (FieldType) null));
() ->
new KnnVectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1], (FieldType) null));
}
public void testFieldSetValue() {
VectorField field = new VectorField("f", new float[1]);
KnnVectorField field = new KnnVectorField("f", new float[1]);
float[] v1 = new float[1];
field.setVectorValue(v1);
assertSame(v1, field.vectorValue());
@ -90,11 +91,11 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
doc2.add(new KnnVectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
String errMsg =
@ -106,12 +107,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.commit();
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
doc2.add(new KnnVectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
String errMsg =
@ -126,11 +127,11 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
doc2.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
String errMsg =
@ -142,12 +143,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.commit();
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
doc2.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
String errMsg =
@ -161,13 +162,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[1], VectorSimilarityFunction.DOT_PRODUCT));
doc2.add(new KnnVectorField("f", new float[1], VectorSimilarityFunction.DOT_PRODUCT));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
assertEquals(
@ -182,13 +183,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
doc2.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
assertEquals(
@ -202,7 +203,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
public void testAddIndexesDirectory0() throws Exception {
String fieldName = "field";
Document doc = new Document();
doc.add(new VectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
try (Directory dir = newDirectory();
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
@ -230,7 +231,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
w.addDocument(doc);
}
doc.add(new VectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
w2.addDocument(doc);
w2.addIndexes(dir);
@ -250,7 +251,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
String fieldName = "field";
float[] vector = new float[1];
Document doc = new Document();
doc.add(new VectorField(fieldName, vector, VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField(fieldName, vector, VectorSimilarityFunction.DOT_PRODUCT));
try (Directory dir = newDirectory();
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
@ -281,12 +282,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
w2.addDocument(doc);
IllegalArgumentException expected =
expectThrows(
@ -304,12 +305,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
w2.addDocument(doc);
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w2.addIndexes(dir));
@ -326,12 +327,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
w2.addDocument(doc);
try (DirectoryReader r = DirectoryReader.open(dir)) {
IllegalArgumentException expected =
@ -352,12 +353,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
w2.addDocument(doc);
try (DirectoryReader r = DirectoryReader.open(dir)) {
IllegalArgumentException expected =
@ -378,12 +379,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
w2.addDocument(doc);
try (DirectoryReader r = DirectoryReader.open(dir)) {
IllegalArgumentException expected =
@ -402,12 +403,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
w2.addDocument(doc);
try (DirectoryReader r = DirectoryReader.open(dir)) {
IllegalArgumentException expected =
@ -425,8 +426,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
assertEquals(
@ -443,13 +444,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
IllegalArgumentException.class,
() ->
doc.add(
new VectorField(
new KnnVectorField(
"f",
new float[VectorValues.MAX_DIMENSIONS + 1],
VectorSimilarityFunction.DOT_PRODUCT)));
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
doc2.add(new KnnVectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
w.addDocument(doc2);
}
}
@ -462,11 +463,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
expectThrows(
IllegalArgumentException.class,
() ->
doc.add(new VectorField("f", new float[0], VectorSimilarityFunction.EUCLIDEAN)));
doc.add(
new KnnVectorField("f", new float[0], VectorSimilarityFunction.EUCLIDEAN)));
assertEquals("cannot index an empty vector", e.getMessage());
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
doc2.add(new KnnVectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
w.addDocument(doc2);
}
}
@ -476,14 +478,14 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(Codec.forName("SimpleText"));
try (IndexWriter w = new IndexWriter(dir, iwc)) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.forceMerge(1);
}
@ -497,20 +499,21 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, iwc)) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.forceMerge(1);
}
}
}
public void testInvalidVectorFieldUsage() {
VectorField field = new VectorField("field", new float[2], VectorSimilarityFunction.EUCLIDEAN);
public void testInvalidKnnVectorFieldUsage() {
KnnVectorField field =
new KnnVectorField("field", new float[2], VectorSimilarityFunction.EUCLIDEAN);
expectThrows(IllegalArgumentException.class, () -> field.setIntValue(14));
@ -524,7 +527,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.NO));
doc.add(new VectorField("v", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
doc.add(new KnnVectorField("v", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.addDocument(new Document());
w.commit();
@ -544,17 +547,19 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
}
}
public void testVectorFieldMissingFromOneSegment() throws Exception {
public void testKnnVectorFieldMissingFromOneSegment() throws Exception {
try (Directory dir = FSDirectory.open(createTempDir());
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.NO));
doc.add(new VectorField("v0", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
doc.add(
new KnnVectorField("v0", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new VectorField("v1", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
doc.add(
new KnnVectorField("v1", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.forceMerge(1);
}
@ -581,7 +586,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
String fieldName = "int" + field;
if (random().nextInt(100) == 17) {
float[] v = randomVector(fieldDims[field]);
doc.add(new VectorField(fieldName, v, fieldSearchStrategies[field]));
doc.add(new KnnVectorField(fieldName, v, fieldSearchStrategies[field]));
fieldDocCounts[field]++;
fieldTotals[field] += v[0];
}
@ -618,15 +623,15 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc1 = new Document();
doc1.add(new VectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
doc1.add(new KnnVectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
v[0] = 1;
Document doc2 = new Document();
doc2.add(new VectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
doc2.add(new KnnVectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
iw.addDocument(doc1);
iw.addDocument(doc2);
v[0] = 2;
Document doc3 = new Document();
doc3.add(new VectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
doc3.add(new KnnVectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
iw.addDocument(doc3);
iw.forceMerge(1);
try (IndexReader reader = iw.getReader()) {
@ -676,19 +681,21 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
}
}
public void testIndexMultipleVectorFields() throws Exception {
public void testIndexMultipleKnnVectorFields() throws Exception {
try (Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
float[] v = new float[] {1};
doc.add(new VectorField("field1", v, VectorSimilarityFunction.EUCLIDEAN));
doc.add(new VectorField("field2", new float[] {1, 2, 3}, VectorSimilarityFunction.EUCLIDEAN));
doc.add(new KnnVectorField("field1", v, VectorSimilarityFunction.EUCLIDEAN));
doc.add(
new KnnVectorField("field2", new float[] {1, 2, 3}, VectorSimilarityFunction.EUCLIDEAN));
iw.addDocument(doc);
v[0] = 2;
iw.addDocument(doc);
doc = new Document();
doc.add(
new VectorField("field3", new float[] {1, 2, 3}, VectorSimilarityFunction.DOT_PRODUCT));
new KnnVectorField(
"field3", new float[] {1, 2, 3}, VectorSimilarityFunction.DOT_PRODUCT));
iw.addDocument(doc);
iw.forceMerge(1);
try (IndexReader reader = iw.getReader()) {
@ -879,7 +886,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
throws IOException {
Document doc = new Document();
if (vector != null) {
doc.add(new VectorField(field, vector, similarityFunction));
doc.add(new KnnVectorField(field, vector, similarityFunction));
}
doc.add(new NumericDocValuesField("sortkey", sortkey));
String idString = Integer.toString(id);
@ -901,10 +908,10 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("v1", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
doc.add(new KnnVectorField("v1", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
w.addDocument(doc);
doc.add(new VectorField("v2", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
doc.add(new KnnVectorField("v2", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
w.addDocument(doc);
}
@ -915,7 +922,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
// total 3 vector values were indexed:
assertEquals(3, segStatus.vectorValuesStatus.totalVectorValues);
// ... across 2 fields:
assertEquals(2, segStatus.vectorValuesStatus.totalVectorFields);
assertEquals(2, segStatus.vectorValuesStatus.totalKnnVectorFields);
// Make sure CheckIndex in fact declares that it is testing vectors!
assertTrue(output.toString(IOUtils.UTF_8).contains("test: vectors..."));
@ -939,7 +946,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Document doc = new Document();
// randomly add a vector field
if (random().nextInt(4) == 3) {
doc.add(new VectorField(fieldName, new float[4], VectorSimilarityFunction.EUCLIDEAN));
doc.add(
new KnnVectorField(fieldName, new float[4], VectorSimilarityFunction.EUCLIDEAN));
}
w.addDocument(doc);
}
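As the class javadoc above states, testing a format only takes a Codec that uses it plus a getCodec() override in a subclass of this base class. A minimal sketch, mirroring the test classes elsewhere in this commit (class name and HNSW parameters are illustrative):

    import org.apache.lucene.codecs.Codec;
    import org.apache.lucene.codecs.KnnVectorsFormat;
    import org.apache.lucene.codecs.lucene90.Lucene90Codec;
    import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
    import org.apache.lucene.index.BaseKnnVectorsFormatTestCase;

    public class TestMyHnswVectorsFormat extends BaseKnnVectorsFormatTestCase {
      @Override
      protected Codec getCodec() {
        return new Lucene90Codec() {
          @Override
          public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
            return new Lucene90HnswVectorsFormat(16, 100); // the format under test
          }
        };
      }
    }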

View File

@ -49,14 +49,14 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.codecs.blockterms.LuceneFixedGap;
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
@ -1297,10 +1297,11 @@ public final class TestUtil {
}
/**
* Returns the actual default vector format (e.g. LuceneMNVectorFormat for this version of Lucene.
* Returns the actual default vector format (e.g. LuceneMNKnnVectorsFormat) for this version of
* Lucene.
*/
public static VectorFormat getDefaultVectorFormat() {
return new Lucene90HnswVectorFormat();
public static KnnVectorsFormat getDefaultKnnVectorsFormat() {
return new Lucene90HnswVectorsFormat();
}
public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {

View File

@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.codecs.asserting.AssertingVectorFormat
org.apache.lucene.codecs.asserting.AssertingKnnVectorsFormat