mirror of https://github.com/apache/lucene.git
LUCENE-9855: Rename knn search vector format (#218)
This commit is contained in:
parent
ad7746d6e3
commit
df807dbe8f
|
@ -7,9 +7,9 @@ http://s.apache.org/luceneversions
|
|||
|
||||
New Features
|
||||
|
||||
* LUCENE-9322: Vector-valued fields, Lucene90 Codec (Mike Sokolov, Julie Tibshirani, Tomoko Uchida)
|
||||
* LUCENE-9322 LUCENE-9855: Vector-valued fields, Lucene90 Codec (Mike Sokolov, Julie Tibshirani, Tomoko Uchida)
|
||||
|
||||
* LUCENE-9004: Approximate nearest vector search via NSW graphs
|
||||
* LUCENE-9004: Approximate nearest vector search via NSW graphs (Mike Sokolov, Tomoko Uchida et al.)
|
||||
|
||||
* LUCENE-9659: SpanPayloadCheckQuery now supports inequalities. (Kevin Watters, Gus Heck)
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.codecs.CompoundFormat;
|
|||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.PointsFormat;
|
||||
|
@ -35,7 +36,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
|||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
|
||||
|
@ -122,8 +122,8 @@ public class Lucene70Codec extends Codec {
|
|||
}
|
||||
|
||||
@Override
|
||||
public VectorFormat vectorFormat() {
|
||||
return VectorFormat.EMPTY;
|
||||
public KnnVectorsFormat knnVectorsFormat() {
|
||||
return KnnVectorsFormat.EMPTY;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.codecs.Codec;
|
|||
import org.apache.lucene.codecs.CompoundFormat;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.PointsFormat;
|
||||
|
@ -34,7 +35,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
|||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
|
||||
|
@ -129,7 +129,7 @@ public class Lucene80Codec extends Codec {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final VectorFormat vectorFormat() {
|
||||
return VectorFormat.EMPTY;
|
||||
public final KnnVectorsFormat knnVectorsFormat() {
|
||||
return KnnVectorsFormat.EMPTY;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.codecs.CompoundFormat;
|
|||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.PointsFormat;
|
||||
|
@ -38,7 +39,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
|||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
|
||||
|
@ -134,8 +134,8 @@ public class Lucene84Codec extends Codec {
|
|||
}
|
||||
|
||||
@Override
|
||||
public VectorFormat vectorFormat() {
|
||||
return VectorFormat.EMPTY;
|
||||
public KnnVectorsFormat knnVectorsFormat() {
|
||||
return KnnVectorsFormat.EMPTY;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.codecs.CompoundFormat;
|
|||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.PointsFormat;
|
||||
|
@ -37,7 +38,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
|||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
|
||||
|
@ -133,8 +133,8 @@ public class Lucene86Codec extends Codec {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final VectorFormat vectorFormat() {
|
||||
return VectorFormat.EMPTY;
|
||||
public final KnnVectorsFormat knnVectorsFormat() {
|
||||
return KnnVectorsFormat.EMPTY;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.codecs.CompoundFormat;
|
|||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.PointsFormat;
|
||||
|
@ -39,7 +40,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
|||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
|
||||
|
@ -157,8 +157,8 @@ public class Lucene87Codec extends Codec {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final VectorFormat vectorFormat() {
|
||||
return VectorFormat.EMPTY;
|
||||
public final KnnVectorsFormat knnVectorsFormat() {
|
||||
return KnnVectorsFormat.EMPTY;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -20,6 +20,7 @@ import org.apache.lucene.codecs.Codec;
|
|||
import org.apache.lucene.codecs.CompoundFormat;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.PointsFormat;
|
||||
|
@ -27,7 +28,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
|||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
|
||||
/**
|
||||
* plain text index format.
|
||||
|
@ -47,7 +47,7 @@ public final class SimpleTextCodec extends Codec {
|
|||
private final DocValuesFormat dvFormat = new SimpleTextDocValuesFormat();
|
||||
private final CompoundFormat compoundFormat = new SimpleTextCompoundFormat();
|
||||
private final PointsFormat pointsFormat = new SimpleTextPointsFormat();
|
||||
private final VectorFormat vectorFormat = new SimpleTextVectorFormat();
|
||||
private final KnnVectorsFormat knnVectorsFormat = new SimpleTextKnnVectorsFormat();
|
||||
|
||||
public SimpleTextCodec() {
|
||||
super("SimpleText");
|
||||
|
@ -104,7 +104,7 @@ public final class SimpleTextCodec extends Codec {
|
|||
}
|
||||
|
||||
@Override
|
||||
public VectorFormat vectorFormat() {
|
||||
return vectorFormat;
|
||||
public KnnVectorsFormat knnVectorsFormat() {
|
||||
return knnVectorsFormat;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,9 +17,9 @@
|
|||
package org.apache.lucene.codecs.simpletext;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.codecs.VectorWriter;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
||||
|
@ -31,20 +31,20 @@ import org.apache.lucene.index.SegmentWriteState;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class SimpleTextVectorFormat extends VectorFormat {
|
||||
public final class SimpleTextKnnVectorsFormat extends KnnVectorsFormat {
|
||||
|
||||
public SimpleTextVectorFormat() {
|
||||
super("SimpleTextVectorFormat");
|
||||
public SimpleTextKnnVectorsFormat() {
|
||||
super("SimpleTextKnnVectorsFormat");
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
return new SimpleTextVectorWriter(state);
|
||||
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
return new SimpleTextKnnVectorsWriter(state);
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
return new SimpleTextVectorReader(state);
|
||||
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
return new SimpleTextKnnVectorsReader(state);
|
||||
}
|
||||
|
||||
/** Extension of vectors data file */
|
|
@ -17,14 +17,14 @@
|
|||
|
||||
package org.apache.lucene.codecs.simpletext;
|
||||
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextVectorWriter.*;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextKnnVectorsWriter.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -49,10 +49,10 @@ import org.apache.lucene.util.StringHelper;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SimpleTextVectorReader extends VectorReader {
|
||||
public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
|
||||
// shallowSizeOfInstance for fieldEntries map is included in ramBytesUsed() calculation
|
||||
private static final long BASE_RAM_BYTES_USED =
|
||||
RamUsageEstimator.shallowSizeOfInstance(SimpleTextVectorReader.class)
|
||||
RamUsageEstimator.shallowSizeOfInstance(SimpleTextKnnVectorsReader.class)
|
||||
+ RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
|
||||
|
||||
private static final BytesRef EMPTY = new BytesRef("");
|
||||
|
@ -62,18 +62,18 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||
private final BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
private final Map<String, FieldEntry> fieldEntries = new HashMap<>();
|
||||
|
||||
SimpleTextVectorReader(SegmentReadState readState) throws IOException {
|
||||
SimpleTextKnnVectorsReader(SegmentReadState readState) throws IOException {
|
||||
this.readState = readState;
|
||||
String metaFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
readState.segmentInfo.name,
|
||||
readState.segmentSuffix,
|
||||
SimpleTextVectorFormat.META_EXTENSION);
|
||||
SimpleTextKnnVectorsFormat.META_EXTENSION);
|
||||
String vectorFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
readState.segmentInfo.name,
|
||||
readState.segmentSuffix,
|
||||
SimpleTextVectorFormat.VECTOR_EXTENSION);
|
||||
SimpleTextKnnVectorsFormat.VECTOR_EXTENSION);
|
||||
|
||||
boolean success = false;
|
||||
try (ChecksumIndexInput in =
|
||||
|
@ -110,7 +110,7 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||
FieldInfo info = readState.fieldInfos.fieldInfo(field);
|
||||
if (info == null) {
|
||||
// mirror the handling in Lucene90HnswVectorsReader#getVectorValues
|
||||
// needed to pass TestSimpleTextVectorFormat#testDeleteAllVectorDocs
|
||||
// needed to pass TestSimpleTextKnnVectorsFormat#testDeleteAllVectorDocs
|
||||
return null;
|
||||
}
|
||||
int dimension = info.getVectorDimension();
|
||||
|
@ -120,7 +120,7 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||
FieldEntry fieldEntry = fieldEntries.get(field);
|
||||
if (fieldEntry == null) {
|
||||
// mirror the handling in Lucene90HnswVectorsReader#getVectorValues
|
||||
// needed to pass TestSimpleTextVectorFormat#testDeleteAllVectorDocs
|
||||
// needed to pass TestSimpleTextKnnVectorsFormat#testDeleteAllVectorDocs
|
||||
return null;
|
||||
}
|
||||
if (dimension != fieldEntry.dimension) {
|
||||
|
@ -153,7 +153,7 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||
ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
|
||||
|
||||
// when there's no actual vector data written (e.g. tested in
|
||||
// TestSimpleTextVectorFormat#testDeleteAllVectorDocs)
|
||||
// TestSimpleTextKnnVectorsFormat#testDeleteAllVectorDocs)
|
||||
// the first line in dataInput will be, checksum 00000000000000000000
|
||||
if (footerStartPos == 0) {
|
||||
SimpleTextUtil.checkFooter(input);
|
||||
|
@ -271,7 +271,7 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||
} else if (curOrd >= entry.size()) {
|
||||
// when call to advance / nextDoc below already returns NO_MORE_DOCS, calling docID
|
||||
// immediately afterward should also return NO_MORE_DOCS
|
||||
// this is needed for TestSimpleTextVectorFormat.testAdvance test case
|
||||
// this is needed for TestSimpleTextKnnVectorsFormat.testAdvance test case
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
|
|
@ -23,7 +23,7 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.codecs.VectorWriter;
|
||||
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
@ -34,7 +34,7 @@ import org.apache.lucene.util.BytesRefBuilder;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/** Writes vector-valued fields in a plain text format */
|
||||
public class SimpleTextVectorWriter extends VectorWriter {
|
||||
public class SimpleTextKnnVectorsWriter extends KnnVectorsWriter {
|
||||
|
||||
static final BytesRef FIELD_NUMBER = new BytesRef("field-number ");
|
||||
static final BytesRef FIELD_NAME = new BytesRef("field-name ");
|
||||
|
@ -46,20 +46,24 @@ public class SimpleTextVectorWriter extends VectorWriter {
|
|||
private final IndexOutput meta, vectorData;
|
||||
private final BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
|
||||
SimpleTextVectorWriter(SegmentWriteState state) throws IOException {
|
||||
SimpleTextKnnVectorsWriter(SegmentWriteState state) throws IOException {
|
||||
assert state.fieldInfos.hasVectorValues();
|
||||
|
||||
boolean success = false;
|
||||
// exception handling to pass TestSimpleTextVectorFormat#testRandomExceptions
|
||||
// exception handling to pass TestSimpleTextKnnVectorsFormat#testRandomExceptions
|
||||
try {
|
||||
String metaFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name, state.segmentSuffix, SimpleTextVectorFormat.META_EXTENSION);
|
||||
state.segmentInfo.name,
|
||||
state.segmentSuffix,
|
||||
SimpleTextKnnVectorsFormat.META_EXTENSION);
|
||||
meta = state.directory.createOutput(metaFileName, state.context);
|
||||
|
||||
String vectorDataFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name, state.segmentSuffix, SimpleTextVectorFormat.VECTOR_EXTENSION);
|
||||
state.segmentInfo.name,
|
||||
state.segmentSuffix,
|
||||
SimpleTextKnnVectorsFormat.VECTOR_EXTENSION);
|
||||
vectorData = state.directory.createOutput(vectorDataFileName, state.context);
|
||||
success = true;
|
||||
} finally {
|
|
@ -17,9 +17,9 @@
|
|||
package org.apache.lucene.codecs.simpletext;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.index.BaseVectorFormatTestCase;
|
||||
import org.apache.lucene.index.BaseKnnVectorsFormatTestCase;
|
||||
|
||||
public class TestSimpleTextVectorFormat extends BaseVectorFormatTestCase {
|
||||
public class TestSimpleTextKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
return new SimpleTextCodec();
|
|
@ -111,7 +111,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
|
|||
public abstract PointsFormat pointsFormat();
|
||||
|
||||
/** Encodes/decodes numeric vector fields */
|
||||
public abstract VectorFormat vectorFormat();
|
||||
public abstract KnnVectorsFormat knnVectorsFormat();
|
||||
|
||||
/** looks up a codec by name */
|
||||
public static Codec forName(String name) {
|
||||
|
|
|
@ -108,7 +108,7 @@ public abstract class FilterCodec extends Codec {
|
|||
}
|
||||
|
||||
@Override
|
||||
public VectorFormat vectorFormat() {
|
||||
return delegate.vectorFormat();
|
||||
public KnnVectorsFormat knnVectorsFormat() {
|
||||
return delegate.knnVectorsFormat();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,23 +29,23 @@ import org.apache.lucene.util.NamedSPILoader;
|
|||
* Encodes/decodes per-document vector and any associated indexing structures required to support
|
||||
* nearest-neighbor search
|
||||
*/
|
||||
public abstract class VectorFormat implements NamedSPILoader.NamedSPI {
|
||||
public abstract class KnnVectorsFormat implements NamedSPILoader.NamedSPI {
|
||||
|
||||
/**
|
||||
* This static holder class prevents classloading deadlock by delaying init of vector formats
|
||||
* until needed.
|
||||
*/
|
||||
private static final class Holder {
|
||||
private static final NamedSPILoader<VectorFormat> LOADER =
|
||||
new NamedSPILoader<>(VectorFormat.class);
|
||||
private static final NamedSPILoader<KnnVectorsFormat> LOADER =
|
||||
new NamedSPILoader<>(KnnVectorsFormat.class);
|
||||
|
||||
private Holder() {}
|
||||
|
||||
static NamedSPILoader<VectorFormat> getLoader() {
|
||||
static NamedSPILoader<KnnVectorsFormat> getLoader() {
|
||||
if (LOADER == null) {
|
||||
throw new IllegalStateException(
|
||||
"You tried to lookup a VectorFormat name before all formats could be initialized. "
|
||||
+ "This likely happens if you call VectorFormat#forName from a VectorFormat's ctor.");
|
||||
"You tried to lookup a KnnVectorsFormat name before all formats could be initialized. "
|
||||
+ "This likely happens if you call KnnVectorsFormat#forName from a KnnVectorsFormat's ctor.");
|
||||
}
|
||||
return LOADER;
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ public abstract class VectorFormat implements NamedSPILoader.NamedSPI {
|
|||
private final String name;
|
||||
|
||||
/** Sole constructor */
|
||||
protected VectorFormat(String name) {
|
||||
protected KnnVectorsFormat(String name) {
|
||||
NamedSPILoader.checkServiceName(name);
|
||||
this.name = name;
|
||||
}
|
||||
|
@ -65,31 +65,31 @@ public abstract class VectorFormat implements NamedSPILoader.NamedSPI {
|
|||
}
|
||||
|
||||
/** looks up a format by name */
|
||||
public static VectorFormat forName(String name) {
|
||||
public static KnnVectorsFormat forName(String name) {
|
||||
return Holder.getLoader().lookup(name);
|
||||
}
|
||||
|
||||
/** Returns a {@link VectorWriter} to write the vectors to the index. */
|
||||
public abstract VectorWriter fieldsWriter(SegmentWriteState state) throws IOException;
|
||||
/** Returns a {@link KnnVectorsWriter} to write the vectors to the index. */
|
||||
public abstract KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException;
|
||||
|
||||
/** Returns a {@link VectorReader} to read the vectors from the index. */
|
||||
public abstract VectorReader fieldsReader(SegmentReadState state) throws IOException;
|
||||
/** Returns a {@link KnnVectorsReader} to read the vectors from the index. */
|
||||
public abstract KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException;
|
||||
|
||||
/**
|
||||
* EMPTY throws an exception when written. It acts as a sentinel indicating a Codec that does not
|
||||
* support vectors.
|
||||
*/
|
||||
public static final VectorFormat EMPTY =
|
||||
new VectorFormat("EMPTY") {
|
||||
public static final KnnVectorsFormat EMPTY =
|
||||
new KnnVectorsFormat("EMPTY") {
|
||||
@Override
|
||||
public VectorWriter fieldsWriter(SegmentWriteState state) {
|
||||
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Attempt to write EMPTY VectorValues: maybe you forgot to use codec=Lucene90");
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorReader fieldsReader(SegmentReadState state) {
|
||||
return new VectorReader() {
|
||||
public KnnVectorsReader fieldsReader(SegmentReadState state) {
|
||||
return new KnnVectorsReader() {
|
||||
@Override
|
||||
public void checkIntegrity() {}
|
||||
|
|
@ -24,10 +24,10 @@ import org.apache.lucene.search.TopDocs;
|
|||
import org.apache.lucene.util.Accountable;
|
||||
|
||||
/** Reads vectors from an index. */
|
||||
public abstract class VectorReader implements Closeable, Accountable {
|
||||
public abstract class KnnVectorsReader implements Closeable, Accountable {
|
||||
|
||||
/** Sole constructor */
|
||||
protected VectorReader() {}
|
||||
protected KnnVectorsReader() {}
|
||||
|
||||
/**
|
||||
* Checks consistency of this reader.
|
||||
|
@ -61,7 +61,7 @@ public abstract class VectorReader implements Closeable, Accountable {
|
|||
*
|
||||
* <p>The default implementation returns {@code this}
|
||||
*/
|
||||
public VectorReader getMergeInstance() {
|
||||
public KnnVectorsReader getMergeInstance() {
|
||||
return this;
|
||||
}
|
||||
}
|
|
@ -34,10 +34,10 @@ import org.apache.lucene.index.VectorValues;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Writes vectors to an index. */
|
||||
public abstract class VectorWriter implements Closeable {
|
||||
public abstract class KnnVectorsWriter implements Closeable {
|
||||
|
||||
/** Sole constructor */
|
||||
protected VectorWriter() {}
|
||||
protected KnnVectorsWriter() {}
|
||||
|
||||
/** Write all values contained in the provided reader */
|
||||
public abstract void writeField(FieldInfo fieldInfo, VectorValues values) throws IOException;
|
||||
|
@ -48,7 +48,7 @@ public abstract class VectorWriter implements Closeable {
|
|||
/** Merge the vector values from multiple segments, for all fields */
|
||||
public void merge(MergeState mergeState) throws IOException {
|
||||
for (int i = 0; i < mergeState.fieldInfos.length; i++) {
|
||||
VectorReader reader = mergeState.vectorReaders[i];
|
||||
KnnVectorsReader reader = mergeState.knnVectorsReaders[i];
|
||||
assert reader != null || mergeState.fieldInfos[i].hasVectorValues() == false;
|
||||
if (reader != null) {
|
||||
reader.checkIntegrity();
|
||||
|
@ -71,9 +71,9 @@ public abstract class VectorWriter implements Closeable {
|
|||
int dimension = -1;
|
||||
VectorSimilarityFunction similarityFunction = null;
|
||||
int nonEmptySegmentIndex = 0;
|
||||
for (int i = 0; i < mergeState.vectorReaders.length; i++) {
|
||||
VectorReader vectorReader = mergeState.vectorReaders[i];
|
||||
if (vectorReader != null) {
|
||||
for (int i = 0; i < mergeState.knnVectorsReaders.length; i++) {
|
||||
KnnVectorsReader knnVectorsReader = mergeState.knnVectorsReaders[i];
|
||||
if (knnVectorsReader != null) {
|
||||
if (mergeFieldInfo != null && mergeFieldInfo.hasVectorValues()) {
|
||||
int segmentDimension = mergeFieldInfo.getVectorDimension();
|
||||
VectorSimilarityFunction segmentSimilarityFunction =
|
||||
|
@ -98,7 +98,7 @@ public abstract class VectorWriter implements Closeable {
|
|||
+ "!="
|
||||
+ segmentSimilarityFunction);
|
||||
}
|
||||
VectorValues values = vectorReader.getVectorValues(mergeFieldInfo.name);
|
||||
VectorValues values = knnVectorsReader.getVectorValues(mergeFieldInfo.name);
|
||||
if (values != null) {
|
||||
subs.add(new VectorValuesSub(nonEmptySegmentIndex++, mergeState.docMaps[i], values));
|
||||
}
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.codecs.CompoundFormat;
|
|||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.PointsFormat;
|
||||
|
@ -29,10 +30,9 @@ import org.apache.lucene.codecs.PostingsFormat;
|
|||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
|
||||
|
||||
/**
|
||||
* Implements the Lucene 9.0 index format
|
||||
|
@ -83,12 +83,12 @@ public class Lucene90Codec extends Codec {
|
|||
}
|
||||
};
|
||||
|
||||
private final VectorFormat defaultVectorFormat;
|
||||
private final VectorFormat vectorFormat =
|
||||
new PerFieldVectorFormat() {
|
||||
private final KnnVectorsFormat defaultKnnVectorsFormat;
|
||||
private final KnnVectorsFormat knnVectorsFormat =
|
||||
new PerFieldKnnVectorsFormat() {
|
||||
@Override
|
||||
public VectorFormat getVectorFormatForField(String field) {
|
||||
return Lucene90Codec.this.getVectorFormatForField(field);
|
||||
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||
return Lucene90Codec.this.getKnnVectorsFormatForField(field);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -110,7 +110,7 @@ public class Lucene90Codec extends Codec {
|
|||
new Lucene90StoredFieldsFormat(Objects.requireNonNull(mode).storedMode);
|
||||
this.defaultPostingsFormat = new Lucene90PostingsFormat();
|
||||
this.defaultDVFormat = new Lucene90DocValuesFormat();
|
||||
this.defaultVectorFormat = new Lucene90HnswVectorFormat();
|
||||
this.defaultKnnVectorsFormat = new Lucene90HnswVectorsFormat();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -154,8 +154,8 @@ public class Lucene90Codec extends Codec {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final VectorFormat vectorFormat() {
|
||||
return vectorFormat;
|
||||
public final KnnVectorsFormat knnVectorsFormat() {
|
||||
return knnVectorsFormat;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -191,8 +191,8 @@ public class Lucene90Codec extends Codec {
|
|||
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
|
||||
* future versions of Lucene are only guaranteed to be able to read the default implementation.
|
||||
*/
|
||||
public VectorFormat getVectorFormatForField(String field) {
|
||||
return defaultVectorFormat;
|
||||
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||
return defaultKnnVectorsFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -18,9 +18,9 @@
|
|||
package org.apache.lucene.codecs.lucene90;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.codecs.VectorWriter;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.util.hnsw.HnswGraph;
|
||||
|
@ -65,11 +65,11 @@ import org.apache.lucene.util.hnsw.HnswGraph;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class Lucene90HnswVectorFormat extends VectorFormat {
|
||||
public final class Lucene90HnswVectorsFormat extends KnnVectorsFormat {
|
||||
|
||||
static final String META_CODEC_NAME = "Lucene90HnswVectorFormatMeta";
|
||||
static final String VECTOR_DATA_CODEC_NAME = "Lucene90HnswVectorFormatData";
|
||||
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90HnswVectorFormatIndex";
|
||||
static final String META_CODEC_NAME = "Lucene90HnswVectorsFormatMeta";
|
||||
static final String VECTOR_DATA_CODEC_NAME = "Lucene90HnswVectorsFormatData";
|
||||
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90HnswVectorsFormatIndex";
|
||||
static final String META_EXTENSION = "vem";
|
||||
static final String VECTOR_DATA_EXTENSION = "vec";
|
||||
static final String VECTOR_INDEX_EXTENSION = "vex";
|
||||
|
@ -82,36 +82,34 @@ public final class Lucene90HnswVectorFormat extends VectorFormat {
|
|||
|
||||
/**
|
||||
* Controls how many of the nearest neighbor candidates are connected to the new node. Defaults to
|
||||
* {@link Lucene90HnswVectorFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details.
|
||||
* {@link Lucene90HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details.
|
||||
*/
|
||||
private final int maxConn;
|
||||
|
||||
/**
|
||||
* The number of candidate neighbors to track while searching the graph for each newly inserted
|
||||
* node. Defaults to to {@link Lucene90HnswVectorFormat#DEFAULT_BEAM_WIDTH}. See {@link HnswGraph}
|
||||
* for details.
|
||||
* node. Defaults to {@link Lucene90HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link
|
||||
* HnswGraph} for details.
|
||||
*/
|
||||
private final int beamWidth;
|
||||
|
||||
public Lucene90HnswVectorFormat() {
|
||||
super("Lucene90HnswVectorFormat");
|
||||
this.maxConn = DEFAULT_MAX_CONN;
|
||||
this.beamWidth = DEFAULT_BEAM_WIDTH;
|
||||
public Lucene90HnswVectorsFormat() {
|
||||
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH);
|
||||
}
|
||||
|
||||
public Lucene90HnswVectorFormat(int maxConn, int beamWidth) {
|
||||
super("Lucene90HnswVectorFormat");
|
||||
public Lucene90HnswVectorsFormat(int maxConn, int beamWidth) {
|
||||
super("Lucene90HnswVectorsFormat");
|
||||
this.maxConn = maxConn;
|
||||
this.beamWidth = beamWidth;
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
return new Lucene90HnswVectorWriter(state, maxConn, beamWidth);
|
||||
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
return new Lucene90HnswVectorsWriter(state, maxConn, beamWidth);
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
return new Lucene90HnswVectorReader(state);
|
||||
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
return new Lucene90HnswVectorsReader(state);
|
||||
}
|
||||
}
|
|
@ -26,7 +26,7 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
|
@ -54,7 +54,7 @@ import org.apache.lucene.util.hnsw.NeighborQueue;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class Lucene90HnswVectorReader extends VectorReader {
|
||||
public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
|
||||
|
||||
private final FieldInfos fieldInfos;
|
||||
private final Map<String, FieldEntry> fields = new HashMap<>();
|
||||
|
@ -62,10 +62,10 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
|||
private final IndexInput vectorIndex;
|
||||
private final long checksumSeed;
|
||||
|
||||
Lucene90HnswVectorReader(SegmentReadState state) throws IOException {
|
||||
Lucene90HnswVectorsReader(SegmentReadState state) throws IOException {
|
||||
this.fieldInfos = state.fieldInfos;
|
||||
|
||||
int versionMeta = readMetadata(state, Lucene90HnswVectorFormat.META_EXTENSION);
|
||||
int versionMeta = readMetadata(state, Lucene90HnswVectorsFormat.META_EXTENSION);
|
||||
long[] checksumRef = new long[1];
|
||||
boolean success = false;
|
||||
try {
|
||||
|
@ -73,15 +73,15 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
|||
openDataInput(
|
||||
state,
|
||||
versionMeta,
|
||||
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION,
|
||||
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
|
||||
Lucene90HnswVectorsFormat.VECTOR_DATA_EXTENSION,
|
||||
Lucene90HnswVectorsFormat.VECTOR_DATA_CODEC_NAME,
|
||||
checksumRef);
|
||||
vectorIndex =
|
||||
openDataInput(
|
||||
state,
|
||||
versionMeta,
|
||||
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION,
|
||||
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
|
||||
Lucene90HnswVectorsFormat.VECTOR_INDEX_EXTENSION,
|
||||
Lucene90HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME,
|
||||
checksumRef);
|
||||
success = true;
|
||||
} finally {
|
||||
|
@ -102,9 +102,9 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
|||
versionMeta =
|
||||
CodecUtil.checkIndexHeader(
|
||||
meta,
|
||||
Lucene90HnswVectorFormat.META_CODEC_NAME,
|
||||
Lucene90HnswVectorFormat.VERSION_START,
|
||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
||||
Lucene90HnswVectorsFormat.META_CODEC_NAME,
|
||||
Lucene90HnswVectorsFormat.VERSION_START,
|
||||
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(),
|
||||
state.segmentSuffix);
|
||||
readFields(meta, state.fieldInfos);
|
||||
|
@ -131,8 +131,8 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
|||
CodecUtil.checkIndexHeader(
|
||||
in,
|
||||
codecName,
|
||||
Lucene90HnswVectorFormat.VERSION_START,
|
||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
||||
Lucene90HnswVectorsFormat.VERSION_START,
|
||||
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(),
|
||||
state.segmentSuffix);
|
||||
if (versionMeta != versionVectorData) {
|
||||
|
@ -205,7 +205,7 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90HnswVectorReader.class);
|
||||
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90HnswVectorsReader.class);
|
||||
totalBytes +=
|
||||
RamUsageEstimator.sizeOfMap(
|
||||
fields, RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class));
|
|
@ -22,7 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
|||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.VectorWriter;
|
||||
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
|
||||
|
@ -41,7 +41,7 @@ import org.apache.lucene.util.hnsw.NeighborArray;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class Lucene90HnswVectorWriter extends VectorWriter {
|
||||
public final class Lucene90HnswVectorsWriter extends KnnVectorsWriter {
|
||||
|
||||
private final SegmentWriteState segmentWriteState;
|
||||
private final IndexOutput meta, vectorData, vectorIndex;
|
||||
|
@ -50,7 +50,8 @@ public final class Lucene90HnswVectorWriter extends VectorWriter {
|
|||
private final int beamWidth;
|
||||
private boolean finished;
|
||||
|
||||
Lucene90HnswVectorWriter(SegmentWriteState state, int maxConn, int beamWidth) throws IOException {
|
||||
Lucene90HnswVectorsWriter(SegmentWriteState state, int maxConn, int beamWidth)
|
||||
throws IOException {
|
||||
this.maxConn = maxConn;
|
||||
this.beamWidth = beamWidth;
|
||||
|
||||
|
@ -59,19 +60,19 @@ public final class Lucene90HnswVectorWriter extends VectorWriter {
|
|||
|
||||
String metaFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name, state.segmentSuffix, Lucene90HnswVectorFormat.META_EXTENSION);
|
||||
state.segmentInfo.name, state.segmentSuffix, Lucene90HnswVectorsFormat.META_EXTENSION);
|
||||
|
||||
String vectorDataFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name,
|
||||
state.segmentSuffix,
|
||||
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION);
|
||||
Lucene90HnswVectorsFormat.VECTOR_DATA_EXTENSION);
|
||||
|
||||
String indexDataFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name,
|
||||
state.segmentSuffix,
|
||||
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION);
|
||||
Lucene90HnswVectorsFormat.VECTOR_INDEX_EXTENSION);
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
|
@ -81,20 +82,20 @@ public final class Lucene90HnswVectorWriter extends VectorWriter {
|
|||
|
||||
CodecUtil.writeIndexHeader(
|
||||
meta,
|
||||
Lucene90HnswVectorFormat.META_CODEC_NAME,
|
||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
||||
Lucene90HnswVectorsFormat.META_CODEC_NAME,
|
||||
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(),
|
||||
state.segmentSuffix);
|
||||
CodecUtil.writeIndexHeader(
|
||||
vectorData,
|
||||
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
|
||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
||||
Lucene90HnswVectorsFormat.VECTOR_DATA_CODEC_NAME,
|
||||
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(),
|
||||
state.segmentSuffix);
|
||||
CodecUtil.writeIndexHeader(
|
||||
vectorIndex,
|
||||
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
|
||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
||||
Lucene90HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME,
|
||||
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(),
|
||||
state.segmentSuffix);
|
||||
success = true;
|
|
@ -180,7 +180,7 @@
|
|||
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
|
||||
* and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
|
||||
* intersection (2D, 3D).
|
||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}. The
|
||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat Vector values}. The
|
||||
* vector format stores numeric vectors in a format optimized for random access and
|
||||
* computation, supporting high-dimensional nearest-neighbor search.
|
||||
* </ul>
|
||||
|
@ -310,7 +310,7 @@
|
|||
* <td>Holds indexed points</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}</td>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat Vector values}</td>
|
||||
* <td>.vec, .vem</td>
|
||||
* <td>Holds indexed vectors; <code>.vec</code> files contain the raw vector data, and
|
||||
* <code>.vem</code> the vector metadata</td>
|
||||
|
|
|
@ -23,9 +23,9 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
import java.util.ServiceLoader;
|
||||
import java.util.TreeMap;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.codecs.VectorWriter;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
@ -50,30 +50,30 @@ import org.apache.lucene.util.IOUtils;
|
|||
* @see ServiceLoader
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class PerFieldVectorFormat extends VectorFormat {
|
||||
/** Name of this {@link VectorFormat}. */
|
||||
public abstract class PerFieldKnnVectorsFormat extends KnnVectorsFormat {
|
||||
/** Name of this {@link KnnVectorsFormat}. */
|
||||
public static final String PER_FIELD_NAME = "PerFieldVectors90";
|
||||
|
||||
/** {@link FieldInfo} attribute name used to store the format name for each field. */
|
||||
public static final String PER_FIELD_FORMAT_KEY =
|
||||
PerFieldVectorFormat.class.getSimpleName() + ".format";
|
||||
PerFieldKnnVectorsFormat.class.getSimpleName() + ".format";
|
||||
|
||||
/** {@link FieldInfo} attribute name used to store the segment suffix name for each field. */
|
||||
public static final String PER_FIELD_SUFFIX_KEY =
|
||||
PerFieldVectorFormat.class.getSimpleName() + ".suffix";
|
||||
PerFieldKnnVectorsFormat.class.getSimpleName() + ".suffix";
|
||||
|
||||
/** Sole constructor. */
|
||||
protected PerFieldVectorFormat() {
|
||||
protected PerFieldKnnVectorsFormat() {
|
||||
super(PER_FIELD_NAME);
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
return new FieldsWriter(state);
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
return new FieldsReader(state);
|
||||
}
|
||||
|
||||
|
@ -84,10 +84,10 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
|||
* <p>The field to format mapping is written to the index, so this method is only invoked when
|
||||
* writing, not when reading.
|
||||
*/
|
||||
public abstract VectorFormat getVectorFormatForField(String field);
|
||||
public abstract KnnVectorsFormat getKnnVectorsFormatForField(String field);
|
||||
|
||||
private class FieldsWriter extends VectorWriter {
|
||||
private final Map<VectorFormat, WriterAndSuffix> formats;
|
||||
private class FieldsWriter extends KnnVectorsWriter {
|
||||
private final Map<KnnVectorsFormat, WriterAndSuffix> formats;
|
||||
private final Map<String, Integer> suffixes = new HashMap<>();
|
||||
private final SegmentWriteState segmentWriteState;
|
||||
|
||||
|
@ -113,11 +113,11 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
|||
IOUtils.close(formats.values());
|
||||
}
|
||||
|
||||
private VectorWriter getInstance(FieldInfo field) throws IOException {
|
||||
VectorFormat format = getVectorFormatForField(field.name);
|
||||
private KnnVectorsWriter getInstance(FieldInfo field) throws IOException {
|
||||
KnnVectorsFormat format = getKnnVectorsFormatForField(field.name);
|
||||
if (format == null) {
|
||||
throw new IllegalStateException(
|
||||
"invalid null VectorFormat for field=\"" + field.name + "\"");
|
||||
"invalid null KnnVectorsFormat for field=\"" + field.name + "\"");
|
||||
}
|
||||
final String formatName = format.getName();
|
||||
|
||||
|
@ -164,13 +164,13 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
|||
}
|
||||
|
||||
/** KnnVectorsReader that can wrap multiple delegate readers, selected by field. */
|
||||
public static class FieldsReader extends VectorReader {
|
||||
public static class FieldsReader extends KnnVectorsReader {
|
||||
|
||||
private final Map<String, VectorReader> fields = new TreeMap<>();
|
||||
private final Map<String, KnnVectorsReader> fields = new TreeMap<>();
|
||||
|
||||
/**
|
||||
* Create a FieldsReader over a segment, opening VectorReaders for each VectorFormat specified
|
||||
* by the indexed numeric vector fields.
|
||||
* Create a FieldsReader over a segment, opening KnnVectorsReaders for each KnnVectorsFormat
|
||||
* specified by the indexed numeric vector fields.
|
||||
*
|
||||
* @param readState defines the fields
|
||||
* @throws IOException if one of the delegate readers throws
|
||||
|
@ -179,7 +179,7 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
|||
|
||||
// Init each unique format:
|
||||
boolean success = false;
|
||||
Map<String, VectorReader> formats = new HashMap<>();
|
||||
Map<String, KnnVectorsReader> formats = new HashMap<>();
|
||||
try {
|
||||
// Read field name -> format name
|
||||
for (FieldInfo fi : readState.fieldInfos) {
|
||||
|
@ -193,7 +193,7 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
|||
throw new IllegalStateException(
|
||||
"missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName);
|
||||
}
|
||||
VectorFormat format = VectorFormat.forName(formatName);
|
||||
KnnVectorsFormat format = KnnVectorsFormat.forName(formatName);
|
||||
String segmentSuffix =
|
||||
getFullSegmentSuffix(readState.segmentSuffix, getSuffix(formatName, suffix));
|
||||
if (!formats.containsKey(segmentSuffix)) {
|
||||
|
@ -218,34 +218,34 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
|||
*
|
||||
* @param field the name of a numeric vector field
|
||||
*/
|
||||
public VectorReader getFieldReader(String field) {
|
||||
public KnnVectorsReader getFieldReader(String field) {
|
||||
return fields.get(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
for (VectorReader reader : fields.values()) {
|
||||
for (KnnVectorsReader reader : fields.values()) {
|
||||
reader.checkIntegrity();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorValues getVectorValues(String field) throws IOException {
|
||||
VectorReader vectorReader = fields.get(field);
|
||||
if (vectorReader == null) {
|
||||
KnnVectorsReader knnVectorsReader = fields.get(field);
|
||||
if (knnVectorsReader == null) {
|
||||
return null;
|
||||
} else {
|
||||
return vectorReader.getVectorValues(field);
|
||||
return knnVectorsReader.getVectorValues(field);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TopDocs search(String field, float[] target, int k) throws IOException {
|
||||
VectorReader vectorReader = fields.get(field);
|
||||
if (vectorReader == null) {
|
||||
KnnVectorsReader knnVectorsReader = fields.get(field);
|
||||
if (knnVectorsReader == null) {
|
||||
return new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
|
||||
} else {
|
||||
return vectorReader.search(field, target, k);
|
||||
return knnVectorsReader.search(field, target, k);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -257,7 +257,7 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
|||
@Override
|
||||
public long ramBytesUsed() {
|
||||
long total = 0;
|
||||
for (VectorReader reader : fields.values()) {
|
||||
for (KnnVectorsReader reader : fields.values()) {
|
||||
total += reader.ramBytesUsed();
|
||||
}
|
||||
return total;
|
||||
|
@ -277,10 +277,10 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
|||
}
|
||||
|
||||
private static class WriterAndSuffix implements Closeable {
|
||||
final VectorWriter writer;
|
||||
final KnnVectorsWriter writer;
|
||||
final int suffix;
|
||||
|
||||
WriterAndSuffix(VectorWriter writer, int suffix) {
|
||||
WriterAndSuffix(KnnVectorsWriter writer, int suffix) {
|
||||
this.writer = writer;
|
||||
this.suffix = suffix;
|
||||
}
|
|
@ -25,14 +25,14 @@ import org.apache.lucene.index.VectorValues;
|
|||
* are dense - that is, every dimension of a vector contains an explicit value, stored packed into
|
||||
* an array (of type float[]) whose length is the vector dimension. Values can be retrieved using
|
||||
* {@link VectorValues}, which is a forward-only docID-based iterator and also offers random-access
|
||||
* by dense ordinal (not docId). VectorValues.SearchSimlarity may be used to compare vectors at
|
||||
* query time (for example as part of result ranking). A VectorField may be associated with a search
|
||||
* similarity function defining the metric used for nearest-neighbor search among vectors of that
|
||||
* field.
|
||||
* by dense ordinal (not docId). {@link VectorSimilarityFunction} may be used to compare vectors at
|
||||
* query time (for example as part of result ranking). A KnnVectorField may be associated with a
|
||||
* search similarity function defining the metric used for nearest-neighbor search among vectors of
|
||||
* that field.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class VectorField extends Field {
|
||||
public class KnnVectorField extends Field {
|
||||
|
||||
private static FieldType createType(float[] v, VectorSimilarityFunction similarityFunction) {
|
||||
if (v == null) {
|
||||
|
@ -82,7 +82,7 @@ public class VectorField extends Field {
|
|||
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
|
||||
* dimension > 1024.
|
||||
*/
|
||||
public VectorField(String name, float[] vector, VectorSimilarityFunction similarityFunction) {
|
||||
public KnnVectorField(String name, float[] vector, VectorSimilarityFunction similarityFunction) {
|
||||
super(name, createType(vector, similarityFunction));
|
||||
fieldsData = vector;
|
||||
}
|
||||
|
@ -97,7 +97,7 @@ public class VectorField extends Field {
|
|||
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
|
||||
* dimension > 1024.
|
||||
*/
|
||||
public VectorField(String name, float[] vector) {
|
||||
public KnnVectorField(String name, float[] vector) {
|
||||
this(name, vector, VectorSimilarityFunction.EUCLIDEAN);
|
||||
}
|
||||
|
||||
|
@ -111,7 +111,7 @@ public class VectorField extends Field {
|
|||
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
|
||||
* dimension > 1024.
|
||||
*/
|
||||
public VectorField(String name, float[] vector, FieldType fieldType) {
|
||||
public KnnVectorField(String name, float[] vector, FieldType fieldType) {
|
||||
super(name, fieldType);
|
||||
fieldsData = vector;
|
||||
}
|
|
@ -359,7 +359,7 @@ public final class CheckIndex implements Closeable {
|
|||
public long totalVectorValues;
|
||||
|
||||
/** Total number of fields with vectors. */
|
||||
public int totalVectorFields;
|
||||
public int totalKnnVectorFields;
|
||||
|
||||
/** Exception thrown during vector values test (null on success) */
|
||||
public Throwable error = null;
|
||||
|
@ -2310,7 +2310,7 @@ public final class CheckIndex implements Closeable {
|
|||
continue;
|
||||
}
|
||||
|
||||
status.totalVectorFields++;
|
||||
status.totalKnnVectorFields++;
|
||||
|
||||
int docCount = 0;
|
||||
while (values.nextDoc() != NO_MORE_DOCS) {
|
||||
|
@ -2346,7 +2346,7 @@ public final class CheckIndex implements Closeable {
|
|||
String.format(
|
||||
Locale.ROOT,
|
||||
"OK [%d fields, %d vectors] [took %.3f sec]",
|
||||
status.totalVectorFields,
|
||||
status.totalKnnVectorFields,
|
||||
status.totalVectorValues,
|
||||
nsToSec(System.nanoTime() - startNS)));
|
||||
|
||||
|
|
|
@ -20,11 +20,11 @@ import java.io.IOException;
|
|||
import java.util.Objects;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
||||
/** LeafReader implemented by codec APIs. */
|
||||
|
@ -81,7 +81,7 @@ public abstract class CodecReader extends LeafReader {
|
|||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public abstract VectorReader getVectorReader();
|
||||
public abstract KnnVectorsReader getVectorReader();
|
||||
|
||||
@Override
|
||||
public final void document(int docID, StoredFieldVisitor visitor) throws IOException {
|
||||
|
|
|
@ -20,11 +20,11 @@ import java.io.IOException;
|
|||
import java.util.Objects;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/**
|
||||
|
@ -100,7 +100,7 @@ public abstract class FilterCodecReader extends CodecReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public VectorReader getVectorReader() {
|
||||
public KnnVectorsReader getVectorReader() {
|
||||
return in.getVectorReader();
|
||||
}
|
||||
|
||||
|
|
|
@ -30,15 +30,15 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||
import org.apache.lucene.codecs.NormsConsumer;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PointsFormat;
|
||||
import org.apache.lucene.codecs.PointsWriter;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.VectorWriter;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.VectorField;
|
||||
import org.apache.lucene.document.KnnVectorField;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
|
@ -430,7 +430,7 @@ final class IndexingChain implements Accountable {
|
|||
|
||||
/** Writes all buffered vectors. */
|
||||
private void writeVectors(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
|
||||
VectorWriter vectorWriter = null;
|
||||
KnnVectorsWriter knnVectorsWriter = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
for (int i = 0; i < fieldHash.length; i++) {
|
||||
|
@ -446,19 +446,19 @@ final class IndexingChain implements Accountable {
|
|||
+ perField.fieldInfo.name
|
||||
+ "\" has no vectors but wrote them");
|
||||
}
|
||||
if (vectorWriter == null) {
|
||||
if (knnVectorsWriter == null) {
|
||||
// lazy init
|
||||
VectorFormat fmt = state.segmentInfo.getCodec().vectorFormat();
|
||||
KnnVectorsFormat fmt = state.segmentInfo.getCodec().knnVectorsFormat();
|
||||
if (fmt == null) {
|
||||
throw new IllegalStateException(
|
||||
"field=\""
|
||||
+ perField.fieldInfo.name
|
||||
+ "\" was indexed as vectors but codec does not support vectors");
|
||||
}
|
||||
vectorWriter = fmt.fieldsWriter(state);
|
||||
knnVectorsWriter = fmt.fieldsWriter(state);
|
||||
}
|
||||
|
||||
perField.vectorValuesWriter.flush(sortMap, vectorWriter);
|
||||
perField.vectorValuesWriter.flush(sortMap, knnVectorsWriter);
|
||||
perField.vectorValuesWriter = null;
|
||||
} else if (perField.fieldInfo != null && perField.fieldInfo.getVectorDimension() != 0) {
|
||||
// BUG
|
||||
|
@ -472,15 +472,15 @@ final class IndexingChain implements Accountable {
|
|||
perField = perField.next;
|
||||
}
|
||||
}
|
||||
if (vectorWriter != null) {
|
||||
vectorWriter.finish();
|
||||
if (knnVectorsWriter != null) {
|
||||
knnVectorsWriter.finish();
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(vectorWriter);
|
||||
IOUtils.close(knnVectorsWriter);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(vectorWriter);
|
||||
IOUtils.closeWhileHandlingException(knnVectorsWriter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -761,7 +761,7 @@ final class IndexingChain implements Accountable {
|
|||
pf.pointValuesWriter.addPackedValue(docID, field.binaryValue());
|
||||
}
|
||||
if (fieldType.vectorDimension() != 0) {
|
||||
pf.vectorValuesWriter.addValue(docID, ((VectorField) field).vectorValue());
|
||||
pf.vectorValuesWriter.addValue(docID, ((KnnVectorField) field).vectorValue());
|
||||
}
|
||||
return indexedField;
|
||||
}
|
||||
|
|
|
@ -24,11 +24,11 @@ import java.util.List;
|
|||
import java.util.Locale;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
|
@ -80,7 +80,7 @@ public class MergeState {
|
|||
public final PointsReader[] pointsReaders;
|
||||
|
||||
/** Vector readers to merge */
|
||||
public final VectorReader[] vectorReaders;
|
||||
public final KnnVectorsReader[] knnVectorsReaders;
|
||||
|
||||
/** Max docs per reader */
|
||||
public final int[] maxDocs;
|
||||
|
@ -109,7 +109,7 @@ public class MergeState {
|
|||
termVectorsReaders = new TermVectorsReader[numReaders];
|
||||
docValuesProducers = new DocValuesProducer[numReaders];
|
||||
pointsReaders = new PointsReader[numReaders];
|
||||
vectorReaders = new VectorReader[numReaders];
|
||||
knnVectorsReaders = new KnnVectorsReader[numReaders];
|
||||
fieldInfos = new FieldInfos[numReaders];
|
||||
liveDocs = new Bits[numReaders];
|
||||
|
||||
|
@ -147,9 +147,9 @@ public class MergeState {
|
|||
pointsReaders[i] = pointsReaders[i].getMergeInstance();
|
||||
}
|
||||
|
||||
vectorReaders[i] = reader.getVectorReader();
|
||||
if (vectorReaders[i] != null) {
|
||||
vectorReaders[i] = vectorReaders[i].getMergeInstance();
|
||||
knnVectorsReaders[i] = reader.getVectorReader();
|
||||
if (knnVectorsReaders[i] != null) {
|
||||
knnVectorsReaders[i] = knnVectorsReaders[i].getMergeInstance();
|
||||
}
|
||||
|
||||
numDocs += reader.numDocs();
|
||||
|
|
|
@ -28,12 +28,12 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.CompoundDirectory;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.index.IndexReader.CacheKey;
|
||||
import org.apache.lucene.index.IndexReader.ClosedListener;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
|
@ -59,7 +59,7 @@ final class SegmentCoreReaders {
|
|||
final StoredFieldsReader fieldsReaderOrig;
|
||||
final TermVectorsReader termVectorsReader;
|
||||
final PointsReader pointsReader;
|
||||
final VectorReader vectorReader;
|
||||
final KnnVectorsReader knnVectorsReader;
|
||||
final CompoundDirectory cfsReader;
|
||||
final String segment;
|
||||
/**
|
||||
|
@ -142,9 +142,9 @@ final class SegmentCoreReaders {
|
|||
}
|
||||
|
||||
if (coreFieldInfos.hasVectorValues()) {
|
||||
vectorReader = codec.vectorFormat().fieldsReader(segmentReadState);
|
||||
knnVectorsReader = codec.knnVectorsFormat().fieldsReader(segmentReadState);
|
||||
} else {
|
||||
vectorReader = null;
|
||||
knnVectorsReader = null;
|
||||
}
|
||||
|
||||
success = true;
|
||||
|
@ -185,7 +185,7 @@ final class SegmentCoreReaders {
|
|||
cfsReader,
|
||||
normsProducer,
|
||||
pointsReader,
|
||||
vectorReader);
|
||||
knnVectorsReader);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,12 +21,12 @@ import java.util.List;
|
|||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||
import org.apache.lucene.codecs.NormsConsumer;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PointsWriter;
|
||||
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||
import org.apache.lucene.codecs.TermVectorsWriter;
|
||||
import org.apache.lucene.codecs.VectorWriter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
|
@ -236,7 +236,7 @@ final class SegmentMerger {
|
|||
}
|
||||
|
||||
private void mergeVectorValues(SegmentWriteState segmentWriteState) throws IOException {
|
||||
try (VectorWriter writer = codec.vectorFormat().fieldsWriter(segmentWriteState)) {
|
||||
try (KnnVectorsWriter writer = codec.knnVectorsFormat().fieldsWriter(segmentWriteState)) {
|
||||
writer.merge(mergeState);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,11 +24,11 @@ import org.apache.lucene.codecs.Codec;
|
|||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
@ -267,8 +267,8 @@ public final class SegmentReader extends CodecReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public VectorReader getVectorReader() {
|
||||
return core.vectorReader;
|
||||
public KnnVectorsReader getVectorReader() {
|
||||
return core.knnVectorsReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -22,11 +22,11 @@ import java.util.Collections;
|
|||
import java.util.Iterator;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
|
@ -78,7 +78,7 @@ public final class SlowCodecReaderWrapper {
|
|||
}
|
||||
|
||||
@Override
|
||||
public VectorReader getVectorReader() {
|
||||
public KnnVectorsReader getVectorReader() {
|
||||
reader.ensureOpen();
|
||||
return readerToVectorReader(reader);
|
||||
}
|
||||
|
@ -159,8 +159,8 @@ public final class SlowCodecReaderWrapper {
|
|||
};
|
||||
}
|
||||
|
||||
private static VectorReader readerToVectorReader(LeafReader reader) {
|
||||
return new VectorReader() {
|
||||
private static KnnVectorsReader readerToVectorReader(LeafReader reader) {
|
||||
return new KnnVectorsReader() {
|
||||
@Override
|
||||
public VectorValues getVectorValues(String field) throws IOException {
|
||||
return reader.getVectorValues(field);
|
||||
|
|
|
@ -26,11 +26,11 @@ import java.util.Iterator;
|
|||
import java.util.Map;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
@ -301,9 +301,9 @@ public final class SortingCodecReader extends FilterCodecReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public VectorReader getVectorReader() {
|
||||
VectorReader delegate = in.getVectorReader();
|
||||
return new VectorReader() {
|
||||
public KnnVectorsReader getVectorReader() {
|
||||
KnnVectorsReader delegate = in.getVectorReader();
|
||||
return new KnnVectorsReader() {
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
delegate.checkIntegrity();
|
||||
|
|
|
@ -17,12 +17,13 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.document.KnnVectorField;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* This class provides access to per-document floating point vector values indexed as {@link
|
||||
* org.apache.lucene.document.VectorField}.
|
||||
* KnnVectorField}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.nio.ByteBuffer;
|
|||
import java.nio.ByteOrder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.codecs.VectorWriter;
|
||||
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -103,16 +103,16 @@ class VectorValuesWriter {
|
|||
*
|
||||
* @param sortMap specifies the order of documents being flushed, or null if they are to be
|
||||
* flushed in docid order
|
||||
* @param vectorWriter the Codec's vector writer that handles the actual encoding and I/O
|
||||
* @param knnVectorsWriter the Codec's vector writer that handles the actual encoding and I/O
|
||||
* @throws IOException if there is an error writing the field and its values
|
||||
*/
|
||||
public void flush(Sorter.DocMap sortMap, VectorWriter vectorWriter) throws IOException {
|
||||
public void flush(Sorter.DocMap sortMap, KnnVectorsWriter knnVectorsWriter) throws IOException {
|
||||
VectorValues vectorValues =
|
||||
new BufferedVectorValues(docsWithField, vectors, fieldInfo.getVectorDimension());
|
||||
if (sortMap != null) {
|
||||
vectorWriter.writeField(fieldInfo, new SortingVectorValues(vectorValues, sortMap));
|
||||
knnVectorsWriter.writeField(fieldInfo, new SortingVectorValues(vectorValues, sortMap));
|
||||
} else {
|
||||
vectorWriter.writeField(fieldInfo, vectorValues);
|
||||
knnVectorsWriter.writeField(fieldInfo, vectorValues);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -13,4 +13,4 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat
|
||||
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat
|
|
@ -17,10 +17,10 @@
|
|||
package org.apache.lucene.codecs.lucene90;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.index.BaseVectorFormatTestCase;
|
||||
import org.apache.lucene.index.BaseKnnVectorsFormatTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestLucene90HnswVectorFormat extends BaseVectorFormatTestCase {
|
||||
public class TestLucene90HnswVectorsFormat extends BaseKnnVectorsFormatTestCase {
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
return TestUtil.getDefaultCodec();
|
|
@ -25,14 +25,14 @@ import java.util.Random;
|
|||
import java.util.Set;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.codecs.VectorWriter;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||
import org.apache.lucene.codecs.asserting.AssertingCodec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.VectorField;
|
||||
import org.apache.lucene.index.BaseVectorFormatTestCase;
|
||||
import org.apache.lucene.document.KnnVectorField;
|
||||
import org.apache.lucene.index.BaseKnnVectorsFormatTestCase;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -49,7 +49,7 @@ import org.apache.lucene.util.TestUtil;
|
|||
import org.hamcrest.MatcherAssert;
|
||||
|
||||
/** Basic tests of PerFieldDocValuesFormat */
|
||||
public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
||||
public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
|
||||
private Codec codec;
|
||||
|
||||
@Override
|
||||
|
@ -67,14 +67,14 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
|||
try (Directory directory = newDirectory()) {
|
||||
// we don't use RandomIndexWriter because it might add more values than we expect !!!!1
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
WriteRecordingVectorFormat format1 =
|
||||
new WriteRecordingVectorFormat(TestUtil.getDefaultVectorFormat());
|
||||
WriteRecordingVectorFormat format2 =
|
||||
new WriteRecordingVectorFormat(TestUtil.getDefaultVectorFormat());
|
||||
WriteRecordingKnnVectorsFormat format1 =
|
||||
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
|
||||
WriteRecordingKnnVectorsFormat format2 =
|
||||
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
|
||||
iwc.setCodec(
|
||||
new AssertingCodec() {
|
||||
@Override
|
||||
public VectorFormat getVectorFormatForField(String field) {
|
||||
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||
if ("field1".equals(field)) {
|
||||
return format1;
|
||||
} else {
|
||||
|
@ -86,12 +86,12 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
|||
try (IndexWriter iwriter = new IndexWriter(directory, iwc)) {
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("id", "1", Field.Store.YES));
|
||||
doc.add(new VectorField("field1", new float[] {1, 2, 3}));
|
||||
doc.add(new KnnVectorField("field1", new float[] {1, 2, 3}));
|
||||
iwriter.addDocument(doc);
|
||||
|
||||
doc.clear();
|
||||
doc.add(newTextField("id", "2", Field.Store.YES));
|
||||
doc.add(new VectorField("field2", new float[] {4, 5, 6}));
|
||||
doc.add(new KnnVectorField("field2", new float[] {4, 5, 6}));
|
||||
iwriter.addDocument(doc);
|
||||
}
|
||||
|
||||
|
@ -128,19 +128,19 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
|||
for (int i = 0; i < 3; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("id", "1", Field.Store.YES));
|
||||
doc.add(new VectorField("field", new float[] {1, 2, 3}));
|
||||
doc.add(new KnnVectorField("field", new float[] {1, 2, 3}));
|
||||
iw.addDocument(doc);
|
||||
iw.commit();
|
||||
}
|
||||
}
|
||||
|
||||
IndexWriterConfig newConfig = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
WriteRecordingVectorFormat newFormat =
|
||||
new WriteRecordingVectorFormat(TestUtil.getDefaultVectorFormat());
|
||||
WriteRecordingKnnVectorsFormat newFormat =
|
||||
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
|
||||
newConfig.setCodec(
|
||||
new AssertingCodec() {
|
||||
@Override
|
||||
public VectorFormat getVectorFormatForField(String field) {
|
||||
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||
return newFormat;
|
||||
}
|
||||
});
|
||||
|
@ -154,20 +154,20 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private static class WriteRecordingVectorFormat extends VectorFormat {
|
||||
private final VectorFormat delegate;
|
||||
private static class WriteRecordingKnnVectorsFormat extends KnnVectorsFormat {
|
||||
private final KnnVectorsFormat delegate;
|
||||
private final Set<String> fieldsWritten;
|
||||
|
||||
public WriteRecordingVectorFormat(VectorFormat delegate) {
|
||||
public WriteRecordingKnnVectorsFormat(KnnVectorsFormat delegate) {
|
||||
super(delegate.getName());
|
||||
this.delegate = delegate;
|
||||
this.fieldsWritten = new HashSet<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
VectorWriter writer = delegate.fieldsWriter(state);
|
||||
return new VectorWriter() {
|
||||
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
KnnVectorsWriter writer = delegate.fieldsWriter(state);
|
||||
return new KnnVectorsWriter() {
|
||||
@Override
|
||||
public void writeField(FieldInfo fieldInfo, VectorValues values) throws IOException {
|
||||
fieldsWritten.add(fieldInfo.name);
|
||||
|
@ -187,7 +187,7 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
return delegate.fieldsReader(state);
|
||||
}
|
||||
}
|
|
@ -87,21 +87,21 @@ public class TestPerFieldConsistency extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private static Field randomVectorField(Random random, String fieldName) {
|
||||
private static Field randomKnnVectorField(Random random, String fieldName) {
|
||||
VectorSimilarityFunction similarityFunction =
|
||||
RandomPicks.randomFrom(random, VectorSimilarityFunction.values());
|
||||
float[] values = new float[randomIntBetween(1, 10)];
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = randomFloat();
|
||||
}
|
||||
return new VectorField(fieldName, values, similarityFunction);
|
||||
return new KnnVectorField(fieldName, values, similarityFunction);
|
||||
}
|
||||
|
||||
private static Field[] randomFieldsWithTheSameName(String fieldName) {
|
||||
final Field textField = randomIndexedField(random(), fieldName);
|
||||
final Field docValuesField = randomDocValuesField(random(), fieldName);
|
||||
final Field pointField = randomPointField(random(), fieldName);
|
||||
final Field vectorField = randomVectorField(random(), fieldName);
|
||||
final Field vectorField = randomKnnVectorField(random(), fieldName);
|
||||
return new Field[] {textField, docValuesField, pointField, vectorField};
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.KnnVectorField;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||
|
@ -40,7 +41,6 @@ import org.apache.lucene.document.SortedSetDocValuesField;
|
|||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.document.VectorField;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
|
@ -381,6 +381,7 @@ public class TestDocumentWriter extends LuceneTestCase {
|
|||
public void testRAMUsageVector() throws IOException {
|
||||
doTestRAMUsage(
|
||||
field ->
|
||||
new VectorField(field, new float[] {1, 2, 3, 4}, VectorSimilarityFunction.EUCLIDEAN));
|
||||
new KnnVectorField(
|
||||
field, new float[] {1, 2, 3, 4}, VectorSimilarityFunction.EUCLIDEAN));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,17 +27,17 @@ import java.util.LinkedList;
|
|||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsReader;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.KnnVectorField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.VectorField;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -54,7 +54,7 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||
|
||||
private static final String KNN_GRAPH_FIELD = "vector";
|
||||
|
||||
private static int maxConn = Lucene90HnswVectorFormat.DEFAULT_MAX_CONN;
|
||||
private static int maxConn = Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
|
||||
|
||||
private Codec codec;
|
||||
private VectorSimilarityFunction similarityFunction;
|
||||
|
@ -69,9 +69,9 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||
codec =
|
||||
new Lucene90Codec() {
|
||||
@Override
|
||||
public VectorFormat getVectorFormatForField(String field) {
|
||||
return new Lucene90HnswVectorFormat(
|
||||
maxConn, Lucene90HnswVectorFormat.DEFAULT_BEAM_WIDTH);
|
||||
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||
return new Lucene90HnswVectorsFormat(
|
||||
maxConn, Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -81,7 +81,7 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||
|
||||
@After
|
||||
public void cleanup() {
|
||||
maxConn = Lucene90HnswVectorFormat.DEFAULT_MAX_CONN;
|
||||
maxConn = Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
|
||||
}
|
||||
|
||||
/** Basic test of creating documents in a graph */
|
||||
|
@ -182,11 +182,11 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||
iw.forceMerge(1);
|
||||
}
|
||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
PerFieldVectorFormat.FieldsReader perFieldReader =
|
||||
(PerFieldVectorFormat.FieldsReader)
|
||||
PerFieldKnnVectorsFormat.FieldsReader perFieldReader =
|
||||
(PerFieldKnnVectorsFormat.FieldsReader)
|
||||
((CodecReader) getOnlyLeafReader(reader)).getVectorReader();
|
||||
Lucene90HnswVectorReader vectorReader =
|
||||
(Lucene90HnswVectorReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
|
||||
Lucene90HnswVectorsReader vectorReader =
|
||||
(Lucene90HnswVectorsReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
|
||||
graph = copyGraph(vectorReader.getGraphValues(KNN_GRAPH_FIELD));
|
||||
}
|
||||
}
|
||||
|
@ -323,13 +323,13 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||
for (LeafReaderContext ctx : dr.leaves()) {
|
||||
LeafReader reader = ctx.reader();
|
||||
VectorValues vectorValues = reader.getVectorValues(KNN_GRAPH_FIELD);
|
||||
PerFieldVectorFormat.FieldsReader perFieldReader =
|
||||
(PerFieldVectorFormat.FieldsReader) ((CodecReader) reader).getVectorReader();
|
||||
PerFieldKnnVectorsFormat.FieldsReader perFieldReader =
|
||||
(PerFieldKnnVectorsFormat.FieldsReader) ((CodecReader) reader).getVectorReader();
|
||||
if (perFieldReader == null) {
|
||||
continue;
|
||||
}
|
||||
Lucene90HnswVectorReader vectorReader =
|
||||
(Lucene90HnswVectorReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
|
||||
Lucene90HnswVectorsReader vectorReader =
|
||||
(Lucene90HnswVectorsReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
|
||||
KnnGraphValues graphValues = vectorReader.getGraphValues(KNN_GRAPH_FIELD);
|
||||
assertEquals((vectorValues == null), (graphValues == null));
|
||||
if (vectorValues == null) {
|
||||
|
@ -458,8 +458,8 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||
throws IOException {
|
||||
Document doc = new Document();
|
||||
if (vector != null) {
|
||||
FieldType fieldType = VectorField.createFieldType(vector.length, similarityFunction);
|
||||
doc.add(new VectorField(KNN_GRAPH_FIELD, vector, fieldType));
|
||||
FieldType fieldType = KnnVectorField.createFieldType(vector.length, similarityFunction);
|
||||
doc.add(new KnnVectorField(KNN_GRAPH_FIELD, vector, fieldType));
|
||||
}
|
||||
String idString = Integer.toString(id);
|
||||
doc.add(new StringField("id", idString, Field.Store.YES));
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.document.BinaryDocValuesField;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.KnnVectorField;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
|
@ -38,7 +39,6 @@ import org.apache.lucene.document.SortedNumericDocValuesField;
|
|||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.document.VectorField;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Sort;
|
||||
|
@ -127,7 +127,7 @@ public class TestSortingCodecReader extends LuceneTestCase {
|
|||
doc.add(new BinaryDocValuesField("binary_dv", new BytesRef(Integer.toString(docId))));
|
||||
doc.add(
|
||||
new SortedSetDocValuesField("sorted_set_dv", new BytesRef(Integer.toString(docId))));
|
||||
doc.add(new VectorField("vector", new float[] {(float) docId}));
|
||||
doc.add(new KnnVectorField("vector", new float[] {(float) docId}));
|
||||
doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
|
||||
|
||||
FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
|
||||
|
|
|
@ -35,14 +35,14 @@ import java.nio.file.Paths;
|
|||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsReader;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.KnnVectorField;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.VectorField;
|
||||
import org.apache.lucene.index.CodecReader;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -240,7 +240,7 @@ public class KnnGraphTester {
|
|||
for (LeafReaderContext context : reader.leaves()) {
|
||||
LeafReader leafReader = context.reader();
|
||||
KnnGraphValues knnValues =
|
||||
((Lucene90HnswVectorReader) ((CodecReader) leafReader).getVectorReader())
|
||||
((Lucene90HnswVectorsReader) ((CodecReader) leafReader).getVectorReader())
|
||||
.getGraphValues(KNN_FIELD);
|
||||
System.out.printf("Leaf %d has %d documents\n", context.ord, leafReader.maxDoc());
|
||||
printGraphFanout(knnValues, leafReader.maxDoc());
|
||||
|
@ -573,15 +573,15 @@ public class KnnGraphTester {
|
|||
iwc.setCodec(
|
||||
new Lucene90Codec() {
|
||||
@Override
|
||||
public VectorFormat getVectorFormatForField(String field) {
|
||||
return new Lucene90HnswVectorFormat(maxConn, beamWidth);
|
||||
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||
return new Lucene90HnswVectorsFormat(maxConn, beamWidth);
|
||||
}
|
||||
});
|
||||
// iwc.setMergePolicy(NoMergePolicy.INSTANCE);
|
||||
iwc.setRAMBufferSizeMB(1994d);
|
||||
// iwc.setMaxBufferedDocs(10000);
|
||||
|
||||
FieldType fieldType = VectorField.createFieldType(dim, VectorSimilarityFunction.DOT_PRODUCT);
|
||||
FieldType fieldType = KnnVectorField.createFieldType(dim, VectorSimilarityFunction.DOT_PRODUCT);
|
||||
if (quiet == false) {
|
||||
iwc.setInfoStream(new PrintStreamInfoStream(System.out));
|
||||
System.out.println("creating index in " + indexPath);
|
||||
|
@ -606,7 +606,7 @@ public class KnnGraphTester {
|
|||
vectors.get(vector);
|
||||
Document doc = new Document();
|
||||
// System.out.println("vector=" + vector[0] + "," + vector[1] + "...");
|
||||
doc.add(new VectorField(KNN_FIELD, vector, fieldType));
|
||||
doc.add(new KnnVectorField(KNN_FIELD, vector, fieldType));
|
||||
doc.add(new StoredField(ID_FIELD, i));
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
|
|
|
@ -24,14 +24,14 @@ import java.util.Arrays;
|
|||
import java.util.HashSet;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsReader;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.KnnVectorField;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.VectorField;
|
||||
import org.apache.lucene.index.CodecReader;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -80,8 +80,8 @@ public class TestHnsw extends LuceneTestCase {
|
|||
.setCodec(
|
||||
new Lucene90Codec() {
|
||||
@Override
|
||||
public VectorFormat getVectorFormatForField(String field) {
|
||||
return new Lucene90HnswVectorFormat(maxConn, beamWidth);
|
||||
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||
return new Lucene90HnswVectorsFormat(maxConn, beamWidth);
|
||||
}
|
||||
});
|
||||
try (IndexWriter iw = new IndexWriter(dir, iwc)) {
|
||||
|
@ -92,7 +92,7 @@ public class TestHnsw extends LuceneTestCase {
|
|||
indexedDoc++;
|
||||
}
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("field", v2.vectorValue()));
|
||||
doc.add(new KnnVectorField("field", v2.vectorValue()));
|
||||
doc.add(new StoredField("id", v2.docID()));
|
||||
iw.addDocument(doc);
|
||||
nVec++;
|
||||
|
@ -108,8 +108,8 @@ public class TestHnsw extends LuceneTestCase {
|
|||
assertEquals(indexedDoc, ctx.reader().numDocs());
|
||||
assertVectorsEqual(v3, values);
|
||||
KnnGraphValues graphValues =
|
||||
((Lucene90HnswVectorReader)
|
||||
((PerFieldVectorFormat.FieldsReader)
|
||||
((Lucene90HnswVectorsReader)
|
||||
((PerFieldKnnVectorsFormat.FieldsReader)
|
||||
((CodecReader) ctx.reader()).getVectorReader())
|
||||
.getFieldReader("field"))
|
||||
.getGraphValues("field");
|
||||
|
|
|
@ -18,16 +18,16 @@ package org.apache.lucene.codecs.asserting;
|
|||
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.PointsFormat;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
/** Acts like the default codec but with additional asserts. */
|
||||
|
@ -62,11 +62,11 @@ public class AssertingCodec extends FilterCodec {
|
|||
}
|
||||
};
|
||||
|
||||
private final VectorFormat vectorFormat =
|
||||
new PerFieldVectorFormat() {
|
||||
private final KnnVectorsFormat knnVectorsFormat =
|
||||
new PerFieldKnnVectorsFormat() {
|
||||
@Override
|
||||
public VectorFormat getVectorFormatForField(String field) {
|
||||
return AssertingCodec.this.getVectorFormatForField(field);
|
||||
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||
return AssertingCodec.this.getKnnVectorsFormatForField(field);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -77,7 +77,7 @@ public class AssertingCodec extends FilterCodec {
|
|||
private final PostingsFormat defaultFormat = new AssertingPostingsFormat();
|
||||
private final DocValuesFormat defaultDVFormat = new AssertingDocValuesFormat();
|
||||
private final PointsFormat pointsFormat = new AssertingPointsFormat();
|
||||
private final VectorFormat defaultVectorFormat = new AssertingVectorFormat();
|
||||
private final KnnVectorsFormat defaultKnnVectorsFormat = new AssertingKnnVectorsFormat();
|
||||
|
||||
public AssertingCodec() {
|
||||
super("Asserting", TestUtil.getDefaultCodec());
|
||||
|
@ -119,8 +119,8 @@ public class AssertingCodec extends FilterCodec {
|
|||
}
|
||||
|
||||
@Override
|
||||
public VectorFormat vectorFormat() {
|
||||
return vectorFormat;
|
||||
public KnnVectorsFormat knnVectorsFormat() {
|
||||
return knnVectorsFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -152,7 +152,7 @@ public class AssertingCodec extends FilterCodec {
|
|||
*
|
||||
* <p>The default implementation always returns "Asserting"
|
||||
*/
|
||||
public VectorFormat getVectorFormatForField(String field) {
|
||||
return defaultVectorFormat;
|
||||
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||
return defaultKnnVectorsFormat;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,9 +18,9 @@
|
|||
package org.apache.lucene.codecs.asserting;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.codecs.VectorWriter;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
@ -28,29 +28,29 @@ import org.apache.lucene.index.VectorValues;
|
|||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
/** Wraps the default VectorFormat and provides additional assertions. */
|
||||
public class AssertingVectorFormat extends VectorFormat {
|
||||
/** Wraps the default KnnVectorsFormat and provides additional assertions. */
|
||||
public class AssertingKnnVectorsFormat extends KnnVectorsFormat {
|
||||
|
||||
private final VectorFormat delegate = TestUtil.getDefaultVectorFormat();
|
||||
private final KnnVectorsFormat delegate = TestUtil.getDefaultKnnVectorsFormat();
|
||||
|
||||
public AssertingVectorFormat() {
|
||||
public AssertingKnnVectorsFormat() {
|
||||
super("Asserting");
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
return new AssertingVectorWriter(delegate.fieldsWriter(state));
|
||||
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
return new AssertingKnnVectorsWriter(delegate.fieldsWriter(state));
|
||||
}
|
||||
|
||||
@Override
|
||||
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
return new AssertingVectorReader(delegate.fieldsReader(state));
|
||||
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
|
||||
return new AssertingKnnVectorsReader(delegate.fieldsReader(state));
|
||||
}
|
||||
|
||||
static class AssertingVectorWriter extends VectorWriter {
|
||||
final VectorWriter delegate;
|
||||
static class AssertingKnnVectorsWriter extends KnnVectorsWriter {
|
||||
final KnnVectorsWriter delegate;
|
||||
|
||||
AssertingVectorWriter(VectorWriter delegate) {
|
||||
AssertingKnnVectorsWriter(KnnVectorsWriter delegate) {
|
||||
assert delegate != null;
|
||||
this.delegate = delegate;
|
||||
}
|
||||
|
@ -73,10 +73,10 @@ public class AssertingVectorFormat extends VectorFormat {
|
|||
}
|
||||
}
|
||||
|
||||
static class AssertingVectorReader extends VectorReader {
|
||||
final VectorReader delegate;
|
||||
static class AssertingKnnVectorsReader extends KnnVectorsReader {
|
||||
final KnnVectorsReader delegate;
|
||||
|
||||
AssertingVectorReader(VectorReader delegate) {
|
||||
AssertingKnnVectorsReader(KnnVectorsReader delegate) {
|
||||
assert delegate != null;
|
||||
this.delegate = delegate;
|
||||
}
|
|
@ -22,13 +22,13 @@ import java.io.ByteArrayOutputStream;
|
|||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.KnnVectorField;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.VectorField;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -39,44 +39,45 @@ import org.apache.lucene.util.TestUtil;
|
|||
import org.apache.lucene.util.VectorUtil;
|
||||
|
||||
/**
|
||||
* Base class aiming at testing {@link VectorFormat vectors formats}. To test a new format, all you
|
||||
* need is to register a new {@link Codec} which uses it and extend this class and override {@link
|
||||
* #getCodec()}.
|
||||
* Base class aiming at testing {@link KnnVectorsFormat vectors formats}. To test a new format, all
|
||||
* you need is to register a new {@link Codec} which uses it and extend this class and override
|
||||
* {@link #getCodec()}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCase {
|
||||
public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTestCase {
|
||||
|
||||
@Override
|
||||
protected void addRandomFields(Document doc) {
|
||||
doc.add(new VectorField("v2", randomVector(30), VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc.add(new KnnVectorField("v2", randomVector(30), VectorSimilarityFunction.EUCLIDEAN));
|
||||
}
|
||||
|
||||
public void testFieldConstructor() {
|
||||
float[] v = new float[1];
|
||||
VectorField field = new VectorField("f", v);
|
||||
KnnVectorField field = new KnnVectorField("f", v);
|
||||
assertEquals(1, field.fieldType().vectorDimension());
|
||||
assertEquals(VectorSimilarityFunction.EUCLIDEAN, field.fieldType().vectorSimilarityFunction());
|
||||
assertSame(v, field.vectorValue());
|
||||
}
|
||||
|
||||
public void testFieldConstructorExceptions() {
|
||||
expectThrows(IllegalArgumentException.class, () -> new VectorField(null, new float[1]));
|
||||
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", null));
|
||||
expectThrows(IllegalArgumentException.class, () -> new KnnVectorField(null, new float[1]));
|
||||
expectThrows(IllegalArgumentException.class, () -> new KnnVectorField("f", null));
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> new VectorField("f", new float[1], (VectorSimilarityFunction) null));
|
||||
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", new float[0]));
|
||||
() -> new KnnVectorField("f", new float[1], (VectorSimilarityFunction) null));
|
||||
expectThrows(IllegalArgumentException.class, () -> new KnnVectorField("f", new float[0]));
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> new VectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1]));
|
||||
() -> new KnnVectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1]));
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> new VectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1], (FieldType) null));
|
||||
() ->
|
||||
new KnnVectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1], (FieldType) null));
|
||||
}
|
||||
|
||||
public void testFieldSetValue() {
|
||||
VectorField field = new VectorField("f", new float[1]);
|
||||
KnnVectorField field = new KnnVectorField("f", new float[1]);
|
||||
float[] v1 = new float[1];
|
||||
field.setVectorValue(v1);
|
||||
assertSame(v1, field.vectorValue());
|
||||
|
@ -90,11 +91,11 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
|
||||
Document doc2 = new Document();
|
||||
doc2.add(new VectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc2.add(new KnnVectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||
String errMsg =
|
||||
|
@ -106,12 +107,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
|
||||
Document doc2 = new Document();
|
||||
doc2.add(new VectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc2.add(new KnnVectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||
String errMsg =
|
||||
|
@ -126,11 +127,11 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
|
||||
Document doc2 = new Document();
|
||||
doc2.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc2.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||
String errMsg =
|
||||
|
@ -142,12 +143,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
|
||||
Document doc2 = new Document();
|
||||
doc2.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc2.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||
String errMsg =
|
||||
|
@ -161,13 +162,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (Directory dir = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc2 = new Document();
|
||||
doc2.add(new VectorField("f", new float[1], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc2.add(new KnnVectorField("f", new float[1], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
|
||||
assertEquals(
|
||||
|
@ -182,13 +183,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (Directory dir = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc2 = new Document();
|
||||
doc2.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc2.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
|
||||
assertEquals(
|
||||
|
@ -202,7 +203,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
public void testAddIndexesDirectory0() throws Exception {
|
||||
String fieldName = "field";
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
try (Directory dir = newDirectory();
|
||||
Directory dir2 = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
|
@ -230,7 +231,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
w.addDocument(doc);
|
||||
}
|
||||
doc.add(new VectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||
w2.addDocument(doc);
|
||||
w2.addIndexes(dir);
|
||||
|
@ -250,7 +251,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
String fieldName = "field";
|
||||
float[] vector = new float[1];
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField(fieldName, vector, VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField(fieldName, vector, VectorSimilarityFunction.DOT_PRODUCT));
|
||||
try (Directory dir = newDirectory();
|
||||
Directory dir2 = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
|
@ -281,12 +282,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
Directory dir2 = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w2.addDocument(doc);
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(
|
||||
|
@ -304,12 +305,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
Directory dir2 = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
w2.addDocument(doc);
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(IllegalArgumentException.class, () -> w2.addIndexes(dir));
|
||||
|
@ -326,12 +327,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
Directory dir2 = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w2.addDocument(doc);
|
||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||
IllegalArgumentException expected =
|
||||
|
@ -352,12 +353,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
Directory dir2 = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
w2.addDocument(doc);
|
||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||
IllegalArgumentException expected =
|
||||
|
@ -378,12 +379,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
Directory dir2 = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w2.addDocument(doc);
|
||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||
IllegalArgumentException expected =
|
||||
|
@ -402,12 +403,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
Directory dir2 = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
w2.addDocument(doc);
|
||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||
IllegalArgumentException expected =
|
||||
|
@ -425,8 +426,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
|
||||
assertEquals(
|
||||
|
@ -443,13 +444,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
IllegalArgumentException.class,
|
||||
() ->
|
||||
doc.add(
|
||||
new VectorField(
|
||||
new KnnVectorField(
|
||||
"f",
|
||||
new float[VectorValues.MAX_DIMENSIONS + 1],
|
||||
VectorSimilarityFunction.DOT_PRODUCT)));
|
||||
|
||||
Document doc2 = new Document();
|
||||
doc2.add(new VectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc2.add(new KnnVectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
|
||||
w.addDocument(doc2);
|
||||
}
|
||||
}
|
||||
|
@ -462,11 +463,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() ->
|
||||
doc.add(new VectorField("f", new float[0], VectorSimilarityFunction.EUCLIDEAN)));
|
||||
doc.add(
|
||||
new KnnVectorField("f", new float[0], VectorSimilarityFunction.EUCLIDEAN)));
|
||||
assertEquals("cannot index an empty vector", e.getMessage());
|
||||
|
||||
Document doc2 = new Document();
|
||||
doc2.add(new VectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc2.add(new KnnVectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
|
||||
w.addDocument(doc2);
|
||||
}
|
||||
}
|
||||
|
@ -476,14 +478,14 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (Directory dir = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
iwc.setCodec(Codec.forName("SimpleText"));
|
||||
try (IndexWriter w = new IndexWriter(dir, iwc)) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
w.forceMerge(1);
|
||||
}
|
||||
|
@ -497,20 +499,21 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (Directory dir = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, iwc)) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
w.forceMerge(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testInvalidVectorFieldUsage() {
|
||||
VectorField field = new VectorField("field", new float[2], VectorSimilarityFunction.EUCLIDEAN);
|
||||
public void testInvalidKnnVectorFieldUsage() {
|
||||
KnnVectorField field =
|
||||
new KnnVectorField("field", new float[2], VectorSimilarityFunction.EUCLIDEAN);
|
||||
|
||||
expectThrows(IllegalArgumentException.class, () -> field.setIntValue(14));
|
||||
|
||||
|
@ -524,7 +527,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("id", "0", Field.Store.NO));
|
||||
doc.add(new VectorField("v", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(new KnnVectorField("v", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
w.addDocument(new Document());
|
||||
w.commit();
|
||||
|
@ -544,17 +547,19 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
}
|
||||
}
|
||||
|
||||
public void testVectorFieldMissingFromOneSegment() throws Exception {
|
||||
public void testKnnVectorFieldMissingFromOneSegment() throws Exception {
|
||||
try (Directory dir = FSDirectory.open(createTempDir());
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("id", "0", Field.Store.NO));
|
||||
doc.add(new VectorField("v0", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(
|
||||
new KnnVectorField("v0", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new VectorField("v1", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||
doc.add(
|
||||
new KnnVectorField("v1", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||
w.addDocument(doc);
|
||||
w.forceMerge(1);
|
||||
}
|
||||
|
@ -581,7 +586,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
String fieldName = "int" + field;
|
||||
if (random().nextInt(100) == 17) {
|
||||
float[] v = randomVector(fieldDims[field]);
|
||||
doc.add(new VectorField(fieldName, v, fieldSearchStrategies[field]));
|
||||
doc.add(new KnnVectorField(fieldName, v, fieldSearchStrategies[field]));
|
||||
fieldDocCounts[field]++;
|
||||
fieldTotals[field] += v[0];
|
||||
}
|
||||
|
@ -618,15 +623,15 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc1 = new Document();
|
||||
doc1.add(new VectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc1.add(new KnnVectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
||||
v[0] = 1;
|
||||
Document doc2 = new Document();
|
||||
doc2.add(new VectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc2.add(new KnnVectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
||||
iw.addDocument(doc1);
|
||||
iw.addDocument(doc2);
|
||||
v[0] = 2;
|
||||
Document doc3 = new Document();
|
||||
doc3.add(new VectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc3.add(new KnnVectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
||||
iw.addDocument(doc3);
|
||||
iw.forceMerge(1);
|
||||
try (IndexReader reader = iw.getReader()) {
|
||||
|
@ -676,19 +681,21 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
}
|
||||
}
|
||||
|
||||
public void testIndexMultipleVectorFields() throws Exception {
|
||||
public void testIndexMultipleKnnVectorFields() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
float[] v = new float[] {1};
|
||||
doc.add(new VectorField("field1", v, VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc.add(new VectorField("field2", new float[] {1, 2, 3}, VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc.add(new KnnVectorField("field1", v, VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc.add(
|
||||
new KnnVectorField("field2", new float[] {1, 2, 3}, VectorSimilarityFunction.EUCLIDEAN));
|
||||
iw.addDocument(doc);
|
||||
v[0] = 2;
|
||||
iw.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(
|
||||
new VectorField("field3", new float[] {1, 2, 3}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||
new KnnVectorField(
|
||||
"field3", new float[] {1, 2, 3}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||
iw.addDocument(doc);
|
||||
iw.forceMerge(1);
|
||||
try (IndexReader reader = iw.getReader()) {
|
||||
|
@ -879,7 +886,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
throws IOException {
|
||||
Document doc = new Document();
|
||||
if (vector != null) {
|
||||
doc.add(new VectorField(field, vector, similarityFunction));
|
||||
doc.add(new KnnVectorField(field, vector, similarityFunction));
|
||||
}
|
||||
doc.add(new NumericDocValuesField("sortkey", sortkey));
|
||||
String idString = Integer.toString(id);
|
||||
|
@ -901,10 +908,10 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
try (Directory dir = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
Document doc = new Document();
|
||||
doc.add(new VectorField("v1", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc.add(new KnnVectorField("v1", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
|
||||
w.addDocument(doc);
|
||||
|
||||
doc.add(new VectorField("v2", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc.add(new KnnVectorField("v2", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
|
@ -915,7 +922,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
// total 3 vector values were indexed:
|
||||
assertEquals(3, segStatus.vectorValuesStatus.totalVectorValues);
|
||||
// ... across 2 fields:
|
||||
assertEquals(2, segStatus.vectorValuesStatus.totalVectorFields);
|
||||
assertEquals(2, segStatus.vectorValuesStatus.totalKnnVectorFields);
|
||||
|
||||
// Make sure CheckIndex in fact declares that it is testing vectors!
|
||||
assertTrue(output.toString(IOUtils.UTF_8).contains("test: vectors..."));
|
||||
|
@ -939,7 +946,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
Document doc = new Document();
|
||||
// randomly add a vector field
|
||||
if (random().nextInt(4) == 3) {
|
||||
doc.add(new VectorField(fieldName, new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
doc.add(
|
||||
new KnnVectorField(fieldName, new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||
}
|
||||
w.addDocument(doc);
|
||||
}
|
|
@ -49,14 +49,14 @@ import java.util.zip.ZipEntry;
|
|||
import java.util.zip.ZipInputStream;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.VectorFormat;
|
||||
import org.apache.lucene.codecs.asserting.AssertingCodec;
|
||||
import org.apache.lucene.codecs.blockterms.LuceneFixedGap;
|
||||
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
|
@ -1297,10 +1297,11 @@ public final class TestUtil {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the actual default vector format (e.g. LuceneMNVectorFormat for this version of Lucene.
|
||||
* Returns the actual default vector format (e.g. LuceneMNKnnVectorsFormat for this version of
|
||||
* Lucene.
|
||||
*/
|
||||
public static VectorFormat getDefaultVectorFormat() {
|
||||
return new Lucene90HnswVectorFormat();
|
||||
public static KnnVectorsFormat getDefaultKnnVectorsFormat() {
|
||||
return new Lucene90HnswVectorsFormat();
|
||||
}
|
||||
|
||||
public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {
|
||||
|
|
|
@ -13,4 +13,4 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
org.apache.lucene.codecs.asserting.AssertingVectorFormat
|
||||
org.apache.lucene.codecs.asserting.AssertingKnnVectorsFormat
|
Loading…
Reference in New Issue