mirror of https://github.com/apache/lucene.git
LUCENE-9855: Rename knn search vector format (#218)
This commit is contained in:
parent
ad7746d6e3
commit
df807dbe8f
|
@ -7,9 +7,9 @@ http://s.apache.org/luceneversions
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
|
||||||
* LUCENE-9322: Vector-valued fields, Lucene90 Codec (Mike Sokolov, Julie Tibshirani, Tomoko Uchida)
|
* LUCENE-9322 LUCENE-9855: Vector-valued fields, Lucene90 Codec (Mike Sokolov, Julie Tibshirani, Tomoko Uchida)
|
||||||
|
|
||||||
* LUCENE-9004: Approximate nearest vector search via NSW graphs
|
* LUCENE-9004: Approximate nearest vector search via NSW graphs (Mike Sokolov, Tomoko Uchida et al.)
|
||||||
|
|
||||||
* LUCENE-9659: SpanPayloadCheckQuery now supports inequalities. (Kevin Watters, Gus Heck)
|
* LUCENE-9659: SpanPayloadCheckQuery now supports inequalities. (Kevin Watters, Gus Heck)
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.codecs.CompoundFormat;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FilterCodec;
|
import org.apache.lucene.codecs.FilterCodec;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.PointsFormat;
|
import org.apache.lucene.codecs.PointsFormat;
|
||||||
|
@ -35,7 +36,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||||
|
|
||||||
|
@ -122,8 +122,8 @@ public class Lucene70Codec extends Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat vectorFormat() {
|
public KnnVectorsFormat knnVectorsFormat() {
|
||||||
return VectorFormat.EMPTY;
|
return KnnVectorsFormat.EMPTY;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.CompoundFormat;
|
import org.apache.lucene.codecs.CompoundFormat;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.PointsFormat;
|
import org.apache.lucene.codecs.PointsFormat;
|
||||||
|
@ -34,7 +35,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||||
|
|
||||||
|
@ -129,7 +129,7 @@ public class Lucene80Codec extends Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final VectorFormat vectorFormat() {
|
public final KnnVectorsFormat knnVectorsFormat() {
|
||||||
return VectorFormat.EMPTY;
|
return KnnVectorsFormat.EMPTY;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.codecs.CompoundFormat;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FilterCodec;
|
import org.apache.lucene.codecs.FilterCodec;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.PointsFormat;
|
import org.apache.lucene.codecs.PointsFormat;
|
||||||
|
@ -38,7 +39,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||||
|
|
||||||
|
@ -134,8 +134,8 @@ public class Lucene84Codec extends Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat vectorFormat() {
|
public KnnVectorsFormat knnVectorsFormat() {
|
||||||
return VectorFormat.EMPTY;
|
return KnnVectorsFormat.EMPTY;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.codecs.CompoundFormat;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FilterCodec;
|
import org.apache.lucene.codecs.FilterCodec;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.PointsFormat;
|
import org.apache.lucene.codecs.PointsFormat;
|
||||||
|
@ -37,7 +38,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||||
|
|
||||||
|
@ -133,8 +133,8 @@ public class Lucene86Codec extends Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final VectorFormat vectorFormat() {
|
public final KnnVectorsFormat knnVectorsFormat() {
|
||||||
return VectorFormat.EMPTY;
|
return KnnVectorsFormat.EMPTY;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.codecs.CompoundFormat;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FilterCodec;
|
import org.apache.lucene.codecs.FilterCodec;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.PointsFormat;
|
import org.apache.lucene.codecs.PointsFormat;
|
||||||
|
@ -39,7 +40,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||||
|
|
||||||
|
@ -157,8 +157,8 @@ public class Lucene87Codec extends Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final VectorFormat vectorFormat() {
|
public final KnnVectorsFormat knnVectorsFormat() {
|
||||||
return VectorFormat.EMPTY;
|
return KnnVectorsFormat.EMPTY;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -20,6 +20,7 @@ import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.CompoundFormat;
|
import org.apache.lucene.codecs.CompoundFormat;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.PointsFormat;
|
import org.apache.lucene.codecs.PointsFormat;
|
||||||
|
@ -27,7 +28,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* plain text index format.
|
* plain text index format.
|
||||||
|
@ -47,7 +47,7 @@ public final class SimpleTextCodec extends Codec {
|
||||||
private final DocValuesFormat dvFormat = new SimpleTextDocValuesFormat();
|
private final DocValuesFormat dvFormat = new SimpleTextDocValuesFormat();
|
||||||
private final CompoundFormat compoundFormat = new SimpleTextCompoundFormat();
|
private final CompoundFormat compoundFormat = new SimpleTextCompoundFormat();
|
||||||
private final PointsFormat pointsFormat = new SimpleTextPointsFormat();
|
private final PointsFormat pointsFormat = new SimpleTextPointsFormat();
|
||||||
private final VectorFormat vectorFormat = new SimpleTextVectorFormat();
|
private final KnnVectorsFormat knnVectorsFormat = new SimpleTextKnnVectorsFormat();
|
||||||
|
|
||||||
public SimpleTextCodec() {
|
public SimpleTextCodec() {
|
||||||
super("SimpleText");
|
super("SimpleText");
|
||||||
|
@ -104,7 +104,7 @@ public final class SimpleTextCodec extends Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat vectorFormat() {
|
public KnnVectorsFormat knnVectorsFormat() {
|
||||||
return vectorFormat;
|
return knnVectorsFormat;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,9 +17,9 @@
|
||||||
package org.apache.lucene.codecs.simpletext;
|
package org.apache.lucene.codecs.simpletext;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorWriter;
|
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
|
||||||
|
@ -31,20 +31,20 @@ import org.apache.lucene.index.SegmentWriteState;
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public final class SimpleTextVectorFormat extends VectorFormat {
|
public final class SimpleTextKnnVectorsFormat extends KnnVectorsFormat {
|
||||||
|
|
||||||
public SimpleTextVectorFormat() {
|
public SimpleTextKnnVectorsFormat() {
|
||||||
super("SimpleTextVectorFormat");
|
super("SimpleTextKnnVectorsFormat");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||||
return new SimpleTextVectorWriter(state);
|
return new SimpleTextKnnVectorsWriter(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
|
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
|
||||||
return new SimpleTextVectorReader(state);
|
return new SimpleTextKnnVectorsReader(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Extension of vectors data file */
|
/** Extension of vectors data file */
|
|
@ -17,14 +17,14 @@
|
||||||
|
|
||||||
package org.apache.lucene.codecs.simpletext;
|
package org.apache.lucene.codecs.simpletext;
|
||||||
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextVectorWriter.*;
|
import static org.apache.lucene.codecs.simpletext.SimpleTextKnnVectorsWriter.*;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
@ -49,10 +49,10 @@ import org.apache.lucene.util.StringHelper;
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class SimpleTextVectorReader extends VectorReader {
|
public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
|
||||||
// shallowSizeOfInstance for fieldEntries map is included in ramBytesUsed() calculation
|
// shallowSizeOfInstance for fieldEntries map is included in ramBytesUsed() calculation
|
||||||
private static final long BASE_RAM_BYTES_USED =
|
private static final long BASE_RAM_BYTES_USED =
|
||||||
RamUsageEstimator.shallowSizeOfInstance(SimpleTextVectorReader.class)
|
RamUsageEstimator.shallowSizeOfInstance(SimpleTextKnnVectorsReader.class)
|
||||||
+ RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
|
+ RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
|
||||||
|
|
||||||
private static final BytesRef EMPTY = new BytesRef("");
|
private static final BytesRef EMPTY = new BytesRef("");
|
||||||
|
@ -62,18 +62,18 @@ public class SimpleTextVectorReader extends VectorReader {
|
||||||
private final BytesRefBuilder scratch = new BytesRefBuilder();
|
private final BytesRefBuilder scratch = new BytesRefBuilder();
|
||||||
private final Map<String, FieldEntry> fieldEntries = new HashMap<>();
|
private final Map<String, FieldEntry> fieldEntries = new HashMap<>();
|
||||||
|
|
||||||
SimpleTextVectorReader(SegmentReadState readState) throws IOException {
|
SimpleTextKnnVectorsReader(SegmentReadState readState) throws IOException {
|
||||||
this.readState = readState;
|
this.readState = readState;
|
||||||
String metaFileName =
|
String metaFileName =
|
||||||
IndexFileNames.segmentFileName(
|
IndexFileNames.segmentFileName(
|
||||||
readState.segmentInfo.name,
|
readState.segmentInfo.name,
|
||||||
readState.segmentSuffix,
|
readState.segmentSuffix,
|
||||||
SimpleTextVectorFormat.META_EXTENSION);
|
SimpleTextKnnVectorsFormat.META_EXTENSION);
|
||||||
String vectorFileName =
|
String vectorFileName =
|
||||||
IndexFileNames.segmentFileName(
|
IndexFileNames.segmentFileName(
|
||||||
readState.segmentInfo.name,
|
readState.segmentInfo.name,
|
||||||
readState.segmentSuffix,
|
readState.segmentSuffix,
|
||||||
SimpleTextVectorFormat.VECTOR_EXTENSION);
|
SimpleTextKnnVectorsFormat.VECTOR_EXTENSION);
|
||||||
|
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try (ChecksumIndexInput in =
|
try (ChecksumIndexInput in =
|
||||||
|
@ -110,7 +110,7 @@ public class SimpleTextVectorReader extends VectorReader {
|
||||||
FieldInfo info = readState.fieldInfos.fieldInfo(field);
|
FieldInfo info = readState.fieldInfos.fieldInfo(field);
|
||||||
if (info == null) {
|
if (info == null) {
|
||||||
// mirror the handling in Lucene90VectorReader#getVectorValues
|
// mirror the handling in Lucene90VectorReader#getVectorValues
|
||||||
// needed to pass TestSimpleTextVectorFormat#testDeleteAllVectorDocs
|
// needed to pass TestSimpleTextKnnVectorsFormat#testDeleteAllVectorDocs
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
int dimension = info.getVectorDimension();
|
int dimension = info.getVectorDimension();
|
||||||
|
@ -120,7 +120,7 @@ public class SimpleTextVectorReader extends VectorReader {
|
||||||
FieldEntry fieldEntry = fieldEntries.get(field);
|
FieldEntry fieldEntry = fieldEntries.get(field);
|
||||||
if (fieldEntry == null) {
|
if (fieldEntry == null) {
|
||||||
// mirror the handling in Lucene90VectorReader#getVectorValues
|
// mirror the handling in Lucene90VectorReader#getVectorValues
|
||||||
// needed to pass TestSimpleTextVectorFormat#testDeleteAllVectorDocs
|
// needed to pass TestSimpleTextKnnVectorsFormat#testDeleteAllVectorDocs
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (dimension != fieldEntry.dimension) {
|
if (dimension != fieldEntry.dimension) {
|
||||||
|
@ -153,7 +153,7 @@ public class SimpleTextVectorReader extends VectorReader {
|
||||||
ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
|
ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
|
||||||
|
|
||||||
// when there's no actual vector data written (e.g. tested in
|
// when there's no actual vector data written (e.g. tested in
|
||||||
// TestSimpleTextVectorFormat#testDeleteAllVectorDocs)
|
// TestSimpleTextKnnVectorsFormat#testDeleteAllVectorDocs)
|
||||||
// the first line in dataInput will be: checksum 00000000000000000000
|
// the first line in dataInput will be: checksum 00000000000000000000
|
||||||
if (footerStartPos == 0) {
|
if (footerStartPos == 0) {
|
||||||
SimpleTextUtil.checkFooter(input);
|
SimpleTextUtil.checkFooter(input);
|
||||||
|
@ -271,7 +271,7 @@ public class SimpleTextVectorReader extends VectorReader {
|
||||||
} else if (curOrd >= entry.size()) {
|
} else if (curOrd >= entry.size()) {
|
||||||
// when call to advance / nextDoc below already returns NO_MORE_DOCS, calling docID
|
// when call to advance / nextDoc below already returns NO_MORE_DOCS, calling docID
|
||||||
// immediately afterward should also return NO_MORE_DOCS
|
// immediately afterward should also return NO_MORE_DOCS
|
||||||
// this is needed for TestSimpleTextVectorFormat.testAdvance test case
|
// this is needed for TestSimpleTextKnnVectorsFormat.testAdvance test case
|
||||||
return NO_MORE_DOCS;
|
return NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.apache.lucene.codecs.VectorWriter;
|
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
@ -34,7 +34,7 @@ import org.apache.lucene.util.BytesRefBuilder;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/** Writes vector-valued fields in a plain text format */
|
/** Writes vector-valued fields in a plain text format */
|
||||||
public class SimpleTextVectorWriter extends VectorWriter {
|
public class SimpleTextKnnVectorsWriter extends KnnVectorsWriter {
|
||||||
|
|
||||||
static final BytesRef FIELD_NUMBER = new BytesRef("field-number ");
|
static final BytesRef FIELD_NUMBER = new BytesRef("field-number ");
|
||||||
static final BytesRef FIELD_NAME = new BytesRef("field-name ");
|
static final BytesRef FIELD_NAME = new BytesRef("field-name ");
|
||||||
|
@ -46,20 +46,24 @@ public class SimpleTextVectorWriter extends VectorWriter {
|
||||||
private final IndexOutput meta, vectorData;
|
private final IndexOutput meta, vectorData;
|
||||||
private final BytesRefBuilder scratch = new BytesRefBuilder();
|
private final BytesRefBuilder scratch = new BytesRefBuilder();
|
||||||
|
|
||||||
SimpleTextVectorWriter(SegmentWriteState state) throws IOException {
|
SimpleTextKnnVectorsWriter(SegmentWriteState state) throws IOException {
|
||||||
assert state.fieldInfos.hasVectorValues();
|
assert state.fieldInfos.hasVectorValues();
|
||||||
|
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
// exception handling to pass TestSimpleTextVectorFormat#testRandomExceptions
|
// exception handling to pass TestSimpleTextKnnVectorsFormat#testRandomExceptions
|
||||||
try {
|
try {
|
||||||
String metaFileName =
|
String metaFileName =
|
||||||
IndexFileNames.segmentFileName(
|
IndexFileNames.segmentFileName(
|
||||||
state.segmentInfo.name, state.segmentSuffix, SimpleTextVectorFormat.META_EXTENSION);
|
state.segmentInfo.name,
|
||||||
|
state.segmentSuffix,
|
||||||
|
SimpleTextKnnVectorsFormat.META_EXTENSION);
|
||||||
meta = state.directory.createOutput(metaFileName, state.context);
|
meta = state.directory.createOutput(metaFileName, state.context);
|
||||||
|
|
||||||
String vectorDataFileName =
|
String vectorDataFileName =
|
||||||
IndexFileNames.segmentFileName(
|
IndexFileNames.segmentFileName(
|
||||||
state.segmentInfo.name, state.segmentSuffix, SimpleTextVectorFormat.VECTOR_EXTENSION);
|
state.segmentInfo.name,
|
||||||
|
state.segmentSuffix,
|
||||||
|
SimpleTextKnnVectorsFormat.VECTOR_EXTENSION);
|
||||||
vectorData = state.directory.createOutput(vectorDataFileName, state.context);
|
vectorData = state.directory.createOutput(vectorDataFileName, state.context);
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
|
@ -17,9 +17,9 @@
|
||||||
package org.apache.lucene.codecs.simpletext;
|
package org.apache.lucene.codecs.simpletext;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.index.BaseVectorFormatTestCase;
|
import org.apache.lucene.index.BaseKnnVectorsFormatTestCase;
|
||||||
|
|
||||||
public class TestSimpleTextVectorFormat extends BaseVectorFormatTestCase {
|
public class TestSimpleTextKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
|
||||||
@Override
|
@Override
|
||||||
protected Codec getCodec() {
|
protected Codec getCodec() {
|
||||||
return new SimpleTextCodec();
|
return new SimpleTextCodec();
|
|
@ -111,7 +111,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
|
||||||
public abstract PointsFormat pointsFormat();
|
public abstract PointsFormat pointsFormat();
|
||||||
|
|
||||||
/** Encodes/decodes numeric vector fields */
|
/** Encodes/decodes numeric vector fields */
|
||||||
public abstract VectorFormat vectorFormat();
|
public abstract KnnVectorsFormat knnVectorsFormat();
|
||||||
|
|
||||||
/** looks up a codec by name */
|
/** looks up a codec by name */
|
||||||
public static Codec forName(String name) {
|
public static Codec forName(String name) {
|
||||||
|
|
|
@ -108,7 +108,7 @@ public abstract class FilterCodec extends Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat vectorFormat() {
|
public KnnVectorsFormat knnVectorsFormat() {
|
||||||
return delegate.vectorFormat();
|
return delegate.knnVectorsFormat();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,23 +29,23 @@ import org.apache.lucene.util.NamedSPILoader;
|
||||||
* Encodes/decodes per-document vector and any associated indexing structures required to support
|
* Encodes/decodes per-document vector and any associated indexing structures required to support
|
||||||
* nearest-neighbor search
|
* nearest-neighbor search
|
||||||
*/
|
*/
|
||||||
public abstract class VectorFormat implements NamedSPILoader.NamedSPI {
|
public abstract class KnnVectorsFormat implements NamedSPILoader.NamedSPI {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This static holder class prevents classloading deadlock by delaying init of vector formats
|
* This static holder class prevents classloading deadlock by delaying init of vector formats
|
||||||
* until needed.
|
* until needed.
|
||||||
*/
|
*/
|
||||||
private static final class Holder {
|
private static final class Holder {
|
||||||
private static final NamedSPILoader<VectorFormat> LOADER =
|
private static final NamedSPILoader<KnnVectorsFormat> LOADER =
|
||||||
new NamedSPILoader<>(VectorFormat.class);
|
new NamedSPILoader<>(KnnVectorsFormat.class);
|
||||||
|
|
||||||
private Holder() {}
|
private Holder() {}
|
||||||
|
|
||||||
static NamedSPILoader<VectorFormat> getLoader() {
|
static NamedSPILoader<KnnVectorsFormat> getLoader() {
|
||||||
if (LOADER == null) {
|
if (LOADER == null) {
|
||||||
throw new IllegalStateException(
|
throw new IllegalStateException(
|
||||||
"You tried to lookup a VectorFormat name before all formats could be initialized. "
|
"You tried to lookup a KnnVectorsFormat name before all formats could be initialized. "
|
||||||
+ "This likely happens if you call VectorFormat#forName from a VectorFormat's ctor.");
|
+ "This likely happens if you call KnnVectorsFormat#forName from a KnnVectorsFormat's ctor.");
|
||||||
}
|
}
|
||||||
return LOADER;
|
return LOADER;
|
||||||
}
|
}
|
||||||
|
@ -54,7 +54,7 @@ public abstract class VectorFormat implements NamedSPILoader.NamedSPI {
|
||||||
private final String name;
|
private final String name;
|
||||||
|
|
||||||
/** Sole constructor */
|
/** Sole constructor */
|
||||||
protected VectorFormat(String name) {
|
protected KnnVectorsFormat(String name) {
|
||||||
NamedSPILoader.checkServiceName(name);
|
NamedSPILoader.checkServiceName(name);
|
||||||
this.name = name;
|
this.name = name;
|
||||||
}
|
}
|
||||||
|
@ -65,31 +65,31 @@ public abstract class VectorFormat implements NamedSPILoader.NamedSPI {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** looks up a format by name */
|
/** looks up a format by name */
|
||||||
public static VectorFormat forName(String name) {
|
public static KnnVectorsFormat forName(String name) {
|
||||||
return Holder.getLoader().lookup(name);
|
return Holder.getLoader().lookup(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns a {@link VectorWriter} to write the vectors to the index. */
|
/** Returns a {@link KnnVectorsWriter} to write the vectors to the index. */
|
||||||
public abstract VectorWriter fieldsWriter(SegmentWriteState state) throws IOException;
|
public abstract KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException;
|
||||||
|
|
||||||
/** Returns a {@link VectorReader} to read the vectors from the index. */
|
/** Returns a {@link KnnVectorsReader} to read the vectors from the index. */
|
||||||
public abstract VectorReader fieldsReader(SegmentReadState state) throws IOException;
|
public abstract KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* EMPTY throws an exception when written. It acts as a sentinel indicating a Codec that does not
|
* EMPTY throws an exception when written. It acts as a sentinel indicating a Codec that does not
|
||||||
* support vectors.
|
* support vectors.
|
||||||
*/
|
*/
|
||||||
public static final VectorFormat EMPTY =
|
public static final KnnVectorsFormat EMPTY =
|
||||||
new VectorFormat("EMPTY") {
|
new KnnVectorsFormat("EMPTY") {
|
||||||
@Override
|
@Override
|
||||||
public VectorWriter fieldsWriter(SegmentWriteState state) {
|
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) {
|
||||||
throw new UnsupportedOperationException(
|
throw new UnsupportedOperationException(
|
||||||
"Attempt to write EMPTY VectorValues: maybe you forgot to use codec=Lucene90");
|
"Attempt to write EMPTY VectorValues: maybe you forgot to use codec=Lucene90");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorReader fieldsReader(SegmentReadState state) {
|
public KnnVectorsReader fieldsReader(SegmentReadState state) {
|
||||||
return new VectorReader() {
|
return new KnnVectorsReader() {
|
||||||
@Override
|
@Override
|
||||||
public void checkIntegrity() {}
|
public void checkIntegrity() {}
|
||||||
|
|
|
@ -24,10 +24,10 @@ import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.util.Accountable;
|
import org.apache.lucene.util.Accountable;
|
||||||
|
|
||||||
/** Reads vectors from an index. */
|
/** Reads vectors from an index. */
|
||||||
public abstract class VectorReader implements Closeable, Accountable {
|
public abstract class KnnVectorsReader implements Closeable, Accountable {
|
||||||
|
|
||||||
/** Sole constructor */
|
/** Sole constructor */
|
||||||
protected VectorReader() {}
|
protected KnnVectorsReader() {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks consistency of this reader.
|
* Checks consistency of this reader.
|
||||||
|
@ -61,7 +61,7 @@ public abstract class VectorReader implements Closeable, Accountable {
|
||||||
*
|
*
|
||||||
* <p>The default implementation returns {@code this}
|
* <p>The default implementation returns {@code this}
|
||||||
*/
|
*/
|
||||||
public VectorReader getMergeInstance() {
|
public KnnVectorsReader getMergeInstance() {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -34,10 +34,10 @@ import org.apache.lucene.index.VectorValues;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
/** Writes vectors to an index. */
|
/** Writes vectors to an index. */
|
||||||
public abstract class VectorWriter implements Closeable {
|
public abstract class KnnVectorsWriter implements Closeable {
|
||||||
|
|
||||||
/** Sole constructor */
|
/** Sole constructor */
|
||||||
protected VectorWriter() {}
|
protected KnnVectorsWriter() {}
|
||||||
|
|
||||||
/** Write all values contained in the provided reader */
|
/** Write all values contained in the provided reader */
|
||||||
public abstract void writeField(FieldInfo fieldInfo, VectorValues values) throws IOException;
|
public abstract void writeField(FieldInfo fieldInfo, VectorValues values) throws IOException;
|
||||||
|
@ -48,7 +48,7 @@ public abstract class VectorWriter implements Closeable {
|
||||||
/** Merge the vector values from multiple segments, for all fields */
|
/** Merge the vector values from multiple segments, for all fields */
|
||||||
public void merge(MergeState mergeState) throws IOException {
|
public void merge(MergeState mergeState) throws IOException {
|
||||||
for (int i = 0; i < mergeState.fieldInfos.length; i++) {
|
for (int i = 0; i < mergeState.fieldInfos.length; i++) {
|
||||||
VectorReader reader = mergeState.vectorReaders[i];
|
KnnVectorsReader reader = mergeState.knnVectorsReaders[i];
|
||||||
assert reader != null || mergeState.fieldInfos[i].hasVectorValues() == false;
|
assert reader != null || mergeState.fieldInfos[i].hasVectorValues() == false;
|
||||||
if (reader != null) {
|
if (reader != null) {
|
||||||
reader.checkIntegrity();
|
reader.checkIntegrity();
|
||||||
|
@ -71,9 +71,9 @@ public abstract class VectorWriter implements Closeable {
|
||||||
int dimension = -1;
|
int dimension = -1;
|
||||||
VectorSimilarityFunction similarityFunction = null;
|
VectorSimilarityFunction similarityFunction = null;
|
||||||
int nonEmptySegmentIndex = 0;
|
int nonEmptySegmentIndex = 0;
|
||||||
for (int i = 0; i < mergeState.vectorReaders.length; i++) {
|
for (int i = 0; i < mergeState.knnVectorsReaders.length; i++) {
|
||||||
VectorReader vectorReader = mergeState.vectorReaders[i];
|
KnnVectorsReader knnVectorsReader = mergeState.knnVectorsReaders[i];
|
||||||
if (vectorReader != null) {
|
if (knnVectorsReader != null) {
|
||||||
if (mergeFieldInfo != null && mergeFieldInfo.hasVectorValues()) {
|
if (mergeFieldInfo != null && mergeFieldInfo.hasVectorValues()) {
|
||||||
int segmentDimension = mergeFieldInfo.getVectorDimension();
|
int segmentDimension = mergeFieldInfo.getVectorDimension();
|
||||||
VectorSimilarityFunction segmentSimilarityFunction =
|
VectorSimilarityFunction segmentSimilarityFunction =
|
||||||
|
@ -98,7 +98,7 @@ public abstract class VectorWriter implements Closeable {
|
||||||
+ "!="
|
+ "!="
|
||||||
+ segmentSimilarityFunction);
|
+ segmentSimilarityFunction);
|
||||||
}
|
}
|
||||||
VectorValues values = vectorReader.getVectorValues(mergeFieldInfo.name);
|
VectorValues values = knnVectorsReader.getVectorValues(mergeFieldInfo.name);
|
||||||
if (values != null) {
|
if (values != null) {
|
||||||
subs.add(new VectorValuesSub(nonEmptySegmentIndex++, mergeState.docMaps[i], values));
|
subs.add(new VectorValuesSub(nonEmptySegmentIndex++, mergeState.docMaps[i], values));
|
||||||
}
|
}
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.codecs.CompoundFormat;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FilterCodec;
|
import org.apache.lucene.codecs.FilterCodec;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.PointsFormat;
|
import org.apache.lucene.codecs.PointsFormat;
|
||||||
|
@ -29,10 +30,9 @@ import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||||
|
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Implements the Lucene 9.0 index format
|
* Implements the Lucene 9.0 index format
|
||||||
|
@ -83,12 +83,12 @@ public class Lucene90Codec extends Codec {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
private final VectorFormat defaultVectorFormat;
|
private final KnnVectorsFormat defaultKnnVectorsFormat;
|
||||||
private final VectorFormat vectorFormat =
|
private final KnnVectorsFormat knnVectorsFormat =
|
||||||
new PerFieldVectorFormat() {
|
new PerFieldKnnVectorsFormat() {
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat getVectorFormatForField(String field) {
|
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||||
return Lucene90Codec.this.getVectorFormatForField(field);
|
return Lucene90Codec.this.getKnnVectorsFormatForField(field);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -110,7 +110,7 @@ public class Lucene90Codec extends Codec {
|
||||||
new Lucene90StoredFieldsFormat(Objects.requireNonNull(mode).storedMode);
|
new Lucene90StoredFieldsFormat(Objects.requireNonNull(mode).storedMode);
|
||||||
this.defaultPostingsFormat = new Lucene90PostingsFormat();
|
this.defaultPostingsFormat = new Lucene90PostingsFormat();
|
||||||
this.defaultDVFormat = new Lucene90DocValuesFormat();
|
this.defaultDVFormat = new Lucene90DocValuesFormat();
|
||||||
this.defaultVectorFormat = new Lucene90HnswVectorFormat();
|
this.defaultKnnVectorsFormat = new Lucene90HnswVectorsFormat();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -154,8 +154,8 @@ public class Lucene90Codec extends Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final VectorFormat vectorFormat() {
|
public final KnnVectorsFormat knnVectorsFormat() {
|
||||||
return vectorFormat;
|
return knnVectorsFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -191,8 +191,8 @@ public class Lucene90Codec extends Codec {
|
||||||
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
|
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
|
||||||
* future versions of Lucene are only guaranteed to be able to read the default implementation.
|
* future versions of Lucene are only guaranteed to be able to read the default implementation.
|
||||||
*/
|
*/
|
||||||
public VectorFormat getVectorFormatForField(String field) {
|
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||||
return defaultVectorFormat;
|
return defaultKnnVectorsFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -18,9 +18,9 @@
|
||||||
package org.apache.lucene.codecs.lucene90;
|
package org.apache.lucene.codecs.lucene90;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorWriter;
|
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.util.hnsw.HnswGraph;
|
import org.apache.lucene.util.hnsw.HnswGraph;
|
||||||
|
@ -65,11 +65,11 @@ import org.apache.lucene.util.hnsw.HnswGraph;
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public final class Lucene90HnswVectorFormat extends VectorFormat {
|
public final class Lucene90HnswVectorsFormat extends KnnVectorsFormat {
|
||||||
|
|
||||||
static final String META_CODEC_NAME = "Lucene90HnswVectorFormatMeta";
|
static final String META_CODEC_NAME = "Lucene90HnswVectorsFormatMeta";
|
||||||
static final String VECTOR_DATA_CODEC_NAME = "Lucene90HnswVectorFormatData";
|
static final String VECTOR_DATA_CODEC_NAME = "Lucene90HnswVectorsFormatData";
|
||||||
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90HnswVectorFormatIndex";
|
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90HnswVectorsFormatIndex";
|
||||||
static final String META_EXTENSION = "vem";
|
static final String META_EXTENSION = "vem";
|
||||||
static final String VECTOR_DATA_EXTENSION = "vec";
|
static final String VECTOR_DATA_EXTENSION = "vec";
|
||||||
static final String VECTOR_INDEX_EXTENSION = "vex";
|
static final String VECTOR_INDEX_EXTENSION = "vex";
|
||||||
|
@ -82,36 +82,34 @@ public final class Lucene90HnswVectorFormat extends VectorFormat {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Controls how many of the nearest neighbor candidates are connected to the new node. Defaults to
|
* Controls how many of the nearest neighbor candidates are connected to the new node. Defaults to
|
||||||
* {@link Lucene90HnswVectorFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details.
|
* {@link Lucene90HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details.
|
||||||
*/
|
*/
|
||||||
private final int maxConn;
|
private final int maxConn;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The number of candidate neighbors to track while searching the graph for each newly inserted
|
* The number of candidate neighbors to track while searching the graph for each newly inserted
|
||||||
* node. Defaults to {@link Lucene90HnswVectorFormat#DEFAULT_BEAM_WIDTH}. See {@link HnswGraph}
|
* node. Defaults to {@link Lucene90HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link
|
||||||
* for details.
|
* HnswGraph} for details.
|
||||||
*/
|
*/
|
||||||
private final int beamWidth;
|
private final int beamWidth;
|
||||||
|
|
||||||
public Lucene90HnswVectorFormat() {
|
public Lucene90HnswVectorsFormat() {
|
||||||
super("Lucene90HnswVectorFormat");
|
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH);
|
||||||
this.maxConn = DEFAULT_MAX_CONN;
|
|
||||||
this.beamWidth = DEFAULT_BEAM_WIDTH;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Lucene90HnswVectorFormat(int maxConn, int beamWidth) {
|
public Lucene90HnswVectorsFormat(int maxConn, int beamWidth) {
|
||||||
super("Lucene90HnswVectorFormat");
|
super("Lucene90HnswVectorsFormat");
|
||||||
this.maxConn = maxConn;
|
this.maxConn = maxConn;
|
||||||
this.beamWidth = beamWidth;
|
this.beamWidth = beamWidth;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||||
return new Lucene90HnswVectorWriter(state, maxConn, beamWidth);
|
return new Lucene90HnswVectorsWriter(state, maxConn, beamWidth);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
|
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
|
||||||
return new Lucene90HnswVectorReader(state);
|
return new Lucene90HnswVectorsReader(state);
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -26,7 +26,7 @@ import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
@ -54,7 +54,7 @@ import org.apache.lucene.util.hnsw.NeighborQueue;
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public final class Lucene90HnswVectorReader extends VectorReader {
|
public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
|
||||||
|
|
||||||
private final FieldInfos fieldInfos;
|
private final FieldInfos fieldInfos;
|
||||||
private final Map<String, FieldEntry> fields = new HashMap<>();
|
private final Map<String, FieldEntry> fields = new HashMap<>();
|
||||||
|
@ -62,10 +62,10 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
||||||
private final IndexInput vectorIndex;
|
private final IndexInput vectorIndex;
|
||||||
private final long checksumSeed;
|
private final long checksumSeed;
|
||||||
|
|
||||||
Lucene90HnswVectorReader(SegmentReadState state) throws IOException {
|
Lucene90HnswVectorsReader(SegmentReadState state) throws IOException {
|
||||||
this.fieldInfos = state.fieldInfos;
|
this.fieldInfos = state.fieldInfos;
|
||||||
|
|
||||||
int versionMeta = readMetadata(state, Lucene90HnswVectorFormat.META_EXTENSION);
|
int versionMeta = readMetadata(state, Lucene90HnswVectorsFormat.META_EXTENSION);
|
||||||
long[] checksumRef = new long[1];
|
long[] checksumRef = new long[1];
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
|
@ -73,15 +73,15 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
||||||
openDataInput(
|
openDataInput(
|
||||||
state,
|
state,
|
||||||
versionMeta,
|
versionMeta,
|
||||||
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION,
|
Lucene90HnswVectorsFormat.VECTOR_DATA_EXTENSION,
|
||||||
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
|
Lucene90HnswVectorsFormat.VECTOR_DATA_CODEC_NAME,
|
||||||
checksumRef);
|
checksumRef);
|
||||||
vectorIndex =
|
vectorIndex =
|
||||||
openDataInput(
|
openDataInput(
|
||||||
state,
|
state,
|
||||||
versionMeta,
|
versionMeta,
|
||||||
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION,
|
Lucene90HnswVectorsFormat.VECTOR_INDEX_EXTENSION,
|
||||||
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
|
Lucene90HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME,
|
||||||
checksumRef);
|
checksumRef);
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -102,9 +102,9 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
||||||
versionMeta =
|
versionMeta =
|
||||||
CodecUtil.checkIndexHeader(
|
CodecUtil.checkIndexHeader(
|
||||||
meta,
|
meta,
|
||||||
Lucene90HnswVectorFormat.META_CODEC_NAME,
|
Lucene90HnswVectorsFormat.META_CODEC_NAME,
|
||||||
Lucene90HnswVectorFormat.VERSION_START,
|
Lucene90HnswVectorsFormat.VERSION_START,
|
||||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
||||||
state.segmentInfo.getId(),
|
state.segmentInfo.getId(),
|
||||||
state.segmentSuffix);
|
state.segmentSuffix);
|
||||||
readFields(meta, state.fieldInfos);
|
readFields(meta, state.fieldInfos);
|
||||||
|
@ -131,8 +131,8 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
||||||
CodecUtil.checkIndexHeader(
|
CodecUtil.checkIndexHeader(
|
||||||
in,
|
in,
|
||||||
codecName,
|
codecName,
|
||||||
Lucene90HnswVectorFormat.VERSION_START,
|
Lucene90HnswVectorsFormat.VERSION_START,
|
||||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
||||||
state.segmentInfo.getId(),
|
state.segmentInfo.getId(),
|
||||||
state.segmentSuffix);
|
state.segmentSuffix);
|
||||||
if (versionMeta != versionVectorData) {
|
if (versionMeta != versionVectorData) {
|
||||||
|
@ -205,7 +205,7 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long ramBytesUsed() {
|
public long ramBytesUsed() {
|
||||||
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90HnswVectorReader.class);
|
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90HnswVectorsReader.class);
|
||||||
totalBytes +=
|
totalBytes +=
|
||||||
RamUsageEstimator.sizeOfMap(
|
RamUsageEstimator.sizeOfMap(
|
||||||
fields, RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class));
|
fields, RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class));
|
|
@ -22,7 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.VectorWriter;
|
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
|
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
|
||||||
|
@ -41,7 +41,7 @@ import org.apache.lucene.util.hnsw.NeighborArray;
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public final class Lucene90HnswVectorWriter extends VectorWriter {
|
public final class Lucene90HnswVectorsWriter extends KnnVectorsWriter {
|
||||||
|
|
||||||
private final SegmentWriteState segmentWriteState;
|
private final SegmentWriteState segmentWriteState;
|
||||||
private final IndexOutput meta, vectorData, vectorIndex;
|
private final IndexOutput meta, vectorData, vectorIndex;
|
||||||
|
@ -50,7 +50,8 @@ public final class Lucene90HnswVectorWriter extends VectorWriter {
|
||||||
private final int beamWidth;
|
private final int beamWidth;
|
||||||
private boolean finished;
|
private boolean finished;
|
||||||
|
|
||||||
Lucene90HnswVectorWriter(SegmentWriteState state, int maxConn, int beamWidth) throws IOException {
|
Lucene90HnswVectorsWriter(SegmentWriteState state, int maxConn, int beamWidth)
|
||||||
|
throws IOException {
|
||||||
this.maxConn = maxConn;
|
this.maxConn = maxConn;
|
||||||
this.beamWidth = beamWidth;
|
this.beamWidth = beamWidth;
|
||||||
|
|
||||||
|
@ -59,19 +60,19 @@ public final class Lucene90HnswVectorWriter extends VectorWriter {
|
||||||
|
|
||||||
String metaFileName =
|
String metaFileName =
|
||||||
IndexFileNames.segmentFileName(
|
IndexFileNames.segmentFileName(
|
||||||
state.segmentInfo.name, state.segmentSuffix, Lucene90HnswVectorFormat.META_EXTENSION);
|
state.segmentInfo.name, state.segmentSuffix, Lucene90HnswVectorsFormat.META_EXTENSION);
|
||||||
|
|
||||||
String vectorDataFileName =
|
String vectorDataFileName =
|
||||||
IndexFileNames.segmentFileName(
|
IndexFileNames.segmentFileName(
|
||||||
state.segmentInfo.name,
|
state.segmentInfo.name,
|
||||||
state.segmentSuffix,
|
state.segmentSuffix,
|
||||||
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION);
|
Lucene90HnswVectorsFormat.VECTOR_DATA_EXTENSION);
|
||||||
|
|
||||||
String indexDataFileName =
|
String indexDataFileName =
|
||||||
IndexFileNames.segmentFileName(
|
IndexFileNames.segmentFileName(
|
||||||
state.segmentInfo.name,
|
state.segmentInfo.name,
|
||||||
state.segmentSuffix,
|
state.segmentSuffix,
|
||||||
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION);
|
Lucene90HnswVectorsFormat.VECTOR_INDEX_EXTENSION);
|
||||||
|
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
|
@ -81,20 +82,20 @@ public final class Lucene90HnswVectorWriter extends VectorWriter {
|
||||||
|
|
||||||
CodecUtil.writeIndexHeader(
|
CodecUtil.writeIndexHeader(
|
||||||
meta,
|
meta,
|
||||||
Lucene90HnswVectorFormat.META_CODEC_NAME,
|
Lucene90HnswVectorsFormat.META_CODEC_NAME,
|
||||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
||||||
state.segmentInfo.getId(),
|
state.segmentInfo.getId(),
|
||||||
state.segmentSuffix);
|
state.segmentSuffix);
|
||||||
CodecUtil.writeIndexHeader(
|
CodecUtil.writeIndexHeader(
|
||||||
vectorData,
|
vectorData,
|
||||||
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
|
Lucene90HnswVectorsFormat.VECTOR_DATA_CODEC_NAME,
|
||||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
||||||
state.segmentInfo.getId(),
|
state.segmentInfo.getId(),
|
||||||
state.segmentSuffix);
|
state.segmentSuffix);
|
||||||
CodecUtil.writeIndexHeader(
|
CodecUtil.writeIndexHeader(
|
||||||
vectorIndex,
|
vectorIndex,
|
||||||
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
|
Lucene90HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME,
|
||||||
Lucene90HnswVectorFormat.VERSION_CURRENT,
|
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
||||||
state.segmentInfo.getId(),
|
state.segmentInfo.getId(),
|
||||||
state.segmentSuffix);
|
state.segmentSuffix);
|
||||||
success = true;
|
success = true;
|
|
@ -180,7 +180,7 @@
|
||||||
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
|
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
|
||||||
* and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
|
* and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
|
||||||
* intersection (2D, 3D).
|
* intersection (2D, 3D).
|
||||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}. The
|
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat Vector values}. The
|
||||||
* vector format stores numeric vectors in a format optimized for random access and
|
* vector format stores numeric vectors in a format optimized for random access and
|
||||||
* computation, supporting high-dimensional nearest-neighbor search.
|
* computation, supporting high-dimensional nearest-neighbor search.
|
||||||
* </ul>
|
* </ul>
|
||||||
|
@ -310,7 +310,7 @@
|
||||||
* <td>Holds indexed points</td>
|
* <td>Holds indexed points</td>
|
||||||
* </tr>
|
* </tr>
|
||||||
* <tr>
|
* <tr>
|
||||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}</td>
|
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat Vector values}</td>
|
||||||
* <td>.vec, .vem</td>
|
* <td>.vec, .vem</td>
|
||||||
* <td>Holds indexed vectors; <code>.vec</code> files contain the raw vector data, and
|
* <td>Holds indexed vectors; <code>.vec</code> files contain the raw vector data, and
|
||||||
* <code>.vem</code> the vector metadata</td>
|
* <code>.vem</code> the vector metadata</td>
|
||||||
|
|
|
@ -23,9 +23,9 @@ import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.ServiceLoader;
|
import java.util.ServiceLoader;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorWriter;
|
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
@ -50,30 +50,30 @@ import org.apache.lucene.util.IOUtils;
|
||||||
* @see ServiceLoader
|
* @see ServiceLoader
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public abstract class PerFieldVectorFormat extends VectorFormat {
|
public abstract class PerFieldKnnVectorsFormat extends KnnVectorsFormat {
|
||||||
/** Name of this {@link VectorFormat}. */
|
/** Name of this {@link KnnVectorsFormat}. */
|
||||||
public static final String PER_FIELD_NAME = "PerFieldVectors90";
|
public static final String PER_FIELD_NAME = "PerFieldVectors90";
|
||||||
|
|
||||||
/** {@link FieldInfo} attribute name used to store the format name for each field. */
|
/** {@link FieldInfo} attribute name used to store the format name for each field. */
|
||||||
public static final String PER_FIELD_FORMAT_KEY =
|
public static final String PER_FIELD_FORMAT_KEY =
|
||||||
PerFieldVectorFormat.class.getSimpleName() + ".format";
|
PerFieldKnnVectorsFormat.class.getSimpleName() + ".format";
|
||||||
|
|
||||||
/** {@link FieldInfo} attribute name used to store the segment suffix name for each field. */
|
/** {@link FieldInfo} attribute name used to store the segment suffix name for each field. */
|
||||||
public static final String PER_FIELD_SUFFIX_KEY =
|
public static final String PER_FIELD_SUFFIX_KEY =
|
||||||
PerFieldVectorFormat.class.getSimpleName() + ".suffix";
|
PerFieldKnnVectorsFormat.class.getSimpleName() + ".suffix";
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
protected PerFieldVectorFormat() {
|
protected PerFieldKnnVectorsFormat() {
|
||||||
super(PER_FIELD_NAME);
|
super(PER_FIELD_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||||
return new FieldsWriter(state);
|
return new FieldsWriter(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
|
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
|
||||||
return new FieldsReader(state);
|
return new FieldsReader(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,10 +84,10 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
||||||
* <p>The field to format mapping is written to the index, so this method is only invoked when
|
* <p>The field to format mapping is written to the index, so this method is only invoked when
|
||||||
* writing, not when reading.
|
* writing, not when reading.
|
||||||
*/
|
*/
|
||||||
public abstract VectorFormat getVectorFormatForField(String field);
|
public abstract KnnVectorsFormat getKnnVectorsFormatForField(String field);
|
||||||
|
|
||||||
private class FieldsWriter extends VectorWriter {
|
private class FieldsWriter extends KnnVectorsWriter {
|
||||||
private final Map<VectorFormat, WriterAndSuffix> formats;
|
private final Map<KnnVectorsFormat, WriterAndSuffix> formats;
|
||||||
private final Map<String, Integer> suffixes = new HashMap<>();
|
private final Map<String, Integer> suffixes = new HashMap<>();
|
||||||
private final SegmentWriteState segmentWriteState;
|
private final SegmentWriteState segmentWriteState;
|
||||||
|
|
||||||
|
@ -113,11 +113,11 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
||||||
IOUtils.close(formats.values());
|
IOUtils.close(formats.values());
|
||||||
}
|
}
|
||||||
|
|
||||||
private VectorWriter getInstance(FieldInfo field) throws IOException {
|
private KnnVectorsWriter getInstance(FieldInfo field) throws IOException {
|
||||||
VectorFormat format = getVectorFormatForField(field.name);
|
KnnVectorsFormat format = getKnnVectorsFormatForField(field.name);
|
||||||
if (format == null) {
|
if (format == null) {
|
||||||
throw new IllegalStateException(
|
throw new IllegalStateException(
|
||||||
"invalid null VectorFormat for field=\"" + field.name + "\"");
|
"invalid null KnnVectorsFormat for field=\"" + field.name + "\"");
|
||||||
}
|
}
|
||||||
final String formatName = format.getName();
|
final String formatName = format.getName();
|
||||||
|
|
||||||
|
@ -164,13 +164,13 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** VectorReader that can wrap multiple delegate readers, selected by field. */
|
/** KnnVectorsReader that can wrap multiple delegate readers, selected by field. */
|
||||||
public static class FieldsReader extends VectorReader {
|
public static class FieldsReader extends KnnVectorsReader {
|
||||||
|
|
||||||
private final Map<String, VectorReader> fields = new TreeMap<>();
|
private final Map<String, KnnVectorsReader> fields = new TreeMap<>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a FieldsReader over a segment, opening VectorReaders for each VectorFormat specified
|
* Create a FieldsReader over a segment, opening KnnVectorsReaders for each KnnVectorsFormat
|
||||||
* by the indexed numeric vector fields.
|
* specified by the indexed numeric vector fields.
|
||||||
*
|
*
|
||||||
* @param readState defines the fields
|
* @param readState defines the fields
|
||||||
* @throws IOException if one of the delegate readers throws
|
* @throws IOException if one of the delegate readers throws
|
||||||
|
@ -179,7 +179,7 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
||||||
|
|
||||||
// Init each unique format:
|
// Init each unique format:
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
Map<String, VectorReader> formats = new HashMap<>();
|
Map<String, KnnVectorsReader> formats = new HashMap<>();
|
||||||
try {
|
try {
|
||||||
// Read field name -> format name
|
// Read field name -> format name
|
||||||
for (FieldInfo fi : readState.fieldInfos) {
|
for (FieldInfo fi : readState.fieldInfos) {
|
||||||
|
@ -193,7 +193,7 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
||||||
throw new IllegalStateException(
|
throw new IllegalStateException(
|
||||||
"missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName);
|
"missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName);
|
||||||
}
|
}
|
||||||
VectorFormat format = VectorFormat.forName(formatName);
|
KnnVectorsFormat format = KnnVectorsFormat.forName(formatName);
|
||||||
String segmentSuffix =
|
String segmentSuffix =
|
||||||
getFullSegmentSuffix(readState.segmentSuffix, getSuffix(formatName, suffix));
|
getFullSegmentSuffix(readState.segmentSuffix, getSuffix(formatName, suffix));
|
||||||
if (!formats.containsKey(segmentSuffix)) {
|
if (!formats.containsKey(segmentSuffix)) {
|
||||||
|
@ -218,34 +218,34 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
||||||
*
|
*
|
||||||
* @param field the name of a numeric vector field
|
* @param field the name of a numeric vector field
|
||||||
*/
|
*/
|
||||||
public VectorReader getFieldReader(String field) {
|
public KnnVectorsReader getFieldReader(String field) {
|
||||||
return fields.get(field);
|
return fields.get(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void checkIntegrity() throws IOException {
|
public void checkIntegrity() throws IOException {
|
||||||
for (VectorReader reader : fields.values()) {
|
for (KnnVectorsReader reader : fields.values()) {
|
||||||
reader.checkIntegrity();
|
reader.checkIntegrity();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorValues getVectorValues(String field) throws IOException {
|
public VectorValues getVectorValues(String field) throws IOException {
|
||||||
VectorReader vectorReader = fields.get(field);
|
KnnVectorsReader knnVectorsReader = fields.get(field);
|
||||||
if (vectorReader == null) {
|
if (knnVectorsReader == null) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
return vectorReader.getVectorValues(field);
|
return knnVectorsReader.getVectorValues(field);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TopDocs search(String field, float[] target, int k) throws IOException {
|
public TopDocs search(String field, float[] target, int k) throws IOException {
|
||||||
VectorReader vectorReader = fields.get(field);
|
KnnVectorsReader knnVectorsReader = fields.get(field);
|
||||||
if (vectorReader == null) {
|
if (knnVectorsReader == null) {
|
||||||
return new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
|
return new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
|
||||||
} else {
|
} else {
|
||||||
return vectorReader.search(field, target, k);
|
return knnVectorsReader.search(field, target, k);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -257,7 +257,7 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
||||||
@Override
|
@Override
|
||||||
public long ramBytesUsed() {
|
public long ramBytesUsed() {
|
||||||
long total = 0;
|
long total = 0;
|
||||||
for (VectorReader reader : fields.values()) {
|
for (KnnVectorsReader reader : fields.values()) {
|
||||||
total += reader.ramBytesUsed();
|
total += reader.ramBytesUsed();
|
||||||
}
|
}
|
||||||
return total;
|
return total;
|
||||||
|
@ -277,10 +277,10 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class WriterAndSuffix implements Closeable {
|
private static class WriterAndSuffix implements Closeable {
|
||||||
final VectorWriter writer;
|
final KnnVectorsWriter writer;
|
||||||
final int suffix;
|
final int suffix;
|
||||||
|
|
||||||
WriterAndSuffix(VectorWriter writer, int suffix) {
|
WriterAndSuffix(KnnVectorsWriter writer, int suffix) {
|
||||||
this.writer = writer;
|
this.writer = writer;
|
||||||
this.suffix = suffix;
|
this.suffix = suffix;
|
||||||
}
|
}
|
|
@ -25,14 +25,14 @@ import org.apache.lucene.index.VectorValues;
|
||||||
* are dense - that is, every dimension of a vector contains an explicit value, stored packed into
|
* are dense - that is, every dimension of a vector contains an explicit value, stored packed into
|
||||||
* an array (of type float[]) whose length is the vector dimension. Values can be retrieved using
|
* an array (of type float[]) whose length is the vector dimension. Values can be retrieved using
|
||||||
* {@link VectorValues}, which is a forward-only docID-based iterator and also offers random-access
|
* {@link VectorValues}, which is a forward-only docID-based iterator and also offers random-access
|
||||||
* by dense ordinal (not docId). VectorValues.SearchSimlarity may be used to compare vectors at
|
* by dense ordinal (not docId). {@link VectorSimilarityFunction} may be used to compare vectors at
|
||||||
* query time (for example as part of result ranking). A VectorField may be associated with a search
|
* query time (for example as part of result ranking). A KnnVectorField may be associated with a
|
||||||
* similarity function defining the metric used for nearest-neighbor search among vectors of that
|
* search similarity function defining the metric used for nearest-neighbor search among vectors of
|
||||||
* field.
|
* that field.
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class VectorField extends Field {
|
public class KnnVectorField extends Field {
|
||||||
|
|
||||||
private static FieldType createType(float[] v, VectorSimilarityFunction similarityFunction) {
|
private static FieldType createType(float[] v, VectorSimilarityFunction similarityFunction) {
|
||||||
if (v == null) {
|
if (v == null) {
|
||||||
|
@ -82,7 +82,7 @@ public class VectorField extends Field {
|
||||||
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
|
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
|
||||||
* dimension > 1024.
|
* dimension > 1024.
|
||||||
*/
|
*/
|
||||||
public VectorField(String name, float[] vector, VectorSimilarityFunction similarityFunction) {
|
public KnnVectorField(String name, float[] vector, VectorSimilarityFunction similarityFunction) {
|
||||||
super(name, createType(vector, similarityFunction));
|
super(name, createType(vector, similarityFunction));
|
||||||
fieldsData = vector;
|
fieldsData = vector;
|
||||||
}
|
}
|
||||||
|
@ -97,7 +97,7 @@ public class VectorField extends Field {
|
||||||
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
|
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
|
||||||
* dimension > 1024.
|
* dimension > 1024.
|
||||||
*/
|
*/
|
||||||
public VectorField(String name, float[] vector) {
|
public KnnVectorField(String name, float[] vector) {
|
||||||
this(name, vector, VectorSimilarityFunction.EUCLIDEAN);
|
this(name, vector, VectorSimilarityFunction.EUCLIDEAN);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -111,7 +111,7 @@ public class VectorField extends Field {
|
||||||
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
|
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
|
||||||
* dimension > 1024.
|
* dimension > 1024.
|
||||||
*/
|
*/
|
||||||
public VectorField(String name, float[] vector, FieldType fieldType) {
|
public KnnVectorField(String name, float[] vector, FieldType fieldType) {
|
||||||
super(name, fieldType);
|
super(name, fieldType);
|
||||||
fieldsData = vector;
|
fieldsData = vector;
|
||||||
}
|
}
|
|
@ -359,7 +359,7 @@ public final class CheckIndex implements Closeable {
|
||||||
public long totalVectorValues;
|
public long totalVectorValues;
|
||||||
|
|
||||||
/** Total number of fields with vectors. */
|
/** Total number of fields with vectors. */
|
||||||
public int totalVectorFields;
|
public int totalKnnVectorFields;
|
||||||
|
|
||||||
/** Exception thrown during vector values test (null on success) */
|
/** Exception thrown during vector values test (null on success) */
|
||||||
public Throwable error = null;
|
public Throwable error = null;
|
||||||
|
@ -2310,7 +2310,7 @@ public final class CheckIndex implements Closeable {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
status.totalVectorFields++;
|
status.totalKnnVectorFields++;
|
||||||
|
|
||||||
int docCount = 0;
|
int docCount = 0;
|
||||||
while (values.nextDoc() != NO_MORE_DOCS) {
|
while (values.nextDoc() != NO_MORE_DOCS) {
|
||||||
|
@ -2346,7 +2346,7 @@ public final class CheckIndex implements Closeable {
|
||||||
String.format(
|
String.format(
|
||||||
Locale.ROOT,
|
Locale.ROOT,
|
||||||
"OK [%d fields, %d vectors] [took %.3f sec]",
|
"OK [%d fields, %d vectors] [took %.3f sec]",
|
||||||
status.totalVectorFields,
|
status.totalKnnVectorFields,
|
||||||
status.totalVectorValues,
|
status.totalVectorValues,
|
||||||
nsToSec(System.nanoTime() - startNS)));
|
nsToSec(System.nanoTime() - startNS)));
|
||||||
|
|
||||||
|
|
|
@ -20,11 +20,11 @@ import java.io.IOException;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import org.apache.lucene.codecs.DocValuesProducer;
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.codecs.FieldsProducer;
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.NormsProducer;
|
import org.apache.lucene.codecs.NormsProducer;
|
||||||
import org.apache.lucene.codecs.PointsReader;
|
import org.apache.lucene.codecs.PointsReader;
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
|
||||||
/** LeafReader implemented by codec APIs. */
|
/** LeafReader implemented by codec APIs. */
|
||||||
|
@ -81,7 +81,7 @@ public abstract class CodecReader extends LeafReader {
|
||||||
*
|
*
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public abstract VectorReader getVectorReader();
|
public abstract KnnVectorsReader getVectorReader();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final void document(int docID, StoredFieldVisitor visitor) throws IOException {
|
public final void document(int docID, StoredFieldVisitor visitor) throws IOException {
|
||||||
|
|
|
@ -20,11 +20,11 @@ import java.io.IOException;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import org.apache.lucene.codecs.DocValuesProducer;
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.codecs.FieldsProducer;
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.NormsProducer;
|
import org.apache.lucene.codecs.NormsProducer;
|
||||||
import org.apache.lucene.codecs.PointsReader;
|
import org.apache.lucene.codecs.PointsReader;
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -100,7 +100,7 @@ public abstract class FilterCodecReader extends CodecReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorReader getVectorReader() {
|
public KnnVectorsReader getVectorReader() {
|
||||||
return in.getVectorReader();
|
return in.getVectorReader();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,15 +30,15 @@ import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||||
import org.apache.lucene.codecs.NormsConsumer;
|
import org.apache.lucene.codecs.NormsConsumer;
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.NormsProducer;
|
import org.apache.lucene.codecs.NormsProducer;
|
||||||
import org.apache.lucene.codecs.PointsFormat;
|
import org.apache.lucene.codecs.PointsFormat;
|
||||||
import org.apache.lucene.codecs.PointsWriter;
|
import org.apache.lucene.codecs.PointsWriter;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
|
||||||
import org.apache.lucene.codecs.VectorWriter;
|
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.document.VectorField;
|
import org.apache.lucene.document.KnnVectorField;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.Sort;
|
import org.apache.lucene.search.Sort;
|
||||||
import org.apache.lucene.search.SortField;
|
import org.apache.lucene.search.SortField;
|
||||||
|
@ -430,7 +430,7 @@ final class IndexingChain implements Accountable {
|
||||||
|
|
||||||
/** Writes all buffered vectors. */
|
/** Writes all buffered vectors. */
|
||||||
private void writeVectors(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
|
private void writeVectors(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
|
||||||
VectorWriter vectorWriter = null;
|
KnnVectorsWriter knnVectorsWriter = null;
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
for (int i = 0; i < fieldHash.length; i++) {
|
for (int i = 0; i < fieldHash.length; i++) {
|
||||||
|
@ -446,19 +446,19 @@ final class IndexingChain implements Accountable {
|
||||||
+ perField.fieldInfo.name
|
+ perField.fieldInfo.name
|
||||||
+ "\" has no vectors but wrote them");
|
+ "\" has no vectors but wrote them");
|
||||||
}
|
}
|
||||||
if (vectorWriter == null) {
|
if (knnVectorsWriter == null) {
|
||||||
// lazy init
|
// lazy init
|
||||||
VectorFormat fmt = state.segmentInfo.getCodec().vectorFormat();
|
KnnVectorsFormat fmt = state.segmentInfo.getCodec().knnVectorsFormat();
|
||||||
if (fmt == null) {
|
if (fmt == null) {
|
||||||
throw new IllegalStateException(
|
throw new IllegalStateException(
|
||||||
"field=\""
|
"field=\""
|
||||||
+ perField.fieldInfo.name
|
+ perField.fieldInfo.name
|
||||||
+ "\" was indexed as vectors but codec does not support vectors");
|
+ "\" was indexed as vectors but codec does not support vectors");
|
||||||
}
|
}
|
||||||
vectorWriter = fmt.fieldsWriter(state);
|
knnVectorsWriter = fmt.fieldsWriter(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
perField.vectorValuesWriter.flush(sortMap, vectorWriter);
|
perField.vectorValuesWriter.flush(sortMap, knnVectorsWriter);
|
||||||
perField.vectorValuesWriter = null;
|
perField.vectorValuesWriter = null;
|
||||||
} else if (perField.fieldInfo != null && perField.fieldInfo.getVectorDimension() != 0) {
|
} else if (perField.fieldInfo != null && perField.fieldInfo.getVectorDimension() != 0) {
|
||||||
// BUG
|
// BUG
|
||||||
|
@ -472,15 +472,15 @@ final class IndexingChain implements Accountable {
|
||||||
perField = perField.next;
|
perField = perField.next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (vectorWriter != null) {
|
if (knnVectorsWriter != null) {
|
||||||
vectorWriter.finish();
|
knnVectorsWriter.finish();
|
||||||
}
|
}
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (success) {
|
if (success) {
|
||||||
IOUtils.close(vectorWriter);
|
IOUtils.close(knnVectorsWriter);
|
||||||
} else {
|
} else {
|
||||||
IOUtils.closeWhileHandlingException(vectorWriter);
|
IOUtils.closeWhileHandlingException(knnVectorsWriter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -761,7 +761,7 @@ final class IndexingChain implements Accountable {
|
||||||
pf.pointValuesWriter.addPackedValue(docID, field.binaryValue());
|
pf.pointValuesWriter.addPackedValue(docID, field.binaryValue());
|
||||||
}
|
}
|
||||||
if (fieldType.vectorDimension() != 0) {
|
if (fieldType.vectorDimension() != 0) {
|
||||||
pf.vectorValuesWriter.addValue(docID, ((VectorField) field).vectorValue());
|
pf.vectorValuesWriter.addValue(docID, ((KnnVectorField) field).vectorValue());
|
||||||
}
|
}
|
||||||
return indexedField;
|
return indexedField;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,11 +24,11 @@ import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import org.apache.lucene.codecs.DocValuesProducer;
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.codecs.FieldsProducer;
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.NormsProducer;
|
import org.apache.lucene.codecs.NormsProducer;
|
||||||
import org.apache.lucene.codecs.PointsReader;
|
import org.apache.lucene.codecs.PointsReader;
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
|
||||||
import org.apache.lucene.search.Sort;
|
import org.apache.lucene.search.Sort;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.InfoStream;
|
import org.apache.lucene.util.InfoStream;
|
||||||
|
@ -80,7 +80,7 @@ public class MergeState {
|
||||||
public final PointsReader[] pointsReaders;
|
public final PointsReader[] pointsReaders;
|
||||||
|
|
||||||
/** Vector readers to merge */
|
/** Vector readers to merge */
|
||||||
public final VectorReader[] vectorReaders;
|
public final KnnVectorsReader[] knnVectorsReaders;
|
||||||
|
|
||||||
/** Max docs per reader */
|
/** Max docs per reader */
|
||||||
public final int[] maxDocs;
|
public final int[] maxDocs;
|
||||||
|
@ -109,7 +109,7 @@ public class MergeState {
|
||||||
termVectorsReaders = new TermVectorsReader[numReaders];
|
termVectorsReaders = new TermVectorsReader[numReaders];
|
||||||
docValuesProducers = new DocValuesProducer[numReaders];
|
docValuesProducers = new DocValuesProducer[numReaders];
|
||||||
pointsReaders = new PointsReader[numReaders];
|
pointsReaders = new PointsReader[numReaders];
|
||||||
vectorReaders = new VectorReader[numReaders];
|
knnVectorsReaders = new KnnVectorsReader[numReaders];
|
||||||
fieldInfos = new FieldInfos[numReaders];
|
fieldInfos = new FieldInfos[numReaders];
|
||||||
liveDocs = new Bits[numReaders];
|
liveDocs = new Bits[numReaders];
|
||||||
|
|
||||||
|
@ -147,9 +147,9 @@ public class MergeState {
|
||||||
pointsReaders[i] = pointsReaders[i].getMergeInstance();
|
pointsReaders[i] = pointsReaders[i].getMergeInstance();
|
||||||
}
|
}
|
||||||
|
|
||||||
vectorReaders[i] = reader.getVectorReader();
|
knnVectorsReaders[i] = reader.getVectorReader();
|
||||||
if (vectorReaders[i] != null) {
|
if (knnVectorsReaders[i] != null) {
|
||||||
vectorReaders[i] = vectorReaders[i].getMergeInstance();
|
knnVectorsReaders[i] = knnVectorsReaders[i].getMergeInstance();
|
||||||
}
|
}
|
||||||
|
|
||||||
numDocs += reader.numDocs();
|
numDocs += reader.numDocs();
|
||||||
|
|
|
@ -28,12 +28,12 @@ import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.CompoundDirectory;
|
import org.apache.lucene.codecs.CompoundDirectory;
|
||||||
import org.apache.lucene.codecs.FieldsProducer;
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.NormsProducer;
|
import org.apache.lucene.codecs.NormsProducer;
|
||||||
import org.apache.lucene.codecs.PointsReader;
|
import org.apache.lucene.codecs.PointsReader;
|
||||||
import org.apache.lucene.codecs.PostingsFormat;
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
|
||||||
import org.apache.lucene.index.IndexReader.CacheKey;
|
import org.apache.lucene.index.IndexReader.CacheKey;
|
||||||
import org.apache.lucene.index.IndexReader.ClosedListener;
|
import org.apache.lucene.index.IndexReader.ClosedListener;
|
||||||
import org.apache.lucene.store.AlreadyClosedException;
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
|
@ -59,7 +59,7 @@ final class SegmentCoreReaders {
|
||||||
final StoredFieldsReader fieldsReaderOrig;
|
final StoredFieldsReader fieldsReaderOrig;
|
||||||
final TermVectorsReader termVectorsReader;
|
final TermVectorsReader termVectorsReader;
|
||||||
final PointsReader pointsReader;
|
final PointsReader pointsReader;
|
||||||
final VectorReader vectorReader;
|
final KnnVectorsReader knnVectorsReader;
|
||||||
final CompoundDirectory cfsReader;
|
final CompoundDirectory cfsReader;
|
||||||
final String segment;
|
final String segment;
|
||||||
/**
|
/**
|
||||||
|
@ -142,9 +142,9 @@ final class SegmentCoreReaders {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (coreFieldInfos.hasVectorValues()) {
|
if (coreFieldInfos.hasVectorValues()) {
|
||||||
vectorReader = codec.vectorFormat().fieldsReader(segmentReadState);
|
knnVectorsReader = codec.knnVectorsFormat().fieldsReader(segmentReadState);
|
||||||
} else {
|
} else {
|
||||||
vectorReader = null;
|
knnVectorsReader = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
|
@ -185,7 +185,7 @@ final class SegmentCoreReaders {
|
||||||
cfsReader,
|
cfsReader,
|
||||||
normsProducer,
|
normsProducer,
|
||||||
pointsReader,
|
pointsReader,
|
||||||
vectorReader);
|
knnVectorsReader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,12 +21,12 @@ import java.util.List;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
import org.apache.lucene.codecs.FieldsConsumer;
|
import org.apache.lucene.codecs.FieldsConsumer;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||||
import org.apache.lucene.codecs.NormsConsumer;
|
import org.apache.lucene.codecs.NormsConsumer;
|
||||||
import org.apache.lucene.codecs.NormsProducer;
|
import org.apache.lucene.codecs.NormsProducer;
|
||||||
import org.apache.lucene.codecs.PointsWriter;
|
import org.apache.lucene.codecs.PointsWriter;
|
||||||
import org.apache.lucene.codecs.StoredFieldsWriter;
|
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||||
import org.apache.lucene.codecs.TermVectorsWriter;
|
import org.apache.lucene.codecs.TermVectorsWriter;
|
||||||
import org.apache.lucene.codecs.VectorWriter;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.util.InfoStream;
|
import org.apache.lucene.util.InfoStream;
|
||||||
|
@ -236,7 +236,7 @@ final class SegmentMerger {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void mergeVectorValues(SegmentWriteState segmentWriteState) throws IOException {
|
private void mergeVectorValues(SegmentWriteState segmentWriteState) throws IOException {
|
||||||
try (VectorWriter writer = codec.vectorFormat().fieldsWriter(segmentWriteState)) {
|
try (KnnVectorsWriter writer = codec.knnVectorsFormat().fieldsWriter(segmentWriteState)) {
|
||||||
writer.merge(mergeState);
|
writer.merge(mergeState);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,11 +24,11 @@ import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.DocValuesProducer;
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FieldsProducer;
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.NormsProducer;
|
import org.apache.lucene.codecs.NormsProducer;
|
||||||
import org.apache.lucene.codecs.PointsReader;
|
import org.apache.lucene.codecs.PointsReader;
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
@ -267,8 +267,8 @@ public final class SegmentReader extends CodecReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorReader getVectorReader() {
|
public KnnVectorsReader getVectorReader() {
|
||||||
return core.vectorReader;
|
return core.knnVectorsReader;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -22,11 +22,11 @@ import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import org.apache.lucene.codecs.DocValuesProducer;
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.codecs.FieldsProducer;
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.NormsProducer;
|
import org.apache.lucene.codecs.NormsProducer;
|
||||||
import org.apache.lucene.codecs.PointsReader;
|
import org.apache.lucene.codecs.PointsReader;
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
|
||||||
|
@ -78,7 +78,7 @@ public final class SlowCodecReaderWrapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorReader getVectorReader() {
|
public KnnVectorsReader getVectorReader() {
|
||||||
reader.ensureOpen();
|
reader.ensureOpen();
|
||||||
return readerToVectorReader(reader);
|
return readerToVectorReader(reader);
|
||||||
}
|
}
|
||||||
|
@ -159,8 +159,8 @@ public final class SlowCodecReaderWrapper {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private static VectorReader readerToVectorReader(LeafReader reader) {
|
private static KnnVectorsReader readerToVectorReader(LeafReader reader) {
|
||||||
return new VectorReader() {
|
return new KnnVectorsReader() {
|
||||||
@Override
|
@Override
|
||||||
public VectorValues getVectorValues(String field) throws IOException {
|
public VectorValues getVectorValues(String field) throws IOException {
|
||||||
return reader.getVectorValues(field);
|
return reader.getVectorValues(field);
|
||||||
|
|
|
@ -26,11 +26,11 @@ import java.util.Iterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import org.apache.lucene.codecs.DocValuesProducer;
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.codecs.FieldsProducer;
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.NormsProducer;
|
import org.apache.lucene.codecs.NormsProducer;
|
||||||
import org.apache.lucene.codecs.PointsReader;
|
import org.apache.lucene.codecs.PointsReader;
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
|
||||||
import org.apache.lucene.search.Sort;
|
import org.apache.lucene.search.Sort;
|
||||||
import org.apache.lucene.search.SortField;
|
import org.apache.lucene.search.SortField;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
@ -301,9 +301,9 @@ public final class SortingCodecReader extends FilterCodecReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorReader getVectorReader() {
|
public KnnVectorsReader getVectorReader() {
|
||||||
VectorReader delegate = in.getVectorReader();
|
KnnVectorsReader delegate = in.getVectorReader();
|
||||||
return new VectorReader() {
|
return new KnnVectorsReader() {
|
||||||
@Override
|
@Override
|
||||||
public void checkIntegrity() throws IOException {
|
public void checkIntegrity() throws IOException {
|
||||||
delegate.checkIntegrity();
|
delegate.checkIntegrity();
|
||||||
|
|
|
@ -17,12 +17,13 @@
|
||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import org.apache.lucene.document.KnnVectorField;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class provides access to per-document floating point vector values indexed as {@link
|
* This class provides access to per-document floating point vector values indexed as {@link
|
||||||
* org.apache.lucene.document.VectorField}.
|
* KnnVectorField}.
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -22,7 +22,7 @@ import java.nio.ByteBuffer;
|
||||||
import java.nio.ByteOrder;
|
import java.nio.ByteOrder;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.apache.lucene.codecs.VectorWriter;
|
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -103,16 +103,16 @@ class VectorValuesWriter {
|
||||||
*
|
*
|
||||||
* @param sortMap specifies the order of documents being flushed, or null if they are to be
|
* @param sortMap specifies the order of documents being flushed, or null if they are to be
|
||||||
* flushed in docid order
|
* flushed in docid order
|
||||||
* @param vectorWriter the Codec's vector writer that handles the actual encoding and I/O
|
* @param knnVectorsWriter the Codec's vector writer that handles the actual encoding and I/O
|
||||||
* @throws IOException if there is an error writing the field and its values
|
* @throws IOException if there is an error writing the field and its values
|
||||||
*/
|
*/
|
||||||
public void flush(Sorter.DocMap sortMap, VectorWriter vectorWriter) throws IOException {
|
public void flush(Sorter.DocMap sortMap, KnnVectorsWriter knnVectorsWriter) throws IOException {
|
||||||
VectorValues vectorValues =
|
VectorValues vectorValues =
|
||||||
new BufferedVectorValues(docsWithField, vectors, fieldInfo.getVectorDimension());
|
new BufferedVectorValues(docsWithField, vectors, fieldInfo.getVectorDimension());
|
||||||
if (sortMap != null) {
|
if (sortMap != null) {
|
||||||
vectorWriter.writeField(fieldInfo, new SortingVectorValues(vectorValues, sortMap));
|
knnVectorsWriter.writeField(fieldInfo, new SortingVectorValues(vectorValues, sortMap));
|
||||||
} else {
|
} else {
|
||||||
vectorWriter.writeField(fieldInfo, vectorValues);
|
knnVectorsWriter.writeField(fieldInfo, vectorValues);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,4 +13,4 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat
|
org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat
|
|
@ -17,10 +17,10 @@
|
||||||
package org.apache.lucene.codecs.lucene90;
|
package org.apache.lucene.codecs.lucene90;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.index.BaseVectorFormatTestCase;
|
import org.apache.lucene.index.BaseKnnVectorsFormatTestCase;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
public class TestLucene90HnswVectorFormat extends BaseVectorFormatTestCase {
|
public class TestLucene90HnswVectorsFormat extends BaseKnnVectorsFormatTestCase {
|
||||||
@Override
|
@Override
|
||||||
protected Codec getCodec() {
|
protected Codec getCodec() {
|
||||||
return TestUtil.getDefaultCodec();
|
return TestUtil.getDefaultCodec();
|
|
@ -25,14 +25,14 @@ import java.util.Random;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorWriter;
|
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||||
import org.apache.lucene.codecs.asserting.AssertingCodec;
|
import org.apache.lucene.codecs.asserting.AssertingCodec;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.VectorField;
|
import org.apache.lucene.document.KnnVectorField;
|
||||||
import org.apache.lucene.index.BaseVectorFormatTestCase;
|
import org.apache.lucene.index.BaseKnnVectorsFormatTestCase;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
@ -49,7 +49,7 @@ import org.apache.lucene.util.TestUtil;
|
||||||
import org.hamcrest.MatcherAssert;
|
import org.hamcrest.MatcherAssert;
|
||||||
|
|
||||||
/** Basic tests of PerFieldDocValuesFormat */
|
/** Basic tests of PerFieldDocValuesFormat */
|
||||||
public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
|
||||||
private Codec codec;
|
private Codec codec;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -67,14 +67,14 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
||||||
try (Directory directory = newDirectory()) {
|
try (Directory directory = newDirectory()) {
|
||||||
// we don't use RandomIndexWriter because it might add more values than we expect !!!!1
|
// we don't use RandomIndexWriter because it might add more values than we expect !!!!1
|
||||||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||||
WriteRecordingVectorFormat format1 =
|
WriteRecordingKnnVectorsFormat format1 =
|
||||||
new WriteRecordingVectorFormat(TestUtil.getDefaultVectorFormat());
|
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
|
||||||
WriteRecordingVectorFormat format2 =
|
WriteRecordingKnnVectorsFormat format2 =
|
||||||
new WriteRecordingVectorFormat(TestUtil.getDefaultVectorFormat());
|
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
|
||||||
iwc.setCodec(
|
iwc.setCodec(
|
||||||
new AssertingCodec() {
|
new AssertingCodec() {
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat getVectorFormatForField(String field) {
|
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||||
if ("field1".equals(field)) {
|
if ("field1".equals(field)) {
|
||||||
return format1;
|
return format1;
|
||||||
} else {
|
} else {
|
||||||
|
@ -86,12 +86,12 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
||||||
try (IndexWriter iwriter = new IndexWriter(directory, iwc)) {
|
try (IndexWriter iwriter = new IndexWriter(directory, iwc)) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(newTextField("id", "1", Field.Store.YES));
|
doc.add(newTextField("id", "1", Field.Store.YES));
|
||||||
doc.add(new VectorField("field1", new float[] {1, 2, 3}));
|
doc.add(new KnnVectorField("field1", new float[] {1, 2, 3}));
|
||||||
iwriter.addDocument(doc);
|
iwriter.addDocument(doc);
|
||||||
|
|
||||||
doc.clear();
|
doc.clear();
|
||||||
doc.add(newTextField("id", "2", Field.Store.YES));
|
doc.add(newTextField("id", "2", Field.Store.YES));
|
||||||
doc.add(new VectorField("field2", new float[] {4, 5, 6}));
|
doc.add(new KnnVectorField("field2", new float[] {4, 5, 6}));
|
||||||
iwriter.addDocument(doc);
|
iwriter.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -128,19 +128,19 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
||||||
for (int i = 0; i < 3; i++) {
|
for (int i = 0; i < 3; i++) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(newTextField("id", "1", Field.Store.YES));
|
doc.add(newTextField("id", "1", Field.Store.YES));
|
||||||
doc.add(new VectorField("field", new float[] {1, 2, 3}));
|
doc.add(new KnnVectorField("field", new float[] {1, 2, 3}));
|
||||||
iw.addDocument(doc);
|
iw.addDocument(doc);
|
||||||
iw.commit();
|
iw.commit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexWriterConfig newConfig = newIndexWriterConfig(new MockAnalyzer(random()));
|
IndexWriterConfig newConfig = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||||
WriteRecordingVectorFormat newFormat =
|
WriteRecordingKnnVectorsFormat newFormat =
|
||||||
new WriteRecordingVectorFormat(TestUtil.getDefaultVectorFormat());
|
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
|
||||||
newConfig.setCodec(
|
newConfig.setCodec(
|
||||||
new AssertingCodec() {
|
new AssertingCodec() {
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat getVectorFormatForField(String field) {
|
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||||
return newFormat;
|
return newFormat;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -154,20 +154,20 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class WriteRecordingVectorFormat extends VectorFormat {
|
private static class WriteRecordingKnnVectorsFormat extends KnnVectorsFormat {
|
||||||
private final VectorFormat delegate;
|
private final KnnVectorsFormat delegate;
|
||||||
private final Set<String> fieldsWritten;
|
private final Set<String> fieldsWritten;
|
||||||
|
|
||||||
public WriteRecordingVectorFormat(VectorFormat delegate) {
|
public WriteRecordingKnnVectorsFormat(KnnVectorsFormat delegate) {
|
||||||
super(delegate.getName());
|
super(delegate.getName());
|
||||||
this.delegate = delegate;
|
this.delegate = delegate;
|
||||||
this.fieldsWritten = new HashSet<>();
|
this.fieldsWritten = new HashSet<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||||
VectorWriter writer = delegate.fieldsWriter(state);
|
KnnVectorsWriter writer = delegate.fieldsWriter(state);
|
||||||
return new VectorWriter() {
|
return new KnnVectorsWriter() {
|
||||||
@Override
|
@Override
|
||||||
public void writeField(FieldInfo fieldInfo, VectorValues values) throws IOException {
|
public void writeField(FieldInfo fieldInfo, VectorValues values) throws IOException {
|
||||||
fieldsWritten.add(fieldInfo.name);
|
fieldsWritten.add(fieldInfo.name);
|
||||||
|
@ -187,7 +187,7 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
|
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
|
||||||
return delegate.fieldsReader(state);
|
return delegate.fieldsReader(state);
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -87,21 +87,21 @@ public class TestPerFieldConsistency extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Field randomVectorField(Random random, String fieldName) {
|
private static Field randomKnnVectorField(Random random, String fieldName) {
|
||||||
VectorSimilarityFunction similarityFunction =
|
VectorSimilarityFunction similarityFunction =
|
||||||
RandomPicks.randomFrom(random, VectorSimilarityFunction.values());
|
RandomPicks.randomFrom(random, VectorSimilarityFunction.values());
|
||||||
float[] values = new float[randomIntBetween(1, 10)];
|
float[] values = new float[randomIntBetween(1, 10)];
|
||||||
for (int i = 0; i < values.length; i++) {
|
for (int i = 0; i < values.length; i++) {
|
||||||
values[i] = randomFloat();
|
values[i] = randomFloat();
|
||||||
}
|
}
|
||||||
return new VectorField(fieldName, values, similarityFunction);
|
return new KnnVectorField(fieldName, values, similarityFunction);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Field[] randomFieldsWithTheSameName(String fieldName) {
|
private static Field[] randomFieldsWithTheSameName(String fieldName) {
|
||||||
final Field textField = randomIndexedField(random(), fieldName);
|
final Field textField = randomIndexedField(random(), fieldName);
|
||||||
final Field docValuesField = randomDocValuesField(random(), fieldName);
|
final Field docValuesField = randomDocValuesField(random(), fieldName);
|
||||||
final Field pointField = randomPointField(random(), fieldName);
|
final Field pointField = randomPointField(random(), fieldName);
|
||||||
final Field vectorField = randomVectorField(random(), fieldName);
|
final Field vectorField = randomKnnVectorField(random(), fieldName);
|
||||||
return new Field[] {textField, docValuesField, pointField, vectorField};
|
return new Field[] {textField, docValuesField, pointField, vectorField};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.Field.Store;
|
import org.apache.lucene.document.Field.Store;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.document.IntPoint;
|
import org.apache.lucene.document.IntPoint;
|
||||||
|
import org.apache.lucene.document.KnnVectorField;
|
||||||
import org.apache.lucene.document.NumericDocValuesField;
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
import org.apache.lucene.document.SortedDocValuesField;
|
import org.apache.lucene.document.SortedDocValuesField;
|
||||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||||
|
@ -40,7 +41,6 @@ import org.apache.lucene.document.SortedSetDocValuesField;
|
||||||
import org.apache.lucene.document.StoredField;
|
import org.apache.lucene.document.StoredField;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.document.VectorField;
|
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
@ -381,6 +381,7 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
public void testRAMUsageVector() throws IOException {
|
public void testRAMUsageVector() throws IOException {
|
||||||
doTestRAMUsage(
|
doTestRAMUsage(
|
||||||
field ->
|
field ->
|
||||||
new VectorField(field, new float[] {1, 2, 3, 4}, VectorSimilarityFunction.EUCLIDEAN));
|
new KnnVectorField(
|
||||||
|
field, new float[] {1, 2, 3, 4}, VectorSimilarityFunction.EUCLIDEAN));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,17 +27,17 @@ import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
|
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
|
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsReader;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
|
import org.apache.lucene.document.KnnVectorField;
|
||||||
import org.apache.lucene.document.SortedDocValuesField;
|
import org.apache.lucene.document.SortedDocValuesField;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.document.VectorField;
|
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
@ -54,7 +54,7 @@ public class TestKnnGraph extends LuceneTestCase {
|
||||||
|
|
||||||
private static final String KNN_GRAPH_FIELD = "vector";
|
private static final String KNN_GRAPH_FIELD = "vector";
|
||||||
|
|
||||||
private static int maxConn = Lucene90HnswVectorFormat.DEFAULT_MAX_CONN;
|
private static int maxConn = Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
|
||||||
|
|
||||||
private Codec codec;
|
private Codec codec;
|
||||||
private VectorSimilarityFunction similarityFunction;
|
private VectorSimilarityFunction similarityFunction;
|
||||||
|
@ -69,9 +69,9 @@ public class TestKnnGraph extends LuceneTestCase {
|
||||||
codec =
|
codec =
|
||||||
new Lucene90Codec() {
|
new Lucene90Codec() {
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat getVectorFormatForField(String field) {
|
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||||
return new Lucene90HnswVectorFormat(
|
return new Lucene90HnswVectorsFormat(
|
||||||
maxConn, Lucene90HnswVectorFormat.DEFAULT_BEAM_WIDTH);
|
maxConn, Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -81,7 +81,7 @@ public class TestKnnGraph extends LuceneTestCase {
|
||||||
|
|
||||||
@After
|
@After
|
||||||
public void cleanup() {
|
public void cleanup() {
|
||||||
maxConn = Lucene90HnswVectorFormat.DEFAULT_MAX_CONN;
|
maxConn = Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Basic test of creating documents in a graph */
|
/** Basic test of creating documents in a graph */
|
||||||
|
@ -182,11 +182,11 @@ public class TestKnnGraph extends LuceneTestCase {
|
||||||
iw.forceMerge(1);
|
iw.forceMerge(1);
|
||||||
}
|
}
|
||||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||||
PerFieldVectorFormat.FieldsReader perFieldReader =
|
PerFieldKnnVectorsFormat.FieldsReader perFieldReader =
|
||||||
(PerFieldVectorFormat.FieldsReader)
|
(PerFieldKnnVectorsFormat.FieldsReader)
|
||||||
((CodecReader) getOnlyLeafReader(reader)).getVectorReader();
|
((CodecReader) getOnlyLeafReader(reader)).getVectorReader();
|
||||||
Lucene90HnswVectorReader vectorReader =
|
Lucene90HnswVectorsReader vectorReader =
|
||||||
(Lucene90HnswVectorReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
|
(Lucene90HnswVectorsReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
|
||||||
graph = copyGraph(vectorReader.getGraphValues(KNN_GRAPH_FIELD));
|
graph = copyGraph(vectorReader.getGraphValues(KNN_GRAPH_FIELD));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -323,13 +323,13 @@ public class TestKnnGraph extends LuceneTestCase {
|
||||||
for (LeafReaderContext ctx : dr.leaves()) {
|
for (LeafReaderContext ctx : dr.leaves()) {
|
||||||
LeafReader reader = ctx.reader();
|
LeafReader reader = ctx.reader();
|
||||||
VectorValues vectorValues = reader.getVectorValues(KNN_GRAPH_FIELD);
|
VectorValues vectorValues = reader.getVectorValues(KNN_GRAPH_FIELD);
|
||||||
PerFieldVectorFormat.FieldsReader perFieldReader =
|
PerFieldKnnVectorsFormat.FieldsReader perFieldReader =
|
||||||
(PerFieldVectorFormat.FieldsReader) ((CodecReader) reader).getVectorReader();
|
(PerFieldKnnVectorsFormat.FieldsReader) ((CodecReader) reader).getVectorReader();
|
||||||
if (perFieldReader == null) {
|
if (perFieldReader == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Lucene90HnswVectorReader vectorReader =
|
Lucene90HnswVectorsReader vectorReader =
|
||||||
(Lucene90HnswVectorReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
|
(Lucene90HnswVectorsReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
|
||||||
KnnGraphValues graphValues = vectorReader.getGraphValues(KNN_GRAPH_FIELD);
|
KnnGraphValues graphValues = vectorReader.getGraphValues(KNN_GRAPH_FIELD);
|
||||||
assertEquals((vectorValues == null), (graphValues == null));
|
assertEquals((vectorValues == null), (graphValues == null));
|
||||||
if (vectorValues == null) {
|
if (vectorValues == null) {
|
||||||
|
@ -458,8 +458,8 @@ public class TestKnnGraph extends LuceneTestCase {
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
if (vector != null) {
|
if (vector != null) {
|
||||||
FieldType fieldType = VectorField.createFieldType(vector.length, similarityFunction);
|
FieldType fieldType = KnnVectorField.createFieldType(vector.length, similarityFunction);
|
||||||
doc.add(new VectorField(KNN_GRAPH_FIELD, vector, fieldType));
|
doc.add(new KnnVectorField(KNN_GRAPH_FIELD, vector, fieldType));
|
||||||
}
|
}
|
||||||
String idString = Integer.toString(id);
|
String idString = Integer.toString(id);
|
||||||
doc.add(new StringField("id", idString, Field.Store.YES));
|
doc.add(new StringField("id", idString, Field.Store.YES));
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.document.BinaryDocValuesField;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
|
import org.apache.lucene.document.KnnVectorField;
|
||||||
import org.apache.lucene.document.LongPoint;
|
import org.apache.lucene.document.LongPoint;
|
||||||
import org.apache.lucene.document.NumericDocValuesField;
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
import org.apache.lucene.document.SortedDocValuesField;
|
import org.apache.lucene.document.SortedDocValuesField;
|
||||||
|
@ -38,7 +39,6 @@ import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.document.VectorField;
|
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Sort;
|
import org.apache.lucene.search.Sort;
|
||||||
|
@ -127,7 +127,7 @@ public class TestSortingCodecReader extends LuceneTestCase {
|
||||||
doc.add(new BinaryDocValuesField("binary_dv", new BytesRef(Integer.toString(docId))));
|
doc.add(new BinaryDocValuesField("binary_dv", new BytesRef(Integer.toString(docId))));
|
||||||
doc.add(
|
doc.add(
|
||||||
new SortedSetDocValuesField("sorted_set_dv", new BytesRef(Integer.toString(docId))));
|
new SortedSetDocValuesField("sorted_set_dv", new BytesRef(Integer.toString(docId))));
|
||||||
doc.add(new VectorField("vector", new float[] {(float) docId}));
|
doc.add(new KnnVectorField("vector", new float[] {(float) docId}));
|
||||||
doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
|
doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
|
||||||
|
|
||||||
FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
|
FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
|
||||||
|
|
|
@ -35,14 +35,14 @@ import java.nio.file.Paths;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
|
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
|
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsReader;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
|
import org.apache.lucene.document.KnnVectorField;
|
||||||
import org.apache.lucene.document.StoredField;
|
import org.apache.lucene.document.StoredField;
|
||||||
import org.apache.lucene.document.VectorField;
|
|
||||||
import org.apache.lucene.index.CodecReader;
|
import org.apache.lucene.index.CodecReader;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
@ -240,7 +240,7 @@ public class KnnGraphTester {
|
||||||
for (LeafReaderContext context : reader.leaves()) {
|
for (LeafReaderContext context : reader.leaves()) {
|
||||||
LeafReader leafReader = context.reader();
|
LeafReader leafReader = context.reader();
|
||||||
KnnGraphValues knnValues =
|
KnnGraphValues knnValues =
|
||||||
((Lucene90HnswVectorReader) ((CodecReader) leafReader).getVectorReader())
|
((Lucene90HnswVectorsReader) ((CodecReader) leafReader).getVectorReader())
|
||||||
.getGraphValues(KNN_FIELD);
|
.getGraphValues(KNN_FIELD);
|
||||||
System.out.printf("Leaf %d has %d documents\n", context.ord, leafReader.maxDoc());
|
System.out.printf("Leaf %d has %d documents\n", context.ord, leafReader.maxDoc());
|
||||||
printGraphFanout(knnValues, leafReader.maxDoc());
|
printGraphFanout(knnValues, leafReader.maxDoc());
|
||||||
|
@ -573,15 +573,15 @@ public class KnnGraphTester {
|
||||||
iwc.setCodec(
|
iwc.setCodec(
|
||||||
new Lucene90Codec() {
|
new Lucene90Codec() {
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat getVectorFormatForField(String field) {
|
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||||
return new Lucene90HnswVectorFormat(maxConn, beamWidth);
|
return new Lucene90HnswVectorsFormat(maxConn, beamWidth);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
// iwc.setMergePolicy(NoMergePolicy.INSTANCE);
|
// iwc.setMergePolicy(NoMergePolicy.INSTANCE);
|
||||||
iwc.setRAMBufferSizeMB(1994d);
|
iwc.setRAMBufferSizeMB(1994d);
|
||||||
// iwc.setMaxBufferedDocs(10000);
|
// iwc.setMaxBufferedDocs(10000);
|
||||||
|
|
||||||
FieldType fieldType = VectorField.createFieldType(dim, VectorSimilarityFunction.DOT_PRODUCT);
|
FieldType fieldType = KnnVectorField.createFieldType(dim, VectorSimilarityFunction.DOT_PRODUCT);
|
||||||
if (quiet == false) {
|
if (quiet == false) {
|
||||||
iwc.setInfoStream(new PrintStreamInfoStream(System.out));
|
iwc.setInfoStream(new PrintStreamInfoStream(System.out));
|
||||||
System.out.println("creating index in " + indexPath);
|
System.out.println("creating index in " + indexPath);
|
||||||
|
@ -606,7 +606,7 @@ public class KnnGraphTester {
|
||||||
vectors.get(vector);
|
vectors.get(vector);
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
// System.out.println("vector=" + vector[0] + "," + vector[1] + "...");
|
// System.out.println("vector=" + vector[0] + "," + vector[1] + "...");
|
||||||
doc.add(new VectorField(KNN_FIELD, vector, fieldType));
|
doc.add(new KnnVectorField(KNN_FIELD, vector, fieldType));
|
||||||
doc.add(new StoredField(ID_FIELD, i));
|
doc.add(new StoredField(ID_FIELD, i));
|
||||||
iw.addDocument(doc);
|
iw.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,14 +24,14 @@ import java.util.Arrays;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
|
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
|
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsReader;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.KnnVectorField;
|
||||||
import org.apache.lucene.document.StoredField;
|
import org.apache.lucene.document.StoredField;
|
||||||
import org.apache.lucene.document.VectorField;
|
|
||||||
import org.apache.lucene.index.CodecReader;
|
import org.apache.lucene.index.CodecReader;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
@ -80,8 +80,8 @@ public class TestHnsw extends LuceneTestCase {
|
||||||
.setCodec(
|
.setCodec(
|
||||||
new Lucene90Codec() {
|
new Lucene90Codec() {
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat getVectorFormatForField(String field) {
|
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||||
return new Lucene90HnswVectorFormat(maxConn, beamWidth);
|
return new Lucene90HnswVectorsFormat(maxConn, beamWidth);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
try (IndexWriter iw = new IndexWriter(dir, iwc)) {
|
try (IndexWriter iw = new IndexWriter(dir, iwc)) {
|
||||||
|
@ -92,7 +92,7 @@ public class TestHnsw extends LuceneTestCase {
|
||||||
indexedDoc++;
|
indexedDoc++;
|
||||||
}
|
}
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("field", v2.vectorValue()));
|
doc.add(new KnnVectorField("field", v2.vectorValue()));
|
||||||
doc.add(new StoredField("id", v2.docID()));
|
doc.add(new StoredField("id", v2.docID()));
|
||||||
iw.addDocument(doc);
|
iw.addDocument(doc);
|
||||||
nVec++;
|
nVec++;
|
||||||
|
@ -108,8 +108,8 @@ public class TestHnsw extends LuceneTestCase {
|
||||||
assertEquals(indexedDoc, ctx.reader().numDocs());
|
assertEquals(indexedDoc, ctx.reader().numDocs());
|
||||||
assertVectorsEqual(v3, values);
|
assertVectorsEqual(v3, values);
|
||||||
KnnGraphValues graphValues =
|
KnnGraphValues graphValues =
|
||||||
((Lucene90HnswVectorReader)
|
((Lucene90HnswVectorsReader)
|
||||||
((PerFieldVectorFormat.FieldsReader)
|
((PerFieldKnnVectorsFormat.FieldsReader)
|
||||||
((CodecReader) ctx.reader()).getVectorReader())
|
((CodecReader) ctx.reader()).getVectorReader())
|
||||||
.getFieldReader("field"))
|
.getFieldReader("field"))
|
||||||
.getGraphValues("field");
|
.getGraphValues("field");
|
||||||
|
|
|
@ -18,16 +18,16 @@ package org.apache.lucene.codecs.asserting;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FilterCodec;
|
import org.apache.lucene.codecs.FilterCodec;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.PointsFormat;
|
import org.apache.lucene.codecs.PointsFormat;
|
||||||
import org.apache.lucene.codecs.PostingsFormat;
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||||
|
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldVectorFormat;
|
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
/** Acts like the default codec but with additional asserts. */
|
/** Acts like the default codec but with additional asserts. */
|
||||||
|
@ -62,11 +62,11 @@ public class AssertingCodec extends FilterCodec {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
private final VectorFormat vectorFormat =
|
private final KnnVectorsFormat knnVectorsFormat =
|
||||||
new PerFieldVectorFormat() {
|
new PerFieldKnnVectorsFormat() {
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat getVectorFormatForField(String field) {
|
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||||
return AssertingCodec.this.getVectorFormatForField(field);
|
return AssertingCodec.this.getKnnVectorsFormatForField(field);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ public class AssertingCodec extends FilterCodec {
|
||||||
private final PostingsFormat defaultFormat = new AssertingPostingsFormat();
|
private final PostingsFormat defaultFormat = new AssertingPostingsFormat();
|
||||||
private final DocValuesFormat defaultDVFormat = new AssertingDocValuesFormat();
|
private final DocValuesFormat defaultDVFormat = new AssertingDocValuesFormat();
|
||||||
private final PointsFormat pointsFormat = new AssertingPointsFormat();
|
private final PointsFormat pointsFormat = new AssertingPointsFormat();
|
||||||
private final VectorFormat defaultVectorFormat = new AssertingVectorFormat();
|
private final KnnVectorsFormat defaultKnnVectorsFormat = new AssertingKnnVectorsFormat();
|
||||||
|
|
||||||
public AssertingCodec() {
|
public AssertingCodec() {
|
||||||
super("Asserting", TestUtil.getDefaultCodec());
|
super("Asserting", TestUtil.getDefaultCodec());
|
||||||
|
@ -119,8 +119,8 @@ public class AssertingCodec extends FilterCodec {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorFormat vectorFormat() {
|
public KnnVectorsFormat knnVectorsFormat() {
|
||||||
return vectorFormat;
|
return knnVectorsFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -152,7 +152,7 @@ public class AssertingCodec extends FilterCodec {
|
||||||
*
|
*
|
||||||
* <p>The default implementation always returns "Asserting"
|
* <p>The default implementation always returns "Asserting"
|
||||||
*/
|
*/
|
||||||
public VectorFormat getVectorFormatForField(String field) {
|
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
|
||||||
return defaultVectorFormat;
|
return defaultKnnVectorsFormat;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,9 +18,9 @@
|
||||||
package org.apache.lucene.codecs.asserting;
|
package org.apache.lucene.codecs.asserting;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.VectorReader;
|
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.VectorWriter;
|
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
@ -28,29 +28,29 @@ import org.apache.lucene.index.VectorValues;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
/** Wraps the default VectorFormat and provides additional assertions. */
|
/** Wraps the default KnnVectorsFormat and provides additional assertions. */
|
||||||
public class AssertingVectorFormat extends VectorFormat {
|
public class AssertingKnnVectorsFormat extends KnnVectorsFormat {
|
||||||
|
|
||||||
private final VectorFormat delegate = TestUtil.getDefaultVectorFormat();
|
private final KnnVectorsFormat delegate = TestUtil.getDefaultKnnVectorsFormat();
|
||||||
|
|
||||||
public AssertingVectorFormat() {
|
public AssertingKnnVectorsFormat() {
|
||||||
super("Asserting");
|
super("Asserting");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||||
return new AssertingVectorWriter(delegate.fieldsWriter(state));
|
return new AssertingKnnVectorsWriter(delegate.fieldsWriter(state));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
|
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
|
||||||
return new AssertingVectorReader(delegate.fieldsReader(state));
|
return new AssertingKnnVectorsReader(delegate.fieldsReader(state));
|
||||||
}
|
}
|
||||||
|
|
||||||
static class AssertingVectorWriter extends VectorWriter {
|
static class AssertingKnnVectorsWriter extends KnnVectorsWriter {
|
||||||
final VectorWriter delegate;
|
final KnnVectorsWriter delegate;
|
||||||
|
|
||||||
AssertingVectorWriter(VectorWriter delegate) {
|
AssertingKnnVectorsWriter(KnnVectorsWriter delegate) {
|
||||||
assert delegate != null;
|
assert delegate != null;
|
||||||
this.delegate = delegate;
|
this.delegate = delegate;
|
||||||
}
|
}
|
||||||
|
@ -73,10 +73,10 @@ public class AssertingVectorFormat extends VectorFormat {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class AssertingVectorReader extends VectorReader {
|
static class AssertingKnnVectorsReader extends KnnVectorsReader {
|
||||||
final VectorReader delegate;
|
final KnnVectorsReader delegate;
|
||||||
|
|
||||||
AssertingVectorReader(VectorReader delegate) {
|
AssertingKnnVectorsReader(KnnVectorsReader delegate) {
|
||||||
assert delegate != null;
|
assert delegate != null;
|
||||||
this.delegate = delegate;
|
this.delegate = delegate;
|
||||||
}
|
}
|
|
@ -22,13 +22,13 @@ import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
|
import org.apache.lucene.document.KnnVectorField;
|
||||||
import org.apache.lucene.document.NumericDocValuesField;
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.document.VectorField;
|
|
||||||
import org.apache.lucene.search.Sort;
|
import org.apache.lucene.search.Sort;
|
||||||
import org.apache.lucene.search.SortField;
|
import org.apache.lucene.search.SortField;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
@ -39,44 +39,45 @@ import org.apache.lucene.util.TestUtil;
|
||||||
import org.apache.lucene.util.VectorUtil;
|
import org.apache.lucene.util.VectorUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class aiming at testing {@link VectorFormat vectors formats}. To test a new format, all you
|
* Base class aiming at testing {@link KnnVectorsFormat vectors formats}. To test a new format, all
|
||||||
* need is to register a new {@link Codec} which uses it and extend this class and override {@link
|
* you need is to register a new {@link Codec} which uses it and extend this class and override
|
||||||
* #getCodec()}.
|
* {@link #getCodec()}.
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCase {
|
public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTestCase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void addRandomFields(Document doc) {
|
protected void addRandomFields(Document doc) {
|
||||||
doc.add(new VectorField("v2", randomVector(30), VectorSimilarityFunction.EUCLIDEAN));
|
doc.add(new KnnVectorField("v2", randomVector(30), VectorSimilarityFunction.EUCLIDEAN));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFieldConstructor() {
|
public void testFieldConstructor() {
|
||||||
float[] v = new float[1];
|
float[] v = new float[1];
|
||||||
VectorField field = new VectorField("f", v);
|
KnnVectorField field = new KnnVectorField("f", v);
|
||||||
assertEquals(1, field.fieldType().vectorDimension());
|
assertEquals(1, field.fieldType().vectorDimension());
|
||||||
assertEquals(VectorSimilarityFunction.EUCLIDEAN, field.fieldType().vectorSimilarityFunction());
|
assertEquals(VectorSimilarityFunction.EUCLIDEAN, field.fieldType().vectorSimilarityFunction());
|
||||||
assertSame(v, field.vectorValue());
|
assertSame(v, field.vectorValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFieldConstructorExceptions() {
|
public void testFieldConstructorExceptions() {
|
||||||
expectThrows(IllegalArgumentException.class, () -> new VectorField(null, new float[1]));
|
expectThrows(IllegalArgumentException.class, () -> new KnnVectorField(null, new float[1]));
|
||||||
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", null));
|
expectThrows(IllegalArgumentException.class, () -> new KnnVectorField("f", null));
|
||||||
expectThrows(
|
expectThrows(
|
||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() -> new VectorField("f", new float[1], (VectorSimilarityFunction) null));
|
() -> new KnnVectorField("f", new float[1], (VectorSimilarityFunction) null));
|
||||||
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", new float[0]));
|
expectThrows(IllegalArgumentException.class, () -> new KnnVectorField("f", new float[0]));
|
||||||
expectThrows(
|
expectThrows(
|
||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() -> new VectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1]));
|
() -> new KnnVectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1]));
|
||||||
expectThrows(
|
expectThrows(
|
||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() -> new VectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1], (FieldType) null));
|
() ->
|
||||||
|
new KnnVectorField("f", new float[VectorValues.MAX_DIMENSIONS + 1], (FieldType) null));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFieldSetValue() {
|
public void testFieldSetValue() {
|
||||||
VectorField field = new VectorField("f", new float[1]);
|
KnnVectorField field = new KnnVectorField("f", new float[1]);
|
||||||
float[] v1 = new float[1];
|
float[] v1 = new float[1];
|
||||||
field.setVectorValue(v1);
|
field.setVectorValue(v1);
|
||||||
assertSame(v1, field.vectorValue());
|
assertSame(v1, field.vectorValue());
|
||||||
|
@ -90,11 +91,11 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
|
doc2.add(new KnnVectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||||
String errMsg =
|
String errMsg =
|
||||||
|
@ -106,12 +107,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.commit();
|
w.commit();
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
|
doc2.add(new KnnVectorField("f", new float[3], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||||
String errMsg =
|
String errMsg =
|
||||||
|
@ -126,11 +127,11 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
doc2.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||||
String errMsg =
|
String errMsg =
|
||||||
|
@ -142,12 +143,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.commit();
|
w.commit();
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
doc2.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||||
String errMsg =
|
String errMsg =
|
||||||
|
@ -161,13 +162,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (Directory dir = newDirectory()) {
|
try (Directory dir = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[1], VectorSimilarityFunction.DOT_PRODUCT));
|
doc2.add(new KnnVectorField("f", new float[1], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
|
@ -182,13 +183,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (Directory dir = newDirectory()) {
|
try (Directory dir = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
doc2.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
|
@ -202,7 +203,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
public void testAddIndexesDirectory0() throws Exception {
|
public void testAddIndexesDirectory0() throws Exception {
|
||||||
String fieldName = "field";
|
String fieldName = "field";
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
|
@ -230,7 +231,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
doc.add(new VectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
w2.addIndexes(dir);
|
w2.addIndexes(dir);
|
||||||
|
@ -250,7 +251,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
String fieldName = "field";
|
String fieldName = "field";
|
||||||
float[] vector = new float[1];
|
float[] vector = new float[1];
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField(fieldName, vector, VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField(fieldName, vector, VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
|
@ -281,12 +282,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(
|
expectThrows(
|
||||||
|
@ -304,12 +305,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w2.addIndexes(dir));
|
expectThrows(IllegalArgumentException.class, () -> w2.addIndexes(dir));
|
||||||
|
@ -326,12 +327,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
|
@ -352,12 +353,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
|
@ -378,12 +379,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[5], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
|
@ -402,12 +403,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
|
@ -425,8 +426,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
|
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
|
@ -443,13 +444,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() ->
|
() ->
|
||||||
doc.add(
|
doc.add(
|
||||||
new VectorField(
|
new KnnVectorField(
|
||||||
"f",
|
"f",
|
||||||
new float[VectorValues.MAX_DIMENSIONS + 1],
|
new float[VectorValues.MAX_DIMENSIONS + 1],
|
||||||
VectorSimilarityFunction.DOT_PRODUCT)));
|
VectorSimilarityFunction.DOT_PRODUCT)));
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
|
doc2.add(new KnnVectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
|
||||||
w.addDocument(doc2);
|
w.addDocument(doc2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -462,11 +463,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
expectThrows(
|
expectThrows(
|
||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() ->
|
() ->
|
||||||
doc.add(new VectorField("f", new float[0], VectorSimilarityFunction.EUCLIDEAN)));
|
doc.add(
|
||||||
|
new KnnVectorField("f", new float[0], VectorSimilarityFunction.EUCLIDEAN)));
|
||||||
assertEquals("cannot index an empty vector", e.getMessage());
|
assertEquals("cannot index an empty vector", e.getMessage());
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
|
doc2.add(new KnnVectorField("f", new float[1], VectorSimilarityFunction.EUCLIDEAN));
|
||||||
w.addDocument(doc2);
|
w.addDocument(doc2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -476,14 +478,14 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (Directory dir = newDirectory()) {
|
try (Directory dir = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||||
iwc.setCodec(Codec.forName("SimpleText"));
|
iwc.setCodec(Codec.forName("SimpleText"));
|
||||||
try (IndexWriter w = new IndexWriter(dir, iwc)) {
|
try (IndexWriter w = new IndexWriter(dir, iwc)) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.forceMerge(1);
|
w.forceMerge(1);
|
||||||
}
|
}
|
||||||
|
@ -497,20 +499,21 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (Directory dir = newDirectory()) {
|
try (Directory dir = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, iwc)) {
|
try (IndexWriter w = new IndexWriter(dir, iwc)) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.forceMerge(1);
|
w.forceMerge(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testInvalidVectorFieldUsage() {
|
public void testInvalidKnnVectorFieldUsage() {
|
||||||
VectorField field = new VectorField("field", new float[2], VectorSimilarityFunction.EUCLIDEAN);
|
KnnVectorField field =
|
||||||
|
new KnnVectorField("field", new float[2], VectorSimilarityFunction.EUCLIDEAN);
|
||||||
|
|
||||||
expectThrows(IllegalArgumentException.class, () -> field.setIntValue(14));
|
expectThrows(IllegalArgumentException.class, () -> field.setIntValue(14));
|
||||||
|
|
||||||
|
@ -524,7 +527,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new StringField("id", "0", Field.Store.NO));
|
doc.add(new StringField("id", "0", Field.Store.NO));
|
||||||
doc.add(new VectorField("v", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(new KnnVectorField("v", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.addDocument(new Document());
|
w.addDocument(new Document());
|
||||||
w.commit();
|
w.commit();
|
||||||
|
@ -544,17 +547,19 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testVectorFieldMissingFromOneSegment() throws Exception {
|
public void testKnnVectorFieldMissingFromOneSegment() throws Exception {
|
||||||
try (Directory dir = FSDirectory.open(createTempDir());
|
try (Directory dir = FSDirectory.open(createTempDir());
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new StringField("id", "0", Field.Store.NO));
|
doc.add(new StringField("id", "0", Field.Store.NO));
|
||||||
doc.add(new VectorField("v0", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(
|
||||||
|
new KnnVectorField("v0", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.commit();
|
w.commit();
|
||||||
|
|
||||||
doc = new Document();
|
doc = new Document();
|
||||||
doc.add(new VectorField("v1", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
doc.add(
|
||||||
|
new KnnVectorField("v1", new float[] {2, 3, 5}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.forceMerge(1);
|
w.forceMerge(1);
|
||||||
}
|
}
|
||||||
|
@ -581,7 +586,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
String fieldName = "int" + field;
|
String fieldName = "int" + field;
|
||||||
if (random().nextInt(100) == 17) {
|
if (random().nextInt(100) == 17) {
|
||||||
float[] v = randomVector(fieldDims[field]);
|
float[] v = randomVector(fieldDims[field]);
|
||||||
doc.add(new VectorField(fieldName, v, fieldSearchStrategies[field]));
|
doc.add(new KnnVectorField(fieldName, v, fieldSearchStrategies[field]));
|
||||||
fieldDocCounts[field]++;
|
fieldDocCounts[field]++;
|
||||||
fieldTotals[field] += v[0];
|
fieldTotals[field] += v[0];
|
||||||
}
|
}
|
||||||
|
@ -618,15 +623,15 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc1 = new Document();
|
Document doc1 = new Document();
|
||||||
doc1.add(new VectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
doc1.add(new KnnVectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
||||||
v[0] = 1;
|
v[0] = 1;
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
doc2.add(new KnnVectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
||||||
iw.addDocument(doc1);
|
iw.addDocument(doc1);
|
||||||
iw.addDocument(doc2);
|
iw.addDocument(doc2);
|
||||||
v[0] = 2;
|
v[0] = 2;
|
||||||
Document doc3 = new Document();
|
Document doc3 = new Document();
|
||||||
doc3.add(new VectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
doc3.add(new KnnVectorField(fieldName, v, VectorSimilarityFunction.EUCLIDEAN));
|
||||||
iw.addDocument(doc3);
|
iw.addDocument(doc3);
|
||||||
iw.forceMerge(1);
|
iw.forceMerge(1);
|
||||||
try (IndexReader reader = iw.getReader()) {
|
try (IndexReader reader = iw.getReader()) {
|
||||||
|
@ -676,19 +681,21 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testIndexMultipleVectorFields() throws Exception {
|
public void testIndexMultipleKnnVectorFields() throws Exception {
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
float[] v = new float[] {1};
|
float[] v = new float[] {1};
|
||||||
doc.add(new VectorField("field1", v, VectorSimilarityFunction.EUCLIDEAN));
|
doc.add(new KnnVectorField("field1", v, VectorSimilarityFunction.EUCLIDEAN));
|
||||||
doc.add(new VectorField("field2", new float[] {1, 2, 3}, VectorSimilarityFunction.EUCLIDEAN));
|
doc.add(
|
||||||
|
new KnnVectorField("field2", new float[] {1, 2, 3}, VectorSimilarityFunction.EUCLIDEAN));
|
||||||
iw.addDocument(doc);
|
iw.addDocument(doc);
|
||||||
v[0] = 2;
|
v[0] = 2;
|
||||||
iw.addDocument(doc);
|
iw.addDocument(doc);
|
||||||
doc = new Document();
|
doc = new Document();
|
||||||
doc.add(
|
doc.add(
|
||||||
new VectorField("field3", new float[] {1, 2, 3}, VectorSimilarityFunction.DOT_PRODUCT));
|
new KnnVectorField(
|
||||||
|
"field3", new float[] {1, 2, 3}, VectorSimilarityFunction.DOT_PRODUCT));
|
||||||
iw.addDocument(doc);
|
iw.addDocument(doc);
|
||||||
iw.forceMerge(1);
|
iw.forceMerge(1);
|
||||||
try (IndexReader reader = iw.getReader()) {
|
try (IndexReader reader = iw.getReader()) {
|
||||||
|
@ -879,7 +886,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
if (vector != null) {
|
if (vector != null) {
|
||||||
doc.add(new VectorField(field, vector, similarityFunction));
|
doc.add(new KnnVectorField(field, vector, similarityFunction));
|
||||||
}
|
}
|
||||||
doc.add(new NumericDocValuesField("sortkey", sortkey));
|
doc.add(new NumericDocValuesField("sortkey", sortkey));
|
||||||
String idString = Integer.toString(id);
|
String idString = Integer.toString(id);
|
||||||
|
@ -901,10 +908,10 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
try (Directory dir = newDirectory()) {
|
try (Directory dir = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("v1", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
|
doc.add(new KnnVectorField("v1", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
doc.add(new VectorField("v2", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
|
doc.add(new KnnVectorField("v2", randomVector(3), VectorSimilarityFunction.EUCLIDEAN));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -915,7 +922,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
// total 3 vector values were indexed:
|
// total 3 vector values were indexed:
|
||||||
assertEquals(3, segStatus.vectorValuesStatus.totalVectorValues);
|
assertEquals(3, segStatus.vectorValuesStatus.totalVectorValues);
|
||||||
// ... across 2 fields:
|
// ... across 2 fields:
|
||||||
assertEquals(2, segStatus.vectorValuesStatus.totalVectorFields);
|
assertEquals(2, segStatus.vectorValuesStatus.totalKnnVectorFields);
|
||||||
|
|
||||||
// Make sure CheckIndex in fact declares that it is testing vectors!
|
// Make sure CheckIndex in fact declares that it is testing vectors!
|
||||||
assertTrue(output.toString(IOUtils.UTF_8).contains("test: vectors..."));
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: vectors..."));
|
||||||
|
@ -939,7 +946,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
// randomly add a vector field
|
// randomly add a vector field
|
||||||
if (random().nextInt(4) == 3) {
|
if (random().nextInt(4) == 3) {
|
||||||
doc.add(new VectorField(fieldName, new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
doc.add(
|
||||||
|
new KnnVectorField(fieldName, new float[4], VectorSimilarityFunction.EUCLIDEAN));
|
||||||
}
|
}
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
|
@ -49,14 +49,14 @@ import java.util.zip.ZipEntry;
|
||||||
import java.util.zip.ZipInputStream;
|
import java.util.zip.ZipInputStream;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
|
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||||
import org.apache.lucene.codecs.PostingsFormat;
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.VectorFormat;
|
|
||||||
import org.apache.lucene.codecs.asserting.AssertingCodec;
|
import org.apache.lucene.codecs.asserting.AssertingCodec;
|
||||||
import org.apache.lucene.codecs.blockterms.LuceneFixedGap;
|
import org.apache.lucene.codecs.blockterms.LuceneFixedGap;
|
||||||
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
|
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
import org.apache.lucene.codecs.lucene90.Lucene90Codec;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
|
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
|
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat;
|
||||||
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
|
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||||
|
@ -1297,10 +1297,11 @@ public final class TestUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the actual default vector format (e.g. LuceneMNVectorFormat for this version of Lucene.
|
* Returns the actual default vector format (e.g. LuceneMNKnnVectorsFormat for this version of
|
||||||
|
* Lucene.
|
||||||
*/
|
*/
|
||||||
public static VectorFormat getDefaultVectorFormat() {
|
public static KnnVectorsFormat getDefaultKnnVectorsFormat() {
|
||||||
return new Lucene90HnswVectorFormat();
|
return new Lucene90HnswVectorsFormat();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {
|
public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {
|
||||||
|
|
|
@ -13,4 +13,4 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
org.apache.lucene.codecs.asserting.AssertingVectorFormat
|
org.apache.lucene.codecs.asserting.AssertingKnnVectorsFormat
|
Loading…
Reference in New Issue