LUCENE-9905: rename VectorValues.SearchStrategy to VectorValues.SimilarityFunction

This commit is contained in:
Michael Sokolov 2021-04-25 14:33:36 -04:00
parent 3115f85697
commit 6d4b5eaba3
42 changed files with 382 additions and 339 deletions

View File

@ -212,7 +212,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
pointIndexDimensionCount,
pointNumBytes,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
isSoftDeletesField);
} catch (IllegalStateException e) {
throw new CorruptIndexException(

View File

@ -158,7 +158,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), VECTOR_SEARCH_STRATEGY);
String scoreFunction = readString(VECTOR_SEARCH_STRATEGY.length, scratch);
VectorValues.SearchStrategy vectorDistFunc = distanceFunction(scoreFunction);
VectorValues.SimilarityFunction vectorDistFunc = distanceFunction(scoreFunction);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SOFT_DELETES);
@ -201,8 +201,8 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
return DocValuesType.valueOf(dvType);
}
public VectorValues.SearchStrategy distanceFunction(String scoreFunction) {
return VectorValues.SearchStrategy.valueOf(scoreFunction);
public VectorValues.SimilarityFunction distanceFunction(String scoreFunction) {
return VectorValues.SimilarityFunction.valueOf(scoreFunction);
}
private String readString(int offset, BytesRefBuilder scratch) {
@ -298,7 +298,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, VECTOR_SEARCH_STRATEGY);
SimpleTextUtil.write(out, fi.getVectorSearchStrategy().name(), scratch);
SimpleTextUtil.write(out, fi.getVectorSimilarityFunction().name(), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, SOFT_DELETES);

View File

@ -82,8 +82,8 @@ public class SimpleTextVectorReader extends VectorReader {
while (fieldNumber != -1) {
String fieldName = readString(in, FIELD_NAME);
String scoreFunctionName = readString(in, SCORE_FUNCTION);
VectorValues.SearchStrategy searchStrategy =
VectorValues.SearchStrategy.valueOf(scoreFunctionName);
VectorValues.SimilarityFunction similarityFunction =
VectorValues.SimilarityFunction.valueOf(scoreFunctionName);
long vectorDataOffset = readLong(in, VECTOR_DATA_OFFSET);
long vectorDataLength = readLong(in, VECTOR_DATA_LENGTH);
int dimension = readInt(in, VECTOR_DIMENSION);
@ -95,7 +95,8 @@ public class SimpleTextVectorReader extends VectorReader {
assert fieldEntries.containsKey(fieldName) == false;
fieldEntries.put(
fieldName,
new FieldEntry(dimension, searchStrategy, vectorDataOffset, vectorDataLength, docIds));
new FieldEntry(
dimension, similarityFunction, vectorDataOffset, vectorDataLength, docIds));
fieldNumber = readInt(in, FIELD_NUMBER);
}
SimpleTextUtil.checkFooter(in);
@ -204,7 +205,7 @@ public class SimpleTextVectorReader extends VectorReader {
private static class FieldEntry {
final int dimension;
final VectorValues.SearchStrategy searchStrategy;
final VectorValues.SimilarityFunction similarityFunction;
final long vectorDataOffset;
final long vectorDataLength;
@ -212,12 +213,12 @@ public class SimpleTextVectorReader extends VectorReader {
FieldEntry(
int dimension,
VectorValues.SearchStrategy searchStrategy,
VectorValues.SimilarityFunction similarityFunction,
long vectorDataOffset,
long vectorDataLength,
int[] ordToDoc) {
this.dimension = dimension;
this.searchStrategy = searchStrategy;
this.similarityFunction = similarityFunction;
this.vectorDataOffset = vectorDataOffset;
this.vectorDataLength = vectorDataLength;
this.ordToDoc = ordToDoc;
@ -260,8 +261,8 @@ public class SimpleTextVectorReader extends VectorReader {
}
@Override
public SearchStrategy searchStrategy() {
return entry.searchStrategy;
public SimilarityFunction similarityFunction() {
return entry.similarityFunction;
}
@Override

View File

@ -98,7 +98,7 @@ public class SimpleTextVectorWriter extends VectorWriter {
throws IOException {
writeField(meta, FIELD_NUMBER, field.number);
writeField(meta, FIELD_NAME, field.name);
writeField(meta, SCORE_FUNCTION, field.getVectorSearchStrategy().name());
writeField(meta, SCORE_FUNCTION, field.getVectorSimilarityFunction().name());
writeField(meta, VECTOR_DATA_OFFSET, vectorDataOffset);
writeField(meta, VECTOR_DATA_LENGTH, vectorDataLength);
writeField(meta, VECTOR_DIMENSION, field.getVectorDimension());

View File

@ -116,7 +116,7 @@ public class TestBlockWriter extends LuceneTestCase {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
true);
}
}

View File

@ -203,7 +203,7 @@ public class TestSTBlockReader extends LuceneTestCase {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
false);
}

View File

@ -68,18 +68,18 @@ public abstract class VectorWriter implements Closeable {
}
List<VectorValuesSub> subs = new ArrayList<>();
int dimension = -1;
VectorValues.SearchStrategy searchStrategy = null;
VectorValues.SimilarityFunction similarityFunction = null;
int nonEmptySegmentIndex = 0;
for (int i = 0; i < mergeState.vectorReaders.length; i++) {
VectorReader vectorReader = mergeState.vectorReaders[i];
if (vectorReader != null) {
if (mergeFieldInfo != null && mergeFieldInfo.hasVectorValues()) {
int segmentDimension = mergeFieldInfo.getVectorDimension();
VectorValues.SearchStrategy segmentSearchStrategy =
mergeFieldInfo.getVectorSearchStrategy();
VectorValues.SimilarityFunction segmentSimilarityFunction =
mergeFieldInfo.getVectorSimilarityFunction();
if (dimension == -1) {
dimension = segmentDimension;
searchStrategy = mergeFieldInfo.getVectorSearchStrategy();
similarityFunction = mergeFieldInfo.getVectorSimilarityFunction();
} else if (dimension != segmentDimension) {
throw new IllegalStateException(
"Varying dimensions for vector-valued field "
@ -88,14 +88,14 @@ public abstract class VectorWriter implements Closeable {
+ dimension
+ "!="
+ segmentDimension);
} else if (searchStrategy != segmentSearchStrategy) {
} else if (similarityFunction != segmentSimilarityFunction) {
throw new IllegalStateException(
"Varying search strategys for vector-valued field "
"Varying similarity functions for vector-valued field "
+ mergeFieldInfo.name
+ ": "
+ searchStrategy
+ similarityFunction
+ "!="
+ segmentSearchStrategy);
+ segmentSimilarityFunction);
}
VectorValues values = vectorReader.getVectorValues(mergeFieldInfo.name);
if (values != null) {
@ -241,8 +241,8 @@ public abstract class VectorWriter implements Closeable {
}
@Override
public SearchStrategy searchStrategy() {
return subs.get(0).values.searchStrategy();
public SimilarityFunction similarityFunction() {
return subs.get(0).values.similarityFunction();
}
class MergerRandomAccess implements RandomAccessVectorValues {
@ -272,8 +272,8 @@ public abstract class VectorWriter implements Closeable {
}
@Override
public SearchStrategy searchStrategy() {
return VectorValuesMerger.this.searchStrategy();
public SimilarityFunction similarityFunction() {
return VectorValuesMerger.this.similarityFunction();
}
@Override

View File

@ -29,8 +29,7 @@ import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.index.VectorValues.SearchStrategy;
import org.apache.lucene.index.VectorValues.SimilarityFunction;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
@ -103,8 +102,8 @@ import org.apache.lucene.store.IndexOutput;
* <li>VectorDistFunction: a byte containing distance function used for similarity calculation.
* <ul>
* <li>0: no distance function is defined for this field.
* <li>1: EUCLIDEAN_HNSW distance. ({@link SearchStrategy#EUCLIDEAN_HNSW})
* <li>2: DOT_PRODUCT_HNSW score. ({@link SearchStrategy#DOT_PRODUCT_HNSW})
* <li>1: EUCLIDEAN distance. ({@link SimilarityFunction#EUCLIDEAN})
* <li>2: DOT_PRODUCT score. ({@link SimilarityFunction#DOT_PRODUCT})
* </ul>
* </ul>
*
@ -173,7 +172,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
pointNumBytes = 0;
}
final int vectorDimension = input.readVInt();
final VectorValues.SearchStrategy vectorDistFunc = getDistFunc(input, input.readByte());
final SimilarityFunction vectorDistFunc = getDistFunc(input, input.readByte());
try {
infos[i] =
@ -254,12 +253,11 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
}
}
private static VectorValues.SearchStrategy getDistFunc(IndexInput input, byte b)
throws IOException {
if (b < 0 || b >= VectorValues.SearchStrategy.values().length) {
private static SimilarityFunction getDistFunc(IndexInput input, byte b) throws IOException {
if (b < 0 || b >= SimilarityFunction.values().length) {
throw new CorruptIndexException("invalid distance function: " + b, input);
}
return VectorValues.SearchStrategy.values()[b];
return SimilarityFunction.values()[b];
}
static {
@ -348,7 +346,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
output.writeVInt(fi.getPointNumBytes());
}
output.writeVInt(fi.getVectorDimension());
output.writeByte((byte) fi.getVectorSearchStrategy().ordinal());
output.writeByte((byte) fi.getVectorSimilarityFunction().ordinal());
}
CodecUtil.writeFooter(output);
}

View File

@ -50,7 +50,7 @@ import org.apache.lucene.index.SegmentWriteState;
*
* <ul>
* <li><b>[int32]</b> field number
* <li><b>[int32]</b> vector search strategy ordinal
* <li><b>[int32]</b> vector similarity function ordinal
* <li><b>[vlong]</b> offset to this field's vectors in the .vec file
* <li><b>[vlong]</b> length of this field's vectors, in bytes
* <li><b>[vlong]</b> offset to this field's index in the .vex file

View File

@ -187,24 +187,28 @@ public final class Lucene90VectorReader extends VectorReader {
}
}
private VectorValues.SearchStrategy readSearchStrategy(DataInput input) throws IOException {
int searchStrategyId = input.readInt();
if (searchStrategyId < 0 || searchStrategyId >= VectorValues.SearchStrategy.values().length) {
throw new CorruptIndexException("Invalid search strategy id: " + searchStrategyId, input);
private VectorValues.SimilarityFunction readSimilarityFunction(DataInput input)
throws IOException {
int similarityFunctionId = input.readInt();
if (similarityFunctionId < 0
|| similarityFunctionId >= VectorValues.SimilarityFunction.values().length) {
throw new CorruptIndexException(
"Invalid similarity function id: " + similarityFunctionId, input);
}
return VectorValues.SearchStrategy.values()[searchStrategyId];
return VectorValues.SimilarityFunction.values()[similarityFunctionId];
}
private FieldEntry readField(DataInput input) throws IOException {
VectorValues.SearchStrategy searchStrategy = readSearchStrategy(input);
switch (searchStrategy) {
VectorValues.SimilarityFunction similarityFunction = readSimilarityFunction(input);
switch (similarityFunction) {
case NONE:
return new FieldEntry(input, searchStrategy);
case DOT_PRODUCT_HNSW:
case EUCLIDEAN_HNSW:
return new HnswGraphFieldEntry(input, searchStrategy);
return new FieldEntry(input, similarityFunction);
case DOT_PRODUCT:
case EUCLIDEAN:
return new HnswGraphFieldEntry(input, similarityFunction);
default:
throw new CorruptIndexException("Unknown vector search strategy: " + searchStrategy, input);
throw new CorruptIndexException(
"Unknown vector similarity function: " + similarityFunction, input);
}
}
@ -288,7 +292,7 @@ public final class Lucene90VectorReader extends VectorReader {
}
private KnnGraphValues getGraphValues(FieldEntry entry) throws IOException {
if (entry.searchStrategy.isHnsw()) {
if (entry.similarityFunction.isHnsw()) {
HnswGraphFieldEntry graphEntry = (HnswGraphFieldEntry) entry;
IndexInput bytesSlice =
vectorIndex.slice("graph-data", entry.indexDataOffset, entry.indexDataLength);
@ -306,7 +310,7 @@ public final class Lucene90VectorReader extends VectorReader {
private static class FieldEntry {
final int dimension;
final VectorValues.SearchStrategy searchStrategy;
final VectorValues.SimilarityFunction similarityFunction;
final long vectorDataOffset;
final long vectorDataLength;
@ -314,8 +318,9 @@ public final class Lucene90VectorReader extends VectorReader {
final long indexDataLength;
final int[] ordToDoc;
FieldEntry(DataInput input, VectorValues.SearchStrategy searchStrategy) throws IOException {
this.searchStrategy = searchStrategy;
FieldEntry(DataInput input, VectorValues.SimilarityFunction similarityFunction)
throws IOException {
this.similarityFunction = similarityFunction;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
indexDataOffset = input.readVLong();
@ -338,9 +343,9 @@ public final class Lucene90VectorReader extends VectorReader {
final long[] ordOffsets;
HnswGraphFieldEntry(DataInput input, VectorValues.SearchStrategy searchStrategy)
HnswGraphFieldEntry(DataInput input, VectorValues.SimilarityFunction similarityFunction)
throws IOException {
super(input, searchStrategy);
super(input, similarityFunction);
ordOffsets = new long[size()];
long offset = 0;
for (int i = 0; i < ordOffsets.length; i++) {
@ -385,8 +390,8 @@ public final class Lucene90VectorReader extends VectorReader {
}
@Override
public SearchStrategy searchStrategy() {
return fieldEntry.searchStrategy;
public SimilarityFunction similarityFunction() {
return fieldEntry.similarityFunction;
}
@Override
@ -425,7 +430,7 @@ public final class Lucene90VectorReader extends VectorReader {
if (ord < 0) {
ord = -(ord + 1);
}
assert ord >= 0 && ord <= fieldEntry.ordToDoc.length;
assert ord <= fieldEntry.ordToDoc.length;
if (ord == fieldEntry.ordToDoc.length) {
doc = NO_MORE_DOCS;
} else {

View File

@ -121,7 +121,7 @@ public final class Lucene90VectorWriter extends VectorWriter {
long[] offsets = new long[count];
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;
long vectorIndexOffset = vectorIndex.getFilePointer();
if (vectors.searchStrategy().isHnsw()) {
if (vectors.similarityFunction().isHnsw()) {
if (vectors instanceof RandomAccessVectorValuesProducer) {
writeGraph(
vectorIndex,
@ -146,7 +146,7 @@ public final class Lucene90VectorWriter extends VectorWriter {
vectorIndexLength,
count,
docIds);
if (vectors.searchStrategy().isHnsw()) {
if (vectors.similarityFunction().isHnsw()) {
writeGraphOffsets(meta, offsets);
}
}
@ -162,7 +162,7 @@ public final class Lucene90VectorWriter extends VectorWriter {
int[] docIds)
throws IOException {
meta.writeInt(field.number);
meta.writeInt(field.getVectorSearchStrategy().ordinal());
meta.writeInt(field.getVectorSimilarityFunction().ordinal());
meta.writeVLong(vectorDataOffset);
meta.writeVLong(vectorDataLength);
meta.writeVLong(indexDataOffset);

View File

@ -42,7 +42,8 @@ public class FieldType implements IndexableFieldType {
private int indexDimensionCount;
private int dimensionNumBytes;
private int vectorDimension;
private VectorValues.SearchStrategy vectorSearchStrategy = VectorValues.SearchStrategy.NONE;
private VectorValues.SimilarityFunction vectorSimilarityFunction =
VectorValues.SimilarityFunction.NONE;
private Map<String, String> attributes;
/** Create a new mutable FieldType with all of the properties from <code>ref</code> */
@ -60,7 +61,7 @@ public class FieldType implements IndexableFieldType {
this.indexDimensionCount = ref.pointIndexDimensionCount();
this.dimensionNumBytes = ref.pointNumBytes();
this.vectorDimension = ref.vectorDimension();
this.vectorSearchStrategy = ref.vectorSearchStrategy();
this.vectorSimilarityFunction = ref.vectorSimilarityFunction();
if (ref.getAttributes() != null) {
this.attributes = new HashMap<>(ref.getAttributes());
}
@ -369,8 +370,8 @@ public class FieldType implements IndexableFieldType {
}
/** Enable vector indexing, with the specified number of dimensions and distance function. */
public void setVectorDimensionsAndSearchStrategy(
int numDimensions, VectorValues.SearchStrategy distFunc) {
public void setVectorDimensionsAndSimilarityFunction(
int numDimensions, VectorValues.SimilarityFunction distFunc) {
checkIfFrozen();
if (numDimensions <= 0) {
throw new IllegalArgumentException("vector numDimensions must be > 0; got " + numDimensions);
@ -383,7 +384,7 @@ public class FieldType implements IndexableFieldType {
+ numDimensions);
}
this.vectorDimension = numDimensions;
this.vectorSearchStrategy = distFunc;
this.vectorSimilarityFunction = distFunc;
}
@Override
@ -392,8 +393,8 @@ public class FieldType implements IndexableFieldType {
}
@Override
public VectorValues.SearchStrategy vectorSearchStrategy() {
return vectorSearchStrategy;
public VectorValues.SimilarityFunction vectorSimilarityFunction() {
return vectorSimilarityFunction;
}
/**

View File

@ -25,15 +25,17 @@ import org.apache.lucene.util.hnsw.HnswGraphBuilder;
* are dense - that is, every dimension of a vector contains an explicit value, stored packed into
* an array (of type float[]) whose length is the vector dimension. Values can be retrieved using
* {@link VectorValues}, which is a forward-only docID-based iterator and also offers random-access
* by dense ordinal (not docId). VectorValues.SearchStrategys may be used to compare vectors at
* by dense ordinal (not docId). VectorValues.SimilarityFunction may be used to compare vectors at
* query time (for example as part of result ranking). A VectorField may be associated with a search
* strategy that defines the metric used for nearest-neighbor search among vectors of that field,
* but at the moment this association is purely nominal: it is intended for future use by the
* to-be-implemented nearest neighbors search.
* similarity function defining the metric used for nearest-neighbor search among vectors of that
* field.
*
* @lucene.experimental
*/
public class VectorField extends Field {
private static FieldType createType(float[] v, VectorValues.SearchStrategy searchStrategy) {
private static FieldType createType(
float[] v, VectorValues.SimilarityFunction similarityFunction) {
if (v == null) {
throw new IllegalArgumentException("vector value must not be null");
}
@ -45,11 +47,11 @@ public class VectorField extends Field {
throw new IllegalArgumentException(
"cannot index vectors with dimension greater than " + VectorValues.MAX_DIMENSIONS);
}
if (searchStrategy == null) {
throw new IllegalArgumentException("search strategy must not be null");
if (similarityFunction == null) {
throw new IllegalArgumentException("similarity function must not be null");
}
FieldType type = new FieldType();
type.setVectorDimensionsAndSearchStrategy(dimension, searchStrategy);
type.setVectorDimensionsAndSimilarityFunction(dimension, similarityFunction);
type.freeze();
return type;
}
@ -59,13 +61,16 @@ public class VectorField extends Field {
* parameters that would be used by HnswGraphBuilder while constructing HNSW graph.
*
* @param dimension dimension of vectors
* @param searchStrategy a function defining vector proximity.
* @param similarityFunction a function defining vector proximity.
* @param maxConn max-connections at each HNSW graph node
* @param beamWidth size of list to be used while constructing HNSW graph
* @throws IllegalArgumentException if any parameter is null, or has dimension &gt; 1024.
*/
public static FieldType createHnswType(
int dimension, VectorValues.SearchStrategy searchStrategy, int maxConn, int beamWidth) {
int dimension,
VectorValues.SimilarityFunction similarityFunction,
int maxConn,
int beamWidth) {
if (dimension == 0) {
throw new IllegalArgumentException("cannot index an empty vector");
}
@ -73,12 +78,12 @@ public class VectorField extends Field {
throw new IllegalArgumentException(
"cannot index vectors with dimension greater than " + VectorValues.MAX_DIMENSIONS);
}
if (searchStrategy == null || !searchStrategy.isHnsw()) {
if (similarityFunction == null || !similarityFunction.isHnsw()) {
throw new IllegalArgumentException(
"search strategy must not be null or non HNSW type, received: " + searchStrategy);
"similarity function must not be null, received: " + similarityFunction);
}
FieldType type = new FieldType();
type.setVectorDimensionsAndSearchStrategy(dimension, searchStrategy);
type.setVectorDimensionsAndSimilarityFunction(dimension, similarityFunction);
type.putAttribute(HnswGraphBuilder.HNSW_MAX_CONN_ATTRIBUTE_KEY, String.valueOf(maxConn));
type.putAttribute(HnswGraphBuilder.HNSW_BEAM_WIDTH_ATTRIBUTE_KEY, String.valueOf(beamWidth));
type.freeze();
@ -87,25 +92,26 @@ public class VectorField extends Field {
/**
* Creates a numeric vector field. Fields are single-valued: each document has either one value or
* no value. Vectors of a single field share the same dimension and search strategy. Note that
* no value. Vectors of a single field share the same dimension and similarity function. Note that
* some similarity functions (notably dot-product) require values to be unit-length, which can be enforced
* using VectorUtil.l2Normalize(float[]).
*
* @param name field name
* @param vector value
* @param searchStrategy a function defining vector proximity.
* @param similarityFunction a function defining vector proximity.
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
* dimension &gt; 1024.
*/
public VectorField(String name, float[] vector, VectorValues.SearchStrategy searchStrategy) {
super(name, createType(vector, searchStrategy));
public VectorField(
String name, float[] vector, VectorValues.SimilarityFunction similarityFunction) {
super(name, createType(vector, similarityFunction));
fieldsData = vector;
}
/**
* Creates a numeric vector field with the default EUCLIDEAN_HNSW (L2) search strategy. Fields are
* Creates a numeric vector field with the default EUCLIDEAN (L2) similarity function. Fields are
* single-valued: each document has either one value or no value. Vectors of a single field share
* the same dimension and search strategy.
* the same dimension and similarity function.
*
* @param name field name
* @param vector value
@ -113,12 +119,12 @@ public class VectorField extends Field {
* dimension &gt; 1024.
*/
public VectorField(String name, float[] vector) {
this(name, vector, VectorValues.SearchStrategy.EUCLIDEAN_HNSW);
this(name, vector, VectorValues.SimilarityFunction.EUCLIDEAN);
}
/**
* Creates a numeric vector field. Fields are single-valued: each document has either one value or
* no value. Vectors of a single field share the same dimension and search strategy.
* no value. Vectors of a single field share the same dimension and similarity function.
*
* @param name field name
* @param vector value

View File

@ -38,6 +38,8 @@ import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
@ -2336,6 +2338,29 @@ public final class CheckIndex implements Closeable {
+ docCount
+ " docs with values");
}
VectorReader vectorReader = reader.getVectorReader();
if (vectorReader instanceof Lucene90VectorReader) {
KnnGraphValues graphValues =
((Lucene90VectorReader) vectorReader).getGraphValues(fieldInfo.name);
int size = graphValues.size();
for (int i = 0; i < size; i++) {
graphValues.seek(i);
for (int neighbor = graphValues.nextNeighbor();
neighbor != NO_MORE_DOCS;
neighbor = graphValues.nextNeighbor()) {
if (neighbor < 0 || neighbor >= size) {
throw new RuntimeException(
"Field \""
+ fieldInfo.name
+ "\" has an invalid neighbor ordinal: "
+ neighbor
+ " which should be in [0,"
+ size
+ ")");
}
}
}
}
status.totalVectorValues += docCount;
}
}

View File

@ -56,7 +56,7 @@ public final class FieldInfo {
// if it is a positive value, it means this field indexes vectors
private final int vectorDimension;
private final VectorValues.SearchStrategy vectorSearchStrategy;
private final VectorValues.SimilarityFunction vectorSimilarityFunction;
// whether this field is used as the soft-deletes field
private final boolean softDeletesField;
@ -80,7 +80,7 @@ public final class FieldInfo {
int pointIndexDimensionCount,
int pointNumBytes,
int vectorDimension,
VectorValues.SearchStrategy vectorSearchStrategy,
VectorValues.SimilarityFunction vectorSimilarityFunction,
boolean softDeletesField) {
this.name = Objects.requireNonNull(name);
this.number = number;
@ -105,7 +105,7 @@ public final class FieldInfo {
this.pointIndexDimensionCount = pointIndexDimensionCount;
this.pointNumBytes = pointNumBytes;
this.vectorDimension = vectorDimension;
this.vectorSearchStrategy = vectorSearchStrategy;
this.vectorSimilarityFunction = vectorSimilarityFunction;
this.softDeletesField = softDeletesField;
this.checkConsistency();
}
@ -194,18 +194,18 @@ public final class FieldInfo {
+ "')");
}
if (vectorSearchStrategy == null) {
if (vectorSimilarityFunction == null) {
throw new IllegalArgumentException(
"Vector search strategy must not be null (field: '" + name + "')");
"Vector similarity function must not be null (field: '" + name + "')");
}
if (vectorDimension < 0) {
throw new IllegalArgumentException(
"vectorDimension must be >=0; got " + vectorDimension + " (field: '" + name + "')");
}
if (vectorDimension == 0 && vectorSearchStrategy != VectorValues.SearchStrategy.NONE) {
if (vectorDimension == 0 && vectorSimilarityFunction != VectorValues.SimilarityFunction.NONE) {
throw new IllegalArgumentException(
"vector search strategy must be NONE when dimension = 0; got "
+ vectorSearchStrategy
"vector similarity function must be NONE when dimension = 0; got "
+ vectorSimilarityFunction
+ " (field: '"
+ name
+ "')");
@ -237,9 +237,9 @@ public final class FieldInfo {
verifySameVectorOptions(
fieldName,
this.vectorDimension,
this.vectorSearchStrategy,
this.vectorSimilarityFunction,
o.vectorDimension,
o.vectorSearchStrategy);
o.vectorSimilarityFunction);
}
/**
@ -355,21 +355,21 @@ public final class FieldInfo {
static void verifySameVectorOptions(
String fieldName,
int vd1,
VectorValues.SearchStrategy vst1,
VectorValues.SimilarityFunction vsf1,
int vd2,
VectorValues.SearchStrategy vst2) {
if (vd1 != vd2 || vst1 != vst2) {
VectorValues.SimilarityFunction vsf2) {
if (vd1 != vd2 || vsf1 != vsf2) {
throw new IllegalArgumentException(
"cannot change field \""
+ fieldName
+ "\" from vector dimension="
+ vd1
+ ", vector search strategy="
+ vst1
+ ", vector similarity function="
+ vsf1
+ " to inconsistent vector dimension="
+ vd2
+ ", vector search strategy="
+ vst2);
+ ", vector similarity function="
+ vsf2);
}
}
@ -478,9 +478,9 @@ public final class FieldInfo {
return vectorDimension;
}
/** Returns {@link VectorValues.SearchStrategy} for the field */
public VectorValues.SearchStrategy getVectorSearchStrategy() {
return vectorSearchStrategy;
/** Returns {@link VectorValues.SimilarityFunction} for the field */
public VectorValues.SimilarityFunction getVectorSimilarityFunction() {
return vectorSimilarityFunction;
}
/** Record that this field is indexed with docvalues, with the specified type */

View File

@ -299,11 +299,11 @@ public class FieldInfos implements Iterable<FieldInfo> {
static final class FieldVectorProperties {
final int numDimensions;
final VectorValues.SearchStrategy searchStrategy;
final VectorValues.SimilarityFunction similarityFunction;
FieldVectorProperties(int numDimensions, VectorValues.SearchStrategy searchStrategy) {
FieldVectorProperties(int numDimensions, VectorValues.SimilarityFunction similarityFunction) {
this.numDimensions = numDimensions;
this.searchStrategy = searchStrategy;
this.similarityFunction = similarityFunction;
}
}
@ -384,7 +384,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
fi.getPointNumBytes()));
vectorProps.put(
fieldName,
new FieldVectorProperties(fi.getVectorDimension(), fi.getVectorSearchStrategy()));
new FieldVectorProperties(fi.getVectorDimension(), fi.getVectorSimilarityFunction()));
}
return fieldNumber.intValue();
}
@ -442,9 +442,9 @@ public class FieldInfos implements Iterable<FieldInfo> {
verifySameVectorOptions(
fieldName,
props.numDimensions,
props.searchStrategy,
props.similarityFunction,
fi.getVectorDimension(),
fi.getVectorSearchStrategy());
fi.getVectorSimilarityFunction());
}
/**
@ -486,7 +486,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
(softDeletesFieldName != null && softDeletesFieldName.equals(fieldName)));
addOrGet(fi);
}
@ -567,7 +567,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
isSoftDeletesField);
}
@ -678,7 +678,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
fi.getPointIndexDimensionCount(),
fi.getPointNumBytes(),
fi.getVectorDimension(),
fi.getVectorSearchStrategy(),
fi.getVectorSimilarityFunction(),
fi.isSoftDeletesField());
byName.put(fiNew.getName(), fiNew);
return fiNew;

View File

@ -101,8 +101,8 @@ public interface IndexableFieldType {
/** The number of dimensions of the field's vector value */
int vectorDimension();
/** The {@link VectorValues.SearchStrategy} of the field's vector value */
VectorValues.SearchStrategy vectorSearchStrategy();
/** The {@link VectorValues.SimilarityFunction} of the field's vector value */
VectorValues.SimilarityFunction vectorSimilarityFunction();
/**
* Attributes for the field type.

View File

@ -679,7 +679,7 @@ final class IndexingChain implements Accountable {
s.pointIndexDimensionCount,
s.pointNumBytes,
s.vectorDimension,
s.vectorSearchStrategy,
s.vectorSimilarityFunction,
pf.fieldName.equals(fieldInfos.getSoftDeletesFieldName())));
pf.setFieldInfo(fi);
if (fi.getIndexOptions() != IndexOptions.NONE) {
@ -822,7 +822,7 @@ final class IndexingChain implements Accountable {
fieldType.pointNumBytes());
}
if (fieldType.vectorDimension() != 0) {
schema.setVectors(fieldType.vectorSearchStrategy(), fieldType.vectorDimension());
schema.setVectors(fieldType.vectorSimilarityFunction(), fieldType.vectorDimension());
}
if (fieldType.getAttributes() != null && fieldType.getAttributes().isEmpty() == false) {
schema.updateAttributes(fieldType.getAttributes());
@ -1324,7 +1324,8 @@ final class IndexingChain implements Accountable {
private int pointIndexDimensionCount = 0;
private int pointNumBytes = 0;
private int vectorDimension = 0;
private VectorValues.SearchStrategy vectorSearchStrategy = VectorValues.SearchStrategy.NONE;
private VectorValues.SimilarityFunction vectorSimilarityFunction =
VectorValues.SimilarityFunction.NONE;
private static String errMsg =
"Inconsistency of field data structures across documents for field ";
@ -1379,12 +1380,12 @@ final class IndexingChain implements Accountable {
}
}
void setVectors(VectorValues.SearchStrategy searchStrategy, int dimension) {
if (vectorSearchStrategy == VectorValues.SearchStrategy.NONE) {
void setVectors(VectorValues.SimilarityFunction similarityFunction, int dimension) {
if (vectorSimilarityFunction == VectorValues.SimilarityFunction.NONE) {
this.vectorDimension = dimension;
this.vectorSearchStrategy = searchStrategy;
this.vectorSimilarityFunction = similarityFunction;
} else {
assertSame(vectorSearchStrategy == searchStrategy && vectorDimension == dimension);
assertSame(vectorSimilarityFunction == similarityFunction && vectorDimension == dimension);
}
}
@ -1399,7 +1400,7 @@ final class IndexingChain implements Accountable {
pointIndexDimensionCount = 0;
pointNumBytes = 0;
vectorDimension = 0;
vectorSearchStrategy = VectorValues.SearchStrategy.NONE;
vectorSimilarityFunction = VectorValues.SimilarityFunction.NONE;
}
void assertSameSchema(FieldInfo fi) {
@ -1413,7 +1414,7 @@ final class IndexingChain implements Accountable {
&& pointIndexDimensionCount == fi.getPointIndexDimensionCount()
&& pointNumBytes == fi.getPointNumBytes()
&& vectorDimension == fi.getVectorDimension()
&& vectorSearchStrategy == fi.getVectorSearchStrategy());
&& vectorSimilarityFunction == fi.getVectorSimilarityFunction());
}
}
}

View File

@ -33,8 +33,8 @@ public interface RandomAccessVectorValues {
/** Return the dimension of the returned vector values */
int dimension();
/** Return the search strategy used to compare these vectors */
VectorValues.SearchStrategy searchStrategy();
/** Return the similarity function used to compare these vectors */
VectorValues.SimilarityFunction similarityFunction();
/**
* Return the vector value indexed at the given ordinal. The provided floating point array may be

View File

@ -722,7 +722,7 @@ final class ReadersAndUpdates {
fi.getPointIndexDimensionCount(),
fi.getPointNumBytes(),
fi.getVectorDimension(),
fi.getVectorSearchStrategy(),
fi.getVectorSimilarityFunction(),
fi.isSoftDeletesField());
}

View File

@ -51,8 +51,8 @@ public abstract class VectorValues extends DocIdSetIterator {
*/
public abstract int size();
/** Return the search strategy used to compare these vectors */
public abstract SearchStrategy searchStrategy();
/** Return the similarity function used to compare these vectors */
public abstract SimilarityFunction similarityFunction();
/**
* Return the vector value for the current document ID. It is illegal to call this method when the
@ -76,35 +76,36 @@ public abstract class VectorValues extends DocIdSetIterator {
}
/**
* Search strategy. This is a label describing the method used during indexing and searching of
* the vectors in order to determine the nearest neighbors.
* Vector similarity function; used in search to return top K most similar vectors to a target
* vector. This is a label describing the method used during indexing and searching of the vectors
* in order to determine the nearest neighbors.
*/
public enum SearchStrategy {
public enum SimilarityFunction {
/**
* No search strategy is provided. Note: {@link VectorReader#search(String, float[], int, int)}
* is not supported for fields specifying this strategy.
* No similarity function is provided. Note: {@link VectorReader#search(float[], int, int)} is
* not supported for fields specifying this.
*/
NONE,
/** HNSW graph built using Euclidean distance */
EUCLIDEAN_HNSW(true),
EUCLIDEAN(true),
/** HNSW graph built using dot product */
DOT_PRODUCT_HNSW;
DOT_PRODUCT;
/**
* If true, the scores associated with vector comparisons in this strategy are in reverse order;
* that is, lower scores represent more similar vectors. Otherwise, if false, higher scores
* represent more similar vectors.
* If true, the scores associated with vector comparisons are in reverse order; that is, lower
* scores represent more similar vectors. Otherwise, if false, higher scores represent more
* similar vectors.
*/
public final boolean reversed;
SearchStrategy(boolean reversed) {
SimilarityFunction(boolean reversed) {
this.reversed = reversed;
}
SearchStrategy() {
SimilarityFunction() {
reversed = false;
}
@ -113,25 +114,25 @@ public abstract class VectorValues extends DocIdSetIterator {
*
* @param v1 a vector
* @param v2 another vector, of the same dimension
* @return the value of the strategy's score function applied to the two vectors
* @return the value of the similarity function applied to the two vectors
*/
public float compare(float[] v1, float[] v2) {
switch (this) {
case EUCLIDEAN_HNSW:
case EUCLIDEAN:
return squareDistance(v1, v2);
case DOT_PRODUCT_HNSW:
case DOT_PRODUCT:
return dotProduct(v1, v2);
case NONE:
default:
throw new IllegalStateException("Incomparable search strategy: " + this);
throw new IllegalStateException("Incomparable similarity function: " + this);
}
}
/** Return true if vectors indexed using this strategy will be indexed using an HNSW graph */
/** Return true if vectors indexed using this similarity will be indexed using an HNSW graph */
public boolean isHnsw() {
switch (this) {
case EUCLIDEAN_HNSW:
case DOT_PRODUCT_HNSW:
case EUCLIDEAN:
case DOT_PRODUCT:
return true;
case NONE:
default:
@ -158,8 +159,8 @@ public abstract class VectorValues extends DocIdSetIterator {
}
@Override
public SearchStrategy searchStrategy() {
return SearchStrategy.NONE;
public SimilarityFunction similarityFunction() {
return SimilarityFunction.NONE;
}
@Override

View File

@ -29,7 +29,11 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.RamUsageEstimator;
/** Buffers up pending vector value(s) per doc, then flushes when segment flushes. */
/**
* Buffers up pending vector value(s) per doc, then flushes when segment flushes.
*
* @lucene.experimental
*/
class VectorValuesWriter {
private final FieldInfo fieldInfo;
@ -108,7 +112,7 @@ class VectorValuesWriter {
docsWithField,
vectors,
fieldInfo.getVectorDimension(),
fieldInfo.getVectorSearchStrategy());
fieldInfo.getVectorSimilarityFunction());
if (sortMap != null) {
vectorWriter.writeField(fieldInfo, new SortingVectorValues(vectorValues, sortMap));
} else {
@ -186,8 +190,8 @@ class VectorValuesWriter {
}
@Override
public SearchStrategy searchStrategy() {
return delegate.searchStrategy();
public SimilarityFunction similarityFunction() {
return delegate.similarityFunction();
}
@Override
@ -220,8 +224,8 @@ class VectorValuesWriter {
}
@Override
public SearchStrategy searchStrategy() {
return delegateRA.searchStrategy();
public SimilarityFunction similarityFunction() {
return delegateRA.similarityFunction();
}
@Override
@ -244,7 +248,7 @@ class VectorValuesWriter {
// These are always the vectors of a VectorValuesWriter, which are copied when added to it
final List<float[]> vectors;
final SearchStrategy searchStrategy;
final SimilarityFunction similarityFunction;
final int dimension;
final ByteBuffer buffer;
@ -259,11 +263,11 @@ class VectorValuesWriter {
DocsWithFieldSet docsWithField,
List<float[]> vectors,
int dimension,
SearchStrategy searchStrategy) {
SimilarityFunction similarityFunction) {
this.docsWithField = docsWithField;
this.vectors = vectors;
this.dimension = dimension;
this.searchStrategy = searchStrategy;
this.similarityFunction = similarityFunction;
buffer = ByteBuffer.allocate(dimension * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
binaryValue = new BytesRef(buffer.array());
raBuffer = ByteBuffer.allocate(dimension * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
@ -273,7 +277,7 @@ class VectorValuesWriter {
@Override
public RandomAccessVectorValues randomAccess() {
return new BufferedVectorValues(docsWithField, vectors, dimension, searchStrategy);
return new BufferedVectorValues(docsWithField, vectors, dimension, similarityFunction);
}
@Override
@ -287,8 +291,8 @@ class VectorValuesWriter {
}
@Override
public SearchStrategy searchStrategy() {
return searchStrategy;
public SimilarityFunction similarityFunction() {
return similarityFunction;
}
@Override

View File

@ -99,11 +99,11 @@ public final class HnswGraph extends KnnGraphValues {
KnnGraphValues graphValues,
Random random)
throws IOException {
VectorValues.SearchStrategy searchStrategy = vectors.searchStrategy();
VectorValues.SimilarityFunction similarityFunction = vectors.similarityFunction();
int size = graphValues.size();
// MIN heap, holding the top results
NeighborQueue results = new NeighborQueue(numSeed, searchStrategy.reversed);
NeighborQueue results = new NeighborQueue(numSeed, similarityFunction.reversed);
// set of ordinals that have been visited by search on this layer, used to avoid backtracking
SparseFixedBitSet visited = new SparseFixedBitSet(size);
@ -114,17 +114,17 @@ public final class HnswGraph extends KnnGraphValues {
if (visited.get(entryPoint) == false) {
visited.set(entryPoint);
// explore the topK starting points of some random numSeed probes
results.add(entryPoint, searchStrategy.compare(query, vectors.vectorValue(entryPoint)));
results.add(entryPoint, similarityFunction.compare(query, vectors.vectorValue(entryPoint)));
}
}
// MAX heap, from which to pull the candidate nodes
NeighborQueue candidates = results.copy(!searchStrategy.reversed);
NeighborQueue candidates = results.copy(!similarityFunction.reversed);
// Set the bound to the worst current result; below, reject any newly-generated candidates
// that fail to exceed this bound
BoundsChecker bound = BoundsChecker.create(searchStrategy.reversed);
BoundsChecker bound = BoundsChecker.create(similarityFunction.reversed);
bound.set(results.topScore());
while (candidates.size() > 0) {
// get the best candidate (closest or best scoring)
@ -143,7 +143,7 @@ public final class HnswGraph extends KnnGraphValues {
continue;
}
visited.set(friendOrd);
float score = searchStrategy.compare(query, vectors.vectorValue(friendOrd));
float score = similarityFunction.compare(query, vectors.vectorValue(friendOrd));
if (results.insertWithOverflow(friendOrd, score)) {
candidates.add(friendOrd, score);
bound.set(results.topScore());

View File

@ -54,7 +54,7 @@ public final class HnswGraphBuilder {
private final int beamWidth;
private final NeighborArray scratch;
private final VectorValues.SearchStrategy searchStrategy;
private final VectorValues.SimilarityFunction similarityFunction;
private final RandomAccessVectorValues vectorValues;
private final Random random;
private final BoundsChecker bound;
@ -87,8 +87,8 @@ public final class HnswGraphBuilder {
RandomAccessVectorValuesProducer vectors, int maxConn, int beamWidth, long seed) {
vectorValues = vectors.randomAccess();
buildVectors = vectors.randomAccess();
searchStrategy = vectorValues.searchStrategy();
if (searchStrategy == VectorValues.SearchStrategy.NONE) {
similarityFunction = vectorValues.similarityFunction();
if (similarityFunction == VectorValues.SimilarityFunction.NONE) {
throw new IllegalStateException("No distance function");
}
if (maxConn <= 0) {
@ -100,7 +100,7 @@ public final class HnswGraphBuilder {
this.maxConn = maxConn;
this.beamWidth = beamWidth;
this.hnsw = new HnswGraph(maxConn);
bound = BoundsChecker.create(searchStrategy.reversed);
bound = BoundsChecker.create(similarityFunction.reversed);
random = new Random(seed);
scratch = new NeighborArray(Math.max(beamWidth, maxConn + 1));
}
@ -232,7 +232,7 @@ public final class HnswGraphBuilder {
bound.set(score);
for (int i = 0; i < neighbors.size(); i++) {
float diversityCheck =
searchStrategy.compare(candidate, vectorValues.vectorValue(neighbors.node[i]));
similarityFunction.compare(candidate, vectorValues.vectorValue(neighbors.node[i]));
if (bound.check(diversityCheck) == false) {
return false;
}
@ -269,7 +269,7 @@ public final class HnswGraphBuilder {
float[] nbrVector = vectorValues.vectorValue(nbrNode);
for (int j = maxConn; j > i; j--) {
float diversityCheck =
searchStrategy.compare(nbrVector, buildVectors.vectorValue(neighbors.node[j]));
similarityFunction.compare(nbrVector, buildVectors.vectorValue(neighbors.node[j]));
if (bound.check(diversityCheck) == false) {
// node j is too similar to node i given its score relative to the base node
// replace it with the new node, which is at [maxConn]

View File

@ -87,16 +87,16 @@ public class TestPerFieldConsistency extends LuceneTestCase {
}
private static Field randomVectorField(Random random, String fieldName) {
VectorValues.SearchStrategy searchStrategy =
RandomPicks.randomFrom(random, VectorValues.SearchStrategy.values());
while (searchStrategy == VectorValues.SearchStrategy.NONE) {
searchStrategy = RandomPicks.randomFrom(random, VectorValues.SearchStrategy.values());
VectorValues.SimilarityFunction similarityFunction =
RandomPicks.randomFrom(random, VectorValues.SimilarityFunction.values());
while (similarityFunction == VectorValues.SimilarityFunction.NONE) {
similarityFunction = RandomPicks.randomFrom(random, VectorValues.SimilarityFunction.values());
}
float[] values = new float[randomIntBetween(1, 10)];
for (int i = 0; i < values.length; i++) {
values[i] = randomFloat();
}
return new VectorField(fieldName, values, searchStrategy);
return new VectorField(fieldName, values, similarityFunction);
}
private static Field[] randomFieldsWithTheSameName(String fieldName) {

View File

@ -112,7 +112,7 @@ public class TestCodecs extends LuceneTestCase {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
false));
}
this.terms = terms;

View File

@ -260,7 +260,7 @@ public class TestFieldInfos extends LuceneTestCase {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
false));
}
int idx =
@ -279,7 +279,7 @@ public class TestFieldInfos extends LuceneTestCase {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
false));
assertEquals("Field numbers 0 through 9 were allocated", 10, idx);
@ -300,7 +300,7 @@ public class TestFieldInfos extends LuceneTestCase {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
false));
assertEquals("Field numbers should reset after clear()", 0, idx);
}

View File

@ -63,7 +63,7 @@ public class TestFieldsReader extends LuceneTestCase {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
field.name().equals(softDeletesFieldName)));
}
dir = newDirectory();

View File

@ -113,8 +113,8 @@ public class TestIndexableField extends LuceneTestCase {
}
@Override
public VectorValues.SearchStrategy vectorSearchStrategy() {
return VectorValues.SearchStrategy.NONE;
public VectorValues.SimilarityFunction vectorSimilarityFunction() {
return VectorValues.SimilarityFunction.NONE;
}
@Override

View File

@ -34,7 +34,7 @@ import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.VectorField;
import org.apache.lucene.index.VectorValues.SearchStrategy;
import org.apache.lucene.index.VectorValues.SimilarityFunction;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
@ -53,7 +53,7 @@ public class TestKnnGraph extends LuceneTestCase {
private static int maxConn = HnswGraphBuilder.DEFAULT_MAX_CONN;
private SearchStrategy searchStrategy;
private SimilarityFunction similarityFunction;
@Before
public void setup() {
@ -61,8 +61,8 @@ public class TestKnnGraph extends LuceneTestCase {
if (random().nextBoolean()) {
maxConn = random().nextInt(256) + 3;
}
int strategy = random().nextInt(SearchStrategy.values().length - 1) + 1;
searchStrategy = SearchStrategy.values()[strategy];
int similarity = random().nextInt(SimilarityFunction.values().length - 1) + 1;
similarityFunction = SimilarityFunction.values()[similarity];
}
@After
@ -212,7 +212,7 @@ public class TestKnnGraph extends LuceneTestCase {
/** Verify that searching does something reasonable */
public void testSearch() throws Exception {
// We can't use dot product here since the vectors are laid out on a grid, not a sphere.
searchStrategy = SearchStrategy.EUCLIDEAN_HNSW;
similarityFunction = SimilarityFunction.EUCLIDEAN;
IndexWriterConfig config = newIndexWriterConfig();
config.setCodec(Codec.forName("Lucene90")); // test is not compatible with simpletext
try (Directory dir = newDirectory();
@ -434,16 +434,16 @@ public class TestKnnGraph extends LuceneTestCase {
}
private void add(IndexWriter iw, int id, float[] vector) throws IOException {
add(iw, id, vector, searchStrategy);
add(iw, id, vector, similarityFunction);
}
private void add(IndexWriter iw, int id, float[] vector, SearchStrategy searchStrategy)
private void add(IndexWriter iw, int id, float[] vector, SimilarityFunction similarityFunction)
throws IOException {
Document doc = new Document();
if (vector != null) {
FieldType fieldType =
VectorField.createHnswType(
vector.length, searchStrategy, maxConn, HnswGraphBuilder.DEFAULT_BEAM_WIDTH);
vector.length, similarityFunction, maxConn, HnswGraphBuilder.DEFAULT_BEAM_WIDTH);
doc.add(new VectorField(KNN_GRAPH_FIELD, vector, fieldType));
}
String idString = Integer.toString(id);

View File

@ -17,7 +17,7 @@
package org.apache.lucene.index;
import static org.apache.lucene.index.VectorValues.SearchStrategy.NONE;
import static org.apache.lucene.index.VectorValues.SimilarityFunction.NONE;
import java.io.IOException;
import java.util.Arrays;

View File

@ -70,8 +70,8 @@ public class KnnGraphTester {
private static final String KNN_FIELD = "knn";
private static final String ID_FIELD = "id";
private static final VectorValues.SearchStrategy SEARCH_STRATEGY =
VectorValues.SearchStrategy.DOT_PRODUCT_HNSW;
private static final VectorValues.SimilarityFunction SIMILARITY_FUNCTION =
VectorValues.SimilarityFunction.DOT_PRODUCT;
private int numDocs;
private int dim;
@ -542,10 +542,10 @@ public class KnnGraphTester {
.order(ByteOrder.LITTLE_ENDIAN)
.asFloatBuffer();
offset += blockSize;
NeighborQueue queue = new NeighborQueue(topK, SEARCH_STRATEGY.reversed);
NeighborQueue queue = new NeighborQueue(topK, SIMILARITY_FUNCTION.reversed);
for (; j < numDocs && vectors.hasRemaining(); j++) {
vectors.get(vector);
float d = SEARCH_STRATEGY.compare(query, vector);
float d = SIMILARITY_FUNCTION.compare(query, vector);
queue.insertWithOverflow(j, d);
}
result[i] = new int[topK];
@ -572,7 +572,7 @@ public class KnnGraphTester {
FieldType fieldType =
VectorField.createHnswType(
dim, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW, maxConn, beamWidth);
dim, VectorValues.SimilarityFunction.DOT_PRODUCT, maxConn, beamWidth);
if (quiet == false) {
iwc.setInfoStream(new PrintStreamInfoStream(System.out));
System.out.println("creating index in " + indexPath);
@ -667,8 +667,8 @@ public class KnnGraphTester {
}
@Override
public VectorValues.SearchStrategy searchStrategy() {
return SEARCH_STRATEGY;
public VectorValues.SimilarityFunction similarityFunction() {
return SIMILARITY_FUNCTION;
}
@Override

View File

@ -30,13 +30,13 @@ class MockVectorValues extends VectorValues
protected final int dimension;
protected final float[][] denseValues;
protected final float[][] values;
protected final SearchStrategy searchStrategy;
protected final SimilarityFunction similarityFunction;
private final int numVectors;
private int pos = -1;
MockVectorValues(SearchStrategy searchStrategy, float[][] values) {
this.searchStrategy = searchStrategy;
MockVectorValues(SimilarityFunction similarityFunction, float[][] values) {
this.similarityFunction = similarityFunction;
this.dimension = values[0].length;
this.values = values;
int maxDoc = values.length;
@ -52,7 +52,7 @@ class MockVectorValues extends VectorValues
}
public MockVectorValues copy() {
return new MockVectorValues(searchStrategy, values);
return new MockVectorValues(similarityFunction, values);
}
@Override
@ -61,8 +61,8 @@ class MockVectorValues extends VectorValues
}
@Override
public SearchStrategy searchStrategy() {
return searchStrategy;
public SimilarityFunction similarityFunction() {
return similarityFunction;
}
@Override

View File

@ -73,7 +73,7 @@ public class TestHnsw extends LuceneTestCase {
indexedDoc++;
}
Document doc = new Document();
doc.add(new VectorField("field", v2.vectorValue(), v2.searchStrategy));
doc.add(new VectorField("field", v2.vectorValue(), v2.similarityFunction));
doc.add(new StoredField("id", v2.docID()));
iw.addDocument(doc);
nVec++;
@ -83,7 +83,7 @@ public class TestHnsw extends LuceneTestCase {
try (IndexReader reader = DirectoryReader.open(dir)) {
for (LeafReaderContext ctx : reader.leaves()) {
VectorValues values = ctx.reader().getVectorValues("field");
assertEquals(vectors.searchStrategy, values.searchStrategy());
assertEquals(vectors.similarityFunction, values.similarityFunction());
assertEquals(dim, values.dimension());
assertEquals(nVec, values.size());
assertEquals(indexedDoc, ctx.reader().maxDoc());
@ -164,7 +164,7 @@ public class TestHnsw extends LuceneTestCase {
// Some carefully checked test cases with simple 2d vectors on the unit circle:
MockVectorValues vectors =
new MockVectorValues(
VectorValues.SearchStrategy.DOT_PRODUCT_HNSW,
VectorValues.SimilarityFunction.DOT_PRODUCT,
new float[][] {
unitVector2d(0.5),
unitVector2d(0.75),
@ -236,12 +236,12 @@ public class TestHnsw extends LuceneTestCase {
for (int i = 0; i < 100; i++) {
float[] query = randomVector(random(), dim);
NeighborQueue actual = HnswGraph.search(query, topK, 100, vectors, hnsw, random());
NeighborQueue expected = new NeighborQueue(topK, vectors.searchStrategy.reversed);
NeighborQueue expected = new NeighborQueue(topK, vectors.similarityFunction.reversed);
for (int j = 0; j < size; j++) {
float[] v = vectors.vectorValue(j);
if (v != null) {
expected.insertWithOverflow(
j, vectors.searchStrategy.compare(query, vectors.vectorValue(j)));
j, vectors.similarityFunction.compare(query, vectors.vectorValue(j)));
}
}
assertEquals(topK, actual.size());
@ -288,8 +288,8 @@ public class TestHnsw extends LuceneTestCase {
}
@Override
public SearchStrategy searchStrategy() {
return SearchStrategy.DOT_PRODUCT_HNSW;
public SimilarityFunction similarityFunction() {
return SimilarityFunction.DOT_PRODUCT;
}
@Override
@ -393,12 +393,12 @@ public class TestHnsw extends LuceneTestCase {
RandomVectorValues(int size, int dimension, Random random) {
super(
SearchStrategy.values()[random.nextInt(SearchStrategy.values().length - 1) + 1],
SimilarityFunction.values()[random.nextInt(SimilarityFunction.values().length - 1) + 1],
createRandomVectors(size, dimension, random));
}
RandomVectorValues(RandomVectorValues other) {
super(other.searchStrategy, other.values);
super(other.similarityFunction, other.values);
}
@Override

View File

@ -107,7 +107,5 @@ public class TestNeighbors extends LuceneTestCase {
public void testToString() {
assertEquals("Neighbors[0]", new NeighborQueue(2, false).toString());
// assertEquals("NeighborArray[0]", new NeighborArray(2,
// VectorValues.SearchStrategy.NONE).toString());
}
}

View File

@ -96,7 +96,7 @@ public class TermVectorLeafReader extends LeafReader {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
false);
fieldInfos = new FieldInfos(new FieldInfo[] {fieldInfo});
}

View File

@ -512,7 +512,7 @@ public class MemoryIndex {
fieldType.pointIndexDimensionCount(),
fieldType.pointNumBytes(),
fieldType.vectorDimension(),
fieldType.vectorSearchStrategy(),
fieldType.vectorSimilarityFunction(),
false);
}
@ -544,7 +544,7 @@ public class MemoryIndex {
info.fieldInfo.getPointIndexDimensionCount(),
info.fieldInfo.getPointNumBytes(),
info.fieldInfo.getVectorDimension(),
info.fieldInfo.getVectorSearchStrategy(),
info.fieldInfo.getVectorSimilarityFunction(),
info.fieldInfo.isSoftDeletesField());
} else if (existingDocValuesType != docValuesType) {
throw new IllegalArgumentException(

View File

@ -295,7 +295,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
fieldType.pointIndexDimensionCount(),
fieldType.pointNumBytes(),
fieldType.vectorDimension(),
fieldType.vectorSearchStrategy(),
fieldType.vectorSimilarityFunction(),
field.equals(softDeletesField));
addAttributes(fi);
builder.add(fi);
@ -341,9 +341,9 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
if (r.nextBoolean()) {
int dimension = 1 + r.nextInt(VectorValues.MAX_DIMENSIONS);
VectorValues.SearchStrategy searchStrategy =
RandomPicks.randomFrom(r, VectorValues.SearchStrategy.values());
type.setVectorDimensionsAndSearchStrategy(dimension, searchStrategy);
VectorValues.SimilarityFunction similarityFunction =
RandomPicks.randomFrom(r, VectorValues.SimilarityFunction.values());
type.setVectorDimensionsAndSimilarityFunction(dimension, similarityFunction);
}
return type;
@ -412,7 +412,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
false);
}
}

View File

@ -335,7 +335,7 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
proto.getPointIndexDimensionCount(),
proto.getPointNumBytes(),
proto.getVectorDimension(),
proto.getVectorSearchStrategy(),
proto.getVectorSimilarityFunction(),
proto.isSoftDeletesField());
FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] {field});

View File

@ -49,7 +49,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
@Override
protected void addRandomFields(Document doc) {
doc.add(new VectorField("v2", randomVector(30), VectorValues.SearchStrategy.NONE));
doc.add(new VectorField("v2", randomVector(30), VectorValues.SimilarityFunction.NONE));
}
public void testFieldConstructor() {
@ -57,7 +57,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
VectorField field = new VectorField("f", v);
assertEquals(1, field.fieldType().vectorDimension());
assertEquals(
VectorValues.SearchStrategy.EUCLIDEAN_HNSW, field.fieldType().vectorSearchStrategy());
VectorValues.SimilarityFunction.EUCLIDEAN, field.fieldType().vectorSimilarityFunction());
assertSame(v, field.vectorValue());
}
@ -66,7 +66,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", null));
expectThrows(
IllegalArgumentException.class,
() -> new VectorField("f", new float[1], (VectorValues.SearchStrategy) null));
() -> new VectorField("f", new float[1], (VectorValues.SimilarityFunction) null));
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", new float[0]));
expectThrows(
IllegalArgumentException.class,
@ -88,13 +88,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
public void testFieldCreateFieldType() {
expectThrows(
IllegalArgumentException.class,
() -> VectorField.createHnswType(0, VectorValues.SearchStrategy.EUCLIDEAN_HNSW, 16, 16));
() -> VectorField.createHnswType(0, VectorValues.SimilarityFunction.EUCLIDEAN, 16, 16));
expectThrows(
IllegalArgumentException.class,
() ->
VectorField.createHnswType(
VectorValues.MAX_DIMENSIONS + 1,
VectorValues.SearchStrategy.EUCLIDEAN_HNSW,
VectorValues.SimilarityFunction.EUCLIDEAN,
16,
16));
expectThrows(
@ -104,7 +104,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
IllegalArgumentException.class,
() ->
VectorField.createHnswType(
VectorValues.MAX_DIMENSIONS + 1, VectorValues.SearchStrategy.NONE, 16, 16));
VectorValues.MAX_DIMENSIONS + 1, VectorValues.SimilarityFunction.NONE, 16, 16));
}
// Illegal schema change tests:
@ -113,11 +113,11 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[3], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc2.add(new VectorField("f", new float[3], VectorValues.SimilarityFunction.DOT_PRODUCT));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
String errMsg =
@ -129,31 +129,31 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.commit();
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[3], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc2.add(new VectorField("f", new float[3], VectorValues.SimilarityFunction.DOT_PRODUCT));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
String errMsg =
"cannot change field \"f\" from vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW "
+ "to inconsistent vector dimension=3, vector search strategy=DOT_PRODUCT_HNSW";
"cannot change field \"f\" from vector dimension=4, vector similarity function=DOT_PRODUCT "
+ "to inconsistent vector dimension=3, vector similarity function=DOT_PRODUCT";
assertEquals(errMsg, expected.getMessage());
}
}
public void testIllegalSearchStrategyChange() throws Exception {
public void testIllegalSimilarityFunctionChange() throws Exception {
// illegal change in the same segment
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
doc2.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
String errMsg =
@ -165,17 +165,17 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.commit();
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
doc2.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
String errMsg =
"cannot change field \"f\" from vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW "
+ "to inconsistent vector dimension=4, vector search strategy=EUCLIDEAN_HNSW";
"cannot change field \"f\" from vector dimension=4, vector similarity function=DOT_PRODUCT "
+ "to inconsistent vector dimension=4, vector similarity function=EUCLIDEAN";
assertEquals(errMsg, expected.getMessage());
}
}
@ -184,39 +184,39 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[1], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc2.add(new VectorField("f", new float[1], VectorValues.SimilarityFunction.DOT_PRODUCT));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
assertEquals(
"cannot change field \"f\" from vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW "
+ "to inconsistent vector dimension=1, vector search strategy=DOT_PRODUCT_HNSW",
"cannot change field \"f\" from vector dimension=4, vector similarity function=DOT_PRODUCT "
+ "to inconsistent vector dimension=1, vector similarity function=DOT_PRODUCT",
expected.getMessage());
}
}
}
public void testIllegalSearchStrategyChangeTwoWriters() throws Exception {
public void testIllegalSimilarityFunctionChangeTwoWriters() throws Exception {
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
doc2.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
assertEquals(
"cannot change field \"f\" from vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW "
+ "to inconsistent vector dimension=4, vector search strategy=EUCLIDEAN_HNSW",
"cannot change field \"f\" from vector dimension=4, vector similarity function=DOT_PRODUCT "
+ "to inconsistent vector dimension=4, vector similarity function=EUCLIDEAN",
expected.getMessage());
}
}
@ -225,7 +225,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
public void testAddIndexesDirectory0() throws Exception {
String fieldName = "field";
Document doc = new Document();
doc.add(new VectorField(fieldName, new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField(fieldName, new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
try (Directory dir = newDirectory();
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
@ -254,7 +254,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
w.addDocument(doc);
}
doc.add(
new VectorField(fieldName, new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
new VectorField(fieldName, new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
w2.addDocument(doc);
w2.addIndexes(dir);
@ -274,7 +274,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
String fieldName = "field";
float[] vector = new float[1];
Document doc = new Document();
doc.add(new VectorField(fieldName, vector, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField(fieldName, vector, VectorValues.SimilarityFunction.DOT_PRODUCT));
try (Directory dir = newDirectory();
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
@ -305,41 +305,41 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[5], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[5], VectorValues.SimilarityFunction.DOT_PRODUCT));
w2.addDocument(doc);
IllegalArgumentException expected =
expectThrows(
IllegalArgumentException.class, () -> w2.addIndexes(new Directory[] {dir}));
assertEquals(
"cannot change field \"f\" from vector dimension=5, vector search strategy=DOT_PRODUCT_HNSW "
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
"cannot change field \"f\" from vector dimension=5, vector similarity function=DOT_PRODUCT "
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
expected.getMessage());
}
}
}
public void testIllegalSearchStrategyChangeViaAddIndexesDirectory() throws Exception {
public void testIllegalSimilarityFunctionChangeViaAddIndexesDirectory() throws Exception {
try (Directory dir = newDirectory();
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
w2.addDocument(doc);
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w2.addIndexes(dir));
assertEquals(
"cannot change field \"f\" from vector dimension=4, vector search strategy=EUCLIDEAN_HNSW "
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
"cannot change field \"f\" from vector dimension=4, vector similarity function=EUCLIDEAN "
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
expected.getMessage());
}
}
@ -350,12 +350,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[5], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[5], VectorValues.SimilarityFunction.DOT_PRODUCT));
w2.addDocument(doc);
try (DirectoryReader r = DirectoryReader.open(dir)) {
IllegalArgumentException expected =
@ -363,25 +363,25 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
IllegalArgumentException.class,
() -> w2.addIndexes(new CodecReader[] {(CodecReader) getOnlyLeafReader(r)}));
assertEquals(
"cannot change field \"f\" from vector dimension=5, vector search strategy=DOT_PRODUCT_HNSW "
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
"cannot change field \"f\" from vector dimension=5, vector similarity function=DOT_PRODUCT "
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
expected.getMessage());
}
}
}
}
public void testIllegalSearchStrategyChangeViaAddIndexesCodecReader() throws Exception {
public void testIllegalSimilarityFunctionChangeViaAddIndexesCodecReader() throws Exception {
try (Directory dir = newDirectory();
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
w2.addDocument(doc);
try (DirectoryReader r = DirectoryReader.open(dir)) {
IllegalArgumentException expected =
@ -389,8 +389,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
IllegalArgumentException.class,
() -> w2.addIndexes(new CodecReader[] {(CodecReader) getOnlyLeafReader(r)}));
assertEquals(
"cannot change field \"f\" from vector dimension=4, vector search strategy=EUCLIDEAN_HNSW "
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
"cannot change field \"f\" from vector dimension=4, vector similarity function=EUCLIDEAN "
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
expected.getMessage());
}
}
@ -402,43 +402,43 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[5], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[5], VectorValues.SimilarityFunction.DOT_PRODUCT));
w2.addDocument(doc);
try (DirectoryReader r = DirectoryReader.open(dir)) {
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w2, r));
assertEquals(
"cannot change field \"f\" from vector dimension=5, vector search strategy=DOT_PRODUCT_HNSW "
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
"cannot change field \"f\" from vector dimension=5, vector similarity function=DOT_PRODUCT "
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
expected.getMessage());
}
}
}
}
public void testIllegalSearchStrategyChangeViaAddIndexesSlowCodecReader() throws Exception {
public void testIllegalSimilarityFunctionChangeViaAddIndexesSlowCodecReader() throws Exception {
try (Directory dir = newDirectory();
Directory dir2 = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
w2.addDocument(doc);
try (DirectoryReader r = DirectoryReader.open(dir)) {
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w2, r));
assertEquals(
"cannot change field \"f\" from vector dimension=4, vector search strategy=EUCLIDEAN_HNSW "
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
"cannot change field \"f\" from vector dimension=4, vector similarity function=EUCLIDEAN "
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
expected.getMessage());
}
}
@ -449,8 +449,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
IllegalArgumentException expected =
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
assertEquals(
@ -470,10 +470,10 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
new VectorField(
"f",
new float[VectorValues.MAX_DIMENSIONS + 1],
VectorValues.SearchStrategy.DOT_PRODUCT_HNSW)));
VectorValues.SimilarityFunction.DOT_PRODUCT)));
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[1], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
doc2.add(new VectorField("f", new float[1], VectorValues.SimilarityFunction.EUCLIDEAN));
w.addDocument(doc2);
}
}
@ -485,11 +485,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Exception e =
expectThrows(
IllegalArgumentException.class,
() -> doc.add(new VectorField("f", new float[0], VectorValues.SearchStrategy.NONE)));
() ->
doc.add(
new VectorField("f", new float[0], VectorValues.SimilarityFunction.NONE)));
assertEquals("cannot index an empty vector", e.getMessage());
Document doc2 = new Document();
doc2.add(new VectorField("f", new float[1], VectorValues.SearchStrategy.NONE));
doc2.add(new VectorField("f", new float[1], VectorValues.SimilarityFunction.NONE));
w.addDocument(doc2);
}
}
@ -499,14 +501,14 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(Codec.forName("SimpleText"));
try (IndexWriter w = new IndexWriter(dir, iwc)) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.forceMerge(1);
}
@ -520,12 +522,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, iwc)) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
}
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.forceMerge(1);
}
@ -533,7 +535,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
}
public void testInvalidVectorFieldUsage() {
VectorField field = new VectorField("field", new float[2], VectorValues.SearchStrategy.NONE);
VectorField field =
new VectorField("field", new float[2], VectorValues.SimilarityFunction.NONE);
expectThrows(IllegalArgumentException.class, () -> field.setIntValue(14));
@ -548,8 +551,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.NO));
doc.add(
new VectorField(
"v", new float[] {2, 3, 5}, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
new VectorField("v", new float[] {2, 3, 5}, VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.addDocument(new Document());
w.commit();
@ -572,14 +574,14 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
doc.add(new StringField("id", "0", Field.Store.NO));
doc.add(
new VectorField(
"v0", new float[] {2, 3, 5}, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
"v0", new float[] {2, 3, 5}, VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(
new VectorField(
"v1", new float[] {2, 3, 5}, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
"v1", new float[] {2, 3, 5}, VectorValues.SimilarityFunction.DOT_PRODUCT));
w.addDocument(doc);
w.forceMerge(1);
}
@ -591,13 +593,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
int[] fieldDocCounts = new int[numFields];
float[] fieldTotals = new float[numFields];
int[] fieldDims = new int[numFields];
VectorValues.SearchStrategy[] fieldSearchStrategies =
new VectorValues.SearchStrategy[numFields];
VectorValues.SimilarityFunction[] fieldSearchStrategies =
new VectorValues.SimilarityFunction[numFields];
for (int i = 0; i < numFields; i++) {
fieldDims[i] = random().nextInt(20) + 1;
fieldSearchStrategies[i] =
VectorValues.SearchStrategy.values()[
random().nextInt(VectorValues.SearchStrategy.values().length)];
VectorValues.SimilarityFunction.values()[
random().nextInt(VectorValues.SimilarityFunction.values().length)];
}
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig())) {
@ -644,15 +646,15 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc1 = new Document();
doc1.add(new VectorField(fieldName, v, VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
doc1.add(new VectorField(fieldName, v, VectorValues.SimilarityFunction.EUCLIDEAN));
v[0] = 1;
Document doc2 = new Document();
doc2.add(new VectorField(fieldName, v, VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
doc2.add(new VectorField(fieldName, v, VectorValues.SimilarityFunction.EUCLIDEAN));
iw.addDocument(doc1);
iw.addDocument(doc2);
v[0] = 2;
Document doc3 = new Document();
doc3.add(new VectorField(fieldName, v, VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
doc3.add(new VectorField(fieldName, v, VectorValues.SimilarityFunction.EUCLIDEAN));
iw.addDocument(doc3);
iw.forceMerge(1);
try (IndexReader reader = iw.getReader()) {
@ -707,15 +709,16 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
float[] v = new float[] {1};
doc.add(new VectorField("field1", v, VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
doc.add(new VectorField("field2", new float[] {1, 2, 3}, VectorValues.SearchStrategy.NONE));
doc.add(new VectorField("field1", v, VectorValues.SimilarityFunction.EUCLIDEAN));
doc.add(
new VectorField("field2", new float[] {1, 2, 3}, VectorValues.SimilarityFunction.NONE));
iw.addDocument(doc);
v[0] = 2;
iw.addDocument(doc);
doc = new Document();
doc.add(
new VectorField(
"field3", new float[] {1, 2, 3}, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
"field3", new float[] {1, 2, 3}, VectorValues.SimilarityFunction.DOT_PRODUCT));
iw.addDocument(doc);
iw.forceMerge(1);
try (IndexReader reader = iw.getReader()) {
@ -776,9 +779,9 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
if (random().nextBoolean() && values[i] != null) {
// sometimes use a shared scratch array
System.arraycopy(values[i], 0, scratch, 0, scratch.length);
add(iw, fieldName, i, scratch, VectorValues.SearchStrategy.NONE);
add(iw, fieldName, i, scratch, VectorValues.SimilarityFunction.NONE);
} else {
add(iw, fieldName, i, values[i], VectorValues.SearchStrategy.NONE);
add(iw, fieldName, i, values[i], VectorValues.SimilarityFunction.NONE);
}
if (random().nextInt(10) == 2) {
// sometimes delete a random document
@ -826,7 +829,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
/**
* Index random vectors, sometimes skipping documents, sometimes updating a document, sometimes
* merging, sometimes sorting the index, using an HNSW search strategy so as to also produce a
* merging, sometimes sorting the index, using the EUCLIDEAN similarity function so as to also
* produce a graph, and verify that the expected values can be read back consistently.
*/
public void testRandomWithUpdatesAndGraph() throws Exception {
@ -851,7 +854,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
values[i] = value;
id2value[id] = value;
id2ord[id] = i;
add(iw, fieldName, id, value, VectorValues.SearchStrategy.EUCLIDEAN_HNSW);
add(iw, fieldName, id, value, VectorValues.SimilarityFunction.EUCLIDEAN);
}
try (IndexReader reader = iw.getReader()) {
for (LeafReaderContext ctx : reader.leaves()) {
@ -888,14 +891,14 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
String field,
int id,
float[] vector,
VectorValues.SearchStrategy searchStrategy)
VectorValues.SimilarityFunction similarityFunction)
throws IOException {
add(iw, field, id, random().nextInt(100), vector, searchStrategy);
add(iw, field, id, random().nextInt(100), vector, similarityFunction);
}
private void add(IndexWriter iw, String field, int id, int sortkey, float[] vector)
throws IOException {
add(iw, field, id, sortkey, vector, VectorValues.SearchStrategy.NONE);
add(iw, field, id, sortkey, vector, VectorValues.SimilarityFunction.NONE);
}
private void add(
@ -904,11 +907,11 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
int id,
int sortkey,
float[] vector,
VectorValues.SearchStrategy searchStrategy)
VectorValues.SimilarityFunction similarityFunction)
throws IOException {
Document doc = new Document();
if (vector != null) {
doc.add(new VectorField(field, vector, searchStrategy));
doc.add(new VectorField(field, vector, similarityFunction));
}
doc.add(new NumericDocValuesField("sortkey", sortkey));
String idString = Integer.toString(id);
@ -930,10 +933,10 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new VectorField("v1", randomVector(3), VectorValues.SearchStrategy.NONE));
doc.add(new VectorField("v1", randomVector(3), VectorValues.SimilarityFunction.NONE));
w.addDocument(doc);
doc.add(new VectorField("v2", randomVector(3), VectorValues.SearchStrategy.NONE));
doc.add(new VectorField("v2", randomVector(3), VectorValues.SimilarityFunction.NONE));
w.addDocument(doc);
}
@ -951,13 +954,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
}
}
public void testSearchStrategyIdentifiers() {
// make sure we don't accidentally mess up search strategy identifiers by re-ordering their
public void testSimilarityFunctionIdentifiers() {
// make sure we don't accidentally mess up similarity function identifiers by re-ordering their
// enumerators
assertEquals(0, VectorValues.SearchStrategy.NONE.ordinal());
assertEquals(1, VectorValues.SearchStrategy.EUCLIDEAN_HNSW.ordinal());
assertEquals(2, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW.ordinal());
assertEquals(3, VectorValues.SearchStrategy.values().length);
assertEquals(0, VectorValues.SimilarityFunction.NONE.ordinal());
assertEquals(1, VectorValues.SimilarityFunction.EUCLIDEAN.ordinal());
assertEquals(2, VectorValues.SimilarityFunction.DOT_PRODUCT.ordinal());
assertEquals(3, VectorValues.SimilarityFunction.values().length);
}
public void testAdvance() throws Exception {
@ -969,7 +972,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
Document doc = new Document();
// randomly add a vector field
if (random().nextInt(4) == 3) {
doc.add(new VectorField(fieldName, new float[4], VectorValues.SearchStrategy.NONE));
doc.add(new VectorField(fieldName, new float[4], VectorValues.SimilarityFunction.NONE));
}
w.addDocument(doc);
}

View File

@ -84,7 +84,7 @@ public class MismatchedLeafReader extends FilterLeafReader {
oldInfo.getPointNumBytes(), // dimension numBytes
oldInfo.getVectorDimension(), // number of dimensions of the field's vector
// distance function for calculating similarity of the field's vector
oldInfo.getVectorSearchStrategy(),
oldInfo.getVectorSimilarityFunction(),
oldInfo.isSoftDeletesField()); // used as soft-deletes field
shuffled.set(i, newInfo);
}

View File

@ -140,7 +140,7 @@ public class RandomPostingsTester {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
false);
fieldUpto++;
@ -711,7 +711,7 @@ public class RandomPostingsTester {
0,
0,
0,
VectorValues.SearchStrategy.NONE,
VectorValues.SimilarityFunction.NONE,
false);
}