mirror of
https://github.com/apache/lucene.git
synced 2025-02-08 19:15:06 +00:00
LUCENE-9905: rename VectorValues.SearchStrategy to VectorValues.SimilarityFunction
This commit is contained in:
parent
3115f85697
commit
6d4b5eaba3
@ -212,7 +212,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
|
|||||||
pointIndexDimensionCount,
|
pointIndexDimensionCount,
|
||||||
pointNumBytes,
|
pointNumBytes,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
isSoftDeletesField);
|
isSoftDeletesField);
|
||||||
} catch (IllegalStateException e) {
|
} catch (IllegalStateException e) {
|
||||||
throw new CorruptIndexException(
|
throw new CorruptIndexException(
|
||||||
|
@ -158,7 +158,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
|
|||||||
SimpleTextUtil.readLine(input, scratch);
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
assert StringHelper.startsWith(scratch.get(), VECTOR_SEARCH_STRATEGY);
|
assert StringHelper.startsWith(scratch.get(), VECTOR_SEARCH_STRATEGY);
|
||||||
String scoreFunction = readString(VECTOR_SEARCH_STRATEGY.length, scratch);
|
String scoreFunction = readString(VECTOR_SEARCH_STRATEGY.length, scratch);
|
||||||
VectorValues.SearchStrategy vectorDistFunc = distanceFunction(scoreFunction);
|
VectorValues.SimilarityFunction vectorDistFunc = distanceFunction(scoreFunction);
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
assert StringHelper.startsWith(scratch.get(), SOFT_DELETES);
|
assert StringHelper.startsWith(scratch.get(), SOFT_DELETES);
|
||||||
@ -201,8 +201,8 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
|
|||||||
return DocValuesType.valueOf(dvType);
|
return DocValuesType.valueOf(dvType);
|
||||||
}
|
}
|
||||||
|
|
||||||
public VectorValues.SearchStrategy distanceFunction(String scoreFunction) {
|
public VectorValues.SimilarityFunction distanceFunction(String scoreFunction) {
|
||||||
return VectorValues.SearchStrategy.valueOf(scoreFunction);
|
return VectorValues.SimilarityFunction.valueOf(scoreFunction);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String readString(int offset, BytesRefBuilder scratch) {
|
private String readString(int offset, BytesRefBuilder scratch) {
|
||||||
@ -298,7 +298,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
|
|||||||
SimpleTextUtil.writeNewline(out);
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
SimpleTextUtil.write(out, VECTOR_SEARCH_STRATEGY);
|
SimpleTextUtil.write(out, VECTOR_SEARCH_STRATEGY);
|
||||||
SimpleTextUtil.write(out, fi.getVectorSearchStrategy().name(), scratch);
|
SimpleTextUtil.write(out, fi.getVectorSimilarityFunction().name(), scratch);
|
||||||
SimpleTextUtil.writeNewline(out);
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
SimpleTextUtil.write(out, SOFT_DELETES);
|
SimpleTextUtil.write(out, SOFT_DELETES);
|
||||||
|
@ -82,8 +82,8 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||||||
while (fieldNumber != -1) {
|
while (fieldNumber != -1) {
|
||||||
String fieldName = readString(in, FIELD_NAME);
|
String fieldName = readString(in, FIELD_NAME);
|
||||||
String scoreFunctionName = readString(in, SCORE_FUNCTION);
|
String scoreFunctionName = readString(in, SCORE_FUNCTION);
|
||||||
VectorValues.SearchStrategy searchStrategy =
|
VectorValues.SimilarityFunction similarityFunction =
|
||||||
VectorValues.SearchStrategy.valueOf(scoreFunctionName);
|
VectorValues.SimilarityFunction.valueOf(scoreFunctionName);
|
||||||
long vectorDataOffset = readLong(in, VECTOR_DATA_OFFSET);
|
long vectorDataOffset = readLong(in, VECTOR_DATA_OFFSET);
|
||||||
long vectorDataLength = readLong(in, VECTOR_DATA_LENGTH);
|
long vectorDataLength = readLong(in, VECTOR_DATA_LENGTH);
|
||||||
int dimension = readInt(in, VECTOR_DIMENSION);
|
int dimension = readInt(in, VECTOR_DIMENSION);
|
||||||
@ -95,7 +95,8 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||||||
assert fieldEntries.containsKey(fieldName) == false;
|
assert fieldEntries.containsKey(fieldName) == false;
|
||||||
fieldEntries.put(
|
fieldEntries.put(
|
||||||
fieldName,
|
fieldName,
|
||||||
new FieldEntry(dimension, searchStrategy, vectorDataOffset, vectorDataLength, docIds));
|
new FieldEntry(
|
||||||
|
dimension, similarityFunction, vectorDataOffset, vectorDataLength, docIds));
|
||||||
fieldNumber = readInt(in, FIELD_NUMBER);
|
fieldNumber = readInt(in, FIELD_NUMBER);
|
||||||
}
|
}
|
||||||
SimpleTextUtil.checkFooter(in);
|
SimpleTextUtil.checkFooter(in);
|
||||||
@ -204,7 +205,7 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||||||
private static class FieldEntry {
|
private static class FieldEntry {
|
||||||
|
|
||||||
final int dimension;
|
final int dimension;
|
||||||
final VectorValues.SearchStrategy searchStrategy;
|
final VectorValues.SimilarityFunction similarityFunction;
|
||||||
|
|
||||||
final long vectorDataOffset;
|
final long vectorDataOffset;
|
||||||
final long vectorDataLength;
|
final long vectorDataLength;
|
||||||
@ -212,12 +213,12 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||||||
|
|
||||||
FieldEntry(
|
FieldEntry(
|
||||||
int dimension,
|
int dimension,
|
||||||
VectorValues.SearchStrategy searchStrategy,
|
VectorValues.SimilarityFunction similarityFunction,
|
||||||
long vectorDataOffset,
|
long vectorDataOffset,
|
||||||
long vectorDataLength,
|
long vectorDataLength,
|
||||||
int[] ordToDoc) {
|
int[] ordToDoc) {
|
||||||
this.dimension = dimension;
|
this.dimension = dimension;
|
||||||
this.searchStrategy = searchStrategy;
|
this.similarityFunction = similarityFunction;
|
||||||
this.vectorDataOffset = vectorDataOffset;
|
this.vectorDataOffset = vectorDataOffset;
|
||||||
this.vectorDataLength = vectorDataLength;
|
this.vectorDataLength = vectorDataLength;
|
||||||
this.ordToDoc = ordToDoc;
|
this.ordToDoc = ordToDoc;
|
||||||
@ -260,8 +261,8 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchStrategy searchStrategy() {
|
public SimilarityFunction similarityFunction() {
|
||||||
return entry.searchStrategy;
|
return entry.similarityFunction;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -98,7 +98,7 @@ public class SimpleTextVectorWriter extends VectorWriter {
|
|||||||
throws IOException {
|
throws IOException {
|
||||||
writeField(meta, FIELD_NUMBER, field.number);
|
writeField(meta, FIELD_NUMBER, field.number);
|
||||||
writeField(meta, FIELD_NAME, field.name);
|
writeField(meta, FIELD_NAME, field.name);
|
||||||
writeField(meta, SCORE_FUNCTION, field.getVectorSearchStrategy().name());
|
writeField(meta, SCORE_FUNCTION, field.getVectorSimilarityFunction().name());
|
||||||
writeField(meta, VECTOR_DATA_OFFSET, vectorDataOffset);
|
writeField(meta, VECTOR_DATA_OFFSET, vectorDataOffset);
|
||||||
writeField(meta, VECTOR_DATA_LENGTH, vectorDataLength);
|
writeField(meta, VECTOR_DATA_LENGTH, vectorDataLength);
|
||||||
writeField(meta, VECTOR_DIMENSION, field.getVectorDimension());
|
writeField(meta, VECTOR_DIMENSION, field.getVectorDimension());
|
||||||
|
@ -116,7 +116,7 @@ public class TestBlockWriter extends LuceneTestCase {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
true);
|
true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -203,7 +203,7 @@ public class TestSTBlockReader extends LuceneTestCase {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,18 +68,18 @@ public abstract class VectorWriter implements Closeable {
|
|||||||
}
|
}
|
||||||
List<VectorValuesSub> subs = new ArrayList<>();
|
List<VectorValuesSub> subs = new ArrayList<>();
|
||||||
int dimension = -1;
|
int dimension = -1;
|
||||||
VectorValues.SearchStrategy searchStrategy = null;
|
VectorValues.SimilarityFunction similarityFunction = null;
|
||||||
int nonEmptySegmentIndex = 0;
|
int nonEmptySegmentIndex = 0;
|
||||||
for (int i = 0; i < mergeState.vectorReaders.length; i++) {
|
for (int i = 0; i < mergeState.vectorReaders.length; i++) {
|
||||||
VectorReader vectorReader = mergeState.vectorReaders[i];
|
VectorReader vectorReader = mergeState.vectorReaders[i];
|
||||||
if (vectorReader != null) {
|
if (vectorReader != null) {
|
||||||
if (mergeFieldInfo != null && mergeFieldInfo.hasVectorValues()) {
|
if (mergeFieldInfo != null && mergeFieldInfo.hasVectorValues()) {
|
||||||
int segmentDimension = mergeFieldInfo.getVectorDimension();
|
int segmentDimension = mergeFieldInfo.getVectorDimension();
|
||||||
VectorValues.SearchStrategy segmentSearchStrategy =
|
VectorValues.SimilarityFunction segmentSimilarityFunction =
|
||||||
mergeFieldInfo.getVectorSearchStrategy();
|
mergeFieldInfo.getVectorSimilarityFunction();
|
||||||
if (dimension == -1) {
|
if (dimension == -1) {
|
||||||
dimension = segmentDimension;
|
dimension = segmentDimension;
|
||||||
searchStrategy = mergeFieldInfo.getVectorSearchStrategy();
|
similarityFunction = mergeFieldInfo.getVectorSimilarityFunction();
|
||||||
} else if (dimension != segmentDimension) {
|
} else if (dimension != segmentDimension) {
|
||||||
throw new IllegalStateException(
|
throw new IllegalStateException(
|
||||||
"Varying dimensions for vector-valued field "
|
"Varying dimensions for vector-valued field "
|
||||||
@ -88,14 +88,14 @@ public abstract class VectorWriter implements Closeable {
|
|||||||
+ dimension
|
+ dimension
|
||||||
+ "!="
|
+ "!="
|
||||||
+ segmentDimension);
|
+ segmentDimension);
|
||||||
} else if (searchStrategy != segmentSearchStrategy) {
|
} else if (similarityFunction != segmentSimilarityFunction) {
|
||||||
throw new IllegalStateException(
|
throw new IllegalStateException(
|
||||||
"Varying search strategys for vector-valued field "
|
"Varying similarity functions for vector-valued field "
|
||||||
+ mergeFieldInfo.name
|
+ mergeFieldInfo.name
|
||||||
+ ": "
|
+ ": "
|
||||||
+ searchStrategy
|
+ similarityFunction
|
||||||
+ "!="
|
+ "!="
|
||||||
+ segmentSearchStrategy);
|
+ segmentSimilarityFunction);
|
||||||
}
|
}
|
||||||
VectorValues values = vectorReader.getVectorValues(mergeFieldInfo.name);
|
VectorValues values = vectorReader.getVectorValues(mergeFieldInfo.name);
|
||||||
if (values != null) {
|
if (values != null) {
|
||||||
@ -241,8 +241,8 @@ public abstract class VectorWriter implements Closeable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchStrategy searchStrategy() {
|
public SimilarityFunction similarityFunction() {
|
||||||
return subs.get(0).values.searchStrategy();
|
return subs.get(0).values.similarityFunction();
|
||||||
}
|
}
|
||||||
|
|
||||||
class MergerRandomAccess implements RandomAccessVectorValues {
|
class MergerRandomAccess implements RandomAccessVectorValues {
|
||||||
@ -272,8 +272,8 @@ public abstract class VectorWriter implements Closeable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchStrategy searchStrategy() {
|
public SimilarityFunction similarityFunction() {
|
||||||
return VectorValuesMerger.this.searchStrategy();
|
return VectorValuesMerger.this.similarityFunction();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -29,8 +29,7 @@ import org.apache.lucene.index.FieldInfos;
|
|||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.index.VectorValues;
|
import org.apache.lucene.index.VectorValues.SimilarityFunction;
|
||||||
import org.apache.lucene.index.VectorValues.SearchStrategy;
|
|
||||||
import org.apache.lucene.store.ChecksumIndexInput;
|
import org.apache.lucene.store.ChecksumIndexInput;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
@ -103,8 +102,8 @@ import org.apache.lucene.store.IndexOutput;
|
|||||||
* <li>VectorDistFunction: a byte containing distance function used for similarity calculation.
|
* <li>VectorDistFunction: a byte containing distance function used for similarity calculation.
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>0: no distance function is defined for this field.
|
* <li>0: no distance function is defined for this field.
|
||||||
* <li>1: EUCLIDEAN_HNSW distance. ({@link SearchStrategy#EUCLIDEAN_HNSW})
|
* <li>1: EUCLIDEAN distance. ({@link SimilarityFunction#EUCLIDEAN})
|
||||||
* <li>2: DOT_PRODUCT_HNSW score. ({@link SearchStrategy#DOT_PRODUCT_HNSW})
|
* <li>2: DOT_PRODUCT score. ({@link SimilarityFunction#DOT_PRODUCT})
|
||||||
* </ul>
|
* </ul>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
@ -173,7 +172,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
|
|||||||
pointNumBytes = 0;
|
pointNumBytes = 0;
|
||||||
}
|
}
|
||||||
final int vectorDimension = input.readVInt();
|
final int vectorDimension = input.readVInt();
|
||||||
final VectorValues.SearchStrategy vectorDistFunc = getDistFunc(input, input.readByte());
|
final SimilarityFunction vectorDistFunc = getDistFunc(input, input.readByte());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
infos[i] =
|
infos[i] =
|
||||||
@ -254,12 +253,11 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static VectorValues.SearchStrategy getDistFunc(IndexInput input, byte b)
|
private static SimilarityFunction getDistFunc(IndexInput input, byte b) throws IOException {
|
||||||
throws IOException {
|
if (b < 0 || b >= SimilarityFunction.values().length) {
|
||||||
if (b < 0 || b >= VectorValues.SearchStrategy.values().length) {
|
|
||||||
throw new CorruptIndexException("invalid distance function: " + b, input);
|
throw new CorruptIndexException("invalid distance function: " + b, input);
|
||||||
}
|
}
|
||||||
return VectorValues.SearchStrategy.values()[b];
|
return SimilarityFunction.values()[b];
|
||||||
}
|
}
|
||||||
|
|
||||||
static {
|
static {
|
||||||
@ -348,7 +346,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
|
|||||||
output.writeVInt(fi.getPointNumBytes());
|
output.writeVInt(fi.getPointNumBytes());
|
||||||
}
|
}
|
||||||
output.writeVInt(fi.getVectorDimension());
|
output.writeVInt(fi.getVectorDimension());
|
||||||
output.writeByte((byte) fi.getVectorSearchStrategy().ordinal());
|
output.writeByte((byte) fi.getVectorSimilarityFunction().ordinal());
|
||||||
}
|
}
|
||||||
CodecUtil.writeFooter(output);
|
CodecUtil.writeFooter(output);
|
||||||
}
|
}
|
||||||
|
@ -50,7 +50,7 @@ import org.apache.lucene.index.SegmentWriteState;
|
|||||||
*
|
*
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li><b>[int32]</b> field number
|
* <li><b>[int32]</b> field number
|
||||||
* <li><b>[int32]</b> vector search strategy ordinal
|
* <li><b>[int32]</b> vector similarity function ordinal
|
||||||
* <li><b>[vlong]</b> offset to this field's vectors in the .vec file
|
* <li><b>[vlong]</b> offset to this field's vectors in the .vec file
|
||||||
* <li><b>[vlong]</b> length of this field's vectors, in bytes
|
* <li><b>[vlong]</b> length of this field's vectors, in bytes
|
||||||
* <li><b>[vlong]</b> offset to this field's index in the .vex file
|
* <li><b>[vlong]</b> offset to this field's index in the .vex file
|
||||||
|
@ -187,24 +187,28 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private VectorValues.SearchStrategy readSearchStrategy(DataInput input) throws IOException {
|
private VectorValues.SimilarityFunction readSimilarityFunction(DataInput input)
|
||||||
int searchStrategyId = input.readInt();
|
throws IOException {
|
||||||
if (searchStrategyId < 0 || searchStrategyId >= VectorValues.SearchStrategy.values().length) {
|
int similarityFunctionId = input.readInt();
|
||||||
throw new CorruptIndexException("Invalid search strategy id: " + searchStrategyId, input);
|
if (similarityFunctionId < 0
|
||||||
|
|| similarityFunctionId >= VectorValues.SimilarityFunction.values().length) {
|
||||||
|
throw new CorruptIndexException(
|
||||||
|
"Invalid similarity function id: " + similarityFunctionId, input);
|
||||||
}
|
}
|
||||||
return VectorValues.SearchStrategy.values()[searchStrategyId];
|
return VectorValues.SimilarityFunction.values()[similarityFunctionId];
|
||||||
}
|
}
|
||||||
|
|
||||||
private FieldEntry readField(DataInput input) throws IOException {
|
private FieldEntry readField(DataInput input) throws IOException {
|
||||||
VectorValues.SearchStrategy searchStrategy = readSearchStrategy(input);
|
VectorValues.SimilarityFunction similarityFunction = readSimilarityFunction(input);
|
||||||
switch (searchStrategy) {
|
switch (similarityFunction) {
|
||||||
case NONE:
|
case NONE:
|
||||||
return new FieldEntry(input, searchStrategy);
|
return new FieldEntry(input, similarityFunction);
|
||||||
case DOT_PRODUCT_HNSW:
|
case DOT_PRODUCT:
|
||||||
case EUCLIDEAN_HNSW:
|
case EUCLIDEAN:
|
||||||
return new HnswGraphFieldEntry(input, searchStrategy);
|
return new HnswGraphFieldEntry(input, similarityFunction);
|
||||||
default:
|
default:
|
||||||
throw new CorruptIndexException("Unknown vector search strategy: " + searchStrategy, input);
|
throw new CorruptIndexException(
|
||||||
|
"Unknown vector similarity function: " + similarityFunction, input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -288,7 +292,7 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private KnnGraphValues getGraphValues(FieldEntry entry) throws IOException {
|
private KnnGraphValues getGraphValues(FieldEntry entry) throws IOException {
|
||||||
if (entry.searchStrategy.isHnsw()) {
|
if (entry.similarityFunction.isHnsw()) {
|
||||||
HnswGraphFieldEntry graphEntry = (HnswGraphFieldEntry) entry;
|
HnswGraphFieldEntry graphEntry = (HnswGraphFieldEntry) entry;
|
||||||
IndexInput bytesSlice =
|
IndexInput bytesSlice =
|
||||||
vectorIndex.slice("graph-data", entry.indexDataOffset, entry.indexDataLength);
|
vectorIndex.slice("graph-data", entry.indexDataOffset, entry.indexDataLength);
|
||||||
@ -306,7 +310,7 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||||||
private static class FieldEntry {
|
private static class FieldEntry {
|
||||||
|
|
||||||
final int dimension;
|
final int dimension;
|
||||||
final VectorValues.SearchStrategy searchStrategy;
|
final VectorValues.SimilarityFunction similarityFunction;
|
||||||
|
|
||||||
final long vectorDataOffset;
|
final long vectorDataOffset;
|
||||||
final long vectorDataLength;
|
final long vectorDataLength;
|
||||||
@ -314,8 +318,9 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||||||
final long indexDataLength;
|
final long indexDataLength;
|
||||||
final int[] ordToDoc;
|
final int[] ordToDoc;
|
||||||
|
|
||||||
FieldEntry(DataInput input, VectorValues.SearchStrategy searchStrategy) throws IOException {
|
FieldEntry(DataInput input, VectorValues.SimilarityFunction similarityFunction)
|
||||||
this.searchStrategy = searchStrategy;
|
throws IOException {
|
||||||
|
this.similarityFunction = similarityFunction;
|
||||||
vectorDataOffset = input.readVLong();
|
vectorDataOffset = input.readVLong();
|
||||||
vectorDataLength = input.readVLong();
|
vectorDataLength = input.readVLong();
|
||||||
indexDataOffset = input.readVLong();
|
indexDataOffset = input.readVLong();
|
||||||
@ -338,9 +343,9 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||||||
|
|
||||||
final long[] ordOffsets;
|
final long[] ordOffsets;
|
||||||
|
|
||||||
HnswGraphFieldEntry(DataInput input, VectorValues.SearchStrategy searchStrategy)
|
HnswGraphFieldEntry(DataInput input, VectorValues.SimilarityFunction similarityFunction)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
super(input, searchStrategy);
|
super(input, similarityFunction);
|
||||||
ordOffsets = new long[size()];
|
ordOffsets = new long[size()];
|
||||||
long offset = 0;
|
long offset = 0;
|
||||||
for (int i = 0; i < ordOffsets.length; i++) {
|
for (int i = 0; i < ordOffsets.length; i++) {
|
||||||
@ -385,8 +390,8 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchStrategy searchStrategy() {
|
public SimilarityFunction similarityFunction() {
|
||||||
return fieldEntry.searchStrategy;
|
return fieldEntry.similarityFunction;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -425,7 +430,7 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||||||
if (ord < 0) {
|
if (ord < 0) {
|
||||||
ord = -(ord + 1);
|
ord = -(ord + 1);
|
||||||
}
|
}
|
||||||
assert ord >= 0 && ord <= fieldEntry.ordToDoc.length;
|
assert ord <= fieldEntry.ordToDoc.length;
|
||||||
if (ord == fieldEntry.ordToDoc.length) {
|
if (ord == fieldEntry.ordToDoc.length) {
|
||||||
doc = NO_MORE_DOCS;
|
doc = NO_MORE_DOCS;
|
||||||
} else {
|
} else {
|
||||||
|
@ -121,7 +121,7 @@ public final class Lucene90VectorWriter extends VectorWriter {
|
|||||||
long[] offsets = new long[count];
|
long[] offsets = new long[count];
|
||||||
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;
|
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;
|
||||||
long vectorIndexOffset = vectorIndex.getFilePointer();
|
long vectorIndexOffset = vectorIndex.getFilePointer();
|
||||||
if (vectors.searchStrategy().isHnsw()) {
|
if (vectors.similarityFunction().isHnsw()) {
|
||||||
if (vectors instanceof RandomAccessVectorValuesProducer) {
|
if (vectors instanceof RandomAccessVectorValuesProducer) {
|
||||||
writeGraph(
|
writeGraph(
|
||||||
vectorIndex,
|
vectorIndex,
|
||||||
@ -146,7 +146,7 @@ public final class Lucene90VectorWriter extends VectorWriter {
|
|||||||
vectorIndexLength,
|
vectorIndexLength,
|
||||||
count,
|
count,
|
||||||
docIds);
|
docIds);
|
||||||
if (vectors.searchStrategy().isHnsw()) {
|
if (vectors.similarityFunction().isHnsw()) {
|
||||||
writeGraphOffsets(meta, offsets);
|
writeGraphOffsets(meta, offsets);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -162,7 +162,7 @@ public final class Lucene90VectorWriter extends VectorWriter {
|
|||||||
int[] docIds)
|
int[] docIds)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
meta.writeInt(field.number);
|
meta.writeInt(field.number);
|
||||||
meta.writeInt(field.getVectorSearchStrategy().ordinal());
|
meta.writeInt(field.getVectorSimilarityFunction().ordinal());
|
||||||
meta.writeVLong(vectorDataOffset);
|
meta.writeVLong(vectorDataOffset);
|
||||||
meta.writeVLong(vectorDataLength);
|
meta.writeVLong(vectorDataLength);
|
||||||
meta.writeVLong(indexDataOffset);
|
meta.writeVLong(indexDataOffset);
|
||||||
|
@ -42,7 +42,8 @@ public class FieldType implements IndexableFieldType {
|
|||||||
private int indexDimensionCount;
|
private int indexDimensionCount;
|
||||||
private int dimensionNumBytes;
|
private int dimensionNumBytes;
|
||||||
private int vectorDimension;
|
private int vectorDimension;
|
||||||
private VectorValues.SearchStrategy vectorSearchStrategy = VectorValues.SearchStrategy.NONE;
|
private VectorValues.SimilarityFunction vectorSimilarityFunction =
|
||||||
|
VectorValues.SimilarityFunction.NONE;
|
||||||
private Map<String, String> attributes;
|
private Map<String, String> attributes;
|
||||||
|
|
||||||
/** Create a new mutable FieldType with all of the properties from <code>ref</code> */
|
/** Create a new mutable FieldType with all of the properties from <code>ref</code> */
|
||||||
@ -60,7 +61,7 @@ public class FieldType implements IndexableFieldType {
|
|||||||
this.indexDimensionCount = ref.pointIndexDimensionCount();
|
this.indexDimensionCount = ref.pointIndexDimensionCount();
|
||||||
this.dimensionNumBytes = ref.pointNumBytes();
|
this.dimensionNumBytes = ref.pointNumBytes();
|
||||||
this.vectorDimension = ref.vectorDimension();
|
this.vectorDimension = ref.vectorDimension();
|
||||||
this.vectorSearchStrategy = ref.vectorSearchStrategy();
|
this.vectorSimilarityFunction = ref.vectorSimilarityFunction();
|
||||||
if (ref.getAttributes() != null) {
|
if (ref.getAttributes() != null) {
|
||||||
this.attributes = new HashMap<>(ref.getAttributes());
|
this.attributes = new HashMap<>(ref.getAttributes());
|
||||||
}
|
}
|
||||||
@ -369,8 +370,8 @@ public class FieldType implements IndexableFieldType {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Enable vector indexing, with the specified number of dimensions and distance function. */
|
/** Enable vector indexing, with the specified number of dimensions and distance function. */
|
||||||
public void setVectorDimensionsAndSearchStrategy(
|
public void setVectorDimensionsAndSimilarityFunction(
|
||||||
int numDimensions, VectorValues.SearchStrategy distFunc) {
|
int numDimensions, VectorValues.SimilarityFunction distFunc) {
|
||||||
checkIfFrozen();
|
checkIfFrozen();
|
||||||
if (numDimensions <= 0) {
|
if (numDimensions <= 0) {
|
||||||
throw new IllegalArgumentException("vector numDimensions must be > 0; got " + numDimensions);
|
throw new IllegalArgumentException("vector numDimensions must be > 0; got " + numDimensions);
|
||||||
@ -383,7 +384,7 @@ public class FieldType implements IndexableFieldType {
|
|||||||
+ numDimensions);
|
+ numDimensions);
|
||||||
}
|
}
|
||||||
this.vectorDimension = numDimensions;
|
this.vectorDimension = numDimensions;
|
||||||
this.vectorSearchStrategy = distFunc;
|
this.vectorSimilarityFunction = distFunc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -392,8 +393,8 @@ public class FieldType implements IndexableFieldType {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorValues.SearchStrategy vectorSearchStrategy() {
|
public VectorValues.SimilarityFunction vectorSimilarityFunction() {
|
||||||
return vectorSearchStrategy;
|
return vectorSimilarityFunction;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -25,15 +25,17 @@ import org.apache.lucene.util.hnsw.HnswGraphBuilder;
|
|||||||
* are dense - that is, every dimension of a vector contains an explicit value, stored packed into
|
* are dense - that is, every dimension of a vector contains an explicit value, stored packed into
|
||||||
* an array (of type float[]) whose length is the vector dimension. Values can be retrieved using
|
* an array (of type float[]) whose length is the vector dimension. Values can be retrieved using
|
||||||
* {@link VectorValues}, which is a forward-only docID-based iterator and also offers random-access
|
* {@link VectorValues}, which is a forward-only docID-based iterator and also offers random-access
|
||||||
* by dense ordinal (not docId). VectorValues.SearchStrategys may be used to compare vectors at
|
* by dense ordinal (not docId). VectorValues.SimilarityFunction may be used to compare vectors at
|
||||||
* query time (for example as part of result ranking). A VectorField may be associated with a search
|
* query time (for example as part of result ranking). A VectorField may be associated with a search
|
||||||
* strategy that defines the metric used for nearest-neighbor search among vectors of that field,
|
* similarity function defining the metric used for nearest-neighbor search among vectors of that
|
||||||
* but at the moment this association is purely nominal: it is intended for future use by the
|
* field.
|
||||||
* to-be-implemented nearest neighbors search.
|
*
|
||||||
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class VectorField extends Field {
|
public class VectorField extends Field {
|
||||||
|
|
||||||
private static FieldType createType(float[] v, VectorValues.SearchStrategy searchStrategy) {
|
private static FieldType createType(
|
||||||
|
float[] v, VectorValues.SimilarityFunction similarityFunction) {
|
||||||
if (v == null) {
|
if (v == null) {
|
||||||
throw new IllegalArgumentException("vector value must not be null");
|
throw new IllegalArgumentException("vector value must not be null");
|
||||||
}
|
}
|
||||||
@ -45,11 +47,11 @@ public class VectorField extends Field {
|
|||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"cannot index vectors with dimension greater than " + VectorValues.MAX_DIMENSIONS);
|
"cannot index vectors with dimension greater than " + VectorValues.MAX_DIMENSIONS);
|
||||||
}
|
}
|
||||||
if (searchStrategy == null) {
|
if (similarityFunction == null) {
|
||||||
throw new IllegalArgumentException("search strategy must not be null");
|
throw new IllegalArgumentException("similarity function must not be null");
|
||||||
}
|
}
|
||||||
FieldType type = new FieldType();
|
FieldType type = new FieldType();
|
||||||
type.setVectorDimensionsAndSearchStrategy(dimension, searchStrategy);
|
type.setVectorDimensionsAndSimilarityFunction(dimension, similarityFunction);
|
||||||
type.freeze();
|
type.freeze();
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
@ -59,13 +61,16 @@ public class VectorField extends Field {
|
|||||||
* parameters that would be used by HnswGraphBuilder while constructing HNSW graph.
|
* parameters that would be used by HnswGraphBuilder while constructing HNSW graph.
|
||||||
*
|
*
|
||||||
* @param dimension dimension of vectors
|
* @param dimension dimension of vectors
|
||||||
* @param searchStrategy a function defining vector proximity.
|
* @param similarityFunction a function defining vector proximity.
|
||||||
* @param maxConn max-connections at each HNSW graph node
|
* @param maxConn max-connections at each HNSW graph node
|
||||||
* @param beamWidth size of list to be used while constructing HNSW graph
|
* @param beamWidth size of list to be used while constructing HNSW graph
|
||||||
* @throws IllegalArgumentException if any parameter is null, or has dimension > 1024.
|
* @throws IllegalArgumentException if any parameter is null, or has dimension > 1024.
|
||||||
*/
|
*/
|
||||||
public static FieldType createHnswType(
|
public static FieldType createHnswType(
|
||||||
int dimension, VectorValues.SearchStrategy searchStrategy, int maxConn, int beamWidth) {
|
int dimension,
|
||||||
|
VectorValues.SimilarityFunction similarityFunction,
|
||||||
|
int maxConn,
|
||||||
|
int beamWidth) {
|
||||||
if (dimension == 0) {
|
if (dimension == 0) {
|
||||||
throw new IllegalArgumentException("cannot index an empty vector");
|
throw new IllegalArgumentException("cannot index an empty vector");
|
||||||
}
|
}
|
||||||
@ -73,12 +78,12 @@ public class VectorField extends Field {
|
|||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"cannot index vectors with dimension greater than " + VectorValues.MAX_DIMENSIONS);
|
"cannot index vectors with dimension greater than " + VectorValues.MAX_DIMENSIONS);
|
||||||
}
|
}
|
||||||
if (searchStrategy == null || !searchStrategy.isHnsw()) {
|
if (similarityFunction == null || !similarityFunction.isHnsw()) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"search strategy must not be null or non HNSW type, received: " + searchStrategy);
|
"similarity function must not be null, received: " + similarityFunction);
|
||||||
}
|
}
|
||||||
FieldType type = new FieldType();
|
FieldType type = new FieldType();
|
||||||
type.setVectorDimensionsAndSearchStrategy(dimension, searchStrategy);
|
type.setVectorDimensionsAndSimilarityFunction(dimension, similarityFunction);
|
||||||
type.putAttribute(HnswGraphBuilder.HNSW_MAX_CONN_ATTRIBUTE_KEY, String.valueOf(maxConn));
|
type.putAttribute(HnswGraphBuilder.HNSW_MAX_CONN_ATTRIBUTE_KEY, String.valueOf(maxConn));
|
||||||
type.putAttribute(HnswGraphBuilder.HNSW_BEAM_WIDTH_ATTRIBUTE_KEY, String.valueOf(beamWidth));
|
type.putAttribute(HnswGraphBuilder.HNSW_BEAM_WIDTH_ATTRIBUTE_KEY, String.valueOf(beamWidth));
|
||||||
type.freeze();
|
type.freeze();
|
||||||
@ -87,25 +92,26 @@ public class VectorField extends Field {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a numeric vector field. Fields are single-valued: each document has either one value or
|
* Creates a numeric vector field. Fields are single-valued: each document has either one value or
|
||||||
* no value. Vectors of a single field share the same dimension and search strategy. Note that
|
* no value. Vectors of a single field share the same dimension and similarity function. Note that
|
||||||
* some strategies (notably dot-product) require values to be unit-length, which can be enforced
|
* some similarity functions (notably dot-product) require values to be unit-length, which can be enforced
|
||||||
* using VectorUtil.l2Normalize(float[]).
|
* using VectorUtil.l2Normalize(float[]).
|
||||||
*
|
*
|
||||||
* @param name field name
|
* @param name field name
|
||||||
* @param vector value
|
* @param vector value
|
||||||
* @param searchStrategy a function defining vector proximity.
|
* @param similarityFunction a function defining vector proximity.
|
||||||
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
|
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
|
||||||
* dimension > 1024.
|
* dimension > 1024.
|
||||||
*/
|
*/
|
||||||
public VectorField(String name, float[] vector, VectorValues.SearchStrategy searchStrategy) {
|
public VectorField(
|
||||||
super(name, createType(vector, searchStrategy));
|
String name, float[] vector, VectorValues.SimilarityFunction similarityFunction) {
|
||||||
|
super(name, createType(vector, similarityFunction));
|
||||||
fieldsData = vector;
|
fieldsData = vector;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a numeric vector field with the default EUCLIDEAN_HNSW (L2) search strategy. Fields are
|
* Creates a numeric vector field with the default EUCLIDEAN (L2) similarity. Fields are
|
||||||
* single-valued: each document has either one value or no value. Vectors of a single field share
|
* single-valued: each document has either one value or no value. Vectors of a single field share
|
||||||
* the same dimension and search strategy.
|
* the same dimension and similarity function.
|
||||||
*
|
*
|
||||||
* @param name field name
|
* @param name field name
|
||||||
* @param vector value
|
* @param vector value
|
||||||
@ -113,12 +119,12 @@ public class VectorField extends Field {
|
|||||||
* dimension > 1024.
|
* dimension > 1024.
|
||||||
*/
|
*/
|
||||||
public VectorField(String name, float[] vector) {
|
public VectorField(String name, float[] vector) {
|
||||||
this(name, vector, VectorValues.SearchStrategy.EUCLIDEAN_HNSW);
|
this(name, vector, VectorValues.SimilarityFunction.EUCLIDEAN);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a numeric vector field. Fields are single-valued: each document has either one value or
|
* Creates a numeric vector field. Fields are single-valued: each document has either one value or
|
||||||
* no value. Vectors of a single field share the same dimension and search strategy.
|
* no value. Vectors of a single field share the same dimension and similarity function.
|
||||||
*
|
*
|
||||||
* @param name field name
|
* @param name field name
|
||||||
* @param vector value
|
* @param vector value
|
||||||
|
@ -38,6 +38,8 @@ import org.apache.lucene.codecs.PointsReader;
|
|||||||
import org.apache.lucene.codecs.PostingsFormat;
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
|
import org.apache.lucene.codecs.VectorReader;
|
||||||
|
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.DocumentStoredFieldVisitor;
|
import org.apache.lucene.document.DocumentStoredFieldVisitor;
|
||||||
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
|
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
|
||||||
@ -2336,6 +2338,29 @@ public final class CheckIndex implements Closeable {
|
|||||||
+ docCount
|
+ docCount
|
||||||
+ " docs with values");
|
+ " docs with values");
|
||||||
}
|
}
|
||||||
|
VectorReader vectorReader = reader.getVectorReader();
|
||||||
|
if (vectorReader instanceof Lucene90VectorReader) {
|
||||||
|
KnnGraphValues graphValues =
|
||||||
|
((Lucene90VectorReader) vectorReader).getGraphValues(fieldInfo.name);
|
||||||
|
int size = graphValues.size();
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
graphValues.seek(i);
|
||||||
|
for (int neighbor = graphValues.nextNeighbor();
|
||||||
|
neighbor != NO_MORE_DOCS;
|
||||||
|
neighbor = graphValues.nextNeighbor()) {
|
||||||
|
if (neighbor < 0 || neighbor >= size) {
|
||||||
|
throw new RuntimeException(
|
||||||
|
"Field \""
|
||||||
|
+ fieldInfo.name
|
||||||
|
+ "\" has an invalid neighbor ordinal: "
|
||||||
|
+ neighbor
|
||||||
|
+ " which should be in [0,"
|
||||||
|
+ size
|
||||||
|
+ ")");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
status.totalVectorValues += docCount;
|
status.totalVectorValues += docCount;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -56,7 +56,7 @@ public final class FieldInfo {
|
|||||||
|
|
||||||
// if it is a positive value, it means this field indexes vectors
|
// if it is a positive value, it means this field indexes vectors
|
||||||
private final int vectorDimension;
|
private final int vectorDimension;
|
||||||
private final VectorValues.SearchStrategy vectorSearchStrategy;
|
private final VectorValues.SimilarityFunction vectorSimilarityFunction;
|
||||||
|
|
||||||
// whether this field is used as the soft-deletes field
|
// whether this field is used as the soft-deletes field
|
||||||
private final boolean softDeletesField;
|
private final boolean softDeletesField;
|
||||||
@ -80,7 +80,7 @@ public final class FieldInfo {
|
|||||||
int pointIndexDimensionCount,
|
int pointIndexDimensionCount,
|
||||||
int pointNumBytes,
|
int pointNumBytes,
|
||||||
int vectorDimension,
|
int vectorDimension,
|
||||||
VectorValues.SearchStrategy vectorSearchStrategy,
|
VectorValues.SimilarityFunction vectorSimilarityFunction,
|
||||||
boolean softDeletesField) {
|
boolean softDeletesField) {
|
||||||
this.name = Objects.requireNonNull(name);
|
this.name = Objects.requireNonNull(name);
|
||||||
this.number = number;
|
this.number = number;
|
||||||
@ -105,7 +105,7 @@ public final class FieldInfo {
|
|||||||
this.pointIndexDimensionCount = pointIndexDimensionCount;
|
this.pointIndexDimensionCount = pointIndexDimensionCount;
|
||||||
this.pointNumBytes = pointNumBytes;
|
this.pointNumBytes = pointNumBytes;
|
||||||
this.vectorDimension = vectorDimension;
|
this.vectorDimension = vectorDimension;
|
||||||
this.vectorSearchStrategy = vectorSearchStrategy;
|
this.vectorSimilarityFunction = vectorSimilarityFunction;
|
||||||
this.softDeletesField = softDeletesField;
|
this.softDeletesField = softDeletesField;
|
||||||
this.checkConsistency();
|
this.checkConsistency();
|
||||||
}
|
}
|
||||||
@ -194,18 +194,18 @@ public final class FieldInfo {
|
|||||||
+ "')");
|
+ "')");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vectorSearchStrategy == null) {
|
if (vectorSimilarityFunction == null) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"Vector search strategy must not be null (field: '" + name + "')");
|
"Vector similarity function must not be null (field: '" + name + "')");
|
||||||
}
|
}
|
||||||
if (vectorDimension < 0) {
|
if (vectorDimension < 0) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"vectorDimension must be >=0; got " + vectorDimension + " (field: '" + name + "')");
|
"vectorDimension must be >=0; got " + vectorDimension + " (field: '" + name + "')");
|
||||||
}
|
}
|
||||||
if (vectorDimension == 0 && vectorSearchStrategy != VectorValues.SearchStrategy.NONE) {
|
if (vectorDimension == 0 && vectorSimilarityFunction != VectorValues.SimilarityFunction.NONE) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"vector search strategy must be NONE when dimension = 0; got "
|
"vector similarity function must be NONE when dimension = 0; got "
|
||||||
+ vectorSearchStrategy
|
+ vectorSimilarityFunction
|
||||||
+ " (field: '"
|
+ " (field: '"
|
||||||
+ name
|
+ name
|
||||||
+ "')");
|
+ "')");
|
||||||
@ -237,9 +237,9 @@ public final class FieldInfo {
|
|||||||
verifySameVectorOptions(
|
verifySameVectorOptions(
|
||||||
fieldName,
|
fieldName,
|
||||||
this.vectorDimension,
|
this.vectorDimension,
|
||||||
this.vectorSearchStrategy,
|
this.vectorSimilarityFunction,
|
||||||
o.vectorDimension,
|
o.vectorDimension,
|
||||||
o.vectorSearchStrategy);
|
o.vectorSimilarityFunction);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -355,21 +355,21 @@ public final class FieldInfo {
|
|||||||
static void verifySameVectorOptions(
|
static void verifySameVectorOptions(
|
||||||
String fieldName,
|
String fieldName,
|
||||||
int vd1,
|
int vd1,
|
||||||
VectorValues.SearchStrategy vst1,
|
VectorValues.SimilarityFunction vsf1,
|
||||||
int vd2,
|
int vd2,
|
||||||
VectorValues.SearchStrategy vst2) {
|
VectorValues.SimilarityFunction vsf2) {
|
||||||
if (vd1 != vd2 || vst1 != vst2) {
|
if (vd1 != vd2 || vsf1 != vsf2) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"cannot change field \""
|
"cannot change field \""
|
||||||
+ fieldName
|
+ fieldName
|
||||||
+ "\" from vector dimension="
|
+ "\" from vector dimension="
|
||||||
+ vd1
|
+ vd1
|
||||||
+ ", vector search strategy="
|
+ ", vector similarity function="
|
||||||
+ vst1
|
+ vsf1
|
||||||
+ " to inconsistent vector dimension="
|
+ " to inconsistent vector dimension="
|
||||||
+ vd2
|
+ vd2
|
||||||
+ ", vector search strategy="
|
+ ", vector similarity function="
|
||||||
+ vst2);
|
+ vsf2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -478,9 +478,9 @@ public final class FieldInfo {
|
|||||||
return vectorDimension;
|
return vectorDimension;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns {@link VectorValues.SearchStrategy} for the field */
|
/** Returns {@link VectorValues.SimilarityFunction} for the field */
|
||||||
public VectorValues.SearchStrategy getVectorSearchStrategy() {
|
public VectorValues.SimilarityFunction getVectorSimilarityFunction() {
|
||||||
return vectorSearchStrategy;
|
return vectorSimilarityFunction;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Record that this field is indexed with docvalues, with the specified type */
|
/** Record that this field is indexed with docvalues, with the specified type */
|
||||||
|
@ -299,11 +299,11 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||||||
|
|
||||||
static final class FieldVectorProperties {
|
static final class FieldVectorProperties {
|
||||||
final int numDimensions;
|
final int numDimensions;
|
||||||
final VectorValues.SearchStrategy searchStrategy;
|
final VectorValues.SimilarityFunction similarityFunction;
|
||||||
|
|
||||||
FieldVectorProperties(int numDimensions, VectorValues.SearchStrategy searchStrategy) {
|
FieldVectorProperties(int numDimensions, VectorValues.SimilarityFunction similarityFunction) {
|
||||||
this.numDimensions = numDimensions;
|
this.numDimensions = numDimensions;
|
||||||
this.searchStrategy = searchStrategy;
|
this.similarityFunction = similarityFunction;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -384,7 +384,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||||||
fi.getPointNumBytes()));
|
fi.getPointNumBytes()));
|
||||||
vectorProps.put(
|
vectorProps.put(
|
||||||
fieldName,
|
fieldName,
|
||||||
new FieldVectorProperties(fi.getVectorDimension(), fi.getVectorSearchStrategy()));
|
new FieldVectorProperties(fi.getVectorDimension(), fi.getVectorSimilarityFunction()));
|
||||||
}
|
}
|
||||||
return fieldNumber.intValue();
|
return fieldNumber.intValue();
|
||||||
}
|
}
|
||||||
@ -442,9 +442,9 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||||||
verifySameVectorOptions(
|
verifySameVectorOptions(
|
||||||
fieldName,
|
fieldName,
|
||||||
props.numDimensions,
|
props.numDimensions,
|
||||||
props.searchStrategy,
|
props.similarityFunction,
|
||||||
fi.getVectorDimension(),
|
fi.getVectorDimension(),
|
||||||
fi.getVectorSearchStrategy());
|
fi.getVectorSimilarityFunction());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -486,7 +486,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
(softDeletesFieldName != null && softDeletesFieldName.equals(fieldName)));
|
(softDeletesFieldName != null && softDeletesFieldName.equals(fieldName)));
|
||||||
addOrGet(fi);
|
addOrGet(fi);
|
||||||
}
|
}
|
||||||
@ -567,7 +567,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
isSoftDeletesField);
|
isSoftDeletesField);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -678,7 +678,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||||||
fi.getPointIndexDimensionCount(),
|
fi.getPointIndexDimensionCount(),
|
||||||
fi.getPointNumBytes(),
|
fi.getPointNumBytes(),
|
||||||
fi.getVectorDimension(),
|
fi.getVectorDimension(),
|
||||||
fi.getVectorSearchStrategy(),
|
fi.getVectorSimilarityFunction(),
|
||||||
fi.isSoftDeletesField());
|
fi.isSoftDeletesField());
|
||||||
byName.put(fiNew.getName(), fiNew);
|
byName.put(fiNew.getName(), fiNew);
|
||||||
return fiNew;
|
return fiNew;
|
||||||
|
@ -101,8 +101,8 @@ public interface IndexableFieldType {
|
|||||||
/** The number of dimensions of the field's vector value */
|
/** The number of dimensions of the field's vector value */
|
||||||
int vectorDimension();
|
int vectorDimension();
|
||||||
|
|
||||||
/** The {@link VectorValues.SearchStrategy} of the field's vector value */
|
/** The {@link VectorValues.SimilarityFunction} of the field's vector value */
|
||||||
VectorValues.SearchStrategy vectorSearchStrategy();
|
VectorValues.SimilarityFunction vectorSimilarityFunction();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Attributes for the field type.
|
* Attributes for the field type.
|
||||||
|
@ -679,7 +679,7 @@ final class IndexingChain implements Accountable {
|
|||||||
s.pointIndexDimensionCount,
|
s.pointIndexDimensionCount,
|
||||||
s.pointNumBytes,
|
s.pointNumBytes,
|
||||||
s.vectorDimension,
|
s.vectorDimension,
|
||||||
s.vectorSearchStrategy,
|
s.vectorSimilarityFunction,
|
||||||
pf.fieldName.equals(fieldInfos.getSoftDeletesFieldName())));
|
pf.fieldName.equals(fieldInfos.getSoftDeletesFieldName())));
|
||||||
pf.setFieldInfo(fi);
|
pf.setFieldInfo(fi);
|
||||||
if (fi.getIndexOptions() != IndexOptions.NONE) {
|
if (fi.getIndexOptions() != IndexOptions.NONE) {
|
||||||
@ -822,7 +822,7 @@ final class IndexingChain implements Accountable {
|
|||||||
fieldType.pointNumBytes());
|
fieldType.pointNumBytes());
|
||||||
}
|
}
|
||||||
if (fieldType.vectorDimension() != 0) {
|
if (fieldType.vectorDimension() != 0) {
|
||||||
schema.setVectors(fieldType.vectorSearchStrategy(), fieldType.vectorDimension());
|
schema.setVectors(fieldType.vectorSimilarityFunction(), fieldType.vectorDimension());
|
||||||
}
|
}
|
||||||
if (fieldType.getAttributes() != null && fieldType.getAttributes().isEmpty() == false) {
|
if (fieldType.getAttributes() != null && fieldType.getAttributes().isEmpty() == false) {
|
||||||
schema.updateAttributes(fieldType.getAttributes());
|
schema.updateAttributes(fieldType.getAttributes());
|
||||||
@ -1324,7 +1324,8 @@ final class IndexingChain implements Accountable {
|
|||||||
private int pointIndexDimensionCount = 0;
|
private int pointIndexDimensionCount = 0;
|
||||||
private int pointNumBytes = 0;
|
private int pointNumBytes = 0;
|
||||||
private int vectorDimension = 0;
|
private int vectorDimension = 0;
|
||||||
private VectorValues.SearchStrategy vectorSearchStrategy = VectorValues.SearchStrategy.NONE;
|
private VectorValues.SimilarityFunction vectorSimilarityFunction =
|
||||||
|
VectorValues.SimilarityFunction.NONE;
|
||||||
|
|
||||||
private static String errMsg =
|
private static String errMsg =
|
||||||
"Inconsistency of field data structures across documents for field ";
|
"Inconsistency of field data structures across documents for field ";
|
||||||
@ -1379,12 +1380,12 @@ final class IndexingChain implements Accountable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void setVectors(VectorValues.SearchStrategy searchStrategy, int dimension) {
|
void setVectors(VectorValues.SimilarityFunction similarityFunction, int dimension) {
|
||||||
if (vectorSearchStrategy == VectorValues.SearchStrategy.NONE) {
|
if (vectorSimilarityFunction == VectorValues.SimilarityFunction.NONE) {
|
||||||
this.vectorDimension = dimension;
|
this.vectorDimension = dimension;
|
||||||
this.vectorSearchStrategy = searchStrategy;
|
this.vectorSimilarityFunction = similarityFunction;
|
||||||
} else {
|
} else {
|
||||||
assertSame(vectorSearchStrategy == searchStrategy && vectorDimension == dimension);
|
assertSame(vectorSimilarityFunction == similarityFunction && vectorDimension == dimension);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1399,7 +1400,7 @@ final class IndexingChain implements Accountable {
|
|||||||
pointIndexDimensionCount = 0;
|
pointIndexDimensionCount = 0;
|
||||||
pointNumBytes = 0;
|
pointNumBytes = 0;
|
||||||
vectorDimension = 0;
|
vectorDimension = 0;
|
||||||
vectorSearchStrategy = VectorValues.SearchStrategy.NONE;
|
vectorSimilarityFunction = VectorValues.SimilarityFunction.NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
void assertSameSchema(FieldInfo fi) {
|
void assertSameSchema(FieldInfo fi) {
|
||||||
@ -1413,7 +1414,7 @@ final class IndexingChain implements Accountable {
|
|||||||
&& pointIndexDimensionCount == fi.getPointIndexDimensionCount()
|
&& pointIndexDimensionCount == fi.getPointIndexDimensionCount()
|
||||||
&& pointNumBytes == fi.getPointNumBytes()
|
&& pointNumBytes == fi.getPointNumBytes()
|
||||||
&& vectorDimension == fi.getVectorDimension()
|
&& vectorDimension == fi.getVectorDimension()
|
||||||
&& vectorSearchStrategy == fi.getVectorSearchStrategy());
|
&& vectorSimilarityFunction == fi.getVectorSimilarityFunction());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -33,8 +33,8 @@ public interface RandomAccessVectorValues {
|
|||||||
/** Return the dimension of the returned vector values */
|
/** Return the dimension of the returned vector values */
|
||||||
int dimension();
|
int dimension();
|
||||||
|
|
||||||
/** Return the search strategy used to compare these vectors */
|
/** Return the similarity function used to compare these vectors */
|
||||||
VectorValues.SearchStrategy searchStrategy();
|
VectorValues.SimilarityFunction similarityFunction();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the vector value indexed at the given ordinal. The provided floating point array may be
|
* Return the vector value indexed at the given ordinal. The provided floating point array may be
|
||||||
|
@ -722,7 +722,7 @@ final class ReadersAndUpdates {
|
|||||||
fi.getPointIndexDimensionCount(),
|
fi.getPointIndexDimensionCount(),
|
||||||
fi.getPointNumBytes(),
|
fi.getPointNumBytes(),
|
||||||
fi.getVectorDimension(),
|
fi.getVectorDimension(),
|
||||||
fi.getVectorSearchStrategy(),
|
fi.getVectorSimilarityFunction(),
|
||||||
fi.isSoftDeletesField());
|
fi.isSoftDeletesField());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -51,8 +51,8 @@ public abstract class VectorValues extends DocIdSetIterator {
|
|||||||
*/
|
*/
|
||||||
public abstract int size();
|
public abstract int size();
|
||||||
|
|
||||||
/** Return the search strategy used to compare these vectors */
|
/** Return the similarity function used to compare these vectors */
|
||||||
public abstract SearchStrategy searchStrategy();
|
public abstract SimilarityFunction similarityFunction();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the vector value for the current document ID. It is illegal to call this method when the
|
* Return the vector value for the current document ID. It is illegal to call this method when the
|
||||||
@ -76,35 +76,36 @@ public abstract class VectorValues extends DocIdSetIterator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search strategy. This is a label describing the method used during indexing and searching of
|
* Vector similarity function; used in search to return top K most similar vectors to a target
|
||||||
* the vectors in order to determine the nearest neighbors.
|
* vector. This is a label describing the method used during indexing and searching of the vectors
|
||||||
|
* in order to determine the nearest neighbors.
|
||||||
*/
|
*/
|
||||||
public enum SearchStrategy {
|
public enum SimilarityFunction {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* No search strategy is provided. Note: {@link VectorReader#search(String, float[], int, int)}
|
* No similarity function is provided. Note: {@link VectorReader#search(float[], int, int)} is
|
||||||
* is not supported for fields specifying this strategy.
|
* not supported for fields specifying this.
|
||||||
*/
|
*/
|
||||||
NONE,
|
NONE,
|
||||||
|
|
||||||
/** HNSW graph built using Euclidean distance */
|
/** HNSW graph built using Euclidean distance */
|
||||||
EUCLIDEAN_HNSW(true),
|
EUCLIDEAN(true),
|
||||||
|
|
||||||
/** HNSW graph built using dot product */
|
/** HNSW graph built using dot product */
|
||||||
DOT_PRODUCT_HNSW;
|
DOT_PRODUCT;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If true, the scores associated with vector comparisons in this strategy are in reverse order;
|
* If true, the scores associated with vector comparisons are in reverse order; that is, lower
|
||||||
* that is, lower scores represent more similar vectors. Otherwise, if false, higher scores
|
* scores represent more similar vectors. Otherwise, if false, higher scores represent more
|
||||||
* represent more similar vectors.
|
* similar vectors.
|
||||||
*/
|
*/
|
||||||
public final boolean reversed;
|
public final boolean reversed;
|
||||||
|
|
||||||
SearchStrategy(boolean reversed) {
|
SimilarityFunction(boolean reversed) {
|
||||||
this.reversed = reversed;
|
this.reversed = reversed;
|
||||||
}
|
}
|
||||||
|
|
||||||
SearchStrategy() {
|
SimilarityFunction() {
|
||||||
reversed = false;
|
reversed = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,25 +114,25 @@ public abstract class VectorValues extends DocIdSetIterator {
|
|||||||
*
|
*
|
||||||
* @param v1 a vector
|
* @param v1 a vector
|
||||||
* @param v2 another vector, of the same dimension
|
* @param v2 another vector, of the same dimension
|
||||||
* @return the value of the strategy's score function applied to the two vectors
|
* @return the value of the similarity function applied to the two vectors
|
||||||
*/
|
*/
|
||||||
public float compare(float[] v1, float[] v2) {
|
public float compare(float[] v1, float[] v2) {
|
||||||
switch (this) {
|
switch (this) {
|
||||||
case EUCLIDEAN_HNSW:
|
case EUCLIDEAN:
|
||||||
return squareDistance(v1, v2);
|
return squareDistance(v1, v2);
|
||||||
case DOT_PRODUCT_HNSW:
|
case DOT_PRODUCT:
|
||||||
return dotProduct(v1, v2);
|
return dotProduct(v1, v2);
|
||||||
case NONE:
|
case NONE:
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException("Incomparable search strategy: " + this);
|
throw new IllegalStateException("Incomparable similarity function: " + this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return true if vectors indexed using this strategy will be indexed using an HNSW graph */
|
/** Return true if vectors indexed using this similarity will be indexed using an HNSW graph */
|
||||||
public boolean isHnsw() {
|
public boolean isHnsw() {
|
||||||
switch (this) {
|
switch (this) {
|
||||||
case EUCLIDEAN_HNSW:
|
case EUCLIDEAN:
|
||||||
case DOT_PRODUCT_HNSW:
|
case DOT_PRODUCT:
|
||||||
return true;
|
return true;
|
||||||
case NONE:
|
case NONE:
|
||||||
default:
|
default:
|
||||||
@ -158,8 +159,8 @@ public abstract class VectorValues extends DocIdSetIterator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchStrategy searchStrategy() {
|
public SimilarityFunction similarityFunction() {
|
||||||
return SearchStrategy.NONE;
|
return SimilarityFunction.NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -29,7 +29,11 @@ import org.apache.lucene.util.BytesRef;
|
|||||||
import org.apache.lucene.util.Counter;
|
import org.apache.lucene.util.Counter;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
/** Buffers up pending vector value(s) per doc, then flushes when segment flushes. */
|
/**
|
||||||
|
* Buffers up pending vector value(s) per doc, then flushes when segment flushes.
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
class VectorValuesWriter {
|
class VectorValuesWriter {
|
||||||
|
|
||||||
private final FieldInfo fieldInfo;
|
private final FieldInfo fieldInfo;
|
||||||
@ -108,7 +112,7 @@ class VectorValuesWriter {
|
|||||||
docsWithField,
|
docsWithField,
|
||||||
vectors,
|
vectors,
|
||||||
fieldInfo.getVectorDimension(),
|
fieldInfo.getVectorDimension(),
|
||||||
fieldInfo.getVectorSearchStrategy());
|
fieldInfo.getVectorSimilarityFunction());
|
||||||
if (sortMap != null) {
|
if (sortMap != null) {
|
||||||
vectorWriter.writeField(fieldInfo, new SortingVectorValues(vectorValues, sortMap));
|
vectorWriter.writeField(fieldInfo, new SortingVectorValues(vectorValues, sortMap));
|
||||||
} else {
|
} else {
|
||||||
@ -186,8 +190,8 @@ class VectorValuesWriter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchStrategy searchStrategy() {
|
public SimilarityFunction similarityFunction() {
|
||||||
return delegate.searchStrategy();
|
return delegate.similarityFunction();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -220,8 +224,8 @@ class VectorValuesWriter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchStrategy searchStrategy() {
|
public SimilarityFunction similarityFunction() {
|
||||||
return delegateRA.searchStrategy();
|
return delegateRA.similarityFunction();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -244,7 +248,7 @@ class VectorValuesWriter {
|
|||||||
|
|
||||||
// These are always the vectors of a VectorValuesWriter, which are copied when added to it
|
// These are always the vectors of a VectorValuesWriter, which are copied when added to it
|
||||||
final List<float[]> vectors;
|
final List<float[]> vectors;
|
||||||
final SearchStrategy searchStrategy;
|
final SimilarityFunction similarityFunction;
|
||||||
final int dimension;
|
final int dimension;
|
||||||
|
|
||||||
final ByteBuffer buffer;
|
final ByteBuffer buffer;
|
||||||
@ -259,11 +263,11 @@ class VectorValuesWriter {
|
|||||||
DocsWithFieldSet docsWithField,
|
DocsWithFieldSet docsWithField,
|
||||||
List<float[]> vectors,
|
List<float[]> vectors,
|
||||||
int dimension,
|
int dimension,
|
||||||
SearchStrategy searchStrategy) {
|
SimilarityFunction similarityFunction) {
|
||||||
this.docsWithField = docsWithField;
|
this.docsWithField = docsWithField;
|
||||||
this.vectors = vectors;
|
this.vectors = vectors;
|
||||||
this.dimension = dimension;
|
this.dimension = dimension;
|
||||||
this.searchStrategy = searchStrategy;
|
this.similarityFunction = similarityFunction;
|
||||||
buffer = ByteBuffer.allocate(dimension * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
|
buffer = ByteBuffer.allocate(dimension * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
|
||||||
binaryValue = new BytesRef(buffer.array());
|
binaryValue = new BytesRef(buffer.array());
|
||||||
raBuffer = ByteBuffer.allocate(dimension * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
|
raBuffer = ByteBuffer.allocate(dimension * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
|
||||||
@ -273,7 +277,7 @@ class VectorValuesWriter {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public RandomAccessVectorValues randomAccess() {
|
public RandomAccessVectorValues randomAccess() {
|
||||||
return new BufferedVectorValues(docsWithField, vectors, dimension, searchStrategy);
|
return new BufferedVectorValues(docsWithField, vectors, dimension, similarityFunction);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -287,8 +291,8 @@ class VectorValuesWriter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchStrategy searchStrategy() {
|
public SimilarityFunction similarityFunction() {
|
||||||
return searchStrategy;
|
return similarityFunction;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -99,11 +99,11 @@ public final class HnswGraph extends KnnGraphValues {
|
|||||||
KnnGraphValues graphValues,
|
KnnGraphValues graphValues,
|
||||||
Random random)
|
Random random)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
VectorValues.SearchStrategy searchStrategy = vectors.searchStrategy();
|
VectorValues.SimilarityFunction similarityFunction = vectors.similarityFunction();
|
||||||
int size = graphValues.size();
|
int size = graphValues.size();
|
||||||
|
|
||||||
// MIN heap, holding the top results
|
// MIN heap, holding the top results
|
||||||
NeighborQueue results = new NeighborQueue(numSeed, searchStrategy.reversed);
|
NeighborQueue results = new NeighborQueue(numSeed, similarityFunction.reversed);
|
||||||
|
|
||||||
// set of ordinals that have been visited by search on this layer, used to avoid backtracking
|
// set of ordinals that have been visited by search on this layer, used to avoid backtracking
|
||||||
SparseFixedBitSet visited = new SparseFixedBitSet(size);
|
SparseFixedBitSet visited = new SparseFixedBitSet(size);
|
||||||
@ -114,17 +114,17 @@ public final class HnswGraph extends KnnGraphValues {
|
|||||||
if (visited.get(entryPoint) == false) {
|
if (visited.get(entryPoint) == false) {
|
||||||
visited.set(entryPoint);
|
visited.set(entryPoint);
|
||||||
// explore the topK starting points of some random numSeed probes
|
// explore the topK starting points of some random numSeed probes
|
||||||
results.add(entryPoint, searchStrategy.compare(query, vectors.vectorValue(entryPoint)));
|
results.add(entryPoint, similarityFunction.compare(query, vectors.vectorValue(entryPoint)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MAX heap, from which to pull the candidate nodes
|
// MAX heap, from which to pull the candidate nodes
|
||||||
NeighborQueue candidates = results.copy(!searchStrategy.reversed);
|
NeighborQueue candidates = results.copy(!similarityFunction.reversed);
|
||||||
|
|
||||||
// Set the bound to the worst current result and below reject any newly-generated candidates
|
// Set the bound to the worst current result and below reject any newly-generated candidates
|
||||||
// failing
|
// failing
|
||||||
// to exceed this bound
|
// to exceed this bound
|
||||||
BoundsChecker bound = BoundsChecker.create(searchStrategy.reversed);
|
BoundsChecker bound = BoundsChecker.create(similarityFunction.reversed);
|
||||||
bound.set(results.topScore());
|
bound.set(results.topScore());
|
||||||
while (candidates.size() > 0) {
|
while (candidates.size() > 0) {
|
||||||
// get the best candidate (closest or best scoring)
|
// get the best candidate (closest or best scoring)
|
||||||
@ -143,7 +143,7 @@ public final class HnswGraph extends KnnGraphValues {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
visited.set(friendOrd);
|
visited.set(friendOrd);
|
||||||
float score = searchStrategy.compare(query, vectors.vectorValue(friendOrd));
|
float score = similarityFunction.compare(query, vectors.vectorValue(friendOrd));
|
||||||
if (results.insertWithOverflow(friendOrd, score)) {
|
if (results.insertWithOverflow(friendOrd, score)) {
|
||||||
candidates.add(friendOrd, score);
|
candidates.add(friendOrd, score);
|
||||||
bound.set(results.topScore());
|
bound.set(results.topScore());
|
||||||
|
@ -54,7 +54,7 @@ public final class HnswGraphBuilder {
|
|||||||
private final int beamWidth;
|
private final int beamWidth;
|
||||||
private final NeighborArray scratch;
|
private final NeighborArray scratch;
|
||||||
|
|
||||||
private final VectorValues.SearchStrategy searchStrategy;
|
private final VectorValues.SimilarityFunction similarityFunction;
|
||||||
private final RandomAccessVectorValues vectorValues;
|
private final RandomAccessVectorValues vectorValues;
|
||||||
private final Random random;
|
private final Random random;
|
||||||
private final BoundsChecker bound;
|
private final BoundsChecker bound;
|
||||||
@ -87,8 +87,8 @@ public final class HnswGraphBuilder {
|
|||||||
RandomAccessVectorValuesProducer vectors, int maxConn, int beamWidth, long seed) {
|
RandomAccessVectorValuesProducer vectors, int maxConn, int beamWidth, long seed) {
|
||||||
vectorValues = vectors.randomAccess();
|
vectorValues = vectors.randomAccess();
|
||||||
buildVectors = vectors.randomAccess();
|
buildVectors = vectors.randomAccess();
|
||||||
searchStrategy = vectorValues.searchStrategy();
|
similarityFunction = vectorValues.similarityFunction();
|
||||||
if (searchStrategy == VectorValues.SearchStrategy.NONE) {
|
if (similarityFunction == VectorValues.SimilarityFunction.NONE) {
|
||||||
throw new IllegalStateException("No distance function");
|
throw new IllegalStateException("No distance function");
|
||||||
}
|
}
|
||||||
if (maxConn <= 0) {
|
if (maxConn <= 0) {
|
||||||
@ -100,7 +100,7 @@ public final class HnswGraphBuilder {
|
|||||||
this.maxConn = maxConn;
|
this.maxConn = maxConn;
|
||||||
this.beamWidth = beamWidth;
|
this.beamWidth = beamWidth;
|
||||||
this.hnsw = new HnswGraph(maxConn);
|
this.hnsw = new HnswGraph(maxConn);
|
||||||
bound = BoundsChecker.create(searchStrategy.reversed);
|
bound = BoundsChecker.create(similarityFunction.reversed);
|
||||||
random = new Random(seed);
|
random = new Random(seed);
|
||||||
scratch = new NeighborArray(Math.max(beamWidth, maxConn + 1));
|
scratch = new NeighborArray(Math.max(beamWidth, maxConn + 1));
|
||||||
}
|
}
|
||||||
@ -232,7 +232,7 @@ public final class HnswGraphBuilder {
|
|||||||
bound.set(score);
|
bound.set(score);
|
||||||
for (int i = 0; i < neighbors.size(); i++) {
|
for (int i = 0; i < neighbors.size(); i++) {
|
||||||
float diversityCheck =
|
float diversityCheck =
|
||||||
searchStrategy.compare(candidate, vectorValues.vectorValue(neighbors.node[i]));
|
similarityFunction.compare(candidate, vectorValues.vectorValue(neighbors.node[i]));
|
||||||
if (bound.check(diversityCheck) == false) {
|
if (bound.check(diversityCheck) == false) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -269,7 +269,7 @@ public final class HnswGraphBuilder {
|
|||||||
float[] nbrVector = vectorValues.vectorValue(nbrNode);
|
float[] nbrVector = vectorValues.vectorValue(nbrNode);
|
||||||
for (int j = maxConn; j > i; j--) {
|
for (int j = maxConn; j > i; j--) {
|
||||||
float diversityCheck =
|
float diversityCheck =
|
||||||
searchStrategy.compare(nbrVector, buildVectors.vectorValue(neighbors.node[j]));
|
similarityFunction.compare(nbrVector, buildVectors.vectorValue(neighbors.node[j]));
|
||||||
if (bound.check(diversityCheck) == false) {
|
if (bound.check(diversityCheck) == false) {
|
||||||
// node j is too similar to node i given its score relative to the base node
|
// node j is too similar to node i given its score relative to the base node
|
||||||
// replace it with the new node, which is at [maxConn]
|
// replace it with the new node, which is at [maxConn]
|
||||||
|
@ -87,16 +87,16 @@ public class TestPerFieldConsistency extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static Field randomVectorField(Random random, String fieldName) {
|
private static Field randomVectorField(Random random, String fieldName) {
|
||||||
VectorValues.SearchStrategy searchStrategy =
|
VectorValues.SimilarityFunction similarityFunction =
|
||||||
RandomPicks.randomFrom(random, VectorValues.SearchStrategy.values());
|
RandomPicks.randomFrom(random, VectorValues.SimilarityFunction.values());
|
||||||
while (searchStrategy == VectorValues.SearchStrategy.NONE) {
|
while (similarityFunction == VectorValues.SimilarityFunction.NONE) {
|
||||||
searchStrategy = RandomPicks.randomFrom(random, VectorValues.SearchStrategy.values());
|
similarityFunction = RandomPicks.randomFrom(random, VectorValues.SimilarityFunction.values());
|
||||||
}
|
}
|
||||||
float[] values = new float[randomIntBetween(1, 10)];
|
float[] values = new float[randomIntBetween(1, 10)];
|
||||||
for (int i = 0; i < values.length; i++) {
|
for (int i = 0; i < values.length; i++) {
|
||||||
values[i] = randomFloat();
|
values[i] = randomFloat();
|
||||||
}
|
}
|
||||||
return new VectorField(fieldName, values, searchStrategy);
|
return new VectorField(fieldName, values, similarityFunction);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Field[] randomFieldsWithTheSameName(String fieldName) {
|
private static Field[] randomFieldsWithTheSameName(String fieldName) {
|
||||||
|
@ -112,7 +112,7 @@ public class TestCodecs extends LuceneTestCase {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
false));
|
false));
|
||||||
}
|
}
|
||||||
this.terms = terms;
|
this.terms = terms;
|
||||||
|
@ -260,7 +260,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
false));
|
false));
|
||||||
}
|
}
|
||||||
int idx =
|
int idx =
|
||||||
@ -279,7 +279,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
false));
|
false));
|
||||||
assertEquals("Field numbers 0 through 9 were allocated", 10, idx);
|
assertEquals("Field numbers 0 through 9 were allocated", 10, idx);
|
||||||
|
|
||||||
@ -300,7 +300,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
false));
|
false));
|
||||||
assertEquals("Field numbers should reset after clear()", 0, idx);
|
assertEquals("Field numbers should reset after clear()", 0, idx);
|
||||||
}
|
}
|
||||||
|
@ -63,7 +63,7 @@ public class TestFieldsReader extends LuceneTestCase {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
field.name().equals(softDeletesFieldName)));
|
field.name().equals(softDeletesFieldName)));
|
||||||
}
|
}
|
||||||
dir = newDirectory();
|
dir = newDirectory();
|
||||||
|
@ -113,8 +113,8 @@ public class TestIndexableField extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorValues.SearchStrategy vectorSearchStrategy() {
|
public VectorValues.SimilarityFunction vectorSimilarityFunction() {
|
||||||
return VectorValues.SearchStrategy.NONE;
|
return VectorValues.SimilarityFunction.NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -34,7 +34,7 @@ import org.apache.lucene.document.FieldType;
|
|||||||
import org.apache.lucene.document.SortedDocValuesField;
|
import org.apache.lucene.document.SortedDocValuesField;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.document.VectorField;
|
import org.apache.lucene.document.VectorField;
|
||||||
import org.apache.lucene.index.VectorValues.SearchStrategy;
|
import org.apache.lucene.index.VectorValues.SimilarityFunction;
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
@ -53,7 +53,7 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||||||
|
|
||||||
private static int maxConn = HnswGraphBuilder.DEFAULT_MAX_CONN;
|
private static int maxConn = HnswGraphBuilder.DEFAULT_MAX_CONN;
|
||||||
|
|
||||||
private SearchStrategy searchStrategy;
|
private SimilarityFunction similarityFunction;
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setup() {
|
public void setup() {
|
||||||
@ -61,8 +61,8 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||||||
if (random().nextBoolean()) {
|
if (random().nextBoolean()) {
|
||||||
maxConn = random().nextInt(256) + 3;
|
maxConn = random().nextInt(256) + 3;
|
||||||
}
|
}
|
||||||
int strategy = random().nextInt(SearchStrategy.values().length - 1) + 1;
|
int similarity = random().nextInt(SimilarityFunction.values().length - 1) + 1;
|
||||||
searchStrategy = SearchStrategy.values()[strategy];
|
similarityFunction = SimilarityFunction.values()[similarity];
|
||||||
}
|
}
|
||||||
|
|
||||||
@After
|
@After
|
||||||
@ -212,7 +212,7 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||||||
/** Verify that searching does something reasonable */
|
/** Verify that searching does something reasonable */
|
||||||
public void testSearch() throws Exception {
|
public void testSearch() throws Exception {
|
||||||
// We can't use dot product here since the vectors are laid out on a grid, not a sphere.
|
// We can't use dot product here since the vectors are laid out on a grid, not a sphere.
|
||||||
searchStrategy = SearchStrategy.EUCLIDEAN_HNSW;
|
similarityFunction = SimilarityFunction.EUCLIDEAN;
|
||||||
IndexWriterConfig config = newIndexWriterConfig();
|
IndexWriterConfig config = newIndexWriterConfig();
|
||||||
config.setCodec(Codec.forName("Lucene90")); // test is not compatible with simpletext
|
config.setCodec(Codec.forName("Lucene90")); // test is not compatible with simpletext
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
@ -434,16 +434,16 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void add(IndexWriter iw, int id, float[] vector) throws IOException {
|
private void add(IndexWriter iw, int id, float[] vector) throws IOException {
|
||||||
add(iw, id, vector, searchStrategy);
|
add(iw, id, vector, similarityFunction);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void add(IndexWriter iw, int id, float[] vector, SearchStrategy searchStrategy)
|
private void add(IndexWriter iw, int id, float[] vector, SimilarityFunction similarityFunction)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
if (vector != null) {
|
if (vector != null) {
|
||||||
FieldType fieldType =
|
FieldType fieldType =
|
||||||
VectorField.createHnswType(
|
VectorField.createHnswType(
|
||||||
vector.length, searchStrategy, maxConn, HnswGraphBuilder.DEFAULT_BEAM_WIDTH);
|
vector.length, similarityFunction, maxConn, HnswGraphBuilder.DEFAULT_BEAM_WIDTH);
|
||||||
doc.add(new VectorField(KNN_GRAPH_FIELD, vector, fieldType));
|
doc.add(new VectorField(KNN_GRAPH_FIELD, vector, fieldType));
|
||||||
}
|
}
|
||||||
String idString = Integer.toString(id);
|
String idString = Integer.toString(id);
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
|
|
||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
import static org.apache.lucene.index.VectorValues.SearchStrategy.NONE;
|
import static org.apache.lucene.index.VectorValues.SimilarityFunction.NONE;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -70,8 +70,8 @@ public class KnnGraphTester {
|
|||||||
|
|
||||||
private static final String KNN_FIELD = "knn";
|
private static final String KNN_FIELD = "knn";
|
||||||
private static final String ID_FIELD = "id";
|
private static final String ID_FIELD = "id";
|
||||||
private static final VectorValues.SearchStrategy SEARCH_STRATEGY =
|
private static final VectorValues.SimilarityFunction SIMILARITY_FUNCTION =
|
||||||
VectorValues.SearchStrategy.DOT_PRODUCT_HNSW;
|
VectorValues.SimilarityFunction.DOT_PRODUCT;
|
||||||
|
|
||||||
private int numDocs;
|
private int numDocs;
|
||||||
private int dim;
|
private int dim;
|
||||||
@ -542,10 +542,10 @@ public class KnnGraphTester {
|
|||||||
.order(ByteOrder.LITTLE_ENDIAN)
|
.order(ByteOrder.LITTLE_ENDIAN)
|
||||||
.asFloatBuffer();
|
.asFloatBuffer();
|
||||||
offset += blockSize;
|
offset += blockSize;
|
||||||
NeighborQueue queue = new NeighborQueue(topK, SEARCH_STRATEGY.reversed);
|
NeighborQueue queue = new NeighborQueue(topK, SIMILARITY_FUNCTION.reversed);
|
||||||
for (; j < numDocs && vectors.hasRemaining(); j++) {
|
for (; j < numDocs && vectors.hasRemaining(); j++) {
|
||||||
vectors.get(vector);
|
vectors.get(vector);
|
||||||
float d = SEARCH_STRATEGY.compare(query, vector);
|
float d = SIMILARITY_FUNCTION.compare(query, vector);
|
||||||
queue.insertWithOverflow(j, d);
|
queue.insertWithOverflow(j, d);
|
||||||
}
|
}
|
||||||
result[i] = new int[topK];
|
result[i] = new int[topK];
|
||||||
@ -572,7 +572,7 @@ public class KnnGraphTester {
|
|||||||
|
|
||||||
FieldType fieldType =
|
FieldType fieldType =
|
||||||
VectorField.createHnswType(
|
VectorField.createHnswType(
|
||||||
dim, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW, maxConn, beamWidth);
|
dim, VectorValues.SimilarityFunction.DOT_PRODUCT, maxConn, beamWidth);
|
||||||
if (quiet == false) {
|
if (quiet == false) {
|
||||||
iwc.setInfoStream(new PrintStreamInfoStream(System.out));
|
iwc.setInfoStream(new PrintStreamInfoStream(System.out));
|
||||||
System.out.println("creating index in " + indexPath);
|
System.out.println("creating index in " + indexPath);
|
||||||
@ -667,8 +667,8 @@ public class KnnGraphTester {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VectorValues.SearchStrategy searchStrategy() {
|
public VectorValues.SimilarityFunction similarityFunction() {
|
||||||
return SEARCH_STRATEGY;
|
return SIMILARITY_FUNCTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -30,13 +30,13 @@ class MockVectorValues extends VectorValues
|
|||||||
protected final int dimension;
|
protected final int dimension;
|
||||||
protected final float[][] denseValues;
|
protected final float[][] denseValues;
|
||||||
protected final float[][] values;
|
protected final float[][] values;
|
||||||
protected final SearchStrategy searchStrategy;
|
protected final SimilarityFunction similarityFunction;
|
||||||
private final int numVectors;
|
private final int numVectors;
|
||||||
|
|
||||||
private int pos = -1;
|
private int pos = -1;
|
||||||
|
|
||||||
MockVectorValues(SearchStrategy searchStrategy, float[][] values) {
|
MockVectorValues(SimilarityFunction similarityFunction, float[][] values) {
|
||||||
this.searchStrategy = searchStrategy;
|
this.similarityFunction = similarityFunction;
|
||||||
this.dimension = values[0].length;
|
this.dimension = values[0].length;
|
||||||
this.values = values;
|
this.values = values;
|
||||||
int maxDoc = values.length;
|
int maxDoc = values.length;
|
||||||
@ -52,7 +52,7 @@ class MockVectorValues extends VectorValues
|
|||||||
}
|
}
|
||||||
|
|
||||||
public MockVectorValues copy() {
|
public MockVectorValues copy() {
|
||||||
return new MockVectorValues(searchStrategy, values);
|
return new MockVectorValues(similarityFunction, values);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -61,8 +61,8 @@ class MockVectorValues extends VectorValues
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchStrategy searchStrategy() {
|
public SimilarityFunction similarityFunction() {
|
||||||
return searchStrategy;
|
return similarityFunction;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -73,7 +73,7 @@ public class TestHnsw extends LuceneTestCase {
|
|||||||
indexedDoc++;
|
indexedDoc++;
|
||||||
}
|
}
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("field", v2.vectorValue(), v2.searchStrategy));
|
doc.add(new VectorField("field", v2.vectorValue(), v2.similarityFunction));
|
||||||
doc.add(new StoredField("id", v2.docID()));
|
doc.add(new StoredField("id", v2.docID()));
|
||||||
iw.addDocument(doc);
|
iw.addDocument(doc);
|
||||||
nVec++;
|
nVec++;
|
||||||
@ -83,7 +83,7 @@ public class TestHnsw extends LuceneTestCase {
|
|||||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||||
for (LeafReaderContext ctx : reader.leaves()) {
|
for (LeafReaderContext ctx : reader.leaves()) {
|
||||||
VectorValues values = ctx.reader().getVectorValues("field");
|
VectorValues values = ctx.reader().getVectorValues("field");
|
||||||
assertEquals(vectors.searchStrategy, values.searchStrategy());
|
assertEquals(vectors.similarityFunction, values.similarityFunction());
|
||||||
assertEquals(dim, values.dimension());
|
assertEquals(dim, values.dimension());
|
||||||
assertEquals(nVec, values.size());
|
assertEquals(nVec, values.size());
|
||||||
assertEquals(indexedDoc, ctx.reader().maxDoc());
|
assertEquals(indexedDoc, ctx.reader().maxDoc());
|
||||||
@ -164,7 +164,7 @@ public class TestHnsw extends LuceneTestCase {
|
|||||||
// Some carefully checked test cases with simple 2d vectors on the unit circle:
|
// Some carefully checked test cases with simple 2d vectors on the unit circle:
|
||||||
MockVectorValues vectors =
|
MockVectorValues vectors =
|
||||||
new MockVectorValues(
|
new MockVectorValues(
|
||||||
VectorValues.SearchStrategy.DOT_PRODUCT_HNSW,
|
VectorValues.SimilarityFunction.DOT_PRODUCT,
|
||||||
new float[][] {
|
new float[][] {
|
||||||
unitVector2d(0.5),
|
unitVector2d(0.5),
|
||||||
unitVector2d(0.75),
|
unitVector2d(0.75),
|
||||||
@ -236,12 +236,12 @@ public class TestHnsw extends LuceneTestCase {
|
|||||||
for (int i = 0; i < 100; i++) {
|
for (int i = 0; i < 100; i++) {
|
||||||
float[] query = randomVector(random(), dim);
|
float[] query = randomVector(random(), dim);
|
||||||
NeighborQueue actual = HnswGraph.search(query, topK, 100, vectors, hnsw, random());
|
NeighborQueue actual = HnswGraph.search(query, topK, 100, vectors, hnsw, random());
|
||||||
NeighborQueue expected = new NeighborQueue(topK, vectors.searchStrategy.reversed);
|
NeighborQueue expected = new NeighborQueue(topK, vectors.similarityFunction.reversed);
|
||||||
for (int j = 0; j < size; j++) {
|
for (int j = 0; j < size; j++) {
|
||||||
float[] v = vectors.vectorValue(j);
|
float[] v = vectors.vectorValue(j);
|
||||||
if (v != null) {
|
if (v != null) {
|
||||||
expected.insertWithOverflow(
|
expected.insertWithOverflow(
|
||||||
j, vectors.searchStrategy.compare(query, vectors.vectorValue(j)));
|
j, vectors.similarityFunction.compare(query, vectors.vectorValue(j)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assertEquals(topK, actual.size());
|
assertEquals(topK, actual.size());
|
||||||
@ -288,8 +288,8 @@ public class TestHnsw extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchStrategy searchStrategy() {
|
public SimilarityFunction similarityFunction() {
|
||||||
return SearchStrategy.DOT_PRODUCT_HNSW;
|
return SimilarityFunction.DOT_PRODUCT;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -393,12 +393,12 @@ public class TestHnsw extends LuceneTestCase {
|
|||||||
|
|
||||||
RandomVectorValues(int size, int dimension, Random random) {
|
RandomVectorValues(int size, int dimension, Random random) {
|
||||||
super(
|
super(
|
||||||
SearchStrategy.values()[random.nextInt(SearchStrategy.values().length - 1) + 1],
|
SimilarityFunction.values()[random.nextInt(SimilarityFunction.values().length - 1) + 1],
|
||||||
createRandomVectors(size, dimension, random));
|
createRandomVectors(size, dimension, random));
|
||||||
}
|
}
|
||||||
|
|
||||||
RandomVectorValues(RandomVectorValues other) {
|
RandomVectorValues(RandomVectorValues other) {
|
||||||
super(other.searchStrategy, other.values);
|
super(other.similarityFunction, other.values);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -107,7 +107,5 @@ public class TestNeighbors extends LuceneTestCase {
|
|||||||
|
|
||||||
public void testToString() {
|
public void testToString() {
|
||||||
assertEquals("Neighbors[0]", new NeighborQueue(2, false).toString());
|
assertEquals("Neighbors[0]", new NeighborQueue(2, false).toString());
|
||||||
// assertEquals("NeighborArray[0]", new NeighborArray(2,
|
|
||||||
// VectorValues.SearchStrategy.NONE).toString());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -96,7 +96,7 @@ public class TermVectorLeafReader extends LeafReader {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
false);
|
false);
|
||||||
fieldInfos = new FieldInfos(new FieldInfo[] {fieldInfo});
|
fieldInfos = new FieldInfos(new FieldInfo[] {fieldInfo});
|
||||||
}
|
}
|
||||||
|
@ -512,7 +512,7 @@ public class MemoryIndex {
|
|||||||
fieldType.pointIndexDimensionCount(),
|
fieldType.pointIndexDimensionCount(),
|
||||||
fieldType.pointNumBytes(),
|
fieldType.pointNumBytes(),
|
||||||
fieldType.vectorDimension(),
|
fieldType.vectorDimension(),
|
||||||
fieldType.vectorSearchStrategy(),
|
fieldType.vectorSimilarityFunction(),
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -544,7 +544,7 @@ public class MemoryIndex {
|
|||||||
info.fieldInfo.getPointIndexDimensionCount(),
|
info.fieldInfo.getPointIndexDimensionCount(),
|
||||||
info.fieldInfo.getPointNumBytes(),
|
info.fieldInfo.getPointNumBytes(),
|
||||||
info.fieldInfo.getVectorDimension(),
|
info.fieldInfo.getVectorDimension(),
|
||||||
info.fieldInfo.getVectorSearchStrategy(),
|
info.fieldInfo.getVectorSimilarityFunction(),
|
||||||
info.fieldInfo.isSoftDeletesField());
|
info.fieldInfo.isSoftDeletesField());
|
||||||
} else if (existingDocValuesType != docValuesType) {
|
} else if (existingDocValuesType != docValuesType) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
|
@ -295,7 +295,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
|
|||||||
fieldType.pointIndexDimensionCount(),
|
fieldType.pointIndexDimensionCount(),
|
||||||
fieldType.pointNumBytes(),
|
fieldType.pointNumBytes(),
|
||||||
fieldType.vectorDimension(),
|
fieldType.vectorDimension(),
|
||||||
fieldType.vectorSearchStrategy(),
|
fieldType.vectorSimilarityFunction(),
|
||||||
field.equals(softDeletesField));
|
field.equals(softDeletesField));
|
||||||
addAttributes(fi);
|
addAttributes(fi);
|
||||||
builder.add(fi);
|
builder.add(fi);
|
||||||
@ -341,9 +341,9 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
|
|||||||
|
|
||||||
if (r.nextBoolean()) {
|
if (r.nextBoolean()) {
|
||||||
int dimension = 1 + r.nextInt(VectorValues.MAX_DIMENSIONS);
|
int dimension = 1 + r.nextInt(VectorValues.MAX_DIMENSIONS);
|
||||||
VectorValues.SearchStrategy searchStrategy =
|
VectorValues.SimilarityFunction similarityFunction =
|
||||||
RandomPicks.randomFrom(r, VectorValues.SearchStrategy.values());
|
RandomPicks.randomFrom(r, VectorValues.SimilarityFunction.values());
|
||||||
type.setVectorDimensionsAndSearchStrategy(dimension, searchStrategy);
|
type.setVectorDimensionsAndSimilarityFunction(dimension, similarityFunction);
|
||||||
}
|
}
|
||||||
|
|
||||||
return type;
|
return type;
|
||||||
@ -412,7 +412,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -335,7 +335,7 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
|
|||||||
proto.getPointIndexDimensionCount(),
|
proto.getPointIndexDimensionCount(),
|
||||||
proto.getPointNumBytes(),
|
proto.getPointNumBytes(),
|
||||||
proto.getVectorDimension(),
|
proto.getVectorDimension(),
|
||||||
proto.getVectorSearchStrategy(),
|
proto.getVectorSimilarityFunction(),
|
||||||
proto.isSoftDeletesField());
|
proto.isSoftDeletesField());
|
||||||
|
|
||||||
FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] {field});
|
FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] {field});
|
||||||
|
@ -49,7 +49,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void addRandomFields(Document doc) {
|
protected void addRandomFields(Document doc) {
|
||||||
doc.add(new VectorField("v2", randomVector(30), VectorValues.SearchStrategy.NONE));
|
doc.add(new VectorField("v2", randomVector(30), VectorValues.SimilarityFunction.NONE));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFieldConstructor() {
|
public void testFieldConstructor() {
|
||||||
@ -57,7 +57,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
VectorField field = new VectorField("f", v);
|
VectorField field = new VectorField("f", v);
|
||||||
assertEquals(1, field.fieldType().vectorDimension());
|
assertEquals(1, field.fieldType().vectorDimension());
|
||||||
assertEquals(
|
assertEquals(
|
||||||
VectorValues.SearchStrategy.EUCLIDEAN_HNSW, field.fieldType().vectorSearchStrategy());
|
VectorValues.SimilarityFunction.EUCLIDEAN, field.fieldType().vectorSimilarityFunction());
|
||||||
assertSame(v, field.vectorValue());
|
assertSame(v, field.vectorValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -66,7 +66,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", null));
|
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", null));
|
||||||
expectThrows(
|
expectThrows(
|
||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() -> new VectorField("f", new float[1], (VectorValues.SearchStrategy) null));
|
() -> new VectorField("f", new float[1], (VectorValues.SimilarityFunction) null));
|
||||||
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", new float[0]));
|
expectThrows(IllegalArgumentException.class, () -> new VectorField("f", new float[0]));
|
||||||
expectThrows(
|
expectThrows(
|
||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
@ -88,13 +88,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
public void testFieldCreateFieldType() {
|
public void testFieldCreateFieldType() {
|
||||||
expectThrows(
|
expectThrows(
|
||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() -> VectorField.createHnswType(0, VectorValues.SearchStrategy.EUCLIDEAN_HNSW, 16, 16));
|
() -> VectorField.createHnswType(0, VectorValues.SimilarityFunction.EUCLIDEAN, 16, 16));
|
||||||
expectThrows(
|
expectThrows(
|
||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() ->
|
() ->
|
||||||
VectorField.createHnswType(
|
VectorField.createHnswType(
|
||||||
VectorValues.MAX_DIMENSIONS + 1,
|
VectorValues.MAX_DIMENSIONS + 1,
|
||||||
VectorValues.SearchStrategy.EUCLIDEAN_HNSW,
|
VectorValues.SimilarityFunction.EUCLIDEAN,
|
||||||
16,
|
16,
|
||||||
16));
|
16));
|
||||||
expectThrows(
|
expectThrows(
|
||||||
@ -104,7 +104,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() ->
|
() ->
|
||||||
VectorField.createHnswType(
|
VectorField.createHnswType(
|
||||||
VectorValues.MAX_DIMENSIONS + 1, VectorValues.SearchStrategy.NONE, 16, 16));
|
VectorValues.MAX_DIMENSIONS + 1, VectorValues.SimilarityFunction.NONE, 16, 16));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Illegal schema change tests:
|
// Illegal schema change tests:
|
||||||
@ -113,11 +113,11 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[3], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc2.add(new VectorField("f", new float[3], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||||
String errMsg =
|
String errMsg =
|
||||||
@ -129,31 +129,31 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.commit();
|
w.commit();
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[3], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc2.add(new VectorField("f", new float[3], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||||
String errMsg =
|
String errMsg =
|
||||||
"cannot change field \"f\" from vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW "
|
"cannot change field \"f\" from vector dimension=4, vector similarity function=DOT_PRODUCT "
|
||||||
+ "to inconsistent vector dimension=3, vector search strategy=DOT_PRODUCT_HNSW";
|
+ "to inconsistent vector dimension=3, vector similarity function=DOT_PRODUCT";
|
||||||
assertEquals(errMsg, expected.getMessage());
|
assertEquals(errMsg, expected.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testIllegalSearchStrategyChange() throws Exception {
|
public void testIllegalSimilarityFunctionChange() throws Exception {
|
||||||
// illegal change in the same segment
|
// illegal change in the same segment
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
|
doc2.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||||
String errMsg =
|
String errMsg =
|
||||||
@ -165,17 +165,17 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.commit();
|
w.commit();
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
|
doc2.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2));
|
||||||
String errMsg =
|
String errMsg =
|
||||||
"cannot change field \"f\" from vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW "
|
"cannot change field \"f\" from vector dimension=4, vector similarity function=DOT_PRODUCT "
|
||||||
+ "to inconsistent vector dimension=4, vector search strategy=EUCLIDEAN_HNSW";
|
+ "to inconsistent vector dimension=4, vector similarity function=EUCLIDEAN";
|
||||||
assertEquals(errMsg, expected.getMessage());
|
assertEquals(errMsg, expected.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -184,39 +184,39 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
try (Directory dir = newDirectory()) {
|
try (Directory dir = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[1], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc2.add(new VectorField("f", new float[1], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"cannot change field \"f\" from vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW "
|
"cannot change field \"f\" from vector dimension=4, vector similarity function=DOT_PRODUCT "
|
||||||
+ "to inconsistent vector dimension=1, vector search strategy=DOT_PRODUCT_HNSW",
|
+ "to inconsistent vector dimension=1, vector similarity function=DOT_PRODUCT",
|
||||||
expected.getMessage());
|
expected.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testIllegalSearchStrategyChangeTwoWriters() throws Exception {
|
public void testIllegalSimilarityFunctionChangeTwoWriters() throws Exception {
|
||||||
try (Directory dir = newDirectory()) {
|
try (Directory dir = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
|
doc2.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
|
expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"cannot change field \"f\" from vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW "
|
"cannot change field \"f\" from vector dimension=4, vector similarity function=DOT_PRODUCT "
|
||||||
+ "to inconsistent vector dimension=4, vector search strategy=EUCLIDEAN_HNSW",
|
+ "to inconsistent vector dimension=4, vector similarity function=EUCLIDEAN",
|
||||||
expected.getMessage());
|
expected.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -225,7 +225,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
public void testAddIndexesDirectory0() throws Exception {
|
public void testAddIndexesDirectory0() throws Exception {
|
||||||
String fieldName = "field";
|
String fieldName = "field";
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField(fieldName, new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField(fieldName, new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
@ -254,7 +254,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
doc.add(
|
doc.add(
|
||||||
new VectorField(fieldName, new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
new VectorField(fieldName, new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
w2.addIndexes(dir);
|
w2.addIndexes(dir);
|
||||||
@ -274,7 +274,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
String fieldName = "field";
|
String fieldName = "field";
|
||||||
float[] vector = new float[1];
|
float[] vector = new float[1];
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField(fieldName, vector, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField(fieldName, vector, VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
@ -305,41 +305,41 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[5], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[5], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(
|
expectThrows(
|
||||||
IllegalArgumentException.class, () -> w2.addIndexes(new Directory[] {dir}));
|
IllegalArgumentException.class, () -> w2.addIndexes(new Directory[] {dir}));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"cannot change field \"f\" from vector dimension=5, vector search strategy=DOT_PRODUCT_HNSW "
|
"cannot change field \"f\" from vector dimension=5, vector similarity function=DOT_PRODUCT "
|
||||||
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
|
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
|
||||||
expected.getMessage());
|
expected.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testIllegalSearchStrategyChangeViaAddIndexesDirectory() throws Exception {
|
public void testIllegalSimilarityFunctionChangeViaAddIndexesDirectory() throws Exception {
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w2.addIndexes(dir));
|
expectThrows(IllegalArgumentException.class, () -> w2.addIndexes(dir));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"cannot change field \"f\" from vector dimension=4, vector search strategy=EUCLIDEAN_HNSW "
|
"cannot change field \"f\" from vector dimension=4, vector similarity function=EUCLIDEAN "
|
||||||
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
|
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
|
||||||
expected.getMessage());
|
expected.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -350,12 +350,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[5], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[5], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
@ -363,25 +363,25 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() -> w2.addIndexes(new CodecReader[] {(CodecReader) getOnlyLeafReader(r)}));
|
() -> w2.addIndexes(new CodecReader[] {(CodecReader) getOnlyLeafReader(r)}));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"cannot change field \"f\" from vector dimension=5, vector search strategy=DOT_PRODUCT_HNSW "
|
"cannot change field \"f\" from vector dimension=5, vector similarity function=DOT_PRODUCT "
|
||||||
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
|
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
|
||||||
expected.getMessage());
|
expected.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testIllegalSearchStrategyChangeViaAddIndexesCodecReader() throws Exception {
|
public void testIllegalSimilarityFunctionChangeViaAddIndexesCodecReader() throws Exception {
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
@ -389,8 +389,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() -> w2.addIndexes(new CodecReader[] {(CodecReader) getOnlyLeafReader(r)}));
|
() -> w2.addIndexes(new CodecReader[] {(CodecReader) getOnlyLeafReader(r)}));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"cannot change field \"f\" from vector dimension=4, vector search strategy=EUCLIDEAN_HNSW "
|
"cannot change field \"f\" from vector dimension=4, vector similarity function=EUCLIDEAN "
|
||||||
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
|
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
|
||||||
expected.getMessage());
|
expected.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -402,43 +402,43 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[5], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[5], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w2, r));
|
expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w2, r));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"cannot change field \"f\" from vector dimension=5, vector search strategy=DOT_PRODUCT_HNSW "
|
"cannot change field \"f\" from vector dimension=5, vector similarity function=DOT_PRODUCT "
|
||||||
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
|
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
|
||||||
expected.getMessage());
|
expected.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testIllegalSearchStrategyChangeViaAddIndexesSlowCodecReader() throws Exception {
|
public void testIllegalSimilarityFunctionChangeViaAddIndexesSlowCodecReader() throws Exception {
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
Directory dir2 = newDirectory()) {
|
Directory dir2 = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.EUCLIDEAN));
|
||||||
w2.addDocument(doc);
|
w2.addDocument(doc);
|
||||||
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
try (DirectoryReader r = DirectoryReader.open(dir)) {
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w2, r));
|
expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w2, r));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"cannot change field \"f\" from vector dimension=4, vector search strategy=EUCLIDEAN_HNSW "
|
"cannot change field \"f\" from vector dimension=4, vector similarity function=EUCLIDEAN "
|
||||||
+ "to inconsistent vector dimension=4, vector search strategy=DOT_PRODUCT_HNSW",
|
+ "to inconsistent vector dimension=4, vector similarity function=DOT_PRODUCT",
|
||||||
expected.getMessage());
|
expected.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -449,8 +449,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
IllegalArgumentException expected =
|
IllegalArgumentException expected =
|
||||||
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
|
expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
@ -470,10 +470,10 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
new VectorField(
|
new VectorField(
|
||||||
"f",
|
"f",
|
||||||
new float[VectorValues.MAX_DIMENSIONS + 1],
|
new float[VectorValues.MAX_DIMENSIONS + 1],
|
||||||
VectorValues.SearchStrategy.DOT_PRODUCT_HNSW)));
|
VectorValues.SimilarityFunction.DOT_PRODUCT)));
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[1], VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
|
doc2.add(new VectorField("f", new float[1], VectorValues.SimilarityFunction.EUCLIDEAN));
|
||||||
w.addDocument(doc2);
|
w.addDocument(doc2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -485,11 +485,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
Exception e =
|
Exception e =
|
||||||
expectThrows(
|
expectThrows(
|
||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() -> doc.add(new VectorField("f", new float[0], VectorValues.SearchStrategy.NONE)));
|
() ->
|
||||||
|
doc.add(
|
||||||
|
new VectorField("f", new float[0], VectorValues.SimilarityFunction.NONE)));
|
||||||
assertEquals("cannot index an empty vector", e.getMessage());
|
assertEquals("cannot index an empty vector", e.getMessage());
|
||||||
|
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField("f", new float[1], VectorValues.SearchStrategy.NONE));
|
doc2.add(new VectorField("f", new float[1], VectorValues.SimilarityFunction.NONE));
|
||||||
w.addDocument(doc2);
|
w.addDocument(doc2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -499,14 +501,14 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
try (Directory dir = newDirectory()) {
|
try (Directory dir = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||||
iwc.setCodec(Codec.forName("SimpleText"));
|
iwc.setCodec(Codec.forName("SimpleText"));
|
||||||
try (IndexWriter w = new IndexWriter(dir, iwc)) {
|
try (IndexWriter w = new IndexWriter(dir, iwc)) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.forceMerge(1);
|
w.forceMerge(1);
|
||||||
}
|
}
|
||||||
@ -520,12 +522,12 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
try (Directory dir = newDirectory()) {
|
try (Directory dir = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, iwc)) {
|
try (IndexWriter w = new IndexWriter(dir, iwc)) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("f", new float[4], VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
doc.add(new VectorField("f", new float[4], VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.forceMerge(1);
|
w.forceMerge(1);
|
||||||
}
|
}
|
||||||
@ -533,7 +535,8 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void testInvalidVectorFieldUsage() {
|
public void testInvalidVectorFieldUsage() {
|
||||||
VectorField field = new VectorField("field", new float[2], VectorValues.SearchStrategy.NONE);
|
VectorField field =
|
||||||
|
new VectorField("field", new float[2], VectorValues.SimilarityFunction.NONE);
|
||||||
|
|
||||||
expectThrows(IllegalArgumentException.class, () -> field.setIntValue(14));
|
expectThrows(IllegalArgumentException.class, () -> field.setIntValue(14));
|
||||||
|
|
||||||
@ -548,8 +551,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new StringField("id", "0", Field.Store.NO));
|
doc.add(new StringField("id", "0", Field.Store.NO));
|
||||||
doc.add(
|
doc.add(
|
||||||
new VectorField(
|
new VectorField("v", new float[] {2, 3, 5}, VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
"v", new float[] {2, 3, 5}, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.addDocument(new Document());
|
w.addDocument(new Document());
|
||||||
w.commit();
|
w.commit();
|
||||||
@ -572,14 +574,14 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
doc.add(new StringField("id", "0", Field.Store.NO));
|
doc.add(new StringField("id", "0", Field.Store.NO));
|
||||||
doc.add(
|
doc.add(
|
||||||
new VectorField(
|
new VectorField(
|
||||||
"v0", new float[] {2, 3, 5}, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
"v0", new float[] {2, 3, 5}, VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.commit();
|
w.commit();
|
||||||
|
|
||||||
doc = new Document();
|
doc = new Document();
|
||||||
doc.add(
|
doc.add(
|
||||||
new VectorField(
|
new VectorField(
|
||||||
"v1", new float[] {2, 3, 5}, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
"v1", new float[] {2, 3, 5}, VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.forceMerge(1);
|
w.forceMerge(1);
|
||||||
}
|
}
|
||||||
@ -591,13 +593,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
int[] fieldDocCounts = new int[numFields];
|
int[] fieldDocCounts = new int[numFields];
|
||||||
float[] fieldTotals = new float[numFields];
|
float[] fieldTotals = new float[numFields];
|
||||||
int[] fieldDims = new int[numFields];
|
int[] fieldDims = new int[numFields];
|
||||||
VectorValues.SearchStrategy[] fieldSearchStrategies =
|
VectorValues.SimilarityFunction[] fieldSearchStrategies =
|
||||||
new VectorValues.SearchStrategy[numFields];
|
new VectorValues.SimilarityFunction[numFields];
|
||||||
for (int i = 0; i < numFields; i++) {
|
for (int i = 0; i < numFields; i++) {
|
||||||
fieldDims[i] = random().nextInt(20) + 1;
|
fieldDims[i] = random().nextInt(20) + 1;
|
||||||
fieldSearchStrategies[i] =
|
fieldSearchStrategies[i] =
|
||||||
VectorValues.SearchStrategy.values()[
|
VectorValues.SimilarityFunction.values()[
|
||||||
random().nextInt(VectorValues.SearchStrategy.values().length)];
|
random().nextInt(VectorValues.SimilarityFunction.values().length)];
|
||||||
}
|
}
|
||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig())) {
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig())) {
|
||||||
@ -644,15 +646,15 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
try (Directory dir = newDirectory();
|
try (Directory dir = newDirectory();
|
||||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc1 = new Document();
|
Document doc1 = new Document();
|
||||||
doc1.add(new VectorField(fieldName, v, VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
|
doc1.add(new VectorField(fieldName, v, VectorValues.SimilarityFunction.EUCLIDEAN));
|
||||||
v[0] = 1;
|
v[0] = 1;
|
||||||
Document doc2 = new Document();
|
Document doc2 = new Document();
|
||||||
doc2.add(new VectorField(fieldName, v, VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
|
doc2.add(new VectorField(fieldName, v, VectorValues.SimilarityFunction.EUCLIDEAN));
|
||||||
iw.addDocument(doc1);
|
iw.addDocument(doc1);
|
||||||
iw.addDocument(doc2);
|
iw.addDocument(doc2);
|
||||||
v[0] = 2;
|
v[0] = 2;
|
||||||
Document doc3 = new Document();
|
Document doc3 = new Document();
|
||||||
doc3.add(new VectorField(fieldName, v, VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
|
doc3.add(new VectorField(fieldName, v, VectorValues.SimilarityFunction.EUCLIDEAN));
|
||||||
iw.addDocument(doc3);
|
iw.addDocument(doc3);
|
||||||
iw.forceMerge(1);
|
iw.forceMerge(1);
|
||||||
try (IndexReader reader = iw.getReader()) {
|
try (IndexReader reader = iw.getReader()) {
|
||||||
@ -707,15 +709,16 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
float[] v = new float[] {1};
|
float[] v = new float[] {1};
|
||||||
doc.add(new VectorField("field1", v, VectorValues.SearchStrategy.EUCLIDEAN_HNSW));
|
doc.add(new VectorField("field1", v, VectorValues.SimilarityFunction.EUCLIDEAN));
|
||||||
doc.add(new VectorField("field2", new float[] {1, 2, 3}, VectorValues.SearchStrategy.NONE));
|
doc.add(
|
||||||
|
new VectorField("field2", new float[] {1, 2, 3}, VectorValues.SimilarityFunction.NONE));
|
||||||
iw.addDocument(doc);
|
iw.addDocument(doc);
|
||||||
v[0] = 2;
|
v[0] = 2;
|
||||||
iw.addDocument(doc);
|
iw.addDocument(doc);
|
||||||
doc = new Document();
|
doc = new Document();
|
||||||
doc.add(
|
doc.add(
|
||||||
new VectorField(
|
new VectorField(
|
||||||
"field3", new float[] {1, 2, 3}, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW));
|
"field3", new float[] {1, 2, 3}, VectorValues.SimilarityFunction.DOT_PRODUCT));
|
||||||
iw.addDocument(doc);
|
iw.addDocument(doc);
|
||||||
iw.forceMerge(1);
|
iw.forceMerge(1);
|
||||||
try (IndexReader reader = iw.getReader()) {
|
try (IndexReader reader = iw.getReader()) {
|
||||||
@ -776,9 +779,9 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
if (random().nextBoolean() && values[i] != null) {
|
if (random().nextBoolean() && values[i] != null) {
|
||||||
// sometimes use a shared scratch array
|
// sometimes use a shared scratch array
|
||||||
System.arraycopy(values[i], 0, scratch, 0, scratch.length);
|
System.arraycopy(values[i], 0, scratch, 0, scratch.length);
|
||||||
add(iw, fieldName, i, scratch, VectorValues.SearchStrategy.NONE);
|
add(iw, fieldName, i, scratch, VectorValues.SimilarityFunction.NONE);
|
||||||
} else {
|
} else {
|
||||||
add(iw, fieldName, i, values[i], VectorValues.SearchStrategy.NONE);
|
add(iw, fieldName, i, values[i], VectorValues.SimilarityFunction.NONE);
|
||||||
}
|
}
|
||||||
if (random().nextInt(10) == 2) {
|
if (random().nextInt(10) == 2) {
|
||||||
// sometimes delete a random document
|
// sometimes delete a random document
|
||||||
@ -826,7 +829,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Index random vectors, sometimes skipping documents, sometimes updating a document, sometimes
|
* Index random vectors, sometimes skipping documents, sometimes updating a document, sometimes
|
||||||
* merging, sometimes sorting the index, using an HNSW search strategy so as to also produce a
|
* merging, sometimes sorting the index, using an HNSW similarity function so as to also produce a
|
||||||
* graph, and verify that the expected values can be read back consistently.
|
* graph, and verify that the expected values can be read back consistently.
|
||||||
*/
|
*/
|
||||||
public void testRandomWithUpdatesAndGraph() throws Exception {
|
public void testRandomWithUpdatesAndGraph() throws Exception {
|
||||||
@ -851,7 +854,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
values[i] = value;
|
values[i] = value;
|
||||||
id2value[id] = value;
|
id2value[id] = value;
|
||||||
id2ord[id] = i;
|
id2ord[id] = i;
|
||||||
add(iw, fieldName, id, value, VectorValues.SearchStrategy.EUCLIDEAN_HNSW);
|
add(iw, fieldName, id, value, VectorValues.SimilarityFunction.EUCLIDEAN);
|
||||||
}
|
}
|
||||||
try (IndexReader reader = iw.getReader()) {
|
try (IndexReader reader = iw.getReader()) {
|
||||||
for (LeafReaderContext ctx : reader.leaves()) {
|
for (LeafReaderContext ctx : reader.leaves()) {
|
||||||
@ -888,14 +891,14 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
String field,
|
String field,
|
||||||
int id,
|
int id,
|
||||||
float[] vector,
|
float[] vector,
|
||||||
VectorValues.SearchStrategy searchStrategy)
|
VectorValues.SimilarityFunction similarityFunction)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
add(iw, field, id, random().nextInt(100), vector, searchStrategy);
|
add(iw, field, id, random().nextInt(100), vector, similarityFunction);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void add(IndexWriter iw, String field, int id, int sortkey, float[] vector)
|
private void add(IndexWriter iw, String field, int id, int sortkey, float[] vector)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
add(iw, field, id, sortkey, vector, VectorValues.SearchStrategy.NONE);
|
add(iw, field, id, sortkey, vector, VectorValues.SimilarityFunction.NONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void add(
|
private void add(
|
||||||
@ -904,11 +907,11 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
int id,
|
int id,
|
||||||
int sortkey,
|
int sortkey,
|
||||||
float[] vector,
|
float[] vector,
|
||||||
VectorValues.SearchStrategy searchStrategy)
|
VectorValues.SimilarityFunction similarityFunction)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
if (vector != null) {
|
if (vector != null) {
|
||||||
doc.add(new VectorField(field, vector, searchStrategy));
|
doc.add(new VectorField(field, vector, similarityFunction));
|
||||||
}
|
}
|
||||||
doc.add(new NumericDocValuesField("sortkey", sortkey));
|
doc.add(new NumericDocValuesField("sortkey", sortkey));
|
||||||
String idString = Integer.toString(id);
|
String idString = Integer.toString(id);
|
||||||
@ -930,10 +933,10 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
try (Directory dir = newDirectory()) {
|
try (Directory dir = newDirectory()) {
|
||||||
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new VectorField("v1", randomVector(3), VectorValues.SearchStrategy.NONE));
|
doc.add(new VectorField("v1", randomVector(3), VectorValues.SimilarityFunction.NONE));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
doc.add(new VectorField("v2", randomVector(3), VectorValues.SearchStrategy.NONE));
|
doc.add(new VectorField("v2", randomVector(3), VectorValues.SimilarityFunction.NONE));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -951,13 +954,13 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSearchStrategyIdentifiers() {
|
public void testSimilarityFunctionIdentifiers() {
|
||||||
// make sure we don't accidentally mess up search strategy identifiers by re-ordering their
|
// make sure we don't accidentally mess up similarity function identifiers by re-ordering their
|
||||||
// enumerators
|
// enumerators
|
||||||
assertEquals(0, VectorValues.SearchStrategy.NONE.ordinal());
|
assertEquals(0, VectorValues.SimilarityFunction.NONE.ordinal());
|
||||||
assertEquals(1, VectorValues.SearchStrategy.EUCLIDEAN_HNSW.ordinal());
|
assertEquals(1, VectorValues.SimilarityFunction.EUCLIDEAN.ordinal());
|
||||||
assertEquals(2, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW.ordinal());
|
assertEquals(2, VectorValues.SimilarityFunction.DOT_PRODUCT.ordinal());
|
||||||
assertEquals(3, VectorValues.SearchStrategy.values().length);
|
assertEquals(3, VectorValues.SimilarityFunction.values().length);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAdvance() throws Exception {
|
public void testAdvance() throws Exception {
|
||||||
@ -969,7 +972,7 @@ public abstract class BaseVectorFormatTestCase extends BaseIndexFileFormatTestCa
|
|||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
// randomly add a vector field
|
// randomly add a vector field
|
||||||
if (random().nextInt(4) == 3) {
|
if (random().nextInt(4) == 3) {
|
||||||
doc.add(new VectorField(fieldName, new float[4], VectorValues.SearchStrategy.NONE));
|
doc.add(new VectorField(fieldName, new float[4], VectorValues.SimilarityFunction.NONE));
|
||||||
}
|
}
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
@ -84,7 +84,7 @@ public class MismatchedLeafReader extends FilterLeafReader {
|
|||||||
oldInfo.getPointNumBytes(), // dimension numBytes
|
oldInfo.getPointNumBytes(), // dimension numBytes
|
||||||
oldInfo.getVectorDimension(), // number of dimensions of the field's vector
|
oldInfo.getVectorDimension(), // number of dimensions of the field's vector
|
||||||
// distance function for calculating similarity of the field's vector
|
// distance function for calculating similarity of the field's vector
|
||||||
oldInfo.getVectorSearchStrategy(),
|
oldInfo.getVectorSimilarityFunction(),
|
||||||
oldInfo.isSoftDeletesField()); // used as soft-deletes field
|
oldInfo.isSoftDeletesField()); // used as soft-deletes field
|
||||||
shuffled.set(i, newInfo);
|
shuffled.set(i, newInfo);
|
||||||
}
|
}
|
||||||
|
@ -140,7 +140,7 @@ public class RandomPostingsTester {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
false);
|
false);
|
||||||
fieldUpto++;
|
fieldUpto++;
|
||||||
|
|
||||||
@ -711,7 +711,7 @@ public class RandomPostingsTester {
|
|||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
VectorValues.SearchStrategy.NONE,
|
VectorValues.SimilarityFunction.NONE,
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user