From 2554e05ef44be810ff04b0196a15df1000d570e2 Mon Sep 17 00:00:00 2001 From: Vigya Sharma Date: Tue, 12 Nov 2024 09:54:07 -0800 Subject: [PATCH] remove the mv sim interface --- .../lucene/index/MultiVectorSimilarity.java | 45 ------------------ .../index/MultiVectorSimilarityFunction.java | 47 +++++++++---------- 2 files changed, 23 insertions(+), 69 deletions(-) delete mode 100644 lucene/core/src/java/org/apache/lucene/index/MultiVectorSimilarity.java diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiVectorSimilarity.java b/lucene/core/src/java/org/apache/lucene/index/MultiVectorSimilarity.java deleted file mode 100644 index 3ddb28191c7..00000000000 --- a/lucene/core/src/java/org/apache/lucene/index/MultiVectorSimilarity.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.index; - -/** Defines comparison functions for multi-vector similarity */ -public interface MultiVectorSimilarity { - - /** - * Calculates a similarity score between the two multi-vectors with a specified function. Higher - * similarity scores correspond to closer vectors. - * - * @param t1 a multi-vector with non-empty vectors All vector values are concatenated in a single - * packed array. - * @param t2 another multi-vector, vectors of the same dimension as t1. All vector values are - * concatenated in a single packed array. - * @return the value of the similarity function applied to the two multi-vectors - */ - float compare(float[] t1, float[] t2, int dimension); - - /** - * Calculates a similarity score between the two multi-vectors with a specified function. Higher - * similarity scores correspond to closer vectors. - * - * @param t1 a multi-vector with non-empty vectors. All vector values are concatenated in a single - * packed array. - * @param t2 another multi-vector, vectors of the same dimension as t1. All vector values are - * concatenated in a single packed array. - * @return the value of the similarity function applied to the two multi-vector - */ - float compare(byte[] t1, byte[] t2, int dimension); -} diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiVectorSimilarityFunction.java b/lucene/core/src/java/org/apache/lucene/index/MultiVectorSimilarityFunction.java index 7cc4ed67f7f..bba2e9b560b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiVectorSimilarityFunction.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiVectorSimilarityFunction.java @@ -21,16 +21,17 @@ import java.util.List; import org.apache.lucene.util.ArrayUtil; /** - * Multi-vector similarity function; used in search to return top K most similar multi-vectors to a - * target multi-vector. This method is used during indexing and searching of the multi-vectors in - * order to determine the nearest neighbors. + * Computes similarity between two multi-vectors. + *

+ * A multi-vector is a collection of multiple vectors that represent a single document or query. + * MultiVectorSimilarityFunction is used to determine nearest neighbors during + * indexing and search on multi-vectors. */ -// no commit -public class MultiVectorSimilarityFunction implements MultiVectorSimilarity { +public class MultiVectorSimilarityFunction { /** Aggregation function to combine similarity across multiple vector values */ public enum Aggregation { - /** Placeholder aggregation that is not intended to be used. */ + /** Selecting this aggregation indicates that the field does not have multi-vector values */ NONE { @Override public float aggregate( @@ -52,7 +53,7 @@ public class MultiVectorSimilarityFunction implements MultiVectorSimilarity { }, /** - * SumMaxSimilarity between two multi-vectors. Aggregates using the sum of maximum similarity + * SumMaxSimilarity between two multi-vectors. Computes the sum of maximum similarity * found for each vector in the first multi-vector against all vectors in the second * multi-vector. */ @@ -64,17 +65,16 @@ public class MultiVectorSimilarityFunction implements MultiVectorSimilarity { VectorSimilarityFunction vectorSimilarityFunction, int dimension) { if (outer.length % dimension != 0 || inner.length % dimension != 0) { - throw new IllegalArgumentException("Multi vectors do not match provided dimensions"); + throw new IllegalArgumentException("Multi vectors do not match provided dimension value"); } + // TODO: can we avoid making vector copies? List outerList = new ArrayList<>(); List innerList = new ArrayList<>(); for (int i = 0; i < outer.length; i += dimension) { -// System.out.println("copy subArray - " + i + ":" + i+dimension); outerList.add(ArrayUtil.copyOfSubArray(outer, i, i + dimension)); } for (int i = 0; i < inner.length; i += dimension) { -// System.out.println("copy subArray - " + i + ":" + i+dimension); innerList.add(ArrayUtil.copyOfSubArray(inner, i, i + dimension)); } @@ -96,18 +96,16 @@ public class MultiVectorSimilarityFunction implements MultiVectorSimilarity { VectorSimilarityFunction vectorSimilarityFunction, int dimension) { if (outer.length % dimension != 0 || inner.length % dimension != 0) { - throw new IllegalArgumentException("Multi vectors do not match provided dimensions"); + throw new IllegalArgumentException("Multi vectors do not match provided dimension value"); } + + // TODO: can we avoid making vector copies? List outerList = new ArrayList<>(); List innerList = new ArrayList<>(); -// System.out.println("...handling outer list"); for (int i = 0; i < outer.length; i += dimension) { -// System.out.println("copy subArray - " + i + ":" + dimension); outerList.add(ArrayUtil.copyOfSubArray(outer, i, i + dimension)); } -// System.out.println("...handling inner list"); for (int i = 0; i < inner.length; i += dimension) { -// System.out.println("copy subArray - " + i + ":" + dimension); innerList.add(ArrayUtil.copyOfSubArray(inner, i, i + dimension)); } @@ -124,12 +122,15 @@ public class MultiVectorSimilarityFunction implements MultiVectorSimilarity { }; /** - * Computes and aggregates similarity over multiple vector values + * Computes and aggregates similarity over multiple vector values. + * + * Assumes that all vector values in both provided multi-vectors have the same dimensions. Slices + * inner and outer float[] multi-vectors into dimension sized vector values for comparison. * * @param outer first multi-vector * @param inner second multi-vector * @param vectorSimilarityFunction distance function for vector proximity - * @param dimension dimension for each vector value in the multi-vector + * @param dimension dimension for each vector in the provided multi-vectors * @return similarity between the two multi-vectors */ public abstract float aggregate( @@ -139,12 +140,15 @@ public class MultiVectorSimilarityFunction implements MultiVectorSimilarity { int dimension); /** - * Computes and aggregates similarity over multiple vector values + * Computes and aggregates similarity over multiple vector values. + * + * Assumes that all vector values in both provided multi-vectors have the same dimensions. Slices + * inner and outer byte[] multi-vectors into dimension sized vector values for comparison. * * @param outer first multi-vector * @param inner second multi-vector * @param vectorSimilarityFunction distance function for vector proximity - * @param dimension dimension for each vector value in the multi-vector + * @param dimension dimension for each vector in the provided multi-vectors * @return similarity between the two multi-vectors */ public abstract float aggregate( @@ -172,17 +176,14 @@ public class MultiVectorSimilarityFunction implements MultiVectorSimilarity { this.aggregation = aggregation; } - @Override public float compare(float[] t1, float[] t2, int dimension) { return aggregation.aggregate(t1, t2, similarityFunction, dimension); } - @Override public float compare(byte[] t1, byte[] t2, int dimension) { return aggregation.aggregate(t1, t2, similarityFunction, dimension); } - @Override public boolean equals(Object obj) { if (obj instanceof MultiVectorSimilarityFunction == false) { return false; @@ -191,14 +192,12 @@ public class MultiVectorSimilarityFunction implements MultiVectorSimilarity { return this.similarityFunction == o.similarityFunction && this.aggregation == o.aggregation; } - @Override public int hashCode() { int result = Integer.hashCode(similarityFunction.ordinal()); result = 31 * result + Integer.hashCode(aggregation.ordinal()); return result; } - @Override public String toString() { return "MultiVectorSimilarityFunction(similarity=" + similarityFunction