Ensure negative scores are not returned by vector similarity functions (#12727)

Vector similarity functions should never return negative scores. With the Panama vector implementation and nearly antipodal float[] vectors, the cosine and (normalized) dot-product scores can become slightly negative due to compounding floating-point errors.

To avoid making the Panama vector code path significantly slower, we stick to float32 operations for now and simply snap the score to `0` if it is still negative after our correction.

closes: https://github.com/apache/lucene/issues/12700
This commit is contained in:
Benjamin Trent 2023-10-30 10:05:52 -04:00 committed by GitHub
parent 7943b7ad1c
commit 2ed60e8073
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 18 additions and 2 deletions

View File

@ -264,6 +264,8 @@ Bug Fixes
* GITHUB#12682: Scorer should sum up scores into a double. (Shubham Chaudhary)
* GITHUB#12727: Ensure negative scores are not returned by vector similarity functions (Ben Trent)
Build
---------------------

View File

@ -52,7 +52,7 @@ public enum VectorSimilarityFunction {
DOT_PRODUCT {
/**
 * Scores two float vectors from their dot product.
 *
 * <p>The result is {@code (1 + dotProduct(v1, v2)) / 2}, replaced by 0 whenever that value is
 * smaller than 0. The commit description attributes slightly negative raw values to accumulated
 * float32 rounding on nearly antipodal vectors.
 *
 * @param v1 first vector
 * @param v2 second vector
 * @return the larger of {@code (1 + dotProduct(v1, v2)) / 2} and 0
 */
@Override
public float compare(float[] v1, float[] v2) {
// NOTE(review): this view is a diff hunk with its +/- markers stripped; the line below appears
// to be the pre-change (removed) return, superseded by the clamped return that follows it.
return (1 + dotProduct(v1, v2)) / 2;
// Post-change return: snaps a negative score to 0.
return Math.max((1 + dotProduct(v1, v2)) / 2, 0);
}
@Override
@ -70,7 +70,7 @@ public enum VectorSimilarityFunction {
COSINE {
/**
 * Scores two float vectors from their cosine.
 *
 * <p>The result is {@code (1 + cosine(v1, v2)) / 2}, replaced by 0 whenever that value is
 * smaller than 0. The commit description attributes slightly negative raw values to accumulated
 * float32 rounding on nearly antipodal vectors.
 *
 * @param v1 first vector
 * @param v2 second vector
 * @return the larger of {@code (1 + cosine(v1, v2)) / 2} and 0
 */
@Override
public float compare(float[] v1, float[] v2) {
// NOTE(review): this view is a diff hunk with its +/- markers stripped; the line below appears
// to be the pre-change (removed) return, superseded by the clamped return that follows it.
return (1 + cosine(v1, v2)) / 2;
// Post-change return: snaps a negative score to 0.
return Math.max((1 + cosine(v1, v2)) / 2, 0);
}
@Override

View File

@ -17,6 +17,7 @@
package org.apache.lucene.util;
import java.util.Random;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
@ -115,6 +116,19 @@ public class TestVectorUtil extends LuceneTestCase {
expectThrows(IllegalArgumentException.class, () -> VectorUtil.l2normalize(v));
}
/**
 * Verifies that every {@link VectorSimilarityFunction} yields a score of at least 0 when
 * comparing two 1536-dimensional constant vectors of opposite sign.
 */
public void testExtremeNumerics() {
  final int dims = 1536;
  final float[] positives = new float[dims];
  final float[] negatives = new float[dims];
  int idx = 0;
  while (idx < dims) {
    positives[idx] = 0.888888f;
    negatives[idx] = -0.777777f;
    idx++;
  }

  // Exercise every declared similarity function, not only the ones touched by the fix.
  final VectorSimilarityFunction[] functions = VectorSimilarityFunction.values();
  for (int f = 0; f < functions.length; f++) {
    final VectorSimilarityFunction function = functions[f];
    final float score = function.compare(positives, negatives);
    final boolean nonNegative = score >= 0;
    assertTrue(function + " expected >=0 got:" + score, nonNegative);
  }
}
private static float l2(float[] v) {
float l2 = 0;
for (float x : v) {