Ensure negative scores are not returned by vector similarity functions (#12727)

Vector similarity functions should never return negative scores. With the Panama vector implementation and nearly antipodal float[] vectors, cosine and (normalized) dot-product can come out slightly negative because of compounding floating-point errors. Since we don't want to make the Panama vector code path incredibly slow, we stick to float32 operations for now and simply snap the score to `0` if it is still negative after our correction. Closes: https://github.com/apache/lucene/issues/12700
2023-10-30 10:05:52 -04:00 · 2023-10-30 10:05:52 -04:00 · 2ed60e8073
parent 7943b7ad1c
commit 2ed60e8073
3 changed files with 18 additions and 2 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -264,6 +264,8 @@ Bug Fixes

 * GITHUB#12682: Scorer should sum up scores into a double. (Shubham Chaudhary)

+* GITHUB#12727: Ensure negative scores are not returned by vector similarity functions (Ben Trent)
+
 Build
 ---------------------

--- a/lucene/core/src/java/org/apache/lucene/index/VectorSimilarityFunction.java
+++ b/lucene/core/src/java/org/apache/lucene/index/VectorSimilarityFunction.java
@ -52,7 +52,7 @@ public enum VectorSimilarityFunction {
  DOT_PRODUCT {
    @Override
    public float compare(float[] v1, float[] v2) {
-      return (1 + dotProduct(v1, v2)) / 2;
+      return Math.max((1 + dotProduct(v1, v2)) / 2, 0);
    }

    @Override
@ -70,7 +70,7 @@ public enum VectorSimilarityFunction {
  COSINE {
    @Override
    public float compare(float[] v1, float[] v2) {
-      return (1 + cosine(v1, v2)) / 2;
+      return Math.max((1 + cosine(v1, v2)) / 2, 0);
    }

    @Override
--- a/lucene/core/src/test/org/apache/lucene/util/TestVectorUtil.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestVectorUtil.java
@ -17,6 +17,7 @@
 package org.apache.lucene.util;

 import java.util.Random;
+import org.apache.lucene.index.VectorSimilarityFunction;
 import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.lucene.tests.util.TestUtil;

@ -115,6 +116,19 @@ public class TestVectorUtil extends LuceneTestCase {
    expectThrows(IllegalArgumentException.class, () -> VectorUtil.l2normalize(v));
  }

+  public void testExtremeNumerics() { // checks that no similarity function yields a negative score
+    float[] v1 = new float[1536];
+    float[] v2 = new float[1536];
+    for (int i = 0; i < 1536; i++) {
+      v1[i] = 0.888888f; // every component of v1 is the same positive value
+      v2[i] = -0.777777f; // every component of v2 is the same negative value, so v2 points opposite to v1
+    }
+    for (VectorSimilarityFunction vectorSimilarityFunction : VectorSimilarityFunction.values()) { // every enum constant
+      float v = vectorSimilarityFunction.compare(v1, v2);
+      assertTrue(vectorSimilarityFunction + " expected >=0 got:" + v, v >= 0); // message names the function and its score
+    }
+  }
+
  private static float l2(float[] v) {
    float l2 = 0;
    for (float x : v) {