diff --git a/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java b/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java
index 5e7c313fd7f..34dcb803361 100644
--- a/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java
@@ -270,7 +270,8 @@ public final class VectorUtil {
    */
   public static float dotProductScore(BytesRef a, BytesRef b) {
     // divide by 2 * 2^14 (maximum absolute value of product of 2 signed bytes) * len
-    return (1 + dotProduct(a, b)) / (float) (a.length * (1 << 15));
+    float denom = (float) (a.length * (1 << 15));
+    return 0.5f + dotProduct(a, b) / denom;
   }
 
   /**
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestVectorUtil.java b/lucene/core/src/test/org/apache/lucene/util/TestVectorUtil.java
index 9a1a15db018..b8292722ade 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestVectorUtil.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestVectorUtil.java
@@ -176,7 +176,20 @@ public class TestVectorUtil extends LuceneTestCase {
     BytesRef a = new BytesRef(new byte[] {1, 2, 3});
     BytesRef b = new BytesRef(new byte[] {-10, 0, 5});
     assertEquals(5, VectorUtil.dotProduct(a, b), 0);
-    assertEquals(5 / (3f * (1 << 15)), VectorUtil.dotProductScore(a, b), DELTA);
+    float denom = a.length * (1 << 15);
+    assertEquals(0.5 + 5 / denom, VectorUtil.dotProductScore(a, b), DELTA);
+
+    // dot product 0 maps to dotProductScore 0.5
+    BytesRef zero = new BytesRef(new byte[] {0, 0, 0});
+    assertEquals(0.5, VectorUtil.dotProductScore(a, zero), DELTA);
+
+    BytesRef min = new BytesRef(new byte[] {-128, -128});
+    BytesRef max = new BytesRef(new byte[] {127, 127});
+    // minimum dot product score is not quite zero because 127 < 128
+    assertEquals(0.0039, VectorUtil.dotProductScore(min, max), DELTA);
+
+    // maximum dot product score
+    assertEquals(1, VectorUtil.dotProductScore(min, min), DELTA);
   }
 
   public void testSelfDotProductBytes() {