mirror of https://github.com/apache/lucene.git
fix VectorUtil.dotProductScore normalization (#1073)
This commit is contained in:
parent
60fa19d509
commit
798c02dd70
|
@ -270,7 +270,8 @@ public final class VectorUtil {
|
||||||
*/
|
*/
|
||||||
public static float dotProductScore(BytesRef a, BytesRef b) {
|
public static float dotProductScore(BytesRef a, BytesRef b) {
|
||||||
// divide by 2 * 2^14 (maximum absolute value of product of 2 signed bytes) * len
|
// divide by 2 * 2^14 (maximum absolute value of product of 2 signed bytes) * len
|
||||||
return (1 + dotProduct(a, b)) / (float) (a.length * (1 << 15));
|
float denom = (float) (a.length * (1 << 15));
|
||||||
|
return 0.5f + dotProduct(a, b) / denom;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -176,7 +176,20 @@ public class TestVectorUtil extends LuceneTestCase {
|
||||||
BytesRef a = new BytesRef(new byte[] {1, 2, 3});
|
BytesRef a = new BytesRef(new byte[] {1, 2, 3});
|
||||||
BytesRef b = new BytesRef(new byte[] {-10, 0, 5});
|
BytesRef b = new BytesRef(new byte[] {-10, 0, 5});
|
||||||
assertEquals(5, VectorUtil.dotProduct(a, b), 0);
|
assertEquals(5, VectorUtil.dotProduct(a, b), 0);
|
||||||
assertEquals(5 / (3f * (1 << 15)), VectorUtil.dotProductScore(a, b), DELTA);
|
float denom = a.length * (1 << 15);
|
||||||
|
assertEquals(0.5 + 5 / denom, VectorUtil.dotProductScore(a, b), DELTA);
|
||||||
|
|
||||||
|
// dot product 0 maps to dotProductScore 0.5
|
||||||
|
BytesRef zero = new BytesRef(new byte[] {0, 0, 0});
|
||||||
|
assertEquals(0.5, VectorUtil.dotProductScore(a, zero), DELTA);
|
||||||
|
|
||||||
|
BytesRef min = new BytesRef(new byte[] {-128, -128});
|
||||||
|
BytesRef max = new BytesRef(new byte[] {127, 127});
|
||||||
|
// minimum dot product score is not quite zero because 127 < 128
|
||||||
|
assertEquals(0.0039, VectorUtil.dotProductScore(min, max), DELTA);
|
||||||
|
|
||||||
|
// maximum dot product score
|
||||||
|
assertEquals(1, VectorUtil.dotProductScore(min, min), DELTA);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSelfDotProductBytes() {
|
public void testSelfDotProductBytes() {
|
||||||
|
|
Loading…
Reference in New Issue