diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index dd7c4d07db4..e3376fdb07c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -242,6 +242,8 @@ Optimizations * GITHUB#12702: Disable suffix sharing for block tree index, making writing the terms dictionary index faster and less RAM hungry, while making the index a bit (~1.X% for the terms index file on wikipedia). (Guo Feng, Mike McCandless) +* GITHUB#12726: Return the same input vector if it's a unit vector in VectorUtil#l2normalize. (Shubham Chaudhary) + Changes in runtime behavior --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java b/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java index a978e2d1f1f..ef52e605dbc 100644 --- a/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java @@ -106,18 +106,21 @@ public final class VectorUtil { * @throws IllegalArgumentException when the vector is all zero and throwOnZero is true */ public static float[] l2normalize(float[] v, boolean throwOnZero) { - double squareSum = IMPL.dotProduct(v, v); - int dim = v.length; - if (squareSum == 0) { + double l1norm = IMPL.dotProduct(v, v); + if (l1norm == 0) { if (throwOnZero) { throw new IllegalArgumentException("Cannot normalize a zero-length vector"); } else { return v; } } - double length = Math.sqrt(squareSum); + if (Math.abs(l1norm - 1.0d) <= 1e-5) { + return v; + } + int dim = v.length; + double l2norm = Math.sqrt(l1norm); for (int i = 0; i < dim; i++) { - v[i] /= length; + v[i] /= (float) l2norm; } return v; }