From d48193e8cff854aea6f5e00c6b6bb7b176b97c72 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 14 Mar 2021 23:16:08 -0400 Subject: [PATCH] LUCENE-9837: try to improve performance of VectorUtil.dotProduct (#17) More loop unrolling for VectorUtil.dotProduct to eke out a bit more short-term performance. --- .../org/apache/lucene/util/VectorUtil.java | 66 ++++++++++++++----- 1 file changed, 48 insertions(+), 18 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java b/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java index cc9cd15b7d7..546d13de7fd 100644 --- a/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java @@ -42,25 +42,55 @@ public final class VectorUtil { if (a.length < 8) { return res; } - float s0 = 0f; - float s1 = 0f; - float s2 = 0f; - float s3 = 0f; - float s4 = 0f; - float s5 = 0f; - float s6 = 0f; - float s7 = 0f; - for (; i + 7 < a.length; i += 8) { - s0 += b[i] * a[i]; - s1 += b[i + 1] * a[i + 1]; - s2 += b[i + 2] * a[i + 2]; - s3 += b[i + 3] * a[i + 3]; - s4 += b[i + 4] * a[i + 4]; - s5 += b[i + 5] * a[i + 5]; - s6 += b[i + 6] * a[i + 6]; - s7 += b[i + 7] * a[i + 7]; + for (; i + 31 < a.length; i += 32) { + res += + b[i + 0] * a[i + 0] + + b[i + 1] * a[i + 1] + + b[i + 2] * a[i + 2] + + b[i + 3] * a[i + 3] + + b[i + 4] * a[i + 4] + + b[i + 5] * a[i + 5] + + b[i + 6] * a[i + 6] + + b[i + 7] * a[i + 7]; + res += + b[i + 8] * a[i + 8] + + b[i + 9] * a[i + 9] + + b[i + 10] * a[i + 10] + + b[i + 11] * a[i + 11] + + b[i + 12] * a[i + 12] + + b[i + 13] * a[i + 13] + + b[i + 14] * a[i + 14] + + b[i + 15] * a[i + 15]; + res += + b[i + 16] * a[i + 16] + + b[i + 17] * a[i + 17] + + b[i + 18] * a[i + 18] + + b[i + 19] * a[i + 19] + + b[i + 20] * a[i + 20] + + b[i + 21] * a[i + 21] + + b[i + 22] * a[i + 22] + + b[i + 23] * a[i + 23]; + res += + b[i + 24] * a[i + 24] + + b[i + 25] * a[i + 25] + + b[i + 26] * a[i + 26] + + b[i + 27] * a[i + 27] + 
b[i + 28] * a[i + 28] + + b[i + 29] * a[i + 29] + + b[i + 30] * a[i + 30] + + b[i + 31] * a[i + 31]; + } + for (; i + 7 < a.length; i += 8) { + res += + b[i + 0] * a[i + 0] + + b[i + 1] * a[i + 1] + + b[i + 2] * a[i + 2] + + b[i + 3] * a[i + 3] + + b[i + 4] * a[i + 4] + + b[i + 5] * a[i + 5] + + b[i + 6] * a[i + 6] + + b[i + 7] * a[i + 7]; } - res += s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7; return res; }