LUCENE-9837: try to improve performance of VectorUtil.dotProduct (#17)

More loop unrolling for VectorUtil.dotProduct to eke out a bit more short-term performance.
This commit is contained in:
Robert Muir 2021-03-14 23:16:08 -04:00 committed by GitHub
parent f3a284ad83
commit d48193e8cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 48 additions and 18 deletions

View File

@@ -42,25 +42,55 @@ public final class VectorUtil {
if (a.length < 8) {
return res;
}
float s0 = 0f;
float s1 = 0f;
float s2 = 0f;
float s3 = 0f;
float s4 = 0f;
float s5 = 0f;
float s6 = 0f;
float s7 = 0f;
for (; i + 7 < a.length; i += 8) {
s0 += b[i] * a[i];
s1 += b[i + 1] * a[i + 1];
s2 += b[i + 2] * a[i + 2];
s3 += b[i + 3] * a[i + 3];
s4 += b[i + 4] * a[i + 4];
s5 += b[i + 5] * a[i + 5];
s6 += b[i + 6] * a[i + 6];
s7 += b[i + 7] * a[i + 7];
for (; i + 31 < a.length; i += 32) {
res +=
b[i + 0] * a[i + 0]
+ b[i + 1] * a[i + 1]
+ b[i + 2] * a[i + 2]
+ b[i + 3] * a[i + 3]
+ b[i + 4] * a[i + 4]
+ b[i + 5] * a[i + 5]
+ b[i + 6] * a[i + 6]
+ b[i + 7] * a[i + 7];
res +=
b[i + 8] * a[i + 8]
+ b[i + 9] * a[i + 9]
+ b[i + 10] * a[i + 10]
+ b[i + 11] * a[i + 11]
+ b[i + 12] * a[i + 12]
+ b[i + 13] * a[i + 13]
+ b[i + 14] * a[i + 14]
+ b[i + 15] * a[i + 15];
res +=
b[i + 16] * a[i + 16]
+ b[i + 17] * a[i + 17]
+ b[i + 18] * a[i + 18]
+ b[i + 19] * a[i + 19]
+ b[i + 20] * a[i + 20]
+ b[i + 21] * a[i + 21]
+ b[i + 22] * a[i + 22]
+ b[i + 23] * a[i + 23];
res +=
b[i + 24] * a[i + 24]
+ b[i + 25] * a[i + 25]
+ b[i + 26] * a[i + 26]
+ b[i + 27] * a[i + 27]
+ b[i + 28] * a[i + 28]
+ b[i + 29] * a[i + 29]
+ b[i + 30] * a[i + 30]
+ b[i + 31] * a[i + 31];
}
for (; i + 7 < a.length; i += 8) {
res +=
b[i + 0] * a[i + 0]
+ b[i + 1] * a[i + 1]
+ b[i + 2] * a[i + 2]
+ b[i + 3] * a[i + 3]
+ b[i + 4] * a[i + 4]
+ b[i + 5] * a[i + 5]
+ b[i + 6] * a[i + 6]
+ b[i + 7] * a[i + 7];
}
res += s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
return res;
}