From b11e9544ea5e578ee6bce127d84817bc715ba098 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Mon, 29 Aug 2016 23:39:08 -0700
Subject: [PATCH] GroupBy v2: Improve hash code distribution. (#3407)

Without this transformation, distribution of hash % X is poor in general.
It is catastrophically poor when X is a multiple of 31 (many slots would be empty).
---
 .../java/io/druid/query/groupby/epinephelinae/Groupers.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/Groupers.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/Groupers.java
index db0a6149a6a..a86ed4be965 100644
--- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/Groupers.java
+++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/Groupers.java
@@ -46,7 +46,9 @@ public class Groupers
   public static int hash(final Object obj)
   {
     // Mask off the high bit so we can use that to determine if a bucket is used or not.
-    return obj.hashCode() & 0x7fffffff;
+    // Also apply the same XOR transformation that j.u.HashMap applies, to improve distribution.
+    final int code = obj.hashCode();
+    return (code ^ (code >>> 16)) & 0x7fffffff;
   }
 
   public static > Iterator> mergeIterators(