From a49e1c0062aaabcaf573f0ac530237883971436b Mon Sep 17 00:00:00 2001 From: David Causse Date: Thu, 30 Mar 2017 10:10:33 +0200 Subject: [PATCH] Use a fixed seed for computing term hashCode in TermsSliceQuery (#23795) I think this query should not use the hashCode provided by BytesRef#hashCode(). It uses StringHelper#GOOD_FAST_HASH_SEED which is initialized in a static block to System.currentTimeMillis(). Running this query on different replicas may return inconsistent results. Using a fixed seed should guarantee that the docs are sliced consistently across replicas. Fixes #23096 --- .../org/elasticsearch/search/slice/TermsSliceQuery.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/elasticsearch/search/slice/TermsSliceQuery.java b/core/src/main/java/org/elasticsearch/search/slice/TermsSliceQuery.java index 429a3ebe892..947014f43e0 100644 --- a/core/src/main/java/org/elasticsearch/search/slice/TermsSliceQuery.java +++ b/core/src/main/java/org/elasticsearch/search/slice/TermsSliceQuery.java @@ -33,6 +33,7 @@ import org.apache.lucene.search.Scorer; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.DocIdSetBuilder; +import org.apache.lucene.util.StringHelper; import java.io.IOException; @@ -46,6 +47,9 @@ import java.io.IOException; * NOTE: Documents with no value for that field are ignored. 
*/ public final class TermsSliceQuery extends SliceQuery { + // Fixed seed for computing term hashCode + private static final int SEED = 7919; + public TermsSliceQuery(String field, int id, int max) { super(field, id, max); } @@ -71,7 +75,9 @@ public final class TermsSliceQuery extends SliceQuery { final TermsEnum te = terms.iterator(); PostingsEnum docsEnum = null; for (BytesRef term = te.next(); term != null; term = te.next()) { - int hashCode = term.hashCode(); + // use a fixed seed instead of term.hashCode() otherwise this query may return inconsistent results when + // running on another replica (StringHelper sets its default seed at startup with current time) + int hashCode = StringHelper.murmurhash3_x86_32(term, SEED); if (contains(hashCode)) { docsEnum = te.postings(docsEnum, PostingsEnum.NONE); builder.add(docsEnum);