Use a fixed seed for computing term hashCode in TermsSliceQuery (#23795)

I think this query should not use the hashCode provided by BytesRef#hashCode().
That method uses StringHelper#GOOD_FAST_HASH_SEED, which is initialized in a
static block to System.currentTimeMillis(), so the seed differs on every node.
As a result, running this query on different replicas may return inconsistent results.

Using a fixed seed should guarantee that the docs are sliced consistently
across replicas.

Fixes #23096
This commit is contained in:
David Causse 2017-03-30 10:10:33 +02:00 committed by Jim Ferenczi
parent 1a5c36509f
commit a49e1c0062
1 changed files with 7 additions and 1 deletions

View File

@ -33,6 +33,7 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.StringHelper;
import java.io.IOException;
@ -46,6 +47,9 @@ import java.io.IOException;
* <b>NOTE</b>: Documents with no value for that field are ignored.
*/
public final class TermsSliceQuery extends SliceQuery {
// Fixed seed for computing term hashCode
private static final int SEED = 7919;
public TermsSliceQuery(String field, int id, int max) {
super(field, id, max);
}
@ -71,7 +75,9 @@ public final class TermsSliceQuery extends SliceQuery {
final TermsEnum te = terms.iterator();
PostingsEnum docsEnum = null;
for (BytesRef term = te.next(); term != null; term = te.next()) {
int hashCode = term.hashCode();
// use a fixed seed instead of term.hashCode() otherwise this query may return inconsistent results when
// running on another replica (StringHelper sets its default seed at startup with current time)
int hashCode = StringHelper.murmurhash3_x86_32(term, SEED);
if (contains(hashCode)) {
docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
builder.add(docsEnum);