From e8ea9d75852bfc79e931143807af99ff9297da7e Mon Sep 17 00:00:00 2001 From: Chris Earle Date: Mon, 31 Mar 2014 00:02:32 -0400 Subject: [PATCH] Strengthening pseudo random number generator and adding tests to verify its behavior. Closes #5454 and #5578 --- .../search/function/RandomScoreFunction.java | 27 +-- .../function/RandomScoreFunctionTests.java | 194 ++++++++++++++++++ 2 files changed, 200 insertions(+), 21 deletions(-) create mode 100644 src/test/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunctionTests.java diff --git a/src/main/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunction.java b/src/main/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunction.java index 89203c03e67..ff6f7887641 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunction.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunction.java @@ -22,12 +22,11 @@ import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Explanation; /** - * + * Pseudo randomly generate a score for each {@link #score}. */ public class RandomScoreFunction extends ScoreFunction { private final PRNG prng; - private int docBase; public RandomScoreFunction(long seed) { super(CombineFunction.MULT); @@ -36,12 +35,12 @@ public class RandomScoreFunction extends ScoreFunction { @Override public void setNextReader(AtomicReaderContext context) { - this.docBase = context.docBase; + // intentionally does nothing } @Override public double score(int docId, float subQueryScore) { - return prng.random(docBase + docId); + return prng.nextFloat(); } @Override @@ -53,8 +52,7 @@ public class RandomScoreFunction extends ScoreFunction { } /** - * Algorithm largely based on {@link java.util.Random} except this one is not - * thread safe and it incorporates the doc id on next(); + * A non thread-safe PRNG */ static class PRNG { @@ -70,22 +68,9 @@ public class RandomScoreFunction extends ScoreFunction { this.seed = (seed ^ multiplier) & mask; } - public float random(int doc) { - if (doc == 0) { - doc = 0xCAFEBAB; - } - - long rand = doc; - rand |= rand << 32; - rand ^= rand; - return nextFloat(rand); - } - - public float nextFloat(long rand) { + public float nextFloat() { seed = (seed * multiplier + addend) & mask; - rand ^= seed; - double result = rand / (double)(1L << 54); - return (float) result; + return seed / (float)(1 << 24); } } diff --git a/src/test/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunctionTests.java b/src/test/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunctionTests.java new file mode 100644 index 00000000000..d03d21260b8 --- /dev/null +++ b/src/test/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunctionTests.java @@ -0,0 +1,194 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.lucene.search.function; + +import com.google.common.collect.Lists; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.*; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.RAMDirectory; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.test.ElasticsearchTestCase; +import org.junit.After; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; + +import static org.hamcrest.Matchers.*; + +/** + * Test {@link RandomScoreFunction} + */ +public class RandomScoreFunctionTests extends ElasticsearchTestCase { + + private final String[] ids = { "1", "2", "3" }; + private IndexWriter writer; + private AtomicReader reader; + + @After + public void closeReaderAndWriterIfUsed() throws IOException { + if (reader != null) { + reader.close(); + } + + if (writer != null) { + writer.close(); + } + } + + /** + * Create a "mock" {@link IndexSearcher} that uses an in-memory directory + * containing three documents whose IDs are "1", "2", and "3" respectively. + * @return Never {@code null} + * @throws IOException if an unexpected error occurs while mocking + */ + private IndexSearcher mockSearcher() throws IOException { + writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); + for (String id : ids) { + Document document = new Document(); + document.add(new TextField("_id", id, Field.Store.YES)); + writer.addDocument(document); + } + reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true)); + return new IndexSearcher(reader); + } + + /** + * Given the same seed, the pseudo random number generator should match on + * each use given the same number of invocations. + */ + @Test + public void testPrngNextFloatIsConsistent() { + long seed = randomLong(); + + RandomScoreFunction.PRNG prng = new RandomScoreFunction.PRNG(seed); + RandomScoreFunction.PRNG prng2 = new RandomScoreFunction.PRNG(seed); + + // The seed will be changing the entire time, so each value should be + // different + assertThat(prng.nextFloat(), equalTo(prng2.nextFloat())); + assertThat(prng.nextFloat(), equalTo(prng2.nextFloat())); + assertThat(prng.nextFloat(), equalTo(prng2.nextFloat())); + assertThat(prng.nextFloat(), equalTo(prng2.nextFloat())); + } + + @Test + public void testPrngNextFloatSometimesFirstIsGreaterThanSecond() { + boolean firstWasGreater = false; + + // Since the results themselves are intended to be random, we cannot + // just do @Repeat(iterations = 100) because some iterations are + // expected to fail + for (int i = 0; i < 100; ++i) { + long seed = randomLong(); + + RandomScoreFunction.PRNG prng = new RandomScoreFunction.PRNG(seed); + + float firstRandom = prng.nextFloat(); + float secondRandom = prng.nextFloat(); + + if (firstRandom > secondRandom) { + firstWasGreater = true; + } + } + + assertTrue("First value was never greater than the second value", firstWasGreater); + } + + @Test + public void testPrngNextFloatSometimesFirstIsLessThanSecond() { + boolean firstWasLess = false; + + // Since the results themselves are intended to be random, we cannot + // just do @Repeat(iterations = 100) because some iterations are + // expected to fail + for (int i = 0; i < 1000; ++i) { + long seed = randomLong(); + + RandomScoreFunction.PRNG prng = new RandomScoreFunction.PRNG(seed); + + float firstRandom = prng.nextFloat(); + float secondRandom = prng.nextFloat(); + + if (firstRandom < secondRandom) { + firstWasLess = true; + } + } + + assertTrue("First value was never less than the second value", firstWasLess); + } + + @Test + public void testScorerResultsInRandomOrder() throws IOException { + List idsNotSpotted = Lists.newArrayList(ids); + IndexSearcher searcher = mockSearcher(); + + // Since the results themselves are intended to be random, we cannot + // just do @Repeat(iterations = 100) because some iterations are + // expected to fail + for (int i = 0; i < 100; ++i) { + // Randomly seeded to keep trying to shuffle without walking through + // values + RandomScoreFunction function = new RandomScoreFunction(randomLong()); + // fulfilling contract + function.setNextReader(reader.getContext()); + + FunctionScoreQuery query = new FunctionScoreQuery(Queries.newMatchAllQuery(), function); + + // Testing that we get a random result + TopDocs docs = searcher.search(query, 1); + + String id = reader.document(docs.scoreDocs[0].doc).getField("_id").stringValue(); + + if (idsNotSpotted.remove(id) && idsNotSpotted.isEmpty()) { + // short circuit test because we succeeded + break; + } + } + + assertThat(idsNotSpotted, empty()); + } + + @Test + public void testExplainScoreReportsOriginalSeed() { + long seed = randomLong(); + Explanation subExplanation = new Explanation(); + + RandomScoreFunction function = new RandomScoreFunction(seed); + // Trigger a random call to change the seed to ensure that we are + // reporting the _original_ seed + function.score(0, 1.0f); + + // Generate the randomScore explanation + Explanation randomExplanation = function.explainScore(0, subExplanation); + + // Original seed should be there + assertThat(randomExplanation.getDescription(), containsString("" + seed)); + assertThat(randomExplanation.getDetails(), arrayContaining(subExplanation)); + } + + +}