From c93eae8545640bf1febb191315c3dbc0c9702669 Mon Sep 17 00:00:00 2001 From: uboness Date: Fri, 16 Aug 2013 02:26:13 +0200 Subject: [PATCH] Added support for random_score function: * can be used to return matching results in random order Closes #1170 --- .../search/function/RandomScoreFunction.java | 99 +++++++++++ .../index/query/QueryBuilders.java | 2 - .../functionscore/FunctionScoreModule.java | 2 + .../random/RandomScoreFunctionBuilder.java | 62 +++++++ .../random/RandomScoreFunctionParser.java | 78 +++++++++ .../script/ScriptScoreFunctionBuilder.java | 2 +- .../RandomScoreFunctionTests.java | 159 ++++++++++++++++++ .../search/query/SimpleQueryTests.java | 1 - 8 files changed, 401 insertions(+), 4 deletions(-) create mode 100644 src/main/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunction.java create mode 100644 src/main/java/org/elasticsearch/index/query/functionscore/random/RandomScoreFunctionBuilder.java create mode 100644 src/main/java/org/elasticsearch/index/query/functionscore/random/RandomScoreFunctionParser.java create mode 100644 src/test/java/org/elasticsearch/test/integration/search/functionscore/RandomScoreFunctionTests.java diff --git a/src/main/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunction.java b/src/main/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunction.java new file mode 100644 index 00000000000..f3c80e45abf --- /dev/null +++ b/src/main/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunction.java @@ -0,0 +1,99 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.common.lucene.search.function;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.search.Explanation;
+
+/**
+ * A {@link ScoreFunction} that produces pseudo-random scores from a caller-supplied seed.
+ *
+ * The internal generator advances its state on every {@link #score(int, float)} call, so the
+ * value returned for a document depends on the seed, on the document id and on how many
+ * scores were requested before it. Instances carry mutable state (current doc base and
+ * generator state) and are not thread safe.
+ */
+public class RandomScoreFunction implements ScoreFunction {
+
+    private final PRNG prng;
+    private int docBase;
+
+    /**
+     * @param seed the seed the pseudo-random number generator is initialised with
+     */
+    public RandomScoreFunction(long seed) {
+        this.prng = new PRNG(seed);
+    }
+
+    /**
+     * Records the doc base of the reader about to be scored; it is added to the
+     * doc ids passed to {@link #score(int, float)}.
+     */
+    @Override
+    public void setNextReader(AtomicReaderContext context) {
+        this.docBase = context.docBase;
+    }
+
+    /**
+     * Returns the next pseudo-random score for the given document.
+     *
+     * @param docId         doc id, offset by the doc base of the current reader
+     * @param subQueryScore ignored by this function
+     */
+    @Override
+    public double score(int docId, float subQueryScore) {
+        return prng.random(docBase + docId);
+    }
+
+    @Override
+    public double factor(int docId) {
+        // NOTE(review): this returns the generator's current (scrambled, evolving) state,
+        // whereas explainFactor reports the original seed - confirm which one is intended.
+        return prng.seed;
+    }
+
+    /**
+     * Explains a score by naming the original seed and attaching the sub query explanation.
+     */
+    @Override
+    public Explanation explainScore(int docId, Explanation subQueryExpl) {
+        Explanation exp = new Explanation();
+        exp.setDescription("random score function (seed: " + prng.originalSeed + ")");
+        exp.addDetail(subQueryExpl);
+        return exp;
+    }
+
+    /**
+     * Explains the factor by naming the original seed.
+     */
+    @Override
+    public Explanation explainFactor(int docId) {
+        Explanation exp = new Explanation();
+        // The description previously ended with an unmatched ')'.
+        exp.setDescription("seed: " + prng.originalSeed);
+        return exp;
+    }
+
+    /**
+     * Algorithm based on {@link java.util.Random} except this one is not
+     * thread safe
+     */
+    static class PRNG {
+
+        private static final long multiplier = 0x5DEECE66DL;
+        private static final long addend = 0xBL;
+        private static final long mask = (1L << 48) - 1;
+
+        /** The seed exactly as passed to the constructor; only used for explanations. */
+        final long originalSeed;
+        /** Current 48-bit generator state; replaced on every {@link #nextFloat()} call. */
+        long seed;
+
+        PRNG(long seed) {
+            this.originalSeed = seed;
+            // Scramble the seed and keep only the low 48 bits.
+            this.seed = (seed ^ multiplier) & mask;
+        }
+
+        /**
+         * Advances the generator and scales the drawn float by the doc id XOR-ed with a constant.
+         * The result is not limited to [0, 1): it can be negative, and it is 0 when
+         * {@code doc == 0xCAFEBAB}.
+         */
+        public float random(int doc) {
+            // Doc 0 is substituted with -17 before mixing (the reason is not evident from this file).
+            if (doc == 0) {
+                doc = -17;
+            }
+            return nextFloat() * (doc ^ 0xCAFEBAB);
+        }
+
+        /**
+         * Advances the state by one linear congruential step and returns a float in [0, 1)
+         * built from the upper 24 bits of the 48-bit state.
+         */
+        public float nextFloat() {
+            seed = (seed * multiplier + addend) & mask;
+            int r = (int)(seed >>> 24);
+            return r / ((float)(1 << 24));
+        }
+
+    }
+}
diff --git a/src/main/java/org/elasticsearch/index/query/QueryBuilders.java b/src/main/java/org/elasticsearch/index/query/QueryBuilders.java
index 29e0d557ba2..9e5224cb0d5 100644
--- a/src/main/java/org/elasticsearch/index/query/QueryBuilders.java
+++ b/src/main/java/org/elasticsearch/index/query/QueryBuilders.java
@@ -545,7 +545,6 @@ public abstract class QueryBuilders {
      * A query that allows to define a custom scoring function.
      *
      * @param queryBuilder The query to custom score
-     * @param scoreFunctionBuilder The score function used to re-score the query
      */
     public static FunctionScoreQueryBuilder functionScoreQuery(QueryBuilder queryBuilder) {
         return new FunctionScoreQueryBuilder(queryBuilder);
@@ -555,7 +554,6 @@ public abstract class QueryBuilders {
      * A query that allows to define a custom scoring function.
* * @param filterBuilder The query to custom score - * @param scoreFunctionBuilder The score function used to re-score the query */ public static FunctionScoreQueryBuilder functionScoreQuery(FilterBuilder filterBuilder) { return new FunctionScoreQueryBuilder(filterBuilder); diff --git a/src/main/java/org/elasticsearch/index/query/functionscore/FunctionScoreModule.java b/src/main/java/org/elasticsearch/index/query/functionscore/FunctionScoreModule.java index 0a6ff209ac3..8f0ff795a3f 100644 --- a/src/main/java/org/elasticsearch/index/query/functionscore/FunctionScoreModule.java +++ b/src/main/java/org/elasticsearch/index/query/functionscore/FunctionScoreModule.java @@ -25,6 +25,7 @@ import org.elasticsearch.index.query.functionscore.exp.ExponentialDecayFunctionP import org.elasticsearch.index.query.functionscore.factor.FactorParser; import org.elasticsearch.index.query.functionscore.gauss.GaussDecayFunctionParser; import org.elasticsearch.index.query.functionscore.lin.LinearDecayFunctionParser; +import org.elasticsearch.index.query.functionscore.random.RandomScoreFunctionParser; import org.elasticsearch.index.query.functionscore.script.ScriptScoreFunctionParser; import java.util.List; @@ -42,6 +43,7 @@ public class FunctionScoreModule extends AbstractModule { registerParser(GaussDecayFunctionParser.class); registerParser(LinearDecayFunctionParser.class); registerParser(ExponentialDecayFunctionParser.class); + registerParser(RandomScoreFunctionParser.class); } public void registerParser(Class parser) { diff --git a/src/main/java/org/elasticsearch/index/query/functionscore/random/RandomScoreFunctionBuilder.java b/src/main/java/org/elasticsearch/index/query/functionscore/random/RandomScoreFunctionBuilder.java new file mode 100644 index 00000000000..f5a6d189ffd --- /dev/null +++ b/src/main/java/org/elasticsearch/index/query/functionscore/random/RandomScoreFunctionBuilder.java @@ -0,0 +1,62 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more 
contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query.functionscore.random;
+
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilder;
+
+import java.io.IOException;
+
+/**
+ * Builds the request body of the random score function: an object named after the
+ * function that optionally carries a "seed" field.
+ */
+public class RandomScoreFunctionBuilder implements ScoreFunctionBuilder {
+
+    /** Seed to emit, or {@code null} when the caller never set one. */
+    private Long seed;
+
+    public RandomScoreFunctionBuilder() {
+    }
+
+    /**
+     * Sets the seed that is written into the request.
+     *
+     * @param seed The seed.
+     * @return this builder, to allow call chaining
+     */
+    public RandomScoreFunctionBuilder seed(long seed) {
+        this.seed = Long.valueOf(seed);
+        return this;
+    }
+
+    /**
+     * @return the primary name of the function as registered by its parser
+     */
+    @Override
+    public String getName() {
+        return RandomScoreFunctionParser.NAMES[0];
+    }
+
+    /**
+     * Writes the function object; the "seed" field is only emitted when a seed was set.
+     */
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject(getName());
+        if (seed == null) {
+            // No seed configured: emit an empty function object.
+            return builder.endObject();
+        }
+        final long seedValue = seed.longValue();
+        builder.field("seed", seedValue);
+        return builder.endObject();
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/org/elasticsearch/index/query/functionscore/random/RandomScoreFunctionParser.java b/src/main/java/org/elasticsearch/index/query/functionscore/random/RandomScoreFunctionParser.java
new file mode 100644
index 00000000000..77bf97c4f41
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/query/functionscore/random/RandomScoreFunctionParser.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+
+package org.elasticsearch.index.query.functionscore.random;
+
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.lucene.search.function.RandomScoreFunction;
+import org.elasticsearch.common.lucene.search.function.ScoreFunction;
+import org.elasticsearch.common.lucene.search.function.ScriptScoreFunction;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.query.QueryParseContext;
+import org.elasticsearch.index.query.QueryParsingException;
+import org.elasticsearch.index.query.functionscore.ScoreFunctionParser;
+import org.elasticsearch.script.SearchScript;
+
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * Parses the random score function, registered under the names in {@link #NAMES}.
+ * The only supported field is "seed".
+ */
+public class RandomScoreFunctionParser implements ScoreFunctionParser {
+
+    public static String[] NAMES = { "random_score", "randomScore" };
+
+    @Inject
+    public RandomScoreFunctionParser() {
+    }
+
+    @Override
+    public String[] getNames() {
+        return NAMES;
+    }
+
+    /**
+     * Reads the function object up to its closing token and creates the score function.
+     *
+     * @return a {@link RandomScoreFunction} seeded with the "seed" field when present,
+     *         otherwise with {@code parseContext.nowInMillis()}
+     * @throws QueryParsingException if a value is given for any field other than "seed"
+     */
+    @Override
+    public ScoreFunction parse(QueryParseContext parseContext, XContentParser parser) throws IOException, QueryParsingException {
+
+        // Presence is tracked separately from the value so that every long, including -1,
+        // is accepted as an explicit seed.
+        long seed = 0;
+        boolean seedProvided = false;
+
+        String currentFieldName = null;
+        XContentParser.Token token;
+        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+            if (token == XContentParser.Token.FIELD_NAME) {
+                currentFieldName = parser.currentName();
+            } else if (token.isValue()) {
+                if ("seed".equals(currentFieldName)) {
+                    seed = parser.longValue();
+                    seedProvided = true;
+                } else {
+                    throw new QueryParsingException(parseContext.index(), NAMES[0] + " query does not support [" + currentFieldName + "]");
+                }
+            }
+        }
+
+        if (!seedProvided) {
+            seed = parseContext.nowInMillis();
+        }
+
+        return new RandomScoreFunction(seed);
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/elasticsearch/index/query/functionscore/script/ScriptScoreFunctionBuilder.java b/src/main/java/org/elasticsearch/index/query/functionscore/script/ScriptScoreFunctionBuilder.java
index c006689c98d..d458b81b860
100644 --- a/src/main/java/org/elasticsearch/index/query/functionscore/script/ScriptScoreFunctionBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/functionscore/script/ScriptScoreFunctionBuilder.java @@ -28,7 +28,7 @@ import java.io.IOException; import java.util.Map; /** - * A query that uses a script to compute or influence the score of documents + * A function that uses a script to compute or influence the score of documents * that match with the inner query or filter. */ public class ScriptScoreFunctionBuilder implements ScoreFunctionBuilder { diff --git a/src/test/java/org/elasticsearch/test/integration/search/functionscore/RandomScoreFunctionTests.java b/src/test/java/org/elasticsearch/test/integration/search/functionscore/RandomScoreFunctionTests.java new file mode 100644 index 00000000000..60159151871 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/integration/search/functionscore/RandomScoreFunctionTests.java @@ -0,0 +1,159 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.elasticsearch.test.integration.search.functionscore;
+
+import org.apache.lucene.util._TestUtil;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.query.functionscore.random.RandomScoreFunctionBuilder;
+import org.elasticsearch.test.integration.AbstractSharedClusterTest;
+import org.hamcrest.CoreMatchers;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.Arrays;
+
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
+import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
+
+/**
+ * Integration tests for the random score function used inside a function score query.
+ */
+public class RandomScoreFunctionTests extends AbstractSharedClusterTest {
+
+    @Override
+    public Settings getSettings() {
+        return randomSettingsBuilder()
+                .put("index.number_of_shards", 5)
+                .put("index.number_of_replicas", 2)
+                .build();
+    }
+
+    @Override
+    protected int numberOfNodes() {
+        return 3;
+    }
+
+    /**
+     * Runs the same seeded search three times with the same preference and checks that
+     * every run returns the same scores in the same positions.
+     */
+    @Test
+    public void consistentHitsWithSameSeed() throws Exception {
+        prepareCreate("test").execute().actionGet();
+        ensureGreen();
+
+        int docCount = atLeast(100);
+
+        for (int i = 0; i < docCount; i++) {
+            // Use the loop index as the id; using docCount here would overwrite a single
+            // document docCount times and leave the index with one document only.
+            index("test", "type", "" + i, jsonBuilder().startObject().endObject());
+        }
+
+        flush();
+
+        long seed = System.nanoTime();
+        String preference = _TestUtil.randomRealisticUnicodeString(getRandom());
+
+        // Scores of the first run; later runs are compared against them.
+        float[] scores = null;
+
+        for (int i = 0; i < 3; i++) {
+
+            SearchResponse searchResponse = client().prepareSearch()
+                    .setPreference(preference)
+                    .setQuery(functionScoreQuery(matchAllQuery()).add(new RandomScoreFunctionBuilder().seed(seed)))
+                    .execute().actionGet();
+
+            assertThat("Failures " + Arrays.toString(searchResponse.getShardFailures()), searchResponse.getShardFailures().length, CoreMatchers.equalTo(0));
+
+            int hitCount = searchResponse.getHits().getHits().length;
+
+            if (scores == null) {
+
+                scores = new float[hitCount];
+                for (int j = 0; j < hitCount; j++) {
+                    scores[j] = searchResponse.getHits().getAt(j).score();
+                }
+            } else {
+                // A differing hit count would otherwise either go unnoticed or fail with an
+                // ArrayIndexOutOfBoundsException instead of an assertion message.
+                assertThat(hitCount, CoreMatchers.equalTo(scores.length));
+                for (int j = 0; j < hitCount; j++) {
+                    assertThat(searchResponse.getHits().getAt(j).score(), CoreMatchers.equalTo(scores[j]));
+                }
+            }
+        }
+    }
+
+    /**
+     * Manual (ignored) experiment: runs many searches with different seeds, counts how often
+     * each document id ends up as the top hit and prints simple statistics about the spread.
+     * It makes no assertions.
+     */
+    @Test @Ignore
+    public void distribution() throws Exception {
+        int count = 10000;
+
+        prepareCreate("test").execute().actionGet();
+        ensureGreen();
+
+        for (int i = 0; i < count; i++) {
+            index("test", "type", "" + i, jsonBuilder().startObject().endObject());
+        }
+
+        flush();
+
+        // matrix[id] = number of searches in which the document with that id was the top hit
+        int[] matrix = new int[count];
+
+        for (int i = 0; i < count; i++) {
+
+            SearchResponse searchResponse = client().prepareSearch()
+                    .setQuery(functionScoreQuery(matchAllQuery()).add(new RandomScoreFunctionBuilder().seed(System.nanoTime())))
+                    .execute().actionGet();
+
+            matrix[Integer.parseInt(searchResponse.getHits().getAt(0).id())]++;
+        }
+
+        // filled = number of distinct ids that were a top hit at least once
+        int filled = 0;
+        int maxRepeat = 0;
+        for (int i = 0; i < matrix.length; i++) {
+            int value = matrix[i];
+            maxRepeat = Math.max(maxRepeat, value);
+            if (value > 0) {
+                filled++;
+            }
+        }
+        System.out.println();
+        System.out.println("max repeat: " + maxRepeat);
+        System.out.println("distribution: " + filled/(double)count);
+
+        // Countdown counters over the non-empty ids; the id at which a counter reaches
+        // zero is printed as the corresponding percentile.
+        int percentile50 = filled / 2;
+        int percentile25 = (filled / 4);
+        int percentile75 = percentile50 + percentile25;
+
+        int sum = 0;
+
+        for (int i = 0; i < matrix.length; i++) {
+            if (matrix[i] == 0) {
+                continue;
+            }
+            sum += i * matrix[i];
+            if (percentile50 == 0) {
+                System.out.println("median: " + i);
+            } else if (percentile25 == 0) {
+                System.out.println("percentile_25: " + i);
+            } else if (percentile75 == 0) {
+                System.out.println("percentile_75: " + i);
+            }
+            percentile50--;
+            percentile25--;
+            percentile75--;
+        }
+
+        System.out.println("mean: " + sum/(double)count);
+
+    }
+
+}
diff --git a/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java
b/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java index 7c09c5b0800..36ac10c9175 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java @@ -1570,7 +1570,6 @@ public class SimpleQueryTests extends AbstractSharedClusterTest { .execute() .actionGet(); assertNoFailures(response); - } }