Added support for random_score function:

* can be used to return matching results in random order

 Closes #1170
This commit is contained in:
uboness 2013-08-16 02:26:13 +02:00
parent 610f262aac
commit c93eae8545
8 changed files with 401 additions and 4 deletions

View File

@ -0,0 +1,99 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene.search.function;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Explanation;
/**
 * A {@link ScoreFunction} that scores documents with values drawn from a seeded
 * pseudo-random number generator.
 * <p>
 * The generator is stateful: every call to {@link #score(int, float)} advances it, so the
 * value produced for a document depends on the seed, on the document id and on how many
 * scores were requested before it. Instances are not thread safe.
 */
public class RandomScoreFunction implements ScoreFunction {

    private final PRNG prng;

    /** Doc base of the segment currently being scored, as set by {@link #setNextReader}. */
    private int docBase;

    /**
     * @param seed the seed used to initialise the underlying generator
     */
    public RandomScoreFunction(long seed) {
        this.prng = new PRNG(seed);
    }

    @Override
    public void setNextReader(AtomicReaderContext context) {
        this.docBase = context.docBase;
    }

    /**
     * Returns a pseudo-random score for the given document. The sub query score is ignored.
     * The segment-local doc id is offset by the current doc base before it is handed to the
     * generator.
     */
    @Override
    public double score(int docId, float subQueryScore) {
        return prng.random(docBase + docId);
    }

    /**
     * Returns the current internal state of the generator, without advancing it.
     */
    @Override
    public double factor(int docId) {
        // NOTE(review): this exposes the raw generator state rather than a random factor;
        // confirm that this is what callers of factor() expect.
        return prng.seed;
    }

    /**
     * Describes this function and the seed it was created with. No value is set on the
     * returned explanation; the sub query explanation is attached as a detail.
     */
    @Override
    public Explanation explainScore(int docId, Explanation subQueryExpl) {
        Explanation exp = new Explanation();
        exp.setDescription("random score function (seed: " + prng.originalSeed + ")");
        exp.addDetail(subQueryExpl);
        return exp;
    }

    /**
     * Describes the seed this function was created with. No value is set on the returned
     * explanation.
     */
    @Override
    public Explanation explainFactor(int docId) {
        Explanation exp = new Explanation();
        // The description used to end with a stray ")" that had no matching "(".
        exp.setDescription("seed: " + prng.originalSeed);
        return exp;
    }

    /**
     * Algorithm based on {@link java.util.Random} except this one is not
     * thread safe
     */
    static class PRNG {

        private static final long MULTIPLIER = 0x5DEECE66DL;
        private static final long ADDEND = 0xBL;
        private static final long MASK = (1L << 48) - 1;

        /** The seed as passed to the constructor; only used for explanations. */
        final long originalSeed;

        /** Current 48 bit generator state; changes on every {@link #nextFloat()} call. */
        long seed;

        PRNG(long seed) {
            this.originalSeed = seed;
            // Scramble the initial seed and keep the low 48 bits.
            this.seed = (seed ^ MULTIPLIER) & MASK;
        }

        /**
         * Advances the generator and scales the drawn float by a value derived from the doc id.
         * The result is not confined to {@code [0, 1)} and may be negative.
         *
         * @param doc the (index wide) document id
         */
        public float random(int doc) {
            // Doc 0 is substituted with -17 before being mixed into the result.
            if (doc == 0) {
                doc = -17;
            }
            return nextFloat() * (doc ^ 0xCAFEBAB);
        }

        /**
         * Advances the generator by one step and returns the top 24 bits of the new
         * 48 bit state as a float in {@code [0, 1)}.
         */
        public float nextFloat() {
            seed = (seed * MULTIPLIER + ADDEND) & MASK;
            int r = (int) (seed >>> 24);
            return r / ((float) (1 << 24));
        }
    }
}

View File

@ -545,7 +545,6 @@ public abstract class QueryBuilders {
* A query that allows to define a custom scoring function.
*
* @param queryBuilder The query to custom score
* @param scoreFunctionBuilder The score function used to re-score the query
*/
public static FunctionScoreQueryBuilder functionScoreQuery(QueryBuilder queryBuilder) {
return new FunctionScoreQueryBuilder(queryBuilder);
@ -555,7 +554,6 @@ public abstract class QueryBuilders {
* A query that allows to define a custom scoring function.
*
* @param filterBuilder The query to custom score
* @param scoreFunctionBuilder The score function used to re-score the query
*/
public static FunctionScoreQueryBuilder functionScoreQuery(FilterBuilder filterBuilder) {
return new FunctionScoreQueryBuilder(filterBuilder);

View File

@ -25,6 +25,7 @@ import org.elasticsearch.index.query.functionscore.exp.ExponentialDecayFunctionP
import org.elasticsearch.index.query.functionscore.factor.FactorParser;
import org.elasticsearch.index.query.functionscore.gauss.GaussDecayFunctionParser;
import org.elasticsearch.index.query.functionscore.lin.LinearDecayFunctionParser;
import org.elasticsearch.index.query.functionscore.random.RandomScoreFunctionParser;
import org.elasticsearch.index.query.functionscore.script.ScriptScoreFunctionParser;
import java.util.List;
@ -42,6 +43,7 @@ public class FunctionScoreModule extends AbstractModule {
registerParser(GaussDecayFunctionParser.class);
registerParser(LinearDecayFunctionParser.class);
registerParser(ExponentialDecayFunctionParser.class);
registerParser(RandomScoreFunctionParser.class);
}
public void registerParser(Class<? extends ScoreFunctionParser> parser) {

View File

@ -0,0 +1,62 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query.functionscore.random;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilder;
import java.io.IOException;
/**
 * Builds the XContent representation of the random score function, which gives
 * matched documents a random score.
 */
public class RandomScoreFunctionBuilder implements ScoreFunctionBuilder {

    /** Optional seed; remains {@code null} until {@link #seed(long)} is called. */
    private Long seed;

    public RandomScoreFunctionBuilder() {
    }

    /**
     * Returns the primary name under which the function is serialized.
     */
    @Override
    public String getName() {
        return RandomScoreFunctionParser.NAMES[0];
    }

    /**
     * Sets the seed from which the random scores are derived.
     *
     * @param seed The seed.
     * @return this builder, to allow call chaining
     */
    public RandomScoreFunctionBuilder seed(long seed) {
        this.seed = seed;
        return this;
    }

    /**
     * Writes an object named after {@link #getName()}; it contains a {@code seed} field
     * only when a seed was set.
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        final String functionName = getName();
        builder.startObject(functionName);
        final boolean seedProvided = seed != null;
        if (seedProvided) {
            builder.field("seed", seed.longValue());
        }
        return builder.endObject();
    }
}

View File

@ -0,0 +1,78 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query.functionscore.random;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.function.RandomScoreFunction;
import org.elasticsearch.common.lucene.search.function.ScoreFunction;
import org.elasticsearch.common.lucene.search.function.ScriptScoreFunction;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.index.query.QueryParsingException;
import org.elasticsearch.index.query.functionscore.ScoreFunctionParser;
import org.elasticsearch.script.SearchScript;
import java.io.IOException;
import java.util.Map;
/**
 * Parses the {@code random_score} function into a {@link RandomScoreFunction}.
 * <p>
 * The only supported field is {@code seed}. When no seed is given, the current time
 * taken from the parse context is used instead.
 */
public class RandomScoreFunctionParser implements ScoreFunctionParser {

    /** Names under which this parser is registered; the first entry is the primary name. */
    public static String[] NAMES = { "random_score", "randomScore" };

    @Inject
    public RandomScoreFunctionParser() {
    }

    @Override
    public String[] getNames() {
        return NAMES;
    }

    /**
     * Reads the function body up to its closing {@code END_OBJECT} token.
     *
     * @param parseContext the context of the query being parsed
     * @param parser       the parser positioned inside the function object
     * @return a random score function initialised with the parsed or defaulted seed
     * @throws QueryParsingException if a value is given for any field other than {@code seed}
     */
    @Override
    public ScoreFunction parse(QueryParseContext parseContext, XContentParser parser) throws IOException, QueryParsingException {
        long seed = 0;
        // Tracked separately so that every long value, including -1, is a usable explicit seed.
        boolean seedProvided = false;
        String currentFieldName = null;
        XContentParser.Token token;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if (token.isValue()) {
                if ("seed".equals(currentFieldName)) {
                    seed = parser.longValue();
                    seedProvided = true;
                } else {
                    throw new QueryParsingException(parseContext.index(), NAMES[0] + " query does not support [" + currentFieldName + "]");
                }
            }
        }

        if (!seedProvided) {
            seed = parseContext.nowInMillis();
        }

        return new RandomScoreFunction(seed);
    }
}

View File

@ -28,7 +28,7 @@ import java.io.IOException;
import java.util.Map;
/**
* A query that uses a script to compute or influence the score of documents
* A function that uses a script to compute or influence the score of documents
* that match with the inner query or filter.
*/
public class ScriptScoreFunctionBuilder implements ScoreFunctionBuilder {

View File

@ -0,0 +1,159 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.search.functionscore;
import org.apache.lucene.util._TestUtil;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.functionscore.random.RandomScoreFunctionBuilder;
import org.elasticsearch.test.integration.AbstractSharedClusterTest;
import org.hamcrest.CoreMatchers;
import org.junit.Ignore;
import org.junit.Test;
import java.util.Arrays;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
/**
 * Integration tests for the random score function used inside a function score query.
 */
public class RandomScoreFunctionTests extends AbstractSharedClusterTest {

    @Override
    public Settings getSettings() {
        return randomSettingsBuilder()
                .put("index.number_of_shards", 5)
                .put("index.number_of_replicas", 2)
                .build();
    }

    @Override
    protected int numberOfNodes() {
        return 3;
    }

    /**
     * Runs the same seeded search three times with a fixed preference and checks that
     * every round returns the same scores in the same positions.
     */
    @Test
    public void consistentHitsWithSameSeed() throws Exception {
        prepareCreate("test").execute().actionGet();
        ensureGreen();

        int docCount = atLeast(100);
        for (int i = 0; i < docCount; i++) {
            // Use the loop index as the id; using docCount here would overwrite one single document.
            index("test", "type", "" + i, jsonBuilder().startObject().endObject());
        }
        flush();

        long seed = System.nanoTime();
        // The same preference is sent with every request of this test.
        String preference = _TestUtil.randomRealisticUnicodeString(getRandom());

        float[] scores = null;
        for (int i = 0; i < 3; i++) {
            SearchResponse searchResponse = client().prepareSearch()
                    .setPreference(preference)
                    .setQuery(functionScoreQuery(matchAllQuery()).add(new RandomScoreFunctionBuilder().seed(seed)))
                    .execute().actionGet();

            assertThat("Failures " + Arrays.toString(searchResponse.getShardFailures()), searchResponse.getShardFailures().length, CoreMatchers.equalTo(0));

            int hitCount = searchResponse.getHits().getHits().length;
            if (scores == null) {
                // First round: record the scores to compare the later rounds against.
                scores = new float[hitCount];
                for (int j = 0; j < hitCount; j++) {
                    scores[j] = searchResponse.getHits().getAt(j).score();
                }
            } else {
                // A differing hit count would otherwise go unnoticed or overrun the recorded scores.
                assertThat(hitCount, CoreMatchers.equalTo(scores.length));
                for (int j = 0; j < hitCount; j++) {
                    assertThat(searchResponse.getHits().getAt(j).score(), CoreMatchers.equalTo(scores[j]));
                }
            }
        }
    }

    /**
     * Manual (ignored) check that prints statistics on which document ends up as the top hit
     * over many searches with different seeds. It asserts nothing.
     */
    @Test @Ignore
    public void distribution() throws Exception {
        int count = 10000;

        prepareCreate("test").execute().actionGet();
        ensureGreen();

        for (int i = 0; i < count; i++) {
            index("test", "type", "" + i, jsonBuilder().startObject().endObject());
        }
        flush();

        // matrix[id] counts how often the document with that id was the top hit.
        int[] matrix = new int[count];

        for (int i = 0; i < count; i++) {
            SearchResponse searchResponse = client().prepareSearch()
                    .setQuery(functionScoreQuery(matchAllQuery()).add(new RandomScoreFunctionBuilder().seed(System.nanoTime())))
                    .execute().actionGet();

            matrix[Integer.parseInt(searchResponse.getHits().getAt(0).id())]++;
        }

        int filled = 0;
        int maxRepeat = 0;
        for (int i = 0; i < matrix.length; i++) {
            int value = matrix[i];
            maxRepeat = Math.max(maxRepeat, value);
            if (value > 0) {
                filled++;
            }
        }
        System.out.println();
        System.out.println("max repeat: " + maxRepeat);
        System.out.println("distribution: " + filled/(double)count);

        // Countdown counters: each reaches zero at the corresponding non-empty slot.
        int percentile50 = filled / 2;
        int percentile25 = (filled / 4);
        int percentile75 = percentile50 + percentile25;

        int sum = 0;

        for (int i = 0; i < matrix.length; i++) {
            if (matrix[i] == 0) {
                continue;
            }
            sum += i * matrix[i];
            if (percentile50 == 0) {
                System.out.println("median: " + i);
            } else if (percentile25 == 0) {
                System.out.println("percentile_25: " + i);
            } else if (percentile75 == 0) {
                System.out.println("percentile_75: " + i);
            }
            percentile50--;
            percentile25--;
            percentile75--;
        }

        System.out.println("mean: " + sum/(double)count);
    }
}

View File

@ -1570,7 +1570,6 @@ public class SimpleQueryTests extends AbstractSharedClusterTest {
.execute()
.actionGet();
assertNoFailures(response);
}
}