Added support for random_score function:
* can be used to return matching results in random order Closes #1170
This commit is contained in:
parent
610f262aac
commit
c93eae8545
|
@ -0,0 +1,99 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.common.lucene.search.function;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class RandomScoreFunction implements ScoreFunction {
|
||||||
|
|
||||||
|
private final PRNG prng;
|
||||||
|
private int docBase;
|
||||||
|
|
||||||
|
public RandomScoreFunction(long seed) {
|
||||||
|
this.prng = new PRNG(seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNextReader(AtomicReaderContext context) {
|
||||||
|
this.docBase = context.docBase;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double score(int docId, float subQueryScore) {
|
||||||
|
return prng.random(docBase + docId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double factor(int docId) {
|
||||||
|
return prng.seed;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Explanation explainScore(int docId, Explanation subQueryExpl) {
|
||||||
|
Explanation exp = new Explanation();
|
||||||
|
exp.setDescription("random score function (seed: " + prng.originalSeed + ")");
|
||||||
|
exp.addDetail(subQueryExpl);
|
||||||
|
return exp;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Explanation explainFactor(int docId) {
|
||||||
|
Explanation exp = new Explanation();
|
||||||
|
exp.setDescription("seed: " + prng.originalSeed + ")");
|
||||||
|
return exp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Algorithm based on {@link java.util.Random} except this one is not
|
||||||
|
* thread safe
|
||||||
|
*/
|
||||||
|
static class PRNG {
|
||||||
|
|
||||||
|
private static final long multiplier = 0x5DEECE66DL;
|
||||||
|
private static final long addend = 0xBL;
|
||||||
|
private static final long mask = (1L << 48) - 1;
|
||||||
|
|
||||||
|
final long originalSeed;
|
||||||
|
long seed;
|
||||||
|
|
||||||
|
PRNG(long seed) {
|
||||||
|
this.originalSeed = seed;
|
||||||
|
this.seed = (seed ^ multiplier) & mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float random(int doc) {
|
||||||
|
if (doc == 0) {
|
||||||
|
doc = -17;
|
||||||
|
}
|
||||||
|
return nextFloat() * (doc ^ 0xCAFEBAB);
|
||||||
|
}
|
||||||
|
|
||||||
|
public float nextFloat() {
|
||||||
|
seed = (seed * multiplier + addend) & mask;
|
||||||
|
int r = (int)(seed >>> 24);
|
||||||
|
return r / ((float)(1 << 24));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -545,7 +545,6 @@ public abstract class QueryBuilders {
|
||||||
* A query that allows to define a custom scoring function.
|
* A query that allows to define a custom scoring function.
|
||||||
*
|
*
|
||||||
* @param queryBuilder The query to custom score
|
* @param queryBuilder The query to custom score
|
||||||
* @param scoreFunctionBuilder The score function used to re-score the query
|
|
||||||
*/
|
*/
|
||||||
public static FunctionScoreQueryBuilder functionScoreQuery(QueryBuilder queryBuilder) {
|
public static FunctionScoreQueryBuilder functionScoreQuery(QueryBuilder queryBuilder) {
|
||||||
return new FunctionScoreQueryBuilder(queryBuilder);
|
return new FunctionScoreQueryBuilder(queryBuilder);
|
||||||
|
@ -555,7 +554,6 @@ public abstract class QueryBuilders {
|
||||||
* A query that allows to define a custom scoring function.
|
* A query that allows to define a custom scoring function.
|
||||||
*
|
*
|
||||||
* @param filterBuilder The query to custom score
|
* @param filterBuilder The query to custom score
|
||||||
* @param scoreFunctionBuilder The score function used to re-score the query
|
|
||||||
*/
|
*/
|
||||||
public static FunctionScoreQueryBuilder functionScoreQuery(FilterBuilder filterBuilder) {
|
public static FunctionScoreQueryBuilder functionScoreQuery(FilterBuilder filterBuilder) {
|
||||||
return new FunctionScoreQueryBuilder(filterBuilder);
|
return new FunctionScoreQueryBuilder(filterBuilder);
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.elasticsearch.index.query.functionscore.exp.ExponentialDecayFunctionP
|
||||||
import org.elasticsearch.index.query.functionscore.factor.FactorParser;
|
import org.elasticsearch.index.query.functionscore.factor.FactorParser;
|
||||||
import org.elasticsearch.index.query.functionscore.gauss.GaussDecayFunctionParser;
|
import org.elasticsearch.index.query.functionscore.gauss.GaussDecayFunctionParser;
|
||||||
import org.elasticsearch.index.query.functionscore.lin.LinearDecayFunctionParser;
|
import org.elasticsearch.index.query.functionscore.lin.LinearDecayFunctionParser;
|
||||||
|
import org.elasticsearch.index.query.functionscore.random.RandomScoreFunctionParser;
|
||||||
import org.elasticsearch.index.query.functionscore.script.ScriptScoreFunctionParser;
|
import org.elasticsearch.index.query.functionscore.script.ScriptScoreFunctionParser;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -42,6 +43,7 @@ public class FunctionScoreModule extends AbstractModule {
|
||||||
registerParser(GaussDecayFunctionParser.class);
|
registerParser(GaussDecayFunctionParser.class);
|
||||||
registerParser(LinearDecayFunctionParser.class);
|
registerParser(LinearDecayFunctionParser.class);
|
||||||
registerParser(ExponentialDecayFunctionParser.class);
|
registerParser(ExponentialDecayFunctionParser.class);
|
||||||
|
registerParser(RandomScoreFunctionParser.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void registerParser(Class<? extends ScoreFunctionParser> parser) {
|
public void registerParser(Class<? extends ScoreFunctionParser> parser) {
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.query.functionscore.random;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilder;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A function that computes a random score for the matched documents
|
||||||
|
*/
|
||||||
|
public class RandomScoreFunctionBuilder implements ScoreFunctionBuilder {
|
||||||
|
|
||||||
|
private Long seed = null;
|
||||||
|
|
||||||
|
public RandomScoreFunctionBuilder() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getName() {
|
||||||
|
return RandomScoreFunctionParser.NAMES[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the seed based on which the random number will be generated. Using the same seed is guaranteed to generate the same
|
||||||
|
* random number for a specific doc.
|
||||||
|
*
|
||||||
|
* @param seed The seed.
|
||||||
|
*/
|
||||||
|
public RandomScoreFunctionBuilder seed(long seed) {
|
||||||
|
this.seed = seed;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||||
|
builder.startObject(getName());
|
||||||
|
if (seed != null) {
|
||||||
|
builder.field("seed", seed.longValue());
|
||||||
|
}
|
||||||
|
return builder.endObject();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,78 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
package org.elasticsearch.index.query.functionscore.random;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.inject.Inject;
|
||||||
|
import org.elasticsearch.common.lucene.search.function.RandomScoreFunction;
|
||||||
|
import org.elasticsearch.common.lucene.search.function.ScoreFunction;
|
||||||
|
import org.elasticsearch.common.lucene.search.function.ScriptScoreFunction;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.index.query.QueryParseContext;
|
||||||
|
import org.elasticsearch.index.query.QueryParsingException;
|
||||||
|
import org.elasticsearch.index.query.functionscore.ScoreFunctionParser;
|
||||||
|
import org.elasticsearch.script.SearchScript;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class RandomScoreFunctionParser implements ScoreFunctionParser {
|
||||||
|
|
||||||
|
public static String[] NAMES = { "random_score", "randomScore" };
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public RandomScoreFunctionParser() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String[] getNames() {
|
||||||
|
return NAMES;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScoreFunction parse(QueryParseContext parseContext, XContentParser parser) throws IOException, QueryParsingException {
|
||||||
|
|
||||||
|
long seed = -1;
|
||||||
|
|
||||||
|
String currentFieldName = null;
|
||||||
|
XContentParser.Token token;
|
||||||
|
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||||
|
if (token == XContentParser.Token.FIELD_NAME) {
|
||||||
|
currentFieldName = parser.currentName();
|
||||||
|
} else if (token.isValue()) {
|
||||||
|
if ("seed".equals(currentFieldName)) {
|
||||||
|
seed = parser.longValue();
|
||||||
|
} else {
|
||||||
|
throw new QueryParsingException(parseContext.index(), NAMES[0] + " query does not support [" + currentFieldName + "]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (seed == -1) {
|
||||||
|
seed = parseContext.nowInMillis();
|
||||||
|
}
|
||||||
|
|
||||||
|
return new RandomScoreFunction(seed);
|
||||||
|
}
|
||||||
|
}
|
|
@ -28,7 +28,7 @@ import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A query that uses a script to compute or influence the score of documents
|
* A function that uses a script to compute or influence the score of documents
|
||||||
* that match with the inner query or filter.
|
* that match with the inner query or filter.
|
||||||
*/
|
*/
|
||||||
public class ScriptScoreFunctionBuilder implements ScoreFunctionBuilder {
|
public class ScriptScoreFunctionBuilder implements ScoreFunctionBuilder {
|
||||||
|
|
|
@ -0,0 +1,159 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.test.integration.search.functionscore;
|
||||||
|
|
||||||
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
import org.elasticsearch.action.search.SearchResponse;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.index.query.functionscore.random.RandomScoreFunctionBuilder;
|
||||||
|
import org.elasticsearch.test.integration.AbstractSharedClusterTest;
|
||||||
|
import org.hamcrest.CoreMatchers;
|
||||||
|
import org.junit.Ignore;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||||
|
import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
|
||||||
|
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
|
||||||
|
|
||||||
|
public class RandomScoreFunctionTests extends AbstractSharedClusterTest {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Settings getSettings() {
|
||||||
|
return randomSettingsBuilder()
|
||||||
|
.put("index.number_of_shards", 5)
|
||||||
|
.put("index.number_of_replicas", 2)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int numberOfNodes() {
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void consistentHitsWithSameSeed() throws Exception {
|
||||||
|
prepareCreate("test").execute().actionGet();
|
||||||
|
ensureGreen();
|
||||||
|
|
||||||
|
int docCount = atLeast(100);
|
||||||
|
|
||||||
|
for (int i = 0; i < docCount; i++) {
|
||||||
|
index("test", "type", "" + docCount, jsonBuilder().startObject().endObject());
|
||||||
|
}
|
||||||
|
|
||||||
|
flush();
|
||||||
|
|
||||||
|
long seed = System.nanoTime();
|
||||||
|
String preference = _TestUtil.randomRealisticUnicodeString(getRandom());
|
||||||
|
|
||||||
|
float[] scores = null;
|
||||||
|
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
|
||||||
|
SearchResponse searchResponse = client().prepareSearch()
|
||||||
|
.setPreference(preference)
|
||||||
|
.setQuery(functionScoreQuery(matchAllQuery()).add(new RandomScoreFunctionBuilder().seed(seed)))
|
||||||
|
.execute().actionGet();
|
||||||
|
|
||||||
|
assertThat("Failures " + Arrays.toString(searchResponse.getShardFailures()), searchResponse.getShardFailures().length, CoreMatchers.equalTo(0));
|
||||||
|
|
||||||
|
int hitCount = searchResponse.getHits().getHits().length;
|
||||||
|
|
||||||
|
if (scores == null) {
|
||||||
|
|
||||||
|
scores = new float[hitCount];
|
||||||
|
for (int j = 0; j < hitCount; j++) {
|
||||||
|
scores[j] = searchResponse.getHits().getAt(j).score();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int j = 0; j < hitCount; j++) {
|
||||||
|
assertThat(searchResponse.getHits().getAt(j).score(), CoreMatchers.equalTo(scores[j]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test @Ignore
|
||||||
|
public void distribution() throws Exception {
|
||||||
|
int count = 10000;
|
||||||
|
|
||||||
|
prepareCreate("test").execute().actionGet();
|
||||||
|
ensureGreen();
|
||||||
|
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
index("test", "type", "" + i, jsonBuilder().startObject().endObject());
|
||||||
|
}
|
||||||
|
|
||||||
|
flush();
|
||||||
|
|
||||||
|
int[] matrix = new int[count];
|
||||||
|
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
|
||||||
|
SearchResponse searchResponse = client().prepareSearch()
|
||||||
|
.setQuery(functionScoreQuery(matchAllQuery()).add(new RandomScoreFunctionBuilder().seed(System.nanoTime())))
|
||||||
|
.execute().actionGet();
|
||||||
|
|
||||||
|
matrix[Integer.valueOf(searchResponse.getHits().getAt(0).id())]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
int filled = 0;
|
||||||
|
int maxRepeat = 0;
|
||||||
|
for (int i = 0; i < matrix.length; i++) {
|
||||||
|
int value = matrix[i];
|
||||||
|
maxRepeat = Math.max(maxRepeat, value);
|
||||||
|
if (value > 0) {
|
||||||
|
filled++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
System.out.println();
|
||||||
|
System.out.println("max repeat: " + maxRepeat);
|
||||||
|
System.out.println("distribution: " + filled/(double)count);
|
||||||
|
|
||||||
|
int percentile50 = filled / 2;
|
||||||
|
int percentile25 = (filled / 4);
|
||||||
|
int percentile75 = percentile50 + percentile25;
|
||||||
|
|
||||||
|
int sum = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < matrix.length; i++) {
|
||||||
|
if (matrix[i] == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
sum += i * matrix[i];
|
||||||
|
if (percentile50 == 0) {
|
||||||
|
System.out.println("median: " + i);
|
||||||
|
} else if (percentile25 == 0) {
|
||||||
|
System.out.println("percentile_25: " + i);
|
||||||
|
} else if (percentile75 == 0) {
|
||||||
|
System.out.println("percentile_75: " + i);
|
||||||
|
}
|
||||||
|
percentile50--;
|
||||||
|
percentile25--;
|
||||||
|
percentile75--;
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println("mean: " + sum/(double)count);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1570,7 +1570,6 @@ public class SimpleQueryTests extends AbstractSharedClusterTest {
|
||||||
.execute()
|
.execute()
|
||||||
.actionGet();
|
.actionGet();
|
||||||
assertNoFailures(response);
|
assertNoFailures(response);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue