Initial commit for Module to compute metrics on queries

This is an initial squashed commit of the work on a new feature for query metrics
proposed in #18798.
This commit is contained in:
Isabel Drost-Fromm 2016-06-30 16:15:08 +02:00 committed by Christoph Büscher
parent 5903966dc8
commit 6d4673fd58
22 changed files with 1540 additions and 0 deletions

View File

@ -32,6 +32,7 @@ import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.transport.TransportService;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

View File

@ -0,0 +1,30 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
// Plugin metadata for the rank-eval module: a human-readable description and the
// fully qualified name of the plugin class.
esplugin {
description 'The Rank Eval module adds APIs to evaluate ranking quality.'
classname 'org.elasticsearch.index.rankeval.RankEvalPlugin'
}
// Integration-test cluster configuration: sets 'script.inline' and 'script.stored' to 'true'.
// NOTE(review): presumably required so the module's REST tests can run scripted/templated requests - confirm.
integTest {
cluster {
setting 'script.inline', 'true'
setting 'script.stored', 'true'
}
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import java.util.Collection;
/**
 * Result of evaluating one search intent against one search specification: the computed quality level together
 * with the ids of the returned documents for which no annotation was supplied.
 */
public class EvalQueryQuality {

    private final Collection<String> unknownDocs;
    private final double qualityLevel;

    /**
     * @param qualityLevel the quality value computed for the result list
     * @param unknownDocs ids of the returned documents that were not annotated; stored as-is, not copied
     */
    public EvalQueryQuality (double qualityLevel, Collection<String> unknownDocs) {
        this.unknownDocs = unknownDocs;
        this.qualityLevel = qualityLevel;
    }

    /** Returns the quality level passed to the constructor. */
    public double getQualityLevel() {
        return this.qualityLevel;
    }

    /** Returns the collection of un-annotated document ids passed to the constructor (same instance). */
    public Collection<String> getUnknownDocs() {
        return this.unknownDocs;
    }
}

View File

@ -0,0 +1,28 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.common.io.stream.NamedWriteable;
import org.elasticsearch.search.SearchHit;
/**
 * Common contract for components that judge the hits returned by a search against a rated query.
 */
public interface Evaluator extends NamedWriteable {

    /**
     * Evaluates the given hits with respect to the given rated query.
     *
     * @param hits the search hits to evaluate
     * @param intent the rated query the hits are evaluated against
     * @return the evaluation result; the concrete type is chosen by the implementation
     */
    Object evaluate(SearchHit[] hits, RatedQuery intent);
}

View File

@ -0,0 +1,144 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.SearchHit;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.Map.Entry;
import javax.naming.directory.SearchResult;
/**
 * Evaluate Precision at N, N being the number of search results to consider for precision calculation.
 *
 * Documents of unknown quality are ignored in the precision at n computation and returned by document id.
 * */
public class PrecisionAtN implements RankedListQualityMetric {

    public static final String NAME = "precisionatn";

    /** Number of results to check against a given set of relevant results. */
    private int n;

    /**
     * Initialises n with 10
     * */
    public PrecisionAtN() {
        this.n = 10;
    }

    /**
     * @param n number of top results to check against a given set of relevant results.
     * */
    public PrecisionAtN(int n) {
        this.n = n;
    }

    /** Reads n from the stream; counterpart of {@link #writeTo(StreamOutput)}. */
    public PrecisionAtN(StreamInput in) throws IOException {
        n = in.readInt();
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeInt(n);
    }

    @Override
    public String getWriteableName() {
        return NAME;
    }

    /**
     * Return number of search results to check for quality.
     * */
    public int getN() {
        return n;
    }

    /**
     * Compute precisionAtN based on the ratings supplied with the given intent.
     *
     * Only the first n hits (or fewer, if fewer were returned) are inspected. Hits without a rating do not count
     * towards the precision; their ids are reported as unknown documents instead.
     *
     * @param hits the hits returned by the search, in ranking order
     * @param intent the rated query supplying the document ratings
     * @return precision at n for the given hits together with the ids of unrated hits; the precision is 0.0 when
     *         none of the inspected hits is rated
     **/
    @Override
    public EvalQueryQuality evaluate(SearchHit[] hits, RatedQuery intent) {
        Collection<String> relevantDocIds = new ArrayList<>();
        Collection<String> irrelevantDocIds = new ArrayList<>();
        // RatingMapping.mapTo only ever yields RELEVANT or IRRELEVANT, so a single pass can fill both buckets.
        for (Entry<String, Integer> entry : intent.getRatedDocuments().entrySet()) {
            if (Rating.RELEVANT.equals(RatingMapping.mapTo(entry.getValue()))) {
                relevantDocIds.add(entry.getKey());
            } else {
                irrelevantDocIds.add(entry.getKey());
            }
        }

        int good = 0;
        int bad = 0;
        Collection<String> unknownDocIds = new ArrayList<>();
        int limit = Math.min(n, hits.length);
        for (int i = 0; i < limit; i++) {
            String id = hits[i].getId();
            if (relevantDocIds.contains(id)) {
                good++;
            } else if (irrelevantDocIds.contains(id)) {
                bad++;
            } else {
                unknownDocIds.add(id);
            }
        }

        // Without this guard 0.0 / 0 evaluates to NaN whenever no inspected hit is rated (e.g. empty result list).
        int rated = good + bad;
        double precision = rated == 0 ? 0.0 : (double) good / rated;
        return new EvalQueryQuality(precision, unknownDocIds);
    }

    public enum Rating {
        RELEVANT, IRRELEVANT;
    }

    /**
     * Needed to get the enum across serialisation boundaries.
     * */
    public static class RatingMapping {

        /** Maps {@link Rating#RELEVANT} to 0 and everything else to 1. */
        public static Integer mapFrom(Rating rating) {
            if (Rating.RELEVANT.equals(rating)) {
                return 0;
            }
            return 1;
        }

        /** Maps 0 to {@link Rating#RELEVANT} and every other value to {@link Rating#IRRELEVANT}. */
        public static Rating mapTo(Integer rating) {
            if (rating == 0) {
                return Rating.RELEVANT;
            }
            return Rating.IRRELEVANT;
        }
    }
}

View File

@ -0,0 +1,114 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Describes one search request to evaluate: the request source itself, the indices and types it is run against
 * and a user supplied id for referencing it.
 *
 * Instances are serialised as: spec id, request source, index names, type names.
 * */
public class QuerySpec implements Writeable {

    private int specId;
    private SearchSourceBuilder testRequest;
    private List<String> indices;
    private List<String> types;

    public QuerySpec(
            int specId, SearchSourceBuilder testRequest, List<String> indices, List<String> types) {
        this.specId = specId;
        this.testRequest = testRequest;
        this.indices = indices;
        this.types = types;
    }

    /** Reads a spec from the stream in the order written by {@link #writeTo(StreamOutput)}. */
    public QuerySpec(StreamInput in) throws IOException {
        specId = in.readInt();
        testRequest = new SearchSourceBuilder(in);
        indices = readNames(in);
        types = readNames(in);
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeInt(specId);
        testRequest.writeTo(out);
        writeNames(out, indices);
        writeNames(out, types);
    }

    /** Reads an int count followed by that many strings. */
    private static List<String> readNames(StreamInput in) throws IOException {
        final int count = in.readInt();
        final List<String> names = new ArrayList<String>(count);
        for (int read = 0; read < count; read++) {
            names.add(in.readString());
        }
        return names;
    }

    /** Writes the list size as an int followed by each string. */
    private static void writeNames(StreamOutput out, List<String> names) throws IOException {
        out.writeInt(names.size());
        for (String name : names) {
            out.writeString(name);
        }
    }

    /** Returns a user supplied spec id for easier referencing. */
    public int getSpecId() {
        return specId;
    }

    /** Sets a user supplied spec id for easier referencing. */
    public void setSpecId(int specId) {
        this.specId = specId;
    }

    /** Returns the search request source to evaluate. */
    public SearchSourceBuilder getTestRequest() {
        return testRequest;
    }

    /** Sets the search request source to evaluate. */
    public void setTestRequest(SearchSourceBuilder testRequest) {
        this.testRequest = testRequest;
    }

    /** Returns the index names stored in this spec. */
    public List<String> getIndices() {
        return indices;
    }

    /** Sets the index names stored in this spec. */
    public void setIndices(List<String> indices) {
        this.indices = indices;
    }

    /** Returns the type names stored in this spec. */
    public List<String> getTypes() {
        return types;
    }

    /** Sets the type names stored in this spec. */
    public void setTypes(List<String> types) {
        this.types = types;
    }
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.action.Action;
import org.elasticsearch.client.ElasticsearchClient;
/**
 * Action definition for rank evaluation requests; a singleton accessible via {@link #INSTANCE}.
 **/
public class RankEvalAction extends Action<RankEvalRequest, RankEvalResponse, RankEvalRequestBuilder> {

    /** Name under which this action is registered. */
    public static final String NAME = "indices:data/read/quality";

    public static final RankEvalAction INSTANCE = new RankEvalAction();

    private RankEvalAction() {
        super(NAME);
    }

    /** Creates an empty response instance. */
    @Override
    public RankEvalResponse newResponse() {
        return new RankEvalResponse();
    }

    /** Creates a builder wrapping a fresh, empty {@link RankEvalRequest}. */
    @Override
    public RankEvalRequestBuilder newRequestBuilder(ElasticsearchClient client) {
        final RankEvalRequest emptyRequest = new RankEvalRequest();
        return new RankEvalRequestBuilder(client, this, emptyRequest);
    }
}

View File

@ -0,0 +1,46 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.action.ActionModule;
import org.elasticsearch.common.network.NetworkModule;
import org.elasticsearch.plugins.Plugin;
/**
 * Plugin entry point of the rank-eval module. Registers the rank evaluation transport action and its REST handler.
 */
public class RankEvalPlugin extends Plugin {

    public static final String NAME = "rank-eval";

    @Override
    public String name() {
        return NAME;
    }

    @Override
    public String description() {
        return "The rank-eval module adds APIs to evaluate rankings.";
    }

    /** Registers the REST handler for rank evaluation requests. */
    public void onModule(NetworkModule networkModule) {
        networkModule.registerRestHandler(RestRankEvalAction.class);
    }

    /** Binds {@link RankEvalAction} to its transport implementation. */
    public void onModule(ActionModule actionModule) {
        actionModule.registerAction(RankEvalAction.INSTANCE, TransportRankEvalAction.class);
    }
}

View File

@ -0,0 +1,69 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import java.io.IOException;
/**
 * A complete rank evaluation request. It carries a single {@link RankEvalSpec} describing what is to be evaluated.
 * */
public class RankEvalRequest extends ActionRequest<RankEvalRequest> {

    /** The request data to use for evaluation. */
    private RankEvalSpec task;

    /** Returns the evaluation specification carried by this request. */
    public RankEvalSpec getRankEvalSpec() {
        return this.task;
    }

    /** Sets the evaluation specification carried by this request. */
    public void setRankEvalSpec(RankEvalSpec task) {
        this.task = task;
    }

    /** No validation is performed yet; always returns {@code null}. */
    @Override
    public ActionRequestValidationException validate() {
        return null; // TODO
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        super.writeTo(out);
        this.task.writeTo(out);
    }

    @Override
    public void readFrom(StreamInput in) throws IOException {
        super.readFrom(in);
        this.task = new RankEvalSpec(in);
    }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.action.Action;
import org.elasticsearch.action.ActionRequestBuilder;
import org.elasticsearch.client.ElasticsearchClient;
/**
 * Builder for {@link RankEvalRequest}s; gives access to the wrapped request and its evaluation specification.
 */
public class RankEvalRequestBuilder extends ActionRequestBuilder<RankEvalRequest, RankEvalResponse, RankEvalRequestBuilder> {

    public RankEvalRequestBuilder(ElasticsearchClient client, Action<RankEvalRequest, RankEvalResponse, RankEvalRequestBuilder> action,
            RankEvalRequest request) {
        super(client, action, request);
    }

    /** Returns the request being built. */
    public RankEvalRequest request() {
        return request;
    }

    /** Returns the evaluation specification of the wrapped request. */
    public RankEvalSpec getRankEvalSpec() {
        return request.getRankEvalSpec();
    }

    /** Sets the evaluation specification on the wrapped request. */
    public void setRankEvalSpec(RankEvalSpec spec) {
        request.setRankEvalSpec(spec);
    }
}

View File

@ -0,0 +1,75 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.action.ActionResponse;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
/**
 * For each qa specification identified by its id this response returns the respective
 * averaged precisionAtN value.
 *
 * In addition, for each query the ids of the documents that were returned but not annotated are reported.
 *
 * Documents of unknown quality - i.e. those that haven't been supplied in the set of annotated documents but have been returned
 * by the search are not taken into consideration when computing precision at n - they are ignored.
 *
 **/
public class RankEvalResponse extends ActionResponse {

    private Collection<RankEvalResult> qualityResults = new ArrayList<>();

    public RankEvalResponse() {
    }

    /** Reads the results from the stream: an int count followed by that many {@link RankEvalResult}s. */
    public RankEvalResponse(StreamInput in) throws IOException {
        qualityResults = readResults(in);
    }

    /**
     * Counterpart of {@link #writeTo(StreamOutput)}. Needed because responses are created empty and then populated
     * from the stream; without it the results written by {@code writeTo} would never be read back.
     */
    @Override
    public void readFrom(StreamInput in) throws IOException {
        super.readFrom(in);
        qualityResults = readResults(in);
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        super.writeTo(out);
        out.writeInt(qualityResults.size());
        for (RankEvalResult result : qualityResults) {
            result.writeTo(out);
        }
    }

    /** Reads an int count followed by that many results. */
    private static Collection<RankEvalResult> readResults(StreamInput in) throws IOException {
        int size = in.readInt();
        Collection<RankEvalResult> results = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            results.add(new RankEvalResult(in));
        }
        return results;
    }

    /** Adds the result for one specification to this response. */
    public void addRankEvalResult(int specId, double quality, Map<Integer, Collection<String>> unknownDocs) {
        RankEvalResult result = new RankEvalResult(specId, quality, unknownDocs);
        this.qualityResults.add(result);
    }

    /** Returns all results collected so far (the internal collection, not a copy). */
    public Collection<RankEvalResult> getRankEvalResults() {
        return qualityResults;
    }
}

View File

@ -0,0 +1,73 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
/**
 * For each precision at n computation the id of the search request specification used to generate search requests is returned
 * for reference. In addition the averaged precision and the ids of all documents returned but not found annotated are returned.
 *
 * Wire format: spec id (int), quality level (double), a flag telling whether unknown docs are present and, if so,
 * the number of intents followed by, per intent, its id, the number of document ids and the document ids.
 * */
public class RankEvalResult implements Writeable {

    /**ID of specification this result was generated for.*/
    private int specId;

    /**Average precision observed when issuing query intents with this spec.*/
    private double qualityLevel;

    /**Mapping from intent id to all documents seen for this intent that were not annotated.*/
    private Map<Integer, Collection<String>> unknownDocs;

    /** Reads a result in the format produced by {@link #writeTo(StreamOutput)}. */
    public RankEvalResult(StreamInput in) throws IOException {
        this.specId = in.readInt();
        this.qualityLevel = in.readDouble();
        this.unknownDocs = in.readBoolean() ? readUnknownDocs(in) : null;
    }

    public RankEvalResult(int specId, double quality, Map<Integer, Collection<String>> unknownDocs) {
        this.specId = specId;
        this.qualityLevel = quality;
        this.unknownDocs = unknownDocs;
    }

    /**
     * Writes this result field by field. The unknown docs are serialised explicitly rather than as a generic value,
     * because the generic map serialisation writes keys as strings and therefore cannot handle the Integer keys
     * used here.
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeInt(specId);
        out.writeDouble(qualityLevel);
        Map<Integer, Collection<String>> docsByIntent = getUnknownDocs();
        out.writeBoolean(docsByIntent != null);
        if (docsByIntent != null) {
            out.writeInt(docsByIntent.size());
            for (Map.Entry<Integer, Collection<String>> entry : docsByIntent.entrySet()) {
                out.writeInt(entry.getKey());
                Collection<String> docIds = entry.getValue();
                out.writeInt(docIds.size());
                for (String docId : docIds) {
                    out.writeString(docId);
                }
            }
        }
    }

    /** Reads the intent id to document ids mapping written by {@link #writeTo(StreamOutput)}. */
    private static Map<Integer, Collection<String>> readUnknownDocs(StreamInput in) throws IOException {
        int intentCount = in.readInt();
        Map<Integer, Collection<String>> docsByIntent = new HashMap<>(intentCount);
        for (int i = 0; i < intentCount; i++) {
            int intentId = in.readInt();
            int docCount = in.readInt();
            Collection<String> docIds = new ArrayList<>(docCount);
            for (int j = 0; j < docCount; j++) {
                docIds.add(in.readString());
            }
            docsByIntent.put(intentId, docIds);
        }
        return docsByIntent;
    }

    public int getSpecId() {
        return specId;
    }

    public double getQualityLevel() {
        return qualityLevel;
    }

    public Map<Integer, Collection<String>> getUnknownDocs() {
        return unknownDocs;
    }
}

View File

@ -0,0 +1,109 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
/**
 * Full description of one evaluation run: the rated queries (intents) to check, the query specifications to run
 * and the quality metric to compute.
 *
 * Instances are serialised as: intents, specifications, metric.
 * */
public class RankEvalSpec implements Writeable {

    /** Collection of query intents to check against including expected document ids.*/
    private Collection<RatedQuery> intents;

    /** Collection of query specifications, that is e.g. search request templates to use for query translation. */
    private Collection<QuerySpec> specifications;

    /** The quality metric to compute. */
    private RankedListQualityMetric eval;

    public RankEvalSpec(Collection<RatedQuery> intents, Collection<QuerySpec> specs, RankedListQualityMetric metric) {
        this.eval = metric;
        this.specifications = specs;
        this.intents = intents;
    }

    /** Reads a spec from the stream in the order written by {@link #writeTo(StreamOutput)}. */
    public RankEvalSpec(StreamInput in) throws IOException {
        final int numIntents = in.readInt();
        this.intents = new ArrayList<>(numIntents);
        for (int read = 0; read < numIntents; read++) {
            this.intents.add(new RatedQuery(in));
        }

        final int numSpecs = in.readInt();
        this.specifications = new ArrayList<>(numSpecs);
        for (int read = 0; read < numSpecs; read++) {
            this.specifications.add(new QuerySpec(in));
        }

        this.eval = in.readNamedWriteable(RankedListQualityMetric.class); // TODO add to registry
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeInt(this.intents.size());
        for (RatedQuery intent : this.intents) {
            intent.writeTo(out);
        }

        out.writeInt(this.specifications.size());
        for (QuerySpec specification : this.specifications) {
            specification.writeTo(out);
        }

        out.writeNamedWriteable(this.eval);
    }

    /** Returns a list of search intents to evaluate. */
    public Collection<RatedQuery> getIntents() {
        return this.intents;
    }

    /** Set a list of search intents to evaluate. */
    public void setIntents(Collection<RatedQuery> intents) {
        this.intents = intents;
    }

    /** Returns a list of intent to query translation specifications to evaluate. */
    public Collection<QuerySpec> getSpecifications() {
        return this.specifications;
    }

    /** Set the list of intent to query translation specifications to evaluate. */
    public void setSpecifications(Collection<QuerySpec> specifications) {
        this.specifications = specifications;
    }

    /** Returns the quality metric to compute. */
    public RankedListQualityMetric getEvaluator() {
        return this.eval;
    }

    /** Sets the quality metric to compute. */
    public void setEvaluator(RankedListQualityMetric config) {
        this.eval = config;
    }
}

View File

@ -0,0 +1,40 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.search.SearchHit;
/**
* Classes implementing this interface provide a means to compute the quality of a result list
* returned by some search.
*
* RelevancyLevel specifies the type of object determining the relevancy level of some known docid.
* */
public interface RankedListQualityMetric extends Evaluator {
/**
* Returns a single metric representing the ranking quality of a set of returned documents
* wrt. to a set of document Ids labeled as relevant for this search.
*
* @param hits the result hits as returned by some search
* @return some metric representing the quality of the result hit list wrt. to relevant doc ids.
* */
public EvalQueryQuality evaluate(SearchHit[] hits, RatedQuery intent);
}

View File

@ -0,0 +1,92 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
/**
 * One query to evaluate. It consists of a user supplied id for easier referencing, the parameters of the query and
 * the rated documents, i.e. a mapping from document id to rating.
 *
 * Instances are serialised as: intent id, parameter map, number of ratings, then each document id with its rating.
 * */
public class RatedQuery implements Writeable {

    private final int intentId;
    private final Map<String, Object> intentParameters;
    private final Map<String, Integer> ratedDocuments;

    public RatedQuery(
            int intentId, Map<String, Object> intentParameters, Map<String, Integer> ratedDocuments) {
        this.intentId = intentId;
        this.intentParameters = intentParameters;
        this.ratedDocuments = ratedDocuments;
    }

    /** Reads a rated query from the stream in the order written by {@link #writeTo(StreamOutput)}. */
    public RatedQuery(StreamInput in) throws IOException {
        this.intentId = in.readInt();
        this.intentParameters = in.readMap();
        final int numRatings = in.readInt();
        final Map<String, Integer> ratings = new HashMap<>(numRatings);
        for (int read = 0; read < numRatings; read++) {
            // Document id first, then its rating - same order as in writeTo.
            final String docId = in.readString();
            final int rating = in.readInt();
            ratings.put(docId, rating);
        }
        this.ratedDocuments = ratings;
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeInt(this.intentId);
        out.writeMap(this.intentParameters);
        out.writeInt(this.ratedDocuments.size());
        for (Entry<String, Integer> rated : this.ratedDocuments.entrySet()) {
            final String docId = rated.getKey();
            final Integer rating = rated.getValue();
            out.writeString(docId);
            out.writeInt(rating);
        }
    }

    /** Returns the user supplied id of this query. */
    public int getIntentId() {
        return this.intentId;
    }

    /**
     * Returns a mapping from query parameter name to parameter value.
     * */
    public Map<String, Object> getIntentParameters() {
        return this.intentParameters;
    }

    /**
     * Returns the mapping from document id to rating as supplied by the users.
     * */
    public Map<String, Integer> getRatedDocuments() {
        return this.ratedDocuments;
    }
}

View File

@ -0,0 +1,187 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.indices.query.IndicesQueriesRegistry;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestChannel;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.action.support.RestActions;
import org.elasticsearch.search.aggregations.AggregatorParsers;
import org.elasticsearch.search.suggest.Suggesters;
import java.io.IOException;
import static org.elasticsearch.rest.RestRequest.Method.GET;
import static org.elasticsearch.rest.RestRequest.Method.POST;
/**
* Accepted input format:
*
* General Format:
*
*
{ "requests": [{
"id": "human_readable_id",
"request": { ... request to check ... },
"ratings": { ... mapping from doc id to rating value ... }
}],
"metric": {
"... metric_name... ": {
"... metric_parameter_key ...": ...metric_parameter_value...
}}}
*
* Example:
*
*
{"requests": [{
"id": "amsterdam_query",
"request": {
"query": {
"bool": {
"must": [
{"match": {"beverage": "coffee"}},
{"term": {"browser": {"value": "safari"}}},
{"term": {"time_of_day": {"value": "morning","boost": 2}}},
{"term": {"ip_location": {"value": "ams","boost": 10}}}]}
},
"size": 10
},
"ratings": {
"1": 1,
"2": 0,
"3": 1,
"4": 1
}
}, {
"id": "berlin_query",
"request": {
"query": {
"bool": {
"must": [
{"match": {"beverage": "club mate"}},
{"term": {"browser": {"value": "chromium"}}},
{"term": {"time_of_day": {"value": "evening","boost": 2}}},
{"term": {"ip_location": {"value": "ber","boost": 10}}}]}
},
"size": 10
},
"ratings": {
"1": 0,
"5": 1,
"6": 1
}
}],
"metric": {
"precisionAtN": {
"size": 10}}
}
*
* Output format:
*
* General format:
*
*
{
"took": 59,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"quality_level": ... quality level ...,
"unknown_docs": [{"user_request_id": [... list of unknown docs ...]}]
}
*
* Example:
*
*
*
{
"took": 59,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"rank_eval": [{
"spec_id": "huge_weight_on_city",
"quality_level": 0.4,
"unknown_docs": [{
"amsterdam_query": [5, 10, 23]
}, {
"berlin_query": [42]
}]
}]
}
* */
public class RestRankEvalAction extends BaseRestHandler {

    /**
     * Registers this handler for GET and POST on the global, the per-index and the per-index-and-type
     * {@code _rank_eval} endpoints.
     *
     * The query registry, aggregation parsers and suggesters are injected but not used yet.
     */
    @Inject
    public RestRankEvalAction(Settings settings, RestController controller, Client client, IndicesQueriesRegistry queryRegistry,
            AggregatorParsers aggParsers, Suggesters suggesters) {
        super(settings, client);
        final String[] endpoints = { "/_rank_eval", "/{index}/_rank_eval", "/{index}/{type}/_rank_eval" };
        for (String endpoint : endpoints) {
            controller.registerHandler(GET, endpoint, this);
            controller.registerHandler(POST, endpoint, this);
        }
    }

    /**
     * Handles an incoming rank evaluation request.
     *
     * Work in progress: at the moment this only instantiates an empty {@link RankEvalRequest}. Parsing the
     * REST request and executing the evaluation are still commented out, so nothing is sent to the channel.
     */
    @Override
    public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) throws IOException {
        RankEvalRequest rankEvalRequest = new RankEvalRequest();
        // Not wired up yet:
        //parseRankEvalRequest(rankEvalRequest, request, parseFieldMatcher);
        //client.rankEval(rankEvalRequest, new RestStatusToXContentListener<>(channel));
    }

    /**
     * Intended to fill the given rank evaluation request from the REST request.
     *
     * Work in progress: the {@code index} parameter is split into index names and the request body is
     * fetched when present, but neither is applied to {@code rankEvalRequest} yet.
     *
     * @param rankEvalRequest the request to populate (currently left untouched)
     * @param request the incoming REST request
     * @param parseFieldMatcher currently unused
     * @throws IOException declared for the body handling that is still to be implemented
     */
    public static void parseRankEvalRequest(RankEvalRequest rankEvalRequest, RestRequest request, ParseFieldMatcher parseFieldMatcher)
            throws IOException {
        String[] indices = Strings.splitStringByCommaToArray(request.param("index"));
        BytesReference restContent = RestActions.hasBodyContent(request) ? RestActions.getRestContent(request) : null;
        if (restContent != null) {
            // Parsing of the request body is not implemented yet.
        }
    }
}

View File

@ -0,0 +1,120 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.action.ActionFuture;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.TransportSearchAction;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.AutoCreateIndex;
import org.elasticsearch.action.support.HandledTransportAction;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.action.SearchTransportService;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.controller.SearchPhaseController;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
/**
* Instances of this class execute a collection of search intents (read: user supplied query parameters) against a set of
* possible search requests (read: search specifications, expressed as query/search request templates) and compare the results
* against a set of annotated documents per search intent.
*
* If any returned documents have not been annotated, their document ids are returned per search intent.
*
* The resulting search quality is computed in terms of precision at n and returned for each search specification for the full
* set of search intents as averaged precision at n.
* */
public class TransportRankEvalAction extends HandledTransportAction<RankEvalRequest, RankEvalResponse> {
private SearchPhaseController searchPhaseController;
private TransportService transportService;
private SearchTransportService searchTransportService;
private ClusterService clusterService;
private ActionFilters actionFilters;
@Inject
public TransportRankEvalAction(Settings settings, ThreadPool threadPool, ActionFilters actionFilters,
IndexNameExpressionResolver indexNameExpressionResolver, ClusterService clusterService, ScriptService scriptService,
AutoCreateIndex autoCreateIndex, Client client, TransportService transportService, SearchPhaseController searchPhaseController,
SearchTransportService searchTransportService, NamedWriteableRegistry namedWriteableRegistry) {
super(settings, RankEvalAction.NAME, threadPool, transportService, actionFilters, indexNameExpressionResolver,
RankEvalRequest::new);
this.searchPhaseController = searchPhaseController;
this.transportService = transportService;
this.searchTransportService = searchTransportService;
this.clusterService = clusterService;
this.actionFilters = actionFilters;
namedWriteableRegistry.register(RankedListQualityMetric.class, PrecisionAtN.NAME, PrecisionAtN::new);
}
@Override
protected void doExecute(RankEvalRequest request, ActionListener<RankEvalResponse> listener) {
RankEvalResponse response = new RankEvalResponse();
RankEvalSpec qualityTask = request.getRankEvalSpec();
RankedListQualityMetric metric = qualityTask.getEvaluator();
for (QuerySpec spec : qualityTask.getSpecifications()) {
double qualitySum = 0;
SearchSourceBuilder specRequest = spec.getTestRequest();
String[] indices = new String[spec.getIndices().size()];
spec.getIndices().toArray(indices);
SearchRequest templatedRequest = new SearchRequest(indices, specRequest);
Map<Integer, Collection<String>> unknownDocs = new HashMap<Integer, Collection<String>>();
Collection<RatedQuery> intents = qualityTask.getIntents();
for (RatedQuery intent : intents) {
TransportSearchAction transportSearchAction = new TransportSearchAction(
settings,
threadPool,
searchPhaseController,
transportService,
searchTransportService,
clusterService,
actionFilters,
indexNameExpressionResolver);
ActionFuture<SearchResponse> searchResponse = transportSearchAction.execute(templatedRequest);
SearchHits hits = searchResponse.actionGet().getHits();
EvalQueryQuality intentQuality = metric.evaluate(hits.getHits(), intent);
qualitySum += intentQuality.getQualityLevel();
unknownDocs.put(intent.getIntentId(), intentQuality.getUnknownDocs());
}
response.addRankEvalResult(spec.getSpecId(), qualitySum / intents.size(), unknownDocs);
}
listener.onResponse(response);
}
}

View File

@ -0,0 +1,170 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.action.quality;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.rankeval.PrecisionAtN;
import org.elasticsearch.index.rankeval.RankEvalPlugin;
import org.elasticsearch.index.rankeval.RatedQuery;
import org.elasticsearch.index.rankeval.PrecisionAtN.Rating;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESIntegTestCase;
import org.junit.Before;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, transportClientRatio = 0.0)
// NORELEASE need to fix transport client use case
public class PrecisionAtRequestTests extends ESIntegTestCase {

    @Override
    protected Collection<Class<? extends Plugin>> transportClientPlugins() {
        return pluginList(RankEvalPlugin.class);
    }

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return pluginList(RankEvalPlugin.class);
    }

    /**
     * Creates the "test" index holding one "berlin" document (id 1) and five "amsterdam" documents (ids 2 to 6).
     */
    @Before
    public void setup() {
        createIndex("test");
        ensureGreen();

        client().prepareIndex("test", "testtype").setId("1")
                .setSource("text", "berlin").get();
        for (int docId = 2; docId <= 6; docId++) {
            client().prepareIndex("test", "testtype").setId(Integer.toString(docId))
                    .setSource("text", "amsterdam").get();
        }
        refresh();
    }

    /** The only hit for "berlin" is rated relevant, so precision at 5 is expected to be 1. */
    public void testPrecisionAtFiveCalculation() throws IOException, InterruptedException, ExecutionException {
        // TODO turn into unit test - no need to execute the query here to fill hits object
        SearchHit[] hits = hitsFor("berlin");

        RatedQuery intent = new RatedQuery(0, new HashMap<>(), createRelevant("1"));
        double quality = new PrecisionAtN(5).evaluate(hits, intent).getQualityLevel();
        assertEquals(1, quality, 0.00001);
    }

    /** Four of the five "amsterdam" documents are rated relevant and one irrelevant, so 4/5 is expected. */
    public void testPrecisionAtFiveIgnoreOneResult() throws IOException, InterruptedException, ExecutionException {
        // TODO turn into unit test - no need to actually execute the query here to fill the hits object
        SearchHit[] hits = hitsFor("amsterdam");

        Map<String, Integer> ratings = createRelevant("2", "3", "4", "5");
        ratings.put("6", Rating.IRRELEVANT.ordinal());

        RatedQuery intent = new RatedQuery(0, new HashMap<>(), ratings);
        double quality = new PrecisionAtN(5).evaluate(hits, intent).getQualityLevel();
        assertEquals(4.0 / 5, quality, 0.00001);
    }

    /** Placeholder, not implemented yet. */
    public void testPrecisionJSON() {
    }

    /* public void testPrecisionAction() {
    // TODO turn into REST test?
    Collection<RatedQuery> intents = new ArrayList<RatedQuery>();
    RatedQuery intentAmsterdam = new RatedQuery(
    0,
    createParams("var", "amsterdam"),
    createRelevant("2", "3", "4", "5"));
    intents.add(intentAmsterdam);
    RatedQuery intentBerlin = new RatedQuery(
    1,
    createParams("var", "berlin"),
    createRelevant("1"));
    intents.add(intentBerlin);
    Collection<QuerySpec> specs = new ArrayList<QuerySpec>();
    ArrayList<String> indices = new ArrayList<>();
    indices.add("test");
    ArrayList<String> types = new ArrayList<>();
    types.add("testtype");
    SearchSourceBuilder source = new SearchSourceBuilder();
    QuerySpec spec = new QuerySpec(0, source, indices, types);
    specs.add(spec);
    RankEvalSpec task = new RankEvalSpec(intents, specs, new PrecisionAtN(10));
    RankEvalRequestBuilder builder = new RankEvalRequestBuilder(
    client(),
    RankEvalAction.INSTANCE,
    new RankEvalRequest());
    builder.setRankEvalSpec(task);
    RankEvalResponse response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
    RankEvalResult result = response.getRankEvalResults().iterator().next();
    for (Entry<Integer, Collection<String>> entry : result.getUnknownDocs().entrySet()) {
    if (entry.getKey() == 0) {
    assertEquals(1, entry.getValue().size());
    } else {
    assertEquals(0, entry.getValue().size());
    }
    }
    }*/

    /** Runs a match query for the given term on the "text" field and returns the resulting hits. */
    private SearchHit[] hitsFor(String term) {
        MatchQueryBuilder query = new MatchQueryBuilder("text", term);
        SearchResponse response = client().prepareSearch().setQuery(query)
                .execute().actionGet();
        return response.getHits().getHits();
    }

    /** Builds a ratings map that marks each of the given document ids as relevant. */
    private Map<String, Integer> createRelevant(String... docs) {
        Map<String, Integer> ratings = new HashMap<>();
        final int relevantRating = Rating.RELEVANT.ordinal();
        for (String docId : docs) {
            ratings.put(docId, relevantRating);
        }
        return ratings;
    }

    /** Builds a parameter map holding the single given key/value pair. */
    private Map<String, Object> createParams(String key, String value) {
        Map<String, Object> singleParam = new HashMap<>();
        singleParam.put(key, value);
        return singleParam;
    }
}

View File

@ -0,0 +1,40 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.action.quality;
import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.elasticsearch.test.rest.ESRestTestCase;
import org.elasticsearch.test.rest.RestTestCandidate;
import org.elasticsearch.test.rest.parser.RestTestParseException;
import java.io.IOException;
/**
 * Runs the YAML REST tests of the rank-eval module; one instance is created per test candidate.
 */
public class RankEvalRestIT extends ESRestTestCase {

    /**
     * @param testCandidate the YAML test candidate this instance executes
     */
    public RankEvalRestIT(@Name("yaml") final RestTestCandidate testCandidate) {
        super(testCandidate);
    }

    /**
     * Supplies the constructor arguments for the parameterized test runs.
     *
     * @return the parameters produced by {@link ESRestTestCase#createParameters(int, int)} for (0, 1)
     */
    @ParametersFactory
    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
        // NOTE(review): (0, 1) presumably selects partition 0 of 1, i.e. all tests - confirm.
        final Iterable<Object[]> candidates = ESRestTestCase.createParameters(0, 1);
        return candidates;
    }
}

View File

@ -0,0 +1,48 @@
---
# REST test: indexes three documents, sends one rank_eval request with two rated queries
# and checks the shape of the response.
"Response format":
# Document 1 contains "berlin".
- do:
index:
index: foo
type: bar
id: 1
body: { "text": "berlin" }
# Documents 2 and 3 contain "amsterdam".
- do:
index:
index: foo
type: bar
id: 2
body: { "text": "amsterdam" }
- do:
index:
index: foo
type: bar
id: 3
body: { "text": "amsterdam" }
# Make the indexed documents visible to search before evaluating.
- do:
indices.refresh: {}
# Two rated queries: "amsterdam" rates docs 2 and 3 as 1 and doc 1 as 0, "berlin" rates doc 1 as 1.
# The metric is precisionAtN with size 10.
- do:
rank_eval:
body:
requests: [
{
id: "amsterdam_query",
request: { query: {match : {text : "amsterdam" }}},
ratings: { "1": 0, "2": 1, "3": 1 }
}, {
id: "berlin_query",
request: { query: { match : { text : "berlin" } }, size : 10 },
ratings: {"1": 1}
}
]
metric: { precisionAtN: { size: 10}}
# Each query is expected to match only documents rated 1, hence the expected quality level of 1.
- match: {quality_level: 1}
- gte: { took: 0 }
# NOTE(review): "task" and "deleted" are not part of the documented rank_eval output format;
# these two checks look copied from another API's test - confirm they are intended.
- is_false: task
- is_false: deleted

View File

@ -0,0 +1,17 @@
{
"rank_eval": {
"documentation": "https://www.elastic.co/guide/en/elasticsearch/reference/master/docs-rank-eval.html",
"methods": ["POST"],
"url": {
"path": "/_rank_eval",
"paths": ["/_rank_eval"],
"parts": {},
"params": {}
},
"body": {
"description": "The ranking evaluation definition: the rated search requests (each expressed in the Query DSL) and the metric to compute.",
"required": true
}
}
}

View File

@ -25,6 +25,7 @@ List projects = [
'modules:lang-mustache',
'modules:lang-painless',
'modules:reindex',
'modules:rank-eval',
'modules:percolator',
'plugins:analysis-icu',
'plugins:analysis-kuromoji',