From b730494bfc904bcb1dadc9d3c1d58992333556d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 4 Jul 2016 12:57:18 +0200 Subject: [PATCH] Adding rest layer parsing and response rendering Adding parsers for the rest request and the various components within, also extending the existing rest test and adding rendering of the response. --- .../index/rankeval/EvalQueryQuality.java | 10 +- .../index/rankeval/Evaluator.java | 28 --- .../index/rankeval/PrecisionAtN.java | 66 ++++--- .../index/rankeval/QuerySpec.java | 96 +++++++++- .../index/rankeval/RankEvalContext.java | 65 +++++++ .../index/rankeval/RankEvalRequest.java | 16 +- .../index/rankeval/RankEvalResponse.java | 63 ++++--- .../index/rankeval/RankEvalResult.java | 30 ++-- .../index/rankeval/RankEvalSpec.java | 47 +++-- .../rankeval/RankedListQualityMetric.java | 35 +++- .../index/rankeval/RatedDocument.java | 86 +++++++++ .../index/rankeval/RatedQuery.java | 92 ---------- .../index/rankeval/RestRankEvalAction.java | 88 ++++++--- .../rankeval/TransportRankEvalAction.java | 56 +++--- .../quality/PrecisionAtRequestTests.java | 170 ------------------ .../action/quality/RankEvalRequestTests.java | 122 +++++++++++++ .../index/rankeval/PrecisionAtNTests.java | 69 +++++++ .../index/rankeval/QuerySpecTests.java | 98 ++++++++++ .../test/rank_eval/10_basic.yaml | 49 ++--- 19 files changed, 810 insertions(+), 476 deletions(-) delete mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/Evaluator.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalContext.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedDocument.java delete mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedQuery.java delete mode 100644 modules/rank-eval/src/test/java/org/elasticsearch/action/quality/PrecisionAtRequestTests.java create mode 100644 
modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRequestTests.java create mode 100644 modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java create mode 100644 modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/QuerySpecTests.java diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java index c5d48c2074a..54edd722126 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java @@ -21,25 +21,25 @@ package org.elasticsearch.index.rankeval; import java.util.Collection; -/** Returned for each search intent and search specification combination. Summarises the document ids found that were not - * annotated and the average precision of result sets in each particular combination based on the annotations given. +/** Returned for each search specification. Summarizes the measured quality metric for this search request + * and adds the document ids found that were in the search result but not annotated in the original request. 
* */ public class EvalQueryQuality { private double qualityLevel; - + private Collection unknownDocs; public EvalQueryQuality (double qualityLevel, Collection unknownDocs) { this.qualityLevel = qualityLevel; this.unknownDocs = unknownDocs; } - + public Collection getUnknownDocs() { return unknownDocs; } public double getQualityLevel() { - return qualityLevel; + return qualityLevel; } } diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/Evaluator.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/Evaluator.java deleted file mode 100644 index dba7403c652..00000000000 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/Evaluator.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.rankeval; - -import org.elasticsearch.common.io.stream.NamedWriteable; -import org.elasticsearch.search.SearchHit; - -public interface Evaluator extends NamedWriteable { - - Object evaluate(SearchHit[] hits, RatedQuery intent); -} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java index f6216eadede..1d67c45dbb8 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java @@ -19,28 +19,31 @@ package org.elasticsearch.index.rankeval; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcherSupplier; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.search.SearchHit; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.Map; -import java.util.Map.Entry; +import java.util.List; import javax.naming.directory.SearchResult; /** * Evaluate Precision at N, N being the number of search results to consider for precision calculation. - * + * * Documents of unkonwn quality are ignored in the precision at n computation and returned by document id. * */ -public class PrecisionAtN implements RankedListQualityMetric { - +public class PrecisionAtN extends RankedListQualityMetric { + /** Number of results to check against a given set of relevant results. 
*/ private int n; - + public static final String NAME = "precisionatn"; public PrecisionAtN(StreamInput in) throws IOException { @@ -63,7 +66,7 @@ public class PrecisionAtN implements RankedListQualityMetric { public PrecisionAtN() { this.n = 10; } - + /** * @param n number of top results to check against a given set of relevant results. * */ @@ -78,24 +81,31 @@ public class PrecisionAtN implements RankedListQualityMetric { return n; } + private static final ParseField SIZE_FIELD = new ParseField("size"); + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "precision_at", a -> new PrecisionAtN((Integer) a[0])); + + static { + PARSER.declareInt(ConstructingObjectParser.constructorArg(), SIZE_FIELD); + } + + public static PrecisionAtN fromXContent(XContentParser parser, ParseFieldMatcherSupplier matcher) { + return PARSER.apply(parser, matcher); + } + /** Compute precisionAtN based on provided relevant document IDs. * @return precision at n for above {@link SearchResult} list. 
**/ @Override - public EvalQueryQuality evaluate(SearchHit[] hits, RatedQuery intent) { - Map ratedDocIds = intent.getRatedDocuments(); - - Collection relevantDocIds = new ArrayList<>(); - for (Entry entry : ratedDocIds.entrySet()) { - if (Rating.RELEVANT.equals(RatingMapping.mapTo(entry.getValue()))) { - relevantDocIds.add(entry.getKey()); - } - } - - Collection irrelevantDocIds = new ArrayList<>(); - for (Entry entry : ratedDocIds.entrySet()) { - if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(entry.getValue()))) { - irrelevantDocIds.add(entry.getKey()); + public EvalQueryQuality evaluate(SearchHit[] hits, List ratedDocs) { + + Collection relevantDocIds = new ArrayList<>(); + Collection irrelevantDocIds = new ArrayList<>(); + for (RatedDocument doc : ratedDocs) { + if (Rating.RELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) { + relevantDocIds.add(doc.getDocID()); + } else if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) { + irrelevantDocIds.add(doc.getDocID()); } } @@ -117,24 +127,24 @@ public class PrecisionAtN implements RankedListQualityMetric { return new EvalQueryQuality(precision, unknownDocIds); } - + public enum Rating { - RELEVANT, IRRELEVANT; + IRRELEVANT, RELEVANT; } - + /** * Needed to get the enum accross serialisation boundaries. 
* */ public static class RatingMapping { public static Integer mapFrom(Rating rating) { if (Rating.RELEVANT.equals(rating)) { - return 0; + return 1; } - return 1; + return 0; } - + public static Rating mapTo(Integer rating) { - if (rating == 0) { + if (rating == 1) { return Rating.RELEVANT; } return Rating.IRRELEVANT; diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java index b94e0e92bd7..2e82fd98939 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java @@ -19,9 +19,13 @@ package org.elasticsearch.index.rankeval; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.search.builder.SearchSourceBuilder; import java.io.IOException; @@ -32,25 +36,33 @@ import java.util.List; * Defines a QA specification: All end user supplied query intents will be mapped to the search request specified in this search request * template and executed against the targetIndex given. Any filters that should be applied in the target system can be specified as well. * - * The resulting document lists can then be compared against what was specified in the set of rated documents as part of a QAQuery. + * The resulting document lists can then be compared against what was specified in the set of rated documents as part of a QAQuery. 
* */ public class QuerySpec implements Writeable { - private int specId = 0; + private String specId; private SearchSourceBuilder testRequest; private List indices = new ArrayList<>(); private List types = new ArrayList<>(); - - public QuerySpec( - int specId, SearchSourceBuilder testRequest, List indices, List types) { + /** Collection of rated queries for this query QA specification.*/ + private List ratedDocs = new ArrayList<>(); + + public QuerySpec() { + // ctor that doesn't require all args to be present immediately is easier to use with ObjectParser + // TODO decide if we can require only id as mandatory, set default values for the rest? + } + + public QuerySpec(String specId, SearchSourceBuilder testRequest, List indices, List types, + List ratedDocs) { this.specId = specId; this.testRequest = testRequest; this.indices = indices; this.types = types; + this.ratedDocs = ratedDocs; } public QuerySpec(StreamInput in) throws IOException { - this.specId = in.readInt(); + this.specId = in.readString(); testRequest = new SearchSourceBuilder(in); int indicesSize = in.readInt(); indices = new ArrayList(indicesSize); @@ -62,11 +74,16 @@ public class QuerySpec implements Writeable { for (int i = 0; i < typesSize; i++) { this.types.add(in.readString()); } + int intentSize = in.readInt(); + ratedDocs = new ArrayList<>(intentSize); + for (int i = 0; i < intentSize; i++) { + ratedDocs.add(new RatedDocument(in)); + } } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeInt(specId); + out.writeString(specId); testRequest.writeTo(out); out.writeInt(indices.size()); for (String index : indices) { @@ -76,6 +93,10 @@ public class QuerySpec implements Writeable { for (String type : types) { out.writeString(type); } + out.writeInt(ratedDocs.size()); + for (RatedDocument ratedDoc : ratedDocs) { + ratedDoc.writeTo(out); + } } public SearchSourceBuilder getTestRequest() { @@ -103,12 +124,69 @@ public class QuerySpec implements Writeable { } /** Returns a 
user supplied spec id for easier referencing. */ - public int getSpecId() { + public String getSpecId() { return specId; } /** Sets a user supplied spec id for easier referencing. */ - public void setSpecId(int specId) { + public void setSpecId(String specId) { this.specId = specId; } + + /** Returns a list of rated documents to evaluate. */ + public List getRatedDocs() { + return ratedDocs; + } + + /** Set a list of rated documents for this query. */ + public void setRatedDocs(List ratedDocs) { + this.ratedDocs = ratedDocs; + } + + private static final ParseField ID_FIELD = new ParseField("id"); + private static final ParseField REQUEST_FIELD = new ParseField("request"); + private static final ParseField RATINGS_FIELD = new ParseField("ratings"); + private static final ObjectParser PARSER = new ObjectParser<>("requests", QuerySpec::new); + + static { + PARSER.declareString(QuerySpec::setSpecId, ID_FIELD); + PARSER.declareObject(QuerySpec::setTestRequest, (p, c) -> { + try { + return SearchSourceBuilder.fromXContent(c.getParseContext(), c.getAggs(), c.getSuggesters()); + } catch (IOException ex) { + throw new ParsingException(p.getTokenLocation(), "error parsing request", ex); + } + } , REQUEST_FIELD); + PARSER.declareObjectArray(QuerySpec::setRatedDocs, (p, c) -> { + try { + return RatedDocument.fromXContent(p); + } catch (IOException ex) { + throw new ParsingException(p.getTokenLocation(), "error parsing ratings", ex); + } + } , RATINGS_FIELD); + } + + /** + * Parses {@link QuerySpec} from rest representation: + * + * Example: + * { + * "id": "coffee_query", + * "request": { + * "query": { + * "bool": { + * "must": [ + * {"match": {"beverage": "coffee"}}, + * {"term": {"browser": {"value": "safari"}}}, + * {"term": {"time_of_day": {"value": "morning","boost": 2}}}, + * {"term": {"ip_location": {"value": "ams","boost": 10}}}]} + * }, + * "size": 10 + * }, + * "ratings": [{ "1": 1 }, { "2": 0 }, { "3": 1 } ] + * } + */ + public static QuerySpec 
fromXContent(XContentParser parser, RankEvalContext context) throws IOException { + return PARSER.parse(parser, context); + } } diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalContext.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalContext.java new file mode 100644 index 00000000000..780585d978d --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalContext.java @@ -0,0 +1,65 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.ParseFieldMatcherSupplier; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.search.aggregations.AggregatorParsers; +import org.elasticsearch.search.suggest.Suggesters; + +public class RankEvalContext implements ParseFieldMatcherSupplier { + + private final ParseFieldMatcher parseFieldMatcher; + private final AggregatorParsers aggs; + private final Suggesters suggesters; + private final QueryParseContext parseContext; + + public RankEvalContext(ParseFieldMatcher parseFieldMatcher, QueryParseContext parseContext, AggregatorParsers aggs, + Suggesters suggesters) { + this.parseFieldMatcher = parseFieldMatcher; + this.aggs = aggs; + this.suggesters = suggesters; + this.parseContext = parseContext; + } + + public Suggesters getSuggesters() { + return this.suggesters; + } + + public AggregatorParsers getAggs() { + return this.aggs; + } + + @Override + public ParseFieldMatcher getParseFieldMatcher() { + return this.parseFieldMatcher; + } + + public XContentParser parser() { + return this.parseContext.parser(); + } + + public QueryParseContext getParseContext() { + return this.parseContext; + } + +} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequest.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequest.java index 3e8f893faaa..8dbbef3b497 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequest.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequest.java @@ -35,20 +35,24 @@ public class RankEvalRequest extends ActionRequest { /** The request data to use for evaluation. 
*/ private RankEvalSpec task; - + @Override public ActionRequestValidationException validate() { - return null; // TODO + return null; // TODO } - /** Returns the specification of this qa run including intents to execute, specifications detailing intent translation and metrics - * to compute. */ + /** + * Returns the specification of this qa run including intents to execute, + * specifications detailing intent translation and metrics to compute. + */ public RankEvalSpec getRankEvalSpec() { return task; } - /** Returns the specification of this qa run including intents to execute, specifications detailing intent translation and metrics - * to compute. */ + /** + * Returns the specification of this qa run including intents to execute, + * specifications detailing intent translation and metrics to compute. + */ public void setRankEvalSpec(RankEvalSpec task) { this.task = task; } diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java index f4ab789d429..7af60c24151 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java @@ -22,54 +22,65 @@ package org.elasticsearch.index.rankeval; import org.elasticsearch.action.ActionResponse; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; import java.io.IOException; -import java.util.ArrayList; import java.util.Collection; import java.util.Map; -/** +/** * For each qa specification identified by its id this response returns the respective * averaged precisionAnN value. - * + * * In addition for each query the document ids that haven't been found annotated is returned as well. 
- * + * * Documents of unknown quality - i.e. those that haven't been supplied in the set of annotated documents but have been returned * by the search are not taken into consideration when computing precision at n - they are ignored. - * + * **/ -public class RankEvalResponse extends ActionResponse { +public class RankEvalResponse extends ActionResponse implements ToXContent { - private Collection qualityResults = new ArrayList<>(); + private RankEvalResult qualityResult; public RankEvalResponse() { - + } public RankEvalResponse(StreamInput in) throws IOException { - int size = in.readInt(); - qualityResults = new ArrayList<>(size); - for (int i = 0; i < size; i++) { - qualityResults.add(new RankEvalResult(in)); - } + super.readFrom(in); + this.qualityResult = new RankEvalResult(in); } - + @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeInt(qualityResults.size()); - for (RankEvalResult result : qualityResults) { - result.writeTo(out); - } - } - - public void addRankEvalResult(int specId, double quality, Map> unknownDocs) { - RankEvalResult result = new RankEvalResult(specId, quality, unknownDocs); - this.qualityResults.add(result); + qualityResult.writeTo(out); } - - public Collection getRankEvalResults() { - return qualityResults; + + public void setRankEvalResult(RankEvalResult result) { + this.qualityResult = result; + } + + public RankEvalResult getRankEvalResult() { + return qualityResult; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("rank_eval"); + builder.field("spec_id", qualityResult.getSpecId()); + builder.field("quality_level", qualityResult.getQualityLevel()); + builder.startArray("unknown_docs"); + Map> unknownDocs = qualityResult.getUnknownDocs(); + for (String key : unknownDocs.keySet()) { + builder.startObject(); + builder.field(key, unknownDocs.get(key)); + builder.endObject(); + } + builder.endArray(); + 
builder.endObject(); + return builder; } } diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResult.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResult.java index b5bae8b6d4c..09c55b32b25 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResult.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResult.java @@ -31,22 +31,23 @@ import java.util.Map; * For each precision at n computation the id of the search request specification used to generate search requests is returned * for reference. In addition the averaged precision and the ids of all documents returned but not found annotated is returned. * */ +// TODO do we need an extra class for this or it RankEvalResponse enough? public class RankEvalResult implements Writeable { - /**ID of specification this result was generated for.*/ - private int specId; - /**Average precision observed when issueing query intents with this spec.*/ + /**ID of QA specification this result was generated for.*/ + private String specId; + /**Average precision observed when issuing query intents with this specification.*/ private double qualityLevel; /**Mapping from intent id to all documents seen for this intent that were not annotated.*/ - private Map> unknownDocs; + private Map> unknownDocs; @SuppressWarnings("unchecked") public RankEvalResult(StreamInput in) throws IOException { - this.specId = in.readInt(); + this.specId = in.readString(); this.qualityLevel = in.readDouble(); - this.unknownDocs = (Map>) in.readGenericValue(); + this.unknownDocs = (Map>) in.readGenericValue(); } - - public RankEvalResult(int specId, double quality, Map> unknownDocs) { + + public RankEvalResult(String specId, double quality, Map> unknownDocs) { this.specId = specId; this.qualityLevel = quality; this.unknownDocs = unknownDocs; @@ -54,12 +55,12 @@ public class RankEvalResult implements Writeable { @Override public 
void writeTo(StreamOutput out) throws IOException { - out.writeInt(specId); + out.writeString(specId); out.writeDouble(qualityLevel); out.writeGenericValue(getUnknownDocs()); } - - public int getSpecId() { + + public String getSpecId() { return specId; } @@ -67,7 +68,12 @@ public class RankEvalResult implements Writeable { return qualityLevel; } - public Map> getUnknownDocs() { + public Map> getUnknownDocs() { return unknownDocs; } + + @Override + public String toString() { + return "RankEvalResult, ID :[" + specId + "], quality: " + qualityLevel + ", unknown docs: " + unknownDocs; + } } \ No newline at end of file diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalSpec.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalSpec.java index f625401be53..71f65c43882 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalSpec.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalSpec.java @@ -35,45 +35,54 @@ import java.util.Collection; * */ public class RankEvalSpec implements Writeable { - /** Collection of query intents to check against including expected document ids.*/ - private Collection intents = new ArrayList<>(); + /** Collection of query specifications, that is e.g. search request templates to use for query translation. */ private Collection specifications = new ArrayList<>(); - /** Definition of n in precision at n */ + /** Definition of the quality metric, e.g. 
precision at N */ private RankedListQualityMetric eval; + /** a unique id for the whole QA task */ + private String taskId; + public RankEvalSpec() { + // TODO think if no args ctor is okay + } - public RankEvalSpec(Collection intents, Collection specs, RankedListQualityMetric metric) { - this.intents = intents; + public RankEvalSpec(String taskId, Collection specs, RankedListQualityMetric metric) { + this.taskId = taskId; this.specifications = specs; this.eval = metric; } public RankEvalSpec(StreamInput in) throws IOException { - int intentSize = in.readInt(); - intents = new ArrayList<>(intentSize); - for (int i = 0; i < intentSize; i++) { - intents.add(new RatedQuery(in)); - } int specSize = in.readInt(); specifications = new ArrayList<>(specSize); for (int i = 0; i < specSize; i++) { specifications.add(new QuerySpec(in)); } eval = in.readNamedWriteable(RankedListQualityMetric.class); // TODO add to registry + taskId = in.readString(); } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeInt(intents.size()); - for (RatedQuery query : intents) { - query.writeTo(out); - } out.writeInt(specifications.size()); for (QuerySpec spec : specifications) { spec.writeTo(out); } out.writeNamedWriteable(eval); + out.writeString(taskId); + } + + public void setEval(RankedListQualityMetric eval) { + this.eval = eval; + } + + public void setTaskId(String taskId) { + this.taskId = taskId; + } + + public String getTaskId() { + return this.taskId; } /** Returns the precision at n configuration (containing level of n to consider).*/ @@ -86,16 +95,6 @@ public class RankEvalSpec implements Writeable { this.eval = config; } - /** Returns a list of search intents to evaluate. */ - public Collection getIntents() { - return intents; - } - - /** Set a list of search intents to evaluate. */ - public void setIntents(Collection intents) { - this.intents = intents; - } - /** Returns a list of intent to query translation specifications to evaluate. 
*/ public Collection getSpecifications() { return specifications; diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java index 41168b10329..829be8a9bc5 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java @@ -19,15 +19,23 @@ package org.elasticsearch.index.rankeval; +import org.elasticsearch.common.ParseFieldMatcherSupplier; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.io.stream.NamedWriteable; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParser.Token; import org.elasticsearch.search.SearchHit; +import java.io.IOException; +import java.util.List; + /** * Classes implementing this interface provide a means to compute the quality of a result list * returned by some search. * * RelevancyLevel specifies the type of object determining the relevancy level of some known docid. * */ -public interface RankedListQualityMetric extends Evaluator { +public abstract class RankedListQualityMetric implements NamedWriteable { /** * Returns a single metric representing the ranking quality of a set of returned documents @@ -36,6 +44,27 @@ public interface RankedListQualityMetric extends Evaluator { * @param hits the result hits as returned by some search * @return some metric representing the quality of the result hit list wrt. to relevant doc ids. 
* */ - @Override - EvalQueryQuality evaluate(SearchHit[] hits, RatedQuery intent); + public abstract EvalQueryQuality evaluate(SearchHit[] hits, List ratedDocs); + + public static RankedListQualityMetric fromXContent(XContentParser parser, ParseFieldMatcherSupplier context) throws IOException { + RankedListQualityMetric rc; + Token token = parser.nextToken(); + if (token != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), "[_na] missing required metric name"); + } + String metricName = parser.currentName(); + + switch (metricName) { + case PrecisionAtN.NAME: + rc = PrecisionAtN.fromXContent(parser, context); + break; + default: + throw new ParsingException(parser.getTokenLocation(), "[_na] unknown query metric name [{}]", metricName); + } + if (parser.currentToken() == XContentParser.Token.END_OBJECT) { + // if we are at END_OBJECT, move to the next one... + parser.nextToken(); + } + return rc; + } } diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedDocument.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedDocument.java new file mode 100644 index 00000000000..d1bd99b97c4 --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedDocument.java @@ -0,0 +1,86 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.rankeval;
+
+import org.elasticsearch.common.ParsingException;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentParser.Token;
+
+import java.io.IOException;
+
+/**
+ * Holds a document id together with the integer rating assigned to it (query QA use case).
+ * */
+public class RatedDocument implements Writeable {
+
+    private final String docId;
+    private final int rating;
+
+    public RatedDocument(String docId, int rating) {
+        this.docId = docId;
+        this.rating = rating;
+    }
+
+    public RatedDocument(StreamInput in) throws IOException {
+        this.docId = in.readString();
+        this.rating = in.readVInt();
+    }
+
+    public String getDocID() {
+        return docId;
+    }
+
+    public int getRating() {
+        return rating;
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        out.writeString(docId);
+        out.writeVInt(rating);
+    }
+
+    /** Parses an object that holds exactly one "document id": rating field into a {@link RatedDocument}. */
+    public static RatedDocument fromXContent(XContentParser parser) throws IOException {
+        String parsedId = null;
+        int parsedRating = Integer.MIN_VALUE;
+        Token current;
+        while ((current = parser.nextToken()) != Token.END_OBJECT) {
+            if (current.equals(Token.FIELD_NAME)) {
+                if (parsedId != null) {
+                    throw new ParsingException(parser.getTokenLocation(), "only one document id allowed, found [{}] but already got [{}]",
+                            parser.currentName(), parsedId);
+                }
+                parsedId = parser.currentName();
+            } else if (current.equals(Token.VALUE_NUMBER)) {
+                parsedRating = parser.intValue();
+            } else {
+                throw new ParsingException(parser.getTokenLocation(), "unexpected token [{}] while parsing rated document", current);
+            }
+        }
+        if (parsedId == null) {
+            throw new ParsingException(parser.getTokenLocation(), "didn't find document id");
+        }
+        return new RatedDocument(parsedId, parsedRating);
+    }
+}
diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedQuery.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedQuery.java
deleted file mode 100644
index 5c4c7ea1e53..00000000000
--- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedQuery.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.rankeval;
-
-import org.elasticsearch.common.io.stream.StreamInput;
-import org.elasticsearch.common.io.stream.StreamOutput;
-import org.elasticsearch.common.io.stream.Writeable;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Map.Entry;
-
-/**
- * Objects of this class represent one type of user query to qa.
Each query comprises a user supplied id for easer referencing, - * a set of parameters as supplied by the end user to the search application as well as a set of rated documents (ratings e.g. - * supplied by manual result tagging or some form of automated click log based process). - * */ -public class RatedQuery implements Writeable { - - private final int intentId; - private final Map intentParameters; - private final Map ratedDocuments; - - public RatedQuery( - int intentId, Map intentParameters, Map ratedDocuments) { - this.intentId = intentId; - this.intentParameters = intentParameters; - this.ratedDocuments = ratedDocuments; - } - - public RatedQuery(StreamInput in) throws IOException { - this.intentId = in.readInt(); - this.intentParameters = in.readMap(); - - int ratedDocsSize = in.readInt(); - this.ratedDocuments = new HashMap<>(ratedDocsSize); - for (int i = 0; i < ratedDocsSize; i++) { - this.ratedDocuments.put(in.readString(), in.readInt()); - } - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeInt(intentId); - out.writeMap(intentParameters); - out.writeInt(ratedDocuments.size()); - for(Entry entry : ratedDocuments.entrySet()) { - out.writeString(entry.getKey()); - out.writeInt(entry.getValue()); - } - } - - /** For easier referencing users are allowed to supply unique ids with each search intent they want to check for - * performance quality wise.*/ - public int getIntentId() { - return intentId; - } - - - /** - * Returns a mapping from query parameter name to real parameter - ideally as parsed from real user logs. - * */ - public Map getIntentParameters() { - return intentParameters; - } - - /** - * Returns a set of documents and their ratings as supplied by the users. 
-     * */
-    public Map<String, Integer> getRatedDocuments() {
-        return ratedDocuments;
-    }
-
-}
diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RestRankEvalAction.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RestRankEvalAction.java
index 196fb9c5a9a..32f2438449a 100644
--- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RestRankEvalAction.java
+++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RestRankEvalAction.java
@@ -20,21 +20,29 @@
 package org.elasticsearch.index.rankeval;
 
 import org.elasticsearch.client.node.NodeClient;
-import org.elasticsearch.common.ParseFieldMatcher;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.query.QueryParseContext;
 import org.elasticsearch.indices.query.IndicesQueriesRegistry;
 import org.elasticsearch.rest.BaseRestHandler;
 import org.elasticsearch.rest.RestChannel;
 import org.elasticsearch.rest.RestController;
 import org.elasticsearch.rest.RestRequest;
 import org.elasticsearch.rest.action.support.RestActions;
+import org.elasticsearch.rest.action.support.RestToXContentListener;
 import org.elasticsearch.search.aggregations.AggregatorParsers;
 import org.elasticsearch.search.suggest.Suggesters;
 
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
 
 import static org.elasticsearch.rest.RestRequest.Method.GET;
 import static org.elasticsearch.rest.RestRequest.Method.POST;
@@ -45,7 +53,9 @@ import static org.elasticsearch.rest.RestRequest.Method.POST;
  * General Format:
  *
  *
-   { "requests": [{
+   {
+    "spec_id": "human_readable_id",
+    "requests": [{
         "id": "human_readable_id",
         "request": { ... request to check ... },
         "ratings": { ... mapping from doc id to rating value ... }
@@ -53,12 +63,15 @@ import static org.elasticsearch.rest.RestRequest.Method.POST;
     "metric": {
         "... metric_name... ": {
             "... metric_parameter_key ...": ...metric_parameter_value...
-        }}}
+        }
+    }
+   }
  *
  * Example:
  *
  *
-   {"requests": [{
+   {"spec_id": "huge_weight_on_location",
+    "requests": [{
         "id": "amsterdam_query",
         "request": {
             "query": {
@@ -78,6 +91,7 @@ import static org.elasticsearch.rest.RestRequest.Method.POST;
                 "3": 1,
                 "4": 1
             }
+        }
     }, {
         "id": "berlin_query",
         "request": {
@@ -100,7 +114,9 @@ import static org.elasticsearch.rest.RestRequest.Method.POST;
     }],
     "metric": {
         "precisionAtN": {
-            "size": 10}}
+            "size": 10
+        }
+    }
   }
  *
 
@@ -135,7 +151,7 @@ import static org.elasticsearch.rest.RestRequest.Method.POST;
         "failed": 0
     },
     "rank_eval": [{
-        "spec_id": "huge_weight_on_city",
+        "spec_id": "huge_weight_on_location",
         "quality_level": 0.4,
         "unknown_docs": [{
             "amsterdam_query": [5, 10, 23]
@@ -149,10 +165,17 @@ import static org.elasticsearch.rest.RestRequest.Method.POST;
  * */
 public class RestRankEvalAction extends BaseRestHandler {
 
+    private final IndicesQueriesRegistry queryRegistry;
+    private final AggregatorParsers aggregators;
+    private final Suggesters suggesters;
+
     @Inject
     public RestRankEvalAction(Settings settings, RestController controller, IndicesQueriesRegistry queryRegistry,
             AggregatorParsers aggParsers, Suggesters suggesters) {
         super(settings);
+        this.queryRegistry = queryRegistry;
+        this.aggregators = aggParsers;
+        this.suggesters = suggesters;
         controller.registerHandler(GET, "/_rank_eval", this);
         controller.registerHandler(POST, "/_rank_eval", this);
         controller.registerHandler(GET, "/{index}/_rank_eval", this);
@@ -164,22 +187,52 @@ public class RestRankEvalAction extends BaseRestHandler {
     @Override
     public void handleRequest(final RestRequest request, final RestChannel channel, final NodeClient client) throws IOException {
         RankEvalRequest rankEvalRequest = new RankEvalRequest();
-        //parseRankEvalRequest(rankEvalRequest, request, parseFieldMatcher);
-        //client.rankEval(rankEvalRequest, new RestStatusToXContentListener<>(channel));
-    }
-
-    public static void parseRankEvalRequest(RankEvalRequest rankEvalRequest, RestRequest request, ParseFieldMatcher parseFieldMatcher)
-            throws IOException {
-
-        String[] indices = Strings.splitStringByCommaToArray(request.param("index"));
-        BytesReference restContent = null;
-        if (restContent == null) {
-            if (RestActions.hasBodyContent(request)) {
-                restContent = RestActions.getRestContent(request);
+        BytesReference restContent = RestActions.hasBodyContent(request) ? RestActions.getRestContent(request) : null;
+        // Only build a parser when a body was sent; creating one from a null reference would fail before the check.
+        if (restContent != null) {
+            try (XContentParser parser = XContentFactory.xContent(restContent).createParser(restContent)) {
+                QueryParseContext parseContext = new QueryParseContext(queryRegistry, parser, parseFieldMatcher);
+                parseRankEvalRequest(rankEvalRequest, request,
+                        new RankEvalContext(parseFieldMatcher, parseContext, aggregators, suggesters));
             }
         }
-        if (restContent != null) {
-        }
+        client.execute(RankEvalAction.INSTANCE, rankEvalRequest, new RestToXContentListener<RankEvalResponse>(channel));
+    }
 
+    private static final ParseField SPECID_FIELD = new ParseField("spec_id");
+    private static final ParseField METRIC_FIELD = new ParseField("metric");
+    private static final ParseField REQUESTS_FIELD = new ParseField("requests");
+    private static final ObjectParser<RankEvalSpec, RankEvalContext> PARSER = new ObjectParser<>("rank_eval", RankEvalSpec::new);
+
+    static {
+        PARSER.declareString(RankEvalSpec::setTaskId, SPECID_FIELD);
+        PARSER.declareObject(RankEvalSpec::setEvaluator, (p, c) -> {
+            try {
+                return RankedListQualityMetric.fromXContent(p, c);
+            } catch (IOException ex) {
+                throw new ParsingException(p.getTokenLocation(), "error parsing rank request", ex);
+            }
+        }, METRIC_FIELD);
+        PARSER.declareObjectArray(RankEvalSpec::setSpecifications, (p, c) -> {
+            try {
+                return QuerySpec.fromXContent(p, c);
+            } catch (IOException ex) {
+                throw new ParsingException(p.getTokenLocation(), "error parsing rank request", ex);
+            }
+        }, REQUESTS_FIELD);
+    }
+
+    /**
+     * Parses the request body into a {@link RankEvalSpec}, applies the indices taken from the "index" request
+     * parameter to every query specification and sets the resulting spec on the given request.
+     */
+    public static void parseRankEvalRequest(RankEvalRequest rankEvalRequest, RestRequest request, RankEvalContext context)
+            throws IOException {
+        List<String> indices = Arrays.asList(Strings.splitStringByCommaToArray(request.param("index")));
+        RankEvalSpec spec = PARSER.parse(context.parser(), context);
+        for (QuerySpec specification : spec.getSpecifications()) {
+            specification.setIndices(indices);
+        }
+        rankEvalRequest.setRankEvalSpec(spec);
     }
 }
diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/TransportRankEvalAction.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/TransportRankEvalAction.java
index f75bf3a0ebd..47110412d42 100644
--- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/TransportRankEvalAction.java
+++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/TransportRankEvalAction.java
@@ -49,18 +49,18 @@ import java.util.Map;
  * Instances of this class execute a collection of search intents (read: user supplied query parameters) against a set of
  * possible search requests (read: search specifications, expressed as query/search request templates) and compares the result
  * against a set of annotated documents per search intent.
- * 
+ *
  * If any documents are returned that haven't been annotated the document id of those is returned per search intent.
- * 
+ *
  * The resulting search quality is computed in terms of precision at n and returned for each search specification for the full
  * set of search intents as averaged precision at n.
* */ public class TransportRankEvalAction extends HandledTransportAction { - private SearchPhaseController searchPhaseController; - private TransportService transportService; - private SearchTransportService searchTransportService; - private ClusterService clusterService; - private ActionFilters actionFilters; + private SearchPhaseController searchPhaseController; + private TransportService transportService; + private SearchTransportService searchTransportService; + private ClusterService clusterService; + private ActionFilters actionFilters; @Inject public TransportRankEvalAction(Settings settings, ThreadPool threadPool, ActionFilters actionFilters, @@ -75,46 +75,36 @@ public class TransportRankEvalAction extends HandledTransportAction listener) { - RankEvalResponse response = new RankEvalResponse(); RankEvalSpec qualityTask = request.getRankEvalSpec(); RankedListQualityMetric metric = qualityTask.getEvaluator(); - for (QuerySpec spec : qualityTask.getSpecifications()) { - double qualitySum = 0; - + double qualitySum = 0; + Map> unknownDocs = new HashMap>(); + Collection specifications = qualityTask.getSpecifications(); + for (QuerySpec spec : specifications) { SearchSourceBuilder specRequest = spec.getTestRequest(); - String[] indices = new String[spec.getIndices().size()]; + String[] indices = new String[spec.getIndices().size()]; spec.getIndices().toArray(indices); SearchRequest templatedRequest = new SearchRequest(indices, specRequest); + TransportSearchAction transportSearchAction = new TransportSearchAction(settings, threadPool, searchPhaseController, + transportService, searchTransportService, clusterService, actionFilters, indexNameExpressionResolver); + ActionFuture searchResponse = transportSearchAction.execute(templatedRequest); + SearchHits hits = searchResponse.actionGet().getHits(); - Map> unknownDocs = new HashMap>(); - Collection intents = qualityTask.getIntents(); - for (RatedQuery intent : intents) { - - TransportSearchAction 
transportSearchAction = new TransportSearchAction( - settings, - threadPool, - searchPhaseController, - transportService, - searchTransportService, - clusterService, - actionFilters, - indexNameExpressionResolver); - ActionFuture searchResponse = transportSearchAction.execute(templatedRequest); - SearchHits hits = searchResponse.actionGet().getHits(); - - EvalQueryQuality intentQuality = metric.evaluate(hits.getHits(), intent); - qualitySum += intentQuality.getQualityLevel(); - unknownDocs.put(intent.getIntentId(), intentQuality.getUnknownDocs()); - } - response.addRankEvalResult(spec.getSpecId(), qualitySum / intents.size(), unknownDocs); + EvalQueryQuality intentQuality = metric.evaluate(hits.getHits(), spec.getRatedDocs()); + qualitySum += intentQuality.getQualityLevel(); + unknownDocs.put(spec.getSpecId(), intentQuality.getUnknownDocs()); } + RankEvalResponse response = new RankEvalResponse(); + RankEvalResult result = new RankEvalResult(qualityTask.getTaskId(), qualitySum / specifications.size(), unknownDocs); + response.setRankEvalResult(result); listener.onResponse(response); } } diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/PrecisionAtRequestTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/PrecisionAtRequestTests.java deleted file mode 100644 index c6a9fa658d0..00000000000 --- a/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/PrecisionAtRequestTests.java +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.action.quality; - -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.index.query.MatchQueryBuilder; -import org.elasticsearch.index.rankeval.PrecisionAtN; -import org.elasticsearch.index.rankeval.RankEvalPlugin; -import org.elasticsearch.index.rankeval.RatedQuery; -import org.elasticsearch.index.rankeval.PrecisionAtN.Rating; -import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.test.ESIntegTestCase; -import org.junit.Before; - -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.ExecutionException; - -@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, transportClientRatio = 0.0) -// NORELEASE need to fix transport client use case -public class PrecisionAtRequestTests extends ESIntegTestCase { - @Override - protected Collection> transportClientPlugins() { - return pluginList(RankEvalPlugin.class); - } - - @Override - protected Collection> nodePlugins() { - return pluginList(RankEvalPlugin.class); - } - - @Before - public void setup() { - createIndex("test"); - ensureGreen(); - - client().prepareIndex("test", "testtype").setId("1") - .setSource("text", "berlin").get(); - client().prepareIndex("test", "testtype").setId("2") - .setSource("text", "amsterdam").get(); - client().prepareIndex("test", "testtype").setId("3") - .setSource("text", "amsterdam").get(); - client().prepareIndex("test", "testtype").setId("4") - 
.setSource("text", "amsterdam").get(); - client().prepareIndex("test", "testtype").setId("5") - .setSource("text", "amsterdam").get(); - client().prepareIndex("test", "testtype").setId("6") - .setSource("text", "amsterdam").get(); - refresh(); - } - - - public void testPrecisionAtFiveCalculation() throws IOException, InterruptedException, ExecutionException { - // TODO turn into unit test - no need to execute the query here to fill hits object - MatchQueryBuilder query = new MatchQueryBuilder("text", "berlin"); - - SearchResponse response = client().prepareSearch().setQuery(query) - .execute().actionGet(); - - Map relevant = new HashMap<>(); - relevant.put("1", Rating.RELEVANT.ordinal()); - RatedQuery intent = new RatedQuery(0, new HashMap<>(), relevant); - SearchHit[] hits = response.getHits().getHits(); - - assertEquals(1, (new PrecisionAtN(5)).evaluate(hits, intent).getQualityLevel(), 0.00001); - } - - public void testPrecisionAtFiveIgnoreOneResult() throws IOException, InterruptedException, ExecutionException { - // TODO turn into unit test - no need to actually execute the query here to fill the hits object - MatchQueryBuilder query = new MatchQueryBuilder("text", "amsterdam"); - - SearchResponse response = client().prepareSearch().setQuery(query) - .execute().actionGet(); - - Map relevant = new HashMap<>(); - relevant.put("2", Rating.RELEVANT.ordinal()); - relevant.put("3", Rating.RELEVANT.ordinal()); - relevant.put("4", Rating.RELEVANT.ordinal()); - relevant.put("5", Rating.RELEVANT.ordinal()); - relevant.put("6", Rating.IRRELEVANT.ordinal()); - RatedQuery intent = new RatedQuery(0, new HashMap<>(), relevant); - SearchHit[] hits = response.getHits().getHits(); - - assertEquals((double) 4 / 5, (new PrecisionAtN(5)).evaluate(hits, intent).getQualityLevel(), 0.00001); - } - - public void testPrecisionJSON() { - - } - -/* public void testPrecisionAction() { - // TODO turn into REST test? 
- - Collection intents = new ArrayList(); - RatedQuery intentAmsterdam = new RatedQuery( - 0, - createParams("var", "amsterdam"), - createRelevant("2", "3", "4", "5")); - intents.add(intentAmsterdam); - - RatedQuery intentBerlin = new RatedQuery( - 1, - createParams("var", "berlin"), - createRelevant("1")); - intents.add(intentBerlin); - - Collection specs = new ArrayList(); - ArrayList indices = new ArrayList<>(); - indices.add("test"); - ArrayList types = new ArrayList<>(); - types.add("testtype"); - - SearchSourceBuilder source = new SearchSourceBuilder(); - QuerySpec spec = new QuerySpec(0, source, indices, types); - specs.add(spec); - - RankEvalSpec task = new RankEvalSpec(intents, specs, new PrecisionAtN(10)); - - RankEvalRequestBuilder builder = new RankEvalRequestBuilder( - client(), - RankEvalAction.INSTANCE, - new RankEvalRequest()); - builder.setRankEvalSpec(task); - - RankEvalResponse response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet(); - RankEvalResult result = response.getRankEvalResults().iterator().next(); - for (Entry> entry : result.getUnknownDocs().entrySet()) { - if (entry.getKey() == 0) { - assertEquals(1, entry.getValue().size()); - } else { - assertEquals(0, entry.getValue().size()); - } - } - }*/ - - private Map createRelevant(String... 
docs) { - Map relevant = new HashMap<>(); - for (String doc : docs) { - relevant.put(doc, Rating.RELEVANT.ordinal()); - } - return relevant; - } - - private Map createParams(String key, String value) { - Map parameters = new HashMap<>(); - parameters.put(key, value); - return parameters; - } - - } diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRequestTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRequestTests.java new file mode 100644 index 00000000000..79df86f6e56 --- /dev/null +++ b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRequestTests.java @@ -0,0 +1,122 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.elasticsearch.action.quality;
+
+import org.elasticsearch.index.query.MatchAllQueryBuilder;
+import org.elasticsearch.index.rankeval.PrecisionAtN;
+import org.elasticsearch.index.rankeval.PrecisionAtN.Rating;
+import org.elasticsearch.index.rankeval.QuerySpec;
+import org.elasticsearch.index.rankeval.RankEvalAction;
+import org.elasticsearch.index.rankeval.RankEvalPlugin;
+import org.elasticsearch.index.rankeval.RankEvalRequest;
+import org.elasticsearch.index.rankeval.RankEvalRequestBuilder;
+import org.elasticsearch.index.rankeval.RankEvalResponse;
+import org.elasticsearch.index.rankeval.RankEvalResult;
+import org.elasticsearch.index.rankeval.RankEvalSpec;
+import org.elasticsearch.index.rankeval.RatedDocument;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.search.builder.SearchSourceBuilder;
+import org.elasticsearch.test.ESIntegTestCase;
+import org.junit.Before;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Set;
+
+@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, transportClientRatio = 0.0)
+// NORELEASE need to fix transport client use case
+public class RankEvalRequestTests extends ESIntegTestCase {
+    @Override
+    protected Collection<Class<? extends Plugin>> transportClientPlugins() {
+        return pluginList(RankEvalPlugin.class);
+    }
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return pluginList(RankEvalPlugin.class);
+    }
+
+    @Before
+    public void setup() {
+        createIndex("test");
+        ensureGreen();
+
+        client().prepareIndex("test", "testtype").setId("1")
+                .setSource("text", "berlin").get();
+        client().prepareIndex("test", "testtype").setId("2")
+                .setSource("text", "amsterdam").get();
+        client().prepareIndex("test", "testtype").setId("3")
+                .setSource("text", "amsterdam").get();
+        client().prepareIndex("test", "testtype").setId("4")
+                .setSource("text", "amsterdam").get();
+        client().prepareIndex("test", "testtype").setId("5")
+                .setSource("text", "amsterdam").get();
+        client().prepareIndex("test", "testtype").setId("6")
+                .setSource("text", "amsterdam").get();
+        refresh();
+    }
+
+    public void testPrecisionAtRequest() {
+        ArrayList<String> indices = new ArrayList<>();
+        indices.add("test");
+        ArrayList<String> types = new ArrayList<>();
+        types.add("testtype");
+
+        String specId = randomAsciiOfLength(10);
+        List<QuerySpec> specifications = new ArrayList<>();
+        SearchSourceBuilder testQuery = new SearchSourceBuilder();
+        testQuery.query(new MatchAllQueryBuilder());
+        specifications.add(new QuerySpec("amsterdam_query", testQuery, indices, types, createRelevant("2", "3", "4", "5")));
+        specifications.add(new QuerySpec("berlin_query", testQuery, indices, types, createRelevant("1")));
+
+        RankEvalSpec task = new RankEvalSpec(specId, specifications, new PrecisionAtN(10));
+
+        RankEvalRequestBuilder builder = new RankEvalRequestBuilder(
+                client(),
+                RankEvalAction.INSTANCE,
+                new RankEvalRequest());
+        builder.setRankEvalSpec(task);
+
+        RankEvalResponse response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
+        RankEvalResult result = response.getRankEvalResult();
+        assertEquals(specId, result.getSpecId());
+        assertEquals(1.0, result.getQualityLevel(), Double.MIN_VALUE);
+        Set<Entry<String, Collection<String>>> entrySet = result.getUnknownDocs().entrySet();
+        assertEquals(2, entrySet.size());
+        for (Entry<String, Collection<String>> entry : entrySet) { // keys are compared with equals(): '==' only tests identity
+            if ("amsterdam_query".equals(entry.getKey())) {
+                assertEquals(2, entry.getValue().size());
+            }
+            if ("berlin_query".equals(entry.getKey())) {
+                assertEquals(5, entry.getValue().size());
+            }
+        }
+    }
+
+    private static List<RatedDocument> createRelevant(String... docs) {
+        List<RatedDocument> relevant = new ArrayList<>();
+        for (String doc : docs) {
+            relevant.add(new RatedDocument(doc, Rating.RELEVANT.ordinal()));
+        }
+        return relevant;
+    }
+ }
diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java
new file mode 100644
index 00000000000..c123d5bbf8f
--- /dev/null
+++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.rankeval;
+
+import org.elasticsearch.common.ParseFieldMatcher;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.rankeval.PrecisionAtN.Rating;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.internal.InternalSearchHit;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+
+public class PrecisionAtNTests extends ESTestCase {
+
+    public void testPrecisionAtFiveCalculation() throws IOException, InterruptedException, ExecutionException {
+        List<RatedDocument> rated = new ArrayList<>();
+        rated.add(new RatedDocument("0", Rating.RELEVANT.ordinal()));
+        SearchHit[] hits = new InternalSearchHit[1];
+        hits[0] = new InternalSearchHit(0, "0", new Text("type"), Collections.emptyMap());
+        assertEquals(1, (new PrecisionAtN(5)).evaluate(hits, rated).getQualityLevel(), 0.00001);
+    }
+
+    public void testPrecisionAtFiveIgnoreOneResult() throws IOException, InterruptedException, ExecutionException {
+        List<RatedDocument> rated = new ArrayList<>();
+        rated.add(new RatedDocument("0", Rating.RELEVANT.ordinal()));
+        rated.add(new RatedDocument("1", Rating.RELEVANT.ordinal()));
+        rated.add(new RatedDocument("2", Rating.RELEVANT.ordinal()));
+        rated.add(new RatedDocument("3", Rating.RELEVANT.ordinal()));
+        rated.add(new RatedDocument("4", Rating.IRRELEVANT.ordinal()));
+        SearchHit[] hits = new InternalSearchHit[5];
+        for (int i = 0; i < 5; i++) {
+            hits[i] = new InternalSearchHit(i, i+"", new Text("type"), Collections.emptyMap());
+        }
+        assertEquals((double) 4 / 5, (new PrecisionAtN(5)).evaluate(hits, rated).getQualityLevel(), 0.00001);
+    }
+
+    public void testParseFromXContent() throws IOException {
+        String xContent = " {\n" + " \"size\": 10\n" + "}";
+        // the parser is a closeable resource, so release it once the metric has been read
+        try (XContentParser parser = XContentFactory.xContent(xContent).createParser(xContent)) {
+            PrecisionAtN precisionAt = PrecisionAtN.fromXContent(parser, () -> ParseFieldMatcher.STRICT);
+            assertEquals(10, precisionAt.getN());
+        }
+    }
+}
diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/QuerySpecTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/QuerySpecTests.java
new file mode 100644
index 00000000000..49ae4d0d6e4
--- /dev/null
+++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/QuerySpecTests.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.rankeval;
+
+import org.elasticsearch.common.ParseFieldMatcher;
+import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.ParseFieldRegistry;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.query.QueryParseContext;
+import org.elasticsearch.indices.query.IndicesQueriesRegistry;
+import org.elasticsearch.search.SearchModule;
+import org.elasticsearch.search.aggregations.AggregatorParsers;
+import org.elasticsearch.search.suggest.Suggesters;
+import org.elasticsearch.test.ESTestCase;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+import java.io.IOException;
+import java.util.List;
+
+public class QuerySpecTests extends ESTestCase {
+
+    private static IndicesQueriesRegistry queriesRegistry;
+    private static SearchModule searchModule;
+    private static Suggesters suggesters;
+    private static AggregatorParsers aggsParsers;
+
+    /**
+     * Builds the registries and parsers shared by the tests in this class; they are reset to null in afterClass().
+     */
+    @BeforeClass
+    public static void init() throws IOException {
+        aggsParsers = new AggregatorParsers(new ParseFieldRegistry<>("aggregation"), new ParseFieldRegistry<>("aggregation_pipes"));
+        searchModule = new SearchModule(Settings.EMPTY, new NamedWriteableRegistry(), false);
+        queriesRegistry = searchModule.getQueryParserRegistry();
+        suggesters = searchModule.getSuggesters();
+    }
+
+    @AfterClass
+    public static void afterClass() throws Exception {
+        queriesRegistry = null;
+        searchModule = null;
+        suggesters = null;
+        aggsParsers = null;
+    }
+
+    public void testParseFromXContent() throws IOException {
+        String querySpecString = " {\n"
+                + " \"id\": \"my_qa_query\",\n"
+                + " \"request\": {\n"
+                + " \"query\": {\n"
+                + " \"bool\": {\n"
+                + " \"must\": [\n"
+                + " {\"match\": {\"beverage\": \"coffee\"}},\n"
+                + " {\"term\": {\"browser\": {\"value\": \"safari\"}}},\n"
+                + " {\"term\": {\"time_of_day\": {\"value\": \"morning\",\"boost\": 2}}},\n"
+                + " {\"term\": {\"ip_location\": {\"value\": \"ams\",\"boost\": 10}}}]}\n"
+                + " },\n"
+                + " \"size\": 10\n"
+                + " },\n"
+                + " \"ratings\": [ {\"1\": 1 }, { \"2\": 0 }, { \"3\": 1 } ]\n"
+                + "}";
+        try (XContentParser parser = XContentFactory.xContent(querySpecString).createParser(querySpecString)) {
+            QueryParseContext queryContext = new QueryParseContext(queriesRegistry, parser, ParseFieldMatcher.STRICT);
+            RankEvalContext rankContext = new RankEvalContext(ParseFieldMatcher.STRICT, queryContext, aggsParsers, suggesters);
+            QuerySpec specification = QuerySpec.fromXContent(parser, rankContext);
+            assertEquals("my_qa_query", specification.getSpecId());
+            assertNotNull(specification.getTestRequest());
+            List<RatedDocument> ratedDocs = specification.getRatedDocs();
+            assertEquals(3, ratedDocs.size());
+            assertEquals("1", ratedDocs.get(0).getDocID());
+            assertEquals(1, ratedDocs.get(0).getRating());
+            assertEquals("2", ratedDocs.get(1).getDocID());
+            assertEquals(0, ratedDocs.get(1).getRating());
+            assertEquals("3", ratedDocs.get(2).getDocID());
+            assertEquals(1, ratedDocs.get(2).getRating());
+        }
+    }
+}
diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml
index 201efde33aa..d3487082b6a 100644
--- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml
+++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml
@@ -5,44 +5,53 @@
       index:
           index: foo
           type: bar
-          id: 1
+          id: doc1
           body: { "text": "berlin" }
 
   - do:
       index:
           index: foo
           type: bar
-          id: 2
+          id: doc2
           body: { "text": "amsterdam" }
-  
+
   - do:
       index:
           index: foo
           type: bar
-          id: 3
+          id: doc3
           body: { "text": "amsterdam" }
+
+  - do:
+      index:
+          index: foo
+          type: bar
+          id: doc4
+          body: { "text": "something about amsterdam and berlin" }
 
   - do:
       indices.refresh: {}
 
   - do:
rank_eval: - body: - requests: [ + body: { + "spec_id" : "cities_qa_queries", + "requests" : [ { - id: "amsterdam_query", - request: { query: {match : {text : "amsterdam" }}}, - ratings: { "1": 0, "2": 1, "3": 1 } - }, { - id: "berlin_query", - request: { query: { match : { text : "berlin" } }, size : 10 }, - ratings: {"1": 1} + "id": "amsterdam_query", + "request": { "query": { "match" : {"text" : "amsterdam" }}}, + "ratings": [{ "doc1": 0}, {"doc2": 1}, {"doc3": 1}] + }, + { + "id" : "berlin_query", + "request": { "query": { "match" : { "text" : "berlin" } }, "size" : 10 }, + "ratings": [{"doc1": 1}] } - ] - metric: { precisionAtN: { size: 10}} - - - match: {quality_level: 1} - - gte: { took: 0 } - - is_false: task - - is_false: deleted + ], + "metric" : { "precisionatn": { "size": 10}} + } + - match: {rank_eval.spec_id: "cities_qa_queries"} + - match: {rank_eval.quality_level: 1} + - match: {rank_eval.unknown_docs.0.amsterdam_query: [ "doc4"]} + - match: {rank_eval.unknown_docs.1.berlin_query: [ "doc4"]}