From 6d4673fd589893878d1c04a062b7021067ebbd7e Mon Sep 17 00:00:00 2001 From: Isabel Drost-Fromm Date: Thu, 30 Jun 2016 16:15:08 +0200 Subject: [PATCH] Initial commit for Module to compute metrics on queries This is an initial squashed commit of the work on a new feature for query metrics proposed in #18798. --- .../support/TransportProxyClient.java | 1 + modules/rank-eval/build.gradle | 30 +++ .../index/rankeval/EvalQueryQuality.java | 45 +++++ .../index/rankeval/Evaluator.java | 28 +++ .../index/rankeval/PrecisionAtN.java | 144 ++++++++++++++ .../index/rankeval/QuerySpec.java | 114 +++++++++++ .../index/rankeval/RankEvalAction.java | 47 +++++ .../index/rankeval/RankEvalPlugin.java | 46 +++++ .../index/rankeval/RankEvalRequest.java | 69 +++++++ .../rankeval/RankEvalRequestBuilder.java | 44 +++++ .../index/rankeval/RankEvalResponse.java | 75 +++++++ .../index/rankeval/RankEvalResult.java | 73 +++++++ .../index/rankeval/RankEvalSpec.java | 109 ++++++++++ .../rankeval/RankedListQualityMetric.java | 40 ++++ .../index/rankeval/RatedQuery.java | 92 +++++++++ .../index/rankeval/RestRankEvalAction.java | 187 ++++++++++++++++++ .../rankeval/TransportRankEvalAction.java | 120 +++++++++++ .../quality/PrecisionAtRequestTests.java | 170 ++++++++++++++++ .../action/quality/RankEvalRestIT.java | 40 ++++ .../test/rank_eval/10_basic.yaml | 48 +++++ .../rest-api-spec/api/rank_eval.json | 17 ++ settings.gradle | 1 + 22 files changed, 1540 insertions(+) create mode 100644 modules/rank-eval/build.gradle create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/Evaluator.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java create mode 100644 
modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalAction.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalPlugin.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequest.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequestBuilder.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResult.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalSpec.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedQuery.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RestRankEvalAction.java create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/TransportRankEvalAction.java create mode 100644 modules/rank-eval/src/test/java/org/elasticsearch/action/quality/PrecisionAtRequestTests.java create mode 100644 modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRestIT.java create mode 100644 modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/rank_eval.json diff --git a/core/src/main/java/org/elasticsearch/client/transport/support/TransportProxyClient.java b/core/src/main/java/org/elasticsearch/client/transport/support/TransportProxyClient.java index 900876415e3..600d93e8489 100644 --- a/core/src/main/java/org/elasticsearch/client/transport/support/TransportProxyClient.java +++ b/core/src/main/java/org/elasticsearch/client/transport/support/TransportProxyClient.java @@ -32,6 +32,7 @@ import 
org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.transport.TransportService; +import java.util.Collections; import java.util.HashMap; import java.util.Map; diff --git a/modules/rank-eval/build.gradle b/modules/rank-eval/build.gradle new file mode 100644 index 00000000000..cd4dd32c3d5 --- /dev/null +++ b/modules/rank-eval/build.gradle @@ -0,0 +1,30 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +esplugin { + description 'The Rank Eval module adds APIs to evaluate ranking quality.' + classname 'org.elasticsearch.index.rankeval.RankEvalPlugin' +} + +integTest { + cluster { + setting 'script.inline', 'true' + setting 'script.stored', 'true' + } +} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java new file mode 100644 index 00000000000..c5d48c2074a --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java @@ -0,0 +1,45 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.rankeval; + +import java.util.Collection; + +/** Result of evaluating one search intent and search specification combination. Carries the ids of the returned documents that were not + * annotated and the quality level (for instance the precision) computed for the result set from the annotations given. + * */ +public class EvalQueryQuality { + /** Quality score computed for the evaluated result list. */ private double qualityLevel; + /** Ids of returned documents for which no annotation was supplied. */ + private Collection unknownDocs; + /** Stores the given quality level and collection of unknown document ids as-is (no defensive copy is made). */ + public EvalQueryQuality (double qualityLevel, Collection unknownDocs) { + this.qualityLevel = qualityLevel; + this.unknownDocs = unknownDocs; + } + /** Returns the ids of returned documents that were not annotated; this is the same collection instance that was passed to the constructor. */ + public Collection getUnknownDocs() { + return unknownDocs; + } + /** Returns the quality score computed for the evaluated result list. */ + public double getQualityLevel() { + return qualityLevel; + } + +} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/Evaluator.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/Evaluator.java new file mode 100644 index 00000000000..35fb4bf03cd --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/Evaluator.java @@ -0,0 +1,28 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.common.io.stream.NamedWriteable; +import org.elasticsearch.search.SearchHit; +/** Named, stream-serialisable strategy that evaluates the hits returned for a rated query. */ +public interface Evaluator extends NamedWriteable { + /** Evaluates {@code hits} against {@code intent}; the concrete result type is left to the implementation (PrecisionAtN returns an EvalQueryQuality). */ + public Object evaluate(SearchHit[] hits, RatedQuery intent); +} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java new file mode 100644 index 00000000000..f6216eadede --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/PrecisionAtN.java @@ -0,0 +1,144 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.search.SearchHit; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Map; +import java.util.Map.Entry; + +import javax.naming.directory.SearchResult; +/* NOTE(review): javax.naming.directory.SearchResult is a JNDI class that the code of this class never uses - candidate for removal. */ +/** + * Evaluate Precision at N, N being the number of search results to consider for precision calculation. + * + * Documents of unknown quality are ignored in the precision at n computation and returned by document id. + * */ +public class PrecisionAtN implements RankedListQualityMetric { + + /** Number of results to check against a given set of relevant results. */ + private int n; + /** Name returned by {@link #getWriteableName()}. */ + public static final String NAME = "precisionatn"; + /** Reads n from the stream as a single int; counterpart of {@link #writeTo(StreamOutput)}. */ + public PrecisionAtN(StreamInput in) throws IOException { + n = in.readInt(); + } + /** Writes n to the stream as a single int. */ + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeInt(n); + } + /** Returns {@link #NAME}. */ + @Override + public String getWriteableName() { + return NAME; + } + + /** + * Initialises n with 10 + * */ + public PrecisionAtN() { + this.n = 10; + } + + /** + * @param n number of top results to check against a given set of relevant results. + * */ + public PrecisionAtN(int n) { + this.n= n; + } + + /** + * Return number of search results to check for quality. + * */ + public int getN() { + return n; + } + + /** Compute precisionAtN for the top n of the given hits, based on the document ratings provided by the intent. + * @return precision at n for the given {@link SearchHit} list together with the ids of the hits that carry no rating.
+ **/ + @Override + public EvalQueryQuality evaluate(SearchHit[] hits, RatedQuery intent) { + Map ratedDocIds = intent.getRatedDocuments(); + /* Collect the ids of all documents rated as relevant for this intent. */ + Collection relevantDocIds = new ArrayList<>(); + for (Entry entry : ratedDocIds.entrySet()) { + if (Rating.RELEVANT.equals(RatingMapping.mapTo(entry.getValue()))) { + relevantDocIds.add(entry.getKey()); + } + } + /* Collect the ids of all documents rated as irrelevant for this intent. */ + Collection irrelevantDocIds = new ArrayList<>(); + for (Entry entry : ratedDocIds.entrySet()) { + if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(entry.getValue()))) { + irrelevantDocIds.add(entry.getKey()); + } + } + /* Classify the top n hits (or fewer when the result list is shorter) as relevant, irrelevant or unrated. */ + int good = 0; + int bad = 0; + Collection unknownDocIds = new ArrayList(); + for (int i = 0; (i < n && i < hits.length); i++) { + String id = hits[i].getId(); + if (relevantDocIds.contains(id)) { + good++; + } else if (irrelevantDocIds.contains(id)) { + bad++; + } else { + unknownDocIds.add(id); + } + } + /* Unrated hits are excluded from the denominator. When no inspected hit is rated the ratio would be 0.0/0 = NaN, so 0.0 is reported instead. */ + double precision = (good + bad) == 0 ? 0.0 : (double) good / (good + bad); + + return new EvalQueryQuality(precision, unknownDocIds); + } + /** Binary relevance judgement of a rated document. */ + public enum Rating { + RELEVANT, IRRELEVANT; + } + + /** + * Needed to get the enum across serialisation boundaries: RELEVANT is encoded as 0 and IRRELEVANT as 1; any non-zero value decodes to IRRELEVANT. + * */ + public static class RatingMapping { + public static Integer mapFrom(Rating rating) { + if (Rating.RELEVANT.equals(rating)) { + return 0; + } + return 1; + } + + public static Rating mapTo(Integer rating) { + if (rating == 0) { + return Rating.RELEVANT; + } + return Rating.IRRELEVANT; + } + } + +} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java new file mode 100644 index 00000000000..b94e0e92bd7 --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java @@ -0,0 +1,114 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.search.builder.SearchSourceBuilder; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Defines a QA specification: All end user supplied query intents will be mapped to the search request specified in this search request + * template and executed against the targetIndex given. Any filters that should be applied in the target system can be specified as well. + * + * The resulting document lists can then be compared against what was specified in the set of rated documents as part of a QAQuery. 
+ * */ +public class QuerySpec implements Writeable { + /* Spec id, search source to execute, and the index and type names the search is specified for. */ + private int specId = 0; + private SearchSourceBuilder testRequest; + private List indices = new ArrayList<>(); + private List types = new ArrayList<>(); + /** Creates a spec from the given parts; the index and type lists are stored as-is (no defensive copy is made). */ + public QuerySpec( + int specId, SearchSourceBuilder testRequest, List indices, List types) { + this.specId = specId; + this.testRequest = testRequest; + this.indices = indices; + this.types = types; + } + /** Reads a spec in the order used by writeTo: spec id, search source, then the size-prefixed index names and type names. */ + public QuerySpec(StreamInput in) throws IOException { + this.specId = in.readInt(); + testRequest = new SearchSourceBuilder(in); + int indicesSize = in.readInt(); + indices = new ArrayList(indicesSize); + for (int i = 0; i < indicesSize; i++) { + this.indices.add(in.readString()); + } + int typesSize = in.readInt(); + types = new ArrayList(typesSize); + for (int i = 0; i < typesSize; i++) { + this.types.add(in.readString()); + } + } + /** Writes spec id, search source, then each of the index and type name lists prefixed by its size. */ + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeInt(specId); + testRequest.writeTo(out); + out.writeInt(indices.size()); + for (String index : indices) { + out.writeString(index); + } + out.writeInt(types.size()); + for (String type : types) { + out.writeString(type); + } + } + /** Returns the search source of this spec. */ + public SearchSourceBuilder getTestRequest() { + return testRequest; + } + /** Sets the search source of this spec. */ + public void setTestRequest(SearchSourceBuilder testRequest) { + this.testRequest = testRequest; + } + /** Returns the index names of this spec (the internal list, not a copy). */ + public List getIndices() { + return indices; + } + /** Sets the index names of this spec. */ + public void setIndices(List indices) { + this.indices = indices; + } + /** Returns the type names of this spec (the internal list, not a copy). */ + public List getTypes() { + return types; + } + /** Sets the type names of this spec. */ + public void setTypes(List types) { + this.types = types; + } + + /** Returns a user supplied spec id for easier referencing. */ + public int getSpecId() { + return specId; + } + + /** Sets a user supplied spec id for easier referencing. 
*/ + public void setSpecId(int specId) { + this.specId = specId; + } +} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalAction.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalAction.java new file mode 100644 index 00000000000..0f506112d65 --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalAction.java @@ -0,0 +1,47 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.action.Action; +import org.elasticsearch.client.ElasticsearchClient; + +/** + * Action used to start precision at qa evaluations. 
+ **/ +public class RankEvalAction extends Action { + /* The only instance of this action (the constructor is private), followed by the action name passed to the superclass. */ + public static final RankEvalAction INSTANCE = new RankEvalAction(); + public static final String NAME = "indices:data/read/quality"; + /** Private so that INSTANCE stays the only instance. NAME is a compile-time constant, so it is available here although INSTANCE is declared before it. */ + private RankEvalAction() { + super(NAME); + } + /** Creates a request builder for this action that wraps a fresh, empty RankEvalRequest. */ + @Override + public RankEvalRequestBuilder newRequestBuilder(ElasticsearchClient client) { + return new RankEvalRequestBuilder(client, this, new RankEvalRequest()); + } + /** Creates an empty RankEvalResponse. */ + @Override + public RankEvalResponse newResponse() { + return new RankEvalResponse(); + } + +} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalPlugin.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalPlugin.java new file mode 100644 index 00000000000..f0bd5c1f838 --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalPlugin.java @@ -0,0 +1,46 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.action.ActionModule; +import org.elasticsearch.common.network.NetworkModule; +import org.elasticsearch.plugins.Plugin; +/** Plugin entry point of the rank-eval module: registers the rank evaluation action and its REST handler. */ +public class RankEvalPlugin extends Plugin { + public static final String NAME = "rank-eval"; + /** Returns the plugin name, {@link #NAME}. */ + @Override + public String name() { + return NAME; + } + /** Returns a one sentence description of the module. */ + @Override + public String description() { + return "The rank-eval module adds APIs to evaluate rankings."; + } + /** Registers RankEvalAction together with its transport implementation, TransportRankEvalAction. */ + public void onModule(ActionModule actionModule) { + actionModule.registerAction(RankEvalAction.INSTANCE, TransportRankEvalAction.class); + } + /** Registers the REST handler, RestRankEvalAction. */ + public void onModule(NetworkModule networkModule) { + networkModule.registerRestHandler(RestRankEvalAction.class); + } +} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequest.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequest.java new file mode 100644 index 00000000000..3e8f893faaa --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequest.java @@ -0,0 +1,69 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; + + +/** + * Instances of this class represent a complete rank evaluation request. They encode an evaluation task including search intents and search + * specifications to be executed subsequently. + * */ +public class RankEvalRequest extends ActionRequest { + + /** The request data to use for evaluation. */ + private RankEvalSpec task; + /** No validation is implemented yet; always returns null. */ + @Override + public ActionRequestValidationException validate() { + return null; // TODO + } + + /** Returns the specification of this qa run including intents to execute, specifications detailing intent translation and metrics + * to compute. */ + public RankEvalSpec getRankEvalSpec() { + return task; + } + + /** Sets the specification of this qa run including intents to execute, specifications detailing intent translation and metrics + * to compute. */ + public void setRankEvalSpec(RankEvalSpec task) { + this.task = task; + } + + /** Reads the request: delegates to the superclass first, then reads the RankEvalSpec. */ + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + task = new RankEvalSpec(in); + + } + /** Writes the request: delegates to the superclass first, then writes the RankEvalSpec. NOTE(review): throws a NullPointerException when no RankEvalSpec has been set, which validate() does not prevent yet. */ + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + task.writeTo(out); + } +} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequestBuilder.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequestBuilder.java new file mode 100644 index 00000000000..063bec9d8f7 --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalRequestBuilder.java @@ -0,0 +1,44 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.action.Action; +import org.elasticsearch.action.ActionRequestBuilder; +import org.elasticsearch.client.ElasticsearchClient; +/** Request builder for RankEvalAction; its accessors delegate to the wrapped RankEvalRequest. */ +public class RankEvalRequestBuilder extends ActionRequestBuilder { + /** Passes client, action and request through to the superclass unchanged. */ + public RankEvalRequestBuilder(ElasticsearchClient client, Action action, + RankEvalRequest request) { + super(client, action, request); + } + /** Returns the request being built. */ + public RankEvalRequest request() { + return request; + } + /** Sets the evaluation specification on the wrapped request. */ + public void setRankEvalSpec(RankEvalSpec spec) { + this.request.setRankEvalSpec(spec); + } + /** Returns the evaluation specification of the wrapped request. */ + public RankEvalSpec getRankEvalSpec() { + return this.request.getRankEvalSpec(); + } +} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java new file mode 100644 index 00000000000..f4ab789d429 --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResponse.java @@ -0,0 +1,75 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Map; + +/** + * For each qa specification identified by its id this response returns the respective + * averaged precisionAtN value. + * + * In addition, for each query the ids of the returned documents that were not annotated are returned as well. + * + * Documents of unknown quality - i.e. those that haven't been supplied in the set of annotated documents but have been returned + * by the search are not taken into consideration when computing precision at n - they are ignored. 
+ * + **/ +public class RankEvalResponse extends ActionResponse { + /* One result per evaluated specification, in the order in which the results were added. */ + private Collection qualityResults = new ArrayList<>(); + /* Creates an empty response; this is the constructor used by RankEvalAction.newResponse(). */ + public RankEvalResponse() { + + } + /** Reads a size-prefixed list of results. NOTE(review): writeTo calls super.writeTo first, but nothing is read for the superclass here and no readFrom override is visible in this class - confirm that the read and write paths match. */ + public RankEvalResponse(StreamInput in) throws IOException { + int size = in.readInt(); + qualityResults = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + qualityResults.add(new RankEvalResult(in)); + } + } + /** Writes the superclass state, then the number of results followed by each result. */ + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeInt(qualityResults.size()); + for (RankEvalResult result : qualityResults) { + result.writeTo(out); + } + } + /** Wraps the given values in a new RankEvalResult and appends it to this response. */ + public void addRankEvalResult(int specId, double quality, Map> unknownDocs) { + RankEvalResult result = new RankEvalResult(specId, quality, unknownDocs); + this.qualityResults.add(result); + } + /** Returns all results added so far (the internal collection, not a copy). */ + public Collection getRankEvalResults() { + return qualityResults; + } + +} diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResult.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResult.java new file mode 100644 index 00000000000..b5bae8b6d4c --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalResult.java @@ -0,0 +1,73 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; + +/** + * For each precision at n computation the id of the search request specification used to generate search requests is returned + * for reference. In addition the averaged precision and the ids of all documents returned but not found annotated are returned. + * */ +public class RankEvalResult implements Writeable { + /**ID of specification this result was generated for.*/ + private int specId; + /**Average precision observed when issuing query intents with this spec.*/ + private double qualityLevel; + /**Mapping from intent id to all documents seen for this intent that were not annotated.*/ + private Map> unknownDocs; + /** Reads spec id, quality level and the unknown documents map, in the order used by writeTo; the map is read as a generic value, hence the unchecked cast. */ + @SuppressWarnings("unchecked") + public RankEvalResult(StreamInput in) throws IOException { + this.specId = in.readInt(); + this.qualityLevel = in.readDouble(); + this.unknownDocs = (Map>) in.readGenericValue(); + } + /** Creates a result from the given values; the map is stored as-is (no defensive copy is made). */ + public RankEvalResult(int specId, double quality, Map> unknownDocs) { + this.specId = specId; + this.qualityLevel = quality; + this.unknownDocs = unknownDocs; + } + /** Writes spec id, quality level and the unknown documents map (as a generic value). */ + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeInt(specId); + out.writeDouble(qualityLevel); + out.writeGenericValue(getUnknownDocs()); + } + /** Returns the id of the specification this result was generated for. */ + public int getSpecId() { + return specId; + } + /** Returns the quality level stored for this specification. */ + public double getQualityLevel() { + return qualityLevel; + } + /** Returns the unknown documents map (the internal map, not a copy). */ + public Map> getUnknownDocs() { + return unknownDocs; + } +} \ No newline at end of file diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalSpec.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalSpec.java 
new file mode 100644
index 00000000000..f625401be53
--- /dev/null
+++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankEvalSpec.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.rankeval;
+
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+
+/**
+ * This class defines a qa task including query intent and query spec.
+ *
+ * Each QA run is based on a set of queries to send to the index and multiple QA specifications that define how to translate the query
+ * intents into Elasticsearch queries. In addition it contains the quality metrics to compute.
+ * */
+
+public class RankEvalSpec implements Writeable {
+    /** Collection of query intents to check against including expected document ids.*/
+    private Collection<RatedQuery> intents;
+    /** Collection of query specifications, that is e.g. search request templates to use for query translation. */
+    private Collection<QuerySpec> specifications;
+    /** Quality metric to compute, e.g. precision at n. */
+    private RankedListQualityMetric eval;
+
+    /** Creates a task from the given query intents, query specifications and quality metric. */
+    public RankEvalSpec(Collection<RatedQuery> intents, Collection<QuerySpec> specs, RankedListQualityMetric metric) {
+        this.intents = intents;
+        this.specifications = specs;
+        this.eval = metric;
+    }
+
+    /** Reads a task from the stream in the order written by {@link #writeTo(StreamOutput)}. */
+    public RankEvalSpec(StreamInput in) throws IOException {
+        int intentSize = in.readInt();
+        intents = new ArrayList<>(intentSize);
+        for (int i = 0; i < intentSize; i++) {
+            intents.add(new RatedQuery(in));
+        }
+        int specSize = in.readInt();
+        specifications = new ArrayList<>(specSize);
+        for (int i = 0; i < specSize; i++) {
+            specifications.add(new QuerySpec(in));
+        }
+        eval = in.readNamedWriteable(RankedListQualityMetric.class); // TODO add to registry
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        out.writeInt(intents.size());
+        for (RatedQuery query : intents) {
+            query.writeTo(out);
+        }
+        out.writeInt(specifications.size());
+        for (QuerySpec spec : specifications) {
+            spec.writeTo(out);
+        }
+        out.writeNamedWriteable(eval);
+    }
+
+    /** Returns the quality metric to compute, e.g. the precision at n configuration (containing level of n to consider).*/
+    public RankedListQualityMetric getEvaluator() {
+        return eval;
+    }
+
+    /** Sets the quality metric to compute, e.g. the precision at n configuration (containing level of n to consider).*/
+    public void setEvaluator(RankedListQualityMetric config) {
+        this.eval = config;
+    }
+
+    /** Returns a list of search intents to evaluate. */
+    public Collection<RatedQuery> getIntents() {
+        return intents;
+    }
+
+    /** Set a list of search intents to evaluate. */
+    public void setIntents(Collection<RatedQuery> intents) {
+        this.intents = intents;
+    }
+
+    /** Returns a list of intent to query translation specifications to evaluate. */
+    public Collection<QuerySpec> getSpecifications() {
+        return specifications;
+    }
+
+    /** Set the list of intent to query translation specifications to evaluate. */
+    public void setSpecifications(Collection<QuerySpec> specifications) {
+        this.specifications = specifications;
+    }
+
+}
diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java
new file mode 100644
index 00000000000..1a75247e48d
--- /dev/null
+++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RankedListQualityMetric.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.rankeval;
+
+import org.elasticsearch.search.SearchHit;
+
+/**
+ * Classes implementing this interface provide a means to compute the quality of a result list
+ * returned by some search.
+ *
+ * RelevancyLevel specifies the type of object determining the relevancy level of some known docid.
+ * */
+public interface RankedListQualityMetric extends Evaluator {
+
+    /**
+     * Returns a single metric representing the ranking quality of a set of returned documents
+     * with respect to a set of document ids labeled as relevant for this search.
+     *
+     * @param hits the result hits as returned by some search
+     * @param intent the rated query the hits were returned for, including its document ratings
+     * @return some metric representing the quality of the result hit list with respect to relevant doc ids.
+     * */
+    public EvalQueryQuality evaluate(SearchHit[] hits, RatedQuery intent);
+}
diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedQuery.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedQuery.java
new file mode 100644
index 00000000000..5c4c7ea1e53
--- /dev/null
+++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedQuery.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.rankeval;
+
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+/**
+ * Objects of this class represent one type of user query to qa. Each query comprises a user supplied id for easier referencing,
+ * a set of parameters as supplied by the end user to the search application as well as a set of rated documents (ratings e.g.
+ * supplied by manual result tagging or some form of automated click log based process).
+ * */
+public class RatedQuery implements Writeable {
+
+    private final int intentId;
+    private final Map<String, Object> intentParameters;
+    private final Map<String, Integer> ratedDocuments;
+
+    public RatedQuery(
+            int intentId, Map<String, Object> intentParameters, Map<String, Integer> ratedDocuments) {
+        this.intentId = intentId;
+        this.intentParameters = intentParameters;
+        this.ratedDocuments = ratedDocuments;
+    }
+
+    public RatedQuery(StreamInput in) throws IOException {
+        this.intentId = in.readInt();
+        this.intentParameters = in.readMap();
+
+        int ratedDocsSize = in.readInt();
+        this.ratedDocuments = new HashMap<>(ratedDocsSize);
+        for (int i = 0; i < ratedDocsSize; i++) {
+            this.ratedDocuments.put(in.readString(), in.readInt());
+        }
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        out.writeInt(intentId);
+        out.writeMap(intentParameters);
+        out.writeInt(ratedDocuments.size());
+        for (Entry<String, Integer> entry : ratedDocuments.entrySet()) {
+            out.writeString(entry.getKey());
+            out.writeInt(entry.getValue());
+        }
+    }
+
+    /** For easier referencing users are allowed to supply unique ids with each search intent they want to check for
+     * performance quality wise.*/
+    public int getIntentId() {
+        return intentId;
+    }
+
+
+    /**
+     * Returns a mapping from query parameter name to real parameter - ideally as parsed from real user logs.
+     * */
+    public Map<String, Object> getIntentParameters() {
+        return intentParameters;
+    }
+
+    /**
+     * Returns a set of documents and their ratings as supplied by the users.
+     * */
+    public Map<String, Integer> getRatedDocuments() {
+        return ratedDocuments;
+    }
+
+}
diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RestRankEvalAction.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RestRankEvalAction.java
new file mode 100644
index 00000000000..b8093692130
--- /dev/null
+++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RestRankEvalAction.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.rankeval;
+
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.ParseFieldMatcher;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.indices.query.IndicesQueriesRegistry;
+import org.elasticsearch.rest.BaseRestHandler;
+import org.elasticsearch.rest.RestChannel;
+import org.elasticsearch.rest.RestController;
+import org.elasticsearch.rest.RestRequest;
+import org.elasticsearch.rest.action.support.RestActions;
+import org.elasticsearch.search.aggregations.AggregatorParsers;
+import org.elasticsearch.search.suggest.Suggesters;
+
+import java.io.IOException;
+
+import static org.elasticsearch.rest.RestRequest.Method.GET;
+import static org.elasticsearch.rest.RestRequest.Method.POST;
+
+/**
+ * REST handler registered for the {@code _rank_eval} endpoints.
+ *
+ * Accepted input format:
+ *
+ * General Format:
+ *
+ * <pre>
+ * { "requests": [{
+ *     "id": "human_readable_id",
+ *     "request": { ... request to check ... },
+ *     "ratings": { ... mapping from doc id to rating value ... }
+ *   }],
+ *   "metric": {
+ *     "... metric_name... ": {
+ *       "... metric_parameter_key ...": ...metric_parameter_value...
+ *   }}}
+ * </pre>
+ *
+ * Example:
+ *
+ * <pre>
+ * {"requests": [{
+ *     "id": "amsterdam_query",
+ *     "request": {
+ *         "query": {
+ *             "bool": {
+ *                 "must": [
+ *                     {"match": {"beverage": "coffee"}},
+ *                     {"term": {"browser": {"value": "safari"}}},
+ *                     {"term": {"time_of_day": {"value": "morning","boost": 2}}},
+ *                     {"term": {"ip_location": {"value": "ams","boost": 10}}}]}
+ *         },
+ *         "size": 10
+ *     },
+ *     "ratings": {
+ *         "1": 1,
+ *         "2": 0,
+ *         "3": 1,
+ *         "4": 1
+ *     }
+ *   }, {
+ *     "id": "berlin_query",
+ *     "request": {
+ *         "query": {
+ *             "bool": {
+ *                 "must": [
+ *                     {"match": {"beverage": "club mate"}},
+ *                     {"term": {"browser": {"value": "chromium"}}},
+ *                     {"term": {"time_of_day": {"value": "evening","boost": 2}}},
+ *                     {"term": {"ip_location": {"value": "ber","boost": 10}}}]}
+ *         },
+ *         "size": 10
+ *     },
+ *     "ratings": {
+ *         "1": 0,
+ *         "5": 1,
+ *         "6": 1
+ *     }
+ *   }],
+ *   "metric": {
+ *     "precisionAtN": {
+ *       "size": 10}}
+ * }
+ * </pre>
+ *
+ * Output format:
+ *
+ * General format:
+ *
+ * <pre>
+ * {
+ *   "took": 59,
+ *   "timed_out": false,
+ *   "_shards": {
+ *     "total": 5,
+ *     "successful": 5,
+ *     "failed": 0
+ *   },
+ *   "quality_level": ... quality level ...,
+ *   "unknown_docs": [{"user_request_id": [... list of unknown docs ...]}]
+ * }
+ * </pre>
+ *
+ * Example:
+ *
+ * <pre>
+ * {
+ *   "took": 59,
+ *   "timed_out": false,
+ *   "_shards": {
+ *     "total": 5,
+ *     "successful": 5,
+ *     "failed": 0
+ *   },
+ *   "rank_eval": [{
+ *     "spec_id": "huge_weight_on_city",
+ *     "quality_level": 0.4,
+ *     "unknown_docs": [{
+ *       "amsterdam_query": [5, 10, 23]
+ *     }, {
+ *       "berlin_query": [42]
+ *     }]
+ *   }]
+ * }
+ * </pre>
+ *
+ * NOTE(review): the general output format and the output example disagree on where quality_level and unknown_docs live;
+ * confirm which one the response is meant to produce.
+ * */
+public class RestRankEvalAction extends BaseRestHandler {
+
+    /**
+     * Registers this handler for GET and POST on the global, the index level and the index/type level endpoint.
+     */
+    @Inject
+    public RestRankEvalAction(Settings settings, RestController controller, Client client, IndicesQueriesRegistry queryRegistry,
+            AggregatorParsers aggParsers, Suggesters suggesters) {
+        super(settings, client);
+        String[] endpoints = { "/_rank_eval", "/{index}/_rank_eval", "/{index}/{type}/_rank_eval" };
+        for (String endpoint : endpoints) {
+            controller.registerHandler(GET, endpoint, this);
+            controller.registerHandler(POST, endpoint, this);
+        }
+    }
+
+    /**
+     * Placeholder: creates an empty {@link RankEvalRequest} and returns.
+     *
+     * NOTE(review): nothing is sent on the channel yet, the parsing and execution calls are still commented out.
+     */
+    @Override
+    public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) throws IOException {
+        RankEvalRequest rankEvalRequest = new RankEvalRequest();
+        //parseRankEvalRequest(rankEvalRequest, request, parseFieldMatcher);
+        //client.rankEval(rankEvalRequest, new RestStatusToXContentListener<>(channel));
+    }
+
+    /**
+     * Reads the "index" parameter and, if present, the request body. Neither is applied to the given request yet.
+     */
+    public static void parseRankEvalRequest(RankEvalRequest rankEvalRequest, RestRequest request, ParseFieldMatcher parseFieldMatcher)
+            throws IOException {
+
+        String[] indices = Strings.splitStringByCommaToArray(request.param("index"));
+        BytesReference restContent = RestActions.hasBodyContent(request) ? RestActions.getRestContent(request) : null;
+        if (restContent != null) {
+            // TODO parse the body into the rank eval request
+        }
+
+    }
+}
diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/TransportRankEvalAction.java
b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/TransportRankEvalAction.java
new file mode 100644
index 00000000000..f75bf3a0ebd
--- /dev/null
+++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/TransportRankEvalAction.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.rankeval;
+
+import org.elasticsearch.action.ActionFuture;
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.search.SearchRequest;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.search.TransportSearchAction;
+import org.elasticsearch.action.support.ActionFilters;
+import org.elasticsearch.action.support.AutoCreateIndex;
+import org.elasticsearch.action.support.HandledTransportAction;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.script.ScriptService;
+import org.elasticsearch.search.SearchHits;
+import org.elasticsearch.search.action.SearchTransportService;
+import org.elasticsearch.search.builder.SearchSourceBuilder;
+import org.elasticsearch.search.controller.SearchPhaseController;
+import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.transport.TransportService;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Instances of this class execute a collection of search intents (read: user supplied query parameters) against a set of
+ * possible search requests (read: search specifications, expressed as query/search request templates) and compare the result
+ * against a set of annotated documents per search intent.
+ *
+ * If any documents are returned that haven't been annotated the document id of those is returned per search intent.
+ *
+ * The resulting search quality is computed in terms of precision at n and returned for each search specification for the full
+ * set of search intents as averaged precision at n.
+ * */
+public class TransportRankEvalAction extends HandledTransportAction<RankEvalRequest, RankEvalResponse> {
+    /**
+     * Search action used to run the request of every specification. It is built once here instead of once per intent,
+     * since every construction of a {@link HandledTransportAction} registers another request handler for its action name.
+     */
+    private final TransportSearchAction transportSearchAction;
+
+    @Inject
+    public TransportRankEvalAction(Settings settings, ThreadPool threadPool, ActionFilters actionFilters,
+            IndexNameExpressionResolver indexNameExpressionResolver, ClusterService clusterService, ScriptService scriptService,
+            AutoCreateIndex autoCreateIndex, Client client, TransportService transportService, SearchPhaseController searchPhaseController,
+            SearchTransportService searchTransportService, NamedWriteableRegistry namedWriteableRegistry) {
+        super(settings, RankEvalAction.NAME, threadPool, transportService, actionFilters, indexNameExpressionResolver,
+                RankEvalRequest::new);
+        this.transportSearchAction = new TransportSearchAction(settings, threadPool, searchPhaseController, transportService,
+                searchTransportService, clusterService, actionFilters, indexNameExpressionResolver);
+
+        namedWriteableRegistry.register(RankedListQualityMetric.class, PrecisionAtN.NAME, PrecisionAtN::new);
+    }
+
+    /**
+     * Executes the search request of every specification once per rated query, scores the returned hits with the
+     * configured metric and reports the per-specification average together with the unknown documents of each query.
+     *
+     * @param request carries the {@link RankEvalSpec} to evaluate
+     * @param listener receives the assembled {@link RankEvalResponse}
+     */
+    @Override
+    protected void doExecute(RankEvalRequest request, ActionListener<RankEvalResponse> listener) {
+        RankEvalResponse response = new RankEvalResponse();
+        RankEvalSpec qualityTask = request.getRankEvalSpec();
+        RankedListQualityMetric metric = qualityTask.getEvaluator();
+        Collection<RatedQuery> intents = qualityTask.getIntents();
+
+        for (QuerySpec spec : qualityTask.getSpecifications()) {
+            SearchSourceBuilder specRequest = spec.getTestRequest();
+            String[] indices = new String[spec.getIndices().size()];
+            spec.getIndices().toArray(indices);
+            // NOTE(review): the same request is sent for every intent; the intent parameters are not applied to it yet.
+            SearchRequest templatedRequest = new SearchRequest(indices, specRequest);
+
+            double qualitySum = 0;
+            Map<Integer, Collection<String>> unknownDocs = new HashMap<>();
+            for (RatedQuery intent : intents) {
+                // NOTE(review): actionGet() blocks the calling thread until the search has returned.
+                ActionFuture<SearchResponse> searchResponse = transportSearchAction.execute(templatedRequest);
+                SearchHits hits = searchResponse.actionGet().getHits();
+
+                EvalQueryQuality intentQuality = metric.evaluate(hits.getHits(), intent);
+                qualitySum += intentQuality.getQualityLevel();
+                unknownDocs.put(intent.getIntentId(), intentQuality.getUnknownDocs());
+            }
+            // Without intents the plain average would be 0.0 / 0, i.e. NaN; report 0 instead.
+            double averageQuality = intents.isEmpty() ? 0 : qualitySum / intents.size();
+            response.addRankEvalResult(spec.getSpecId(), averageQuality, unknownDocs);
+        }
+        listener.onResponse(response);
+    }
+}
diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/PrecisionAtRequestTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/PrecisionAtRequestTests.java
new file mode 100644
index 00000000000..c6a9fa658d0
--- /dev/null
+++ b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/PrecisionAtRequestTests.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.quality;
+
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.index.query.MatchQueryBuilder;
+import org.elasticsearch.index.rankeval.PrecisionAtN;
+import org.elasticsearch.index.rankeval.RankEvalPlugin;
+import org.elasticsearch.index.rankeval.RatedQuery;
+import org.elasticsearch.index.rankeval.PrecisionAtN.Rating;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.test.ESIntegTestCase;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+
+/**
+ * Integration tests computing precision at n on the hits of real searches against a small "test" index.
+ */
+@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, transportClientRatio = 0.0)
+// NORELEASE need to fix transport client use case
+public class PrecisionAtRequestTests extends ESIntegTestCase {
+    @Override
+    protected Collection<Class<? extends Plugin>> transportClientPlugins() {
+        return pluginList(RankEvalPlugin.class);
+    }
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return pluginList(RankEvalPlugin.class);
+    }
+
+    /** Indexes one "berlin" document (id 1) and five "amsterdam" documents (ids 2 to 6). */
+    @Before
+    public void setup() {
+        createIndex("test");
+        ensureGreen();
+
+        client().prepareIndex("test", "testtype").setId("1")
+                .setSource("text", "berlin").get();
+        for (int id = 2; id <= 6; id++) {
+            client().prepareIndex("test", "testtype").setId(Integer.toString(id))
+                    .setSource("text", "amsterdam").get();
+        }
+        refresh();
+    }
+
+
+    public void testPrecisionAtFiveCalculation() throws IOException, InterruptedException, ExecutionException {
+        // TODO turn into unit test - no need to execute the query here to fill hits object
+        MatchQueryBuilder query = new MatchQueryBuilder("text", "berlin");
+
+        SearchResponse response = client().prepareSearch().setQuery(query)
+                .execute().actionGet();
+
+        Map<String, Integer> relevant = createRelevant("1");
+        RatedQuery intent = new RatedQuery(0, new HashMap<>(), relevant);
+        SearchHit[] hits = response.getHits().getHits();
+
+        assertEquals(1, (new PrecisionAtN(5)).evaluate(hits, intent).getQualityLevel(), 0.00001);
+    }
+
+    public void testPrecisionAtFiveIgnoreOneResult() throws IOException, InterruptedException, ExecutionException {
+        // TODO turn into unit test - no need to actually execute the query here to fill the hits object
+        MatchQueryBuilder query = new MatchQueryBuilder("text", "amsterdam");
+
+        SearchResponse response = client().prepareSearch().setQuery(query)
+                .execute().actionGet();
+
+        Map<String, Integer> relevant = createRelevant("2", "3", "4", "5");
+        relevant.put("6", Rating.IRRELEVANT.ordinal());
+        RatedQuery intent = new RatedQuery(0, new HashMap<>(), relevant);
+        SearchHit[] hits = response.getHits().getHits();
+
+        assertEquals((double) 4 / 5, (new PrecisionAtN(5)).evaluate(hits, intent).getQualityLevel(), 0.00001);
+    }
+
+    public void testPrecisionJSON() {
+        // TODO not implemented yet
+    }
+
+/*    public void testPrecisionAction() {
+        // TODO turn into REST test?
+
+        Collection<RatedQuery> intents = new ArrayList<RatedQuery>();
+        RatedQuery intentAmsterdam = new RatedQuery(
+                0,
+                createParams("var", "amsterdam"),
+                createRelevant("2", "3", "4", "5"));
+        intents.add(intentAmsterdam);
+
+        RatedQuery intentBerlin = new RatedQuery(
+                1,
+                createParams("var", "berlin"),
+                createRelevant("1"));
+        intents.add(intentBerlin);
+
+        Collection<QuerySpec> specs = new ArrayList<QuerySpec>();
+        ArrayList<String> indices = new ArrayList<>();
+        indices.add("test");
+        ArrayList<String> types = new ArrayList<>();
+        types.add("testtype");
+
+        SearchSourceBuilder source = new SearchSourceBuilder();
+        QuerySpec spec = new QuerySpec(0, source, indices, types);
+        specs.add(spec);
+
+        RankEvalSpec task = new RankEvalSpec(intents, specs, new PrecisionAtN(10));
+
+        RankEvalRequestBuilder builder = new RankEvalRequestBuilder(
+                client(),
+                RankEvalAction.INSTANCE,
+                new RankEvalRequest());
+        builder.setRankEvalSpec(task);
+
+        RankEvalResponse response = client().execute(RankEvalAction.INSTANCE, builder.request()).actionGet();
+        RankEvalResult result = response.getRankEvalResults().iterator().next();
+        for (Entry<Integer, Collection<String>> entry : result.getUnknownDocs().entrySet()) {
+            if (entry.getKey() == 0) {
+                assertEquals(1, entry.getValue().size());
+            } else {
+                assertEquals(0, entry.getValue().size());
+            }
+        }
+    }*/
+
+    /**
+     * Builds a rating map that marks every given document id as relevant.
+     *
+     * NOTE(review): ratings are taken from {@code Rating.ordinal()}, so they depend on the declaration order of the enum.
+     */
+    private Map<String, Integer> createRelevant(String... docs) {
+        Map<String, Integer> relevant = new HashMap<>();
+        for (String doc : docs) {
+            relevant.put(doc, Rating.RELEVANT.ordinal());
+        }
+        return relevant;
+    }
+
+    /** Builds a parameter map holding the single given key/value pair. */
+    private Map<String, Object> createParams(String key, String value) {
+        Map<String, Object> parameters = new HashMap<>();
+        parameters.put(key, value);
+        return parameters;
+    }
+
+}
diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRestIT.java b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRestIT.java
new file mode 100644
index 00000000000..b68385017f6
--- /dev/null
+++ b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRestIT.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.quality;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.test.rest.ESRestTestCase;
+import org.elasticsearch.test.rest.RestTestCandidate;
+import org.elasticsearch.test.rest.parser.RestTestParseException;
+
+import java.io.IOException;
+
+/**
+ * REST test entry point of the rank-eval module; every test candidate is handed to {@link ESRestTestCase}.
+ */
+public class RankEvalRestIT extends ESRestTestCase {
+    public RankEvalRestIT(@Name("yaml") RestTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    /** Supplies the test candidates as created by {@link ESRestTestCase#createParameters(int, int)}. */
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
+        return ESRestTestCase.createParameters(0, 1);
+    }
+}
diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml
new file mode 100644
index 00000000000..201efde33aa
--- /dev/null
+++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml
@@ -0,0 +1,48 @@
+---
+"Response format":
+
+  - do:
+      index:
+        index:   foo
+        type:    bar
+        id:      1
+        body:    { "text": "berlin" }
+
+  - do:
+      index:
+        index:   foo
+        type:    bar
+        id:      2
+        body:    { "text": "amsterdam" }
+
+  - do:
+      index:
+        index:   foo
+        type:    bar
+        id:      3
+        body:    { "text": "amsterdam" }
+
+  - do:
+      indices.refresh: {}
+
+  - do:
+      rank_eval:
+        body:
+          requests: [
+            {
+                id: "amsterdam_query",
+                request: { query: {match : {text : "amsterdam" }}},
+                ratings: { "1": 0, "2": 1, "3": 1 }
+            }, {
+                id: "berlin_query",
+                request: { query: { match : { text : "berlin" } }, size : 10 },
+                ratings: {"1": 1}
+            }
+          ]
+          metric: { precisionAtN: { size: 10}}
+
+  - match: {quality_level: 1}
+  - gte: { took: 0 }
+  - is_false: task
+  - is_false: deleted
+
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/rank_eval.json b/rest-api-spec/src/main/resources/rest-api-spec/api/rank_eval.json
new file mode 100644
index 00000000000..681eb9f6081
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/rank_eval.json @@ -0,0 +1,17 @@ +{ + "rank_eval": { + "documentation": "https://www.elastic.co/guide/en/elasticsearch/reference/master/docs-rank-eval.html", + "methods": ["POST"], + "url": { + "path": "/_rank_eval", + "paths": ["/_rank_eval"], + "parts": {}, + "params": {} + }, + "body": { + "description": "The search definition using the Query DSL and the prototype for the eval request.", + "required": true + } + } +} + diff --git a/settings.gradle b/settings.gradle index 6588b605a9d..982fc004a18 100644 --- a/settings.gradle +++ b/settings.gradle @@ -25,6 +25,7 @@ List projects = [ 'modules:lang-mustache', 'modules:lang-painless', 'modules:reindex', + 'modules:rank-eval', 'modules:percolator', 'plugins:analysis-icu', 'plugins:analysis-kuromoji',