Add `hits` section to response for each ranking evaluation query

This change adds a `hits` section to the response part for each ranking
evaluation query, containing a list of documents (index/type/id) and ratings (if
the document was rated in the request). This section can be used to better
understand the calculation of the ranking quality of this particular query, but
it can also be used to identify the "unknown" (that is unrated) documents that
were part of the search hits, for example because a UI later wants to present
those documents to the user to get a rating for them.

If the user specifies a set of field names using a parameter called
`summary_fields` in the request, those fields are also included as part of the
response in addition to "_index", "_type", "_id".
This commit is contained in:
Christoph Büscher 2016-09-21 14:12:47 +02:00
parent cd9d07b91b
commit ebe13100df
14 changed files with 472 additions and 68 deletions

View File

@ -36,6 +36,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
public class DiscountedCumulativeGainAt extends RankedListQualityMetric {
@ -140,36 +141,44 @@ public class DiscountedCumulativeGainAt extends RankedListQualityMetric {
@Override
public EvalQueryQuality evaluate(String taskId, SearchHit[] hits, List<RatedDocument> ratedDocs) {
Map<RatedDocumentKey, RatedDocument> ratedDocsByKey = new HashMap<>();
Map<RatedDocumentKey, RatedDocument> ratedDocsByKey = new HashMap<>(ratedDocs.size());
List<Integer> allRatings = new ArrayList<>(ratedDocs.size());
for (RatedDocument doc : ratedDocs) {
ratedDocsByKey.put(doc.getKey(), doc);
allRatings.add(doc.getRating());
}
List<RatedDocumentKey> unknownDocIds = new ArrayList<>();
List<Integer> ratings = new ArrayList<>();
List<RatedSearchHit> hitsAndRatings = new ArrayList<>();
List<Integer> ratingsInSearchHits = new ArrayList<>();
for (int i = 0; (i < position && i < hits.length); i++) {
RatedDocumentKey id = new RatedDocumentKey(hits[i].getIndex(), hits[i].getType(), hits[i].getId());
RatedDocument ratedDoc = ratedDocsByKey.get(id);
if (ratedDoc != null) {
ratings.add(ratedDoc.getRating());
ratingsInSearchHits.add(ratedDoc.getRating());
hitsAndRatings.add(new RatedSearchHit(hits[i], Optional.of(ratedDoc.getRating())));
} else {
unknownDocIds.add(id);
if (unknownDocRating != null) {
ratings.add(unknownDocRating);
ratingsInSearchHits.add(unknownDocRating);
hitsAndRatings.add(new RatedSearchHit(hits[i], Optional.of(unknownDocRating)));
} else {
// we add null here so that the later computation knows this position had no rating
ratings.add(null);
ratingsInSearchHits.add(null);
hitsAndRatings.add(new RatedSearchHit(hits[i], Optional.empty()));
}
}
}
double dcg = computeDCG(ratings);
double dcg = computeDCG(ratingsInSearchHits);
if (normalize) {
Collections.sort(ratings, Comparator.nullsLast(Collections.reverseOrder()));
double idcg = computeDCG(ratings);
Collections.sort(allRatings, Comparator.nullsLast(Collections.reverseOrder()));
double idcg = computeDCG(allRatings.subList(0, Math.min(hits.length, allRatings.size())));
dcg = dcg / idcg;
}
return new EvalQueryQuality(taskId, dcg, unknownDocIds);
EvalQueryQuality evalQueryQuality = new EvalQueryQuality(taskId, dcg, unknownDocIds);
evalQueryQuality.addHitsAndRatings(hitsAndRatings);
return evalQueryQuality;
}
private static double computeDCG(List<Integer> ratings) {

View File

@ -26,6 +26,7 @@ import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;;
@ -42,6 +43,7 @@ public class EvalQueryQuality implements ToXContent, Writeable {
private String id;
private double qualityLevel;
private MetricDetails optionalMetricDetails;
private List<RatedSearchHit> hits = new ArrayList<>();
public EvalQueryQuality(String id, double qualityLevel, List<RatedDocumentKey> unknownDocs) {
this.id = id;
@ -51,9 +53,19 @@ public class EvalQueryQuality implements ToXContent, Writeable {
/**
 * Reads an instance from the given stream.
 * The fields are read in this order: id, quality level, unknown document keys,
 * rated search hits and finally the optional metric details. This is the same
 * order in which {@link #writeTo(StreamOutput)} writes them.
 *
 * @param in the stream to read from
 * @throws IOException if reading from the stream fails
 */
public EvalQueryQuality(StreamInput in) throws IOException {
this(in.readString(), in.readDouble(), in.readList(RatedDocumentKey::new));
// hits and metric details are not constructor arguments, so they are read after delegating
this.hits = in.readList(RatedSearchHit::new);
this.optionalMetricDetails = in.readOptionalNamedWriteable(MetricDetails.class);
}
/**
 * Writes this instance to the given stream: id, quality level, unknown document
 * keys, rated search hits and the optional metric details, in that order.
 * The stream-reading constructor consumes the fields in the same order.
 *
 * @param out the stream to write to
 * @throws IOException if writing to the stream fails
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(id);
out.writeDouble(qualityLevel);
out.writeList(unknownDocs);
out.writeList(hits);
out.writeOptionalNamedWriteable(this.optionalMetricDetails);
}
public String getId() {
return id;
}
@ -74,15 +86,12 @@ public class EvalQueryQuality implements ToXContent, Writeable {
return this.optionalMetricDetails;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(id);
out.writeDouble(qualityLevel);
out.writeVInt(unknownDocs.size());
for (RatedDocumentKey key : unknownDocs) {
key.writeTo(out);
}
out.writeOptionalNamedWriteable(this.optionalMetricDetails);
/**
 * Sets the rated search hits for this query.
 * Despite the "add" in its name, this replaces the current list with the given
 * one; the list is stored as-is, without copying.
 *
 * @param hits the search hits together with their (optional) ratings
 */
public void addHitsAndRatings(List<RatedSearchHit> hits) {
this.hits = hits;
}
/**
 * Returns the rated search hits of this query.
 * The internal list itself is returned, not a copy.
 *
 * @return the search hits together with their (optional) ratings
 */
public List<RatedSearchHit> getHitsAndRatings() {
return this.hits;
}
@Override
@ -94,6 +103,11 @@ public class EvalQueryQuality implements ToXContent, Writeable {
key.toXContent(builder, params);
}
builder.endArray();
builder.startArray("hits");
for (RatedSearchHit hit : hits) {
hit.toXContent(builder, params);
}
builder.endArray();
if (optionalMetricDetails != null) {
builder.startObject("metric_details");
optionalMetricDetails.toXContent(builder, params);
@ -115,11 +129,12 @@ public class EvalQueryQuality implements ToXContent, Writeable {
return Objects.equals(id, other.id) &&
Objects.equals(qualityLevel, other.qualityLevel) &&
Objects.equals(unknownDocs, other.unknownDocs) &&
Objects.equals(hits, other.hits) &&
Objects.equals(optionalMetricDetails, other.optionalMetricDetails);
}
@Override
public final int hashCode() {
return Objects.hash(id, qualityLevel, unknownDocs, optionalMetricDetails);
return Objects.hash(id, qualityLevel, unknownDocs, hits, optionalMetricDetails);
}
}

View File

@ -30,9 +30,11 @@ import org.elasticsearch.search.SearchHit;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import javax.naming.directory.SearchResult;
@ -122,32 +124,39 @@ public class PrecisionAtN extends RankedListQualityMetric {
@Override
public EvalQueryQuality evaluate(String taskId, SearchHit[] hits, List<RatedDocument> ratedDocs) {
Collection<RatedDocumentKey> relevantDocIds = new ArrayList<>();
Collection<RatedDocumentKey> irrelevantDocIds = new ArrayList<>();
Map<RatedDocumentKey, RatedDocument> relevantDocIds = new HashMap<>();
Map<RatedDocumentKey, RatedDocument> irrelevantDocIds = new HashMap<>();
for (RatedDocument doc : ratedDocs) {
if (doc.getRating() >= this.relevantRatingThreshhold) {
relevantDocIds.add(doc.getKey());
relevantDocIds.put(doc.getKey(), doc);
} else {
irrelevantDocIds.add(doc.getKey());
irrelevantDocIds.put(doc.getKey(), doc);
}
}
int good = 0;
int bad = 0;
List<RatedDocumentKey> unknownDocIds = new ArrayList<>();
List<RatedSearchHit> hitsAndRatings = new ArrayList<>();
for (int i = 0; (i < n && i < hits.length); i++) {
RatedDocumentKey hitKey = new RatedDocumentKey(hits[i].getIndex(), hits[i].getType(), hits[i].getId());
if (relevantDocIds.contains(hitKey)) {
if (relevantDocIds.keySet().contains(hitKey)) {
RatedDocument ratedDocument = relevantDocIds.get(hitKey);
good++;
} else if (irrelevantDocIds.contains(hitKey)) {
hitsAndRatings.add(new RatedSearchHit(hits[i], Optional.of(ratedDocument.getRating())));
} else if (irrelevantDocIds.keySet().contains(hitKey)) {
RatedDocument ratedDocument = irrelevantDocIds.get(hitKey);
bad++;
hitsAndRatings.add(new RatedSearchHit(hits[i], Optional.of(ratedDocument.getRating())));
} else {
unknownDocIds.add(hitKey);
hitsAndRatings.add(new RatedSearchHit(hits[i], Optional.empty()));
}
}
double precision = (double) good / (good + bad);
EvalQueryQuality evalQueryQuality = new EvalQueryQuality(taskId, precision, unknownDocIds);
evalQueryQuality.addMetricDetails(new PrecisionAtN.Breakdown(good, good + bad));
evalQueryQuality.addHitsAndRatings(hitsAndRatings);
return evalQueryQuality;
}

View File

@ -47,6 +47,7 @@ public class RatedRequest extends ToXContentToBytes implements Writeable {
private SearchSourceBuilder testRequest;
private List<String> indices = new ArrayList<>();
private List<String> types = new ArrayList<>();
private List<String> summaryFields = new ArrayList<>();
/** Collection of rated queries for this query QA specification.*/
private List<RatedDocument> ratedDocs = new ArrayList<>();
@ -82,6 +83,11 @@ public class RatedRequest extends ToXContentToBytes implements Writeable {
for (int i = 0; i < intentSize; i++) {
ratedDocs.add(new RatedDocument(in));
}
int summaryFieldsSize = in.readInt();
summaryFields = new ArrayList<>(summaryFieldsSize);
for (int i = 0; i < summaryFieldsSize; i++) {
this.summaryFields.add(in.readString());
}
}
@Override
@ -100,6 +106,10 @@ public class RatedRequest extends ToXContentToBytes implements Writeable {
for (RatedDocument ratedDoc : ratedDocs) {
ratedDoc.writeTo(out);
}
out.writeInt(summaryFields.size());
for (String fieldName : summaryFields) {
out.writeString(fieldName);
}
}
public SearchSourceBuilder getTestRequest() {
@ -146,13 +156,24 @@ public class RatedRequest extends ToXContentToBytes implements Writeable {
this.ratedDocs = ratedDocs;
}
/**
 * Sets the names of the fields that should be included in the summary of
 * matched documents. The given list is stored as-is, without copying.
 *
 * @param fields the summary field names
 */
public void setSummaryFields(List<String> fields) {
this.summaryFields = fields;
}
/**
 * Returns the list of field names that are included in the docs summary of matched documents.
 * The internal list itself is returned, not a copy.
 *
 * @return the summary field names
 */
public List<String> getSummaryFields() {
return summaryFields;
}
private static final ParseField ID_FIELD = new ParseField("id");
private static final ParseField REQUEST_FIELD = new ParseField("request");
private static final ParseField RATINGS_FIELD = new ParseField("ratings");
private static final ParseField FIELDS_FIELD = new ParseField("summary_fields");
private static final ObjectParser<RatedRequest, RankEvalContext> PARSER = new ObjectParser<>("requests", RatedRequest::new);
static {
PARSER.declareString(RatedRequest::setSpecId, ID_FIELD);
PARSER.declareStringArray(RatedRequest::setSummaryFields, FIELDS_FIELD);
PARSER.declareObject(RatedRequest::setTestRequest, (p, c) -> {
try {
return SearchSourceBuilder.fromXContent(c.getParseContext(), c.getAggs(), c.getSuggesters(), c.getSearchExtParsers());
@ -186,6 +207,7 @@ public class RatedRequest extends ToXContentToBytes implements Writeable {
* },
* "size": 10
* },
* "summary_fields" : ["body"],
* "ratings": [{ "1": 1 }, { "2": 0 }, { "3": 1 } ]
* }
*/
@ -203,6 +225,13 @@ public class RatedRequest extends ToXContentToBytes implements Writeable {
doc.toXContent(builder, params);
}
builder.endArray();
if (this.summaryFields.isEmpty() == false) {
builder.startArray(FIELDS_FIELD.getPreferredName());
for (String field : this.summaryFields) {
builder.value(field);
}
builder.endArray();
}
builder.endObject();
return builder;
}
@ -220,11 +249,12 @@ public class RatedRequest extends ToXContentToBytes implements Writeable {
Objects.equals(testRequest, other.testRequest) &&
Objects.equals(indices, other.indices) &&
Objects.equals(types, other.types) &&
// summaryFields must be compared against the other instance, like every other field here
Objects.equals(summaryFields, other.summaryFields) &&
Objects.equals(ratedDocs, other.ratedDocs);
}
@Override
public final int hashCode() {
return Objects.hash(specId, testRequest, indices.hashCode(), types.hashCode(), ratedDocs.hashCode());
return Objects.hash(specId, testRequest, indices, types, summaryFields, ratedDocs);
}
}

View File

@ -0,0 +1,114 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.internal.InternalSearchHit;
import java.io.IOException;
import java.util.Objects;
import java.util.Optional;
/**
 * Couples a {@link SearchHit} with the rating that was supplied for the
 * corresponding document, if there was one. An empty rating marks a hit
 * without a rating.
 */
public class RatedSearchHit implements Writeable, ToXContent {

    private final SearchHit searchHit;
    private final Optional<Integer> rating;

    /**
     * @param searchHit the search hit
     * @param rating the rating of the hit, or an empty Optional if it has none
     */
    public RatedSearchHit(SearchHit searchHit, Optional<Integer> rating) {
        this.searchHit = searchHit;
        this.rating = rating;
    }

    /**
     * Reads an instance from the stream: first the search hit, then a flag
     * telling whether a rating follows, then the rating itself if flagged.
     */
    public RatedSearchHit(StreamInput in) throws IOException {
        this(InternalSearchHit.readSearchHit(in), in.readBoolean() ? Optional.of(in.readVInt()) : Optional.empty());
    }

    /**
     * Writes the search hit, a presence flag for the rating and, if present, the rating.
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        searchHit.writeTo(out);
        boolean rated = rating.isPresent();
        out.writeBoolean(rated);
        if (rated) {
            out.writeVInt(rating.get());
        }
    }

    public SearchHit getSearchHit() {
        return this.searchHit;
    }

    public Optional<Integer> getRating() {
        return this.rating;
    }

    /**
     * Renders an object with a "hit" field holding the search hit and a
     * "rating" field holding the rating (null when there is no rating).
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
        builder.startObject();
        builder.field("hit", (ToXContent) searchHit);
        Integer ratingOrNull = rating.orElse(null);
        builder.field("rating", ratingOrNull);
        builder.endObject();
        return builder;
    }

    @Override
    public final boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        RatedSearchHit that = (RatedSearchHit) obj;
        // NORELEASE this is a workaround because InternalSearchHit does not properly implement equals()/hashCode(), so we compare their
        // xcontent
        String ownHitAsXContent = renderHit(searchHit);
        String otherHitAsXContent = renderHit(that.searchHit);
        return Objects.equals(rating, that.rating) &&
                Objects.equals(ownHitAsXContent, otherHitAsXContent);
    }

    @Override
    public final int hashCode() {
        //NORELEASE for this to work requires InternalSearchHit to properly implement equals()/hashCode()
        return Objects.hash(rating, renderHit(searchHit));
    }

    /**
     * Renders the given hit into a fresh JSON builder and returns the result as a string.
     * An IOException during rendering is rethrown wrapped in a RuntimeException.
     */
    private static String renderHit(SearchHit hit) {
        try {
            XContentBuilder jsonBuilder = XContentFactory.jsonBuilder();
            return hit.toXContent(jsonBuilder, ToXContent.EMPTY_PARAMS).string();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

View File

@ -30,9 +30,12 @@ import org.elasticsearch.search.SearchHit;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import javax.naming.directory.SearchResult;
@ -114,6 +117,11 @@ public class ReciprocalRank extends RankedListQualityMetric {
**/
@Override
public EvalQueryQuality evaluate(String taskId, SearchHit[] hits, List<RatedDocument> ratedDocs) {
Map<RatedDocumentKey, RatedDocument> ratedDocsByKey = new HashMap<>();
for (RatedDocument doc : ratedDocs) {
ratedDocsByKey.put(doc.getKey(), doc);
}
Set<RatedDocumentKey> relevantDocIds = new HashSet<>();
Set<RatedDocumentKey> irrelevantDocIds = new HashSet<>();
for (RatedDocument doc : ratedDocs) {
@ -125,23 +133,31 @@ public class ReciprocalRank extends RankedListQualityMetric {
}
List<RatedDocumentKey> unknownDocIds = new ArrayList<>();
List<RatedSearchHit> hitsAndRatings = new ArrayList<>();
int firstRelevant = -1;
boolean found = false;
for (int i = 0; i < hits.length; i++) {
RatedDocumentKey key = new RatedDocumentKey(hits[i].getIndex(), hits[i].getType(), hits[i].getId());
if (relevantDocIds.contains(key)) {
if (found == false && i < maxAcceptableRank) {
firstRelevant = i + 1; // add one because rank is not 0-based
found = true;
RatedDocument ratedDocument = ratedDocsByKey.get(key);
if (ratedDocument != null) {
if (ratedDocument.getRating() >= this.relevantRatingThreshhold) {
if (found == false && i < maxAcceptableRank) {
firstRelevant = i + 1; // add one because rank is not
// 0-based
found = true;
}
hitsAndRatings.add(new RatedSearchHit(hits[i], Optional.of(ratedDocument.getRating())));
}
} else {
unknownDocIds.add(key);
hitsAndRatings.add(new RatedSearchHit(hits[i], Optional.empty()));
}
}
double reciprocalRank = (firstRelevant == -1) ? 0 : 1.0d / firstRelevant;
EvalQueryQuality evalQueryQuality = new EvalQueryQuality(taskId, reciprocalRank, unknownDocIds);
evalQueryQuality.addMetricDetails(new Breakdown(firstRelevant));
evalQueryQuality.addHitsAndRatings(hitsAndRatings);
return evalQueryQuality;
}

View File

@ -34,6 +34,7 @@ import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
@ -72,6 +73,13 @@ public class TransportRankEvalAction extends HandledTransportAction<RankEvalRequ
final RankEvalActionListener searchListener = new RankEvalActionListener(listener, qualityTask, querySpecification,
partialResults, unknownDocs, responseCounter);
SearchSourceBuilder specRequest = querySpecification.getTestRequest();
List<String> summaryFields = querySpecification.getSummaryFields();
if (summaryFields.isEmpty()) {
specRequest.fetchSource(false);
} else {
specRequest.fetchSource(summaryFields.toArray(new String[summaryFields.size()]), new String[0]);
}
String[] indices = new String[querySpecification.getIndices().size()];
querySpecification.getIndices().toArray(indices);
SearchRequest templatedRequest = new SearchRequest(indices, specRequest);

View File

@ -50,7 +50,7 @@ public class DiscountedCumulativeGainAtTests extends ESTestCase {
*
* dcg = 13.84826362927298 (sum of last column)
*/
public void testDCGAtSix() throws IOException, InterruptedException, ExecutionException {
public void testDCGAt() throws IOException, InterruptedException, ExecutionException {
List<RatedDocument> rated = new ArrayList<>();
int[] relevanceRatings = new int[] { 3, 2, 3, 0, 1, 2 };
InternalSearchHit[] hits = new InternalSearchHit[6];
@ -130,6 +130,59 @@ public class DiscountedCumulativeGainAtTests extends ESTestCase {
assertEquals(12.779642067948913 / 13.347184833073591, dcg.evaluate("id", hits, rated).getQualityLevel(), 0.00001);
}
/**
* This tests that normalization works as expected when there are more rated documents than search hits
* because we restrict DCG to be calculated at the fourth position
*
* rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1)
* -------------------------------------------------------------------------------------------
* 1 | 3 | 7.0 | 1.0 | 7.0
* 2 | 2 | 3.0 | 1.5849625007211563 | 1.8927892607143721
* 3 | 3 | 7.0 | 2.0 | 3.5
* 4 | n/a | n/a | n/a | n/a
* -----------------------------------------------------------------
* 5 | 1 | 1.0 | 2.584962500721156 | 0.38685280723454163
* 6 | n/a | n/a | n/a | n/a
*
* dcg = 12.392789260714371 (sum of last column until position 4)
*/
public void testDCGAtFourMoreRatings() throws IOException, InterruptedException, ExecutionException {
List<RatedDocument> rated = new ArrayList<>();
Integer[] relevanceRatings = new Integer[] { 3, 2, 3, null, 1};
InternalSearchHit[] hits = new InternalSearchHit[6];
for (int i = 0; i < 6; i++) {
if (i < relevanceRatings.length) {
if (relevanceRatings[i] != null) {
rated.add(new RatedDocument("index", "type", Integer.toString(i), relevanceRatings[i]));
}
}
hits[i] = new InternalSearchHit(i, Integer.toString(i), new Text("type"), Collections.emptyMap());
hits[i].shard(new SearchShardTarget("testnode", new ShardId("index", "uuid", 0)));
}
DiscountedCumulativeGainAt dcg = new DiscountedCumulativeGainAt(4);
EvalQueryQuality result = dcg.evaluate("id", hits, rated);
assertEquals(12.392789260714371 , result.getQualityLevel(), 0.00001);
assertEquals(1, result.getUnknownDocs().size());
/**
* Check with normalization: to get the maximal possible dcg, sort documents by relevance in descending order
*
* rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1)
* -------------------------------------------------------------------------------------------
* 1 | 3 | 7.0 | 1.0  | 7.0
* 2 | 3 | 7.0 | 1.5849625007211563 | 4.416508275000202
* 3 | 2 | 3.0 | 2.0  | 1.5
* 4 | 1 | 1.0 | 2.321928094887362   | 0.43067655807339
* -------------------------------------------------------------------------------------------
* 5 | n.a | n.a | n.a.  | n.a.
* 6 | n.a | n.a | n.a  | n.a
*
* idcg = 13.347184833073591 (sum of last column)
*/
dcg.setNormalize(true);
assertEquals(12.392789260714371 / 13.347184833073591, dcg.evaluate("id", hits, rated).getQualityLevel(), 0.00001);
}
public void testParseFromXContent() throws IOException {
String xContent = " {\n"
+ " \"size\": 8,\n"

View File

@ -19,10 +19,7 @@
package org.elasticsearch.index.rankeval;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
@ -39,26 +36,23 @@ public class EvalQueryQualityTests extends ESTestCase {
for (int i = 0; i < numberOfUnknownDocs; i++) {
unknownDocs.add(RatedDocumentKeyTests.createRandomRatedDocumentKey());
}
int numberOfSearchHits = randomInt(5);
List<RatedSearchHit> ratedHits = new ArrayList<>();
for (int i = 0; i < numberOfSearchHits; i++) {
ratedHits.add(RatedSearchHitTests.randomRatedSearchHit());
}
EvalQueryQuality evalQueryQuality = new EvalQueryQuality(randomAsciiOfLength(10), randomDoubleBetween(0.0, 1.0, true), unknownDocs);
if (randomBoolean()) {
// TODO randomize this
evalQueryQuality.addMetricDetails(new PrecisionAtN.Breakdown(1, 5));
}
evalQueryQuality.addHitsAndRatings(ratedHits);
return evalQueryQuality;
}
private static EvalQueryQuality copy(EvalQueryQuality original) throws IOException {
try (BytesStreamOutput output = new BytesStreamOutput()) {
original.writeTo(output);
try (StreamInput in = new NamedWriteableAwareStreamInput(output.bytes().streamInput(), namedWritableRegistry)) {
return new EvalQueryQuality(in);
}
}
}
public void testSerialization() throws IOException {
EvalQueryQuality original = randomEvalQueryQuality();
EvalQueryQuality deserialized = copy(original);
EvalQueryQuality deserialized = RankEvalTestHelper.copy(original, EvalQueryQuality::new, namedWritableRegistry);
assertEquals(deserialized, original);
assertEquals(deserialized.hashCode(), original.hashCode());
assertNotSame(deserialized, original);
@ -67,13 +61,14 @@ public class EvalQueryQualityTests extends ESTestCase {
public void testEqualsAndHash() throws IOException {
EvalQueryQuality testItem = randomEvalQueryQuality();
RankEvalTestHelper.testHashCodeAndEquals(testItem, mutateTestItem(testItem),
copy(testItem));
RankEvalTestHelper.copy(testItem, EvalQueryQuality::new, namedWritableRegistry));
}
private static EvalQueryQuality mutateTestItem(EvalQueryQuality original) {
String id = original.getId();
double qualityLevel = original.getQualityLevel();
List<RatedDocumentKey> unknownDocs = original.getUnknownDocs();
List<RatedDocumentKey> unknownDocs = new ArrayList<>(original.getUnknownDocs());
List<RatedSearchHit> ratedHits = new ArrayList<>(original.getHitsAndRatings());
MetricDetails breakdown = original.getMetricDetails();
// the upper bound is inclusive and has to reach the highest case label (4),
// otherwise the branch that mutates the rated hits is never selected
switch (randomIntBetween(0, 4)) {
case 0:
@ -83,7 +78,6 @@ public class EvalQueryQualityTests extends ESTestCase {
qualityLevel = qualityLevel + 0.1;
break;
case 2:
unknownDocs = new ArrayList<>(unknownDocs);
unknownDocs.add(RatedDocumentKeyTests.createRandomRatedDocumentKey());
break;
case 3:
@ -93,13 +87,15 @@ public class EvalQueryQualityTests extends ESTestCase {
breakdown = null;
}
break;
case 4:
ratedHits.add(RatedSearchHitTests.randomRatedSearchHit());
break;
default:
throw new IllegalStateException("The test should only allow three parameters mutated");
throw new IllegalStateException("The test should only allow five parameters mutated");
}
EvalQueryQuality evalQueryQuality = new EvalQueryQuality(id, qualityLevel, unknownDocs);
evalQueryQuality.addMetricDetails(breakdown);
evalQueryQuality.addHitsAndRatings(ratedHits);
return evalQueryQuality;
}
}

View File

@ -28,6 +28,7 @@ import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import org.junit.Before;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@ -52,7 +53,7 @@ public class RankEvalRequestTests extends ESIntegTestCase {
ensureGreen();
client().prepareIndex("test", "testtype").setId("1")
.setSource("text", "berlin").get();
.setSource("text", "berlin", "title", "Berlin, Germany").get();
client().prepareIndex("test", "testtype").setId("2")
.setSource("text", "amsterdam").get();
client().prepareIndex("test", "testtype").setId("3")
@ -66,15 +67,19 @@ public class RankEvalRequestTests extends ESIntegTestCase {
refresh();
}
public void testPrecisionAtRequest() {
public void testPrecisionAtRequest() throws IOException {
List<String> indices = Arrays.asList(new String[] { "test" });
List<String> types = Arrays.asList(new String[] { "testtype" });
List<RatedRequest> specifications = new ArrayList<>();
SearchSourceBuilder testQuery = new SearchSourceBuilder();
testQuery.query(new MatchAllQueryBuilder());
specifications.add(new RatedRequest("amsterdam_query", testQuery, indices, types, createRelevant("2", "3", "4", "5")));
specifications.add(new RatedRequest("berlin_query", testQuery, indices, types, createRelevant("1")));
RatedRequest amsterdamRequest = new RatedRequest("amsterdam_query", testQuery, indices, types, createRelevant("2", "3", "4", "5"));
amsterdamRequest.setSummaryFields(Arrays.asList(new String[]{ "text", "title" }));
specifications.add(amsterdamRequest);
RatedRequest berlinRequest = new RatedRequest("berlin_query", testQuery, indices, types, createRelevant("1"));
berlinRequest.setSummaryFields(Arrays.asList(new String[]{ "text", "title" }));
specifications.add(berlinRequest);
RankEvalSpec task = new RankEvalSpec(specifications, new PrecisionAtN(10));
@ -86,11 +91,32 @@ public class RankEvalRequestTests extends ESIntegTestCase {
Set<Entry<String, EvalQueryQuality>> entrySet = response.getPartialResults().entrySet();
assertEquals(2, entrySet.size());
for (Entry<String, EvalQueryQuality> entry : entrySet) {
EvalQueryQuality quality = entry.getValue();
// compare the query id by value; reference comparison with == would silently skip these assertions
if ("amsterdam_query".equals(entry.getKey())) {
assertEquals(2, entry.getValue().getUnknownDocs().size());
assertEquals(2, quality.getUnknownDocs().size());
List<RatedSearchHit> hitsAndRatings = quality.getHitsAndRatings();
assertEquals(6, hitsAndRatings.size());
for (RatedSearchHit hit : hitsAndRatings) {
String id = hit.getSearchHit().getId();
if (id.equals("1") || id.equals("6")) {
assertFalse(hit.getRating().isPresent());
} else {
assertEquals(Rating.RELEVANT.ordinal(), hit.getRating().get().intValue());
}
}
}
// compare the query id by value; reference comparison with == would silently skip these assertions
if ("berlin_query".equals(entry.getKey())) {
assertEquals(5, entry.getValue().getUnknownDocs().size());
assertEquals(5, quality.getUnknownDocs().size());
List<RatedSearchHit> hitsAndRatings = quality.getHitsAndRatings();
assertEquals(6, hitsAndRatings.size());
for (RatedSearchHit hit : hitsAndRatings) {
String id = hit.getSearchHit().getId();
if (id.equals("1")) {
assertEquals(Rating.RELEVANT.ordinal(), hit.getRating().get().intValue());
} else {
assertFalse(hit.getRating().isPresent());
}
}
}
}
}

View File

@ -20,6 +20,12 @@
package org.elasticsearch.index.rankeval;
import org.elasticsearch.action.support.ToXContentToBytes;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.NamedWriteable;
import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
@ -29,6 +35,7 @@ import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import java.util.Collections;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.not;
@ -50,7 +57,7 @@ public class RankEvalTestHelper {
return itemParser;
}
public static void testHashCodeAndEquals(Object testItem, Object mutation, Object secondCopy) {
public static <T> void testHashCodeAndEquals(T testItem, T mutation, T secondCopy) {
assertFalse("testItem is equal to null", testItem.equals(null));
assertFalse("testItem is equal to incompatible type", testItem.equals(""));
assertTrue("testItem is not equal to self", testItem.equals(testItem));
@ -65,4 +72,31 @@ public class RankEvalTestHelper {
assertThat("testItem copy's hashcode is different from original hashcode", secondCopy.hashCode(),
equalTo(testItem.hashCode()));
}
/**
 * Creates a deep copy of a {@link Writeable} by serializing it and reading it back,
 * using a {@link NamedWriteableRegistry} that has no entries.
 * @param original the object to copy
 * @param reader a function that reads a new instance of this type from a stream
 * @return the copy read back from the serialized form of the original
 */
public static <T extends Writeable> T copy(T original, Writeable.Reader<T> reader) throws IOException {
    NamedWriteableRegistry emptyRegistry = new NamedWriteableRegistry(Collections.emptyList());
    return copy(original, reader, emptyRegistry);
}
/**
 * Creates a deep copy of a {@link Writeable} by writing it to a {@link BytesStreamOutput}
 * and reading the written bytes back through the given reader.
 * @param original the object to copy
 * @param reader a function that reads a new instance of this type from a stream
 * @param namedWriteableRegistry must be non-empty if the object itself or nested object implement {@link NamedWriteable}
 * @return the copy read back from the serialized form of the original
 */
public static <T extends Writeable> T copy(T original, Writeable.Reader<T> reader, NamedWriteableRegistry namedWriteableRegistry)
        throws IOException {
    try (BytesStreamOutput serialized = new BytesStreamOutput()) {
        original.writeTo(serialized);
        // wrap the raw input so that named writeables can be resolved via the registry
        try (StreamInput input = new NamedWriteableAwareStreamInput(serialized.bytes().streamInput(), namedWriteableRegistry)) {
            T duplicate = reader.read(input);
            return duplicate;
        }
    }
}
}

View File

@ -79,7 +79,15 @@ public class RatedRequestsTests extends ESTestCase {
ratedDocs.add(RatedDocumentTests.createRatedDocument());
}
return new RatedRequest(specId, testRequest, indices, types, ratedDocs);
RatedRequest ratedRequest = new RatedRequest(specId, testRequest, indices, types, ratedDocs);
List<String> summaryFields = new ArrayList<>();
int numSummaryFields = randomIntBetween(0, 5);
for (int i = 0; i < numSummaryFields; i++) {
summaryFields.add(randomAsciiOfLength(5));
}
ratedRequest.setSummaryFields(summaryFields);
return ratedRequest;
}
public void testXContentRoundtrip() throws IOException {
@ -126,6 +134,7 @@ public class RatedRequestsTests extends ESTestCase {
+ " },\n"
+ " \"size\": 10\n"
+ " },\n"
+ " \"summary_fields\" : [\"title\"],\n"
+ " \"ratings\": [ "
+ " {\"_index\": \"test\", \"_type\": \"testtype\", \"_id\": \"1\", \"rating\" : 1 }, "
+ " {\"_type\": \"testtype\", \"_index\": \"test\", \"_id\": \"2\", \"rating\" : 0 }, "

View File

@ -0,0 +1,71 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import java.util.Collections;
import java.util.Optional;
/**
 * Tests serialization and the equals/hashCode contract of {@link RatedSearchHit}.
 */
public class RatedSearchHitTests extends ESTestCase {

    /**
     * Creates a random {@link RatedSearchHit}: the rating is either absent or a value
     * between 0 and 5, and the wrapped hit gets a random doc id, id and type and no fields.
     *
     * @return a randomly populated rated search hit
     */
    public static RatedSearchHit randomRatedSearchHit() {
        Optional<Integer> rating = randomBoolean() ? Optional.empty() : Optional.of(randomIntBetween(0, 5));
        SearchHit searchHit = new InternalSearchHit(randomIntBetween(0, 10), randomAsciiOfLength(10), new Text(randomAsciiOfLength(10)),
                Collections.emptyMap());
        return new RatedSearchHit(searchHit, rating);
    }

    /**
     * Returns a copy of the given item with exactly one property changed, either the
     * rating or the id of the wrapped hit.
     *
     * @param original the item to derive the mutation from
     * @return a new item built from the mutated rating or hit
     */
    private static RatedSearchHit mutateTestItem(RatedSearchHit original) {
        Optional<Integer> rating = original.getRating();
        InternalSearchHit hit = (InternalSearchHit) original.getSearchHit();
        switch (randomIntBetween(0, 1)) {
        case 0:
            // an existing rating is incremented so it always changes; a missing one becomes present
            rating = rating.isPresent() ? Optional.of(rating.get() + 1) : Optional.of(randomInt(5));
            break;
        case 1:
            // appending a random suffix changes the id
            hit = new InternalSearchHit(hit.docId(), hit.getId() + randomAsciiOfLength(10), new Text(hit.getType()),
                    Collections.emptyMap());
            break;
        default:
            throw new IllegalStateException("The test should only allow two parameters mutated");
        }
        return new RatedSearchHit(hit, rating);
    }

    /**
     * Checks that a copy made through the stream round trip is equal to, but not the same
     * instance as, the original.
     */
    public void testSerialization() throws IOException {
        RatedSearchHit original = randomRatedSearchHit();
        RatedSearchHit deserialized = RankEvalTestHelper.copy(original, RatedSearchHit::new);
        // expected value first, so that failure messages label the two values correctly
        assertEquals(original, deserialized);
        assertEquals(original.hashCode(), deserialized.hashCode());
        assertNotSame(original, deserialized);
    }

    /**
     * Runs the shared equals/hashCode checks with a random item, a mutation of it and a
     * serialized copy of it.
     */
    public void testEqualsAndHash() throws IOException {
        RatedSearchHit testItem = randomRatedSearchHit();
        RankEvalTestHelper.testHashCodeAndEquals(testItem, mutateTestItem(testItem),
                RankEvalTestHelper.copy(testItem, RatedSearchHit::new));
    }
}

View File

@ -59,13 +59,27 @@
"metric" : { "precisionatn": { "size": 10}}
}
- match: {rank_eval.quality_level: 1}
- match: {rank_eval.details.amsterdam_query.quality_level: 1.0}
- match: {rank_eval.details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_type": "bar", "_id": "doc4"}]}
- match: {rank_eval.details.amsterdam_query.metric_details: {"relevant_docs_retrieved": 2, "docs_retrieved": 2}}
- match: {rank_eval.details.berlin_query.quality_level: 1.0}
- match: {rank_eval.details.berlin_query.unknown_docs: [ {"_index": "foo", "_type": "bar", "_id": "doc4"}]}
- match: {rank_eval.details.berlin_query.metric_details: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}}
- match: { rank_eval.quality_level: 1}
- match: { rank_eval.details.amsterdam_query.quality_level: 1.0}
- match: { rank_eval.details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_type": "bar", "_id": "doc4"}]}
- match: { rank_eval.details.amsterdam_query.metric_details: {"relevant_docs_retrieved": 2, "docs_retrieved": 2}}
- length: { rank_eval.details.amsterdam_query.hits: 3}
- match: { rank_eval.details.amsterdam_query.hits.0.hit: {"_index" : "foo", "_type" : "bar", "_id" : "doc2", "_score" : 0.44839138}}
- match: { rank_eval.details.amsterdam_query.hits.0.rating: 1}
- match: { rank_eval.details.amsterdam_query.hits.1.hit: {"_index" : "foo", "_type" : "bar", "_id" : "doc3", "_score" : 0.44839138}}
- match: { rank_eval.details.amsterdam_query.hits.1.rating: 1}
- match: { rank_eval.details.amsterdam_query.hits.2.hit: {"_index" : "foo", "_type" : "bar", "_id" : "doc4", "_score" : 0.21492207}}
- is_false: rank_eval.details.amsterdam_query.hits.2.rating
- match: { rank_eval.details.berlin_query.quality_level: 1.0}
- match: { rank_eval.details.berlin_query.unknown_docs: [ {"_index": "foo", "_type": "bar", "_id": "doc4"}]}
- match: { rank_eval.details.berlin_query.metric_details: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}}
- length: { rank_eval.details.berlin_query.hits: 2}
- match: { rank_eval.details.berlin_query.hits.0.hit: { "_index" : "foo", "_type" : "bar", "_id" : "doc1", "_score" : 0.87138504}}
- match: { rank_eval.details.berlin_query.hits.0.rating: 1}
- match: { rank_eval.details.berlin_query.hits.1.hit: { "_index" : "foo", "_type" : "bar", "_id" : "doc4", "_score" : 0.41767058}}
- is_false: rank_eval.details.berlin_query.hits.1.rating
---
"Reciprocal Rank":
@ -133,7 +147,7 @@
- match: {rank_eval.details.amsterdam_query.quality_level: 0.3333333333333333}
- match: {rank_eval.details.amsterdam_query.metric_details: {"first_relevant": 3}}
- match: {rank_eval.details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_type": "bar", "_id": "doc2"},
{"_index": "foo", "_type": "bar", "_id": "doc3"} ]}
{"_index": "foo", "_type": "bar", "_id": "doc3"} ]}
- match: {rank_eval.details.berlin_query.quality_level: 0.5}
- match: {rank_eval.details.berlin_query.metric_details: {"first_relevant": 2}}
- match: {rank_eval.details.berlin_query.unknown_docs: [ {"_index": "foo", "_type": "bar", "_id": "doc1"}]}
@ -168,7 +182,7 @@
- match: {rank_eval.details.amsterdam_query.quality_level: 0}
- match: {rank_eval.details.amsterdam_query.metric_details: {"first_relevant": -1}}
- match: {rank_eval.details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_type": "bar", "_id": "doc2"},
{"_index": "foo", "_type": "bar", "_id": "doc3"} ]}
{"_index": "foo", "_type": "bar", "_id": "doc3"} ]}
- match: {rank_eval.details.berlin_query.quality_level: 0.5}
- match: {rank_eval.details.berlin_query.metric_details: {"first_relevant": 2}}
- match: {rank_eval.details.berlin_query.unknown_docs: [ {"_index": "foo", "_type": "bar", "_id": "doc1"}]}