Remove unknown docs from EvalQueryQuality

The unknown-documents section of each query's response can be rendered from the
rated hits, which are now also part of the response, by simply filtering for the
documents without a rating. EvalQueryQuality therefore no longer stores or
serializes a separate list of unknown documents.
This commit is contained in:
Christoph Büscher 2016-09-23 16:20:27 +02:00
parent 9e394b0644
commit dfc6d1f369
10 changed files with 33 additions and 49 deletions

View File

@ -36,7 +36,6 @@ import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import static org.elasticsearch.index.rankeval.RankedListQualityMetric.filterUnknownDocuments;
import static org.elasticsearch.index.rankeval.RankedListQualityMetric.joinHitsWithRatings;
public class DiscountedCumulativeGainAt implements RankedListQualityMetric {
@ -159,7 +158,6 @@ public class DiscountedCumulativeGainAt implements RankedListQualityMetric {
}
EvalQueryQuality evalQueryQuality = new EvalQueryQuality(taskId, dcg);
evalQueryQuality.addHitsAndRatings(ratedHits);
evalQueryQuality.setUnknownDocs(filterUnknownDocuments(ratedHits));
return evalQueryQuality;
}

View File

@ -27,7 +27,6 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;;
@ -39,7 +38,6 @@ import java.util.Objects;;
public class EvalQueryQuality implements ToXContent, Writeable {
/** documents seen as result for one request that were not annotated.*/
private List<DocumentKey> unknownDocs = new ArrayList<>();
private String id;
private double qualityLevel;
private MetricDetails optionalMetricDetails;
@ -52,7 +50,6 @@ public class EvalQueryQuality implements ToXContent, Writeable {
/**
 * Deserializing constructor: rebuilds an instance from the given stream.
 * Values are read in the same order in which writeTo emits them.
 *
 * @param in the stream to read from
 * @throws IOException declared by this constructor; presumably raised by the stream reads
 */
public EvalQueryQuality(StreamInput in) throws IOException {
// id and quality level are passed on to the two-argument constructor
this(in.readString(), in.readDouble());
this.unknownDocs = in.readList(DocumentKey::new);
this.hits = in.readList(RatedSearchHit::new);
// read with the optional variant, matching writeOptionalNamedWriteable in writeTo
this.optionalMetricDetails = in.readOptionalNamedWriteable(MetricDetails.class);
}
@ -61,7 +58,6 @@ public class EvalQueryQuality implements ToXContent, Writeable {
public void writeTo(StreamOutput out) throws IOException {
// The write order must stay in sync with the read order in the StreamInput constructor.
out.writeString(id);
out.writeDouble(qualityLevel);
out.writeList(unknownDocs);
out.writeList(hits);
// written with the optional variant, matching readOptionalNamedWriteable in the constructor
out.writeOptionalNamedWriteable(this.optionalMetricDetails);
}
@ -74,14 +70,6 @@ public class EvalQueryQuality implements ToXContent, Writeable {
return qualityLevel;
}
public void setUnknownDocs(List<DocumentKey> unknownDocs) {
this.unknownDocs = unknownDocs;
}
public List<DocumentKey> getUnknownDocs() {
return Collections.unmodifiableList(this.unknownDocs);
}
/**
 * Stores the metric-specific details for this query, replacing any value set before.
 *
 * @param breakdown the details to keep; the field is serialized with the optional
 *                  variant in writeTo, so presumably {@code null} is acceptable here
 */
public void addMetricDetails(MetricDetails breakdown) {
this.optionalMetricDetails = breakdown;
}
@ -103,7 +91,7 @@ public class EvalQueryQuality implements ToXContent, Writeable {
builder.startObject(id);
builder.field("quality_level", this.qualityLevel);
builder.startArray("unknown_docs");
for (DocumentKey key : unknownDocs) {
for (DocumentKey key : RankedListQualityMetric.filterUnknownDocuments(hits)) {
key.toXContent(builder, params);
}
builder.endArray();
@ -132,13 +120,12 @@ public class EvalQueryQuality implements ToXContent, Writeable {
EvalQueryQuality other = (EvalQueryQuality) obj;
return Objects.equals(id, other.id) &&
Objects.equals(qualityLevel, other.qualityLevel) &&
Objects.equals(unknownDocs, other.unknownDocs) &&
Objects.equals(hits, other.hits) &&
Objects.equals(optionalMetricDetails, other.optionalMetricDetails);
}
@Override
public final int hashCode() {
return Objects.hash(id, qualityLevel, unknownDocs, hits, optionalMetricDetails);
return Objects.hash(id, qualityLevel, hits, optionalMetricDetails);
}
}

View File

@ -35,7 +35,6 @@ import java.util.Optional;
import javax.naming.directory.SearchResult;
import static org.elasticsearch.index.rankeval.RankedListQualityMetric.filterUnknownDocuments;
import static org.elasticsearch.index.rankeval.RankedListQualityMetric.joinHitsWithRatings;
/**
@ -140,7 +139,6 @@ public class PrecisionAtN implements RankedListQualityMetric {
EvalQueryQuality evalQueryQuality = new EvalQueryQuality(taskId, precision);
evalQueryQuality.addMetricDetails(new PrecisionAtN.Breakdown(good, good + bad));
evalQueryQuality.addHitsAndRatings(ratedSearchHits);
evalQueryQuality.setUnknownDocs(filterUnknownDocuments(ratedSearchHits));
return evalQueryQuality;
}

View File

@ -100,7 +100,7 @@ public class RatedSearchHit implements Writeable, ToXContent {
@Override
public final int hashCode() {
//NORELEASE for this to work requires InternalSearchHit to properly implement equals()/hashCode()
// NORELEASE for this to work requires InternalSearchHit to properly implement equals()/hashCode()
XContentBuilder builder;
String hitAsXContent;
try {

View File

@ -35,7 +35,6 @@ import java.util.Optional;
import javax.naming.directory.SearchResult;
import static org.elasticsearch.index.rankeval.RankedListQualityMetric.filterUnknownDocuments;
import static org.elasticsearch.index.rankeval.RankedListQualityMetric.joinHitsWithRatings;
/**
@ -133,7 +132,6 @@ public class ReciprocalRank implements RankedListQualityMetric {
EvalQueryQuality evalQueryQuality = new EvalQueryQuality(taskId, reciprocalRank);
evalQueryQuality.addMetricDetails(new Breakdown(firstRelevant));
evalQueryQuality.addHitsAndRatings(ratedHits);
evalQueryQuality.setUnknownDocs(filterUnknownDocuments(ratedHits));
return evalQueryQuality;
}

View File

@ -30,10 +30,13 @@ import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import static org.elasticsearch.index.rankeval.RankedListQualityMetric.filterUnknownDocuments;
public class DiscountedCumulativeGainAtTests extends ESTestCase {
/**
@ -110,7 +113,7 @@ public class DiscountedCumulativeGainAtTests extends ESTestCase {
DiscountedCumulativeGainAt dcg = new DiscountedCumulativeGainAt(6);
EvalQueryQuality result = dcg.evaluate("id", hits, rated);
assertEquals(12.779642067948913, result.getQualityLevel(), 0.00001);
assertEquals(2, result.getUnknownDocs().size());
assertEquals(2, filterUnknownDocuments(result.getHitsAndRatings()).size());
/**
* Check with normalization: to get the maximal possible dcg, sort documents by relevance in descending order
@ -148,7 +151,7 @@ public class DiscountedCumulativeGainAtTests extends ESTestCase {
*/
public void testDCGAtFourMoreRatings() throws IOException, InterruptedException, ExecutionException {
List<RatedDocument> rated = new ArrayList<>();
Integer[] relevanceRatings = new Integer[] { 3, 2, 3, null, 1};
Integer[] relevanceRatings = new Integer[] { 3, 2, 3, null, 1, null};
InternalSearchHit[] hits = new InternalSearchHit[6];
for (int i = 0; i < 6; i++) {
if (i < relevanceRatings.length) {
@ -160,9 +163,9 @@ public class DiscountedCumulativeGainAtTests extends ESTestCase {
hits[i].shard(new SearchShardTarget("testnode", new ShardId("index", "uuid", 0)));
}
DiscountedCumulativeGainAt dcg = new DiscountedCumulativeGainAt(4);
EvalQueryQuality result = dcg.evaluate("id", hits, rated);
EvalQueryQuality result = dcg.evaluate("id", Arrays.copyOfRange(hits, 0, 4), rated);
assertEquals(12.392789260714371 , result.getQualityLevel(), 0.00001);
assertEquals(1, result.getUnknownDocs().size());
assertEquals(1, filterUnknownDocuments(result.getHitsAndRatings()).size());
/**
* Check with normalization: to get the maximal possible dcg, sort documents by relevance in descending order

View File

@ -46,7 +46,6 @@ public class EvalQueryQualityTests extends ESTestCase {
// TODO randomize this
evalQueryQuality.addMetricDetails(new PrecisionAtN.Breakdown(1, 5));
}
evalQueryQuality.setUnknownDocs(unknownDocs);
evalQueryQuality.addHitsAndRatings(ratedHits);
return evalQueryQuality;
}
@ -68,9 +67,8 @@ public class EvalQueryQualityTests extends ESTestCase {
private static EvalQueryQuality mutateTestItem(EvalQueryQuality original) {
String id = original.getId();
double qualityLevel = original.getQualityLevel();
List<DocumentKey> unknownDocs = new ArrayList<>(original.getUnknownDocs());
List<RatedSearchHit> ratedHits = new ArrayList<>(original.getHitsAndRatings());
MetricDetails breakdown = original.getMetricDetails();
MetricDetails metricDetails = original.getMetricDetails();
switch (randomIntBetween(0, 3)) {
case 0:
id = id + "_";
@ -79,24 +77,20 @@ public class EvalQueryQualityTests extends ESTestCase {
qualityLevel = qualityLevel + 0.1;
break;
case 2:
unknownDocs.add(DocumentKeyTests.createRandomRatedDocumentKey());
break;
case 3:
if (breakdown == null) {
breakdown = new PrecisionAtN.Breakdown(1, 5);
if (metricDetails == null) {
metricDetails = new PrecisionAtN.Breakdown(1, 5);
} else {
breakdown = null;
metricDetails = null;
}
break;
case 4:
case 3:
ratedHits.add(RatedSearchHitTests.randomRatedSearchHit());
break;
default:
throw new IllegalStateException("The test should only allow five parameters mutated");
throw new IllegalStateException("The test should only allow four parameters mutated");
}
EvalQueryQuality evalQueryQuality = new EvalQueryQuality(id, qualityLevel);
evalQueryQuality.setUnknownDocs(unknownDocs);
evalQueryQuality.addMetricDetails(breakdown);
evalQueryQuality.addMetricDetails(metricDetails);
evalQueryQuality.addHitsAndRatings(ratedHits);
return evalQueryQuality;
}

View File

@ -36,6 +36,9 @@ import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import static org.elasticsearch.index.rankeval.RankedListQualityMetric.filterUnknownDocuments;
public class RankEvalRequestTests extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> transportClientPlugins() {
@ -93,7 +96,7 @@ public class RankEvalRequestTests extends ESIntegTestCase {
for (Entry<String, EvalQueryQuality> entry : entrySet) {
EvalQueryQuality quality = entry.getValue();
if (entry.getKey() == "amsterdam_query") {
assertEquals(2, quality.getUnknownDocs().size());
assertEquals(2, filterUnknownDocuments(quality.getHitsAndRatings()).size());
List<RatedSearchHit> hitsAndRatings = quality.getHitsAndRatings();
assertEquals(6, hitsAndRatings.size());
for (RatedSearchHit hit : hitsAndRatings) {
@ -106,7 +109,7 @@ public class RankEvalRequestTests extends ESIntegTestCase {
}
}
if (entry.getKey() == "berlin_query") {
assertEquals(5, quality.getUnknownDocs().size());
assertEquals(5, filterUnknownDocuments(quality.getHitsAndRatings()).size());
List<RatedSearchHit> hitsAndRatings = quality.getHitsAndRatings();
assertEquals(6, hitsAndRatings.size());
for (RatedSearchHit hit : hitsAndRatings) {

View File

@ -46,7 +46,6 @@ public class RankEvalResponseTests extends ESTestCase {
unknownDocs.add(DocumentKeyTests.createRandomRatedDocumentKey());
}
EvalQueryQuality evalQuality = new EvalQueryQuality(id, randomDoubleBetween(0.0, 1.0, true));
evalQuality.setUnknownDocs(unknownDocs);
partials.put(id, evalQuality);
}
return new RankEvalResponse(randomDouble(), partials);

View File

@ -46,7 +46,7 @@ public class ReciprocalRankTests extends ESTestCase {
reciprocalRank.setMaxAcceptableRank(maxRank);
assertEquals(maxRank, reciprocalRank.getMaxAcceptableRank());
SearchHit[] hits = toSearchHits(0, 9, "test", "type");
SearchHit[] hits = createSearchHits(0, 9, "test", "type");
List<RatedDocument> ratedDocs = new ArrayList<>();
int relevantAt = 5;
for (int i = 0; i < 10; i++) {
@ -76,7 +76,7 @@ public class ReciprocalRankTests extends ESTestCase {
public void testEvaluationOneRelevantInResults() {
ReciprocalRank reciprocalRank = new ReciprocalRank();
SearchHit[] hits = toSearchHits(0, 9, "test", "type");
SearchHit[] hits = createSearchHits(0, 9, "test", "type");
List<RatedDocument> ratedDocs = new ArrayList<>();
// mark one of the ten docs relevant
int relevantAt = randomIntBetween(0, 9);
@ -105,7 +105,7 @@ public class ReciprocalRankTests extends ESTestCase {
rated.add(new RatedDocument("test", "testtype", "2", 2));
rated.add(new RatedDocument("test", "testtype", "3", 3));
rated.add(new RatedDocument("test", "testtype", "4", 4));
SearchHit[] hits = toSearchHits(0, 5, "test", "testtype");
SearchHit[] hits = createSearchHits(0, 5, "test", "testtype");
ReciprocalRank reciprocalRank = new ReciprocalRank();
reciprocalRank.setRelevantRatingThreshhold(2);
@ -125,7 +125,7 @@ public class ReciprocalRankTests extends ESTestCase {
public void testEvaluationNoRelevantInResults() {
ReciprocalRank reciprocalRank = new ReciprocalRank();
SearchHit[] hits = toSearchHits(0, 9, "test", "type");
SearchHit[] hits = createSearchHits(0, 9, "test", "type");
List<RatedDocument> ratedDocs = new ArrayList<>();
EvalQueryQuality evaluation = reciprocalRank.evaluate("id", hits, ratedDocs);
assertEquals(0.0, evaluation.getQualityLevel(), Double.MIN_VALUE);
@ -144,9 +144,13 @@ public class ReciprocalRankTests extends ESTestCase {
assertEquals(testItem.hashCode(), parsedItem.hashCode());
}
private static SearchHit[] toSearchHits(int from, int to, String index, String type) {
InternalSearchHit[] hits = new InternalSearchHit[to - from];
for (int i = from; i < to; i++) {
/**
* Create InternalSearchHits for testing, starting from docId 'from' up to and including docId 'to'.
* The search hits' index and type also need to be provided.
*/
private static SearchHit[] createSearchHits(int from, int to, String index, String type) {
InternalSearchHit[] hits = new InternalSearchHit[to + 1 - from];
for (int i = from; i <= to; i++) {
hits[i] = new InternalSearchHit(i, i+"", new Text(type), Collections.emptyMap());
hits[i].shard(new SearchShardTarget("testnode", new Index(index, "uuid"), 0));
}