Actually use index/type in addition to docid for comparing hits.

This commit is contained in:
Isabel Drost-Fromm 2016-08-08 14:09:27 +02:00
parent ac3f2421b1
commit cfaa62723d
12 changed files with 234 additions and 92 deletions

View File

@ -27,14 +27,14 @@ import java.util.Collection;
public class EvalQueryQuality {
private double qualityLevel;
private Collection<String> unknownDocs;
private Collection<RatedDocumentKey> unknownDocs;
public EvalQueryQuality (double qualityLevel, Collection<String> unknownDocs) {
public EvalQueryQuality (double qualityLevel, Collection<RatedDocumentKey> unknownDocs) {
this.qualityLevel = qualityLevel;
this.unknownDocs = unknownDocs;
}
public Collection<String> getUnknownDocs() {
public Collection<RatedDocumentKey> getUnknownDocs() {
return unknownDocs;
}

View File

@ -99,27 +99,27 @@ public class PrecisionAtN extends RankedListQualityMetric {
@Override
public EvalQueryQuality evaluate(SearchHit[] hits, List<RatedDocument> ratedDocs) {
Collection<String> relevantDocIds = new ArrayList<>();
Collection<String> irrelevantDocIds = new ArrayList<>();
Collection<RatedDocumentKey> relevantDocIds = new ArrayList<>();
Collection<RatedDocumentKey> irrelevantDocIds = new ArrayList<>();
for (RatedDocument doc : ratedDocs) {
if (Rating.RELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
relevantDocIds.add(doc.getDocID());
relevantDocIds.add(doc.getKey());
} else if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(doc.getRating()))) {
irrelevantDocIds.add(doc.getDocID());
irrelevantDocIds.add(doc.getKey());
}
}
int good = 0;
int bad = 0;
Collection<String> unknownDocIds = new ArrayList<String>();
Collection<RatedDocumentKey> unknownDocIds = new ArrayList<RatedDocumentKey>();
for (int i = 0; (i < n && i < hits.length); i++) {
String id = hits[i].getId();
if (relevantDocIds.contains(id)) {
RatedDocumentKey hitKey = new RatedDocumentKey(hits[i].getIndex(), hits[i].getType(), hits[i].getId());
if (relevantDocIds.contains(hitKey)) {
good++;
} else if (irrelevantDocIds.contains(id)) {
} else if (irrelevantDocIds.contains(hitKey)) {
bad++;
} else {
unknownDocIds.add(id);
unknownDocIds.add(hitKey);
}
}

View File

@ -73,7 +73,7 @@ public class RankEvalResponse extends ActionResponse implements ToXContent {
builder.field("spec_id", qualityResult.getSpecId());
builder.field("quality_level", qualityResult.getQualityLevel());
builder.startArray("unknown_docs");
Map<String, Collection<String>> unknownDocs = qualityResult.getUnknownDocs();
Map<String, Collection<RatedDocumentKey>> unknownDocs = qualityResult.getUnknownDocs();
for (String key : unknownDocs.keySet()) {
builder.startObject();
builder.field(key, unknownDocs.get(key));

View File

@ -39,16 +39,16 @@ public class RankEvalResult implements Writeable {
/**Average precision observed when issuing query intents with this specification.*/
private double qualityLevel;
/**Mapping from intent id to all documents seen for this intent that were not annotated.*/
private Map<String, Collection<String>> unknownDocs;
private Map<String, Collection<RatedDocumentKey>> unknownDocs;
@SuppressWarnings("unchecked")
public RankEvalResult(StreamInput in) throws IOException {
this.specId = in.readString();
this.qualityLevel = in.readDouble();
this.unknownDocs = (Map<String, Collection<String>>) in.readGenericValue();
this.unknownDocs = (Map<String, Collection<RatedDocumentKey>>) in.readGenericValue();
}
public RankEvalResult(String specId, double quality, Map<String, Collection<String>> unknownDocs) {
public RankEvalResult(String specId, double quality, Map<String, Collection<RatedDocumentKey>> unknownDocs) {
this.specId = specId;
this.qualityLevel = quality;
this.unknownDocs = unknownDocs;
@ -69,7 +69,7 @@ public class RankEvalResult implements Writeable {
return qualityLevel;
}
public Map<String, Collection<String>> getUnknownDocs() {
public Map<String, Collection<RatedDocumentKey>> getUnknownDocs() {
return unknownDocs;
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.rankeval;
import org.elasticsearch.action.support.ToXContentToBytes;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
@ -36,68 +37,51 @@ import java.util.Objects;
* */
public class RatedDocument extends ToXContentToBytes implements Writeable {
public static final ParseField DOC_ID_FIELD = new ParseField("doc_id");
public static final ParseField TYPE_FIELD = new ParseField("type");
public static final ParseField INDEX_FIELD = new ParseField("index");
public static final ParseField RATING_FIELD = new ParseField("rating");
public static final ParseField KEY_FIELD = new ParseField("key");
private static final ObjectParser<RatedDocument, RankEvalContext> PARSER = new ObjectParser<>("ratings", RatedDocument::new);
static {
PARSER.declareString(RatedDocument::setIndex, INDEX_FIELD);
PARSER.declareString(RatedDocument::setType, TYPE_FIELD);
PARSER.declareString(RatedDocument::setDocId, DOC_ID_FIELD);
PARSER.declareObject(RatedDocument::setKey, (p, c) -> {
try {
return RatedDocumentKey.fromXContent(p, c);
} catch (IOException ex) {
throw new ParsingException(p.getTokenLocation(), "error parsing rank request", ex);
}
} , KEY_FIELD);
PARSER.declareInt(RatedDocument::setRating, RATING_FIELD);
}
// TODO instead of docId use path to id and id itself
private String docId;
private String type;
private String index;
private RatedDocumentKey key;
private int rating;
RatedDocument() {}
void setIndex(String index) {
this.index = index;
void setRatedDocumentKey(RatedDocumentKey key) {
this.key = key;
}
void setType(String type) {
this.type = type;
}
void setDocId(String docId) {
this.docId = docId;
void setKey(RatedDocumentKey key) {
this.key = key;
}
void setRating(int rating) {
this.rating = rating;
}
public RatedDocument(String index, String type, String docId, int rating) {
this.index = index;
this.type = type;
this.docId = docId;
public RatedDocument(RatedDocumentKey key, int rating) {
this.key = key;
this.rating = rating;
}
public RatedDocument(StreamInput in) throws IOException {
this.index = in.readString();
this.type = in.readString();
this.docId = in.readString();
this.key = new RatedDocumentKey(in);
this.rating = in.readVInt();
}
public String getIndex() {
return index;
}
public String getType() {
return type;
}
public String getDocID() {
return docId;
public RatedDocumentKey getKey() {
return this.key;
}
public int getRating() {
@ -106,9 +90,7 @@ public class RatedDocument extends ToXContentToBytes implements Writeable {
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(index);
out.writeString(type);
out.writeString(docId);
this.key.writeTo(out);
out.writeVInt(rating);
}
@ -119,9 +101,7 @@ public class RatedDocument extends ToXContentToBytes implements Writeable {
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(INDEX_FIELD.getPreferredName(), index);
builder.field(TYPE_FIELD.getPreferredName(), type);
builder.field(DOC_ID_FIELD.getPreferredName(), docId);
builder.field(KEY_FIELD.getPreferredName(), key);
builder.field(RATING_FIELD.getPreferredName(), rating);
builder.endObject();
return builder;
@ -136,14 +116,12 @@ public class RatedDocument extends ToXContentToBytes implements Writeable {
return false;
}
RatedDocument other = (RatedDocument) obj;
return Objects.equals(index, other.index) &&
Objects.equals(type, other.type) &&
Objects.equals(docId, other.docId) &&
return Objects.equals(key, other.key) &&
Objects.equals(rating, other.rating);
}
@Override
public final int hashCode() {
return Objects.hash(getClass(), index, type, docId, rating);
return Objects.hash(getClass(), key, rating);
}
}

View File

@ -0,0 +1,130 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.rankeval;
import org.elasticsearch.action.support.ToXContentToBytes;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import java.io.IOException;
import java.util.Objects;
/**
 * Identifies a rated document by the combination of index, type and document id.
 *
 * Two keys are equal only if all three components are equal, so documents that
 * share an id but live in a different index or type are told apart.
 */
public class RatedDocumentKey extends ToXContentToBytes implements Writeable {
    public static final ParseField DOC_ID_FIELD = new ParseField("doc_id");
    public static final ParseField TYPE_FIELD = new ParseField("type");
    public static final ParseField INDEX_FIELD = new ParseField("index");

    /** Parser that fills an empty key from the three string fields declared below. */
    private static final ObjectParser<RatedDocumentKey, RankEvalContext> PARSER = new ObjectParser<>("ratings", RatedDocumentKey::new);

    static {
        PARSER.declareString(RatedDocumentKey::setIndex, INDEX_FIELD);
        PARSER.declareString(RatedDocumentKey::setType, TYPE_FIELD);
        PARSER.declareString(RatedDocumentKey::setDocId, DOC_ID_FIELD);
    }

    // TODO instead of docId use path to id and id itself
    private String docId;
    private String type;
    private String index;

    /** Creates a key with all components unset; used by the parser, which fills them through the setters. */
    public RatedDocumentKey() {}

    /**
     * Creates a fully populated key.
     *
     * @param index the index component of the key
     * @param type the type component of the key
     * @param docId the document id component of the key
     */
    public RatedDocumentKey(String index, String type, String docId) {
        this.index = index;
        this.type = type;
        this.docId = docId;
    }

    /**
     * Reads a key from the stream. The read order (index, type, doc id) mirrors {@link #writeTo(StreamOutput)}.
     *
     * @param in the stream to read the three components from
     * @throws IOException if reading from the stream fails
     */
    public RatedDocumentKey(StreamInput in) throws IOException {
        this.index = in.readString();
        this.type = in.readString();
        this.docId = in.readString();
    }

    /**
     * Parses a key from the given parser.
     *
     * @param parser the parser to read the key from
     * @param context the context handed through to the object parser
     * @return the parsed key
     * @throws IOException if parsing fails
     */
    public static RatedDocumentKey fromXContent(XContentParser parser, RankEvalContext context) throws IOException {
        return PARSER.parse(parser, context);
    }

    void setIndex(String index) {
        this.index = index;
    }

    void setType(String type) {
        this.type = type;
    }

    void setDocId(String docId) {
        this.docId = docId;
    }

    /** @return the index component of this key */
    public String getIndex() {
        return index;
    }

    /** @return the type component of this key */
    public String getType() {
        return type;
    }

    /** @return the document id component of this key */
    public String getDocID() {
        return docId;
    }

    /**
     * Writes index, type and doc id to the stream, in that order.
     *
     * @param out the stream to write to
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeString(index);
        out.writeString(type);
        out.writeString(docId);
    }

    /**
     * Renders the key as an object holding the index, type and doc id fields.
     *
     * @param builder the builder to write to
     * @param params rendering parameters (not used here)
     * @return the builder that was passed in
     * @throws IOException if writing to the builder fails
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject()
            .field(INDEX_FIELD.getPreferredName(), index)
            .field(TYPE_FIELD.getPreferredName(), type)
            .field(DOC_ID_FIELD.getPreferredName(), docId)
            .endObject();
        return builder;
    }

    /** Keys are equal when they have the exact same class and equal index, type and doc id. */
    @Override
    public final boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (obj.getClass() != getClass()) {
            return false;
        }
        RatedDocumentKey that = (RatedDocumentKey) obj;
        boolean sameIndex = Objects.equals(index, that.index);
        boolean sameType = Objects.equals(type, that.type);
        boolean sameDocId = Objects.equals(docId, that.docId);
        return sameIndex && sameType && sameDocId;
    }

    /** Hashes the class together with the same three components that {@link #equals(Object)} compares. */
    @Override
    public final int hashCode() {
        return Objects.hash(getClass(), index, type, docId);
    }
}

View File

@ -85,7 +85,7 @@ public class TransportRankEvalAction extends HandledTransportAction<RankEvalRequ
RankedListQualityMetric metric = qualityTask.getEvaluator();
double qualitySum = 0;
Map<String, Collection<String>> unknownDocs = new HashMap<String, Collection<String>>();
Map<String, Collection<RatedDocumentKey>> unknownDocs = new HashMap<String, Collection<RatedDocumentKey>>();
Collection<QuerySpec> specifications = qualityTask.getSpecifications();
for (QuerySpec spec : specifications) {
SearchSourceBuilder specRequest = spec.getTestRequest();

View File

@ -31,6 +31,7 @@ import org.elasticsearch.index.rankeval.RankEvalResponse;
import org.elasticsearch.index.rankeval.RankEvalResult;
import org.elasticsearch.index.rankeval.RankEvalSpec;
import org.elasticsearch.index.rankeval.RatedDocument;
import org.elasticsearch.index.rankeval.RatedDocumentKey;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.ESIntegTestCase;
@ -100,9 +101,9 @@ public class RankEvalRequestTests extends ESIntegTestCase {
RankEvalResult result = response.getRankEvalResult();
assertEquals(specId, result.getSpecId());
assertEquals(1.0, result.getQualityLevel(), Double.MIN_VALUE);
Set<Entry<String, Collection<String>>> entrySet = result.getUnknownDocs().entrySet();
Set<Entry<String, Collection<RatedDocumentKey>>> entrySet = result.getUnknownDocs().entrySet();
assertEquals(2, entrySet.size());
for (Entry<String, Collection<String>> entry : entrySet) {
for (Entry<String, Collection<RatedDocumentKey>> entry : entrySet) {
if (entry.getKey() == "amsterdam_query") {
assertEquals(2, entry.getValue().size());
}
@ -115,7 +116,7 @@ public class RankEvalRequestTests extends ESIntegTestCase {
private static List<RatedDocument> createRelevant(String... docs) {
List<RatedDocument> relevant = new ArrayList<>();
for (String doc : docs) {
relevant.add(new RatedDocument("test", "testtype", doc, Rating.RELEVANT.ordinal()));
relevant.add(new RatedDocument(new RatedDocumentKey("test", "testtype", doc), Rating.RELEVANT.ordinal()));
}
return relevant;
}

View File

@ -23,8 +23,9 @@ import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.rankeval.PrecisionAtN.Rating;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.test.ESTestCase;
@ -38,26 +39,58 @@ public class PrecisionAtNTests extends ESTestCase {
public void testPrecisionAtFiveCalculation() throws IOException, InterruptedException, ExecutionException {
List<RatedDocument> rated = new ArrayList<>();
rated.add(new RatedDocument("test", "testtype", "0", Rating.RELEVANT.ordinal()));
SearchHit[] hits = new InternalSearchHit[1];
hits[0] = new InternalSearchHit(0, "0", new Text("type"), Collections.emptyMap());
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "0"), Rating.RELEVANT.ordinal()));
InternalSearchHit[] hits = new InternalSearchHit[1];
hits[0] = new InternalSearchHit(0, "0", new Text("testtype"), Collections.emptyMap());
hits[0].shard(new SearchShardTarget("testnode", new Index("test", "uuid"), 0));
assertEquals(1, (new PrecisionAtN(5)).evaluate(hits, rated).getQualityLevel(), 0.00001);
}
public void testPrecisionAtFiveIgnoreOneResult() throws IOException, InterruptedException, ExecutionException {
List<RatedDocument> rated = new ArrayList<>();
rated.add(new RatedDocument("test", "testtype", "0", Rating.RELEVANT.ordinal()));
rated.add(new RatedDocument("test", "testtype", "1", Rating.RELEVANT.ordinal()));
rated.add(new RatedDocument("test", "testtype", "2", Rating.RELEVANT.ordinal()));
rated.add(new RatedDocument("test", "testtype", "3", Rating.RELEVANT.ordinal()));
rated.add(new RatedDocument("test", "testtype", "4", Rating.IRRELEVANT.ordinal()));
SearchHit[] hits = new InternalSearchHit[5];
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "0"), Rating.RELEVANT.ordinal()));
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "1"), Rating.RELEVANT.ordinal()));
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "2"), Rating.RELEVANT.ordinal()));
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "3"), Rating.RELEVANT.ordinal()));
rated.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "4"), Rating.IRRELEVANT.ordinal()));
InternalSearchHit[] hits = new InternalSearchHit[5];
for (int i = 0; i < 5; i++) {
hits[i] = new InternalSearchHit(i, i+"", new Text("type"), Collections.emptyMap());
hits[i] = new InternalSearchHit(i, i+"", new Text("testtype"), Collections.emptyMap());
hits[i].shard(new SearchShardTarget("testnode", new Index("test", "uuid"), 0));
}
assertEquals((double) 4 / 5, (new PrecisionAtN(5)).evaluate(hits, rated).getQualityLevel(), 0.00001);
}
/**
 * Checks that the index is part of the comparison between hits and ratings:
 * ratings for docs "0" and "1" point at index "test_other" while every hit
 * comes from index "test", so those two hits match no rating. The expected
 * value of 2/3 is consistent with two relevant and one irrelevant match.
 */
public void testPrecisionAtFiveCorrectIndex() throws IOException, InterruptedException, ExecutionException {
    List<RatedDocument> ratings = new ArrayList<>();
    // same ids as hits 0 and 1, but rated for a different index
    ratings.add(new RatedDocument(new RatedDocumentKey("test_other", "testtype", "0"), Rating.RELEVANT.ordinal()));
    ratings.add(new RatedDocument(new RatedDocumentKey("test_other", "testtype", "1"), Rating.RELEVANT.ordinal()));
    // these three keys match hits 2, 3 and 4 exactly
    ratings.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "2"), Rating.RELEVANT.ordinal()));
    ratings.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "3"), Rating.RELEVANT.ordinal()));
    ratings.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "4"), Rating.IRRELEVANT.ordinal()));

    InternalSearchHit[] searchHits = new InternalSearchHit[5];
    for (int pos = 0; pos < 5; pos++) {
        InternalSearchHit hit = new InternalSearchHit(pos, pos + "", new Text("testtype"), Collections.emptyMap());
        hit.shard(new SearchShardTarget("testnode", new Index("test", "uuid"), 0));
        searchHits[pos] = hit;
    }

    double precision = (new PrecisionAtN(5)).evaluate(searchHits, ratings).getQualityLevel();
    assertEquals((double) 2 / 3, precision, 0.00001);
}
/**
 * Checks that the type is part of the comparison between hits and ratings:
 * ratings for docs "0" and "1" use type "other_type" while every hit has
 * type "testtype", so those two hits match no rating. The expected value of
 * 2/3 is consistent with two relevant and one irrelevant match.
 */
public void testPrecisionAtFiveCorrectType() throws IOException, InterruptedException, ExecutionException {
    List<RatedDocument> ratings = new ArrayList<>();
    // same ids as hits 0 and 1, but rated for a different type
    ratings.add(new RatedDocument(new RatedDocumentKey("test", "other_type", "0"), Rating.RELEVANT.ordinal()));
    ratings.add(new RatedDocument(new RatedDocumentKey("test", "other_type", "1"), Rating.RELEVANT.ordinal()));
    // these three keys match hits 2, 3 and 4 exactly
    ratings.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "2"), Rating.RELEVANT.ordinal()));
    ratings.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "3"), Rating.RELEVANT.ordinal()));
    ratings.add(new RatedDocument(new RatedDocumentKey("test", "testtype", "4"), Rating.IRRELEVANT.ordinal()));

    InternalSearchHit[] searchHits = new InternalSearchHit[5];
    for (int pos = 0; pos < 5; pos++) {
        InternalSearchHit hit = new InternalSearchHit(pos, pos + "", new Text("testtype"), Collections.emptyMap());
        hit.shard(new SearchShardTarget("testnode", new Index("test", "uuid"), 0));
        searchHits[pos] = hit;
    }

    double precision = (new PrecisionAtN(5)).evaluate(searchHits, ratings).getQualityLevel();
    assertEquals((double) 2 / 3, precision, 0.00001);
}
public void testParseFromXContent() throws IOException {
String xContent = " {\n"
+ " \"size\": 10\n"

View File

@ -81,9 +81,9 @@ public class QuerySpecTests extends ESTestCase {
+ " \"size\": 10\n"
+ " },\n"
+ " \"ratings\": [ "
+ " {\"index\": \"test\", \"type\": \"testtype\", \"doc_id\": \"1\", \"rating\" : 1 }, "
+ " {\"index\": \"test\", \"type\": \"testtype\", \"doc_id\": \"2\", \"rating\" : 0 }, "
+ " {\"index\": \"test\", \"type\": \"testtype\", \"doc_id\": \"3\", \"rating\" : 1 }]\n"
+ " {\"key\": {\"index\": \"test\", \"type\": \"testtype\", \"doc_id\": \"1\"}, \"rating\" : 1 }, "
+ " {\"key\": {\"index\": \"test\", \"type\": \"testtype\", \"doc_id\": \"2\"}, \"rating\" : 0 }, "
+ " {\"key\": {\"index\": \"test\", \"type\": \"testtype\", \"doc_id\": \"3\"}, \"rating\" : 1 }]\n"
+ "}";
XContentParser parser = XContentFactory.xContent(querySpecString).createParser(querySpecString);
QueryParseContext queryContext = new QueryParseContext(queriesRegistry, parser, ParseFieldMatcher.STRICT);
@ -94,11 +94,11 @@ public class QuerySpecTests extends ESTestCase {
assertNotNull(specification.getTestRequest());
List<RatedDocument> ratedDocs = specification.getRatedDocs();
assertEquals(3, ratedDocs.size());
assertEquals("1", ratedDocs.get(0).getDocID());
assertEquals("1", ratedDocs.get(0).getKey().getDocID());
assertEquals(1, ratedDocs.get(0).getRating());
assertEquals("2", ratedDocs.get(1).getDocID());
assertEquals("2", ratedDocs.get(1).getKey().getDocID());
assertEquals(0, ratedDocs.get(1).getRating());
assertEquals("3", ratedDocs.get(2).getDocID());
assertEquals("3", ratedDocs.get(2).getKey().getDocID());
assertEquals(1, ratedDocs.get(2).getRating());
}
}

View File

@ -37,7 +37,7 @@ public class RatedDocumentTests extends ESTestCase {
String type = randomAsciiOfLength(10);
String docId = randomAsciiOfLength(10);
int rating = randomInt();
RatedDocument testItem = new RatedDocument(index, type, docId, rating);
RatedDocument testItem = new RatedDocument(new RatedDocumentKey(index, type, docId), rating);
XContentBuilder builder = XContentFactory.contentBuilder(randomFrom(XContentType.values()));
if (randomBoolean()) {

View File

@ -41,14 +41,14 @@
"id": "amsterdam_query",
"request": { "query": { "match" : {"text" : "amsterdam" }}},
"ratings": [
{ "index": "foo", "type": "bar", "doc_id": "doc1", "rating": 0},
{ "index": "foo", "type": "bar", "doc_id": "doc2", "rating": 1},
{ "index": "foo", "type": "bar", "doc_id": "doc3", "rating": 1}]
{"key": { "index": "foo", "type": "bar", "doc_id": "doc1"}, "rating": 0},
{"key": { "index": "foo", "type": "bar", "doc_id": "doc2"}, "rating": 1},
{"key": { "index": "foo", "type": "bar", "doc_id": "doc3"}, "rating": 1}]
},
{
"id" : "berlin_query",
"request": { "query": { "match" : { "text" : "berlin" } }, "size" : 10 },
"ratings": [{"index": "foo", "type": "bar", "doc_id": "doc1", "rating": 1}]
"ratings": [{"key": {"index": "foo", "type": "bar", "doc_id": "doc1"}, "rating": 1}]
}
],
"metric" : { "precisionatn": { "size": 10}}
@ -56,5 +56,5 @@
- match: {rank_eval.spec_id: "cities_qa_queries"}
- match: {rank_eval.quality_level: 1}
- match: {rank_eval.unknown_docs.0.amsterdam_query: [ "doc4"]}
- match: {rank_eval.unknown_docs.1.berlin_query: [ "doc4"]}
- match: {rank_eval.unknown_docs.0.amsterdam_query: [ {"index": "foo", "type": "bar", "doc_id": "doc4"}]}
- match: {rank_eval.unknown_docs.1.berlin_query: [ {"index": "foo", "type": "bar", "doc_id": "doc4"}]}