From 34cbc10128d07742e09cf858f2b7a75e15d7e587 Mon Sep 17 00:00:00 2001 From: Isabel Drost-Fromm Date: Thu, 28 Jul 2016 11:27:06 +0200 Subject: [PATCH] Add index and type information to rated doc Also add roundtrip testing of the xcontent serialisation of RatedDoc --- .../index/rankeval/QuerySpec.java | 2 +- .../index/rankeval/RatedDocument.java | 115 ++++++++++++++---- .../action/quality/RankEvalRequestTests.java | 2 +- .../index/rankeval/PrecisionAtNTests.java | 12 +- .../index/rankeval/QuerySpecTests.java | 5 +- .../test/rank_eval/10_basic.yaml | 7 +- 6 files changed, 106 insertions(+), 37 deletions(-) diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java index 2e82fd98939..b316ab8d1d2 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/QuerySpec.java @@ -159,7 +159,7 @@ public class QuerySpec implements Writeable { } , REQUEST_FIELD); PARSER.declareObjectArray(QuerySpec::setRatedDocs, (p, c) -> { try { - return RatedDocument.fromXContent(p); + return RatedDocument.fromXContent(p, c); } catch (IOException ex) { throw new ParsingException(p.getTokenLocation(), "error parsing ratings", ex); } diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedDocument.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedDocument.java index d1bd99b97c4..c76064d98f1 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedDocument.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/RatedDocument.java @@ -19,33 +19,83 @@ package org.elasticsearch.index.rankeval; -import org.elasticsearch.common.ParsingException; +import org.elasticsearch.action.support.ToXContentToBytes; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentParser.Token; import java.io.IOException; +import java.util.Objects; /** * A document ID and its rating for the query QA use case. * */ -public class RatedDocument implements Writeable { +public class RatedDocument extends ToXContentToBytes implements Writeable { - private final String docId; - private final int rating; + public static final ParseField DOC_ID_FIELD = new ParseField("doc_id"); + public static final ParseField TYPE_FIELD = new ParseField("type"); + public static final ParseField INDEX_FIELD = new ParseField("index"); + public static final ParseField RATING_FIELD = new ParseField("rating"); - public RatedDocument(String docId, int rating) { + private static final ObjectParser PARSER = new ObjectParser<>("ratings", RatedDocument::new); + + static { + PARSER.declareString(RatedDocument::setIndex, INDEX_FIELD); + PARSER.declareString(RatedDocument::setType, TYPE_FIELD); + PARSER.declareString(RatedDocument::setDocId, DOC_ID_FIELD); + PARSER.declareInt(RatedDocument::setRating, RATING_FIELD); + } + + // TODO instead of docId use path to id and id itself + private String docId; + private String type; + private String index; + private int rating; + + RatedDocument() {} + + void setIndex(String index) { + this.index = index; + } + + void setType(String type) { + this.type = type; + } + + void setDocId(String docId) { + this.docId = docId; + } + + void setRating(int rating) { + this.rating = rating; + } + + public RatedDocument(String index, String type, String docId, int rating) { + this.index = index; + this.type = type; this.docId = docId; this.rating = rating; } public RatedDocument(StreamInput in) throws IOException { + this.index = in.readString(); + this.type = in.readString(); this.docId = in.readString(); this.rating = in.readVInt(); } + public String getIndex() { + return index; + } + + public String getType() { + return type; + } + public String getDocID() { return docId; } @@ -56,31 +106,44 @@ public class RatedDocument implements Writeable { @Override public void writeTo(StreamOutput out) throws IOException { + out.writeString(index); + out.writeString(type); out.writeString(docId); out.writeVInt(rating); } - public static RatedDocument fromXContent(XContentParser parser) throws IOException { - String id = null; - int rating = Integer.MIN_VALUE; - Token token; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (parser.currentToken().equals(Token.FIELD_NAME)) { - if (id != null) { - throw new ParsingException(parser.getTokenLocation(), "only one document id allowed, found [{}] but already got [{}]", - parser.currentName(), id); - } - id = parser.currentName(); - } else if (parser.currentToken().equals(Token.VALUE_NUMBER)) { - rating = parser.intValue(); - } else { - throw new ParsingException(parser.getTokenLocation(), "unexpected token [{}] while parsing rated document", - token); - } + public static RatedDocument fromXContent(XContentParser parser, RankEvalContext context) throws IOException { + return PARSER.parse(parser, context); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(INDEX_FIELD.getPreferredName(), index); + builder.field(TYPE_FIELD.getPreferredName(), type); + builder.field(DOC_ID_FIELD.getPreferredName(), docId); + builder.field(RATING_FIELD.getPreferredName(), rating); + builder.endObject(); + return builder; + } + + @Override + public final boolean equals(Object obj) { + if (this == obj) { + return true; } - if (id == null) { - throw new ParsingException(parser.getTokenLocation(), "didn't find document id"); + if (obj == null || getClass() != obj.getClass()) { + return false; } - return new RatedDocument(id, rating); + RatedDocument other = (RatedDocument) obj; + return Objects.equals(index, other.index) && + Objects.equals(type, other.type) && + Objects.equals(docId, other.docId) && + Objects.equals(rating, other.rating); + } + + @Override + public final int hashCode() { + return Objects.hash(getClass(), index, type, docId, rating); } } diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRequestTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRequestTests.java index 79df86f6e56..1250b43c036 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRequestTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/action/quality/RankEvalRequestTests.java @@ -115,7 +115,7 @@ public class RankEvalRequestTests extends ESIntegTestCase { private static List createRelevant(String... docs) { List relevant = new ArrayList<>(); for (String doc : docs) { - relevant.add(new RatedDocument(doc, Rating.RELEVANT.ordinal())); + relevant.add(new RatedDocument("test", "testtype", doc, Rating.RELEVANT.ordinal())); } return relevant; } diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java index c123d5bbf8f..064918fe3c3 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/PrecisionAtNTests.java @@ -38,7 +38,7 @@ public class PrecisionAtNTests extends ESTestCase { public void testPrecisionAtFiveCalculation() throws IOException, InterruptedException, ExecutionException { List rated = new ArrayList<>(); - rated.add(new RatedDocument("0", Rating.RELEVANT.ordinal())); + rated.add(new RatedDocument("test", "testtype", "0", Rating.RELEVANT.ordinal())); SearchHit[] hits = new InternalSearchHit[1]; hits[0] = new InternalSearchHit(0, "0", new Text("type"), Collections.emptyMap()); assertEquals(1, (new PrecisionAtN(5)).evaluate(hits, rated).getQualityLevel(), 0.00001); @@ -46,11 +46,11 @@ public class PrecisionAtNTests extends ESTestCase { public void testPrecisionAtFiveIgnoreOneResult() throws IOException, InterruptedException, ExecutionException { List rated = new ArrayList<>(); - rated.add(new RatedDocument("0", Rating.RELEVANT.ordinal())); - rated.add(new RatedDocument("1", Rating.RELEVANT.ordinal())); - rated.add(new RatedDocument("2", Rating.RELEVANT.ordinal())); - rated.add(new RatedDocument("3", Rating.RELEVANT.ordinal())); - rated.add(new RatedDocument("4", Rating.IRRELEVANT.ordinal())); + rated.add(new RatedDocument("test", "testtype", "0", Rating.RELEVANT.ordinal())); + rated.add(new RatedDocument("test", "testtype", "1", Rating.RELEVANT.ordinal())); + rated.add(new RatedDocument("test", "testtype", "2", Rating.RELEVANT.ordinal())); + rated.add(new RatedDocument("test", "testtype", "3", Rating.RELEVANT.ordinal())); + rated.add(new RatedDocument("test", "testtype", "4", Rating.IRRELEVANT.ordinal())); SearchHit[] hits = new InternalSearchHit[5]; for (int i = 0; i < 5; i++) { hits[i] = new InternalSearchHit(i, i+"", new Text("type"), Collections.emptyMap()); diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/QuerySpecTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/QuerySpecTests.java index 19f30c8b458..ecfa5684825 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/QuerySpecTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/QuerySpecTests.java @@ -79,7 +79,10 @@ public class QuerySpecTests extends ESTestCase { + " },\n" + " \"size\": 10\n" + " },\n" - + " \"ratings\": [ {\"1\": 1 }, { \"2\": 0 }, { \"3\": 1 } ]\n" + + " \"ratings\": [ " + + " {\"index\": \"test\", \"type\": \"testtype\", \"doc_id\": \"1\", \"rating\" : 1 }, " + + " {\"index\": \"test\", \"type\": \"testtype\", \"doc_id\": \"2\", \"rating\" : 0 }, " + + " {\"index\": \"test\", \"type\": \"testtype\", \"doc_id\": \"3\", \"rating\" : 1 }]\n" + "}"; XContentParser parser = XContentFactory.xContent(querySpecString).createParser(querySpecString); QueryParseContext queryContext = new QueryParseContext(queriesRegistry, parser, ParseFieldMatcher.STRICT); diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml index d3487082b6a..0b7f8d2e813 100644 --- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml +++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yaml @@ -40,12 +40,15 @@ { "id": "amsterdam_query", "request": { "query": { "match" : {"text" : "amsterdam" }}}, - "ratings": [{ "doc1": 0}, {"doc2": 1}, {"doc3": 1}] + "ratings": [ + { "index": "foo", "type": "bar", "doc_id": "doc1", "rating": 0}, + { "index": "foo", "type": "bar", "doc_id": "doc2", "rating": 1}, + { "index": "foo", "type": "bar", "doc_id": "doc3", "rating": 1}] }, { "id" : "berlin_query", "request": { "query": { "match" : { "text" : "berlin" } }, "size" : 10 }, - "ratings": [{"doc1": 1}] + "ratings": [{"index": "foo", "type": "bar", "doc_id": "doc1", "rating": 1}] } ], "metric" : { "precisionatn": { "size": 10}}