From 9b84ad3c7be0f54ec9d0a06443fc2f165d21f903 Mon Sep 17 00:00:00 2001 From: Chris Earle Date: Wed, 17 Sep 2014 16:33:05 -0400 Subject: [PATCH] Adding "min" score mode to parent-child queries Support for "max", "sum", and "avg" already existed. --- .../queries/has-child-query.asciidoc | 10 +- .../index/search/child/ChildrenQuery.java | 37 +++++ .../index/search/child/ScoreType.java | 7 + .../index/search/child/TopChildrenQuery.java | 21 ++- .../search/child/AbstractChildTests.java | 9 +- .../search/child/ChildrenQueryTests.java | 148 +++++++++++++++++- .../index/search/child/MockScorer.java | 44 ++++-- .../index/search/child/ScoreTypeTests.java | 55 +++++++ 8 files changed, 303 insertions(+), 28 deletions(-) create mode 100644 src/test/java/org/elasticsearch/index/search/child/ScoreTypeTests.java diff --git a/docs/reference/query-dsl/queries/has-child-query.asciidoc b/docs/reference/query-dsl/queries/has-child-query.asciidoc index 94ac23b5feb..0fcf7873c26 100644 --- a/docs/reference/query-dsl/queries/has-child-query.asciidoc +++ b/docs/reference/query-dsl/queries/has-child-query.asciidoc @@ -31,7 +31,7 @@ query the `total_hits` is always correct. ==== Scoring capabilities The `has_child` also has scoring support. The -supported score types are `max`, `sum`, `avg` or `none`. The default is +supported score types are `min`, `max`, `sum`, `avg` or `none`. The default is `none` and yields the same behaviour as in previous versions. If the score type is set to another value than `none`, the scores of all the matching child documents are aggregated into the associated parent @@ -84,9 +84,9 @@ the `score_mode` parameter. [float] ==== Memory Considerations -In order to support parent-child joins, all of the (string) parent IDs -must be resident in memory (in the <>. -Additionaly, every child document is mapped to its parent using a long +In order to support parent-child joins, all of the (string) parent IDs +must be resident in memory (in the <>. +Additionaly, every child document is mapped to its parent using a long value (approximately). It is advisable to keep the string parent ID short in order to reduce memory usage. 
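For illustration, selecting the newly supported `min` mode from the Java client could look like the sketch below. Only the `"min"` value comes from this change; the `scoreType(...)` builder method and the `blog_tag`/`tag` names are assumptions made for the example, not part of this patch.

--------------------------------------------------
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

// Assumed 1.x Java API usage: score each parent by its lowest-scoring matching child.
QueryBuilder query = QueryBuilders.hasChildQuery("blog_tag",
        QueryBuilders.termQuery("tag", "something"))
    .scoreType("min");
--------------------------------------------------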
@@ -98,5 +98,3 @@ APIS, eg: -------------------------------------------------- curl -XGET "http://localhost:9200/_stats/id_cache?pretty&human" -------------------------------------------------- - - diff --git a/src/main/java/org/elasticsearch/index/search/child/ChildrenQuery.java b/src/main/java/org/elasticsearch/index/search/child/ChildrenQuery.java index 725f0cb5d8e..ff6f132fb2b 100644 --- a/src/main/java/org/elasticsearch/index/search/child/ChildrenQuery.java +++ b/src/main/java/org/elasticsearch/index/search/child/ChildrenQuery.java @@ -176,6 +176,9 @@ public class ChildrenQuery extends Query { try { if (minChildren == 0 && maxChildren == 0 && scoreType != ScoreType.NONE) { switch (scoreType) { + case MIN: + collector = new MinCollector(globalIfd, sc, parentType); + break; case MAX: collector = new MaxCollector(globalIfd, sc, parentType); break; @@ -186,6 +189,9 @@ public class ChildrenQuery extends Query { } if (collector == null) { switch (scoreType) { + case MIN: + collector = new MinCountCollector(globalIfd, sc, parentType); + break; case MAX: collector = new MaxCountCollector(globalIfd, sc, parentType); break; @@ -468,6 +474,21 @@ public class ChildrenQuery extends Query { } } + private final static class MinCollector extends ParentScoreCollector { + + private MinCollector(IndexParentChildFieldData globalIfd, SearchContext searchContext, String parentType) { + super(globalIfd, searchContext, parentType); + } + + @Override + protected void existingParent(long parentIdx) throws IOException { + float currentScore = scorer.score(); + if (currentScore < scores.get(parentIdx)) { + scores.set(parentIdx, currentScore); + } + } + } + private final static class MaxCountCollector extends ParentScoreCountCollector { private MaxCountCollector(IndexParentChildFieldData globalIfd, SearchContext searchContext, String parentType) { @@ -484,6 +505,22 @@ public class ChildrenQuery extends Query { } } + private final static class MinCountCollector extends ParentScoreCountCollector { + + private MinCountCollector(IndexParentChildFieldData globalIfd, SearchContext searchContext, String parentType) { + super(globalIfd, searchContext, parentType); + } + + @Override + protected void existingParent(long parentIdx) throws IOException { + float currentScore = scorer.score(); + if (currentScore < scores.get(parentIdx)) { + scores.set(parentIdx, currentScore); + } + occurrences.increment(parentIdx, 1); + } + } + private final static class SumCountAndAvgCollector extends ParentScoreCountCollector { SumCountAndAvgCollector(IndexParentChildFieldData globalIfd, SearchContext searchContext, String parentType) { diff --git a/src/main/java/org/elasticsearch/index/search/child/ScoreType.java b/src/main/java/org/elasticsearch/index/search/child/ScoreType.java index acb7ff62987..e0b84a14274 100644 --- a/src/main/java/org/elasticsearch/index/search/child/ScoreType.java +++ b/src/main/java/org/elasticsearch/index/search/child/ScoreType.java @@ -24,6 +24,11 @@ import org.elasticsearch.ElasticsearchIllegalArgumentException; * Defines how scores from child documents are mapped into the parent document. */ public enum ScoreType { + /** + * Only the lowest score of all matching child documents is mapped into the + * parent. + */ + MIN, /** * Only the highest score of all matching child documents is mapped into the * parent. 
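For reference, the per-parent reduction that each scoring mode performs over the matching children's scores can be summarised with the plain-Java sketch below. It is an illustration mirroring the collectors above and the MockScorer test helper, not Elasticsearch code; `NONE` is omitted because it does no child-score aggregation.

--------------------------------------------------
// Illustration only: reduce one parent's matching child scores according to the mode.
static float reduce(ScoreType scoreType, float[] childScores) {
    float aggregate = childScores[0]; // seed with the first score so MIN does not start at 0
    for (int i = 1; i < childScores.length; i++) {
        float score = childScores[i];
        switch (scoreType) {
            case MIN:
                aggregate = Math.min(aggregate, score);
                break;
            case MAX:
                aggregate = Math.max(aggregate, score);
                break;
            case SUM:
            case AVG:
                aggregate += score;
                break;
        }
    }
    return scoreType == ScoreType.AVG ? aggregate / childScores.length : aggregate;
}
--------------------------------------------------

For child scores 0.2, 0.5 and 0.8 this yields 0.2 for `MIN`, 0.8 for `MAX`, 1.5 for `SUM` and 0.5 for `AVG`.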
@@ -50,6 +55,8 @@ public enum ScoreType { public static ScoreType fromString(String type) { if ("none".equals(type)) { return NONE; + } else if ("min".equals(type)) { + return MIN; } else if ("max".equals(type)) { return MAX; } else if ("avg".equals(type)) { diff --git a/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java b/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java index 7d3c9ed5930..5af4b67b9b3 100644 --- a/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java +++ b/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java @@ -218,11 +218,15 @@ public class TopChildrenQuery extends Query { parentDoc.docId = parentDocId; parentDoc.count = 1; parentDoc.maxScore = scoreDoc.score; + parentDoc.minScore = scoreDoc.score; parentDoc.sumScores = scoreDoc.score; readerParentDocs.put(parentDocId, parentDoc); } else { parentDoc.count++; parentDoc.sumScores += scoreDoc.score; + if (scoreDoc.score < parentDoc.minScore) { + parentDoc.minScore = scoreDoc.score; + } if (scoreDoc.score > parentDoc.maxScore) { parentDoc.maxScore = scoreDoc.score; } @@ -320,11 +324,19 @@ public class TopChildrenQuery extends Query { public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { ParentDoc[] readerParentDocs = parentDocs.get(context.reader().getCoreCacheKey()); if (readerParentDocs != null) { - if (scoreType == ScoreType.MAX) { + if (scoreType == ScoreType.MIN) { return new ParentScorer(this, readerParentDocs) { @Override public float score() throws IOException { - assert doc.docId >= 0 || doc.docId < NO_MORE_DOCS; + assert doc.docId >= 0 && doc.docId != NO_MORE_DOCS; + return doc.minScore; + } + }; + } else if (scoreType == ScoreType.MAX) { + return new ParentScorer(this, readerParentDocs) { + @Override + public float score() throws IOException { + assert doc.docId >= 0 && doc.docId != NO_MORE_DOCS; return doc.maxScore; } }; @@ -332,7 +344,7 @@ public class TopChildrenQuery extends Query { return new ParentScorer(this, readerParentDocs) { @Override public float score() throws IOException { - assert doc.docId >= 0 || doc.docId < NO_MORE_DOCS; + assert doc.docId >= 0 && doc.docId != NO_MORE_DOCS; return doc.sumScores / doc.count; } }; @@ -340,7 +352,7 @@ public class TopChildrenQuery extends Query { return new ParentScorer(this, readerParentDocs) { @Override public float score() throws IOException { - assert doc.docId >= 0 || doc.docId < NO_MORE_DOCS; + assert doc.docId >= 0 && doc.docId != NO_MORE_DOCS; return doc.sumScores; } @@ -412,6 +424,7 @@ public class TopChildrenQuery extends Query { private static class ParentDoc { public int docId; public int count; + public float minScore = Float.NaN; public float maxScore = Float.NaN; public float sumScores = 0; } diff --git a/src/test/java/org/elasticsearch/index/search/child/AbstractChildTests.java b/src/test/java/org/elasticsearch/index/search/child/AbstractChildTests.java index 7358a7dccf4..85ab3bf1aad 100644 --- a/src/test/java/org/elasticsearch/index/search/child/AbstractChildTests.java +++ b/src/test/java/org/elasticsearch/index/search/child/AbstractChildTests.java @@ -42,10 +42,17 @@ import static org.hamcrest.Matchers.equalTo; @LuceneTestCase.SuppressCodecs(value = {"Lucene40", "Lucene3x"}) public abstract class AbstractChildTests extends ElasticsearchSingleNodeLuceneTestCase { + /** + * The name of the field within the child type that stores a score to use in test queries. + *
+ * Its type is {@code double}. + */ + protected static String CHILD_SCORE_NAME = "childScore"; + static SearchContext createSearchContext(String indexName, String parentType, String childType) throws IOException { IndexService indexService = createIndex(indexName); MapperService mapperService = indexService.mapperService(); - mapperService.merge(childType, new CompressedString(PutMappingRequest.buildFromSimplifiedDef(childType, "_parent", "type=" + parentType).string()), true); + mapperService.merge(childType, new CompressedString(PutMappingRequest.buildFromSimplifiedDef(childType, "_parent", "type=" + parentType, CHILD_SCORE_NAME, "type=double").string()), true); return createSearchContext(indexService); } diff --git a/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java b/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java index 96086b7603b..4a13711c8ff 100644 --- a/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java +++ b/src/test/java/org/elasticsearch/index/search/child/ChildrenQueryTests.java @@ -24,6 +24,7 @@ import com.carrotsearch.hppc.ObjectObjectOpenHashMap; import com.carrotsearch.randomizedtesting.generators.RandomInts; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.index.*; @@ -34,11 +35,17 @@ import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LuceneTestCase; import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.common.lucene.search.NotFilter; +import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.lucene.search.XFilteredQuery; +import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction; +import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.cache.fixedbitset.FixedBitSetFilter; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.index.fielddata.plain.ParentChildIndexFieldData; +import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.Uid; +import org.elasticsearch.index.mapper.internal.IdFieldMapper; import org.elasticsearch.index.mapper.internal.ParentFieldMapper; import org.elasticsearch.index.mapper.internal.TypeFieldMapper; import org.elasticsearch.index.mapper.internal.UidFieldMapper; @@ -56,6 +63,9 @@ import java.util.NavigableMap; import java.util.Random; import java.util.TreeMap; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + public class ChildrenQueryTests extends AbstractChildTests { @BeforeClass @@ -128,7 +138,7 @@ public class ChildrenQueryTests extends AbstractChildTests { String childValue = childValues[random().nextInt(childValues.length)]; document = new Document(); - document.add(new StringField(UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId)), Field.Store.NO)); + document.add(new StringField(UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId++)), Field.Store.NO)); document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO)); document.add(new StringField(ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO)); document.add(new StringField("field1", childValue, Field.Store.NO)); @@ -264,4 +274,140 @@ 
public class ChildrenQueryTests extends AbstractChildTests { directory.close(); } + @Test + public void testMinScoreMode() throws IOException { + assertScoreType(ScoreType.MIN); + } + + @Test + public void testMaxScoreMode() throws IOException { + assertScoreType(ScoreType.MAX); + } + + @Test + public void testAvgScoreMode() throws IOException { + assertScoreType(ScoreType.AVG); + } + + @Test + public void testSumScoreMode() throws IOException { + assertScoreType(ScoreType.SUM); + } + + /** + * Assert that the {@code scoreType} operates as expected and parents are found in the expected order. + *
+ * This will use the test index's parent/child types to create parents with multiple children. Each child will have + * a randomly generated scored stored in {@link #CHILD_SCORE_NAME}, which is used to score based on the + * {@code scoreType} by using a {@link MockScorer} to determine the expected scores. + * @param scoreType The score type to use within the query to score parents relative to their children. + * @throws IOException if any unexpected error occurs + */ + private void assertScoreType(ScoreType scoreType) throws IOException { + SearchContext context = SearchContext.current(); + Directory directory = newDirectory(); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random()))); + + // calculates the expected score per parent + MockScorer scorer = new MockScorer(scoreType); + scorer.scores = new FloatArrayList(10); + + // number of parents to generate + int parentDocs = scaledRandomIntBetween(2, 10); + // unique child ID + int childDocId = 0; + + // Parent ID to expected score + Map parentScores = new TreeMap<>(); + + // Add a few random parents to ensure that the children's score is appropriately taken into account + for (int parentDocId = 0; parentDocId < parentDocs; ++parentDocId) { + String parent = Integer.toString(parentDocId); + + // Create the parent + Document parentDocument = new Document(); + + parentDocument.add(new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES)); + parentDocument.add(new StringField(IdFieldMapper.NAME, parent, Field.Store.YES)); + parentDocument.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO)); + + // add the parent to the index + writer.addDocument(parentDocument); + + int numChildDocs = scaledRandomIntBetween(1, 10); + + // forget any parent's previous scores + scorer.scores.clear(); + + // associate children with the parent + for (int i = 0; i < numChildDocs; ++i) { + int childScore = random().nextInt(128); + + Document childDocument = new Document(); + + childDocument.add(new StringField(UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId++)), Field.Store.NO)); + childDocument.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO)); + // parent association: + childDocument.add(new StringField(ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO)); + childDocument.add(new DoubleField(CHILD_SCORE_NAME, childScore, Field.Store.NO)); + + // remember the score to be calculated + scorer.scores.add(childScore); + + // add the associated child to the index + writer.addDocument(childDocument); + } + + // this score that should be returned for this parent + parentScores.put(parent, scorer.score()); + } + + writer.commit(); + + IndexReader reader = DirectoryReader.open(writer, true); + IndexSearcher searcher = new IndexSearcher(reader); + + // setup to read the parent/child map + Engine.SimpleSearcher engineSearcher = new Engine.SimpleSearcher(ChildrenQueryTests.class.getSimpleName(), searcher); + ((TestSearchContext)context).setSearcher(new ContextIndexSearcher(context, engineSearcher)); + ParentFieldMapper parentFieldMapper = context.mapperService().documentMapper("child").parentFieldMapper(); + ParentChildIndexFieldData parentChildIndexFieldData = context.fieldData().getForField(parentFieldMapper); + FixedBitSetFilter parentFilter = wrap(new TermFilter(new Term(TypeFieldMapper.NAME, "parent"))); + + // child query that returns the score as the value of "childScore" for each child document, + // with the 
parent's score determined by the score type + FieldMapper fieldMapper = context.mapperService().smartNameFieldMapper(CHILD_SCORE_NAME); + IndexNumericFieldData fieldData = context.fieldData().getForField(fieldMapper); + FieldValueFactorFunction fieldScore = new FieldValueFactorFunction(CHILD_SCORE_NAME, 1, FieldValueFactorFunction.Modifier.NONE, fieldData); + Query childQuery = new FunctionScoreQuery(new FilteredQuery(Queries.newMatchAllQuery(), new TermFilter(new Term(TypeFieldMapper.NAME, "child"))), fieldScore); + + // Perform the search for the documents using the selected score type + TopDocs docs = + searcher.search( + new ChildrenQuery(parentChildIndexFieldData, "parent", "child", parentFilter, childQuery, scoreType, 0, 0, parentDocs, null), + parentDocs); + + assertThat("Expected all parents", docs.totalHits, is(parentDocs)); + + // score should be descending (just a sanity check) + float topScore = docs.scoreDocs[0].score; + + // ensure each score is returned as expected + for (int i = 0; i < parentDocs; ++i) { + ScoreDoc scoreDoc = docs.scoreDocs[i]; + // get the ID from the document to get its expected score; remove it so we cannot double-count it + float score = parentScores.remove(reader.document(scoreDoc.doc).get(IdFieldMapper.NAME)); + + // expect exact match + assertThat("Unexpected score", scoreDoc.score, is(score)); + assertThat("Not descending", score, lessThanOrEqualTo(topScore)); + + // it had better keep descending + topScore = score; + } + + reader.close(); + writer.close(); + directory.close(); + } } diff --git a/src/test/java/org/elasticsearch/index/search/child/MockScorer.java b/src/test/java/org/elasticsearch/index/search/child/MockScorer.java index 96fb12d3dac..3aa2d0456ec 100644 --- a/src/test/java/org/elasticsearch/index/search/child/MockScorer.java +++ b/src/test/java/org/elasticsearch/index/search/child/MockScorer.java @@ -39,23 +39,35 @@ class MockScorer extends Scorer { return 1.0f; } float aggregateScore = 0; - for (int i = 0; i < scores.elementsCount; i++) { - float score = scores.buffer[i]; - switch (scoreType) { - case MAX: - if (aggregateScore < score) { - aggregateScore = score; - } - break; - case SUM: - case AVG: - aggregateScore += score; - break; - } - } - if (scoreType == ScoreType.AVG) { - aggregateScore /= scores.elementsCount; + // in the case of a min value, it can't start at 0 (the lowest score); in all cases, it doesn't hurt to use the + // first score, so we can safely use the first value by skipping it in the loop + if (scores.elementsCount != 0) { + aggregateScore = scores.buffer[0]; + + for (int i = 1; i < scores.elementsCount; i++) { + float score = scores.buffer[i]; + switch (scoreType) { + case MIN: + if (aggregateScore > score) { + aggregateScore = score; + } + break; + case MAX: + if (aggregateScore < score) { + aggregateScore = score; + } + break; + case SUM: + case AVG: + aggregateScore += score; + break; + } + } + + if (scoreType == ScoreType.AVG) { + aggregateScore /= scores.elementsCount; + } } return aggregateScore; diff --git a/src/test/java/org/elasticsearch/index/search/child/ScoreTypeTests.java b/src/test/java/org/elasticsearch/index/search/child/ScoreTypeTests.java new file mode 100644 index 00000000000..63d91cc26c2 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/search/child/ScoreTypeTests.java @@ -0,0 +1,55 @@ +package org.elasticsearch.index.search.child; + +import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.junit.Test; + +import static org.hamcrest.MatcherAssert.assertThat; +import 
static org.hamcrest.Matchers.equalTo; + +/** + * Tests {@link ScoreType} to ensure backward compatibility of any changes. + */ +public class ScoreTypeTests { + @Test + public void minFromString() { + assertThat("fromString(min) != MIN", ScoreType.MIN, equalTo(ScoreType.fromString("min"))); + } + + @Test + public void maxFromString() { + assertThat("fromString(max) != MAX", ScoreType.MAX, equalTo(ScoreType.fromString("max"))); + } + + @Test + public void avgFromString() { + assertThat("fromString(avg) != AVG", ScoreType.AVG, equalTo(ScoreType.fromString("avg"))); + } + + @Test + public void sumFromString() { + assertThat("fromString(sum) != SUM", ScoreType.SUM, equalTo(ScoreType.fromString("sum"))); + // allowed for consistency with ScoreMode.Total: + assertThat("fromString(total) != SUM", ScoreType.SUM, equalTo(ScoreType.fromString("total"))); + } + + @Test + public void noneFromString() { + assertThat("fromString(none) != NONE", ScoreType.NONE, equalTo(ScoreType.fromString("none"))); + } + + /** + * Should throw {@link ElasticsearchIllegalArgumentException} instead of NPE. + */ + @Test(expected = ElasticsearchIllegalArgumentException.class) + public void nullFromString_throwsException() { + ScoreType.fromString(null); + } + + /** + * Failure should not change (and the value should never match anything...). + */ + @Test(expected = ElasticsearchIllegalArgumentException.class) + public void unrecognizedFromString_throwsException() { + ScoreType.fromString("unrecognized value"); + } +}
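For completeness, a minimal sketch of how a caller (for example the query parser, which this patch does not modify) resolves the new mode through `ScoreType.fromString`, matching the behaviour the tests above pin down:

--------------------------------------------------
// Sketch of the fromString contract exercised by ScoreTypeTests; the surrounding
// parsing code is assumed, only fromString itself comes from ScoreType.
ScoreType min = ScoreType.fromString("min");     // new: resolves to ScoreType.MIN
ScoreType sum = ScoreType.fromString("total");   // "total" remains an alias for SUM

// null or unrecognized values throw ElasticsearchIllegalArgumentException.
--------------------------------------------------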