Adding "min" score mode to parent-child queries

Support for "max", "sum", and "avg" already existed.
Chris Earle 2014-09-17 16:33:05 -04:00
parent 5dc5849922
commit 9b84ad3c7b
8 changed files with 303 additions and 28 deletions

View File

@ -31,7 +31,7 @@ query the `total_hits` is always correct.
==== Scoring capabilities
The `has_child` also has scoring support. The
- supported score types are `max`, `sum`, `avg` or `none`. The default is
supported score types are `min`, `max`, `sum`, `avg` or `none`. The default is
`none` and yields the same behaviour as in previous versions. If the
score type is set to another value than `none`, the scores of all the
matching child documents are aggregated into the associated parent
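With `min`, a parent document takes the lowest score of any matching child. As
a minimal sketch, assuming the 1.x Java API's `HasChildQueryBuilder#scoreType`
(the `blog_tag` type and `tag` field are illustrative):

--------------------------------------------------
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

// score each matching child by its relevance to tag:something,
// then give every parent the minimum of its children's scores
QueryBuilder query = QueryBuilders.hasChildQuery("blog_tag",
        QueryBuilders.termQuery("tag", "something"))
        .scoreType("min");
--------------------------------------------------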
@ -84,9 +84,9 @@ the `score_mode` parameter.
[float]
==== Memory Considerations
In order to support parent-child joins, all of the (string) parent IDs
must be resident in memory (in the <<index-modules-fielddata,field data cache>>).
Additionally, every child document is mapped to its parent using a long
value (approximately). It is advisable to keep the string parent ID short
in order to reduce memory usage.
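As a rough illustration of that advice, a back-of-envelope sketch (the counts
and the flat cost model are invented; the real field data layout is more
involved):

--------------------------------------------------
public class ParentIdMemoryEstimate {
    public static void main(String[] args) {
        long parents = 1000000L;    // resident (string) parent IDs
        long children = 5000000L;   // each child maps to its parent via ~one long
        int avgParentIdBytes = 16;  // shorter parent IDs shrink this term directly
        long approxBytes = parents * avgParentIdBytes + children * 8L;
        System.out.printf("~%d MB of field data%n", approxBytes / (1024 * 1024));
    }
}
--------------------------------------------------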
@ -98,5 +98,3 @@ APIS, eg:
--------------------------------------------------
curl -XGET "http://localhost:9200/_stats/id_cache?pretty&human"
--------------------------------------------------

View File

@ -176,6 +176,9 @@ public class ChildrenQuery extends Query {
try {
if (minChildren == 0 && maxChildren == 0 && scoreType != ScoreType.NONE) {
switch (scoreType) {
case MIN:
collector = new MinCollector(globalIfd, sc, parentType);
break;
case MAX:
collector = new MaxCollector(globalIfd, sc, parentType);
break;
@ -186,6 +189,9 @@ public class ChildrenQuery extends Query {
}
if (collector == null) {
switch (scoreType) {
case MIN:
collector = new MinCountCollector(globalIfd, sc, parentType);
break;
case MAX:
collector = new MaxCountCollector(globalIfd, sc, parentType);
break;
@ -468,6 +474,21 @@ public class ChildrenQuery extends Query {
}
}
private final static class MinCollector extends ParentScoreCollector {
private MinCollector(IndexParentChildFieldData globalIfd, SearchContext searchContext, String parentType) {
super(globalIfd, searchContext, parentType);
}
@Override
protected void existingParent(long parentIdx) throws IOException {
float currentScore = scorer.score();
if (currentScore < scores.get(parentIdx)) {
scores.set(parentIdx, currentScore);
}
}
}
private final static class MaxCountCollector extends ParentScoreCountCollector {
private MaxCountCollector(IndexParentChildFieldData globalIfd, SearchContext searchContext, String parentType) {
@ -484,6 +505,22 @@ public class ChildrenQuery extends Query {
}
}
private final static class MinCountCollector extends ParentScoreCountCollector {
private MinCountCollector(IndexParentChildFieldData globalIfd, SearchContext searchContext, String parentType) {
super(globalIfd, searchContext, parentType);
}
@Override
protected void existingParent(long parentIdx) throws IOException {
float currentScore = scorer.score();
if (currentScore < scores.get(parentIdx)) {
scores.set(parentIdx, currentScore);
}
occurrences.increment(parentIdx, 1);
}
}
private final static class SumCountAndAvgCollector extends ParentScoreCountCollector {
SumCountAndAvgCollector(IndexParentChildFieldData globalIfd, SearchContext searchContext, String parentType) {

View File

@ -24,6 +24,11 @@ import org.elasticsearch.ElasticsearchIllegalArgumentException;
* Defines how scores from child documents are mapped into the parent document.
*/
public enum ScoreType {
/**
* Only the lowest score of all matching child documents is mapped into the
* parent.
*/
MIN,
/**
* Only the highest score of all matching child documents is mapped into the
* parent.
@ -50,6 +55,8 @@ public enum ScoreType {
public static ScoreType fromString(String type) {
if ("none".equals(type)) {
return NONE;
} else if ("min".equals(type)) {
return MIN;
} else if ("max".equals(type)) {
return MAX;
} else if ("avg".equals(type)) {

View File

@ -218,11 +218,15 @@ public class TopChildrenQuery extends Query {
parentDoc.docId = parentDocId;
parentDoc.count = 1;
parentDoc.maxScore = scoreDoc.score;
parentDoc.minScore = scoreDoc.score;
parentDoc.sumScores = scoreDoc.score;
readerParentDocs.put(parentDocId, parentDoc);
} else {
parentDoc.count++;
parentDoc.sumScores += scoreDoc.score;
if (scoreDoc.score < parentDoc.minScore) {
parentDoc.minScore = scoreDoc.score;
}
if (scoreDoc.score > parentDoc.maxScore) {
parentDoc.maxScore = scoreDoc.score;
}
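The first child seeds count, minScore, maxScore, and sumScores together, so the
running minimum never starts from an artificial floor such as 0. A condensed,
hypothetical restatement of the pattern (not part of the commit):

final class ScoreAggregates {
    int count;
    float min, max, sum;

    void accumulate(float childScore) {
        if (count == 0) {
            count = 1;
            min = max = sum = childScore; // the first score seeds every aggregate
        } else {
            count++;
            sum += childScore;
            min = Math.min(min, childScore);
            max = Math.max(max, childScore);
        }
    }

    float score(ScoreType type) {
        switch (type) {
            case MIN: return min;
            case MAX: return max;
            case AVG: return sum / count;
            default:  return sum; // SUM
        }
    }
}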
@ -320,11 +324,19 @@ public class TopChildrenQuery extends Query {
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
ParentDoc[] readerParentDocs = parentDocs.get(context.reader().getCoreCacheKey());
if (readerParentDocs != null) {
- if (scoreType == ScoreType.MAX) {
if (scoreType == ScoreType.MIN) {
return new ParentScorer(this, readerParentDocs) {
@Override
public float score() throws IOException {
- assert doc.docId >= 0 || doc.docId < NO_MORE_DOCS;
assert doc.docId >= 0 && doc.docId != NO_MORE_DOCS;
return doc.minScore;
}
};
} else if (scoreType == ScoreType.MAX) {
return new ParentScorer(this, readerParentDocs) {
@Override
public float score() throws IOException {
assert doc.docId >= 0 && doc.docId != NO_MORE_DOCS;
return doc.maxScore;
}
};
@ -332,7 +344,7 @@ public class TopChildrenQuery extends Query {
return new ParentScorer(this, readerParentDocs) {
@Override
public float score() throws IOException {
- assert doc.docId >= 0 || doc.docId < NO_MORE_DOCS;
assert doc.docId >= 0 && doc.docId != NO_MORE_DOCS;
return doc.sumScores / doc.count;
}
};
@ -340,7 +352,7 @@ public class TopChildrenQuery extends Query {
return new ParentScorer(this, readerParentDocs) {
@Override
public float score() throws IOException {
- assert doc.docId >= 0 || doc.docId < NO_MORE_DOCS;
assert doc.docId >= 0 && doc.docId != NO_MORE_DOCS;
return doc.sumScores;
}
@ -412,6 +424,7 @@ public class TopChildrenQuery extends Query {
private static class ParentDoc {
public int docId;
public int count;
public float minScore = Float.NaN;
public float maxScore = Float.NaN;
public float sumScores = 0;
}

View File

@ -42,10 +42,17 @@ import static org.hamcrest.Matchers.equalTo;
@LuceneTestCase.SuppressCodecs(value = {"Lucene40", "Lucene3x"})
public abstract class AbstractChildTests extends ElasticsearchSingleNodeLuceneTestCase {
/**
* The name of the field within the child type that stores a score to use in test queries.
* <p />
* Its type is {@code double}.
*/
protected static String CHILD_SCORE_NAME = "childScore";
static SearchContext createSearchContext(String indexName, String parentType, String childType) throws IOException {
IndexService indexService = createIndex(indexName);
MapperService mapperService = indexService.mapperService();
- mapperService.merge(childType, new CompressedString(PutMappingRequest.buildFromSimplifiedDef(childType, "_parent", "type=" + parentType).string()), true);
mapperService.merge(childType, new CompressedString(PutMappingRequest.buildFromSimplifiedDef(childType, "_parent", "type=" + parentType, CHILD_SCORE_NAME, "type=double").string()), true);
return createSearchContext(indexService);
}

View File

@ -24,6 +24,7 @@ import com.carrotsearch.hppc.ObjectObjectOpenHashMap;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.*;
@ -34,11 +35,17 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.lucene.search.NotFilter;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.lucene.search.XFilteredQuery;
import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.cache.fixedbitset.FixedBitSetFilter;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.plain.ParentChildIndexFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.IdFieldMapper;
import org.elasticsearch.index.mapper.internal.ParentFieldMapper;
import org.elasticsearch.index.mapper.internal.TypeFieldMapper;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
@ -56,6 +63,9 @@ import java.util.NavigableMap;
import java.util.Random;
import java.util.TreeMap;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.lessThanOrEqualTo;
public class ChildrenQueryTests extends AbstractChildTests {
@BeforeClass
@ -128,7 +138,7 @@ public class ChildrenQueryTests extends AbstractChildTests {
String childValue = childValues[random().nextInt(childValues.length)];
document = new Document();
- document.add(new StringField(UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId)), Field.Store.NO));
document.add(new StringField(UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId++)), Field.Store.NO));
document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
document.add(new StringField(ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
document.add(new StringField("field1", childValue, Field.Store.NO));
@ -264,4 +274,140 @@ public class ChildrenQueryTests extends AbstractChildTests {
directory.close();
}
@Test
public void testMinScoreMode() throws IOException {
assertScoreType(ScoreType.MIN);
}
@Test
public void testMaxScoreMode() throws IOException {
assertScoreType(ScoreType.MAX);
}
@Test
public void testAvgScoreMode() throws IOException {
assertScoreType(ScoreType.AVG);
}
@Test
public void testSumScoreMode() throws IOException {
assertScoreType(ScoreType.SUM);
}
/**
* Assert that the {@code scoreType} operates as expected and parents are found in the expected order.
* <p />
* This will use the test index's parent/child types to create parents with multiple children. Each child will have
* a randomly generated score stored in {@link #CHILD_SCORE_NAME}, which is used to score parents according to
* the {@code scoreType}; a {@link MockScorer} determines the expected scores.
* @param scoreType The score type to use within the query to score parents relative to their children.
* @throws IOException if any unexpected error occurs
*/
private void assertScoreType(ScoreType scoreType) throws IOException {
SearchContext context = SearchContext.current();
Directory directory = newDirectory();
IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
// calculates the expected score per parent
MockScorer scorer = new MockScorer(scoreType);
scorer.scores = new FloatArrayList(10);
// number of parents to generate
int parentDocs = scaledRandomIntBetween(2, 10);
// unique child ID
int childDocId = 0;
// Parent ID to expected score
Map<String, Float> parentScores = new TreeMap<>();
// Add a few random parents to ensure that the children's score is appropriately taken into account
for (int parentDocId = 0; parentDocId < parentDocs; ++parentDocId) {
String parent = Integer.toString(parentDocId);
// Create the parent
Document parentDocument = new Document();
parentDocument.add(new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
parentDocument.add(new StringField(IdFieldMapper.NAME, parent, Field.Store.YES));
parentDocument.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
// add the parent to the index
writer.addDocument(parentDocument);
int numChildDocs = scaledRandomIntBetween(1, 10);
// forget any parent's previous scores
scorer.scores.clear();
// associate children with the parent
for (int i = 0; i < numChildDocs; ++i) {
int childScore = random().nextInt(128);
Document childDocument = new Document();
childDocument.add(new StringField(UidFieldMapper.NAME, Uid.createUid("child", Integer.toString(childDocId++)), Field.Store.NO));
childDocument.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
// parent association:
childDocument.add(new StringField(ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
childDocument.add(new DoubleField(CHILD_SCORE_NAME, childScore, Field.Store.NO));
// remember the score to be calculated
scorer.scores.add(childScore);
// add the associated child to the index
writer.addDocument(childDocument);
}
// the score that should be returned for this parent
parentScores.put(parent, scorer.score());
}
writer.commit();
IndexReader reader = DirectoryReader.open(writer, true);
IndexSearcher searcher = new IndexSearcher(reader);
// setup to read the parent/child map
Engine.SimpleSearcher engineSearcher = new Engine.SimpleSearcher(ChildrenQueryTests.class.getSimpleName(), searcher);
((TestSearchContext)context).setSearcher(new ContextIndexSearcher(context, engineSearcher));
ParentFieldMapper parentFieldMapper = context.mapperService().documentMapper("child").parentFieldMapper();
ParentChildIndexFieldData parentChildIndexFieldData = context.fieldData().getForField(parentFieldMapper);
FixedBitSetFilter parentFilter = wrap(new TermFilter(new Term(TypeFieldMapper.NAME, "parent")));
// child query that returns the score as the value of "childScore" for each child document,
// with the parent's score determined by the score type
FieldMapper fieldMapper = context.mapperService().smartNameFieldMapper(CHILD_SCORE_NAME);
IndexNumericFieldData fieldData = context.fieldData().getForField(fieldMapper);
FieldValueFactorFunction fieldScore = new FieldValueFactorFunction(CHILD_SCORE_NAME, 1, FieldValueFactorFunction.Modifier.NONE, fieldData);
Query childQuery = new FunctionScoreQuery(new FilteredQuery(Queries.newMatchAllQuery(), new TermFilter(new Term(TypeFieldMapper.NAME, "child"))), fieldScore);
// Perform the search for the documents using the selected score type
TopDocs docs =
searcher.search(
new ChildrenQuery(parentChildIndexFieldData, "parent", "child", parentFilter, childQuery, scoreType, 0, 0, parentDocs, null),
parentDocs);
assertThat("Expected all parents", docs.totalHits, is(parentDocs));
// score should be descending (just a sanity check)
float topScore = docs.scoreDocs[0].score;
// ensure each score is returned as expected
for (int i = 0; i < parentDocs; ++i) {
ScoreDoc scoreDoc = docs.scoreDocs[i];
// get the ID from the document to get its expected score; remove it so we cannot double-count it
float score = parentScores.remove(reader.document(scoreDoc.doc).get(IdFieldMapper.NAME));
// expect exact match
assertThat("Unexpected score", scoreDoc.score, is(score));
assertThat("Not descending", score, lessThanOrEqualTo(topScore));
// it had better keep descending
topScore = score;
}
reader.close();
writer.close();
directory.close();
}
}

View File

@ -39,23 +39,35 @@ class MockScorer extends Scorer {
return 1.0f;
}
float aggregateScore = 0;
- for (int i = 0; i < scores.elementsCount; i++) {
- float score = scores.buffer[i];
- switch (scoreType) {
- case MAX:
- if (aggregateScore < score) {
- aggregateScore = score;
- }
- break;
- case SUM:
- case AVG:
- aggregateScore += score;
- break;
- }
- }
- if (scoreType == ScoreType.AVG) {
- aggregateScore /= scores.elementsCount;
// a running min cannot be seeded with 0 (the lowest possible score) because no child score would ever replace it;
// seeding every aggregate with the first score is safe, so the loop starts at the second element
if (scores.elementsCount != 0) {
aggregateScore = scores.buffer[0];
for (int i = 1; i < scores.elementsCount; i++) {
float score = scores.buffer[i];
switch (scoreType) {
case MIN:
if (aggregateScore > score) {
aggregateScore = score;
}
break;
case MAX:
if (aggregateScore < score) {
aggregateScore = score;
}
break;
case SUM:
case AVG:
aggregateScore += score;
break;
}
}
if (scoreType == ScoreType.AVG) {
aggregateScore /= scores.elementsCount;
}
}
return aggregateScore;

View File

@ -0,0 +1,55 @@
package org.elasticsearch.index.search.child;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.junit.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
/**
* Tests {@link ScoreType} to ensure backward compatibility of any changes.
*/
public class ScoreTypeTests {
@Test
public void minFromString() {
assertThat("fromString(min) != MIN", ScoreType.MIN, equalTo(ScoreType.fromString("min")));
}
@Test
public void maxFromString() {
assertThat("fromString(max) != MAX", ScoreType.MAX, equalTo(ScoreType.fromString("max")));
}
@Test
public void avgFromString() {
assertThat("fromString(avg) != AVG", ScoreType.AVG, equalTo(ScoreType.fromString("avg")));
}
@Test
public void sumFromString() {
assertThat("fromString(sum) != SUM", ScoreType.SUM, equalTo(ScoreType.fromString("sum")));
// allowed for consistency with ScoreMode.Total:
assertThat("fromString(total) != SUM", ScoreType.SUM, equalTo(ScoreType.fromString("total")));
}
@Test
public void noneFromString() {
assertThat("fromString(none) != NONE", ScoreType.NONE, equalTo(ScoreType.fromString("none")));
}
/**
* Should throw {@link ElasticsearchIllegalArgumentException} instead of NPE.
*/
@Test(expected = ElasticsearchIllegalArgumentException.class)
public void nullFromString_throwsException() {
ScoreType.fromString(null);
}
/**
* Rejecting unrecognized values should not change (and this value should never match anything...).
*/
@Test(expected = ElasticsearchIllegalArgumentException.class)
public void unrecognizedFromString_throwsException() {
ScoreType.fromString("unrecognized value");
}
}