diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
index 23601ca994b..bbeb2ee204b 100644
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
@@ -67,6 +67,10 @@ import org.apache.lucene.util.PriorityQueue;
  * @lucene.experimental
  */
 
+// TODO: TopGroups.merge() won't work with TopGroups returned by this collector, because
+// each block will be on a different shard. Add a specialized merge() static method
+// to this collector?
+
 public class BlockGroupingCollector extends SimpleCollector {
 
   private int[] pendingSubDocs;
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java
index b88fb743f08..3d0da81ce02 100644
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java
@@ -71,6 +71,15 @@ public class GroupingSearch {
     this(new TermGroupSelector(groupField), null);
   }
 
+  /**
+   * Constructs a GroupingSearch instance that groups documents using the given {@link GroupSelector}.
+   *
+   * @param groupSelector the {@link GroupSelector} that assigns documents to groups
+   */
+  public GroupingSearch(GroupSelector<?> groupSelector) {
+    this(groupSelector, null);
+  }
+
   /**
    * Constructs a GroupingSearch instance that groups documents by function using a {@link ValueSource}
    * instance.
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/AbstractGroupingTestCase.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/AbstractGroupingTestCase.java
index 3c3b9f900d6..e25666734db 100644
--- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/AbstractGroupingTestCase.java
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/AbstractGroupingTestCase.java
@@ -16,6 +16,15 @@
  */
 package org.apache.lucene.search.grouping;
 
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
@@ -36,4 +45,54 @@ public abstract class AbstractGroupingTestCase extends LuceneTestCase {
     } while ("".equals(randomValue));
     return randomValue;
   }
+
+  /**
+   * Asserts that two hit lists have the same length and that hits at the same
+   * position have identical doc ids and exactly equal scores (delta 0).
+   */
+  protected static void assertScoreDocsEquals(ScoreDoc[] expected, ScoreDoc[] actual) {
+    assertEquals(expected.length, actual.length);
+    for (int i = 0; i < expected.length; i++) {
+      assertEquals(expected[i].doc, actual[i].doc);
+      assertEquals(expected[i].score, actual[i].score, 0);
+    }
+  }
+
+  /**
+   * A throwaway index for grouping tests: a fresh test directory, a
+   * {@link RandomIndexWriter} over it, and a searcher that is opened on first use.
+   */
+  protected static class Shard implements Closeable {
+
+    final Directory directory;
+    final RandomIndexWriter writer;
+    IndexSearcher searcher;
+
+    Shard() throws IOException {
+      this.directory = newDirectory();
+      this.writer = new RandomIndexWriter(random(), directory,
+          newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
+    }
+
+    /**
+     * Returns a searcher over a reader obtained from the writer, opening it on the
+     * first call and returning that same instance on every later call.
+     */
+    IndexSearcher getIndexSearcher() throws IOException {
+      if (searcher == null) {
+        searcher = new IndexSearcher(this.writer.getReader());
+      }
+      return searcher;
+    }
+
+    /**
+     * Closes the reader (if a searcher was opened), then the writer and the directory.
+     */
+    @Override
+    public void close() throws IOException {
+      // One IOUtils.close call (null entries are ignored) still closes the writer and
+      // the directory when closing the reader throws, instead of leaking them.
+      IOUtils.close(searcher == null ? null : searcher.getIndexReader(), writer, directory);
+    }
+  }
 }
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java
new file mode 100644
index 00000000000..610ded9b310
--- /dev/null
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Shared tests for {@link GroupSelector} implementations; each check compares grouped results
+ * with plain searches filtered by group value. {@code T} is the selector's group value type.
+ */
+public abstract class BaseGroupSelectorTestCase<T> extends AbstractGroupingTestCase {
+  /** Adds the group field(s) used by the selector under test to {@code document}; {@code id} is the doc's sequence number. */
+  protected abstract void addGroupField(Document document, int id);
+  /** Returns the selector under test; it is requested again for every collector or GroupingSearch. */
+  protected abstract GroupSelector<T> getGroupSelector();
+  /** Returns a query used to restrict a search to the documents of the group identified by {@code groupValue}. */
+  protected abstract Query filterQuery(T groupValue);
+
+  public void testSortByRelevance() throws IOException {
+    // With default sorts, every group must match a relevance-ordered search filtered to that group
+    try (Shard shard = new Shard()) {
+      indexRandomDocs(shard.writer);
+
+      String[] query = new String[]{ "foo", "bar", "baz" };
+      Query topLevel = new TermQuery(new Term("text", query[random().nextInt(query.length)]));
+
+      IndexSearcher searcher = shard.getIndexSearcher();
+      GroupingSearch grouper = new GroupingSearch(getGroupSelector());
+      grouper.setGroupDocsLimit(10);
+      TopGroups<T> topGroups = grouper.search(searcher, topLevel, 0, 5);
+      TopDocs topDoc = searcher.search(topLevel, 1);
+      for (int i = 0; i < topGroups.groups.length; i++) {
+        // Each group should have a result set equal to that returned by the top-level query,
+        // filtered by the group value.
+        Query filtered = new BooleanQuery.Builder()
+            .add(topLevel, BooleanClause.Occur.MUST)
+            .add(filterQuery(topGroups.groups[i].groupValue), BooleanClause.Occur.FILTER)
+            .build();
+        TopDocs td = searcher.search(filtered, 10);
+        assertScoreDocsEquals(topGroups.groups[i].scoreDocs, td.scoreDocs);
+        if (i == 0) {
+          assertEquals(td.scoreDocs[0].doc, topDoc.scoreDocs[0].doc);
+          assertEquals(td.scoreDocs[0].score, topDoc.scoreDocs[0].score, 0);
+        }
+      }
+    }
+  }
+
+  public void testSortGroups() throws IOException {
+    // Groups are ordered by an explicit Sort; docs inside each group stay ordered by relevance
+    try (Shard shard = new Shard()) {
+      indexRandomDocs(shard.writer);
+      IndexSearcher searcher = shard.getIndexSearcher();
+
+      String[] query = new String[]{ "foo", "bar", "baz" };
+      Query topLevel = new TermQuery(new Term("text", query[random().nextInt(query.length)]));
+
+      GroupingSearch grouper = new GroupingSearch(getGroupSelector());
+      grouper.setGroupDocsLimit(10);
+      Sort sort = new Sort(new SortField("sort1", SortField.Type.STRING), new SortField("sort2", SortField.Type.LONG));
+      grouper.setGroupSort(sort);
+      TopGroups<T> topGroups = grouper.search(searcher, topLevel, 0, 5);
+      TopDocs topDoc = searcher.search(topLevel, 1, sort);
+      for (int i = 0; i < topGroups.groups.length; i++) {
+        // We're sorting the groups by a defined Sort, but each group itself should be ordered
+        // by doc relevance, and should be equal to the results of a top-level query filtered
+        // by the group value
+        Query filtered = new BooleanQuery.Builder()
+            .add(topLevel, BooleanClause.Occur.MUST)
+            .add(filterQuery(topGroups.groups[i].groupValue), BooleanClause.Occur.FILTER)
+            .build();
+        TopDocs td = searcher.search(filtered, 10);
+        assertScoreDocsEquals(topGroups.groups[i].scoreDocs, td.scoreDocs);
+        // The top group should have sort values equal to the sort values of the top doc of
+        // a top-level search sorted by the same Sort; each subsequent group should have sort
+        // values that do not sort before those of its predecessor.
+        if (i > 0) {
+          assertSortsBefore(topGroups.groups[i - 1], topGroups.groups[i]);
+        } else {
+          assertArrayEquals(((FieldDoc)topDoc.scoreDocs[0]).fields, topGroups.groups[0].groupSortValues);
+        }
+      }
+    }
+  }
+
+  public void testSortWithinGroups() throws IOException {
+    // Groups are ordered by relevance; docs inside each group follow an explicit Sort
+    try (Shard shard = new Shard()) {
+      indexRandomDocs(shard.writer);
+      IndexSearcher searcher = shard.getIndexSearcher();
+
+      String[] query = new String[]{ "foo", "bar", "baz" };
+      Query topLevel = new TermQuery(new Term("text", query[random().nextInt(query.length)]));
+
+      GroupingSearch grouper = new GroupingSearch(getGroupSelector());
+      grouper.setGroupDocsLimit(10);
+      Sort sort = new Sort(new SortField("sort1", SortField.Type.STRING), new SortField("sort2", SortField.Type.LONG));
+      grouper.setSortWithinGroup(sort);
+
+      TopGroups<T> topGroups = grouper.search(searcher, topLevel, 0, 5);
+      TopDocs topDoc = searcher.search(topLevel, 1);
+
+      for (int i = 0; i < topGroups.groups.length; i++) {
+        // Check top-level ordering by score: first group's maxScore should be equal to the
+        // top score returned by a simple search with no grouping; subsequent groups should
+        // all have equal or lower maxScores
+        if (i == 0) {
+          assertEquals(topDoc.scoreDocs[0].score, topGroups.groups[0].maxScore, 0);
+        } else {
+          assertTrue(topGroups.groups[i].maxScore <= topGroups.groups[i - 1].maxScore);
+        }
+        // The docs within each group are ordered by the defined Sort, so each group should give
+        // the same result as the top-level query, filtered by the group value, with the same Sort
+        Query filtered = new BooleanQuery.Builder()
+            .add(topLevel, BooleanClause.Occur.MUST)
+            .add(filterQuery(topGroups.groups[i].groupValue), BooleanClause.Occur.FILTER)
+            .build();
+        TopDocs td = searcher.search(filtered, 10, sort);
+        assertScoreDocsEquals(td.scoreDocs, topGroups.groups[i].scoreDocs);
+      }
+    }
+  }
+
+  public void testGroupHeads() throws IOException {
+    // The group heads must be exactly the top-scoring doc of each matching group
+    try (Shard shard = new Shard()) {
+      indexRandomDocs(shard.writer);
+      IndexSearcher searcher = shard.getIndexSearcher();
+
+      String[] query = new String[]{ "foo", "bar", "baz" };
+      Query topLevel = new TermQuery(new Term("text", query[random().nextInt(query.length)]));
+
+      GroupSelector<T> groupSelector = getGroupSelector();
+      GroupingSearch grouping = new GroupingSearch(groupSelector);
+      grouping.setAllGroups(true);
+      grouping.setAllGroupHeads(true);
+
+      grouping.search(searcher, topLevel, 0, 1);
+      Collection<T> matchingGroups = grouping.getAllMatchingGroups();
+
+      // The number of hits from the top-level query should equal the sum of
+      // the number of hits from the query filtered by each group value in turn
+      int totalHits = searcher.count(topLevel);
+      int groupHits = 0;
+      for (T groupValue : matchingGroups) {
+        Query filtered = new BooleanQuery.Builder()
+            .add(topLevel, BooleanClause.Occur.MUST)
+            .add(filterQuery(groupValue), BooleanClause.Occur.FILTER)
+            .build();
+        groupHits += searcher.count(filtered);
+      }
+      assertEquals(totalHits, groupHits);
+
+      Bits groupHeads = grouping.getAllGroupHeads();
+      int cardinality = 0;
+      for (int i = 0; i < groupHeads.length(); i++) {
+        if (groupHeads.get(i)) {
+          cardinality++;
+        }
+      }
+      assertEquals(matchingGroups.size(), cardinality); // We should have one set bit per matching group
+
+      // Each group head should correspond to the topdoc of a search filtered by
+      // that group
+      for (T groupValue : matchingGroups) {
+        Query filtered = new BooleanQuery.Builder()
+            .add(topLevel, BooleanClause.Occur.MUST)
+            .add(filterQuery(groupValue), BooleanClause.Occur.FILTER)
+            .build();
+        TopDocs td = searcher.search(filtered, 1);
+        assertTrue(groupHeads.get(td.scoreDocs[0].doc));
+      }
+    }
+  }
+
+  public void testGroupHeadsWithSort() throws IOException {
+    // The group heads must be the first doc of each matching group under the within-group Sort
+    try (Shard shard = new Shard()) {
+      indexRandomDocs(shard.writer);
+      IndexSearcher searcher = shard.getIndexSearcher();
+
+      String[] query = new String[]{ "foo", "bar", "baz" };
+      Query topLevel = new TermQuery(new Term("text", query[random().nextInt(query.length)]));
+
+      Sort sort = new Sort(new SortField("sort1", SortField.Type.STRING), new SortField("sort2", SortField.Type.LONG));
+      GroupSelector<T> groupSelector = getGroupSelector();
+      GroupingSearch grouping = new GroupingSearch(groupSelector);
+      grouping.setAllGroups(true);
+      grouping.setAllGroupHeads(true);
+      grouping.setSortWithinGroup(sort);
+
+      grouping.search(searcher, topLevel, 0, 1);
+      Collection<T> matchingGroups = grouping.getAllMatchingGroups();
+
+      Bits groupHeads = grouping.getAllGroupHeads();
+      int cardinality = 0;
+      for (int i = 0; i < groupHeads.length(); i++) {
+        if (groupHeads.get(i)) {
+          cardinality++;
+        }
+      }
+      assertEquals(matchingGroups.size(), cardinality); // We should have one set bit per matching group
+
+      // Each group head should correspond to the topdoc of a search filtered by
+      // that group using the same within-group sort
+      for (T groupValue : matchingGroups) {
+        Query filtered = new BooleanQuery.Builder()
+            .add(topLevel, BooleanClause.Occur.MUST)
+            .add(filterQuery(groupValue), BooleanClause.Occur.FILTER)
+            .build();
+        TopDocs td = searcher.search(filtered, 1, sort);
+        assertTrue(groupHeads.get(td.scoreDocs[0].doc));
+      }
+    }
+  }
+
+  public void testShardedGrouping() throws IOException {
+    // Two-phase grouping over shards, merged after each phase, must agree with one control index
+    try (Shard control = new Shard()) {
+      int shardCount = random().nextInt(3) + 2; // between 2 and 4 shards
+      Shard[] shards = new Shard[shardCount];
+      try {
+        for (int i = 0; i < shardCount; i++) {
+          shards[i] = new Shard();
+        }
+
+        String[] texts = new String[]{ "foo", "bar", "bar baz", "foo foo bar" };
+
+        // Create a bunch of random documents, and index them - once into the control index,
+        // and once into a randomly picked shard.
+
+        int numDocs = atLeast(200);
+        for (int i = 0; i < numDocs; i++) {
+          Document doc = new Document();
+          doc.add(new NumericDocValuesField("id", i));
+          doc.add(new TextField("name", Integer.toString(i), Field.Store.YES));
+          doc.add(new TextField("text", texts[random().nextInt(texts.length)], Field.Store.NO));
+          doc.add(new SortedDocValuesField("sort1", new BytesRef("sort" + random().nextInt(4))));
+          doc.add(new NumericDocValuesField("sort2", random().nextLong()));
+          addGroupField(doc, i);
+          control.writer.addDocument(doc);
+          int shard = random().nextInt(shardCount);
+          shards[shard].writer.addDocument(doc);
+        }
+
+        String[] query = new String[]{ "foo", "bar", "baz" };
+        Query topLevel = new TermQuery(new Term("text", query[random().nextInt(query.length)]));
+
+        Sort sort = new Sort(new SortField("sort1", SortField.Type.STRING), new SortField("sort2", SortField.Type.LONG));
+
+        // A grouped query run in two phases against the control should give us the same
+        // result as the query run against shards and merged back together after each phase.
+
+        FirstPassGroupingCollector<T> singletonFirstPass = new FirstPassGroupingCollector<>(getGroupSelector(), sort, 5);
+        control.getIndexSearcher().search(topLevel, singletonFirstPass);
+        Collection<SearchGroup<T>> singletonGroups = singletonFirstPass.getTopGroups(0);
+
+        List<Collection<SearchGroup<T>>> shardGroups = new ArrayList<>();
+        for (Shard shard : shards) {
+          FirstPassGroupingCollector<T> fc = new FirstPassGroupingCollector<>(getGroupSelector(), sort, 5);
+          shard.getIndexSearcher().search(topLevel, fc);
+          shardGroups.add(fc.getTopGroups(0));
+        }
+        Collection<SearchGroup<T>> mergedGroups = SearchGroup.merge(shardGroups, 0, 5, sort);
+        assertEquals(singletonGroups, mergedGroups);
+
+        TopGroupsCollector<T> singletonSecondPass = new TopGroupsCollector<>(getGroupSelector(), singletonGroups, sort,
+            Sort.RELEVANCE, 5, true);
+        control.getIndexSearcher().search(topLevel, singletonSecondPass);
+        TopGroups<T> singletonTopGroups = singletonSecondPass.getTopGroups(0);
+
+        // TODO why does SearchGroup.merge() take a list but TopGroups.merge() take an array?
+        @SuppressWarnings("unchecked")
+        TopGroups<T>[] shardTopGroups = new TopGroups[shards.length];
+        int j = 0;
+        for (Shard shard : shards) {
+          TopGroupsCollector<T> sc = new TopGroupsCollector<>(getGroupSelector(), mergedGroups, sort, Sort.RELEVANCE, 5, true);
+          shard.getIndexSearcher().search(topLevel, sc);
+          shardTopGroups[j] = sc.getTopGroups(0);
+          j++;
+        }
+        TopGroups<T> mergedTopGroups = TopGroups.merge(shardTopGroups, sort, Sort.RELEVANCE, 0, 5, TopGroups.ScoreMergeMode.None);
+        assertNotNull(mergedTopGroups);
+
+        assertEquals(singletonTopGroups.totalGroupedHitCount, mergedTopGroups.totalGroupedHitCount);
+        assertEquals(singletonTopGroups.totalHitCount, mergedTopGroups.totalHitCount);
+        assertEquals(singletonTopGroups.totalGroupCount, mergedTopGroups.totalGroupCount);
+        assertEquals(singletonTopGroups.groups.length, mergedTopGroups.groups.length);
+        for (int i = 0; i < singletonTopGroups.groups.length; i++) {
+          assertEquals(singletonTopGroups.groups[i].groupValue, mergedTopGroups.groups[i].groupValue);
+          assertEquals(singletonTopGroups.groups[i].scoreDocs.length, mergedTopGroups.groups[i].scoreDocs.length);
+        }
+      } finally {
+        // IOUtils.close ignores null entries, i.e. slots never filled because a Shard constructor threw
+        IOUtils.close(shards);
+      }
+    }
+  }
+
+  /** Indexes atLeast(200) documents carrying id, name, random text, sort1/sort2 and the subclass's group field. */
+  private void indexRandomDocs(RandomIndexWriter w) throws IOException {
+    String[] texts = new String[]{ "foo", "bar", "bar baz", "foo foo bar" };
+
+    int numDocs = atLeast(200);
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      doc.add(new NumericDocValuesField("id", i));
+      doc.add(new TextField("name", Integer.toString(i), Field.Store.YES));
+      doc.add(new TextField("text", texts[random().nextInt(texts.length)], Field.Store.NO));
+      doc.add(new SortedDocValuesField("sort1", new BytesRef("sort" + random().nextInt(4))));
+      doc.add(new NumericDocValuesField("sort2", random().nextLong()));
+      addGroupField(doc, i);
+      w.addDocument(doc);
+    }
+  }
+
+  /** Asserts that {@code first} does not sort after {@code second}: by sort value 0 as BytesRef, then by value 1 as long on ties. */
+  private void assertSortsBefore(GroupDocs<T> first, GroupDocs<T> second) {
+    Object[] groupSortValues = second.groupSortValues;
+    Object[] prevSortValues = first.groupSortValues;
+    assertTrue(((BytesRef)prevSortValues[0]).compareTo((BytesRef)groupSortValues[0]) <= 0);
+    if (prevSortValues[0].equals(groupSortValues[0])) {
+      assertTrue((long)prevSortValues[1] <= (long)groupSortValues[1]);
+    }
+  }
+}
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/BlockGroupingTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/BlockGroupingTest.java
new file mode 100644
index 00000000000..fc03dfdd84a
--- /dev/null
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/BlockGroupingTest.java
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.BytesRef;
+
+/** Tests block grouping: "book" blocks of "chapter" docs, each block ended by a doc with blockEnd=true. */
+public class BlockGroupingTest extends AbstractGroupingTestCase {
+
+  public void testSimple() throws IOException {
+    // Default sorts: groups, and the chapters inside each group, are ordered by relevance
+    try (Shard shard = new Shard()) {
+      indexRandomDocs(shard.writer);
+      IndexSearcher searcher = shard.getIndexSearcher();
+
+      Query blockEndQuery = new TermQuery(new Term("blockEnd", "true"));
+      GroupingSearch grouper = new GroupingSearch(blockEndQuery);
+      grouper.setGroupDocsLimit(10);
+
+      Query topLevel = new TermQuery(new Term("text", "grandmother"));
+      TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);
+
+      // We're sorting by score, so the score of the top group should be the same as the
+      // score of the top document from the same query with no grouping
+      TopDocs topDoc = searcher.search(topLevel, 1);
+      assertEquals(topDoc.scoreDocs[0].score, tg.groups[0].scoreDocs[0].score, 0);
+      assertEquals(topDoc.scoreDocs[0].doc, tg.groups[0].scoreDocs[0].doc);
+
+      for (int i = 0; i < tg.groups.length; i++) {
+        String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
+        // The contents of each group should be equal to the results of a search for
+        // that group alone
+        Query filtered = new BooleanQuery.Builder()
+            .add(topLevel, BooleanClause.Occur.MUST)
+            .add(new TermQuery(new Term("book", bookName)), BooleanClause.Occur.FILTER)
+            .build();
+        TopDocs td = searcher.search(filtered, 10);
+        assertScoreDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
+      }
+    }
+  }
+
+  public void testTopLevelSort() throws IOException {
+    // Groups are ordered by the "length" sort; chapters inside each group stay ordered by relevance
+    try (Shard shard = new Shard()) {
+      indexRandomDocs(shard.writer);
+      IndexSearcher searcher = shard.getIndexSearcher();
+
+      Sort sort = new Sort(new SortField("length", SortField.Type.LONG));
+
+      Query blockEndQuery = new TermQuery(new Term("blockEnd", "true"));
+      GroupingSearch grouper = new GroupingSearch(blockEndQuery);
+      grouper.setGroupDocsLimit(10);
+      grouper.setGroupSort(sort); // groups returned sorted by length, chapters within group sorted by relevancy
+
+      Query topLevel = new TermQuery(new Term("text", "grandmother"));
+      TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);
+
+      // The sort value of the top doc in the top group should be the same as the sort value
+      // of the top result from the same search done with no grouping
+      TopDocs topDoc = searcher.search(topLevel, 1, sort);
+      assertEquals(((FieldDoc)topDoc.scoreDocs[0]).fields[0], tg.groups[0].groupSortValues[0]);
+
+      for (int i = 0; i < tg.groups.length; i++) {
+        String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
+        // The contents of each group should be equal to the results of a search for
+        // that group alone, sorted by score
+        Query filtered = new BooleanQuery.Builder()
+            .add(topLevel, BooleanClause.Occur.MUST)
+            .add(new TermQuery(new Term("book", bookName)), BooleanClause.Occur.FILTER)
+            .build();
+        TopDocs td = searcher.search(filtered, 10);
+        assertScoreDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
+        // Compare every adjacent pair, starting with groups 0 and 1
+        if (i > 0) {
+          assertSortsBefore(tg.groups[i - 1], tg.groups[i]);
+        }
+      }
+    }
+  }
+
+  public void testWithinGroupSort() throws IOException {
+    // Groups are ordered by relevance; chapters inside each group follow the "length" sort
+    try (Shard shard = new Shard()) {
+      indexRandomDocs(shard.writer);
+      IndexSearcher searcher = shard.getIndexSearcher();
+
+      Sort sort = new Sort(new SortField("length", SortField.Type.LONG));
+
+      Query blockEndQuery = new TermQuery(new Term("blockEnd", "true"));
+      GroupingSearch grouper = new GroupingSearch(blockEndQuery);
+      grouper.setGroupDocsLimit(10);
+      grouper.setSortWithinGroup(sort); // groups returned sorted by relevancy, chapters within group sorted by length
+
+      Query topLevel = new TermQuery(new Term("text", "grandmother"));
+      TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);
+
+      // We're sorting by score, so the score of the top group should be the same as the
+      // score of the top document from the same query with no grouping
+      TopDocs topDoc = searcher.search(topLevel, 1);
+      assertEquals(topDoc.scoreDocs[0].score, (float)tg.groups[0].groupSortValues[0], 0);
+
+      for (int i = 0; i < tg.groups.length; i++) {
+        String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
+        // The contents of each group should be equal to the results of a search for
+        // that group alone, sorted by length
+        Query filtered = new BooleanQuery.Builder()
+            .add(topLevel, BooleanClause.Occur.MUST)
+            .add(new TermQuery(new Term("book", bookName)), BooleanClause.Occur.FILTER)
+            .build();
+        TopDocs td = searcher.search(filtered, 10, sort);
+        assertFieldDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
+        // We're sorting by score, so the group sort value for each group should be a float,
+        // and the value for the previous group should be higher or equal to the value for this one
+        if (i > 0) {
+          float prevScore = (float) tg.groups[i - 1].groupSortValues[0];
+          float thisScore = (float) tg.groups[i].groupSortValues[0];
+          assertTrue(prevScore >= thisScore);
+        }
+      }
+    }
+  }
+
+  /** Indexes atLeast(20) books, adding each book's chapters as one block via addDocuments. */
+  private static void indexRandomDocs(RandomIndexWriter writer) throws IOException {
+    int bookCount = atLeast(20);
+    for (int i = 0; i < bookCount; i++) {
+      writer.addDocuments(createRandomBlock(i));
+    }
+  }
+
+  /** Builds the chapter docs of book number {@code book}; only the last chapter carries the blockEnd marker. */
+  private static List<Document> createRandomBlock(int book) {
+    List<Document> block = new ArrayList<>();
+    String bookName = "book" + book;
+    int chapterCount = atLeast(10);
+    for (int j = 0; j < chapterCount; j++) {
+      Document doc = new Document();
+      String chapterName = "chapter" + j;
+      String chapterText = randomText();
+      doc.add(new TextField("book", bookName, Field.Store.YES));
+      doc.add(new TextField("chapter", chapterName, Field.Store.YES));
+      doc.add(new TextField("text", chapterText, Field.Store.NO));
+      doc.add(new NumericDocValuesField("length", chapterText.length()));
+      doc.add(new SortedDocValuesField("book", new BytesRef(bookName)));
+      if (j == chapterCount - 1) {
+        doc.add(new TextField("blockEnd", "true", Field.Store.NO));
+      }
+      block.add(doc);
+    }
+    return block;
+  }
+
+  private static final String[] TEXT = new String[]{
+      "It was the day my grandmother exploded",
+      "It was the best of times, it was the worst of times",
+      "It was a bright cold morning in April",
+      "It is a truth universally acknowledged",
+      "I have just returned from a visit to my landlord",
+      "I've been here and I've been there"
+  };
+
+  /** Returns between 1 and 20 sentences picked at random from TEXT, joined by single spaces. */
+  private static String randomText() {
+    StringBuilder sb = new StringBuilder(TEXT[random().nextInt(TEXT.length)]);
+    int sentences = random().nextInt(20);
+    for (int i = 0; i < sentences; i++) {
+      sb.append(" ").append(TEXT[random().nextInt(TEXT.length)]);
+    }
+    return sb.toString();
+  }
+
+  /** Asserts that {@code first}'s leading group sort value, read as a Long, is not greater than {@code second}'s. */
+  private void assertSortsBefore(GroupDocs<?> first, GroupDocs<?> second) {
+    Object[] groupSortValues = second.groupSortValues;
+    Object[] prevSortValues = first.groupSortValues;
+    assertTrue(((Long)prevSortValues[0]).compareTo((Long)groupSortValues[0]) <= 0);
+  }
+
+  /** Asserts equal length and, per position, equal doc ids and equal FieldDoc sort values; every hit must be a FieldDoc. */
+  protected static void assertFieldDocsEquals(ScoreDoc[] expected, ScoreDoc[] actual) {
+    assertEquals(expected.length, actual.length);
+    for (int i = 0; i < expected.length; i++) {
+      assertEquals(expected[i].doc, actual[i].doc);
+      FieldDoc e = (FieldDoc) expected[i];
+      FieldDoc a = (FieldDoc) actual[i];
+      assertArrayEquals(e.fields, a.fields);
+    }
+  }
+}
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TermGroupSelectorTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TermGroupSelectorTest.java
new file mode 100644
index 00000000000..831a0147d1d
--- /dev/null
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TermGroupSelectorTest.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.BytesRef;
+
+/** Runs the shared group-selector tests against a TermGroupSelector, including docs that have no group value. */
+public class TermGroupSelectorTest extends BaseGroupSelectorTestCase<BytesRef> {
+  @Override
+  protected void addGroupField(Document document, int id) {
+    if (rarely()) {
+      return; // missing value
+    }
+    // The same value is indexed twice under one name: as doc values and as a text field
+    String groupValue = "group" + random().nextInt(10);
+    document.add(new SortedDocValuesField("groupField", new BytesRef(groupValue)));
+    document.add(new TextField("groupField", groupValue, Field.Store.NO));
+  }
+
+  @Override
+  protected GroupSelector<BytesRef> getGroupSelector() {
+    return new TermGroupSelector("groupField");
+  }
+
+  @Override
+  protected Query filterQuery(BytesRef groupValue) {
+    if (groupValue == null) {
+      // A null group value selects every document that has no doc value for groupField
+      return new BooleanQuery.Builder()
+          .add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER)
+          .add(new DocValuesFieldExistsQuery("groupField"), BooleanClause.Occur.MUST_NOT)
+          .build();
+    }
+    return new TermQuery(new Term("groupField", groupValue));
+  }
+}
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/ValueSourceGroupSelectorTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/ValueSourceGroupSelectorTest.java
new file mode 100644
index 00000000000..941ad8a26c1
--- /dev/null
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/ValueSourceGroupSelectorTest.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import java.util.HashMap;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.mutable.MutableValue;
+
+/** Runs the shared group-selector tests against a ValueSourceGroupSelector; every doc gets a group value. */
+public class ValueSourceGroupSelectorTest extends BaseGroupSelectorTestCase<MutableValue> {
+  @Override
+  protected void addGroupField(Document document, int id) {
+    String groupValue = "group" + random().nextInt(10);
+    document.add(new SortedDocValuesField("groupField", new BytesRef(groupValue)));
+    document.add(new TextField("groupField", groupValue, Field.Store.NO));
+  }
+
+  @Override
+  protected GroupSelector<MutableValue> getGroupSelector() {
+    return new ValueSourceGroupSelector(new SortedSetFieldSource("groupField"), new HashMap<>());
+  }
+
+  @Override
+  protected Query filterQuery(MutableValue groupValue) {
+    return new TermQuery(new Term("groupField", groupValue.toObject().toString()));
+  }
+}