From a877632aaf86f5eeeba50201e7974f3329f5ccb9 Mon Sep 17 00:00:00 2001 From: Ishan Chattopadhyaya Date: Sun, 28 Apr 2019 23:01:58 +0530 Subject: [PATCH] SOLR-12248, SOLR-4647: Grouping is broken on docValues-only fields --- solr/CHANGES.txt | 3 ++ .../java/org/apache/solr/search/Grouping.java | 16 ++++++-- .../GroupedEndResultTransformer.java | 24 ++++++++---- .../solr/collection1/conf/schema.xml | 2 + .../solr/collection1/conf/schema12.xml | 2 + .../apache/solr/TestDistributedGrouping.java | 39 +++++++++++++------ .../org/apache/solr/TestGroupingSearch.java | 34 ++++++++++++++-- .../java/org/apache/solr/SolrTestCaseJ4.java | 10 ++++- 8 files changed, 105 insertions(+), 25 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d07298ba673..70a0d7a2453 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -190,6 +190,9 @@ Bug Fixes * SOLR-13343: Fix spacing issue in browser log (Marcus Eagan via Jason Gerlowski) +* SOLR-12248, SOLR-4647: Grouping is broken on docValues-only fields (non-stored, non-indexed) + (Erick Ericsson, Adrien Grand, Munendra S N, Scott Stults, Ishan Chattopadhyaya) + Improvements ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/Grouping.java b/solr/core/src/java/org/apache/solr/search/Grouping.java index 4869386c3d8..06a7ed0bdce 100644 --- a/solr/core/src/java/org/apache/solr/search/Grouping.java +++ b/solr/core/src/java/org/apache/solr/search/Grouping.java @@ -26,6 +26,7 @@ import java.util.Locale; import java.util.Map; import java.util.Set; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.ArrayUtils; import org.apache.lucene.index.ExitableDirectoryReader; import org.apache.lucene.index.IndexableField; @@ -59,6 +60,7 @@ import org.apache.lucene.search.grouping.ValueSourceGroupSelector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.mutable.MutableValue; import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.request.SolrQueryRequest; @@ -813,9 +815,17 @@ public class Grouping { if (group.groupValue != null) { SchemaField schemaField = searcher.getSchema().getField(groupBy); FieldType fieldType = schemaField.getType(); - String readableValue = fieldType.indexedToReadable(group.groupValue.utf8ToString()); - IndexableField field = schemaField.createField(readableValue); - nl.add("groupValue", fieldType.toObject(field)); + // use createFields so that fields having doc values are also supported + // TODO: currently, this path is called only for string field, so + // should we just use fieldType.toObject(schemaField, group.groupValue) here? + List fields = schemaField.createFields(group.groupValue.utf8ToString()); + if (CollectionUtils.isNotEmpty(fields)) { + nl.add("groupValue", fieldType.toObject(fields.get(0))); + } else { + throw new SolrException(ErrorCode.INVALID_STATE, + "Couldn't create schema field for grouping, group value: " + group.groupValue.utf8ToString() + + ", field: " + schemaField); + } } else { nl.add("groupValue", null); } diff --git a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java index 485f724f74c..2c622d4ff09 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java @@ -16,6 +16,12 @@ */ package org.apache.solr.search.grouping.endresulttransformer; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.commons.collections.CollectionUtils; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TotalHits; @@ -23,6 +29,8 @@ import org.apache.lucene.search.grouping.GroupDocs; import org.apache.lucene.search.grouping.TopGroups; import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.handler.component.ResponseBuilder; @@ -31,10 +39,6 @@ import org.apache.solr.schema.SchemaField; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.grouping.distributed.command.QueryCommandResult; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - /** * Implementation of {@link EndResultTransformer} that keeps each grouped result separate in the final response. */ @@ -67,9 +71,15 @@ public class GroupedEndResultTransformer implements EndResultTransformer { for (GroupDocs group : topGroups.groups) { SimpleOrderedMap groupResult = new SimpleOrderedMap<>(); if (group.groupValue != null) { - groupResult.add( - "groupValue", groupFieldType.toObject(groupField.createField(group.groupValue.utf8ToString())) - ); + // use createFields so that fields having doc values are also supported + List fields = groupField.createFields(group.groupValue.utf8ToString()); + if (CollectionUtils.isNotEmpty(fields)) { + groupResult.add("groupValue", groupFieldType.toObject(fields.get(0))); + } else { + throw new SolrException(ErrorCode.INVALID_STATE, + "Couldn't create schema field for grouping, group value: " + group.groupValue.utf8ToString() + + ", field: " + groupField); + } } else { groupResult.add("groupValue", null); } diff --git a/solr/core/src/test-files/solr/collection1/conf/schema.xml b/solr/core/src/test-files/solr/collection1/conf/schema.xml index acdf7cf0402..81c8e33da1b 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema.xml @@ -743,6 +743,8 @@ useDocValuesAsStored="true"/> + + + diff --git a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java index 534a29963bd..de55b44607f 100644 --- a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java +++ b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java @@ -50,6 +50,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { String i1dv="a_idv"; String i1="a_i1"; String s1="a_s"; + String s1dv = "a_s_dvo"; + String b1dv = "a_b_dvo"; String tlong = "other_tl1"; String tdate_a = "a_n_tdt"; String tdate_b = "b_n_tdt"; @@ -73,16 +75,16 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1); indexr(id,1, i1, 100, tlong, 100, i1dv, 100, t1,"now is the time for all good men", - tdate_a, "2010-04-20T11:00:00Z", - tdate_b, "2009-08-20T11:00:00Z", + tdate_a, "2010-04-20T11:00:00Z", b1dv, true, + tdate_b, "2009-08-20T11:00:00Z", s1dv, "Trillian", "foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d); indexr(id,2, i1, 50 , tlong, 50, i1dv, 50, t1,"to come to the aid of their country.", - tdate_a, "2010-05-02T11:00:00Z", + tdate_a, "2010-05-02T11:00:00Z", b1dv, false, tdate_b, "2009-11-02T11:00:00Z"); indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow", tdate_a, "2010-05-03T11:00:00Z"); indexr(id,4, i1, -100 ,tlong, 101, i1dv, 101, - t1,"the quick fox jumped over the lazy dog", + t1,"the quick fox jumped over the lazy dog", b1dv, true, s1dv, "Zaphod", tdate_a, "2010-05-03T11:00:00Z", tdate_b, "2010-05-03T11:00:00Z"); indexr(id,5, i1, 500, tlong, 500 , i1dv, 500, @@ -111,32 +113,32 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { indexr( id, 18, i1, 232, tlong, 332, i1dv, 150, - t1,"no eggs on wall, lesson learned", + t1,"no eggs on wall, lesson learned", b1dv, true, s1dv, "dent", oddField, "odd man out" ); indexr( id, 19, i1, 232, tlong, 432, i1dv, 300, - t1, "many eggs on wall", + t1, "many eggs on wall", b1dv, false, s1dv, "dent", oddField, "odd man in" ); indexr( id, 20, i1, 232, tlong, 532, i1dv, 150, - t1, "some eggs on wall", + t1, "some eggs on wall", b1dv, false, s1dv, "author", oddField, "odd man between" ); indexr( id, 21, i1, 232, tlong, 632, i1dv, 120, - t1, "a few eggs on wall", + t1, "a few eggs on wall", b1dv, true, s1dv, "ford prefect", oddField, "odd man under" ); indexr( id, 22, i1, 232, tlong, 732, i1dv, 120, - t1, "any eggs on wall", + t1, "any eggs on wall", b1dv, false, s1dv, "ford prefect", oddField, "odd man above" ); indexr( id, 23, i1, 233, tlong, 734, i1dv, 120, - t1, "dirty eggs", + t1, "dirty eggs", b1dv, true, s1dv, "Marvin", oddField, "odd eggs" ); @@ -211,6 +213,11 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { query("q", t1 + ":eggs", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", tlong + " asc, id asc"); query("q", i1 + ":232", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", tlong + " asc, id asc"); + // SOLR-12248 + query("q", "*:*", "rows", 100, "fl", "id," + s1dv, "group", "true", "group.field", s1dv, "group.limit", -1, "sort", b1dv + " asc, id asc", "group.sort", "id desc"); + query("q", "*:*", "fl", "id," + b1dv, "group", "true", "group.field", b1dv, "group.limit", 10, "sort", s1dv + " asc, id asc"); + query("q", s1dv + ":dent", "fl", "id," + b1dv, "group", "true", "group.field", b1dv, "group.limit", 10, "sort", i1 + " asc, id asc"); + // In order to validate this we need to make sure that during indexing that all documents of one group only occur on the same shard query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.ngroups", "true"); query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true"); @@ -303,7 +310,17 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase { assertEquals(docs.toString(), "22", docs.get(0).getFirstValue("id")); assertEquals(docs.toString(), "21", docs.get(4).getFirstValue("id")); - + // grouping on boolean non-stored docValued enabled field + rsp = query("q", b1dv + ":*", "fl", "id," + b1dv, "group", "true", "group.field", + b1dv, "group.limit", 10, "sort", b1dv + " asc, id asc"); + nl = (NamedList) rsp.getResponse().get("grouped"); + nl = (NamedList) nl.get(b1dv); + assertEquals(rsp.toString(), 9, nl.get("matches")); + assertEquals(rsp.toString(), 2, ((List>)nl.get("groups")).size()); + nl = ((List>)nl.get("groups")).get(0); + assertEquals(rsp.toString(), false, nl.get("groupValue")); + docs = (SolrDocumentList) nl.get("doclist"); + assertEquals(docs.toString(), 4, docs.getNumFound()); // Can't validate the response, but can check if no errors occur. simpleQuery("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id asc", CommonParams.TIME_ALLOWED, 1); diff --git a/solr/core/src/test/org/apache/solr/TestGroupingSearch.java b/solr/core/src/test/org/apache/solr/TestGroupingSearch.java index 77b1cf77472..d8ce1b237ae 100644 --- a/solr/core/src/test/org/apache/solr/TestGroupingSearch.java +++ b/solr/core/src/test/org/apache/solr/TestGroupingSearch.java @@ -54,6 +54,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String FOO_STRING_FIELD = "foo_s1"; + public static final String FOO_STRING_DOCVAL_FIELD = "foo_sdv"; public static final String SMALL_STRING_FIELD = "small_s1"; public static final String SMALL_INT_FIELD = "small_i"; static final String EMPTY_FACETS = "'facet_ranges':{},'facet_intervals':{},'facet_heatmaps':{}"; @@ -658,11 +659,32 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { ,"/grouped/id:1000=={'matches':0,'doclist':{'numFound':0,'start':0,'docs':[]}}" ); - - } + @Test + public void testGroupingNonIndexedOrStoredDocValues() throws Exception { + // test-case from SOLR-4647 + assertU(add(doc("id", "1", FOO_STRING_DOCVAL_FIELD, "a", "bday", "2012-11-20T00:00:00Z"))); + assertU(add(doc("id", "2", FOO_STRING_DOCVAL_FIELD, "b", "bday", "2012-11-21T00:00:00Z"))); + assertU(add(doc("id", "3", FOO_STRING_DOCVAL_FIELD, "a", "bday", "2012-11-20T00:00:00Z"))); + assertU(add(doc("id", "4", FOO_STRING_DOCVAL_FIELD, "b", "bday", "2013-01-15T00:00:00Z"))); + assertU(add(doc("id", "5", FOO_STRING_DOCVAL_FIELD, "a", "bday", "2013-01-14T00:00:00Z"))); + assertU(commit()); + // Facet counts based on groups + SolrQueryRequest req = req("q", "*:*", "rows", "1", "group", "true", "group.field", FOO_STRING_DOCVAL_FIELD, + "sort", FOO_STRING_DOCVAL_FIELD + " asc", "fl", "id", + "fq", "{!tag=chk}bday:[2012-12-18T00:00:00Z TO 2013-01-17T23:59:59Z]", + "facet", "true", "group.truncate", "true", "group.sort", "bday desc", + "facet.query", "{!ex=chk key=LW1}bday:[2013-01-11T00:00:00Z TO 2013-01-17T23:59:59Z]", + "facet.query", "{!ex=chk key=LM1}bday:[2012-12-18T00:00:00Z TO 2013-01-17T23:59:59Z]", + "facet.query", "{!ex=chk key=LM3}bday:[2012-10-18T00:00:00Z TO 2013-01-17T23:59:59Z]"); + assertJQ( + req, + "/grouped=={'"+FOO_STRING_DOCVAL_FIELD+"':{'matches':2,'groups':[{'groupValue':'a','doclist':{'numFound':1,'start':0,'docs':[{'id':'5'}]}}]}}", + "/facet_counts=={'facet_queries':{'LW1':2,'LM1':2,'LM3':2},'facet_fields':{}," + EMPTY_FACETS + "}" + ); + } @Test public void testRandomGrouping() throws Exception { @@ -691,6 +713,12 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { types.add(new FldType(SMALL_STRING_FIELD,ZERO_ONE, new SVal('a',(char)('c'+indexSize/10),1,1))); types.add(new FldType(SMALL_INT_FIELD,ZERO_ONE, new IRange(0,5+indexSize/10))); + // non-stored non-indexed docValue enabled fields + types.add(new FldType("score_ff",ONE_ONE, new FVal(1,100))); + types.add(new FldType("foo_ii",ZERO_ONE, new IRange(0,indexSize))); + types.add(new FldType(FOO_STRING_DOCVAL_FIELD,ONE_ONE, new SVal('a','z',3,7))); + types.add(new FldType("foo_bdv", ZERO_ONE, new BVal())); + clearIndex(); Map model = indexDocs(types, null, indexSize); //System.out.println("############### model=" + model); @@ -825,7 +853,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { int randomPercentage = random().nextInt(101); // TODO: create a random filter too SolrQueryRequest req = req("group","true","wt","json","indent","true", "echoParams","all", "q","{!func}score_f", "group.field",groupField - ,sortStr==null ? "nosort":"sort", sortStr ==null ? "": sortStr + ,sortStr==null ? "nosort":"sort", sortStr ==null ? "": sortStr, "fl", "*,score_ff,foo_ii,foo_bdv," + FOO_STRING_DOCVAL_FIELD // only docValued fields are not returned by default ,(groupSortStr == null || groupSortStr == sortStr) ? "noGroupsort":"group.sort", groupSortStr==null ? "": groupSortStr ,"rows",""+rows, "start",""+start, "group.offset",""+group_offset, "group.limit",""+group_limit, GroupParams.GROUP_CACHE_PERCENTAGE, Integer.toString(randomPercentage), GroupParams.GROUP_TOTAL_COUNT, includeNGroups ? "true" : "false", diff --git a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java index 0a7d5aa3c08..c286e42d2d9 100644 --- a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java +++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java @@ -1611,7 +1611,15 @@ public abstract class SolrTestCaseJ4 extends SolrTestCase { public Comparable get() { return getFloat(); } - } + } + + public static class BVal extends Vals { + + @Override + public Comparable get() { + return random().nextBoolean(); + } + } public static class SVal extends Vals { char start;