SOLR-12248, SOLR-4647: Grouping is broken on docValues-only fields

This commit is contained in:
Ishan Chattopadhyaya 2019-04-28 23:01:58 +05:30
parent 16203c2668
commit a877632aaf
8 changed files with 105 additions and 25 deletions

View File

@ -190,6 +190,9 @@ Bug Fixes
* SOLR-13343: Fix spacing issue in browser log (Marcus Eagan via Jason Gerlowski)
* SOLR-12248, SOLR-4647: Grouping is broken on docValues-only fields (non-stored, non-indexed)
(Erick Ericsson, Adrien Grand, Munendra S N, Scott Stults, Ishan Chattopadhyaya)
Improvements
----------------------

View File

@ -26,6 +26,7 @@ import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.index.IndexableField;
@ -59,6 +60,7 @@ import org.apache.lucene.search.grouping.ValueSourceGroupSelector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
@ -813,9 +815,17 @@ public class Grouping {
if (group.groupValue != null) {
SchemaField schemaField = searcher.getSchema().getField(groupBy);
FieldType fieldType = schemaField.getType();
String readableValue = fieldType.indexedToReadable(group.groupValue.utf8ToString());
IndexableField field = schemaField.createField(readableValue);
nl.add("groupValue", fieldType.toObject(field));
// use createFields so that fields having doc values are also supported
// TODO: currently, this path is called only for string field, so
// should we just use fieldType.toObject(schemaField, group.groupValue) here?
List<IndexableField> fields = schemaField.createFields(group.groupValue.utf8ToString());
if (CollectionUtils.isNotEmpty(fields)) {
nl.add("groupValue", fieldType.toObject(fields.get(0)));
} else {
throw new SolrException(ErrorCode.INVALID_STATE,
"Couldn't create schema field for grouping, group value: " + group.groupValue.utf8ToString()
+ ", field: " + schemaField);
}
} else {
nl.add("groupValue", null);
}

View File

@ -16,6 +16,12 @@
*/
package org.apache.solr.search.grouping.endresulttransformer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.collections.CollectionUtils;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
@ -23,6 +29,8 @@ import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.component.ResponseBuilder;
@ -31,10 +39,6 @@ import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.grouping.distributed.command.QueryCommandResult;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* Implementation of {@link EndResultTransformer} that keeps each grouped result separate in the final response.
*/
@ -67,9 +71,15 @@ public class GroupedEndResultTransformer implements EndResultTransformer {
for (GroupDocs<BytesRef> group : topGroups.groups) {
SimpleOrderedMap<Object> groupResult = new SimpleOrderedMap<>();
if (group.groupValue != null) {
groupResult.add(
"groupValue", groupFieldType.toObject(groupField.createField(group.groupValue.utf8ToString()))
);
// use createFields so that fields having doc values are also supported
List<IndexableField> fields = groupField.createFields(group.groupValue.utf8ToString());
if (CollectionUtils.isNotEmpty(fields)) {
groupResult.add("groupValue", groupFieldType.toObject(fields.get(0)));
} else {
throw new SolrException(ErrorCode.INVALID_STATE,
"Couldn't create schema field for grouping, group value: " + group.groupValue.utf8ToString()
+ ", field: " + groupField);
}
} else {
groupResult.add("groupValue", null);
}

View File

@ -743,6 +743,8 @@
useDocValuesAsStored="true"/>
<dynamicField name="*_s_dvo" multiValued="false" type="string" docValues="true" indexed="false" stored="false"
useDocValuesAsStored="true"/>
<dynamicField name="*_b_dvo" multiValued="false" type="boolean" docValues="true" indexed="false" stored="false"
useDocValuesAsStored="true"/>
<dynamicField name="*_l_dvo" multiValued="false" type="long" docValues="true" indexed="false" stored="false"
useDocValuesAsStored="true"/>
<dynamicField name="*_f_dvo" multiValued="false" type="float" docValues="true" indexed="false" stored="false"

View File

@ -697,6 +697,8 @@
<!-- :TODO: why are these identical?!?!?! -->
<dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_sdv" type="string" indexed="false" stored="false" docValues="true" useDocValuesAsStored="true"/>
<dynamicField name="*_bdv" type="boolean" indexed="false" stored="false" docValues="true" useDocValuesAsStored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_tt" type="text" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>

View File

@ -50,6 +50,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
String i1dv="a_idv";
String i1="a_i1";
String s1="a_s";
String s1dv = "a_s_dvo";
String b1dv = "a_b_dvo";
String tlong = "other_tl1";
String tdate_a = "a_n_tdt";
String tdate_b = "b_n_tdt";
@ -73,16 +75,16 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
indexr(id,1, i1, 100, tlong, 100, i1dv, 100, t1,"now is the time for all good men",
tdate_a, "2010-04-20T11:00:00Z",
tdate_b, "2009-08-20T11:00:00Z",
tdate_a, "2010-04-20T11:00:00Z", b1dv, true,
tdate_b, "2009-08-20T11:00:00Z", s1dv, "Trillian",
"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
indexr(id,2, i1, 50 , tlong, 50, i1dv, 50, t1,"to come to the aid of their country.",
tdate_a, "2010-05-02T11:00:00Z",
tdate_a, "2010-05-02T11:00:00Z", b1dv, false,
tdate_b, "2009-11-02T11:00:00Z");
indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow",
tdate_a, "2010-05-03T11:00:00Z");
indexr(id,4, i1, -100 ,tlong, 101, i1dv, 101,
t1,"the quick fox jumped over the lazy dog",
t1,"the quick fox jumped over the lazy dog", b1dv, true, s1dv, "Zaphod",
tdate_a, "2010-05-03T11:00:00Z",
tdate_b, "2010-05-03T11:00:00Z");
indexr(id,5, i1, 500, tlong, 500 , i1dv, 500,
@ -111,32 +113,32 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
indexr(
id, 18, i1, 232, tlong, 332, i1dv, 150,
t1,"no eggs on wall, lesson learned",
t1,"no eggs on wall, lesson learned", b1dv, true, s1dv, "dent",
oddField, "odd man out"
);
indexr(
id, 19, i1, 232, tlong, 432, i1dv, 300,
t1, "many eggs on wall",
t1, "many eggs on wall", b1dv, false, s1dv, "dent",
oddField, "odd man in"
);
indexr(
id, 20, i1, 232, tlong, 532, i1dv, 150,
t1, "some eggs on wall",
t1, "some eggs on wall", b1dv, false, s1dv, "author",
oddField, "odd man between"
);
indexr(
id, 21, i1, 232, tlong, 632, i1dv, 120,
t1, "a few eggs on wall",
t1, "a few eggs on wall", b1dv, true, s1dv, "ford prefect",
oddField, "odd man under"
);
indexr(
id, 22, i1, 232, tlong, 732, i1dv, 120,
t1, "any eggs on wall",
t1, "any eggs on wall", b1dv, false, s1dv, "ford prefect",
oddField, "odd man above"
);
indexr(
id, 23, i1, 233, tlong, 734, i1dv, 120,
t1, "dirty eggs",
t1, "dirty eggs", b1dv, true, s1dv, "Marvin",
oddField, "odd eggs"
);
@ -211,6 +213,11 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
query("q", t1 + ":eggs", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", tlong + " asc, id asc");
query("q", i1 + ":232", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", tlong + " asc, id asc");
// SOLR-12248
query("q", "*:*", "rows", 100, "fl", "id," + s1dv, "group", "true", "group.field", s1dv, "group.limit", -1, "sort", b1dv + " asc, id asc", "group.sort", "id desc");
query("q", "*:*", "fl", "id," + b1dv, "group", "true", "group.field", b1dv, "group.limit", 10, "sort", s1dv + " asc, id asc");
query("q", s1dv + ":dent", "fl", "id," + b1dv, "group", "true", "group.field", b1dv, "group.limit", 10, "sort", i1 + " asc, id asc");
// In order to validate this we need to make sure that during indexing that all documents of one group only occur on the same shard
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.ngroups", "true");
query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true");
@ -303,7 +310,17 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
assertEquals(docs.toString(), "22", docs.get(0).getFirstValue("id"));
assertEquals(docs.toString(), "21", docs.get(4).getFirstValue("id"));
// grouping on boolean non-stored docValued enabled field
rsp = query("q", b1dv + ":*", "fl", "id," + b1dv, "group", "true", "group.field",
b1dv, "group.limit", 10, "sort", b1dv + " asc, id asc");
nl = (NamedList<?>) rsp.getResponse().get("grouped");
nl = (NamedList<?>) nl.get(b1dv);
assertEquals(rsp.toString(), 9, nl.get("matches"));
assertEquals(rsp.toString(), 2, ((List<NamedList<?>>)nl.get("groups")).size());
nl = ((List<NamedList<?>>)nl.get("groups")).get(0);
assertEquals(rsp.toString(), false, nl.get("groupValue"));
docs = (SolrDocumentList) nl.get("doclist");
assertEquals(docs.toString(), 4, docs.getNumFound());
// Can't validate the response, but can check if no errors occur.
simpleQuery("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id asc", CommonParams.TIME_ALLOWED, 1);

View File

@ -54,6 +54,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String FOO_STRING_FIELD = "foo_s1";
public static final String FOO_STRING_DOCVAL_FIELD = "foo_sdv";
public static final String SMALL_STRING_FIELD = "small_s1";
public static final String SMALL_INT_FIELD = "small_i";
static final String EMPTY_FACETS = "'facet_ranges':{},'facet_intervals':{},'facet_heatmaps':{}";
@ -658,11 +659,32 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
,"/grouped/id:1000=={'matches':0,'doclist':{'numFound':0,'start':0,'docs':[]}}"
);
}
@Test
public void testGroupingNonIndexedOrStoredDocValues() throws Exception {
// test-case from SOLR-4647
assertU(add(doc("id", "1", FOO_STRING_DOCVAL_FIELD, "a", "bday", "2012-11-20T00:00:00Z")));
assertU(add(doc("id", "2", FOO_STRING_DOCVAL_FIELD, "b", "bday", "2012-11-21T00:00:00Z")));
assertU(add(doc("id", "3", FOO_STRING_DOCVAL_FIELD, "a", "bday", "2012-11-20T00:00:00Z")));
assertU(add(doc("id", "4", FOO_STRING_DOCVAL_FIELD, "b", "bday", "2013-01-15T00:00:00Z")));
assertU(add(doc("id", "5", FOO_STRING_DOCVAL_FIELD, "a", "bday", "2013-01-14T00:00:00Z")));
assertU(commit());
// Facet counts based on groups
SolrQueryRequest req = req("q", "*:*", "rows", "1", "group", "true", "group.field", FOO_STRING_DOCVAL_FIELD,
"sort", FOO_STRING_DOCVAL_FIELD + " asc", "fl", "id",
"fq", "{!tag=chk}bday:[2012-12-18T00:00:00Z TO 2013-01-17T23:59:59Z]",
"facet", "true", "group.truncate", "true", "group.sort", "bday desc",
"facet.query", "{!ex=chk key=LW1}bday:[2013-01-11T00:00:00Z TO 2013-01-17T23:59:59Z]",
"facet.query", "{!ex=chk key=LM1}bday:[2012-12-18T00:00:00Z TO 2013-01-17T23:59:59Z]",
"facet.query", "{!ex=chk key=LM3}bday:[2012-10-18T00:00:00Z TO 2013-01-17T23:59:59Z]");
assertJQ(
req,
"/grouped=={'"+FOO_STRING_DOCVAL_FIELD+"':{'matches':2,'groups':[{'groupValue':'a','doclist':{'numFound':1,'start':0,'docs':[{'id':'5'}]}}]}}",
"/facet_counts=={'facet_queries':{'LW1':2,'LM1':2,'LM3':2},'facet_fields':{}," + EMPTY_FACETS + "}"
);
}
@Test
public void testRandomGrouping() throws Exception {
@ -691,6 +713,12 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
types.add(new FldType(SMALL_STRING_FIELD,ZERO_ONE, new SVal('a',(char)('c'+indexSize/10),1,1)));
types.add(new FldType(SMALL_INT_FIELD,ZERO_ONE, new IRange(0,5+indexSize/10)));
// non-stored non-indexed docValue enabled fields
types.add(new FldType("score_ff",ONE_ONE, new FVal(1,100)));
types.add(new FldType("foo_ii",ZERO_ONE, new IRange(0,indexSize)));
types.add(new FldType(FOO_STRING_DOCVAL_FIELD,ONE_ONE, new SVal('a','z',3,7)));
types.add(new FldType("foo_bdv", ZERO_ONE, new BVal()));
clearIndex();
Map<Comparable, Doc> model = indexDocs(types, null, indexSize);
//System.out.println("############### model=" + model);
@ -825,7 +853,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
int randomPercentage = random().nextInt(101);
// TODO: create a random filter too
SolrQueryRequest req = req("group","true","wt","json","indent","true", "echoParams","all", "q","{!func}score_f", "group.field",groupField
,sortStr==null ? "nosort":"sort", sortStr ==null ? "": sortStr
,sortStr==null ? "nosort":"sort", sortStr ==null ? "": sortStr, "fl", "*,score_ff,foo_ii,foo_bdv," + FOO_STRING_DOCVAL_FIELD // only docValued fields are not returned by default
,(groupSortStr == null || groupSortStr == sortStr) ? "noGroupsort":"group.sort", groupSortStr==null ? "": groupSortStr
,"rows",""+rows, "start",""+start, "group.offset",""+group_offset, "group.limit",""+group_limit,
GroupParams.GROUP_CACHE_PERCENTAGE, Integer.toString(randomPercentage), GroupParams.GROUP_TOTAL_COUNT, includeNGroups ? "true" : "false",

View File

@ -1613,6 +1613,14 @@ public abstract class SolrTestCaseJ4 extends SolrTestCase {
}
}
public static class BVal extends Vals {
@Override
public Comparable get() {
return random().nextBoolean();
}
}
public static class SVal extends Vals {
char start;
char end;