SOLR-8998: uniqueBlock() aggregation for single-valued string fields in json.facet

This commit is contained in:
Mikhail Khludnev 2018-05-01 20:19:15 +03:00
parent d92b891f95
commit ee7b52f4c6
7 changed files with 208 additions and 14 deletions

View File

@ -105,6 +105,9 @@ New Features
* SOLR-11924: Added the ability to listen to changes in the set of active collections in a cloud
in the ZkStateReader, through the CloudCollectionsListener. (Houston Putman, Dennis Gove)
* SOLR-8998: Introduce uniqueBlock(_root_) aggregation as a faster alternative to unique(_root_) for counting
child value facets in parents via json.facet on a block index. (Dr Oleg Savrasov, Mikhail Khludnev)
Bug Fixes
----------------------

View File

@ -64,6 +64,7 @@ import org.apache.solr.search.facet.StddevAgg;
import org.apache.solr.search.facet.SumAgg;
import org.apache.solr.search.facet.SumsqAgg;
import org.apache.solr.search.facet.UniqueAgg;
import org.apache.solr.search.facet.UniqueBlockAgg;
import org.apache.solr.search.facet.VarianceAgg;
import org.apache.solr.search.function.CollapseScoreFunction;
import org.apache.solr.search.function.ConcatStringFunction;
@ -964,6 +965,13 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
}
});
// Registers the "agg_uniqueBlock" function: its single argument is handed to UniqueBlockAgg.
addParser("agg_uniqueBlock", new ValueSourceParser() {
  @Override
  public ValueSource parse(FunctionQParser parser) throws SyntaxError {
    final String fieldArg = parser.parseArg();
    return new UniqueBlockAgg(fieldArg);
  }
});
addParser("agg_hll", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {

View File

@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.Arrays;
import org.apache.solr.schema.SchemaField;
/**
 * {@code uniqueBlock(field)} aggregation for json.facet: a variant of {@link UniqueAgg} that only
 * accepts single-valued, non-numeric fields.
 *
 * <p>Instead of remembering every ordinal seen for a slot, its accumulator compares each collected
 * ordinal with the one collected immediately before it for the same slot and increments the slot's
 * counter only when the two differ.
 *
 * <p>NOTE(review): this yields a distinct count only when equal values reach a slot consecutively,
 * presumably the case for {@code _root_} on a block index; confirm against the collecting code.
 */
public class UniqueBlockAgg extends UniqueAgg {

  /** Aggregation name, also used as the prefix of the error messages below. */
  private static final String UNIQUE_BLOCK = "uniqueBlock";

  /** Per-slot counter of changes in the collected ordinal. */
  private static final class UniqueBlockSlotAcc extends UniqueSinglevaluedSlotAcc {

    /** Ordinal most recently collected for each slot; {@code Integer.MIN_VALUE} means none yet. */
    private int[] lastSeenValuesPerSlot;

    private UniqueBlockSlotAcc(FacetContext fcontext, SchemaField field, int numSlots)
        throws IOException {
      // Zero slots are passed up so the inherited accumulator is suppressed;
      // counting happens in the arrays allocated here instead.
      super(fcontext, field, 0, null);
      counts = new int[numSlots];
      lastSeenValuesPerSlot = new int[numSlots];
      Arrays.fill(lastSeenValuesPerSlot, Integer.MIN_VALUE);
    }

    /** Counts {@code ord} for the slot unless it repeats the previously collected ordinal. */
    @Override
    protected void collectOrdToSlot(int slotNum, int ord) {
      if (lastSeenValuesPerSlot[slotNum] == ord) {
        return;
      }
      counts[slotNum] += 1;
      lastSeenValuesPerSlot[slotNum] = ord;
    }

    /** Nothing to compute: {@code counts} is kept up to date while collecting. */
    @Override
    public void calcCounts() {
      // intentionally empty
    }

    /** Clears all counters and forgets the last collected ordinal of every slot. */
    @Override
    public void reset() throws IOException {
      Arrays.fill(counts, 0);
      Arrays.fill(lastSeenValuesPerSlot, Integer.MIN_VALUE);
    }

    /** Returns the counter accumulated for {@code slot}. */
    @Override
    public Object getValue(int slot) throws IOException {
      return counts[slot];
    }
  }

  /**
   * @param field name of the field to aggregate on
   */
  public UniqueBlockAgg(String field) {
    super(field);
    name = UNIQUE_BLOCK;
  }

  /**
   * Creates the accumulator for the field named by this aggregation's argument.
   *
   * @throws IllegalArgumentException if the field is multi-valued or has a numeric type
   */
  @Override
  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
    final String fieldName = getArg();
    final SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(fieldName);

    if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
      throw new IllegalArgumentException(UNIQUE_BLOCK + "(" + fieldName
          + ") doesn't allow multivalue fields, got " + sf);
    }
    if (sf.getType().getNumberType() != null) {
      throw new IllegalArgumentException(UNIQUE_BLOCK + "(" + fieldName
          + ") not yet support numbers " + sf);
    }
    return new UniqueBlockSlotAcc(fcontext, sf, numSlots);
  }

  /** Results of this aggregation are merged with a {@link FacetLongMerger}. */
  @Override
  public FacetMerger createFacetMerger(Object prototype) {
    return new FacetLongMerger();
  }
}

View File

@ -81,12 +81,16 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
int segOrd = subDv.ordValue();
int ord = toGlobal==null ? segOrd : (int)toGlobal.get(segOrd);
FixedBitSet bits = arr[slotNum];
if (bits == null) {
bits = new FixedBitSet(nTerms);
arr[slotNum] = bits;
}
bits.set(ord);
collectOrdToSlot(slotNum, ord);
}
}
/**
 * Marks {@code ord} in the bit set of the given slot, allocating that bit set
 * (sized to {@code nTerms}) the first time the slot is used.
 */
protected void collectOrdToSlot(int slotNum, int ord) {
  if (arr[slotNum] == null) {
    arr[slotNum] = new FixedBitSet(nTerms);
  }
  arr[slotNum].set(ord);
}
}

View File

@ -1162,6 +1162,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
assertFuncEquals("agg_sum(foo_i)", "agg_sum(foo_i)");
assertFuncEquals("agg_count()", "agg_count()");
assertFuncEquals("agg_unique(foo_i)", "agg_unique(foo_i)");
assertFuncEquals("agg_uniqueBlock(foo_i)", "agg_uniqueBlock(foo_i)");
assertFuncEquals("agg_hll(foo_i)", "agg_hll(foo_i)");
assertFuncEquals("agg_sumsq(foo_i)", "agg_sumsq(foo_i)");
assertFuncEquals("agg_percentile(foo_i,50)", "agg_percentile(foo_i,50)");

View File

@ -334,4 +334,30 @@ public class TestJsonFacetsWithNestedObjects extends SolrTestCaseHS{
" ]}}"
);
}
/**
 * Runs a {@code {!parent}} query and a {@code terms} facet on {@code type_s} whose domain is
 * switched with {@code blockChildren}, then checks the {@code uniqueBlock(_root_)} value
 * reported for each bucket.
 */
public void testUniqueBlock() throws Exception {
  final Client client = Client.localClient();
  final ModifiableSolrParams baseParams = params("rows","0");

  // Facet request: bucket by type_s, with uniqueBlock(_root_) as the per-bucket statistic.
  final String jsonFacet = "{" +
      " types: {" +
      " domain: { blockChildren:\"type_s:book\"" +
      " }," +
      " type:terms," +
      " field:type_s,"
      + " limit:-1," +
      " facet: {" +
      " in_books: \"uniqueBlock(_root_)\" }" +
      " }" +
      "}";

  final String expectedResponse = "response=={numFound:2,start:0,docs:[]}";
  final String expectedFacets = "facets=={ count:2," +
      "types:{" +
      " buckets:[ {val:review, count:5, in_books:2} ]}" +
      "}";

  client.testJQ(
      params(baseParams,
          "q", "{!parent tag=top which=type_s:book v=$childquery}",
          "childquery", "comment_t:*",
          "fl", "id", "fl", "title_t",
          "json.facet", jsonFacet),
      expectedResponse,
      expectedFacets);
}
}

View File

@ -43,6 +43,7 @@ import org.junit.Test;
public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
private static final int defFacetLimit = 10;
private static final String collection = "facetcollection";
@BeforeClass
@ -74,7 +75,8 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
"fuchsia", "light","dark","green","grey","don't","know","any","more" );
final static List<String> sizes = Arrays.asList("s","m","l","xl","xxl","xml","xxxl","3","4","5","6","petite","maxi");
@Test
@SuppressWarnings("unchecked")
@Test
public void testBJQFacetComponent() throws Exception {
assert ! colors.removeAll(sizes): "there is no colors in sizes";
@ -126,12 +128,20 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
if (!parentDocs.isEmpty()) {
indexDocs(parentDocs);
}
cluster.getSolrClient().commit(collection);
if (random().nextBoolean()) {
cluster.getSolrClient().commit(collection);
} else {
cluster.getSolrClient().optimize(collection);
}
// to parent query
final String childQueryClause = "COLOR_s:("+(matchingColors.toString().replaceAll("[,\\[\\]]", " "))+")";
final String matchingColorsCommaSep = matchingColors.toString().replaceAll("[ \\[\\]]", "");
final String childQueryClause = "{!terms f=COLOR_s}" + matchingColorsCommaSep;
final boolean oldFacetsEnabled = random().nextBoolean();
QueryResponse results = query("q", "{!parent which=\"type_s:parent\"}"+childQueryClause,
final boolean limitJsonSizes = random().nextBoolean();
final boolean limitJsonColors = random().nextBoolean();
QueryResponse results = query("q", "{!parent which=\"type_s:parent\" v=$matchingColors}",//+childQueryClause,
"matchingColors", childQueryClause,
"facet", oldFacetsEnabled ? "true":"false", // try to enforce multiple phases
oldFacetsEnabled ? "facet.field" : "ignore" , "BRAND_s",
oldFacetsEnabled&&usually() ? "facet.limit" : "ignore" , "1",
@ -141,7 +151,18 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
"child.facet.field", "COLOR_s",
"child.facet.field", "SIZE_s",
"distrib.singlePass", random().nextBoolean() ? "true":"false",
"rows", random().nextBoolean() ? "0":"10"
"rows", random().nextBoolean() ? "0":"10",
"json.facet","{ "
+ "children:{ type: query, query:\"*:*\", domain:{"
+"blockChildren:\"type_s:parent\", filter:{param:matchingColors}"
+ "}, facet:{ colors:{ type:field, field:COLOR_s,"
+ (limitJsonColors ? "":" limit:-1,")
+ " facet:{ inprods:\"uniqueBlock(_root_)\"}}, "
+ "sizes:{type:field, field:SIZE_s, "
+ (limitJsonSizes ? "" : "limit:-1,")
+ " facet:{inprods:\"uniqueBlock(_root_)\"}}"
+ "}"
+ "}}", "debugQuery","true"//, "shards", "shard1"
);
NamedList<Object> resultsResponse = results.getResponse();
assertNotNull(resultsResponse);
@ -155,9 +176,49 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
parentIdsByAttrValue.get(c.getName()).size(), c.getCount());
}
}
assertEquals(msg , parentIdsByAttrValue.size(),color_s.getValueCount() + size_s.getValueCount());
//System.out.println(parentIdsByAttrValue);
final List<NamedList<Object>> jsonSizes = (List<NamedList<Object>>)
get(resultsResponse, "facets", "children", "sizes", "buckets");
final List<NamedList<Object>> jsonColors = (List<NamedList<Object>>)
get(resultsResponse, "facets", "children", "colors", "buckets");
if (limitJsonColors) {
assertTrue(""+jsonColors, jsonColors.size()<=defFacetLimit);
}
if (limitJsonSizes) {
assertTrue(""+jsonSizes, jsonSizes.size()<=defFacetLimit);
}
for (List<NamedList<Object>> vals : new List[] { jsonSizes,jsonColors}) {
int i=0;
for(NamedList<Object> tuples: vals) {
String val = (String) get(tuples,"val");
Number count = (Number) get(tuples,"inprods");
if (((vals==jsonSizes && limitJsonSizes) || // vals close to the limit are not exact
(vals==jsonColors && limitJsonColors)) && i>=defFacetLimit/2) {
assertTrue(i+ "th "+tuples+". "+vals,
parentIdsByAttrValue.get(val).size()>= count.intValue() &&
count.intValue()>0);
} else {
assertEquals(tuples+". "+vals,
parentIdsByAttrValue.get(val).size(),count.intValue());
}
i++;
}
}
if (!limitJsonColors && !limitJsonSizes) {
assertEquals(""+jsonSizes+jsonColors, parentIdsByAttrValue.size(),jsonSizes.size() + jsonColors.size());
}
}
/**
 * Walks a path of keys through nested {@code NamedList}s.
 *
 * @param nvList   the outermost list to start from
 * @param segments keys to look up, one nesting level per key
 * @return the value stored under the last key, or {@code nvList} itself when no keys are given
 * @throws ClassCastException if a value that still has to be descended into is not a {@code NamedList}
 */
private static Object get(Object nvList, String ... segments) {
  Object current = nvList;
  for (String segment : segments) {
    // The element type is irrelevant for navigation; a wildcard cast is fully checked,
    // whereas the former cast to NamedList<Object> raised an unchecked-cast warning.
    current = ((NamedList<?>) current).get(segment);
  }
  return current;
}
private QueryResponse query(String ... arg) throws SolrServerException, IOException {