mirror of https://github.com/apache/lucene.git
SOLR-8998: uniqueBlock() aggregation for single-valued string fields in json.facet
This commit is contained in:
parent
d92b891f95
commit
ee7b52f4c6
|
@ -105,6 +105,9 @@ New Features
|
|||
* SOLR-11924: Added the ability to listen to changes in the set of active collections in a cloud
|
||||
in the ZkStateReader, through the CloudCollectionsListener. (Houston Putman, Dennis Gove)
|
||||
|
||||
* SOLR-8998: introducing uniqueBlock(_root_) aggregation as a faster alternative to unique(_root_) for counting
|
||||
child value facets in parents via json.facet on block index (Dr Oleg Savrasov, Mikhail Khludnev)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -64,6 +64,7 @@ import org.apache.solr.search.facet.StddevAgg;
|
|||
import org.apache.solr.search.facet.SumAgg;
|
||||
import org.apache.solr.search.facet.SumsqAgg;
|
||||
import org.apache.solr.search.facet.UniqueAgg;
|
||||
import org.apache.solr.search.facet.UniqueBlockAgg;
|
||||
import org.apache.solr.search.facet.VarianceAgg;
|
||||
import org.apache.solr.search.function.CollapseScoreFunction;
|
||||
import org.apache.solr.search.function.ConcatStringFunction;
|
||||
|
@ -964,6 +965,13 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
|
|||
}
|
||||
});
|
||||
|
||||
// Registers the "agg_uniqueBlock" function: its single parsed argument is
// handed to UniqueBlockAgg.
addParser("agg_uniqueBlock", new ValueSourceParser() {
  @Override
  public ValueSource parse(FunctionQParser fp) throws SyntaxError {
    final String fieldArg = fp.parseArg();
    return new UniqueBlockAgg(fieldArg);
  }
});
|
||||
|
||||
addParser("agg_hll", new ValueSourceParser() {
|
||||
@Override
|
||||
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
||||
|
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.search.facet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
|
||||
public class UniqueBlockAgg extends UniqueAgg {
|
||||
|
||||
private static final class UniqueBlockSlotAcc extends UniqueSinglevaluedSlotAcc {
|
||||
|
||||
private int lastSeenValuesPerSlot[];
|
||||
|
||||
private UniqueBlockSlotAcc(FacetContext fcontext, SchemaField field, int numSlots)
|
||||
throws IOException { //
|
||||
super(fcontext, field, /*numSlots suppressing inherited accumulator */0, null);
|
||||
counts = new int[numSlots];
|
||||
lastSeenValuesPerSlot = new int[numSlots];
|
||||
Arrays.fill(lastSeenValuesPerSlot, Integer.MIN_VALUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void collectOrdToSlot(int slotNum, int ord) {
|
||||
if (lastSeenValuesPerSlot[slotNum]!=ord) {
|
||||
counts[slotNum]+=1;
|
||||
lastSeenValuesPerSlot[slotNum] = ord;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void calcCounts() {
|
||||
// noop already done
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
Arrays.fill(counts, 0);
|
||||
Arrays.fill(lastSeenValuesPerSlot, Integer.MIN_VALUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getValue(int slot) throws IOException {
|
||||
return counts[slot];
|
||||
}
|
||||
}
|
||||
|
||||
private final static String uniqueBlock = "uniqueBlock";
|
||||
|
||||
public UniqueBlockAgg(String field) {
|
||||
super(field);
|
||||
name= uniqueBlock;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
||||
final String fieldName = getArg();
|
||||
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(fieldName);
|
||||
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||
throw new IllegalArgumentException(uniqueBlock+"("+fieldName+
|
||||
") doesn't allow multivalue fields, got " + sf);
|
||||
} else {
|
||||
if (sf.getType().getNumberType() != null) {
|
||||
throw new IllegalArgumentException(uniqueBlock+"("+fieldName+
|
||||
") not yet support numbers " + sf);
|
||||
} else {
|
||||
return new UniqueBlockSlotAcc(fcontext, sf, numSlots);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetMerger createFacetMerger(Object prototype) {
|
||||
return new FacetLongMerger() ;
|
||||
}
|
||||
}
|
|
@ -81,6 +81,11 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
|
|||
int segOrd = subDv.ordValue();
|
||||
int ord = toGlobal==null ? segOrd : (int)toGlobal.get(segOrd);
|
||||
|
||||
collectOrdToSlot(slotNum, ord);
|
||||
}
|
||||
}
|
||||
|
||||
protected void collectOrdToSlot(int slotNum, int ord) {
|
||||
FixedBitSet bits = arr[slotNum];
|
||||
if (bits == null) {
|
||||
bits = new FixedBitSet(nTerms);
|
||||
|
@ -88,5 +93,4 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
|
|||
}
|
||||
bits.set(ord);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1162,6 +1162,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
|
|||
assertFuncEquals("agg_sum(foo_i)", "agg_sum(foo_i)");
|
||||
assertFuncEquals("agg_count()", "agg_count()");
|
||||
assertFuncEquals("agg_unique(foo_i)", "agg_unique(foo_i)");
|
||||
assertFuncEquals("agg_uniqueBlock(foo_i)", "agg_uniqueBlock(foo_i)");
|
||||
assertFuncEquals("agg_hll(foo_i)", "agg_hll(foo_i)");
|
||||
assertFuncEquals("agg_sumsq(foo_i)", "agg_sumsq(foo_i)");
|
||||
assertFuncEquals("agg_percentile(foo_i,50)", "agg_percentile(foo_i,50)");
|
||||
|
|
|
@ -334,4 +334,30 @@ public class TestJsonFacetsWithNestedObjects extends SolrTestCaseHS{
|
|||
" ]}}"
|
||||
);
|
||||
}
|
||||
|
||||
public void testUniqueBlock() throws Exception {
|
||||
final Client client = Client.localClient();
|
||||
ModifiableSolrParams p = params("rows","0");
|
||||
client.testJQ(params(p, "q", "{!parent tag=top which=type_s:book v=$childquery}"
|
||||
, "childquery", "comment_t:*"
|
||||
, "fl", "id", "fl" , "title_t"
|
||||
, "json.facet", "{" +
|
||||
" types: {" +
|
||||
" domain: { blockChildren:\"type_s:book\"" +
|
||||
" }," +
|
||||
" type:terms," +
|
||||
" field:type_s,"
|
||||
+ " limit:-1," +
|
||||
" facet: {" +
|
||||
" in_books: \"uniqueBlock(_root_)\" }"+//}}," +
|
||||
" }" +
|
||||
"}" )
|
||||
|
||||
, "response=={numFound:2,start:0,docs:[]}"
|
||||
, "facets=={ count:2," +
|
||||
"types:{" +
|
||||
" buckets:[ {val:review, count:5, in_books:2} ]}" +
|
||||
"}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ import org.junit.Test;
|
|||
|
||||
public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
|
||||
|
||||
private static final int defFacetLimit = 10;
|
||||
private static final String collection = "facetcollection";
|
||||
|
||||
@BeforeClass
|
||||
|
@ -74,6 +75,7 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
|
|||
"fuchsia", "light","dark","green","grey","don't","know","any","more" );
|
||||
final static List<String> sizes = Arrays.asList("s","m","l","xl","xxl","xml","xxxl","3","4","5","6","petite","maxi");
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Test
|
||||
public void testBJQFacetComponent() throws Exception {
|
||||
|
||||
|
@ -126,12 +128,20 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
|
|||
if (!parentDocs.isEmpty()) {
|
||||
indexDocs(parentDocs);
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
cluster.getSolrClient().commit(collection);
|
||||
|
||||
} else {
|
||||
cluster.getSolrClient().optimize(collection);
|
||||
}
|
||||
// to parent query
|
||||
final String childQueryClause = "COLOR_s:("+(matchingColors.toString().replaceAll("[,\\[\\]]", " "))+")";
|
||||
final String matchingColorsCommaSep = matchingColors.toString().replaceAll("[ \\[\\]]", "");
|
||||
final String childQueryClause = "{!terms f=COLOR_s}" + matchingColorsCommaSep;
|
||||
final boolean oldFacetsEnabled = random().nextBoolean();
|
||||
QueryResponse results = query("q", "{!parent which=\"type_s:parent\"}"+childQueryClause,
|
||||
final boolean limitJsonSizes = random().nextBoolean();
|
||||
final boolean limitJsonColors = random().nextBoolean();
|
||||
|
||||
QueryResponse results = query("q", "{!parent which=\"type_s:parent\" v=$matchingColors}",//+childQueryClause,
|
||||
"matchingColors", childQueryClause,
|
||||
"facet", oldFacetsEnabled ? "true":"false", // try to enforce multiple phases
|
||||
oldFacetsEnabled ? "facet.field" : "ignore" , "BRAND_s",
|
||||
oldFacetsEnabled&&usually() ? "facet.limit" : "ignore" , "1",
|
||||
|
@ -141,7 +151,18 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
|
|||
"child.facet.field", "COLOR_s",
|
||||
"child.facet.field", "SIZE_s",
|
||||
"distrib.singlePass", random().nextBoolean() ? "true":"false",
|
||||
"rows", random().nextBoolean() ? "0":"10"
|
||||
"rows", random().nextBoolean() ? "0":"10",
|
||||
"json.facet","{ "
|
||||
+ "children:{ type: query, query:\"*:*\", domain:{"
|
||||
+"blockChildren:\"type_s:parent\", filter:{param:matchingColors}"
|
||||
+ "}, facet:{ colors:{ type:field, field:COLOR_s,"
|
||||
+ (limitJsonColors ? "":" limit:-1,")
|
||||
+ " facet:{ inprods:\"uniqueBlock(_root_)\"}}, "
|
||||
+ "sizes:{type:field, field:SIZE_s, "
|
||||
+ (limitJsonSizes ? "" : "limit:-1,")
|
||||
+ " facet:{inprods:\"uniqueBlock(_root_)\"}}"
|
||||
+ "}"
|
||||
+ "}}", "debugQuery","true"//, "shards", "shard1"
|
||||
);
|
||||
NamedList<Object> resultsResponse = results.getResponse();
|
||||
assertNotNull(resultsResponse);
|
||||
|
@ -157,7 +178,47 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
|
|||
}
|
||||
|
||||
assertEquals(msg , parentIdsByAttrValue.size(),color_s.getValueCount() + size_s.getValueCount());
|
||||
//System.out.println(parentIdsByAttrValue);
|
||||
|
||||
final List<NamedList<Object>> jsonSizes = (List<NamedList<Object>>)
|
||||
get(resultsResponse, "facets", "children", "sizes", "buckets");
|
||||
final List<NamedList<Object>> jsonColors = (List<NamedList<Object>>)
|
||||
get(resultsResponse, "facets", "children", "colors", "buckets");
|
||||
|
||||
if (limitJsonColors) {
|
||||
assertTrue(""+jsonColors, jsonColors.size()<=defFacetLimit);
|
||||
}
|
||||
|
||||
if (limitJsonSizes) {
|
||||
assertTrue(""+jsonSizes, jsonSizes.size()<=defFacetLimit);
|
||||
}
|
||||
|
||||
for (List<NamedList<Object>> vals : new List[] { jsonSizes,jsonColors}) {
|
||||
int i=0;
|
||||
for(NamedList<Object> tuples: vals) {
|
||||
String val = (String) get(tuples,"val");
|
||||
Number count = (Number) get(tuples,"inprods");
|
||||
if (((vals==jsonSizes && limitJsonSizes) || // vals close to the limit are not exact
|
||||
(vals==jsonColors && limitJsonColors)) && i>=defFacetLimit/2) {
|
||||
assertTrue(i+ "th "+tuples+". "+vals,
|
||||
parentIdsByAttrValue.get(val).size()>= count.intValue() &&
|
||||
count.intValue()>0);
|
||||
} else {
|
||||
assertEquals(tuples+". "+vals,
|
||||
parentIdsByAttrValue.get(val).size(),count.intValue());
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
if (!limitJsonColors && !limitJsonSizes) {
|
||||
assertEquals(""+jsonSizes+jsonColors, parentIdsByAttrValue.size(),jsonSizes.size() + jsonColors.size());
|
||||
}
|
||||
}
|
||||
|
||||
private static Object get(Object nvList, String ... segments) {
|
||||
for(String segment: segments) {
|
||||
nvList = ((NamedList<Object>) nvList).get(segment);
|
||||
}
|
||||
return nvList;
|
||||
}
|
||||
|
||||
private QueryResponse query(String ... arg) throws SolrServerException, IOException {
|
||||
|
|
Loading…
Reference in New Issue