SOLR-12325: uniqueBlock(\{!v=foo:bar})

This commit is contained in:
Mikhail Khludnev 2020-02-02 15:15:17 +03:00
parent 16b8d50284
commit d8bc9bcfcf
8 changed files with 182 additions and 52 deletions

View File

@ -168,6 +168,8 @@ New Features
* SOLR-13892: New "top-level" docValues join implementation (Jason Gerlowski, Joel Bernstein)
* SOLR-12325: Introducing uniqueBlock({!v=type:parent}) aggregation (Anatolii Siuniaev via Mikhail Khludnev)
Improvements
---------------------
* SOLR-14120: Define JavaScript methods 'includes' and 'startsWith' to ensure AdminUI can be displayed when using

View File

@ -67,7 +67,8 @@ import org.apache.solr.search.facet.StddevAgg;
import org.apache.solr.search.facet.SumAgg;
import org.apache.solr.search.facet.SumsqAgg;
import org.apache.solr.search.facet.UniqueAgg;
import org.apache.solr.search.facet.UniqueBlockAgg;
import org.apache.solr.search.facet.UniqueBlockFieldAgg;
import org.apache.solr.search.facet.UniqueBlockQueryAgg;
import org.apache.solr.search.facet.VarianceAgg;
import org.apache.solr.search.function.CollapseScoreFunction;
import org.apache.solr.search.function.ConcatStringFunction;
@ -971,7 +972,10 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
addParser("agg_uniqueBlock", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
return new UniqueBlockAgg(fp.parseArg());
if (fp.sp.peek() == QueryParsing.LOCALPARAM_START.charAt(0) ) {
return new UniqueBlockQueryAgg(fp.parseNestedQuery());
}
return new UniqueBlockFieldAgg(fp.parseArg());
}
});

View File

@ -21,13 +21,13 @@ import java.util.Arrays;
import org.apache.solr.schema.SchemaField;
public class UniqueBlockAgg extends UniqueAgg {
public abstract class UniqueBlockAgg extends UniqueAgg {
private static final class UniqueBlockSlotAcc extends UniqueSinglevaluedSlotAcc {
protected static class UniqueBlockSlotAcc extends UniqueSinglevaluedSlotAcc {
private int lastSeenValuesPerSlot[];
protected int[] lastSeenValuesPerSlot;
private UniqueBlockSlotAcc(FacetContext fcontext, SchemaField field, int numSlots)
protected UniqueBlockSlotAcc(FacetContext fcontext, SchemaField field, int numSlots)
throws IOException { //
super(fcontext, field, /*numSlots suppressing inherited accumulator */0, null);
counts = new int[numSlots];
@ -70,25 +70,11 @@ public class UniqueBlockAgg extends UniqueAgg {
public UniqueBlockAgg(String field) {
super(field);
name= uniqueBlock;
name = uniqueBlock;
}
@Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
final String fieldName = getArg();
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(fieldName);
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
throw new IllegalArgumentException(uniqueBlock+"("+fieldName+
") doesn't allow multivalue fields, got " + sf);
} else {
if (sf.getType().getNumberType() != null) {
throw new IllegalArgumentException(uniqueBlock+"("+fieldName+
") not yet support numbers " + sf);
} else {
return new UniqueBlockSlotAcc(fcontext, sf, numSlots);
}
}
}
public abstract SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException ;
@Override
public FacetMerger createFacetMerger(Object prototype) {

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.io.IOException;
import org.apache.solr.schema.SchemaField;
/**
 * Field-based flavour of the {@code uniqueBlock} aggregation: the aggregation argument is
 * the name of a schema field, and the accumulator is built on top of that field.
 *
 * <p>Only single-valued, non-numeric fields are accepted; anything else is rejected when the
 * accumulator is created.
 */
public class UniqueBlockFieldAgg extends UniqueBlockAgg {

  /**
   * @param field name of the field given as the aggregation argument
   */
  public UniqueBlockFieldAgg(String field) {
    super(field);
  }

  /**
   * Resolves the argument to a schema field, validates it and creates the accumulator.
   *
   * @throws IllegalArgumentException if the field is multi-valued or numeric
   */
  @Override
  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
    final String fieldName = getArg();
    final SchemaField schemaField = fcontext.qcontext.searcher().getSchema().getField(fieldName);

    // Guard 1: multi-valued fields (declared as such, or treated so by the field type).
    if (schemaField.multiValued() || schemaField.getType().multiValuedFieldCache()) {
      throw new IllegalArgumentException(
          name + "(" + fieldName + ") doesn't allow multivalue fields, got " + schemaField);
    }

    // Guard 2: numeric field types are rejected.
    if (schemaField.getType().getNumberType() != null) {
      throw new IllegalArgumentException(
          name + "(" + fieldName + ") not yet support numbers " + schemaField);
    }

    return new UniqueBlockSlotAcc(fcontext, schemaField, numSlots);
  }
}

View File

@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.function.IntFunction;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BitSet;
import static org.apache.solr.search.join.BlockJoinParentQParser.getCachedFilter;
/**
 * Query-based flavour of the {@code uniqueBlock} aggregation, e.g.
 * {@code uniqueBlock({!v=type_s:book})}. Instead of reading a field value, each collected
 * document is mapped to the next document at or after it that is set in the bitset of the
 * given query, and that document identifies the block.
 */
public class UniqueBlockQueryAgg extends UniqueBlockAgg {

  /** Accumulator that resolves every collected document to a document of the query's bitset. */
  private static final class UniqueBlockQuerySlotAcc extends UniqueBlockSlotAcc {

    private final Query query;
    /** Bitset of {@link #query} for the current segment; {@code null} is tolerated by collect(). */
    private BitSet parentBitSet;
    /** Doc id offset of the current segment within the whole index. */
    private int docBase;

    private UniqueBlockQuerySlotAcc(FacetContext fcontext, Query query, int numSlots)
        throws IOException {
      // No field is involved in the query flavour, hence the null SchemaField.
      super(fcontext, null, numSlots);
      this.query = query;
    }

    /** Switches to the given segment: loads its bitset for the query and remembers its doc base. */
    @Override
    public void setNextReader(LeafReaderContext readerContext) throws IOException {
      this.parentBitSet = getCachedFilter(fcontext.req, query).getFilter().getBitSet(readerContext);
      this.docBase = readerContext.docBase;
    }

    /**
     * Maps {@code doc} to the next set bit at or after it and reports that document for the slot.
     * Documents with no set bit at or after them in this segment are ignored.
     */
    @Override
    public void collect(int doc, int slotNum, IntFunction<SlotContext> slotContext) {
      if (parentBitSet != null) {
        int parentDoc = parentBitSet.nextSetBit(doc);
        if (parentDoc != DocIdSetIterator.NO_MORE_DOCS) {
          // Bits of a per-segment bitset are segment-local doc ids, which restart at 0 in every
          // segment. Adding docBase makes the reported value unique across segments, so blocks
          // from different segments cannot be mistaken for the same block.
          // NOTE(review): assumes collectOrdToSlot dedups by comparing against the slot's last
          // seen value (lastSeenValuesPerSlot) - confirm against UniqueBlockSlotAcc.
          collectOrdToSlot(slotNum, docBase + parentDoc);
        }
      }
    }
  }

  private final Query query;

  /**
   * @param query query whose bitset identifies the blocks; its string form becomes the
   *              aggregation argument
   */
  public UniqueBlockQueryAgg(Query query) {
    super(null);
    this.query = query;
    arg = query.toString();
  }

  @Override
  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
    return new UniqueBlockQuerySlotAcc(fcontext, query, numSlots);
  }
}

View File

@ -3149,14 +3149,18 @@ public class TestJsonFacets extends SolrTestCaseHS {
parent = sdoc("id", "2", "type_s","book", "book_s","B", "v_t","q w");
parent.addChildDocument( sdoc("id","2.1", "type_s","page", "page_s","a", "v_t","x y z") );
parent.addChildDocument( sdoc("id","2.2", "type_s","page", "page_s","b", "v_t","x y ") );
parent.addChildDocument( sdoc("id","2.3", "type_s","page", "page_s","c", "v_t"," y z" ) );
parent.addChildDocument( sdoc("id","2.2", "type_s","page", "page_s","a", "v_t","x1 z") );
parent.addChildDocument( sdoc("id","2.3", "type_s","page", "page_s","a", "v_t","x2 z") );
parent.addChildDocument( sdoc("id","2.4", "type_s","page", "page_s","b", "v_t","x y ") );
parent.addChildDocument( sdoc("id","2.5", "type_s","page", "page_s","c", "v_t"," y z" ) );
parent.addChildDocument( sdoc("id","2.6", "type_s","page", "page_s","c", "v_t"," z" ) );
client.add(parent, null);
parent = sdoc("id", "3", "type_s","book", "book_s","C", "v_t","q w e");
parent.addChildDocument( sdoc("id","3.1", "type_s","page", "page_s","d", "v_t","x ") );
parent.addChildDocument( sdoc("id","3.2", "type_s","page", "page_s","e", "v_t"," y ") );
parent.addChildDocument( sdoc("id","3.3", "type_s","page", "page_s","f", "v_t"," z") );
parent.addChildDocument( sdoc("id","3.1", "type_s","page", "page_s","b", "v_t","x y ") );
parent.addChildDocument( sdoc("id","3.2", "type_s","page", "page_s","d", "v_t","x ") );
parent.addChildDocument( sdoc("id","3.3", "type_s","page", "page_s","e", "v_t"," y ") );
parent.addChildDocument( sdoc("id","3.4", "type_s","page", "page_s","f", "v_t"," z") );
client.add(parent, null);
parent = sdoc("id", "4", "type_s","book", "book_s","D", "v_t","e");
@ -3171,35 +3175,38 @@ public class TestJsonFacets extends SolrTestCaseHS {
" field:type_s," +
" limit:-1," +
" facet: {" +
" in_books: \"unique(_root_)\" }"+
" in_books: \"unique(_root_)\"," +
" via_field:\"uniqueBlock(_root_)\","+
" via_query:\"uniqueBlock({!v=type_s:book})\" }"+
" }," +
" pages: {" +
" type:terms," +
" field:page_s," +
" limit:-1," +
" facet: {" +
" in_books: \"uniqueBlock(_root_)\" }"+
" in_books: \"unique(_root_)\"," +
" via_field:\"uniqueBlock(_root_)\","+
" via_query:\"uniqueBlock({!v=type_s:book})\" }"+
" }" +
"}" )
, "response=={numFound:6,start:0,docs:[]}"
, "facets=={ count:6," +
, "response=={numFound:10,start:0,docs:[]}"
, "facets=={ count:10," +
"types:{" +
" buckets:[ {val:page, count:6, in_books:2} ]}" +
" buckets:[ {val:page, count:10, in_books:2, via_field:2, via_query:2 } ]}" +
"pages:{" +
" buckets:[ " +
" {val:a, count:1, in_books:1}," +
" {val:b, count:1, in_books:1}," +
" {val:c, count:1, in_books:1}," +
" {val:d, count:1, in_books:1}," +
" {val:e, count:1, in_books:1}," +
" {val:f, count:1, in_books:1}" +
" {val:a, count:3, in_books:1, via_field:1, via_query:1}," +
" {val:b, count:2, in_books:2, via_field:2, via_query:2}," +
" {val:c, count:2, in_books:1, via_field:1, via_query:1}," +
" {val:d, count:1, in_books:1, via_field:1, via_query:1}," +
" {val:e, count:1, in_books:1, via_field:1, via_query:1}," +
" {val:f, count:1, in_books:1, via_field:1, via_query:1}" +
" ]}" +
"}"
);
}
/**
* Similar to {@link #testBlockJoin} but uses query time joining.
* <p>

View File

@ -61,7 +61,9 @@ public class TestJsonFacetsWithNestedObjects extends SolrTestCaseHS{
"author_s", "dan",
"comment_t", "This book was too long."));
client.add(book1, null);
if (rarely()) {
client.commit();
}
SolrInputDocument book2 = sdoc(
"id", "book2",
"type_s", "book",
@ -338,25 +340,36 @@ public class TestJsonFacetsWithNestedObjects extends SolrTestCaseHS{
public void testUniqueBlock() throws Exception {
final Client client = Client.localClient();
ModifiableSolrParams p = params("rows","0");
// unique block using field and query logic
client.testJQ(params(p, "q", "{!parent tag=top which=type_s:book v=$childquery}"
, "childquery", "comment_t:*"
, "fl", "id", "fl" , "title_t"
, "root", "_root_"
, "parentQuery", "type_s:book"
, "json.facet", "{" +
" types: {" +
" domain: { blockChildren:\"type_s:book\"" +
" }," +
" type:terms," +
" field:type_s,"
+ " limit:-1," +
" field:type_s," +
" limit:-1," +
" facet: {" +
" in_books: \"uniqueBlock(_root_)\" }"+//}}," +
" in_books1: \"uniqueBlock(_root_)\"," + // field logic
" in_books2: \"uniqueBlock($root)\"," + // field reference logic
" via_query1:\"uniqueBlock({!v=type_s:book})\", " + // query logic
" via_query2:\"uniqueBlock({!v=$parentQuery})\" ," + // query reference logic
" partial_query:\"uniqueBlock({!v=cat_s:fantasy})\" ," + // first doc hit only, never count afterwards
" query_no_match:\"uniqueBlock({!v=cat_s:horor})\" }" +
" }" +
"}" )
, "response=={numFound:2,start:0,docs:[]}"
, "facets=={ count:2," +
"types:{" +
" buckets:[ {val:review, count:5, in_books:2} ]}" +
" buckets:[ {val:review, count:5, in_books1:2, in_books2:2, "
+ " via_query1:2, via_query2:2, "
+ " partial_query:1, query_no_match:0} ]}" +
"}"
);
}

View File

@ -572,7 +572,8 @@ Unlike all the facets discussed so far, Aggregation functions (also called *face
|missing |`missing(author)` |number of documents which do not have value for given field or function
|countvals |`countvals(author)` |number of values for a given field or function
|unique |`unique(author)` |number of unique values of the given field. Beyond 100 values it yields an approximate estimate rather than an exact count
|uniqueBlock |`uniqueBlock(\_root_)` |same as above with smaller footprint strictly for <<json-faceting-domain-changes.adoc#block-join-domain-changes,counting the number of Block Join blocks>>. The given field must be unique across blocks, and only singlevalued string fields are supported, docValues are recommended.
|uniqueBlock |`uniqueBlock(\_root_)` or `uniqueBlock($fldref)` where `fldref=_root_` |same as above with a smaller footprint, strictly for <<json-faceting-domain-changes.adoc#block-join-domain-changes,counting the number of Block Join blocks>>. The given field must be unique across blocks, and only single-valued string fields are supported; docValues are recommended.
| |`uniqueBlock({!v=type:parent})` or `uniqueBlock({!v=$qryref})` where `qryref=type:parent` |same as above, but uses the bitset of the given query to identify blocks when aggregating hits.
|hll |`hll(author)` |distributed cardinality estimate via hyper-log-log algorithm
|percentile |`percentile(salary,50,75,99,99.9)` |Percentile estimates via t-digest algorithm. When sorting by this metric, the first percentile listed is used as the sort value.
|sumsq |`sumsq(rent)` |sum of squares of field or function
@ -875,6 +876,7 @@ color: {
limit: -1,
facet: {
productsCount: "uniqueBlock(_root_)"
// or "uniqueBlock({!v=type:product})"
}
}
----