SOLR-13912: add support for countvals aggregation in JSON facet module

* This aggregation is equivalent to StatsComponent's count
This commit is contained in:
Munendra S N 2019-11-22 18:16:20 +05:30
parent 6aa52b2c4d
commit 2740d90dee
11 changed files with 504 additions and 54 deletions

View File

@ -128,6 +128,8 @@ New Features
* SOLR-13911: Add 'missing' aggregation in JSON FacetModule (hossman, Munendra S N)
* SOLR-13912: Add 'countvals' aggregation in JSON FacetModule (hossman, Munendra S N)
Improvements
---------------------

View File

@ -57,14 +57,15 @@ import org.apache.solr.schema.TextField;
import org.apache.solr.search.facet.AggValueSource;
import org.apache.solr.search.facet.AvgAgg;
import org.apache.solr.search.facet.CountAgg;
import org.apache.solr.search.facet.CountValsAgg;
import org.apache.solr.search.facet.HLLAgg;
import org.apache.solr.search.facet.MinMaxAgg;
import org.apache.solr.search.facet.MissingAgg;
import org.apache.solr.search.facet.PercentileAgg;
import org.apache.solr.search.facet.RelatednessAgg;
import org.apache.solr.search.facet.StddevAgg;
import org.apache.solr.search.facet.SumAgg;
import org.apache.solr.search.facet.SumsqAgg;
import org.apache.solr.search.facet.RelatednessAgg;
import org.apache.solr.search.facet.UniqueAgg;
import org.apache.solr.search.facet.UniqueBlockAgg;
import org.apache.solr.search.facet.VarianceAgg;
@ -1022,6 +1023,13 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
return new MissingAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
}
});
// Registers the "agg_countvals" function; the parsed argument is wrapped in a CountValsAgg.
addParser("agg_countvals", new ValueSourceParser() {
  @Override
  public ValueSource parse(FunctionQParser fp) throws SyntaxError {
    // Default parsing flags combined with FLAG_USE_FIELDNAME_SOURCE
    // (presumably so a bare field name is kept as a field-name source; verify against FunctionQParser).
    final int parseFlags = FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE;
    final ValueSource argument = fp.parseValueSource(parseFlags);
    return new CountValsAgg(argument);
  }
});
/***
addParser("agg_multistat", new ValueSourceParser() {

View File

@ -0,0 +1,151 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.Arrays;
import java.util.function.IntFunction;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.function.FieldNameValueSource;
/**
 * {@link AggValueSource} to count values for given {@link ValueSource}
 *
 * <p>A multi-valued field argument gets a dedicated accumulator that adds up every value of a
 * document; any other argument adds one per document for which the value source reports a value.
 */
public class CountValsAgg extends SimpleAggValueSource {

  public CountValsAgg(ValueSource vs) {
    super("countvals", vs);
  }

  /**
   * Chooses the accumulator that fits the argument of this aggregation.
   *
   * @param fcontext facet context used to look up the schema field
   * @param numDocs  not used by this implementation
   * @param numSlots number of slots the accumulator is created with
   * @return the accumulator that counts values per slot
   * @throws SolrException with {@code BAD_REQUEST} when the argument is a multi-valued point field
   *                       without docValues
   */
  @Override
  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
    ValueSource vs = getArg();

    if (vs instanceof FieldNameValueSource) {
      String field = ((FieldNameValueSource) vs).getFieldName();
      SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);

      if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
        if (sf.hasDocValues()) {
          if (sf.getType().isPointField()) {
            return new CountSortedNumericDVAcc(fcontext, sf, numSlots);
          }
          return new CountSortedSetDVAcc(fcontext, sf, numSlots);
        }
        if (sf.getType().isPointField()) {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
              "'countvals' aggregation not supported for PointField without docValues");
        }
        return new CountMultiValuedAcc(fcontext, sf, numSlots);
      } else {
        // single-valued field: fall through to the generic value-source based accumulator
        vs = sf.getType().getValueSource(sf, null);
      }
    }
    return new CountValSlotAcc(vs, fcontext, numSlots);
  }

  @Override
  public FacetMerger createFacetMerger(Object prototype) {
    return new FacetLongMerger();
  }

  /** Adds one to the slot for every collected document for which the value source has a value. */
  class CountValSlotAcc extends LongFuncSlotAcc {

    public CountValSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
      super(values, fcontext, numSlots, 0);
    }

    @Override
    public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
      if (values.exists(doc)) {
        result[slot]++;
      }
    }
  }

  /** Adds the number of sorted-numeric doc values of the document to the slot. */
  class CountSortedNumericDVAcc extends LongSortedNumericDVAcc {

    public CountSortedNumericDVAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
      super(fcontext, sf, numSlots, 0);
    }

    @Override
    protected void collectValues(int doc, int slot) throws IOException {
      result[slot] += values.docValueCount();
    }
  }

  /** Adds one to the slot for every ordinal the sorted-set doc values return for the document. */
  class CountSortedSetDVAcc extends LongSortedSetDVAcc {

    public CountSortedSetDVAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
      super(fcontext, sf, numSlots, 0);
    }

    @Override
    protected void collectValues(int doc, int slot) throws IOException {
      while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
        result[slot]++;
      }
    }
  }

  /**
   * Counts values of a multi-valued field without docValues through the uninverted field:
   * this accumulator is handed to the term lookups as the callback and {@link #call(int)}
   * increments the slot remembered in {@code currentSlot}.
   */
  class CountMultiValuedAcc extends UnInvertedFieldAcc {
    private int currentSlot;
    long[] result;

    public CountMultiValuedAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
      super(fcontext, sf, numSlots);
      result = new long[numSlots];
    }

    @Override
    public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
      // remember the target slot: call(int) receives only the term number
      this.currentSlot = slot;
      docToTerm.getBigTerms(doc + currentDocBase, this);
      docToTerm.getSmallTerms(doc + currentDocBase, this);
    }

    @Override
    public int compare(int slotA, int slotB) {
      return Long.compare(result[slotA], result[slotB]);
    }

    @Override
    public Object getValue(int slotNum) throws IOException {
      return result[slotNum];
    }

    @Override
    public void reset() throws IOException {
      Arrays.fill(result, 0);
    }

    @Override
    public void resize(Resizer resizer) {
      // The resizer hands back a newly allocated, remapped array; the previous code dropped it,
      // leaving the counters in the old layout and size.
      result = resizer.resize(result, 0);
    }

    @Override
    public void call(int termNum) {
      result[currentSlot]++;
    }
  }
}

View File

@ -0,0 +1,218 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.Arrays;
import java.util.function.IntFunction;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.solr.schema.SchemaField;
/**
 * Base {@link SlotAcc} for fields backed by {@link org.apache.lucene.index.DocValues}: positions
 * the doc-values iterator on each collected document and hands documents that have a value to
 * {@link #collectValues(int, int)}.
 */
public abstract class DocValuesAcc extends SlotAcc {
  SchemaField sf;

  public DocValuesAcc(FacetContext fcontext, SchemaField sf) throws IOException {
    super(fcontext);
    this.sf = sf;
  }

  @Override
  public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
    int position = docIdSetIterator().docID();
    if (doc > position) {
      position = docIdSetIterator().advance(doc);
    }
    // an iterator positioned past doc means the document has no value (missing): nothing to do
    if (position <= doc) {
      assert position == doc;
      collectValues(doc, slot);
    }
  }

  /** Called for a document the doc-values iterator is positioned on. */
  protected abstract void collectValues(int doc, int slot) throws IOException;

  /** Iterator over the documents that have a value in the current segment. */
  protected abstract DocIdSetIterator docIdSetIterator();
}
/**
 * {@link DocValuesAcc} reading the field through {@link NumericDocValues}
 */
abstract class NumericDVAcc extends DocValuesAcc {
  NumericDocValues values;

  public NumericDVAcc(FacetContext fcontext, SchemaField sf) throws IOException {
    super(fcontext, sf);
  }

  /** The numeric doc values of the current segment double as the document iterator. */
  @Override
  protected DocIdSetIterator docIdSetIterator() {
    return this.values;
  }

  /** Switches to the given segment and fetches its numeric doc values for the field. */
  @Override
  public void setNextReader(LeafReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    this.values = DocValues.getNumeric(readerContext.reader(), this.sf.getName());
  }
}
/**
 * {@link DocValuesAcc} reading the field through {@link SortedNumericDocValues}
 */
abstract class SortedNumericDVAcc extends DocValuesAcc {
  SortedNumericDocValues values;

  // numSlots is accepted but not used by this constructor
  public SortedNumericDVAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf);
  }

  /** The sorted-numeric doc values of the current segment double as the document iterator. */
  @Override
  protected DocIdSetIterator docIdSetIterator() {
    return this.values;
  }

  /** Switches to the given segment and fetches its sorted-numeric doc values for the field. */
  @Override
  public void setNextReader(LeafReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    this.values = DocValues.getSortedNumeric(readerContext.reader(), this.sf.getName());
  }
}
/**
 * {@link SortedNumericDVAcc} that keeps one {@code long} per slot, each starting at
 * {@code initialValue}; slots are compared by that value.
 */
abstract class LongSortedNumericDVAcc extends SortedNumericDVAcc {
  long[] result;
  long initialValue;

  public LongSortedNumericDVAcc(FacetContext fcontext, SchemaField sf, int numSlots, long initialValue) throws IOException {
    super(fcontext, sf, numSlots);
    this.result = new long[numSlots];
    this.initialValue = initialValue;
    // a fresh long[] is already zeroed, so filling is only needed for a non-zero initial value
    if (initialValue != 0) {
      Arrays.fill(result, initialValue);
    }
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Long.compare(result[slotA], result[slotB]);
  }

  @Override
  public Object getValue(int slotNum) throws IOException {
    return result[slotNum];
  }

  @Override
  public void reset() throws IOException {
    Arrays.fill(result, initialValue);
  }

  @Override
  public void resize(Resizer resizer) {
    // The resizer hands back a newly allocated, remapped array; the previous code dropped it,
    // leaving the results in the old layout and size.
    this.result = resizer.resize(result, initialValue);
  }
}
/**
 * {@link DocValuesAcc} reading the field through {@link SortedDocValues}
 */
abstract class SortedDVAcc extends DocValuesAcc {
  SortedDocValues values;

  public SortedDVAcc(FacetContext fcontext, SchemaField sf) throws IOException {
    super(fcontext, sf);
  }

  /** The sorted doc values of the current segment double as the document iterator. */
  @Override
  protected DocIdSetIterator docIdSetIterator() {
    return this.values;
  }

  /** Switches to the given segment and fetches its sorted doc values for the field. */
  @Override
  public void setNextReader(LeafReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    this.values = DocValues.getSorted(readerContext.reader(), this.sf.getName());
  }
}
/**
 * {@link DocValuesAcc} reading the field through {@link SortedSetDocValues}
 */
abstract class SortedSetDVAcc extends DocValuesAcc {
  SortedSetDocValues values;

  // numSlots is accepted but not used by this constructor
  public SortedSetDVAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf);
  }

  /** The sorted-set doc values of the current segment double as the document iterator. */
  @Override
  protected DocIdSetIterator docIdSetIterator() {
    return this.values;
  }

  /** Switches to the given segment and fetches its sorted-set doc values for the field. */
  @Override
  public void setNextReader(LeafReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    this.values = DocValues.getSortedSet(readerContext.reader(), this.sf.getName());
  }
}
/**
 * {@link SortedSetDVAcc} that keeps one {@code long} per slot, each starting at
 * {@code initialValue}; slots are compared by that value.
 */
abstract class LongSortedSetDVAcc extends SortedSetDVAcc {
  long[] result;
  long initialValue;

  public LongSortedSetDVAcc(FacetContext fcontext, SchemaField sf, int numSlots, long initialValue) throws IOException {
    super(fcontext, sf, numSlots);
    this.result = new long[numSlots];
    this.initialValue = initialValue;
    // a fresh long[] is already zeroed, so filling is only needed for a non-zero initial value
    if (initialValue != 0) {
      Arrays.fill(result, initialValue);
    }
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Long.compare(result[slotA], result[slotB]);
  }

  @Override
  public Object getValue(int slotNum) throws IOException {
    return result[slotNum];
  }

  @Override
  public void reset() throws IOException {
    Arrays.fill(result, initialValue);
  }

  @Override
  public void resize(Resizer resizer) {
    // The resizer hands back a newly allocated, remapped array; the previous code dropped it,
    // leaving the results in the old layout and size.
    this.result = resizer.resize(result, initialValue);
  }
}

View File

@ -17,18 +17,17 @@
package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.function.IntFunction;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.solr.util.hll.HLL;
import org.apache.solr.util.hll.HLLType;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.solr.common.util.Hash;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.hll.HLL;
import org.apache.solr.util.hll.HLLType;
public class HLLAgg extends StrAggValueSource {
public static Integer NO_VALUES = 0;
@ -120,13 +119,11 @@ public class HLLAgg extends StrAggValueSource {
// TODO: hybrid model for non-distrib numbers?
// todo - better efficiency for sorting?
abstract class BaseNumericAcc extends SlotAcc {
SchemaField sf;
abstract class BaseNumericAcc extends DocValuesAcc {
HLL[] sets;
public BaseNumericAcc(FacetContext fcontext, String field, int numSlots) throws IOException {
super(fcontext);
sf = fcontext.searcher.getSchema().getField(field);
super(fcontext, fcontext.qcontext.searcher().getSchema().getField(field));
sets = new HLL[numSlots];
}
@ -141,16 +138,7 @@ public class HLLAgg extends StrAggValueSource {
}
@Override
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
int valuesDocID = docIdSetIterator().docID();
if (valuesDocID < doc) {
valuesDocID = docIdSetIterator().advance(doc);
}
if (valuesDocID > doc) {
return;
}
assert valuesDocID == doc;
protected void collectValues(int doc, int slot) throws IOException {
HLL hll = sets[slot];
if (hll == null) {
hll = sets[slot] = factory.getHLL();
@ -158,8 +146,6 @@ public class HLLAgg extends StrAggValueSource {
collectValues(doc, hll);
}
protected abstract DocIdSetIterator docIdSetIterator();
protected abstract void collectValues(int doc, HLL hll) throws IOException;
@Override
@ -246,5 +232,4 @@ public class HLLAgg extends StrAggValueSource {
}
}
}

View File

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.io.IOException;
import org.apache.solr.schema.SchemaField;
/**
 * Base {@link SlotAcc} for accumulators that read terms through an {@link UnInvertedField};
 * the accumulator itself acts as the {@link UnInvertedField.Callback}.
 */
public abstract class UnInvertedFieldAcc extends SlotAcc implements UnInvertedField.Callback {
  UnInvertedField uif;
  UnInvertedField.DocToTerm docToTerm;

  // numSlots is accepted but not used by this constructor
  public UnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext);
    final String fieldName = sf.getName();
    this.uif = UnInvertedField.getUnInvertedField(fieldName, fcontext.qcontext.searcher());
    this.docToTerm = this.uif.new DocToTerm();
    // register with the query context so close() gets invoked through its close hooks
    fcontext.qcontext.addCloseHook(this);
  }

  /** Closes the doc-to-term lookup once; further calls are no-ops. */
  @Override
  public void close() throws IOException {
    final UnInvertedField.DocToTerm open = this.docToTerm;
    if (open == null) {
      return;
    }
    open.close();
    this.docToTerm = null;
  }
}

View File

@ -21,7 +21,6 @@ import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.IntFunction;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
@ -125,13 +124,11 @@ public class UniqueAgg extends StrAggValueSource {
}
static abstract class BaseNumericAcc extends SlotAcc {
SchemaField sf;
static abstract class BaseNumericAcc extends DocValuesAcc {
LongSet[] sets;
public BaseNumericAcc(FacetContext fcontext, String field, int numSlots) throws IOException {
super(fcontext);
sf = fcontext.searcher.getSchema().getField(field);
super(fcontext, fcontext.qcontext.searcher().getSchema().getField(field));
sets = new LongSet[numSlots];
}
@ -146,16 +143,7 @@ public class UniqueAgg extends StrAggValueSource {
}
@Override
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
int valuesDocID = docIdSetIterator().docID();
if (valuesDocID < doc) {
valuesDocID = docIdSetIterator().advance(doc);
}
if (valuesDocID > doc) {
// missing
return;
}
protected void collectValues(int doc, int slot) throws IOException {
LongSet set = sets[slot];
if (set == null) {
set = sets[slot] = new LongSet(16);
@ -163,8 +151,6 @@ public class UniqueAgg extends StrAggValueSource {
collectValues(doc, set);
}
protected abstract DocIdSetIterator docIdSetIterator();
protected abstract void collectValues(int doc, LongSet set) throws IOException;
@Override

View File

@ -1194,6 +1194,9 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
assertFuncEquals("agg_missing(foo_i)", "agg_missing(foo_i)");
assertFuncEquals("agg(missing(foo_i))", "agg(missing(foo_i))");
assertFuncEquals("agg_missing(field(foo_i))", "agg_missing(field(foo_i))");
assertFuncEquals("agg_countvals(foo_i)", "agg_countvals(foo_i)");
assertFuncEquals("agg(countvals(foo_i))", "agg(countvals(foo_i))");
assertFuncEquals("agg_countvals(field(foo_i))", "agg_countvals(field(foo_i))");
// assertFuncEquals("agg_multistat(foo_i)", "agg_multistat(foo_i)");
}

View File

@ -42,9 +42,8 @@ import org.junit.Test;
*/
public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistributedSearchTestCase {
// TODO: SOLR-11695: need "num_values"
// TODO: add hll & variance - update all assertions to test their values (right after any mention of 'stddev')
private static List<String> ALL_STATS = Arrays.asList("min", "max", "sum", "stddev", "avg", "sumsq", "unique", "missing");
private static List<String> ALL_STATS = Arrays.asList("min", "max", "sum", "stddev", "avg", "sumsq", "unique", "missing", "countvals");
private String STAT_FIELD = "stat_i1";
private String ALL_STATS_JSON = "";
@ -230,7 +229,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(ALL_STATS.size() + 3, bucket.size()); // val,count,facet
assertEquals(-2L, bucket.get("min")); // this min only exists on shard2
assertEquals(1L, bucket.get("max"));
// assertEquals(101L, bucket.get("num_values")); // TODO: SOLR-11695
assertEquals(101L, bucket.get("countvals"));
assertEquals(0L, bucket.get("missing"));
assertEquals(48.0D, bucket.get("sum"));
assertEquals(0.475247524752475D, (double) bucket.get("avg"), 0.1E-7);
@ -389,7 +388,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(300L, aaa0_Bucket.get("count"));
assertEquals(-99L, aaa0_Bucket.get("min"));
assertEquals(693L, aaa0_Bucket.get("max"));
// assertEquals(300L, aaa0_Bucket.get("num_values")); // TODO: SOLR-11695
assertEquals(300L, aaa0_Bucket.get("countvals"));
assertEquals(0L, aaa0_Bucket.get("missing"));
assertEquals(34650.0D, aaa0_Bucket.get("sum"));
assertEquals(115.5D, (double) aaa0_Bucket.get("avg"), 0.1E-7);
@ -403,7 +402,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(135L, tail_Bucket.get("count"));
assertEquals(0L, tail_Bucket.get("min"));
assertEquals(44L, tail_Bucket.get("max"));
// assertEquals(90L, tail_Bucket.get("num_values")); // TODO: SOLR-11695
assertEquals(90L, tail_Bucket.get("countvals"));
assertEquals(45L, tail_Bucket.get("missing"));
assertEquals(1980.0D, tail_Bucket.get("sum"));
assertEquals(22.0D, (double) tail_Bucket.get("avg"), 0.1E-7);
@ -419,7 +418,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(17L, tailB_Bucket.get("count"));
assertEquals(35L, tailB_Bucket.get("min"));
assertEquals(40L, tailB_Bucket.get("max"));
// assertEquals(12L, tailB_Bucket.get("num_values")); // TODO: SOLR-11695
assertEquals(12L, tailB_Bucket.get("countvals"));
assertEquals(5L, tailB_Bucket.get("missing"));
assertEquals(450.0D, tailB_Bucket.get("sum"));
assertEquals(37.5D, (double) tailB_Bucket.get("avg"), 0.1E-7);

View File

@ -1261,6 +1261,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
" , f5:{${terms} type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'variance(${num_d})'} } " +
" , f6:{type:terms, field:${num_d}, limit:1, sort:'x desc', facet:{x:'hll(${num_i})'} } " + // facet on a field that will cause hashing and exercise hll.resize on numeric field
" , f7:{type:terms, field:${cat_s}, limit:2, sort:'x desc', facet:{x:'missing(${sparse_num_d})'} } " +
" , f8:{type:terms, field:${cat_s}, limit:2, sort:'x desc', facet:{x:'countvals(${sparse_num_d})'} } " +
"}"
)
, "facets=={ 'count':6, " +
@ -1271,6 +1272,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
", f5:{ 'buckets':[{ val:'B', count:3, x:74.6666666666666 }, { val:'A', count:2, x:1.0 }]} " +
", f6:{ buckets:[{ val:-9.0, count:1, x:1 }]} " +
", f7:{ buckets:[{ val:B, count:3, x:3 },{ val:A, count:2, x:0 }]} " +
", f8:{ buckets:[{ val:A, count:2, x:2 },{ val:B, count:3, x:0 }]} " +
"}"
);
@ -1328,6 +1330,50 @@ public class TestJsonFacets extends SolrTestCaseHS {
"}"
);
// test sorting by countvals stat with function
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 asc', facet:{n1:'countvals(field(${sparse_num_d}))'} }}" +
" , f2:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'countvals(field(${sparse_num_d}))'} }} }"
)
, "facets=={ 'count':6, " +
" f1:{ 'buckets':[{ val:'B', count:3, n1:0 }, { val:'A', count:2, n1:2}]}" +
", f2:{ 'buckets':[{ val:'A', count:2, n1:2}, { val:'B', count:3, n1:0 }]} }"
);
// test sorting by countvals stat with domain query
client.testJQ(params(p, "q", "-id:*"
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'countvals(field(${sparse_num_d}))'} }}" +
" , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'countvals(field(${sparse_num_d}))'} }} }"
)
, "facets=={ 'count':0, " +
" f1:{ 'buckets':[{ val:'B', count:3, n1:0 }, { val:'A', count:2, n1:2}]}" +
", f2:{ 'buckets':[{ val:'A', count:2, n1:2}, { val:'B', count:3, n1:0 }]} }"
);
// test with sub-facet aggregation with stat on field
client.testJQ(params(p, "q", "*:*"
, "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " +
"facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " +
"facet:{n1:'countvals(${sparse_num_d})'}}}}}}}"
)
, "facets=={ 'count':6, " +
" f1:{ 'buckets':[{ val:'B', count:3, f2:{'buckets':[{val:'NJ', count:2, n1:0},{val:'NY', count:1, n1:0}]} }," +
" { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:1},{val:'NY', count:1, n1:1}]}}]}" +
"}"
);
// test with sub-facet aggregation with stat on func
client.testJQ(params(p, "q", "*:*"
, "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " +
"facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " +
"facet:{n1:'countvals(field(${sparse_num_d}))'}}}}}}}"
)
, "facets=={ 'count':6, " +
" f1:{ 'buckets':[{ val:'B', count:3, f2:{'buckets':[{val:'NJ', count:2, n1:0},{val:'NY', count:1, n1:0}]} }," +
" { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:1},{val:'NY', count:1, n1:1}]}}]}" +
"}"
);
// facet on numbers to test resize from hashing (may need to be sorting by the metric to test that)
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{" +
@ -1741,7 +1787,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
", numwhere:'unique(${where_s})', unique_num_i:'unique(${num_i})', unique_num_d:'unique(${num_d})', unique_date:'unique(${date})'" +
", where_hll:'hll(${where_s})', hll_num_i:'hll(${num_i})', hll_num_d:'hll(${num_d})', hll_date:'hll(${date})'" +
", med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)', variance:'variance(${num_d})', stddev:'stddev(${num_d})'" +
", mini:'min(${num_i})', maxi:'max(${num_i})', missing:'missing(${sparse_num_d})'" +
", mini:'min(${num_i})', maxi:'max(${num_i})', missing:'missing(${sparse_num_d})', vals:'countvals(${sparse_num_d})'" +
" }"
)
, "facets=={ 'count':6, " +
@ -1749,7 +1795,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
", numwhere:2, unique_num_i:4, unique_num_d:5, unique_date:5" +
", where_hll:2, hll_num_i:4, hll_num_d:5, hll_date:5" +
", med:2.0, perc:[-9.0,2.0,11.0], variance:49.04, stddev:7.002856560004639" +
", mini:-5, maxi:7, missing:4" +
", mini:-5, maxi:7, missing:4, vals:2" +
"}"
);
@ -1810,6 +1856,8 @@ public class TestJsonFacets extends SolrTestCaseHS {
"x:'unique(${multi_ss})'" +
",z:'missing(${multi_ss})'" +
",z1:'missing(${num_is})'" +
",v:'countvals(${multi_ss})'" +
",v1:'countvals(${num_is})'" +
",y:{query:{q:'id:2', facet:{x:'unique(${multi_ss})'} }} " +
",x2:'hll(${multi_ss})'" +
",y2:{query:{q:'id:2', facet:{x:'hll(${multi_ss})'} }} " +
@ -1819,6 +1867,8 @@ public class TestJsonFacets extends SolrTestCaseHS {
",x:2" +
",z:2" +
",z1:1" +
",v:6" +
",v1:8" +
",y:{count:1, x:2}" + // single document should yield 2 unique values
",x2:2" +
",y2:{count:1, x:2}" + // single document should yield 2 unique values
@ -2097,11 +2147,12 @@ public class TestJsonFacets extends SolrTestCaseHS {
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{ " +
" c1:'unique(${num_is})', c2:'hll(${num_is})', c3:'missing(${num_is})'" +
", c4:'countvals(${num_is})', c5:'agg(countvals(${num_is}))'" +
",f1:{${terms} type:terms, field:${num_is} } " +
"}"
)
, "facets=={ count:6 " +
", c1:5, c2:5, c3:1" +
", c1:5, c2:5, c3:1, c4:8, c5:8" +
", f1:{ buckets:[ {val:-1,count:2},{val:0,count:2},{val:3,count:2},{val:-5,count:1},{val:2,count:1} ] } " +
"} "
);
@ -2109,12 +2160,12 @@ public class TestJsonFacets extends SolrTestCaseHS {
// multi-valued float
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{ " +
" c1:'unique(${num_fs})', c2:'hll(${num_fs})', c3:'missing(${num_fs})', c4:'agg(missing(${num_fs}))'" +
" c1:'unique(${num_fs})', c2:'hll(${num_fs})', c3:'missing(${num_fs})', c4:'agg(missing(${num_fs}))', c5:'countvals(${num_fs})'" +
",f1:{${terms} type:terms, field:${num_fs} } " +
"}"
)
, "facets=={ count:6 " +
", c1:5, c2:5, c3:1, c4:1" +
", c1:5, c2:5, c3:1, c4:1, c5:8" +
", f1:{ buckets:[ {val:-1.5,count:2},{val:0.0,count:2},{val:3.0,count:2},{val:-5.0,count:1},{val:2.0,count:1} ] } " +
"} "
);
@ -2166,11 +2217,11 @@ public class TestJsonFacets extends SolrTestCaseHS {
// currently non-sorting stats.
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{f1:{type:terms, field:'${cat_s}', facet:{h:'hll(${where_s})' , u:'unique(${where_s})', mind:'min(${num_d})', maxd:'max(${num_d})', mini:'min(${num_i})', maxi:'max(${num_i})'" +
", sumd:'sum(${num_d})', avgd:'avg(${num_d})', variance:'variance(${num_d})', stddev:'stddev(${num_d})', missing:'missing(${multi_ss})'} }}"
", sumd:'sum(${num_d})', avgd:'avg(${num_d})', variance:'variance(${num_d})', stddev:'stddev(${num_d})', missing:'missing(${multi_ss})', vals:'countvals(${multi_ss})'} }}"
)
, "facets=={ 'count':6, " +
"'f1':{ buckets:[{val:B, count:3, h:2, u:2, mind:-9.0, maxd:11.0, mini:-5, maxi:7, sumd:-3.0, avgd:-1.0, variance:74.66666666666667, stddev:8.640987597877148, missing:0}," +
" {val:A, count:2, h:2, u:2, mind:2.0, maxd:4.0, mini:2, maxi:3, sumd:6.0, avgd:3.0, variance:1.0, stddev:1.0, missing:1}] } } "
"'f1':{ buckets:[{val:B, count:3, h:2, u:2, mind:-9.0, maxd:11.0, mini:-5, maxi:7, sumd:-3.0, avgd:-1.0, variance:74.66666666666667, stddev:8.640987597877148, missing:0, vals:5}," +
" {val:A, count:2, h:2, u:2, mind:2.0, maxd:4.0, mini:2, maxi:3, sumd:6.0, avgd:3.0, variance:1.0, stddev:1.0, missing:1, vals:1}] } } "
);

View File

@ -570,6 +570,7 @@ Unlike all the facets discussed so far, Aggregation functions (also called *face
|min |`min(salary)` |minimum value
|max |`max(mul(price,popularity))` |maximum value
|missing |`missing(author)` |number of documents which do not have value for given field or function
|countvals |`countvals(author)` |number of values for a given field or function
|unique |`unique(author)` |number of unique values of the given field. Beyond 100 values it yields an estimate rather than an exact count
|uniqueBlock |`uniqueBlock(\_root_)` |same as above with a smaller footprint, strictly for <<json-faceting-domain-changes.adoc#block-join-domain-changes,counting the number of Block Join blocks>>. The given field must be unique across blocks, and only single-valued string fields are supported; docValues are recommended.
|hll |`hll(author)` |distributed cardinality estimate via hyper-log-log algorithm