SOLR-11706: add support for aggregation on multivalued fields

* min, max, sum, sumsq, avg, stddev, variance, percentile aggregations
  in JSON facets now support multivalued fields
This commit is contained in:
Munendra S N 2019-12-05 10:48:22 +05:30
parent c4126ef858
commit 12e8cca644
17 changed files with 1554 additions and 139 deletions

View File

@ -160,6 +160,9 @@ Improvements
* SOLR-13968: Support postingsFormat and docValuesFormat in schema fields. (Bruno Roustant) * SOLR-13968: Support postingsFormat and docValuesFormat in schema fields. (Bruno Roustant)
* SOLR-11706: Add support for aggregation on multivalued fields in JSON facets. min, max, avg, sum, sumsq, stddev,
variance, percentile aggregations now have support for multivalued fields. (hossman, Munendra S N)
Optimizations Optimizations
--------------------- ---------------------
(No changes) (No changes)

View File

@ -985,35 +985,35 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
addParser("agg_sum", new ValueSourceParser() { addParser("agg_sum", new ValueSourceParser() {
@Override @Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError { public ValueSource parse(FunctionQParser fp) throws SyntaxError {
return new SumAgg(fp.parseValueSource()); return new SumAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
} }
}); });
addParser("agg_avg", new ValueSourceParser() { addParser("agg_avg", new ValueSourceParser() {
@Override @Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError { public ValueSource parse(FunctionQParser fp) throws SyntaxError {
return new AvgAgg(fp.parseValueSource()); return new AvgAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
} }
}); });
addParser("agg_sumsq", new ValueSourceParser() { addParser("agg_sumsq", new ValueSourceParser() {
@Override @Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError { public ValueSource parse(FunctionQParser fp) throws SyntaxError {
return new SumsqAgg(fp.parseValueSource()); return new SumsqAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
} }
}); });
addParser("agg_variance", new ValueSourceParser() { addParser("agg_variance", new ValueSourceParser() {
@Override @Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError { public ValueSource parse(FunctionQParser fp) throws SyntaxError {
return new VarianceAgg(fp.parseValueSource()); return new VarianceAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
} }
}); });
addParser("agg_stddev", new ValueSourceParser() { addParser("agg_stddev", new ValueSourceParser() {
@Override @Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError { public ValueSource parse(FunctionQParser fp) throws SyntaxError {
return new StddevAgg(fp.parseValueSource()); return new StddevAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
} }
}); });
@ -1054,7 +1054,26 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
} }
}); });
addParser("agg_percentile", new PercentileAgg.Parser()); addParser("agg_percentile", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
List<Double> percentiles = new ArrayList<>();
ValueSource vs = fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE);
while (fp.hasMoreArguments()) {
double val = fp.parseDouble();
if (val<0 || val>100) {
throw new SyntaxError("requested percentile must be between 0 and 100. got " + val);
}
percentiles.add(val);
}
if (percentiles.isEmpty()) {
throw new SyntaxError("expected percentile(valsource,percent1[,percent2]*) EXAMPLE:percentile(myfield,50)");
}
return new PercentileAgg(vs, percentiles);
}
});
addParser("agg_" + RelatednessAgg.NAME, new ValueSourceParser() { addParser("agg_" + RelatednessAgg.NAME, new ValueSourceParser() {
@Override @Override

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
/**
 * Static math helpers for aggregations that are derived from running totals
 * (sum, sum of squares and value count).
 *
 * <p>All methods return {@code 0} when {@code count} is zero.</p>
 */
public final class AggUtil {

  private AggUtil() {
    // static helpers only; not meant to be instantiated
  }

  /**
   * Computes and returns average for given sum and count
   *
   * @param sum   total of all values
   * @param count number of values that were summed
   * @return {@code sum / count}, or {@code 0.0} when {@code count} is zero
   */
  public static double avg(double sum, long count) {
    // todo: should we return NAN when count==0?
    return count == 0 ? 0.0d : sum / count;
  }

  /**
   * Computes and returns uncorrected standard deviation for given values
   *
   * @param sumSq sum of the squared values
   * @param sum   sum of the values
   * @param count number of values
   * @return square root of {@link #variance(double, double, long)}; never negative and
   *         never {@code NaN} for finite inputs
   */
  public static double stdDev(double sumSq, double sum, long count) {
    // todo: switch to corrected stddev SOLR-11725
    // todo: should we return NAN when count==0?
    // variance() is clamped at zero, so sqrt can no longer be handed a tiny negative
    // number (which would have produced NaN).
    return Math.sqrt(variance(sumSq, sum, count));
  }

  /**
   * Computes and returns uncorrected variance for given values
   *
   * @param sumSq sum of the squared values
   * @param sum   sum of the values
   * @param count number of values
   * @return {@code sumSq/count - (sum/count)^2}, clamped so it is never below zero;
   *         {@code 0} when {@code count} is zero
   */
  public static double variance(double sumSq, double sum, long count) {
    // todo: switch to corrected variance SOLR-11725
    // todo: should we return NAN when count==0?
    if (count == 0) {
      return 0.0d;
    }
    double raw = (sumSq / count) - Math.pow(sum / count, 2);
    // Floating-point rounding can push the difference of two nearly equal terms
    // slightly below zero; a variance is never negative, so clamp it.
    return Math.max(0.0d, raw);
  }
}

View File

@ -17,9 +17,18 @@
package org.apache.solr.search.facet; package org.apache.solr.search.facet;
import java.io.IOException; import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List; import java.util.List;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.function.FieldNameValueSource;
public class AvgAgg extends SimpleAggValueSource { public class AvgAgg extends SimpleAggValueSource {
@ -29,7 +38,31 @@ public class AvgAgg extends SimpleAggValueSource {
@Override @Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
return new AvgSlotAcc(getArg(), fcontext, numSlots); ValueSource vs = getArg();
if (vs instanceof FieldNameValueSource) {
String field = ((FieldNameValueSource) vs).getFieldName();
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
if (sf.getType().getNumberType() == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for " + sf.getType().getTypeName());
}
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
if (sf.hasDocValues()) {
if (sf.getType().isPointField()) {
return new AvgSortedNumericAcc(fcontext, sf, numSlots);
}
return new AvgSortedSetAcc(fcontext, sf, numSlots);
}
if (sf.getType().isPointField()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for PointField w/o docValues");
}
return new AvgUnInvertedFieldAcc(fcontext, sf, numSlots);
}
vs = sf.getType().getValueSource(sf, null);
}
return new AvgSlotAcc(vs, fcontext, numSlots);
} }
@Override @Override
@ -43,7 +76,7 @@ public class AvgAgg extends SimpleAggValueSource {
@Override @Override
public void merge(Object facetResult, Context mcontext1) { public void merge(Object facetResult, Context mcontext1) {
List<Number> numberList = (List<Number>)facetResult; List<Number> numberList = (List<Number>) facetResult;
num += numberList.get(0).longValue(); num += numberList.get(0).longValue();
sum += numberList.get(1).doubleValue(); sum += numberList.get(1).doubleValue();
} }
@ -51,8 +84,167 @@ public class AvgAgg extends SimpleAggValueSource {
@Override @Override
protected double getDouble() { protected double getDouble() {
// TODO: is it worth to try and cache? // TODO: is it worth to try and cache?
return num==0 ? 0.0d : sum/num; return AggUtil.avg(sum, num);
}
} }
}; class AvgSortedNumericAcc extends DoubleSortedNumericDVAcc {
int[] counts;
public AvgSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
super(fcontext, sf, numSlots, 0);
this.counts = new int[numSlots];
}
@Override
protected void collectValues(int doc, int slot) throws IOException {
for (int i = 0, count = values.docValueCount(); i < count; i++) {
result[slot]+=getDouble(values.nextValue());
counts[slot]++;
}
}
private double avg(int slot) {
return AggUtil.avg(result[slot], counts[slot]); // calc once and cache in result?
}
@Override
public int compare(int slotA, int slotB) {
return Double.compare(avg(slotA), avg(slotB));
}
@Override
public Object getValue(int slot) {
if (fcontext.isShard()) {
ArrayList lst = new ArrayList(2);
lst.add(counts[slot]);
lst.add(result[slot]);
return lst;
} else {
return avg(slot);
}
}
@Override
public void reset() throws IOException {
super.reset();
Arrays.fill(counts, 0);
}
@Override
public void resize(Resizer resizer) {
super.resize(resizer);
resizer.resize(counts, 0);
}
}
/**
 * Average accumulator for multi-valued fields read through {@link SortedSetDocValues}.
 * <p>
 * Every value of every collected document is added to the slot's running sum
 * ({@code result}) and counted in {@code counts}; the average is derived on demand.
 * </p>
 */
class AvgSortedSetAcc extends DoubleSortedSetDVAcc {
  /** Number of values added to each slot's running sum. */
  int[] counts;

  public AvgSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots, 0);
    this.counts = new int[numSlots];
  }

  /**
   * Adds every remaining ordinal's value of the current document to {@code slot}.
   * Each term is decoded through the field type; {@link Date} values contribute
   * their {@link Date#getTime()} value, everything else is treated as a {@link Number}.
   */
  @Override
  protected void collectValues(int doc, int slot) throws IOException {
    long ord;
    while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      BytesRef term = values.lookupOrd(ord);
      Object obj = sf.getType().toObject(sf, term);
      double val = obj instanceof Date ? ((Date) obj).getTime() : ((Number) obj).doubleValue();
      result[slot] += val;
      counts[slot]++;
    }
  }

  /** Average of the slot, computed from its running sum and count. */
  private double avg(int slot) {
    return AggUtil.avg(result[slot], counts[slot]);
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Double.compare(avg(slotA), avg(slotB));
  }

  /**
   * Returns the average for the slot, or, for a shard request, the pair
   * {@code [count, sum]} so that the caller can merge partial results.
   */
  @Override
  public Object getValue(int slot) {
    if (!fcontext.isShard()) {
      return avg(slot);
    }
    List<Number> partial = new ArrayList<>(2);
    partial.add(counts[slot]);
    partial.add(result[slot]);
    return partial;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    Arrays.fill(counts, 0);
  }

  @Override
  public void resize(Resizer resizer) {
    super.resize(resizer);
    resizer.resize(counts, 0);
  }
}
/**
 * Average accumulator for multi-valued fields without docValues, fed through the
 * {@link #call(int)} callback with one term ordinal per value.
 * <p>
 * NOTE(review): {@code docToTerm}, {@code currentSlot} and {@code result} are
 * inherited; presumably the base class sets {@code currentSlot} before the
 * callbacks fire — confirm against {@code DoubleUnInvertedFieldAcc}.
 * </p>
 */
class AvgUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {
  /** Number of values added to each slot's running sum. */
  int[] counts;

  public AvgUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots, 0);
    this.counts = new int[numSlots];
  }

  /**
   * Adds the value behind {@code termNum} to the current slot. {@link Date} values
   * contribute {@link Date#getTime()}, everything else is treated as a {@link Number}.
   *
   * @throws UncheckedIOException if the term lookup fails; this callback's signature
   *                              declares no checked exception
   */
  @Override
  public void call(int termNum) {
    try {
      BytesRef term = docToTerm.lookupOrd(termNum);
      Object obj = sf.getType().toObject(sf, term);
      double val = obj instanceof Date ? ((Date) obj).getTime() : ((Number) obj).doubleValue();
      result[currentSlot] += val;
      counts[currentSlot]++;
    } catch (IOException e) {
      // find a better way to do it
      throw new UncheckedIOException(e);
    }
  }

  /** Average of the slot, computed from its running sum and count. */
  private double avg(int slot) {
    return AggUtil.avg(result[slot], counts[slot]);
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Double.compare(avg(slotA), avg(slotB));
  }

  /**
   * Returns the average for the slot, or, for a shard request, the pair
   * {@code [count, sum]} so that the caller can merge partial results.
   */
  @Override
  public Object getValue(int slot) {
    if (!fcontext.isShard()) {
      return avg(slot);
    }
    List<Number> partial = new ArrayList<>(2);
    partial.add(counts[slot]);
    partial.add(result[slot]);
    return partial;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    Arrays.fill(counts, 0);
  }

  @Override
  public void resize(Resizer resizer) {
    super.resize(resizer);
    resizer.resize(counts, 0);
  }
}
} }

View File

@ -18,7 +18,9 @@
package org.apache.solr.search.facet; package org.apache.solr.search.facet;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Date;
import java.util.function.IntFunction; import java.util.function.IntFunction;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
@ -28,6 +30,8 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
/** /**
@ -139,6 +143,118 @@ abstract class LongSortedNumericDVAcc extends SortedNumericDVAcc {
} }
/**
 * Base accumulator that keeps one {@code double} per slot for fields backed by
 * sorted numeric doc values. Subclasses decide how values are folded into
 * {@link #result}.
 */
abstract class DoubleSortedNumericDVAcc extends SortedNumericDVAcc {
  /** Per-slot accumulated value. */
  double[] result;
  /** Value every slot starts from and is restored to on reset/resize. */
  double initialValue;

  public DoubleSortedNumericDVAcc(FacetContext fcontext, SchemaField sf, int numSlots, double initialValue) throws IOException {
    super(fcontext, sf, numSlots);
    this.initialValue = initialValue;
    this.result = new double[numSlots];
    // a fresh array is already zeroed, so only a non-zero start value needs a fill
    if (initialValue != 0) {
      Arrays.fill(result, initialValue);
    }
  }

  @Override
  public int compare(int slotA, int slotB) {
    final double left = result[slotA];
    final double right = result[slotB];
    return Double.compare(left, right);
  }

  @Override
  public Object getValue(int slotNum) throws IOException {
    return result[slotNum];
  }

  @Override
  public void reset() throws IOException {
    Arrays.fill(result, initialValue);
  }

  @Override
  public void resize(Resizer resizer) {
    resizer.resize(result, initialValue);
  }

  /**
   * Decodes a raw doc-values {@code long} into a {@code double} according to the
   * field's number type: integer, long and date values are used as-is, float and
   * double values are decoded from their sortable bit representation.
   */
  protected double getDouble(long val) {
    final double decoded;
    switch (sf.getType().getNumberType()) {
      case FLOAT:
        decoded = NumericUtils.sortableIntToFloat((int) val);
        break;
      case DOUBLE:
        decoded = NumericUtils.sortableLongToDouble(val);
        break;
      case INTEGER:
      case LONG:
      case DATE:
        decoded = val;
        break;
      default:
        // this would never happen
        decoded = 0.0d;
        break;
    }
    return decoded;
  }
}
/**
 * Base class for standard deviation and variance computation for fields with {@link SortedNumericDocValues}
 * <p>
 * Per slot it tracks the sum of squares (in {@code result}), the plain sum and the
 * number of values; subclasses turn those totals into the final statistic in
 * {@link #computeVal(int)}.
 * </p>
 */
abstract class SDVSortedNumericAcc extends DoubleSortedNumericDVAcc {
  /** Number of values collected per slot. */
  int[] counts;
  /** Sum of the values collected per slot. */
  double[] sum;

  public SDVSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots, 0);
    this.counts = new int[numSlots];
    this.sum = new double[numSlots];
  }

  /** Folds every value of the current document into the slot's running totals. */
  @Override
  protected void collectValues(int doc, int slot) throws IOException {
    for (int i = 0, count = values.docValueCount(); i < count; i++) {
      double val = getDouble(values.nextValue());
      result[slot] += val * val;
      sum[slot] += val;
      counts[slot]++;
    }
  }

  /** Computes the final statistic for {@code slot} from its running totals. */
  protected abstract double computeVal(int slot);

  @Override
  public int compare(int slotA, int slotB) {
    return Double.compare(computeVal(slotA), computeVal(slotB));
  }

  /**
   * Returns the computed statistic, or, for a shard request, the triple
   * {@code [count, sumOfSquares, sum]}.
   */
  @Override
  public Object getValue(int slot) {
    if (!fcontext.isShard()) {
      return computeVal(slot);
    }
    ArrayList<Number> partial = new ArrayList<>(3);
    partial.add(counts[slot]);
    partial.add(result[slot]);
    partial.add(sum[slot]);
    return partial;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    Arrays.fill(counts, 0);
    Arrays.fill(sum, 0);
  }

  @Override
  public void resize(Resizer resizer) {
    super.resize(resizer);
    resizer.resize(counts, 0);
    resizer.resize(sum, 0);
  }
}
/** /**
* Accumulator for {@link SortedDocValues} * Accumulator for {@link SortedDocValues}
*/ */
@ -216,3 +332,98 @@ abstract class LongSortedSetDVAcc extends SortedSetDVAcc {
resizer.resize(result, initialValue); resizer.resize(result, initialValue);
} }
} }
/**
 * Base accumulator that keeps one {@code double} per slot for fields backed by
 * sorted set doc values. Subclasses decide how values are folded into
 * {@link #result}.
 */
abstract class DoubleSortedSetDVAcc extends SortedSetDVAcc {
  /** Per-slot accumulated value. */
  double[] result;
  /** Value every slot starts from and is restored to on reset/resize. */
  double initialValue;

  /**
   * @param initialValue starting value for every slot; declared as {@code double} to
   *                     match the field it is stored in, so fractional or NaN start
   *                     values can be passed (integral arguments still widen implicitly)
   */
  public DoubleSortedSetDVAcc(FacetContext fcontext, SchemaField sf, int numSlots, double initialValue) throws IOException {
    super(fcontext, sf, numSlots);
    result = new double[numSlots];
    this.initialValue = initialValue;
    // a fresh array is already zeroed, so only a non-zero start value needs a fill
    if (initialValue != 0) {
      Arrays.fill(result, initialValue);
    }
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Double.compare(result[slotA], result[slotB]);
  }

  @Override
  public Object getValue(int slotNum) throws IOException {
    return result[slotNum];
  }

  @Override
  public void reset() throws IOException {
    Arrays.fill(result, initialValue);
  }

  @Override
  public void resize(Resizer resizer) {
    resizer.resize(result, initialValue);
  }
}
/**
 * Base class for standard deviation and variance computation for fields with {@link SortedSetDocValues}
 * <p>
 * Per slot it tracks the sum of squares (in {@code result}), the plain sum and the
 * number of values; subclasses turn those totals into the final statistic in
 * {@link #computeVal(int)}.
 * </p>
 */
abstract class SDVSortedSetAcc extends DoubleSortedSetDVAcc {
  /** Number of values collected per slot. */
  int[] counts;
  /** Sum of the values collected per slot. */
  double[] sum;

  public SDVSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots, 0);
    this.counts = new int[numSlots];
    this.sum = new double[numSlots];
  }

  /**
   * Folds every remaining ordinal's value of the current document into the slot's
   * running totals. {@link Date} values contribute {@link Date#getTime()}, everything
   * else is treated as a {@link Number}.
   */
  @Override
  protected void collectValues(int doc, int slot) throws IOException {
    long ord;
    while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      BytesRef term = values.lookupOrd(ord);
      Object obj = sf.getType().toObject(sf, term);
      double val = obj instanceof Date ? ((Date) obj).getTime() : ((Number) obj).doubleValue();
      result[slot] += val * val;
      sum[slot] += val;
      counts[slot]++;
    }
  }

  /** Computes the final statistic for {@code slot} from its running totals. */
  protected abstract double computeVal(int slot);

  @Override
  public int compare(int slotA, int slotB) {
    return Double.compare(computeVal(slotA), computeVal(slotB));
  }

  /**
   * Returns the computed statistic, or, for a shard request, the triple
   * {@code [count, sumOfSquares, sum]}.
   */
  @Override
  public Object getValue(int slot) {
    if (!fcontext.isShard()) {
      return computeVal(slot);
    }
    ArrayList<Number> partial = new ArrayList<>(3);
    partial.add(counts[slot]);
    partial.add(result[slot]);
    partial.add(sum[slot]);
    return partial;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    Arrays.fill(counts, 0);
    Arrays.fill(sum, 0);
  }

  @Override
  public void resize(Resizer resizer) {
    super.resize(resizer);
    resizer.resize(counts, 0);
    resizer.resize(sum, 0);
  }
}

View File

@ -25,11 +25,15 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LongValues;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrFieldSource; import org.apache.solr.schema.StrFieldSource;
import org.apache.solr.search.function.FieldNameValueSource; import org.apache.solr.search.function.FieldNameValueSource;
@ -53,8 +57,21 @@ public class MinMaxAgg extends SimpleAggValueSource {
sf = fcontext.qcontext.searcher().getSchema().getField(field); sf = fcontext.qcontext.searcher().getSchema().getField(field);
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
vs = null; if (sf.hasDocValues()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "min/max aggregations can't be used on multi-valued field " + field); if(sf.getType().getNumberType() != null) {
FieldType.MultiValueSelector choice = minmax == 1 ? FieldType.MultiValueSelector.MIN : FieldType.MultiValueSelector.MAX;
vs = sf.getType().getSingleValueSource(choice, sf, null);
} else {
// multi-valued strings
return new MinMaxSortedSetDVAcc(fcontext, sf, numSlots);
}
} else {
if (sf.getType().isPointField()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"min/max aggregations can't be used on PointField w/o DocValues");
}
return new MinMaxUnInvertedFieldAcc(fcontext, sf, numSlots);
}
} else { } else {
vs = sf.getType().getValueSource(sf, null); vs = sf.getType().getValueSource(sf, null);
} }
@ -137,6 +154,80 @@ public class MinMaxAgg extends SimpleAggValueSource {
} }
} }
/**
 * Min/max accumulator for multi-valued fields without docValues. For each slot it
 * remembers the winning term ordinal, or {@link #MISSING} when nothing has been
 * collected yet; the direction comes from the enclosing aggregation's {@code minmax}.
 */
class MinMaxUnInvertedFieldAcc extends UnInvertedFieldAcc {
  /** Marker stored in {@link #result} for a slot that has seen no value. */
  final static int MISSING = -1;
  /** Slot that the {@link #call(int)} callbacks currently write into. */
  private int currentSlot;
  /** Winning term ordinal per slot, or {@link #MISSING}. */
  int[] result;

  public MinMaxUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots);
    result = new int[numSlots];
    Arrays.fill(result, MISSING);
  }

  /**
   * Visits all terms of {@code doc}; each one is reported back through
   * {@link #call(int)} and compared against the slot's current winner.
   */
  @Override
  public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
    this.currentSlot = slot;
    docToTerm.getBigTerms(doc + currentDocBase, this);
    docToTerm.getSmallTerms(doc + currentDocBase, this);
  }

  /**
   * Orders slots by their winning ordinal; a slot without a value sorts before any
   * slot that has one, and two slots without a value compare as equal.
   */
  @Override
  public int compare(int slotA, int slotB) {
    int a = result[slotA];
    int b = result[slotB];
    if (a == b) {
      // also covers "both MISSING", which must be 0 to keep the ordering consistent
      return 0;
    }
    if (a == MISSING) {
      return -1;
    }
    if (b == MISSING) {
      return 1;
    }
    return Integer.compare(a, b);
  }

  /** Returns the decoded winning value of the slot, or {@code null} if it has none. */
  @Override
  public Object getValue(int slotNum) throws IOException {
    int ord = result[slotNum];
    if (ord == MISSING) return null;
    BytesRef term = docToTerm.lookupOrd(ord);
    return getObject(term);
  }

  /**
   * Wrapper to convert stored format to external format.
   * <p>
   * Non-numeric fields are returned as decoded by the field type. Integer fields are
   * returned as a long, and float fields as a float; all other number types are
   * returned as decoded.
   * </p>
   * <p>
   * NOTE(review): the earlier documentation said float fields should yield a double
   * "like other accumulators", but the code returns {@code floatValue()} — confirm
   * which one is intended.
   * </p>
   */
  private Object getObject(BytesRef term) {
    Object obj = sf.getType().toObject(sf, term);
    NumberType type = sf.getType().getNumberType();
    if (type == null) {
      return obj;
    } else if (type == NumberType.INTEGER) {
      // this is to ensure consistent behavior with other accumulators
      // where long is returned for integer field types
      return ((Number)obj).longValue();
    } else if (type == NumberType.FLOAT) {
      return ((Number)obj).floatValue();
    }
    return obj;
  }

  @Override
  public void reset() throws IOException {
    Arrays.fill(result, MISSING);
  }

  @Override
  public void resize(Resizer resizer) {
    resizer.resize(result, MISSING);
  }

  /**
   * Callback for one term ordinal of the current document: keeps it if the slot is
   * still empty or if it beats the current winner in the {@code minmax} direction.
   */
  @Override
  public void call(int termNum) {
    int currOrd = result[currentSlot];
    if (currOrd == MISSING || Integer.compare(termNum, currOrd) * minmax < 0) {
      result[currentSlot] = termNum;
    }
  }
}
class DFuncAcc extends DoubleFuncSlotAcc { class DFuncAcc extends DoubleFuncSlotAcc {
public DFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) { public DFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) {
super(values, fcontext, numSlots, Double.NaN); super(values, fcontext, numSlots, Double.NaN);
@ -291,7 +382,6 @@ public class MinMaxAgg extends SimpleAggValueSource {
} }
} }
class SingleValuedOrdAcc extends OrdAcc { class SingleValuedOrdAcc extends OrdAcc {
SortedDocValues topLevel; SortedDocValues topLevel;
SortedDocValues[] subDvs; SortedDocValues[] subDvs;
@ -346,5 +436,94 @@ public class MinMaxAgg extends SimpleAggValueSource {
} }
} }
/**
 * Min/max accumulator for multi-valued fields with sorted set doc values. For each
 * slot it remembers the winning ordinal in the top-level ordinal space, or
 * {@link #MISSING} when nothing has been collected yet; the direction comes from the
 * enclosing aggregation's {@code minmax}.
 */
class MinMaxSortedSetDVAcc extends DocValuesAcc {
  /** Marker stored in {@link #slotOrd} for a slot that has seen no value. */
  final static int MISSING = -1;
  /** Doc values across the whole index; used to decode the winning ordinal. */
  SortedSetDocValues topLevel;
  /** Per-segment doc values when {@link #topLevel} is a multi-segment view, else null. */
  SortedSetDocValues[] subDvs;
  /** Segment-to-global ordinal mapping when {@link #topLevel} is multi-segment, else null. */
  OrdinalMap ordMap;
  /** Ordinal translation for the current segment; only assigned for multi-segment views. */
  LongValues toGlobal;
  /** Doc values of the segment currently being collected. */
  SortedSetDocValues subDv;
  /** Winning ordinal per slot, or {@link #MISSING}. */
  long[] slotOrd;

  public MinMaxSortedSetDVAcc(FacetContext fcontext, SchemaField field, int numSlots) throws IOException {
    super(fcontext, field);
    this.slotOrd = new long[numSlots];
    Arrays.fill(slotOrd, MISSING);
  }

  /** Re-obtains the top-level doc values and, if multi-segment, its parts and mapping. */
  @Override
  public void resetIterators() throws IOException {
    super.resetIterators();
    topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
    if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) {
      ordMap = ((MultiDocValues.MultiSortedSetDocValues)topLevel).mapping;
      subDvs = ((MultiDocValues.MultiSortedSetDocValues)topLevel).values;
    } else {
      ordMap = null;
      subDvs = null;
    }
  }

  /** Selects the doc values (and ordinal translation) that belong to the new segment. */
  @Override
  public void setNextReader(LeafReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    if (subDvs != null) {
      subDv = subDvs[readerContext.ord];
      toGlobal = ordMap.getGlobalOrds(readerContext.ord);
      assert toGlobal != null;
    } else {
      assert readerContext.ord==0 || topLevel.getValueCount() == 0;
      subDv = topLevel;
    }
  }

  /**
   * Orders slots by their winning ordinal; a slot without a value sorts before any
   * slot that has one, and two slots without a value compare as equal.
   */
  @Override
  public int compare(int slotA, int slotB) {
    long a = slotOrd[slotA];
    long b = slotOrd[slotB];
    if (a == b) {
      // also covers "both MISSING", which must be 0 to keep the ordering consistent
      return 0;
    }
    if (a == MISSING) {
      return -1;
    }
    if (b == MISSING) {
      return 1;
    }
    return Long.compare(a, b);
  }

  /** Returns the decoded winning value of the slot, or {@code null} if it has none. */
  @Override
  public Object getValue(int slotNum) throws IOException {
    long ord = slotOrd[slotNum];
    if (ord == MISSING) return null;
    BytesRef term = topLevel.lookupOrd(ord);
    return sf.getType().toObject(sf, term);
  }

  @Override
  public void reset() throws IOException {
    Arrays.fill(slotOrd, MISSING);
  }

  @Override
  public void resize(Resizer resizer) {
    resizer.resize(slotOrd, MISSING);
  }

  /**
   * Picks the document's candidate ordinal — the first one for min, the last one for
   * max — translates it to the top-level ordinal space when a mapping is in use, and
   * stores it if the slot is empty or the candidate beats the current winner.
   */
  @Override
  public void collectValues(int doc, int slotNum) throws IOException {
    long newOrd = MISSING;
    if (minmax == 1) {// min
      newOrd = subDv.nextOrd();
    } else  { // max
      long ord;
      while ((ord = subDv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        newOrd = ord;
      }
    }
    long currOrd = slotOrd[slotNum];
    long finalOrd = toGlobal==null ? newOrd : toGlobal.get(newOrd);
    if (currOrd == MISSING || Long.compare(finalOrd, currOrd) * minmax < 0) {
      slotOrd[slotNum] = finalOrd;
    }
  }

  @Override
  protected DocIdSetIterator docIdSetIterator() {
    return subDv;
  }
}
} }

View File

@ -17,17 +17,29 @@
package org.apache.solr.search.facet; package org.apache.solr.search.facet;
import java.io.IOException; import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Date;
import java.util.List; import java.util.List;
import java.util.function.IntFunction; import java.util.function.IntFunction;
import com.tdunning.math.stats.AVLTreeDigest; import com.tdunning.math.stats.AVLTreeDigest;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.FunctionQParser; import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.SyntaxError; import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.ValueSourceParser; import org.apache.solr.search.ValueSourceParser;
import org.apache.solr.search.function.FieldNameValueSource;
public class PercentileAgg extends SimpleAggValueSource { public class PercentileAgg extends SimpleAggValueSource {
List<Double> percentiles; List<Double> percentiles;
@ -39,7 +51,31 @@ public class PercentileAgg extends SimpleAggValueSource {
@Override @Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
return new Acc(getArg(), fcontext, numSlots); ValueSource vs = getArg();
if (vs instanceof FieldNameValueSource) {
String field = ((FieldNameValueSource) vs).getFieldName();
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
if (sf.getType().getNumberType() == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for " + sf.getType().getTypeName());
}
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
if (sf.hasDocValues()) {
if (sf.getType().isPointField()) {
return new PercentileSortedNumericAcc(fcontext, sf, numSlots);
}
return new PercentileSortedSetAcc(fcontext, sf, numSlots);
}
if (sf.getType().isPointField()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for PointField w/o docValues");
}
return new PercentileUnInvertedFieldAcc(fcontext, sf, numSlots);
}
vs = sf.getType().getValueSource(sf, null);
}
return new Acc(vs, fcontext, numSlots);
} }
@Override @Override
@ -80,7 +116,6 @@ public class PercentileAgg extends SimpleAggValueSource {
} }
} }
protected Object getValueFromDigest(AVLTreeDigest digest) { protected Object getValueFromDigest(AVLTreeDigest digest) {
if (digest == null) { if (digest == null) {
return null; return null;
@ -90,7 +125,7 @@ public class PercentileAgg extends SimpleAggValueSource {
return digest.quantile( percentiles.get(0) * 0.01 ); return digest.quantile( percentiles.get(0) * 0.01 );
} }
List<Double> lst = new ArrayList(percentiles.size()); List<Double> lst = new ArrayList<>(percentiles.size());
for (Double percentile : percentiles) { for (Double percentile : percentiles) {
double val = digest.quantile( percentile * 0.01 ); double val = digest.quantile( percentile * 0.01 );
lst.add( val ); lst.add( val );
@ -98,8 +133,6 @@ public class PercentileAgg extends SimpleAggValueSource {
return lst; return lst;
} }
class Acc extends FuncSlotAcc { class Acc extends FuncSlotAcc {
protected AVLTreeDigest[] digests; protected AVLTreeDigest[] digests;
protected ByteBuffer buf; protected ByteBuffer buf;
@ -155,6 +188,76 @@ public class PercentileAgg extends SimpleAggValueSource {
return getValueFromDigest( digests[slotNum] ); return getValueFromDigest( digests[slotNum] );
} }
public Object getShardValue(int slot) throws IOException {
AVLTreeDigest digest = digests[slot];
if (digest == null) return null; // no values for this slot
digest.compress();
int sz = digest.byteSize();
if (buf == null || buf.capacity() < sz) {
buf = ByteBuffer.allocate(sz+(sz>>1)); // oversize by 50%
} else {
buf.clear();
}
digest.asSmallBytes(buf);
byte[] arr = Arrays.copyOf(buf.array(), buf.position());
return arr;
}
@Override
public void reset() {
digests = new AVLTreeDigest[digests.length];
sortvals = null;
}
@Override
public void resize(Resizer resizer) {
digests = resizer.resize(digests, null);
}
}
abstract class BasePercentileDVAcc extends DocValuesAcc {
AVLTreeDigest[] digests;
protected ByteBuffer buf;
double[] sortvals;
public BasePercentileDVAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
super(fcontext, sf);
digests = new AVLTreeDigest[numSlots];
}
@Override
public int compare(int slotA, int slotB) {
if (sortvals == null) {
fillSortVals();
}
return Double.compare(sortvals[slotA], sortvals[slotB]);
}
private void fillSortVals() {
sortvals = new double[ digests.length ];
double sortp = percentiles.get(0) * 0.01;
for (int i=0; i<digests.length; i++) {
AVLTreeDigest digest = digests[i];
if (digest == null) {
sortvals[i] = Double.NEGATIVE_INFINITY;
} else {
sortvals[i] = digest.quantile(sortp);
}
}
}
@Override
public Object getValue(int slotNum) throws IOException {
if (fcontext.isShard()) {
return getShardValue(slotNum);
}
if (sortvals != null && percentiles.size()==1) {
// we've already calculated everything we need
return digests[slotNum] != null ? sortvals[slotNum] : null;
}
return getValueFromDigest( digests[slotNum] );
}
public Object getShardValue(int slot) throws IOException { public Object getShardValue(int slot) throws IOException {
AVLTreeDigest digest = digests[slot]; AVLTreeDigest digest = digests[slot];
@ -172,7 +275,6 @@ public class PercentileAgg extends SimpleAggValueSource {
return arr; return arr;
} }
@Override @Override
public void reset() { public void reset() {
digests = new AVLTreeDigest[digests.length]; digests = new AVLTreeDigest[digests.length];
@ -185,6 +287,184 @@ public class PercentileAgg extends SimpleAggValueSource {
} }
} }
/**
 * Percentile accumulator for fields whose values are read from
 * {@link SortedNumericDocValues}. Each value of a collected document is decoded to a
 * double and added to the {@link AVLTreeDigest} kept for the target slot.
 */
class PercentileSortedNumericAcc extends BasePercentileDVAcc {
  SortedNumericDocValues values;

  public PercentileSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots);
  }

  /** Switches to the sorted-numeric doc values of this field for the given segment. */
  @Override
  public void setNextReader(LeafReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    values = DocValues.getSortedNumeric(readerContext.reader(), sf.getName());
  }

  @Override
  protected DocIdSetIterator docIdSetIterator() {
    return values;
  }

  /**
   * Adds every value of the current document to the slot's digest, creating the digest
   * on first use.
   * NOTE(review): assumes {@code values} is already positioned on {@code doc} by the caller - confirm.
   */
  @Override
  protected void collectValues(int doc, int slot) throws IOException {
    if (digests[slot] == null) {
      digests[slot] = new AVLTreeDigest(100);
    }
    final AVLTreeDigest target = digests[slot];
    int remaining = values.docValueCount();
    while (remaining-- > 0) {
      target.add(getDouble(values.nextValue()));
    }
  }

  /**
   * Decodes a raw doc-values long into a double according to the field's number type.
   * Integer, long and date values are used as-is; float and double values are decoded
   * from their sortable bit representation.
   */
  protected double getDouble(long val) {
    switch (sf.getType().getNumberType()) {
      case FLOAT:
        return NumericUtils.sortableIntToFloat((int) val);
      case DOUBLE:
        return NumericUtils.sortableLongToDouble(val);
      case INTEGER:
      case LONG:
      case DATE:
        return val;
      default:
        // not expected for the number types handled above
        return 0.0d;
    }
  }
}
/**
 * Percentile accumulator for fields whose values are read from {@link SortedSetDocValues}.
 * Each ordinal of a collected document is resolved to its term, converted to a number via
 * the field type, and added to the {@link AVLTreeDigest} kept for the target slot.
 */
class PercentileSortedSetAcc extends BasePercentileDVAcc {
  SortedSetDocValues values;

  public PercentileSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots);
  }

  /** Switches to the sorted-set doc values of this field for the given segment. */
  @Override
  public void setNextReader(LeafReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    values = DocValues.getSortedSet(readerContext.reader(), sf.getName());
  }

  @Override
  protected DocIdSetIterator docIdSetIterator() {
    return values;
  }

  /**
   * Adds every value of the current document to the slot's digest, creating the digest
   * on first use. Dates contribute their epoch-millisecond time, other values their
   * {@code doubleValue()}.
   */
  @Override
  protected void collectValues(int doc, int slot) throws IOException {
    if (digests[slot] == null) {
      digests[slot] = new AVLTreeDigest(100);
    }
    final AVLTreeDigest target = digests[slot];
    for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
      final Object decoded = sf.getType().toObject(sf, values.lookupOrd(ord));
      final double numeric;
      if (decoded instanceof Date) {
        numeric = ((Date) decoded).getTime();
      } else {
        numeric = ((Number) decoded).doubleValue();
      }
      target.add(numeric);
    }
  }
}
/**
 * Percentile accumulator for fields read through an {@link UnInvertedField}. One
 * {@link AVLTreeDigest} is kept per slot; sort values are computed lazily from the first
 * requested percentile the first time two slots are compared.
 */
class PercentileUnInvertedFieldAcc extends UnInvertedFieldAcc {
  protected AVLTreeDigest[] digests;
  protected ByteBuffer buf;
  protected double[] sortvals;
  private int currentSlot;

  public PercentileUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots);
    digests = new AVLTreeDigest[numSlots];
  }

  /**
   * Remembers the target slot and asks {@code docToTerm} to report the document's terms
   * to this accumulator (see {@link #call(int)}).
   */
  @Override
  public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
    this.currentSlot = slot;
    docToTerm.getBigTerms(doc + currentDocBase, this);
    docToTerm.getSmallTerms(doc + currentDocBase, this);
  }

  /**
   * Receives one term ordinal, converts its term to a number via the field type and adds
   * it to the digest of the slot set by the last {@code collect} call.
   * NOTE(review): presumably invoked by docToTerm.getBigTerms/getSmallTerms - confirm.
   */
  @Override
  public void call(int ord) {
    if (digests[currentSlot] == null) {
      digests[currentSlot] = new AVLTreeDigest(100);
    }
    final AVLTreeDigest target = digests[currentSlot];
    try {
      final Object decoded = sf.getType().toObject(sf, docToTerm.lookupOrd(ord));
      final double numeric;
      if (decoded instanceof Date) {
        numeric = ((Date) decoded).getTime();
      } else {
        numeric = ((Number) decoded).doubleValue();
      }
      target.add(numeric);
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /** Orders slots by the first requested percentile; empty slots sort lowest. */
  @Override
  public int compare(int slotA, int slotB) {
    if (sortvals == null) {
      fillSortVals();
    }
    final double left = sortvals[slotA];
    final double right = sortvals[slotB];
    return Double.compare(left, right);
  }

  /** Computes the sort value of every slot from the first entry of {@code percentiles}. */
  private void fillSortVals() {
    sortvals = new double[digests.length];
    final double quantile = percentiles.get(0) * 0.01;
    int slot = 0;
    for (AVLTreeDigest current : digests) {
      sortvals[slot++] = (current != null) ? current.quantile(quantile) : Double.NEGATIVE_INFINITY;
    }
  }

  /**
   * Returns the serialized digest on a shard; otherwise the cached sort value when it
   * already is the full answer, or the value derived from the slot's digest.
   */
  @Override
  public Object getValue(int slotNum) throws IOException {
    if (fcontext.isShard()) {
      return getShardValue(slotNum);
    }
    final boolean sortValueIsAnswer = sortvals != null && percentiles.size() == 1;
    if (!sortValueIsAnswer) {
      return getValueFromDigest(digests[slotNum]);
    }
    // we've already calculated everything we need
    if (digests[slotNum] == null) {
      return null;
    }
    return sortvals[slotNum];
  }

  /** Serializes the slot's digest to a byte array, or returns null when the slot is empty. */
  public Object getShardValue(int slot) throws IOException {
    final AVLTreeDigest current = digests[slot];
    if (current == null) {
      return null;
    }
    current.compress();
    final int needed = current.byteSize();
    final boolean reusable = buf != null && buf.capacity() >= needed;
    if (reusable) {
      buf.clear();
    } else {
      buf = ByteBuffer.allocate(needed + (needed >> 1)); // oversize by 50%
    }
    current.asSmallBytes(buf);
    return Arrays.copyOf(buf.array(), buf.position());
  }

  @Override
  public void reset() {
    sortvals = null;
    digests = new AVLTreeDigest[digests.length];
  }

  @Override
  public void resize(Resizer resizer) {
    digests = resizer.resize(digests, null);
  }
}
class Merger extends FacetSortableMerger { class Merger extends FacetSortableMerger {
protected AVLTreeDigest digest; protected AVLTreeDigest digest;
@ -221,4 +501,3 @@ public class PercentileAgg extends SimpleAggValueSource {
} }
} }
} }

View File

@ -431,13 +431,8 @@ class AvgSlotAcc extends DoubleFuncSlotAcc {
} }
} }
private double avg(double tot, int count) {
return count == 0 ? 0 : tot / count; // returns 0 instead of NaN.. todo - make configurable? if NaN, we need to
// handle comparisons though...
}
private double avg(int slot) { private double avg(int slot) {
return avg(result[slot], counts[slot]); // calc once and cache in result? return AggUtil.avg(result[slot], counts[slot]); // calc once and cache in result?
} }
@Override @Override
@ -488,13 +483,8 @@ class VarianceSlotAcc extends DoubleFuncSlotAcc {
this.sum = resizer.resize(this.sum, 0); this.sum = resizer.resize(this.sum, 0);
} }
private double variance(double sumSq, double sum, int count) {
double val = count == 0 ? 0 : (sumSq / count) - Math.pow(sum / count, 2);
return val;
}
private double variance(int slot) { private double variance(int slot) {
return variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result? return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
} }
@Override @Override
@ -550,13 +540,8 @@ class StddevSlotAcc extends DoubleFuncSlotAcc {
this.result = resizer.resize(this.result, 0); this.result = resizer.resize(this.result, 0);
} }
private double stdDev(double sumSq, double sum, int count) {
double val = count == 0 ? 0 : Math.sqrt((sumSq / count) - Math.pow(sum / count, 2));
return val;
}
private double stdDev(int slot) { private double stdDev(int slot) {
return stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result? return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
} }
@Override @Override

View File

@ -21,6 +21,9 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.function.FieldNameValueSource;
public class StddevAgg extends SimpleAggValueSource { public class StddevAgg extends SimpleAggValueSource {
@ -30,7 +33,31 @@ public class StddevAgg extends SimpleAggValueSource {
@Override @Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
return new StddevSlotAcc(getArg(), fcontext, numSlots); ValueSource vs = getArg();
if (vs instanceof FieldNameValueSource) {
String field = ((FieldNameValueSource) vs).getFieldName();
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
if (sf.getType().getNumberType() == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for " + sf.getType().getTypeName());
}
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
if (sf.hasDocValues()) {
if (sf.getType().isPointField()) {
return new StddevSortedNumericAcc(fcontext, sf, numSlots);
}
return new StddevSortedSetAcc(fcontext, sf, numSlots);
}
if (sf.getType().isPointField()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for PointField w/o docValues");
}
return new StddevUnInvertedFieldAcc(fcontext, sf, numSlots);
}
vs = sf.getType().getValueSource(sf, null);
}
return new StddevSlotAcc(vs, fcontext, numSlots);
} }
@Override @Override
@ -59,8 +86,43 @@ public class StddevAgg extends SimpleAggValueSource {
@Override @Override
protected double getDouble() { protected double getDouble() {
double val = count == 0 ? 0.0d : Math.sqrt((sumSq/count)-Math.pow(sum/count, 2)); return AggUtil.stdDev(sumSq, sum, count);
return val; }
}
/**
 * Standard-deviation accumulator built on {@code SDVSortedNumericAcc}; it only supplies
 * the final per-slot computation.
 */
class StddevSortedNumericAcc extends SDVSortedNumericAcc {
public StddevSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
super(fcontext, sf, numSlots);
}
/**
 * Returns {@code AggUtil.stdDev} of the slot's {@code result}, {@code sum} and {@code counts} entries.
 * NOTE(review): presumably result[slot] holds the running sum of squares maintained by the base class - confirm.
 */
@Override
protected double computeVal(int slot) {
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
}
/**
 * Standard-deviation accumulator built on {@code SDVSortedSetAcc}; it only supplies
 * the final per-slot computation.
 */
class StddevSortedSetAcc extends SDVSortedSetAcc {
public StddevSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
super(fcontext, sf, numSlots);
}
/**
 * Returns {@code AggUtil.stdDev} of the slot's {@code result}, {@code sum} and {@code counts} entries.
 * NOTE(review): presumably result[slot] holds the running sum of squares maintained by the base class - confirm.
 */
@Override
protected double computeVal(int slot) {
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
}
class StddevUnInvertedFieldAcc extends SDVUnInvertedFieldAcc {
public StddevUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
super(fcontext, sf, numSlots);
}
@Override
protected double computeVal(int slot) {
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
} }
};
} }

View File

@ -17,8 +17,15 @@
package org.apache.solr.search.facet; package org.apache.solr.search.facet;
import java.io.IOException; import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Date;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.function.FieldNameValueSource;
public class SumAgg extends SimpleAggValueSource { public class SumAgg extends SimpleAggValueSource {
@ -28,7 +35,31 @@ public class SumAgg extends SimpleAggValueSource {
@Override @Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
return new SumSlotAcc(getArg(), fcontext, numSlots); ValueSource vs = getArg();
if (vs instanceof FieldNameValueSource) {
String field = ((FieldNameValueSource)vs).getFieldName();
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
if (sf.getType().getNumberType() == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for " + sf.getType().getTypeName());
}
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
if (sf.hasDocValues()) {
if (sf.getType().isPointField()) {
return new SumSortedNumericAcc(fcontext, sf, numSlots);
}
return new SumSortedSetAcc(fcontext, sf, numSlots);
}
if (sf.getType().isPointField()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for PointField w/o docValues");
}
return new SumUnInvertedFieldAcc(fcontext, sf, numSlots);
}
vs = sf.getType().getValueSource(sf, null);
}
return new SumSlotAcc(vs, fcontext, numSlots);
} }
@Override @Override
@ -48,5 +79,58 @@ public class SumAgg extends SimpleAggValueSource {
return val; return val;
} }
} }
/**
 * Sum accumulator for fields read from sorted-numeric doc values: every value of a
 * collected document is decoded with {@code getDouble} and added to the slot's total.
 */
class SumSortedNumericAcc extends DoubleSortedNumericDVAcc {

  public SumSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots, 0);
  }

  /** Adds each value of the current document to {@code result[slot]}, in iteration order. */
  @Override
  protected void collectValues(int doc, int slot) throws IOException {
    int remaining = values.docValueCount();
    while (remaining-- > 0) {
      result[slot] += getDouble(values.nextValue());
    }
  }
}
/**
 * Sum accumulator for fields read from sorted-set doc values: every ordinal of a
 * collected document is resolved to its term, converted to a number via the field type,
 * and added to the slot's total.
 */
class SumSortedSetAcc extends DoubleSortedSetDVAcc {

  public SumSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots, 0);
  }

  /**
   * Adds each value of the current document to {@code result[slot]}. Dates contribute
   * their epoch-millisecond time, other values their {@code doubleValue()}.
   */
  @Override
  protected void collectValues(int doc, int slot) throws IOException {
    for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
      final Object decoded = sf.getType().toObject(sf, values.lookupOrd(ord));
      final double numeric;
      if (decoded instanceof Date) {
        numeric = ((Date) decoded).getTime();
      } else {
        numeric = ((Number) decoded).doubleValue();
      }
      result[slot] += numeric;
    }
  }
}
/**
 * Sum accumulator for fields read through an un-inverted field: each reported term is
 * converted to a number via the field type and added to the current slot's total.
 */
class SumUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {

  public SumUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots, 0);
  }

  /**
   * Adds the numeric value of the given term to {@code result[currentSlot]}. Dates
   * contribute their epoch-millisecond time, other values their {@code doubleValue()}.
   * An {@link IOException} is rethrown as {@link UncheckedIOException}.
   */
  @Override
  public void call(int termNum) {
    try {
      final Object decoded = sf.getType().toObject(sf, docToTerm.lookupOrd(termNum));
      final double numeric;
      if (decoded instanceof Date) {
        numeric = ((Date) decoded).getTime();
      } else {
        numeric = ((Number) decoded).doubleValue();
      }
      result[currentSlot] += numeric;
    } catch (IOException e) {
      // find a better way to do it
      throw new UncheckedIOException(e);
    }
  }
}
} }

View File

@ -17,8 +17,15 @@
package org.apache.solr.search.facet; package org.apache.solr.search.facet;
import java.io.IOException; import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Date;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.function.FieldNameValueSource;
public class SumsqAgg extends SimpleAggValueSource { public class SumsqAgg extends SimpleAggValueSource {
public SumsqAgg(ValueSource vs) { public SumsqAgg(ValueSource vs) {
@ -27,11 +34,88 @@ public class SumsqAgg extends SimpleAggValueSource {
@Override @Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
return new SumsqSlotAcc(getArg(), fcontext, numSlots); ValueSource vs = getArg();
if (vs instanceof FieldNameValueSource) {
String field = ((FieldNameValueSource)vs).getFieldName();
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
if (sf.getType().getNumberType() == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for " + sf.getType().getTypeName());
}
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
if (sf.hasDocValues()) {
if (sf.getType().isPointField()) {
return new SumSqSortedNumericAcc(fcontext, sf, numSlots);
}
return new SumSqSortedSetAcc(fcontext, sf, numSlots);
}
if (sf.getType().isPointField()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for PointField w/o docValues");
}
return new SumSqUnInvertedFieldAcc(fcontext, sf, numSlots);
}
vs = sf.getType().getValueSource(sf, null);
}
return new SumsqSlotAcc(vs, fcontext, numSlots);
} }
@Override @Override
public FacetMerger createFacetMerger(Object prototype) { public FacetMerger createFacetMerger(Object prototype) {
return new SumAgg.Merger(); return new SumAgg.Merger();
} }
/**
 * Sum-of-squares accumulator for fields read from sorted-numeric doc values: every value
 * of a collected document is decoded with {@code getDouble}, squared, and added to the
 * slot's total.
 */
class SumSqSortedNumericAcc extends DoubleSortedNumericDVAcc {

  public SumSqSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots, 0);
  }

  /** Adds the square of each value of the current document to {@code result[slot]}. */
  @Override
  protected void collectValues(int doc, int slot) throws IOException {
    int remaining = values.docValueCount();
    while (remaining-- > 0) {
      final double numeric = getDouble(values.nextValue());
      result[slot] += numeric * numeric;
    }
  }
}
/**
 * Sum-of-squares accumulator for fields read from sorted-set doc values: every ordinal of
 * a collected document is resolved to its term, converted to a number via the field type,
 * squared, and added to the slot's total.
 */
class SumSqSortedSetAcc extends DoubleSortedSetDVAcc {

  public SumSqSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots, 0);
  }

  /**
   * Adds the square of each value of the current document to {@code result[slot]}. Dates
   * contribute their epoch-millisecond time, other values their {@code doubleValue()}.
   */
  @Override
  protected void collectValues(int doc, int slot) throws IOException {
    for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
      final Object decoded = sf.getType().toObject(sf, values.lookupOrd(ord));
      final double numeric;
      if (decoded instanceof Date) {
        numeric = ((Date) decoded).getTime();
      } else {
        numeric = ((Number) decoded).doubleValue();
      }
      result[slot] += numeric * numeric;
    }
  }
}
/**
 * Sum-of-squares accumulator for fields read through an un-inverted field: each reported
 * term is converted to a number via the field type, squared, and added to the current
 * slot's total.
 */
class SumSqUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {

  public SumSqUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots, 0);
  }

  /**
   * Adds the squared numeric value of the given term to {@code result[currentSlot]}.
   * Dates contribute their epoch-millisecond time, other values their
   * {@code doubleValue()}. An {@link IOException} is rethrown as
   * {@link UncheckedIOException}.
   */
  @Override
  public void call(int termNum) {
    try {
      final Object decoded = sf.getType().toObject(sf, docToTerm.lookupOrd(termNum));
      final double numeric;
      if (decoded instanceof Date) {
        numeric = ((Date) decoded).getTime();
      } else {
        numeric = ((Number) decoded).doubleValue();
      }
      result[currentSlot] += numeric * numeric;
    } catch (IOException e) {
      // find a better way to do it
      throw new UncheckedIOException(e);
    }
  }
}
} }

View File

@ -18,7 +18,13 @@
package org.apache.solr.search.facet; package org.apache.solr.search.facet;
import java.io.IOException; import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.function.IntFunction;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
/** /**
@ -28,9 +34,11 @@ public abstract class UnInvertedFieldAcc extends SlotAcc implements UnInvertedFi
UnInvertedField uif; UnInvertedField uif;
UnInvertedField.DocToTerm docToTerm; UnInvertedField.DocToTerm docToTerm;
SchemaField sf;
public UnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { public UnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
super(fcontext); super(fcontext);
this.sf = sf;
uif = UnInvertedField.getUnInvertedField(sf.getName(), fcontext.qcontext.searcher()); uif = UnInvertedField.getUnInvertedField(sf.getName(), fcontext.qcontext.searcher());
docToTerm = uif.new DocToTerm(); docToTerm = uif.new DocToTerm();
fcontext.qcontext.addCloseHook(this); fcontext.qcontext.addCloseHook(this);
@ -44,3 +52,108 @@ public abstract class UnInvertedFieldAcc extends SlotAcc implements UnInvertedFi
} }
} }
} }
/**
 * Base accumulator that keeps one double per slot for fields read through an
 * {@link UnInvertedField}. Subclasses implement the term callback and update
 * {@code result[currentSlot]} for every term reported for the collected document.
 */
abstract class DoubleUnInvertedFieldAcc extends UnInvertedFieldAcc {
  /** Per-slot accumulated value. */
  double[] result;
  /** Slot targeted by the collect call currently in progress; read by the term callback. */
  int currentSlot;
  /** Value every slot starts from and is reset to. */
  double initialValue;

  /**
   * @param initialValue starting value for every slot; when non-zero the result array is
   *                     pre-filled with it
   */
  public DoubleUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots, double initialValue) throws IOException {
    super(fcontext, sf, numSlots);
    result = new double[numSlots];
    if (initialValue != 0) {
      this.initialValue = initialValue;
      Arrays.fill(result, initialValue);
    }
  }

  /**
   * Records the target slot, then has {@code docToTerm} report the document's terms to
   * this accumulator.
   */
  @Override
  public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
    this.currentSlot = slot;
    docToTerm.getBigTerms(doc + currentDocBase, this);
    docToTerm.getSmallTerms(doc + currentDocBase, this);
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Double.compare(result[slotA], result[slotB]);
  }

  @Override
  public Object getValue(int slotNum) throws IOException {
    return result[slotNum];
  }

  @Override
  public void reset() throws IOException {
    Arrays.fill(result, initialValue);
  }

  /**
   * Replaces the per-slot array with the one produced by the resizer. The resizer returns
   * the resized array, so its return value must be stored; ignoring it would leave this
   * accumulator on the old array.
   */
  @Override
  public void resize(Resizer resizer) {
    this.result = resizer.resize(result, initialValue);
  }
}
/**
 * Base accumulator to compute standard deviation and variance for uninvertible fields.
 * For every slot it tracks the number of values ({@code counts}), their sum ({@code sum})
 * and the sum of their squares ({@code result}); subclasses turn these into the final
 * statistic in {@link #computeVal(int)}.
 */
abstract class SDVUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {
  /** Number of values seen per slot. */
  int[] counts;
  /** Sum of values per slot. */
  double[] sum;

  public SDVUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots, 0);
    this.counts = new int[numSlots];
    this.sum = new double[numSlots];
  }

  /**
   * Folds the numeric value of the given term into the current slot's count, sum and sum
   * of squares. Dates contribute their epoch-millisecond time, other values their
   * {@code doubleValue()}. An {@link IOException} is rethrown as
   * {@link UncheckedIOException}.
   */
  @Override
  public void call(int termNum) {
    try {
      BytesRef term = docToTerm.lookupOrd(termNum);
      Object obj = sf.getType().toObject(sf, term);
      double val = obj instanceof Date ? ((Date) obj).getTime() : ((Number) obj).doubleValue();
      result[currentSlot] += val * val;
      sum[currentSlot] += val;
      counts[currentSlot]++;
    } catch (IOException e) {
      // find a better way to do it
      throw new UncheckedIOException(e);
    }
  }

  /** Computes the final statistic for a slot from its count, sum and sum of squares. */
  protected abstract double computeVal(int slot);

  @Override
  public int compare(int slotA, int slotB) {
    return Double.compare(computeVal(slotA), computeVal(slotB));
  }

  /**
   * On a shard, returns the partial state as a list of {@code [count, sumOfSquares, sum]};
   * otherwise returns the computed statistic.
   */
  @Override
  public Object getValue(int slot) {
    if (fcontext.isShard()) {
      ArrayList<Object> lst = new ArrayList<>(3);
      lst.add(counts[slot]);
      lst.add(result[slot]);
      lst.add(sum[slot]);
      return lst;
    } else {
      return computeVal(slot);
    }
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    Arrays.fill(counts, 0);
    Arrays.fill(sum, 0);
  }

  /**
   * Replaces all three per-slot arrays with the ones produced by the resizer. The resizer
   * returns the resized array, so each return value is stored; the arrays are assigned
   * here together so they always stay the same length.
   */
  @Override
  public void resize(Resizer resizer) {
    this.result = resizer.resize(result, initialValue);
    this.counts = resizer.resize(counts, 0);
    this.sum = resizer.resize(sum, 0);
  }
}

View File

@ -20,6 +20,9 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.function.FieldNameValueSource;
public class VarianceAgg extends SimpleAggValueSource { public class VarianceAgg extends SimpleAggValueSource {
@ -29,7 +32,31 @@ public class VarianceAgg extends SimpleAggValueSource {
@Override @Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
return new VarianceSlotAcc(getArg(), fcontext, numSlots); ValueSource vs = getArg();
if (vs instanceof FieldNameValueSource) {
String field = ((FieldNameValueSource) vs).getFieldName();
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
if (sf.getType().getNumberType() == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for " + sf.getType().getTypeName());
}
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
if (sf.hasDocValues()) {
if (sf.getType().isPointField()) {
return new VarianceSortedNumericAcc(fcontext, sf, numSlots);
}
return new VarianceSortedSetAcc(fcontext, sf, numSlots);
}
if (sf.getType().isPointField()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
name() + " aggregation not supported for PointField w/o docValues");
}
return new VarianceUnInvertedFieldAcc(fcontext, sf, numSlots);
}
vs = sf.getType().getValueSource(sf, null);
}
return new VarianceSlotAcc(vs, fcontext, numSlots);
} }
@Override @Override
@ -58,8 +85,43 @@ public class VarianceAgg extends SimpleAggValueSource {
@Override @Override
protected double getDouble() { protected double getDouble() {
double val = count == 0 ? 0.0d : (sumSq/count)-Math.pow(sum/count, 2); return AggUtil.variance(sumSq, sum, count);
return val; }
}
/**
 * Variance accumulator built on {@code SDVSortedNumericAcc}; it only supplies
 * the final per-slot computation.
 */
class VarianceSortedNumericAcc extends SDVSortedNumericAcc {
public VarianceSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
super(fcontext, sf, numSlots);
}
/**
 * Returns {@code AggUtil.variance} of the slot's {@code result}, {@code sum} and {@code counts} entries.
 * NOTE(review): presumably result[slot] holds the running sum of squares maintained by the base class - confirm.
 */
@Override
protected double computeVal(int slot) {
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
}
/**
 * Variance accumulator built on {@code SDVSortedSetAcc}; it only supplies
 * the final per-slot computation.
 */
class VarianceSortedSetAcc extends SDVSortedSetAcc {
public VarianceSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
super(fcontext, sf, numSlots);
}
/**
 * Returns {@code AggUtil.variance} of the slot's {@code result}, {@code sum} and {@code counts} entries.
 * NOTE(review): presumably result[slot] holds the running sum of squares maintained by the base class - confirm.
 */
@Override
protected double computeVal(int slot) {
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
}
class VarianceUnInvertedFieldAcc extends SDVUnInvertedFieldAcc {
public VarianceUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
super(fcontext, sf, numSlots);
}
@Override
protected double computeVal(int slot) {
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
} }
};
} }

View File

@ -694,6 +694,7 @@
--> -->
<dynamicField name="*_i" type="int" indexed="true" stored="true"/> <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
<dynamicField name="*_i1" type="int" indexed="true" stored="true" multiValued="false" sortMissingLast="true"/> <dynamicField name="*_i1" type="int" indexed="true" stored="true" multiValued="false" sortMissingLast="true"/>
<dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_idv" type="int" indexed="true" stored="true" docValues="true" multiValued="false"/> <dynamicField name="*_idv" type="int" indexed="true" stored="true" docValues="true" multiValued="false"/>

View File

@ -18,22 +18,26 @@ package org.apache.solr.handler.component;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.text.DateFormat; import java.text.DateFormat;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Date; import java.util.Date;
import java.util.Iterator;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.TimeZone; import java.util.TimeZone;
import com.google.common.hash.HashFunction;
import com.tdunning.math.stats.AVLTreeDigest;
import org.apache.commons.math3.util.Combinations;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.queries.function.valuesource.QueryValueSource; import org.apache.lucene.queries.function.valuesource.QueryValueSource;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
@ -44,23 +48,17 @@ import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.StrUtils;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.StatsField.Stat;
import org.apache.solr.handler.component.StatsField.HllOptions; import org.apache.solr.handler.component.StatsField.HllOptions;
import org.apache.solr.handler.component.StatsField.Stat;
import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.commons.math3.util.Combinations;
import com.tdunning.math.stats.AVLTreeDigest;
import com.google.common.hash.HashFunction;
import org.apache.solr.util.hll.HLL; import org.apache.solr.util.hll.HLL;
import org.junit.BeforeClass; import org.junit.BeforeClass;
/** /**
* Statistics Component Test * Statistics Component Test (which also checks some equivalent json.facet functionality)
*/ */
public class StatsComponentTest extends SolrTestCaseJ4 { public class StatsComponentTest extends SolrTestCaseJ4 {
@ -298,56 +296,57 @@ public class StatsComponentTest extends SolrTestCaseJ4 {
"fq", "{!tag=fq1}id:1"), "fq", "{!tag=fq1}id:1"),
params("stats.field", "{!ex=fq1,fq2}"+f, "stats", "true", params("stats.field", "{!ex=fq1,fq2}"+f, "stats", "true",
"fq", "{!tag=fq1}-id_i:[0 TO 2]", "fq", "{!tag=fq1}-id_i:[0 TO 2]",
"fq", "{!tag=fq2}-id_i:[2 TO 1000]") }) { "fq", "{!tag=fq2}-id_i:[2 TO 1000]"),
params("json.facet", // note: no distinctValues support and not comparing min/max values
"{min:'min("+f+")',count:'countvals("+f+")',missing:'missing("+f+")',max:'max("+f+")', sum:'sum("+f+")', " +
" countDistinct:'unique("+f+")', sumOfSquares:'sumsq("+f+")', mean:'avg("+f+")', stddev:'stddev("+f+")' }")
}) {
// easy switch to know if/when we are using json.facet which doesn't support some options
final boolean json = (null != baseParams.get("json.facet"));
assertQ("test statistics values", assertQ("test statistics values",
req(baseParams, "q", "*:*", "stats.calcdistinct", "true") req(baseParams, "q", "*:*", "stats.calcdistinct", "true")
, "//double[@name='min'][.='-100.0']" , json ? "//*" : "//double[@name='min'][.='-100.0']"
, "//double[@name='max'][.='200.0']" , json ? "//*" : "//double[@name='max'][.='200.0']"
, "//double[@name='sum'][.='9.0']" , "//double[@name='sum'][.='9.0']"
, "//long[@name='count'][.='8']" , "//long[@name='count'][.='8']"
, "//long[@name='missing'][.='3']" , "//long[@name='missing'][.='3']"
, "//long[@name='countDistinct'][.='8']" , json ? "//int[@name='countDistinct'][.='8']": "//long[@name='countDistinct'][.='8']" // SOLR-11775
, "count(//arr[@name='distinctValues']/*)=8" , json ? "//*" : "count(//arr[@name='distinctValues']/*)=8"
, "//double[@name='sumOfSquares'][.='53101.0']" , "//double[@name='sumOfSquares'][.='53101.0']"
, "//double[@name='mean'][.='1.125']" , "//double[@name='mean'][.='1.125']"
, "//double[@name='stddev'][.='87.08852228787508']" ,json ? "//*" : "//double[@name='stddev'][.='87.08852228787508']" // SOLR-11725
); );
assertQ("test statistics values w/fq", assertQ("test statistics values w/fq",
req(baseParams, "fq", "-id:1", req(baseParams, "fq", "-id:1",
"q", "*:*", "stats.calcdistinct", "true") "q", "*:*", "stats.calcdistinct", "true")
, "//double[@name='min'][.='-40.0']" , json ? "//*" : "//double[@name='min'][.='-40.0']"
, "//double[@name='max'][.='200.0']" , json ? "//*" : "//double[@name='max'][.='200.0']"
, "//double[@name='sum'][.='119.0']" , "//double[@name='sum'][.='119.0']"
, "//long[@name='count'][.='6']" , "//long[@name='count'][.='6']"
, "//long[@name='missing'][.='3']" , "//long[@name='missing'][.='3']"
, "//long[@name='countDistinct'][.='6']" , json? "//int[@name='countDistinct'][.='6']" :"//long[@name='countDistinct'][.='6']" // SOLR-11775
, "count(//arr[@name='distinctValues']/*)=6" , json ? "//*" : "count(//arr[@name='distinctValues']/*)=6"
, "//double[@name='sumOfSquares'][.='43001.0']" , "//double[@name='sumOfSquares'][.='43001.0']"
, "//double[@name='mean'][.='19.833333333333332']" , "//double[@name='mean'][.='19.833333333333332']"
, "//double[@name='stddev'][.='90.15634568163611']" , json ? "//*" : "//double[@name='stddev'][.='90.15634568163611']" // SOLR-11725
); );
// TODO: why are there 3 identical requests below? if (!json) { // checking stats.facet makes no sense for json faceting
assertQ("test stats.facet (using boolean facet field)",
assertQ("test statistics values",
req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s") req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s")
, "//double[@name='min'][.='-100.0']" // baseline
, "//double[@name='max'][.='200.0']" , "//lst[@name='"+f+"']/double[@name='min'][.='-100.0']"
, "//double[@name='sum'][.='9.0']" , "//lst[@name='"+f+"']/double[@name='max'][.='200.0']"
, "//long[@name='count'][.='8']" , "//lst[@name='"+f+"']/double[@name='sum'][.='9.0']"
, "//long[@name='missing'][.='3']" , "//lst[@name='"+f+"']/long[@name='count'][.='8']"
, "//long[@name='countDistinct'][.='8']" , "//lst[@name='"+f+"']/long[@name='missing'][.='3']"
, "//lst[@name='"+f+"']/long[@name='countDistinct'][.='8']"
, "count(//lst[@name='" + f + "']/arr[@name='distinctValues']/*)=8" , "count(//lst[@name='" + f + "']/arr[@name='distinctValues']/*)=8"
, "//double[@name='sumOfSquares'][.='53101.0']" , "//lst[@name='"+f+"']/double[@name='sumOfSquares'][.='53101.0']"
, "//double[@name='mean'][.='1.125']" , "//lst[@name='"+f+"']/double[@name='mean'][.='1.125']"
, "//double[@name='stddev'][.='87.08852228787508']" , "//lst[@name='"+f+"']/double[@name='stddev'][.='87.08852228787508']"
); // facet 'true'
assertQ("test value for active_s=true",
req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s")
, "//lst[@name='true']/double[@name='min'][.='-100.0']" , "//lst[@name='true']/double[@name='min'][.='-100.0']"
, "//lst[@name='true']/double[@name='max'][.='200.0']" , "//lst[@name='true']/double[@name='max'][.='200.0']"
, "//lst[@name='true']/double[@name='sum'][.='70.0']" , "//lst[@name='true']/double[@name='sum'][.='70.0']"
@ -358,10 +357,7 @@ public class StatsComponentTest extends SolrTestCaseJ4 {
, "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']" , "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']"
, "//lst[@name='true']/double[@name='mean'][.='17.5']" , "//lst[@name='true']/double[@name='mean'][.='17.5']"
, "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']" , "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']"
); // facet 'false'
assertQ("test value for active_s=false",
req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s")
, "//lst[@name='false']/double[@name='min'][.='-40.0']" , "//lst[@name='false']/double[@name='min'][.='-40.0']"
, "//lst[@name='false']/double[@name='max'][.='10.0']" , "//lst[@name='false']/double[@name='max'][.='10.0']"
, "//lst[@name='false']/double[@name='sum'][.='-61.0']" , "//lst[@name='false']/double[@name='sum'][.='-61.0']"
@ -374,11 +370,16 @@ public class StatsComponentTest extends SolrTestCaseJ4 {
, "//lst[@name='false']/double[@name='stddev'][.='23.59908190304586']" , "//lst[@name='false']/double[@name='stddev'][.='23.59908190304586']"
); );
} }
}
assertQ("cardinality" assertQ("cardinality"
, req("q", "*:*", "rows", "0", "stats", "true", "stats.field", "{!cardinality=true}" + f) , req("q", "*:*", "rows", "0", "stats", "true", "stats.field", "{!cardinality=true}" + f)
, "//long[@name='cardinality'][.='8']" , "//long[@name='cardinality'][.='8']"
); );
assertQ("json cardinality"
, req("q", "*:*", "rows", "0", "json.facet", "{cardinality:'hll("+f+")'}")
, "//int[@name='cardinality'][.='8']" // SOLR-11775
);
} }
public void testFieldStatisticsResultsStringField() throws Exception { public void testFieldStatisticsResultsStringField() throws Exception {

View File

@ -43,20 +43,21 @@ import org.junit.Test;
public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistributedSearchTestCase { public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistributedSearchTestCase {
// TODO: add hll & variance - update all assertions to test their values (right after any mention of 'stddev') // TODO: add hll & variance - update all assertions to test their values (right after any mention of 'stddev')
private static List<String> ALL_STATS = Arrays.asList("min", "max", "sum", "stddev", "avg", "sumsq", "unique", "missing", "countvals"); private static List<String> ALL_STATS = Arrays.asList("min", "max", "sum", "stddev", "avg", "sumsq", "unique",
"missing", "countvals", "percentile");
private String STAT_FIELD = "stat_i1"; private final String STAT_FIELD;
private String ALL_STATS_JSON = ""; private String ALL_STATS_JSON = "";
public DistributedFacetSimpleRefinementLongTailTest() { public DistributedFacetSimpleRefinementLongTailTest() {
// we need DVs on point fields to compute stats & facets // we need DVs on point fields to compute stats & facets
if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true"); if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true");
// TODO: randomizing STAT_FIELD to be multiValued=true blocked by SOLR-11706 STAT_FIELD = random().nextBoolean() ? "stat_is" : "stat_i";
// STAT_FIELD = random().nextBoolean() ? "stat_i1" : "stat_i";
for (String stat : ALL_STATS) { for (String stat : ALL_STATS) {
ALL_STATS_JSON += stat + ":'" + stat + "(" + STAT_FIELD + ")',"; String val = stat.equals("percentile")? STAT_FIELD+",90": STAT_FIELD;
ALL_STATS_JSON += stat + ":'" + stat + "(" + val + ")',";
} }
} }
@ -232,6 +233,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(101L, bucket.get("countvals")); assertEquals(101L, bucket.get("countvals"));
assertEquals(0L, bucket.get("missing")); assertEquals(0L, bucket.get("missing"));
assertEquals(48.0D, bucket.get("sum")); assertEquals(48.0D, bucket.get("sum"));
assertEquals(1.0D, bucket.get("percentile"));
assertEquals(0.475247524752475D, (double) bucket.get("avg"), 0.1E-7); assertEquals(0.475247524752475D, (double) bucket.get("avg"), 0.1E-7);
assertEquals(54.0D, (double) bucket.get("sumsq"), 0.1E-7); assertEquals(54.0D, (double) bucket.get("sumsq"), 0.1E-7);
// assertEquals(0.55846323792D, bucket.getStddev(), 0.1E-7); // TODO: SOLR-11725 // assertEquals(0.55846323792D, bucket.getStddev(), 0.1E-7); // TODO: SOLR-11725
@ -391,6 +393,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(300L, aaa0_Bucket.get("countvals")); assertEquals(300L, aaa0_Bucket.get("countvals"));
assertEquals(0L, aaa0_Bucket.get("missing")); assertEquals(0L, aaa0_Bucket.get("missing"));
assertEquals(34650.0D, aaa0_Bucket.get("sum")); assertEquals(34650.0D, aaa0_Bucket.get("sum"));
assertEquals(483.70000000000016D, (double)aaa0_Bucket.get("percentile"), 0.1E-7);
assertEquals(115.5D, (double) aaa0_Bucket.get("avg"), 0.1E-7); assertEquals(115.5D, (double) aaa0_Bucket.get("avg"), 0.1E-7);
assertEquals(1.674585E7D, (double) aaa0_Bucket.get("sumsq"), 0.1E-7); assertEquals(1.674585E7D, (double) aaa0_Bucket.get("sumsq"), 0.1E-7);
// assertEquals(206.4493184076D, (double) aaa0_Bucket.get("stddev"), 0.1E-7); // TODO: SOLR-11725 // assertEquals(206.4493184076D, (double) aaa0_Bucket.get("stddev"), 0.1E-7); // TODO: SOLR-11725
@ -403,6 +406,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(0L, tail_Bucket.get("min")); assertEquals(0L, tail_Bucket.get("min"));
assertEquals(44L, tail_Bucket.get("max")); assertEquals(44L, tail_Bucket.get("max"));
assertEquals(90L, tail_Bucket.get("countvals")); assertEquals(90L, tail_Bucket.get("countvals"));
assertEquals(40.0D, tail_Bucket.get("percentile"));
assertEquals(45L, tail_Bucket.get("missing")); assertEquals(45L, tail_Bucket.get("missing"));
assertEquals(1980.0D, tail_Bucket.get("sum")); assertEquals(1980.0D, tail_Bucket.get("sum"));
assertEquals(22.0D, (double) tail_Bucket.get("avg"), 0.1E-7); assertEquals(22.0D, (double) tail_Bucket.get("avg"), 0.1E-7);
@ -419,6 +423,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(35L, tailB_Bucket.get("min")); assertEquals(35L, tailB_Bucket.get("min"));
assertEquals(40L, tailB_Bucket.get("max")); assertEquals(40L, tailB_Bucket.get("max"));
assertEquals(12L, tailB_Bucket.get("countvals")); assertEquals(12L, tailB_Bucket.get("countvals"));
assertEquals(39.9D, tailB_Bucket.get("percentile"));
assertEquals(5L, tailB_Bucket.get("missing")); assertEquals(5L, tailB_Bucket.get("missing"));
assertEquals(450.0D, tailB_Bucket.get("sum")); assertEquals(450.0D, tailB_Bucket.get("sum"));
assertEquals(37.5D, (double) tailB_Bucket.get("avg"), 0.1E-7); assertEquals(37.5D, (double) tailB_Bucket.get("avg"), 0.1E-7);

View File

@ -217,14 +217,22 @@ public class TestJsonFacets extends SolrTestCaseHS {
public void indexSimple(Client client) throws Exception { public void indexSimple(Client client) throws Exception {
client.deleteByQuery("*:*", null); client.deleteByQuery("*:*", null);
client.add(sdoc("id", "1", "cat_s", "A", "where_s", "NY", "num_d", "4", "num_i", "2", "val_b", "true", "sparse_s", "one"), null); client.add(sdoc("id", "1", "cat_s", "A", "where_s", "NY", "num_d", "4", "num_i", "2",
client.add(sdoc("id", "2", "cat_s", "B", "where_s", "NJ", "num_d", "-9", "num_i", "-5", "val_b", "false"), null); "num_is", "4", "num_is", "2",
"val_b", "true", "sparse_s", "one"), null);
client.add(sdoc("id", "2", "cat_s", "B", "where_s", "NJ", "num_d", "-9", "num_i", "-5",
"num_is", "-9", "num_is", "-5",
"val_b", "false"), null);
client.add(sdoc("id", "3"), null); client.add(sdoc("id", "3"), null);
client.commit(); client.commit();
client.add(sdoc("id", "4", "cat_s", "A", "where_s", "NJ", "num_d", "2", "num_i", "3"), null); client.add(sdoc("id", "4", "cat_s", "A", "where_s", "NJ", "num_d", "2", "num_i", "3",
client.add(sdoc("id", "5", "cat_s", "B", "where_s", "NJ", "num_d", "11", "num_i", "7", "sparse_s", "two"),null); "num_is", "2", "num_is", "3"), null);
client.add(sdoc("id", "5", "cat_s", "B", "where_s", "NJ", "num_d", "11", "num_i", "7",
"num_is", "11", "num_is", "7",
"sparse_s", "two"),null);
client.commit(); client.commit();
client.add(sdoc("id", "6", "cat_s", "B", "where_s", "NY", "num_d", "-5", "num_i", "-5"),null); client.add(sdoc("id", "6", "cat_s", "B", "where_s", "NY", "num_d", "-5", "num_i", "-5",
"num_is", "-5"),null);
client.commit(); client.commit();
} }
@ -874,12 +882,20 @@ public class TestJsonFacets extends SolrTestCaseHS {
Client client = Client.localClient(); Client client = Client.localClient();
indexSimple(client); indexSimple(client);
assertJQ(req("q", "*:*", "rows", "0", "json.facet", "{x:'sum(num_is)'}")
, "facets=={count:6 , x:,10.0}"
);
assertJQ(req("q", "*:*", "rows", "0", "json.facet", "{x:'min(num_is)'}")
, "facets=={count:6 , x:,-9}"
);
// test multiple json.facet commands // test multiple json.facet commands
assertJQ(req("q", "*:*", "rows", "0" assertJQ(req("q", "*:*", "rows", "0"
, "json.facet", "{x:'sum(num_d)'}" , "json.facet", "{x:'sum(num_d)'}"
, "json.facet", "{y:'min(num_d)'}" , "json.facet", "{y:'min(num_d)'}"
, "json.facet", "{z:'min(num_is)'}"
) )
, "facets=={count:6 , x:3.0, y:-9.0 }" , "facets=={count:6 , x:3.0, y:-9.0, z:-9 }"
); );
@ -922,10 +938,11 @@ public class TestJsonFacets extends SolrTestCaseHS {
// test nested streaming with stats under streaming // test nested streaming with stats under streaming
assertJQ(req("q", "*:*", "rows", "0" assertJQ(req("q", "*:*", "rows", "0"
, "json.facet", "{ cat:{terms:{field:'cat_s', method:stream,sort:'index asc', facet:{ where:{terms:{field:where_s,method:stream,sort:'index asc',sort:'index asc', facet:{x:'max(num_d)'} }}} }}}" , "json.facet", "{ cat:{terms:{field:'cat_s', method:stream,sort:'index asc', facet:{ where:{terms:{field:where_s,method:stream,sort:'index asc',sort:'index asc', facet:{x:'max(num_d)', y:'sum(num_is)'} }}} }}}"
) )
, "facets=={count:6 " + , "facets=={count:6 " +
", cat :{buckets:[{val:A, count:2, where:{buckets:[{val:NJ,count:1,x:2.0},{val:NY,count:1,x:4.0}]} },{val:B, count:3, where:{buckets:[{val:NJ,count:2,x:11.0},{val:NY,count:1,x:-5.0}]} }]}" ", cat :{buckets:[{val:A, count:2, where:{buckets:[{val:NJ,count:1,x:2.0,y:5.0},{val:NY,count:1,x:4.0,y:6.0}]} }," +
"{val:B, count:3, where:{buckets:[{val:NJ,count:2,x:11.0,y:4.0},{val:NY,count:1,x:-5.0,y:-5.0}]} }]}"
+ "}" + "}"
); );
@ -1340,7 +1357,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
", f2:{ 'buckets':[{ val:'A', count:2, n1:2}, { val:'B', count:3, n1:0 }]} }" ", f2:{ 'buckets':[{ val:'A', count:2, n1:2}, { val:'B', count:3, n1:0 }]} }"
); );
// test sorting by missing stat with domain query // test sorting by countvals stat with domain query
client.testJQ(params(p, "q", "-id:*" client.testJQ(params(p, "q", "-id:*"
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'countvals(field(${sparse_num_d}))'} }}" + , "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'countvals(field(${sparse_num_d}))'} }}" +
" , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'countvals(field(${sparse_num_d}))'} }} }" " , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'countvals(field(${sparse_num_d}))'} }} }"
@ -1779,8 +1796,6 @@ public class TestJsonFacets extends SolrTestCaseHS {
); );
} }
// stats at top level // stats at top level
client.testJQ(params(p, "q", "*:*" client.testJQ(params(p, "q", "*:*"
, "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', mind:'min(${num_d})', maxd:'max(${num_d})'" + , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', mind:'min(${num_d})', maxd:'max(${num_d})'" +
@ -1799,6 +1814,73 @@ public class TestJsonFacets extends SolrTestCaseHS {
"}" "}"
); );
// stats at top level on multi-valued fields
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{ sum1:'sum(${num_fs})', sumsq1:'sumsq(${num_fs})', avg1:'avg(${num_fs})', mind:'min(${num_fs})', maxd:'max(${num_fs})'" +
", mini:'min(${num_is})', maxi:'max(${num_is})', mins:'min(${multi_ss})', maxs:'max(${multi_ss})'" +
", stddev:'stddev(${num_fs})', variance:'variance(${num_fs})', median:'percentile(${num_fs}, 50)'" +
", perc:'percentile(${num_fs}, 0,75,100)'" +
" }"
)
, "facets=={ 'count':6, " +
"sum1:0.0, sumsq1:51.5, avg1:0.0, mind:-5.0, maxd:3.0" +
", mini:-5, maxi:3, mins:'a', maxs:'b'" +
", stddev:2.537222891273055, variance:6.4375, median:0.0, perc:[-5.0,2.25,3.0]" +
"}"
);
// test sorting by multi-valued
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'avg(${num_is})'} }}" +
" , f2:{terms:{${terms} field:'${cat_s}', sort:'n1 asc', facet:{n1:'avg(${num_is})'} }} }"
)
, "facets=={ 'count':6, " +
" f1:{ 'buckets':[{ val:'B', count:3, n1: 0.25}, { val:'A', count:2, n1:0.0}]}" +
", f2:{ 'buckets':[{ val:'A', count:2, n1:0.0}, { val:'B', count:3, n1:0.25 }]} }"
);
// test sorting by percentile
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 asc', facet:{n1:'percentile(${num_is}, 50)'} }}" +
" , f2:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'percentile(${num_is}, 50)'} }} }"
)
, "facets=={ 'count':6, " +
" f1:{ 'buckets':[{ val:'B', count:3, n1: -0.50}, { val:'A', count:2, n1:1.0}]}" +
", f2:{ 'buckets':[{ val:'A', count:2, n1:1.0}, { val:'B', count:3, n1:-0.50 }]} }"
);
// test sorting by multi-valued field with domain query
client.testJQ(params(p, "q", "-id:*"
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'sum(${num_is})'} }}" +
" , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'sum(${num_is})'} }} }"
)
, "facets=={ 'count':0, " +
" f1:{ 'buckets':[{ val:'B', count:3, n1:1.0 }, { val:'A', count:2, n1:0.0}]}" +
", f2:{ 'buckets':[{ val:'A', count:2, n1:0.0}, { val:'B', count:3, n1:1.0 }]} }"
);
client.testJQ(params(p, "q", "*:*"
, "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " +
"facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " +
"facet:{n1:'min(${multi_ss})'}}}}}}}"
)
, "facets=={ 'count':6, " +
" f1:{ 'buckets':[{ val:'B', count:3, f2:{'buckets':[{val:'NJ', count:2, n1:'a'},{val:'NY', count:1, n1:'a'}]} }," +
" { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:'b'},{val:'NY', count:1}]}}]}" +
"}"
);
client.testJQ(params(p, "q", "*:*"
, "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " +
"facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " +
"facet:{n1:'max(${multi_ss})'}}}}}}}"
)
, "facets=={ 'count':6, " +
" f1:{ 'buckets':[{ val:'B', count:3, f2:{'buckets':[{val:'NJ', count:2, n1:'b'},{val:'NY', count:1, n1:'b'}]} }," +
" { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:'b'},{val:'NY', count:1}]}}]}" +
"}"
);
// stats at top level, no matches // stats at top level, no matches
client.testJQ(params(p, "q", "id:DOESNOTEXIST" client.testJQ(params(p, "q", "id:DOESNOTEXIST"
, "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})'" + , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})'" +