mirror of https://github.com/apache/lucene.git
SOLR-11706: add support for aggregation on multivalued fields
* min, max, sum, sumsq, avg, stddev, variance, percentile aggregations in JSON facets now support multivalued fields
This commit is contained in:
parent
c4126ef858
commit
12e8cca644
|
@ -160,6 +160,9 @@ Improvements
|
||||||
|
|
||||||
* SOLR-13968: Support postingsFormat and docValuesFormat in schema fields. (Bruno Roustant)
|
* SOLR-13968: Support postingsFormat and docValuesFormat in schema fields. (Bruno Roustant)
|
||||||
|
|
||||||
|
* SOLR-11706: Add support for aggregation on multivalued fields in JSON facets. min, max, avg, sum, sumsq, stddev,
|
||||||
|
variance, percentile aggregations now have support for multivalued fields. (hossman, Munendra S N)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
(No changes)
|
(No changes)
|
||||||
|
|
|
@ -985,35 +985,35 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
|
||||||
addParser("agg_sum", new ValueSourceParser() {
|
addParser("agg_sum", new ValueSourceParser() {
|
||||||
@Override
|
@Override
|
||||||
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
||||||
return new SumAgg(fp.parseValueSource());
|
return new SumAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
addParser("agg_avg", new ValueSourceParser() {
|
addParser("agg_avg", new ValueSourceParser() {
|
||||||
@Override
|
@Override
|
||||||
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
||||||
return new AvgAgg(fp.parseValueSource());
|
return new AvgAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
addParser("agg_sumsq", new ValueSourceParser() {
|
addParser("agg_sumsq", new ValueSourceParser() {
|
||||||
@Override
|
@Override
|
||||||
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
||||||
return new SumsqAgg(fp.parseValueSource());
|
return new SumsqAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
addParser("agg_variance", new ValueSourceParser() {
|
addParser("agg_variance", new ValueSourceParser() {
|
||||||
@Override
|
@Override
|
||||||
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
||||||
return new VarianceAgg(fp.parseValueSource());
|
return new VarianceAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
addParser("agg_stddev", new ValueSourceParser() {
|
addParser("agg_stddev", new ValueSourceParser() {
|
||||||
@Override
|
@Override
|
||||||
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
||||||
return new StddevAgg(fp.parseValueSource());
|
return new StddevAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1054,7 +1054,26 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
addParser("agg_percentile", new PercentileAgg.Parser());
|
// Registers the "agg_percentile" function: agg_percentile(valsource, percent1[, percent2]*)
addParser("agg_percentile", new ValueSourceParser() {
  /**
   * Parses the value source followed by one or more requested percentiles.
   *
   * @param fp the parser positioned at the function arguments
   * @return a {@link PercentileAgg} over the parsed source and percentiles
   * @throws SyntaxError if a percentile is outside [0, 100] (or is not a number), or if no
   *         percentile was supplied at all
   */
  @Override
  public ValueSource parse(FunctionQParser fp) throws SyntaxError {
    List<Double> percentiles = new ArrayList<>();
    // NOTE(review): FLAG_USE_FIELDNAME_SOURCE presumably makes a bare field name come back as a
    // FieldNameValueSource so the aggregation can pick a multivalued-aware accumulator - confirm.
    ValueSource vs = fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE);
    while (fp.hasMoreArguments()) {
      double val = fp.parseDouble();
      // Negated in-range check: every comparison with NaN is false, so "val<0 || val>100"
      // would silently accept NaN if parseDouble ever produced it.
      if (!(val >= 0 && val <= 100)) {
        throw new SyntaxError("requested percentile must be between 0 and 100. got " + val);
      }
      percentiles.add(val);
    }

    if (percentiles.isEmpty()) {
      throw new SyntaxError("expected percentile(valsource,percent1[,percent2]*) EXAMPLE:percentile(myfield,50)");
    }

    return new PercentileAgg(vs, percentiles);
  }
});
|
||||||
|
|
||||||
addParser("agg_" + RelatednessAgg.NAME, new ValueSourceParser() {
|
addParser("agg_" + RelatednessAgg.NAME, new ValueSourceParser() {
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.search.facet;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Static helpers that derive statistics from running totals
 * (sum, sum of squares and number of values).
 */
public final class AggUtil {

  private AggUtil() {
    // static utility holder, never instantiated
  }

  /**
   * Computes and returns average for given sum and count.
   *
   * @param sum   sum of all values
   * @param count number of values
   * @return {@code sum / count}, or {@code 0.0} when {@code count} is 0
   */
  public static double avg(double sum, long count) {
    // todo: should we return NAN when count==0?
    return count == 0 ? 0.0d : sum / count;
  }

  /**
   * Computes and returns uncorrected standard deviation for given values.
   *
   * @param sumSq sum of the squared values
   * @param sum   sum of all values
   * @param count number of values
   * @return square root of {@link #variance(double, double, long)}; {@code 0.0} when {@code count} is 0
   */
  public static double stdDev(double sumSq, double sum, long count) {
    // todo: switch to corrected stddev SOLR-11725
    // Delegating to variance() guarantees a non-negative argument, so rounding noise
    // can no longer turn the result into NaN.
    return Math.sqrt(variance(sumSq, sum, count));
  }

  /**
   * Computes and returns uncorrected variance for given values.
   *
   * @param sumSq sum of the squared values
   * @param sum   sum of all values
   * @param count number of values
   * @return {@code sumSq/count - (sum/count)^2}, never negative; {@code 0.0} when {@code count} is 0
   */
  public static double variance(double sumSq, double sum, long count) {
    // todo: switch to corrected variance SOLR-11725
    // todo: should we return NAN when count==0?
    if (count == 0) {
      return 0.0d;
    }
    double val = (sumSq / count) - Math.pow(sum / count, 2);
    // The two terms are nearly equal for (almost) constant inputs, so floating point
    // rounding can leave a tiny negative difference; a variance is never below zero.
    return val < 0 ? 0.0d : val;
  }
}
|
|
@ -17,9 +17,18 @@
|
||||||
package org.apache.solr.search.facet;
|
package org.apache.solr.search.facet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Date;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
import org.apache.solr.search.function.FieldNameValueSource;
|
||||||
|
|
||||||
|
|
||||||
public class AvgAgg extends SimpleAggValueSource {
|
public class AvgAgg extends SimpleAggValueSource {
|
||||||
|
@ -29,7 +38,31 @@ public class AvgAgg extends SimpleAggValueSource {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
||||||
return new AvgSlotAcc(getArg(), fcontext, numSlots);
|
ValueSource vs = getArg();
|
||||||
|
|
||||||
|
if (vs instanceof FieldNameValueSource) {
|
||||||
|
String field = ((FieldNameValueSource) vs).getFieldName();
|
||||||
|
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
|
||||||
|
if (sf.getType().getNumberType() == null) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for " + sf.getType().getTypeName());
|
||||||
|
}
|
||||||
|
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||||
|
if (sf.hasDocValues()) {
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
return new AvgSortedNumericAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
return new AvgSortedSetAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for PointField w/o docValues");
|
||||||
|
}
|
||||||
|
return new AvgUnInvertedFieldAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
vs = sf.getType().getValueSource(sf, null);
|
||||||
|
}
|
||||||
|
return new AvgSlotAcc(vs, fcontext, numSlots);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -43,7 +76,7 @@ public class AvgAgg extends SimpleAggValueSource {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void merge(Object facetResult, Context mcontext1) {
|
public void merge(Object facetResult, Context mcontext1) {
|
||||||
List<Number> numberList = (List<Number>)facetResult;
|
List<Number> numberList = (List<Number>) facetResult;
|
||||||
num += numberList.get(0).longValue();
|
num += numberList.get(0).longValue();
|
||||||
sum += numberList.get(1).doubleValue();
|
sum += numberList.get(1).doubleValue();
|
||||||
}
|
}
|
||||||
|
@ -51,8 +84,167 @@ public class AvgAgg extends SimpleAggValueSource {
|
||||||
@Override
|
@Override
|
||||||
protected double getDouble() {
|
protected double getDouble() {
|
||||||
// TODO: is it worth to try and cache?
|
// TODO: is it worth to try and cache?
|
||||||
return num==0 ? 0.0d : sum/num;
|
return AggUtil.avg(sum, num);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class AvgSortedNumericAcc extends DoubleSortedNumericDVAcc {
|
||||||
|
int[] counts;
|
||||||
|
|
||||||
|
public AvgSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
this.counts = new int[numSlots];
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
@Override
|
||||||
|
protected void collectValues(int doc, int slot) throws IOException {
|
||||||
|
for (int i = 0, count = values.docValueCount(); i < count; i++) {
|
||||||
|
result[slot]+=getDouble(values.nextValue());
|
||||||
|
counts[slot]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private double avg(int slot) {
|
||||||
|
return AggUtil.avg(result[slot], counts[slot]); // calc once and cache in result?
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
return Double.compare(avg(slotA), avg(slotB));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slot) {
|
||||||
|
if (fcontext.isShard()) {
|
||||||
|
ArrayList lst = new ArrayList(2);
|
||||||
|
lst.add(counts[slot]);
|
||||||
|
lst.add(result[slot]);
|
||||||
|
return lst;
|
||||||
|
} else {
|
||||||
|
return avg(slot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
Arrays.fill(counts, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
super.resize(resizer);
|
||||||
|
resizer.resize(counts, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class AvgSortedSetAcc extends DoubleSortedSetDVAcc {
|
||||||
|
int[] counts;
|
||||||
|
|
||||||
|
public AvgSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
this.counts = new int[numSlots];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void collectValues(int doc, int slot) throws IOException {
|
||||||
|
long ord;
|
||||||
|
while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||||
|
BytesRef term = values.lookupOrd(ord);
|
||||||
|
Object obj = sf.getType().toObject(sf, term);
|
||||||
|
double val = obj instanceof Date ? ((Date)obj).getTime(): ((Number)obj).doubleValue();
|
||||||
|
result[slot] += val;
|
||||||
|
counts[slot]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private double avg(int slot) {
|
||||||
|
return AggUtil.avg(result[slot], counts[slot]);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
return Double.compare(avg(slotA), avg(slotB));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slot) {
|
||||||
|
if (fcontext.isShard()) {
|
||||||
|
ArrayList lst = new ArrayList(2);
|
||||||
|
lst.add(counts[slot]);
|
||||||
|
lst.add(result[slot]);
|
||||||
|
return lst;
|
||||||
|
} else {
|
||||||
|
return avg(slot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
Arrays.fill(counts, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
super.resize(resizer);
|
||||||
|
resizer.resize(counts, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class AvgUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {
|
||||||
|
int[] counts;
|
||||||
|
|
||||||
|
public AvgUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
this.counts = new int[numSlots];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void call(int termNum) {
|
||||||
|
try {
|
||||||
|
BytesRef term = docToTerm.lookupOrd(termNum);
|
||||||
|
Object obj = sf.getType().toObject(sf, term);
|
||||||
|
double val = obj instanceof Date? ((Date)obj).getTime(): ((Number)obj).doubleValue();
|
||||||
|
result[currentSlot] += val;
|
||||||
|
counts[currentSlot]++;
|
||||||
|
} catch (IOException e) {
|
||||||
|
// find a better way to do it
|
||||||
|
throw new UncheckedIOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private double avg(int slot) {
|
||||||
|
return AggUtil.avg(result[slot], counts[slot]);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
return Double.compare(avg(slotA), avg(slotB));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slot) {
|
||||||
|
if (fcontext.isShard()) {
|
||||||
|
ArrayList lst = new ArrayList(2);
|
||||||
|
lst.add(counts[slot]);
|
||||||
|
lst.add(result[slot]);
|
||||||
|
return lst;
|
||||||
|
} else {
|
||||||
|
return avg(slot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
Arrays.fill(counts, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
super.resize(resizer);
|
||||||
|
resizer.resize(counts, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,9 @@
|
||||||
package org.apache.solr.search.facet;
|
package org.apache.solr.search.facet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Date;
|
||||||
import java.util.function.IntFunction;
|
import java.util.function.IntFunction;
|
||||||
|
|
||||||
import org.apache.lucene.index.DocValues;
|
import org.apache.lucene.index.DocValues;
|
||||||
|
@ -28,6 +30,8 @@ import org.apache.lucene.index.SortedDocValues;
|
||||||
import org.apache.lucene.index.SortedNumericDocValues;
|
import org.apache.lucene.index.SortedNumericDocValues;
|
||||||
import org.apache.lucene.index.SortedSetDocValues;
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.NumericUtils;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -139,6 +143,118 @@ abstract class LongSortedNumericDVAcc extends SortedNumericDVAcc {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
abstract class DoubleSortedNumericDVAcc extends SortedNumericDVAcc {
|
||||||
|
double[] result;
|
||||||
|
double initialValue;
|
||||||
|
|
||||||
|
public DoubleSortedNumericDVAcc(FacetContext fcontext, SchemaField sf, int numSlots, double initialValue) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
this.result = new double[numSlots];
|
||||||
|
this.initialValue = initialValue;
|
||||||
|
if (initialValue != 0) {
|
||||||
|
Arrays.fill(result, initialValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
return Double.compare(result[slotA], result[slotB]);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slotNum) throws IOException {
|
||||||
|
return result[slotNum];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
Arrays.fill(result, initialValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
resizer.resize(result, initialValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* converts given long value to double based on field type
|
||||||
|
*/
|
||||||
|
protected double getDouble(long val) {
|
||||||
|
switch (sf.getType().getNumberType()) {
|
||||||
|
case INTEGER:
|
||||||
|
case LONG:
|
||||||
|
case DATE:
|
||||||
|
return val;
|
||||||
|
case FLOAT:
|
||||||
|
return NumericUtils.sortableIntToFloat((int) val);
|
||||||
|
case DOUBLE:
|
||||||
|
return NumericUtils.sortableLongToDouble(val);
|
||||||
|
default:
|
||||||
|
// this would never happen
|
||||||
|
return 0.0d;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Base class for standard deviation and variance computation for fields with {@link SortedNumericDocValues}
|
||||||
|
*/
|
||||||
|
abstract class SDVSortedNumericAcc extends DoubleSortedNumericDVAcc {
|
||||||
|
int[] counts;
|
||||||
|
double[] sum;
|
||||||
|
|
||||||
|
public SDVSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
this.counts = new int[numSlots];
|
||||||
|
this.sum = new double[numSlots];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void collectValues(int doc, int slot) throws IOException {
|
||||||
|
for (int i = 0, count = values.docValueCount(); i < count; i++) {
|
||||||
|
double val = getDouble(values.nextValue());
|
||||||
|
result[slot]+= val * val;
|
||||||
|
sum[slot]+= val;
|
||||||
|
counts[slot]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract double computeVal(int slot);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
return Double.compare(computeVal(slotA), computeVal(slotB));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slot) {
|
||||||
|
if (fcontext.isShard()) {
|
||||||
|
ArrayList lst = new ArrayList(3);
|
||||||
|
lst.add(counts[slot]);
|
||||||
|
lst.add(result[slot]);
|
||||||
|
lst.add(sum[slot]);
|
||||||
|
return lst;
|
||||||
|
} else {
|
||||||
|
return computeVal(slot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
Arrays.fill(counts, 0);
|
||||||
|
Arrays.fill(sum, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
super.resize(resizer);
|
||||||
|
resizer.resize(counts, 0);
|
||||||
|
resizer.resize(sum, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Accumulator for {@link SortedDocValues}
|
* Accumulator for {@link SortedDocValues}
|
||||||
*/
|
*/
|
||||||
|
@ -216,3 +332,98 @@ abstract class LongSortedSetDVAcc extends SortedSetDVAcc {
|
||||||
resizer.resize(result, initialValue);
|
resizer.resize(result, initialValue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
abstract class DoubleSortedSetDVAcc extends SortedSetDVAcc {
|
||||||
|
double[] result;
|
||||||
|
double initialValue;
|
||||||
|
|
||||||
|
public DoubleSortedSetDVAcc(FacetContext fcontext, SchemaField sf, int numSlots, long initialValue) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
result = new double[numSlots];
|
||||||
|
this.initialValue = initialValue;
|
||||||
|
if (initialValue != 0) {
|
||||||
|
Arrays.fill(result, initialValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
return Double.compare(result[slotA], result[slotB]);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slotNum) throws IOException {
|
||||||
|
return result[slotNum];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
Arrays.fill(result, initialValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
resizer.resize(result, initialValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Base class for standard deviation and variance computation for fields with {@link SortedSetDocValues}
|
||||||
|
*/
|
||||||
|
abstract class SDVSortedSetAcc extends DoubleSortedSetDVAcc {
|
||||||
|
int[] counts;
|
||||||
|
double[] sum;
|
||||||
|
|
||||||
|
public SDVSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
this.counts = new int[numSlots];
|
||||||
|
this.sum = new double[numSlots];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void collectValues(int doc, int slot) throws IOException {
|
||||||
|
long ord;
|
||||||
|
while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||||
|
BytesRef term = values.lookupOrd(ord);
|
||||||
|
Object obj = sf.getType().toObject(sf, term);
|
||||||
|
double val = obj instanceof Date ? ((Date)obj).getTime(): ((Number)obj).doubleValue();
|
||||||
|
result[slot] += val * val;
|
||||||
|
sum[slot] += val;
|
||||||
|
counts[slot]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract double computeVal(int slot);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
return Double.compare(computeVal(slotA), computeVal(slotB));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slot) {
|
||||||
|
if (fcontext.isShard()) {
|
||||||
|
ArrayList lst = new ArrayList(3);
|
||||||
|
lst.add(counts[slot]);
|
||||||
|
lst.add(result[slot]);
|
||||||
|
lst.add(sum[slot]);
|
||||||
|
return lst;
|
||||||
|
} else {
|
||||||
|
return computeVal(slot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
Arrays.fill(counts, 0);
|
||||||
|
Arrays.fill(sum, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
super.resize(resizer);
|
||||||
|
resizer.resize(counts, 0);
|
||||||
|
resizer.resize(sum, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -25,11 +25,15 @@ import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.MultiDocValues;
|
import org.apache.lucene.index.MultiDocValues;
|
||||||
import org.apache.lucene.index.OrdinalMap;
|
import org.apache.lucene.index.OrdinalMap;
|
||||||
import org.apache.lucene.index.SortedDocValues;
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
import org.apache.lucene.util.LongValues;
|
import org.apache.lucene.util.LongValues;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.schema.FieldType;
|
||||||
|
import org.apache.solr.schema.NumberType;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.schema.StrFieldSource;
|
import org.apache.solr.schema.StrFieldSource;
|
||||||
import org.apache.solr.search.function.FieldNameValueSource;
|
import org.apache.solr.search.function.FieldNameValueSource;
|
||||||
|
@ -53,8 +57,21 @@ public class MinMaxAgg extends SimpleAggValueSource {
|
||||||
sf = fcontext.qcontext.searcher().getSchema().getField(field);
|
sf = fcontext.qcontext.searcher().getSchema().getField(field);
|
||||||
|
|
||||||
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||||
vs = null;
|
if (sf.hasDocValues()) {
|
||||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "min/max aggregations can't be used on multi-valued field " + field);
|
if(sf.getType().getNumberType() != null) {
|
||||||
|
FieldType.MultiValueSelector choice = minmax == 1 ? FieldType.MultiValueSelector.MIN : FieldType.MultiValueSelector.MAX;
|
||||||
|
vs = sf.getType().getSingleValueSource(choice, sf, null);
|
||||||
|
} else {
|
||||||
|
// multi-valued strings
|
||||||
|
return new MinMaxSortedSetDVAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
"min/max aggregations can't be used on PointField w/o DocValues");
|
||||||
|
}
|
||||||
|
return new MinMaxUnInvertedFieldAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
vs = sf.getType().getValueSource(sf, null);
|
vs = sf.getType().getValueSource(sf, null);
|
||||||
}
|
}
|
||||||
|
@ -137,6 +154,80 @@ public class MinMaxAgg extends SimpleAggValueSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class MinMaxUnInvertedFieldAcc extends UnInvertedFieldAcc {
|
||||||
|
final static int MISSING = -1;
|
||||||
|
private int currentSlot;
|
||||||
|
int[] result;
|
||||||
|
|
||||||
|
public MinMaxUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
result = new int[numSlots];
|
||||||
|
Arrays.fill(result, MISSING);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
|
||||||
|
this.currentSlot = slot;
|
||||||
|
docToTerm.getBigTerms(doc + currentDocBase, this);
|
||||||
|
docToTerm.getSmallTerms(doc + currentDocBase, this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
int a = result[slotA];
|
||||||
|
int b = result[slotB];
|
||||||
|
return a == MISSING ? -1: (b == MISSING? 1: Integer.compare(a, b));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slotNum) throws IOException {
|
||||||
|
int ord = result[slotNum];
|
||||||
|
if (ord == MISSING) return null;
|
||||||
|
BytesRef term = docToTerm.lookupOrd(ord);
|
||||||
|
return getObject(term);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper to convert stored format to external format.
|
||||||
|
* <p>
|
||||||
|
* This ensures consistent behavior like other accumulators where
|
||||||
|
* long is returned for integer field types and double is returned for float field types
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
private Object getObject(BytesRef term) {
|
||||||
|
Object obj = sf.getType().toObject(sf, term);
|
||||||
|
NumberType type = sf.getType().getNumberType();
|
||||||
|
if (type == null) {
|
||||||
|
return obj;
|
||||||
|
} else if (type == NumberType.INTEGER) {
|
||||||
|
// this is to ensure consistent behavior with other accumulators
|
||||||
|
// where long is returned for integer field types
|
||||||
|
return ((Number)obj).longValue();
|
||||||
|
} else if (type == NumberType.FLOAT) {
|
||||||
|
return ((Number)obj).floatValue();
|
||||||
|
}
|
||||||
|
return obj;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
Arrays.fill(result, MISSING);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
resizer.resize(result, MISSING);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void call(int termNum) {
|
||||||
|
int currOrd = result[currentSlot];
|
||||||
|
if (currOrd == MISSING || Integer.compare(termNum, currOrd) * minmax < 0) {
|
||||||
|
result[currentSlot] = termNum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
class DFuncAcc extends DoubleFuncSlotAcc {
|
class DFuncAcc extends DoubleFuncSlotAcc {
|
||||||
public DFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) {
|
public DFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) {
|
||||||
super(values, fcontext, numSlots, Double.NaN);
|
super(values, fcontext, numSlots, Double.NaN);
|
||||||
|
@ -291,7 +382,6 @@ public class MinMaxAgg extends SimpleAggValueSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class SingleValuedOrdAcc extends OrdAcc {
|
class SingleValuedOrdAcc extends OrdAcc {
|
||||||
SortedDocValues topLevel;
|
SortedDocValues topLevel;
|
||||||
SortedDocValues[] subDvs;
|
SortedDocValues[] subDvs;
|
||||||
|
@ -346,5 +436,94 @@ public class MinMaxAgg extends SimpleAggValueSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class MinMaxSortedSetDVAcc extends DocValuesAcc {
|
||||||
|
final static int MISSING = -1;
|
||||||
|
SortedSetDocValues topLevel;
|
||||||
|
SortedSetDocValues[] subDvs;
|
||||||
|
OrdinalMap ordMap;
|
||||||
|
LongValues toGlobal;
|
||||||
|
SortedSetDocValues subDv;
|
||||||
|
long[] slotOrd;
|
||||||
|
|
||||||
|
public MinMaxSortedSetDVAcc(FacetContext fcontext, SchemaField field, int numSlots) throws IOException {
|
||||||
|
super(fcontext, field);
|
||||||
|
this.slotOrd = new long[numSlots];
|
||||||
|
Arrays.fill(slotOrd, MISSING);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resetIterators() throws IOException {
|
||||||
|
super.resetIterators();
|
||||||
|
topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
|
||||||
|
if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) {
|
||||||
|
ordMap = ((MultiDocValues.MultiSortedSetDocValues)topLevel).mapping;
|
||||||
|
subDvs = ((MultiDocValues.MultiSortedSetDocValues)topLevel).values;
|
||||||
|
} else {
|
||||||
|
ordMap = null;
|
||||||
|
subDvs = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||||
|
super.setNextReader(readerContext);
|
||||||
|
if (subDvs != null) {
|
||||||
|
subDv = subDvs[readerContext.ord];
|
||||||
|
toGlobal = ordMap.getGlobalOrds(readerContext.ord);
|
||||||
|
assert toGlobal != null;
|
||||||
|
} else {
|
||||||
|
assert readerContext.ord==0 || topLevel.getValueCount() == 0;
|
||||||
|
subDv = topLevel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
long a = slotOrd[slotA];
|
||||||
|
long b = slotOrd[slotB];
|
||||||
|
return a == MISSING ? -1: (b == MISSING? 1: Long.compare(a, b));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slotNum) throws IOException {
|
||||||
|
long ord = slotOrd[slotNum];
|
||||||
|
if (ord == MISSING) return null;
|
||||||
|
BytesRef term = topLevel.lookupOrd(ord);
|
||||||
|
return sf.getType().toObject(sf, term);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
Arrays.fill(slotOrd, MISSING);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
resizer.resize(slotOrd, MISSING);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collectValues(int doc, int slotNum) throws IOException {
|
||||||
|
long newOrd = MISSING;
|
||||||
|
if (minmax == 1) {// min
|
||||||
|
newOrd = subDv.nextOrd();
|
||||||
|
} else { // max
|
||||||
|
long ord;
|
||||||
|
while ((ord = subDv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||||
|
newOrd = ord;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
long currOrd = slotOrd[slotNum];
|
||||||
|
long finalOrd = toGlobal==null ? newOrd : toGlobal.get(newOrd);
|
||||||
|
if (currOrd == MISSING || Long.compare(finalOrd, currOrd) * minmax < 0) {
|
||||||
|
slotOrd[slotNum] = finalOrd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DocIdSetIterator docIdSetIterator() {
|
||||||
|
return subDv;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,17 +17,29 @@
|
||||||
package org.apache.solr.search.facet;
|
package org.apache.solr.search.facet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Date;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.function.IntFunction;
|
import java.util.function.IntFunction;
|
||||||
|
|
||||||
import com.tdunning.math.stats.AVLTreeDigest;
|
import com.tdunning.math.stats.AVLTreeDigest;
|
||||||
|
import org.apache.lucene.index.DocValues;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.SortedNumericDocValues;
|
||||||
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.NumericUtils;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.search.FunctionQParser;
|
import org.apache.solr.search.FunctionQParser;
|
||||||
import org.apache.solr.search.SyntaxError;
|
import org.apache.solr.search.SyntaxError;
|
||||||
import org.apache.solr.search.ValueSourceParser;
|
import org.apache.solr.search.ValueSourceParser;
|
||||||
|
import org.apache.solr.search.function.FieldNameValueSource;
|
||||||
|
|
||||||
public class PercentileAgg extends SimpleAggValueSource {
|
public class PercentileAgg extends SimpleAggValueSource {
|
||||||
List<Double> percentiles;
|
List<Double> percentiles;
|
||||||
|
@ -39,7 +51,31 @@ public class PercentileAgg extends SimpleAggValueSource {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
||||||
return new Acc(getArg(), fcontext, numSlots);
|
ValueSource vs = getArg();
|
||||||
|
|
||||||
|
if (vs instanceof FieldNameValueSource) {
|
||||||
|
String field = ((FieldNameValueSource) vs).getFieldName();
|
||||||
|
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
|
||||||
|
if (sf.getType().getNumberType() == null) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for " + sf.getType().getTypeName());
|
||||||
|
}
|
||||||
|
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||||
|
if (sf.hasDocValues()) {
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
return new PercentileSortedNumericAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
return new PercentileSortedSetAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for PointField w/o docValues");
|
||||||
|
}
|
||||||
|
return new PercentileUnInvertedFieldAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
vs = sf.getType().getValueSource(sf, null);
|
||||||
|
}
|
||||||
|
return new Acc(vs, fcontext, numSlots);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -80,7 +116,6 @@ public class PercentileAgg extends SimpleAggValueSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected Object getValueFromDigest(AVLTreeDigest digest) {
|
protected Object getValueFromDigest(AVLTreeDigest digest) {
|
||||||
if (digest == null) {
|
if (digest == null) {
|
||||||
return null;
|
return null;
|
||||||
|
@ -90,7 +125,7 @@ public class PercentileAgg extends SimpleAggValueSource {
|
||||||
return digest.quantile( percentiles.get(0) * 0.01 );
|
return digest.quantile( percentiles.get(0) * 0.01 );
|
||||||
}
|
}
|
||||||
|
|
||||||
List<Double> lst = new ArrayList(percentiles.size());
|
List<Double> lst = new ArrayList<>(percentiles.size());
|
||||||
for (Double percentile : percentiles) {
|
for (Double percentile : percentiles) {
|
||||||
double val = digest.quantile( percentile * 0.01 );
|
double val = digest.quantile( percentile * 0.01 );
|
||||||
lst.add( val );
|
lst.add( val );
|
||||||
|
@ -98,8 +133,6 @@ public class PercentileAgg extends SimpleAggValueSource {
|
||||||
return lst;
|
return lst;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Acc extends FuncSlotAcc {
|
class Acc extends FuncSlotAcc {
|
||||||
protected AVLTreeDigest[] digests;
|
protected AVLTreeDigest[] digests;
|
||||||
protected ByteBuffer buf;
|
protected ByteBuffer buf;
|
||||||
|
@ -155,6 +188,76 @@ public class PercentileAgg extends SimpleAggValueSource {
|
||||||
return getValueFromDigest( digests[slotNum] );
|
return getValueFromDigest( digests[slotNum] );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Object getShardValue(int slot) throws IOException {
|
||||||
|
AVLTreeDigest digest = digests[slot];
|
||||||
|
if (digest == null) return null; // no values for this slot
|
||||||
|
|
||||||
|
digest.compress();
|
||||||
|
int sz = digest.byteSize();
|
||||||
|
if (buf == null || buf.capacity() < sz) {
|
||||||
|
buf = ByteBuffer.allocate(sz+(sz>>1)); // oversize by 50%
|
||||||
|
} else {
|
||||||
|
buf.clear();
|
||||||
|
}
|
||||||
|
digest.asSmallBytes(buf);
|
||||||
|
byte[] arr = Arrays.copyOf(buf.array(), buf.position());
|
||||||
|
return arr;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() {
|
||||||
|
digests = new AVLTreeDigest[digests.length];
|
||||||
|
sortvals = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
digests = resizer.resize(digests, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
abstract class BasePercentileDVAcc extends DocValuesAcc {
|
||||||
|
AVLTreeDigest[] digests;
|
||||||
|
protected ByteBuffer buf;
|
||||||
|
double[] sortvals;
|
||||||
|
|
||||||
|
/**
 * Creates the accumulator with one (initially null) digest entry per slot.
 *
 * @param fcontext facet context handed to the parent accumulator
 * @param sf schema field whose values are aggregated
 * @param numSlots initial number of slots
 */
public BasePercentileDVAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
  super(fcontext, sf);
  this.digests = new AVLTreeDigest[numSlots];
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
if (sortvals == null) {
|
||||||
|
fillSortVals();
|
||||||
|
}
|
||||||
|
return Double.compare(sortvals[slotA], sortvals[slotB]);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void fillSortVals() {
|
||||||
|
sortvals = new double[ digests.length ];
|
||||||
|
double sortp = percentiles.get(0) * 0.01;
|
||||||
|
for (int i=0; i<digests.length; i++) {
|
||||||
|
AVLTreeDigest digest = digests[i];
|
||||||
|
if (digest == null) {
|
||||||
|
sortvals[i] = Double.NEGATIVE_INFINITY;
|
||||||
|
} else {
|
||||||
|
sortvals[i] = digest.quantile(sortp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slotNum) throws IOException {
|
||||||
|
if (fcontext.isShard()) {
|
||||||
|
return getShardValue(slotNum);
|
||||||
|
}
|
||||||
|
if (sortvals != null && percentiles.size()==1) {
|
||||||
|
// we've already calculated everything we need
|
||||||
|
return digests[slotNum] != null ? sortvals[slotNum] : null;
|
||||||
|
}
|
||||||
|
return getValueFromDigest( digests[slotNum] );
|
||||||
|
}
|
||||||
|
|
||||||
public Object getShardValue(int slot) throws IOException {
|
public Object getShardValue(int slot) throws IOException {
|
||||||
AVLTreeDigest digest = digests[slot];
|
AVLTreeDigest digest = digests[slot];
|
||||||
|
@ -172,7 +275,6 @@ public class PercentileAgg extends SimpleAggValueSource {
|
||||||
return arr;
|
return arr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void reset() {
|
public void reset() {
|
||||||
digests = new AVLTreeDigest[digests.length];
|
digests = new AVLTreeDigest[digests.length];
|
||||||
|
@ -185,6 +287,184 @@ public class PercentileAgg extends SimpleAggValueSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class PercentileSortedNumericAcc extends BasePercentileDVAcc {
|
||||||
|
SortedNumericDocValues values;
|
||||||
|
|
||||||
|
public PercentileSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void collectValues(int doc, int slot) throws IOException {
|
||||||
|
AVLTreeDigest digest = digests[slot];
|
||||||
|
if (digest == null) {
|
||||||
|
digests[slot] = digest = new AVLTreeDigest(100);
|
||||||
|
}
|
||||||
|
for (int i = 0, count = values.docValueCount(); i < count; i++) {
|
||||||
|
double val = getDouble(values.nextValue());
|
||||||
|
digest.add(val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||||
|
super.setNextReader(readerContext);
|
||||||
|
values = DocValues.getSortedNumeric(readerContext.reader(), sf.getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DocIdSetIterator docIdSetIterator() {
|
||||||
|
return values;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* converts given long value to double based on field type
|
||||||
|
*/
|
||||||
|
protected double getDouble(long val) {
|
||||||
|
switch (sf.getType().getNumberType()) {
|
||||||
|
case INTEGER:
|
||||||
|
case LONG:
|
||||||
|
case DATE:
|
||||||
|
return val;
|
||||||
|
case FLOAT:
|
||||||
|
return NumericUtils.sortableIntToFloat((int) val);
|
||||||
|
case DOUBLE:
|
||||||
|
return NumericUtils.sortableLongToDouble(val);
|
||||||
|
default:
|
||||||
|
// this would never happen
|
||||||
|
return 0.0d;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class PercentileSortedSetAcc extends BasePercentileDVAcc {
|
||||||
|
SortedSetDocValues values;
|
||||||
|
|
||||||
|
public PercentileSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void collectValues(int doc, int slot) throws IOException {
|
||||||
|
AVLTreeDigest digest = digests[slot];
|
||||||
|
if (digest == null) {
|
||||||
|
digests[slot] = digest = new AVLTreeDigest(100);
|
||||||
|
}
|
||||||
|
long ord;
|
||||||
|
while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||||
|
BytesRef term = values.lookupOrd(ord);
|
||||||
|
Object obj = sf.getType().toObject(sf, term);
|
||||||
|
double val = obj instanceof Date ? ((Date)obj).getTime(): ((Number)obj).doubleValue();
|
||||||
|
digest.add(val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||||
|
super.setNextReader(readerContext);
|
||||||
|
values = DocValues.getSortedSet(readerContext.reader(), sf.getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DocIdSetIterator docIdSetIterator() {
|
||||||
|
return values;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class PercentileUnInvertedFieldAcc extends UnInvertedFieldAcc {
|
||||||
|
protected AVLTreeDigest[] digests;
|
||||||
|
protected ByteBuffer buf;
|
||||||
|
protected double[] sortvals;
|
||||||
|
private int currentSlot;
|
||||||
|
|
||||||
|
public PercentileUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
digests = new AVLTreeDigest[numSlots];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
|
||||||
|
this.currentSlot = slot;
|
||||||
|
docToTerm.getBigTerms(doc + currentDocBase, this);
|
||||||
|
docToTerm.getSmallTerms(doc + currentDocBase, this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
if (sortvals == null) {
|
||||||
|
fillSortVals();
|
||||||
|
}
|
||||||
|
return Double.compare(sortvals[slotA], sortvals[slotB]);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void fillSortVals() {
|
||||||
|
sortvals = new double[ digests.length ];
|
||||||
|
double sortp = percentiles.get(0) * 0.01;
|
||||||
|
for (int i=0; i<digests.length; i++) {
|
||||||
|
AVLTreeDigest digest = digests[i];
|
||||||
|
if (digest == null) {
|
||||||
|
sortvals[i] = Double.NEGATIVE_INFINITY;
|
||||||
|
} else {
|
||||||
|
sortvals[i] = digest.quantile(sortp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slotNum) throws IOException {
|
||||||
|
if (fcontext.isShard()) {
|
||||||
|
return getShardValue(slotNum);
|
||||||
|
}
|
||||||
|
if (sortvals != null && percentiles.size()==1) {
|
||||||
|
// we've already calculated everything we need
|
||||||
|
return digests[slotNum] != null ? sortvals[slotNum] : null;
|
||||||
|
}
|
||||||
|
return getValueFromDigest( digests[slotNum] );
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getShardValue(int slot) throws IOException {
|
||||||
|
AVLTreeDigest digest = digests[slot];
|
||||||
|
if (digest == null) return null;
|
||||||
|
|
||||||
|
digest.compress();
|
||||||
|
int sz = digest.byteSize();
|
||||||
|
if (buf == null || buf.capacity() < sz) {
|
||||||
|
buf = ByteBuffer.allocate(sz+(sz>>1)); // oversize by 50%
|
||||||
|
} else {
|
||||||
|
buf.clear();
|
||||||
|
}
|
||||||
|
digest.asSmallBytes(buf);
|
||||||
|
byte[] arr = Arrays.copyOf(buf.array(), buf.position());
|
||||||
|
return arr;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() {
|
||||||
|
digests = new AVLTreeDigest[digests.length];
|
||||||
|
sortvals = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
digests = resizer.resize(digests, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void call(int ord) {
|
||||||
|
AVLTreeDigest digest = digests[currentSlot];
|
||||||
|
if (digest == null) {
|
||||||
|
digests[currentSlot] = digest = new AVLTreeDigest(100);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
BytesRef term = docToTerm.lookupOrd(ord);
|
||||||
|
Object obj = sf.getType().toObject(sf, term);
|
||||||
|
double val = obj instanceof Date ? ((Date) obj).getTime() : ((Number) obj).doubleValue();
|
||||||
|
digest.add(val);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new UncheckedIOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
class Merger extends FacetSortableMerger {
|
class Merger extends FacetSortableMerger {
|
||||||
protected AVLTreeDigest digest;
|
protected AVLTreeDigest digest;
|
||||||
|
@ -221,4 +501,3 @@ public class PercentileAgg extends SimpleAggValueSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -431,13 +431,8 @@ class AvgSlotAcc extends DoubleFuncSlotAcc {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private double avg(double tot, int count) {
|
|
||||||
return count == 0 ? 0 : tot / count; // returns 0 instead of NaN.. todo - make configurable? if NaN, we need to
|
|
||||||
// handle comparisons though...
|
|
||||||
}
|
|
||||||
|
|
||||||
private double avg(int slot) {
|
private double avg(int slot) {
|
||||||
return avg(result[slot], counts[slot]); // calc once and cache in result?
|
return AggUtil.avg(result[slot], counts[slot]); // calc once and cache in result?
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -488,13 +483,8 @@ class VarianceSlotAcc extends DoubleFuncSlotAcc {
|
||||||
this.sum = resizer.resize(this.sum, 0);
|
this.sum = resizer.resize(this.sum, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private double variance(double sumSq, double sum, int count) {
|
|
||||||
double val = count == 0 ? 0 : (sumSq / count) - Math.pow(sum / count, 2);
|
|
||||||
return val;
|
|
||||||
}
|
|
||||||
|
|
||||||
private double variance(int slot) {
|
private double variance(int slot) {
|
||||||
return variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -550,13 +540,8 @@ class StddevSlotAcc extends DoubleFuncSlotAcc {
|
||||||
this.result = resizer.resize(this.result, 0);
|
this.result = resizer.resize(this.result, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private double stdDev(double sumSq, double sum, int count) {
|
|
||||||
double val = count == 0 ? 0 : Math.sqrt((sumSq / count) - Math.pow(sum / count, 2));
|
|
||||||
return val;
|
|
||||||
}
|
|
||||||
|
|
||||||
private double stdDev(int slot) {
|
private double stdDev(int slot) {
|
||||||
return stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -21,6 +21,9 @@ import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
import org.apache.solr.search.function.FieldNameValueSource;
|
||||||
|
|
||||||
|
|
||||||
public class StddevAgg extends SimpleAggValueSource {
|
public class StddevAgg extends SimpleAggValueSource {
|
||||||
|
@ -30,7 +33,31 @@ public class StddevAgg extends SimpleAggValueSource {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
||||||
return new StddevSlotAcc(getArg(), fcontext, numSlots);
|
ValueSource vs = getArg();
|
||||||
|
|
||||||
|
if (vs instanceof FieldNameValueSource) {
|
||||||
|
String field = ((FieldNameValueSource) vs).getFieldName();
|
||||||
|
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
|
||||||
|
if (sf.getType().getNumberType() == null) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for " + sf.getType().getTypeName());
|
||||||
|
}
|
||||||
|
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||||
|
if (sf.hasDocValues()) {
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
return new StddevSortedNumericAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
return new StddevSortedSetAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for PointField w/o docValues");
|
||||||
|
}
|
||||||
|
return new StddevUnInvertedFieldAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
vs = sf.getType().getValueSource(sf, null);
|
||||||
|
}
|
||||||
|
return new StddevSlotAcc(vs, fcontext, numSlots);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -58,9 +85,44 @@ public class StddevAgg extends SimpleAggValueSource {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected double getDouble() {
|
protected double getDouble() {
|
||||||
double val = count == 0 ? 0.0d : Math.sqrt((sumSq/count)-Math.pow(sum/count, 2));
|
return AggUtil.stdDev(sumSq, sum, count);
|
||||||
return val;
|
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
|
|
||||||
|
class StddevSortedNumericAcc extends SDVSortedNumericAcc {
|
||||||
|
|
||||||
|
public StddevSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected double computeVal(int slot) {
|
||||||
|
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class StddevSortedSetAcc extends SDVSortedSetAcc {
|
||||||
|
|
||||||
|
public StddevSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected double computeVal(int slot) {
|
||||||
|
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class StddevUnInvertedFieldAcc extends SDVUnInvertedFieldAcc {
|
||||||
|
|
||||||
|
public StddevUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected double computeVal(int slot) {
|
||||||
|
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,8 +17,15 @@
|
||||||
package org.apache.solr.search.facet;
|
package org.apache.solr.search.facet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
|
import java.util.Date;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
import org.apache.solr.search.function.FieldNameValueSource;
|
||||||
|
|
||||||
public class SumAgg extends SimpleAggValueSource {
|
public class SumAgg extends SimpleAggValueSource {
|
||||||
|
|
||||||
|
@ -28,7 +35,31 @@ public class SumAgg extends SimpleAggValueSource {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
||||||
return new SumSlotAcc(getArg(), fcontext, numSlots);
|
ValueSource vs = getArg();
|
||||||
|
|
||||||
|
if (vs instanceof FieldNameValueSource) {
|
||||||
|
String field = ((FieldNameValueSource)vs).getFieldName();
|
||||||
|
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
|
||||||
|
if (sf.getType().getNumberType() == null) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for " + sf.getType().getTypeName());
|
||||||
|
}
|
||||||
|
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||||
|
if (sf.hasDocValues()) {
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
return new SumSortedNumericAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
return new SumSortedSetAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for PointField w/o docValues");
|
||||||
|
}
|
||||||
|
return new SumUnInvertedFieldAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
vs = sf.getType().getValueSource(sf, null);
|
||||||
|
}
|
||||||
|
return new SumSlotAcc(vs, fcontext, numSlots);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -48,5 +79,58 @@ public class SumAgg extends SimpleAggValueSource {
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class SumSortedNumericAcc extends DoubleSortedNumericDVAcc {
|
||||||
|
|
||||||
|
public SumSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void collectValues(int doc, int slot) throws IOException {
|
||||||
|
for (int i = 0, count = values.docValueCount(); i < count; i++) {
|
||||||
|
result[slot]+=getDouble(values.nextValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class SumSortedSetAcc extends DoubleSortedSetDVAcc {
|
||||||
|
|
||||||
|
public SumSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void collectValues(int doc, int slot) throws IOException {
|
||||||
|
long ord;
|
||||||
|
while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||||
|
BytesRef term = values.lookupOrd(ord);
|
||||||
|
Object obj = sf.getType().toObject(sf, term);
|
||||||
|
double val = obj instanceof Date? ((Date)obj).getTime(): ((Number)obj).doubleValue();
|
||||||
|
result[slot] += val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class SumUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {
|
||||||
|
|
||||||
|
public SumUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void call(int termNum) {
|
||||||
|
try {
|
||||||
|
BytesRef term = docToTerm.lookupOrd(termNum);
|
||||||
|
Object obj = sf.getType().toObject(sf, term);
|
||||||
|
double val = obj instanceof Date? ((Date)obj).getTime(): ((Number)obj).doubleValue();
|
||||||
|
result[currentSlot] += val;
|
||||||
|
} catch (IOException e) {
|
||||||
|
// find a better way to do it
|
||||||
|
throw new UncheckedIOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,8 +17,15 @@
|
||||||
package org.apache.solr.search.facet;
|
package org.apache.solr.search.facet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
|
import java.util.Date;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
import org.apache.solr.search.function.FieldNameValueSource;
|
||||||
|
|
||||||
public class SumsqAgg extends SimpleAggValueSource {
|
public class SumsqAgg extends SimpleAggValueSource {
|
||||||
public SumsqAgg(ValueSource vs) {
|
public SumsqAgg(ValueSource vs) {
|
||||||
|
@ -27,11 +34,88 @@ public class SumsqAgg extends SimpleAggValueSource {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
||||||
return new SumsqSlotAcc(getArg(), fcontext, numSlots);
|
ValueSource vs = getArg();
|
||||||
|
|
||||||
|
if (vs instanceof FieldNameValueSource) {
|
||||||
|
String field = ((FieldNameValueSource)vs).getFieldName();
|
||||||
|
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
|
||||||
|
if (sf.getType().getNumberType() == null) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for " + sf.getType().getTypeName());
|
||||||
|
}
|
||||||
|
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||||
|
if (sf.hasDocValues()) {
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
return new SumSqSortedNumericAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
return new SumSqSortedSetAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for PointField w/o docValues");
|
||||||
|
}
|
||||||
|
return new SumSqUnInvertedFieldAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
vs = sf.getType().getValueSource(sf, null);
|
||||||
|
}
|
||||||
|
return new SumsqSlotAcc(vs, fcontext, numSlots);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FacetMerger createFacetMerger(Object prototype) {
|
public FacetMerger createFacetMerger(Object prototype) {
|
||||||
return new SumAgg.Merger();
|
return new SumAgg.Merger();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class SumSqSortedNumericAcc extends DoubleSortedNumericDVAcc {
|
||||||
|
|
||||||
|
public SumSqSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void collectValues(int doc, int slot) throws IOException {
|
||||||
|
for (int i = 0, count = values.docValueCount(); i < count; i++) {
|
||||||
|
double val = getDouble(values.nextValue());
|
||||||
|
result[slot]+= val * val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class SumSqSortedSetAcc extends DoubleSortedSetDVAcc {
|
||||||
|
|
||||||
|
public SumSqSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void collectValues(int doc, int slot) throws IOException {
|
||||||
|
long ord;
|
||||||
|
while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||||
|
BytesRef term = values.lookupOrd(ord);
|
||||||
|
Object obj = sf.getType().toObject(sf, term);
|
||||||
|
double val = obj instanceof Date ? ((Date)obj).getTime(): ((Number)obj).doubleValue();
|
||||||
|
result[slot] += val * val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class SumSqUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {
|
||||||
|
|
||||||
|
public SumSqUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void call(int termNum) {
|
||||||
|
try {
|
||||||
|
BytesRef term = docToTerm.lookupOrd(termNum);
|
||||||
|
Object obj = sf.getType().toObject(sf, term);
|
||||||
|
double val = obj instanceof Date? ((Date)obj).getTime(): ((Number)obj).doubleValue();
|
||||||
|
result[currentSlot] += val * val;
|
||||||
|
} catch (IOException e) {
|
||||||
|
// find a better way to do it
|
||||||
|
throw new UncheckedIOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,13 @@
|
||||||
package org.apache.solr.search.facet;
|
package org.apache.solr.search.facet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.function.IntFunction;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -28,9 +34,11 @@ public abstract class UnInvertedFieldAcc extends SlotAcc implements UnInvertedFi
|
||||||
|
|
||||||
UnInvertedField uif;
|
UnInvertedField uif;
|
||||||
UnInvertedField.DocToTerm docToTerm;
|
UnInvertedField.DocToTerm docToTerm;
|
||||||
|
SchemaField sf;
|
||||||
|
|
||||||
public UnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
public UnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
super(fcontext);
|
super(fcontext);
|
||||||
|
this.sf = sf;
|
||||||
uif = UnInvertedField.getUnInvertedField(sf.getName(), fcontext.qcontext.searcher());
|
uif = UnInvertedField.getUnInvertedField(sf.getName(), fcontext.qcontext.searcher());
|
||||||
docToTerm = uif.new DocToTerm();
|
docToTerm = uif.new DocToTerm();
|
||||||
fcontext.qcontext.addCloseHook(this);
|
fcontext.qcontext.addCloseHook(this);
|
||||||
|
@ -44,3 +52,108 @@ public abstract class UnInvertedFieldAcc extends SlotAcc implements UnInvertedFi
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
abstract class DoubleUnInvertedFieldAcc extends UnInvertedFieldAcc {
|
||||||
|
double[] result;
|
||||||
|
int currentSlot;
|
||||||
|
double initialValue;
|
||||||
|
|
||||||
|
public DoubleUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots, double initialValue) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
result = new double[numSlots];
|
||||||
|
if (initialValue != 0) {
|
||||||
|
this.initialValue = initialValue;
|
||||||
|
Arrays.fill(result, initialValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
|
||||||
|
this.currentSlot = slot;
|
||||||
|
docToTerm.getBigTerms(doc + currentDocBase, this);
|
||||||
|
docToTerm.getSmallTerms(doc + currentDocBase, this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
return Double.compare(result[slotA], result[slotB]);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slotNum) throws IOException {
|
||||||
|
return result[slotNum];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
Arrays.fill(result, initialValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
resizer.resize(result, initialValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Base accumulator to compute standard deviation and variance for uninvertible fields
|
||||||
|
*/
|
||||||
|
abstract class SDVUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {
|
||||||
|
int[] counts;
|
||||||
|
double[] sum;
|
||||||
|
|
||||||
|
public SDVUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots, 0);
|
||||||
|
this.counts = new int[numSlots];
|
||||||
|
this.sum = new double[numSlots];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void call(int termNum) {
|
||||||
|
try {
|
||||||
|
BytesRef term = docToTerm.lookupOrd(termNum);
|
||||||
|
Object obj = sf.getType().toObject(sf, term);
|
||||||
|
double val = obj instanceof Date ? ((Date)obj).getTime(): ((Number)obj).doubleValue();
|
||||||
|
result[currentSlot] += val * val;
|
||||||
|
sum[currentSlot]+= val;
|
||||||
|
counts[currentSlot]++;
|
||||||
|
} catch (IOException e) {
|
||||||
|
// find a better way to do it
|
||||||
|
throw new UncheckedIOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract double computeVal(int slot);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(int slotA, int slotB) {
|
||||||
|
return Double.compare(computeVal(slotA), computeVal(slotB));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getValue(int slot) {
|
||||||
|
if (fcontext.isShard()) {
|
||||||
|
ArrayList lst = new ArrayList(3);
|
||||||
|
lst.add(counts[slot]);
|
||||||
|
lst.add(result[slot]);
|
||||||
|
lst.add(sum[slot]);
|
||||||
|
return lst;
|
||||||
|
} else {
|
||||||
|
return computeVal(slot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
Arrays.fill(counts, 0);
|
||||||
|
Arrays.fill(sum, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resize(Resizer resizer) {
|
||||||
|
super.resize(resizer);
|
||||||
|
resizer.resize(counts, 0);
|
||||||
|
resizer.resize(sum, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -20,6 +20,9 @@ import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
import org.apache.solr.search.function.FieldNameValueSource;
|
||||||
|
|
||||||
|
|
||||||
public class VarianceAgg extends SimpleAggValueSource {
|
public class VarianceAgg extends SimpleAggValueSource {
|
||||||
|
@ -29,7 +32,31 @@ public class VarianceAgg extends SimpleAggValueSource {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
||||||
return new VarianceSlotAcc(getArg(), fcontext, numSlots);
|
ValueSource vs = getArg();
|
||||||
|
|
||||||
|
if (vs instanceof FieldNameValueSource) {
|
||||||
|
String field = ((FieldNameValueSource) vs).getFieldName();
|
||||||
|
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
|
||||||
|
if (sf.getType().getNumberType() == null) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for " + sf.getType().getTypeName());
|
||||||
|
}
|
||||||
|
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||||
|
if (sf.hasDocValues()) {
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
return new VarianceSortedNumericAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
return new VarianceSortedSetAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
if (sf.getType().isPointField()) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
name() + " aggregation not supported for PointField w/o docValues");
|
||||||
|
}
|
||||||
|
return new VarianceUnInvertedFieldAcc(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
vs = sf.getType().getValueSource(sf, null);
|
||||||
|
}
|
||||||
|
return new VarianceSlotAcc(vs, fcontext, numSlots);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -57,9 +84,44 @@ public class VarianceAgg extends SimpleAggValueSource {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected double getDouble() {
|
protected double getDouble() {
|
||||||
double val = count == 0 ? 0.0d : (sumSq/count)-Math.pow(sum/count, 2);
|
return AggUtil.variance(sumSq, sum, count);
|
||||||
return val;
|
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
|
|
||||||
|
class VarianceSortedNumericAcc extends SDVSortedNumericAcc {
|
||||||
|
|
||||||
|
public VarianceSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected double computeVal(int slot) {
|
||||||
|
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class VarianceSortedSetAcc extends SDVSortedSetAcc {
|
||||||
|
|
||||||
|
public VarianceSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected double computeVal(int slot) {
|
||||||
|
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class VarianceUnInvertedFieldAcc extends SDVUnInvertedFieldAcc {
|
||||||
|
|
||||||
|
public VarianceUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
|
||||||
|
super(fcontext, sf, numSlots);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected double computeVal(int slot) {
|
||||||
|
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -694,6 +694,7 @@
|
||||||
-->
|
-->
|
||||||
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
|
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
|
||||||
<dynamicField name="*_i1" type="int" indexed="true" stored="true" multiValued="false" sortMissingLast="true"/>
|
<dynamicField name="*_i1" type="int" indexed="true" stored="true" multiValued="false" sortMissingLast="true"/>
|
||||||
|
<dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true" sortMissingLast="true"/>
|
||||||
<dynamicField name="*_idv" type="int" indexed="true" stored="true" docValues="true" multiValued="false"/>
|
<dynamicField name="*_idv" type="int" indexed="true" stored="true" docValues="true" multiValued="false"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -18,22 +18,26 @@ package org.apache.solr.handler.component;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.text.DateFormat;
|
import java.text.DateFormat;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.TimeZone;
|
import java.util.TimeZone;
|
||||||
|
|
||||||
|
import com.google.common.hash.HashFunction;
|
||||||
|
import com.tdunning.math.stats.AVLTreeDigest;
|
||||||
|
import org.apache.commons.math3.util.Combinations;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.TermQuery;
|
|
||||||
import org.apache.lucene.queries.function.valuesource.QueryValueSource;
|
import org.apache.lucene.queries.function.valuesource.QueryValueSource;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.SolrException.ErrorCode;
|
import org.apache.solr.common.SolrException.ErrorCode;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
|
@ -44,23 +48,17 @@ import org.apache.solr.common.util.Base64;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.common.util.StrUtils;
|
import org.apache.solr.common.util.StrUtils;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.handler.component.StatsField.Stat;
|
|
||||||
import org.apache.solr.handler.component.StatsField.HllOptions;
|
import org.apache.solr.handler.component.StatsField.HllOptions;
|
||||||
|
import org.apache.solr.handler.component.StatsField.Stat;
|
||||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||||
import org.apache.solr.request.SolrQueryRequest;
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
import org.apache.solr.response.SolrQueryResponse;
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
|
||||||
|
|
||||||
import org.apache.commons.math3.util.Combinations;
|
|
||||||
import com.tdunning.math.stats.AVLTreeDigest;
|
|
||||||
import com.google.common.hash.HashFunction;
|
|
||||||
import org.apache.solr.util.hll.HLL;
|
import org.apache.solr.util.hll.HLL;
|
||||||
|
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Statistics Component Test
|
* Statistics Component Test (which also checks some equivalent json.facet functionality)
|
||||||
*/
|
*/
|
||||||
public class StatsComponentTest extends SolrTestCaseJ4 {
|
public class StatsComponentTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
@ -116,7 +114,7 @@ public class StatsComponentTest extends SolrTestCaseJ4 {
|
||||||
"stats_tis_ni_dv","stats_tfs_ni_dv","stats_tls_ni_dv","stats_tds_ni_dv", // Doc Values Not indexed
|
"stats_tis_ni_dv","stats_tfs_ni_dv","stats_tls_ni_dv","stats_tds_ni_dv", // Doc Values Not indexed
|
||||||
"stats_is_p", "stats_fs_p", "stats_ls_p", "stats_ds_p", // Point Fields
|
"stats_is_p", "stats_fs_p", "stats_ls_p", "stats_ds_p", // Point Fields
|
||||||
"stats_is_ni_p","stats_fs_ni_p","stats_ls_ni_p" // Point Doc Values Not indexed
|
"stats_is_ni_p","stats_fs_ni_p","stats_ls_ni_p" // Point Doc Values Not indexed
|
||||||
}) {
|
}) {
|
||||||
|
|
||||||
doTestMVFieldStatisticsResult(f);
|
doTestMVFieldStatisticsResult(f);
|
||||||
clearIndex();
|
clearIndex();
|
||||||
|
@ -298,87 +296,90 @@ public class StatsComponentTest extends SolrTestCaseJ4 {
|
||||||
"fq", "{!tag=fq1}id:1"),
|
"fq", "{!tag=fq1}id:1"),
|
||||||
params("stats.field", "{!ex=fq1,fq2}"+f, "stats", "true",
|
params("stats.field", "{!ex=fq1,fq2}"+f, "stats", "true",
|
||||||
"fq", "{!tag=fq1}-id_i:[0 TO 2]",
|
"fq", "{!tag=fq1}-id_i:[0 TO 2]",
|
||||||
"fq", "{!tag=fq2}-id_i:[2 TO 1000]") }) {
|
"fq", "{!tag=fq2}-id_i:[2 TO 1000]"),
|
||||||
|
params("json.facet", // note: no distinctValues support and not comparing min/max values
|
||||||
|
"{min:'min("+f+")',count:'countvals("+f+")',missing:'missing("+f+")',max:'max("+f+")', sum:'sum("+f+")', " +
|
||||||
|
" countDistinct:'unique("+f+")', sumOfSquares:'sumsq("+f+")', mean:'avg("+f+")', stddev:'stddev("+f+")' }")
|
||||||
|
}) {
|
||||||
|
// easy switch to know if/when we are using json.facet which doesn't support some options
|
||||||
|
final boolean json = (null != baseParams.get("json.facet"));
|
||||||
assertQ("test statistics values",
|
assertQ("test statistics values",
|
||||||
req(baseParams, "q", "*:*", "stats.calcdistinct", "true")
|
req(baseParams, "q", "*:*", "stats.calcdistinct", "true")
|
||||||
, "//double[@name='min'][.='-100.0']"
|
, json ? "//*" : "//double[@name='min'][.='-100.0']"
|
||||||
, "//double[@name='max'][.='200.0']"
|
, json ? "//*" : "//double[@name='max'][.='200.0']"
|
||||||
, "//double[@name='sum'][.='9.0']"
|
, "//double[@name='sum'][.='9.0']"
|
||||||
, "//long[@name='count'][.='8']"
|
, "//long[@name='count'][.='8']"
|
||||||
, "//long[@name='missing'][.='3']"
|
, "//long[@name='missing'][.='3']"
|
||||||
, "//long[@name='countDistinct'][.='8']"
|
, json ? "//int[@name='countDistinct'][.='8']": "//long[@name='countDistinct'][.='8']" // SOLR-11775
|
||||||
, "count(//arr[@name='distinctValues']/*)=8"
|
, json ? "//*" : "count(//arr[@name='distinctValues']/*)=8"
|
||||||
, "//double[@name='sumOfSquares'][.='53101.0']"
|
, "//double[@name='sumOfSquares'][.='53101.0']"
|
||||||
, "//double[@name='mean'][.='1.125']"
|
, "//double[@name='mean'][.='1.125']"
|
||||||
, "//double[@name='stddev'][.='87.08852228787508']"
|
,json ? "//*" : "//double[@name='stddev'][.='87.08852228787508']" // SOLR-11725
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test statistics values w/fq",
|
assertQ("test statistics values w/fq",
|
||||||
req(baseParams, "fq", "-id:1",
|
req(baseParams, "fq", "-id:1",
|
||||||
"q", "*:*", "stats.calcdistinct", "true")
|
"q", "*:*", "stats.calcdistinct", "true")
|
||||||
, "//double[@name='min'][.='-40.0']"
|
, json ? "//*" : "//double[@name='min'][.='-40.0']"
|
||||||
, "//double[@name='max'][.='200.0']"
|
, json ? "//*" : "//double[@name='max'][.='200.0']"
|
||||||
, "//double[@name='sum'][.='119.0']"
|
, "//double[@name='sum'][.='119.0']"
|
||||||
, "//long[@name='count'][.='6']"
|
, "//long[@name='count'][.='6']"
|
||||||
, "//long[@name='missing'][.='3']"
|
, "//long[@name='missing'][.='3']"
|
||||||
, "//long[@name='countDistinct'][.='6']"
|
, json? "//int[@name='countDistinct'][.='6']" :"//long[@name='countDistinct'][.='6']" // SOLR-11775
|
||||||
, "count(//arr[@name='distinctValues']/*)=6"
|
, json ? "//*" : "count(//arr[@name='distinctValues']/*)=6"
|
||||||
, "//double[@name='sumOfSquares'][.='43001.0']"
|
, "//double[@name='sumOfSquares'][.='43001.0']"
|
||||||
, "//double[@name='mean'][.='19.833333333333332']"
|
, "//double[@name='mean'][.='19.833333333333332']"
|
||||||
, "//double[@name='stddev'][.='90.15634568163611']"
|
, json ? "//*" : "//double[@name='stddev'][.='90.15634568163611']" // SOLR-11725
|
||||||
);
|
);
|
||||||
|
|
||||||
// TODO: why are there 3 identical requests below?
|
if (!json) { // checking stats.facet makes no sense for json faceting
|
||||||
|
assertQ("test stats.facet (using boolean facet field)",
|
||||||
assertQ("test statistics values",
|
req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s")
|
||||||
req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s")
|
// baseline
|
||||||
, "//double[@name='min'][.='-100.0']"
|
, "//lst[@name='"+f+"']/double[@name='min'][.='-100.0']"
|
||||||
, "//double[@name='max'][.='200.0']"
|
, "//lst[@name='"+f+"']/double[@name='max'][.='200.0']"
|
||||||
, "//double[@name='sum'][.='9.0']"
|
, "//lst[@name='"+f+"']/double[@name='sum'][.='9.0']"
|
||||||
, "//long[@name='count'][.='8']"
|
, "//lst[@name='"+f+"']/long[@name='count'][.='8']"
|
||||||
, "//long[@name='missing'][.='3']"
|
, "//lst[@name='"+f+"']/long[@name='missing'][.='3']"
|
||||||
, "//long[@name='countDistinct'][.='8']"
|
, "//lst[@name='"+f+"']/long[@name='countDistinct'][.='8']"
|
||||||
, "count(//lst[@name='" + f + "']/arr[@name='distinctValues']/*)=8"
|
, "count(//lst[@name='" + f + "']/arr[@name='distinctValues']/*)=8"
|
||||||
, "//double[@name='sumOfSquares'][.='53101.0']"
|
, "//lst[@name='"+f+"']/double[@name='sumOfSquares'][.='53101.0']"
|
||||||
, "//double[@name='mean'][.='1.125']"
|
, "//lst[@name='"+f+"']/double[@name='mean'][.='1.125']"
|
||||||
, "//double[@name='stddev'][.='87.08852228787508']"
|
, "//lst[@name='"+f+"']/double[@name='stddev'][.='87.08852228787508']"
|
||||||
);
|
// facet 'true'
|
||||||
|
, "//lst[@name='true']/double[@name='min'][.='-100.0']"
|
||||||
assertQ("test value for active_s=true",
|
, "//lst[@name='true']/double[@name='max'][.='200.0']"
|
||||||
req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s")
|
, "//lst[@name='true']/double[@name='sum'][.='70.0']"
|
||||||
, "//lst[@name='true']/double[@name='min'][.='-100.0']"
|
, "//lst[@name='true']/long[@name='count'][.='4']"
|
||||||
, "//lst[@name='true']/double[@name='max'][.='200.0']"
|
, "//lst[@name='true']/long[@name='missing'][.='1']"
|
||||||
, "//lst[@name='true']/double[@name='sum'][.='70.0']"
|
, "//lst[@name='true']//long[@name='countDistinct'][.='4']"
|
||||||
, "//lst[@name='true']/long[@name='count'][.='4']"
|
, "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4"
|
||||||
, "//lst[@name='true']/long[@name='missing'][.='1']"
|
, "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']"
|
||||||
, "//lst[@name='true']//long[@name='countDistinct'][.='4']"
|
, "//lst[@name='true']/double[@name='mean'][.='17.5']"
|
||||||
, "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4"
|
, "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']"
|
||||||
, "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']"
|
// facet 'false'
|
||||||
, "//lst[@name='true']/double[@name='mean'][.='17.5']"
|
, "//lst[@name='false']/double[@name='min'][.='-40.0']"
|
||||||
, "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']"
|
, "//lst[@name='false']/double[@name='max'][.='10.0']"
|
||||||
);
|
, "//lst[@name='false']/double[@name='sum'][.='-61.0']"
|
||||||
|
, "//lst[@name='false']/long[@name='count'][.='4']"
|
||||||
assertQ("test value for active_s=false",
|
, "//lst[@name='false']/long[@name='missing'][.='2']"
|
||||||
req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s")
|
, "//lst[@name='true']//long[@name='countDistinct'][.='4']"
|
||||||
, "//lst[@name='false']/double[@name='min'][.='-40.0']"
|
, "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4"
|
||||||
, "//lst[@name='false']/double[@name='max'][.='10.0']"
|
, "//lst[@name='false']/double[@name='sumOfSquares'][.='2601.0']"
|
||||||
, "//lst[@name='false']/double[@name='sum'][.='-61.0']"
|
, "//lst[@name='false']/double[@name='mean'][.='-15.25']"
|
||||||
, "//lst[@name='false']/long[@name='count'][.='4']"
|
, "//lst[@name='false']/double[@name='stddev'][.='23.59908190304586']"
|
||||||
, "//lst[@name='false']/long[@name='missing'][.='2']"
|
);
|
||||||
, "//lst[@name='true']//long[@name='countDistinct'][.='4']"
|
}
|
||||||
, "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4"
|
|
||||||
, "//lst[@name='false']/double[@name='sumOfSquares'][.='2601.0']"
|
|
||||||
, "//lst[@name='false']/double[@name='mean'][.='-15.25']"
|
|
||||||
, "//lst[@name='false']/double[@name='stddev'][.='23.59908190304586']"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
assertQ("cardinality"
|
assertQ("cardinality"
|
||||||
, req("q", "*:*", "rows", "0", "stats", "true", "stats.field", "{!cardinality=true}" + f)
|
, req("q", "*:*", "rows", "0", "stats", "true", "stats.field", "{!cardinality=true}" + f)
|
||||||
, "//long[@name='cardinality'][.='8']"
|
, "//long[@name='cardinality'][.='8']"
|
||||||
);
|
);
|
||||||
|
assertQ("json cardinality"
|
||||||
|
, req("q", "*:*", "rows", "0", "json.facet", "{cardinality:'hll("+f+")'}")
|
||||||
|
, "//int[@name='cardinality'][.='8']" // SOLR-11775
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFieldStatisticsResultsStringField() throws Exception {
|
public void testFieldStatisticsResultsStringField() throws Exception {
|
||||||
|
|
|
@ -43,20 +43,21 @@ import org.junit.Test;
|
||||||
public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistributedSearchTestCase {
|
public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistributedSearchTestCase {
|
||||||
|
|
||||||
// TODO: add hll & variance - update all assertions to test their values (right after any mention of 'stddev')
|
// TODO: add hll & variance - update all assertions to test their values (right after any mention of 'stddev')
|
||||||
private static List<String> ALL_STATS = Arrays.asList("min", "max", "sum", "stddev", "avg", "sumsq", "unique", "missing", "countvals");
|
private static List<String> ALL_STATS = Arrays.asList("min", "max", "sum", "stddev", "avg", "sumsq", "unique",
|
||||||
|
"missing", "countvals", "percentile");
|
||||||
|
|
||||||
private String STAT_FIELD = "stat_i1";
|
private final String STAT_FIELD;
|
||||||
private String ALL_STATS_JSON = "";
|
private String ALL_STATS_JSON = "";
|
||||||
|
|
||||||
public DistributedFacetSimpleRefinementLongTailTest() {
|
public DistributedFacetSimpleRefinementLongTailTest() {
|
||||||
// we need DVs on point fields to compute stats & facets
|
// we need DVs on point fields to compute stats & facets
|
||||||
if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true");
|
if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true");
|
||||||
|
|
||||||
// TODO: randomizing STAT_FIELD to be multiValued=true blocked by SOLR-11706
|
STAT_FIELD = random().nextBoolean() ? "stat_is" : "stat_i";
|
||||||
// STAT_FIELD = random().nextBoolean() ? "stat_i1" : "stat_i";
|
|
||||||
|
|
||||||
for (String stat : ALL_STATS) {
|
for (String stat : ALL_STATS) {
|
||||||
ALL_STATS_JSON += stat + ":'" + stat + "(" + STAT_FIELD + ")',";
|
String val = stat.equals("percentile")? STAT_FIELD+",90": STAT_FIELD;
|
||||||
|
ALL_STATS_JSON += stat + ":'" + stat + "(" + val + ")',";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -232,6 +233,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
|
||||||
assertEquals(101L, bucket.get("countvals"));
|
assertEquals(101L, bucket.get("countvals"));
|
||||||
assertEquals(0L, bucket.get("missing"));
|
assertEquals(0L, bucket.get("missing"));
|
||||||
assertEquals(48.0D, bucket.get("sum"));
|
assertEquals(48.0D, bucket.get("sum"));
|
||||||
|
assertEquals(1.0D, bucket.get("percentile"));
|
||||||
assertEquals(0.475247524752475D, (double) bucket.get("avg"), 0.1E-7);
|
assertEquals(0.475247524752475D, (double) bucket.get("avg"), 0.1E-7);
|
||||||
assertEquals(54.0D, (double) bucket.get("sumsq"), 0.1E-7);
|
assertEquals(54.0D, (double) bucket.get("sumsq"), 0.1E-7);
|
||||||
// assertEquals(0.55846323792D, bucket.getStddev(), 0.1E-7); // TODO: SOLR-11725
|
// assertEquals(0.55846323792D, bucket.getStddev(), 0.1E-7); // TODO: SOLR-11725
|
||||||
|
@ -391,6 +393,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
|
||||||
assertEquals(300L, aaa0_Bucket.get("countvals"));
|
assertEquals(300L, aaa0_Bucket.get("countvals"));
|
||||||
assertEquals(0L, aaa0_Bucket.get("missing"));
|
assertEquals(0L, aaa0_Bucket.get("missing"));
|
||||||
assertEquals(34650.0D, aaa0_Bucket.get("sum"));
|
assertEquals(34650.0D, aaa0_Bucket.get("sum"));
|
||||||
|
assertEquals(483.70000000000016D, (double)aaa0_Bucket.get("percentile"), 0.1E-7);
|
||||||
assertEquals(115.5D, (double) aaa0_Bucket.get("avg"), 0.1E-7);
|
assertEquals(115.5D, (double) aaa0_Bucket.get("avg"), 0.1E-7);
|
||||||
assertEquals(1.674585E7D, (double) aaa0_Bucket.get("sumsq"), 0.1E-7);
|
assertEquals(1.674585E7D, (double) aaa0_Bucket.get("sumsq"), 0.1E-7);
|
||||||
// assertEquals(206.4493184076D, (double) aaa0_Bucket.get("stddev"), 0.1E-7); // TODO: SOLR-11725
|
// assertEquals(206.4493184076D, (double) aaa0_Bucket.get("stddev"), 0.1E-7); // TODO: SOLR-11725
|
||||||
|
@ -403,6 +406,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
|
||||||
assertEquals(0L, tail_Bucket.get("min"));
|
assertEquals(0L, tail_Bucket.get("min"));
|
||||||
assertEquals(44L, tail_Bucket.get("max"));
|
assertEquals(44L, tail_Bucket.get("max"));
|
||||||
assertEquals(90L, tail_Bucket.get("countvals"));
|
assertEquals(90L, tail_Bucket.get("countvals"));
|
||||||
|
assertEquals(40.0D, tail_Bucket.get("percentile"));
|
||||||
assertEquals(45L, tail_Bucket.get("missing"));
|
assertEquals(45L, tail_Bucket.get("missing"));
|
||||||
assertEquals(1980.0D, tail_Bucket.get("sum"));
|
assertEquals(1980.0D, tail_Bucket.get("sum"));
|
||||||
assertEquals(22.0D, (double) tail_Bucket.get("avg"), 0.1E-7);
|
assertEquals(22.0D, (double) tail_Bucket.get("avg"), 0.1E-7);
|
||||||
|
@ -419,6 +423,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
|
||||||
assertEquals(35L, tailB_Bucket.get("min"));
|
assertEquals(35L, tailB_Bucket.get("min"));
|
||||||
assertEquals(40L, tailB_Bucket.get("max"));
|
assertEquals(40L, tailB_Bucket.get("max"));
|
||||||
assertEquals(12L, tailB_Bucket.get("countvals"));
|
assertEquals(12L, tailB_Bucket.get("countvals"));
|
||||||
|
assertEquals(39.9D, tailB_Bucket.get("percentile"));
|
||||||
assertEquals(5L, tailB_Bucket.get("missing"));
|
assertEquals(5L, tailB_Bucket.get("missing"));
|
||||||
assertEquals(450.0D, tailB_Bucket.get("sum"));
|
assertEquals(450.0D, tailB_Bucket.get("sum"));
|
||||||
assertEquals(37.5D, (double) tailB_Bucket.get("avg"), 0.1E-7);
|
assertEquals(37.5D, (double) tailB_Bucket.get("avg"), 0.1E-7);
|
||||||
|
|
|
@ -217,14 +217,22 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
||||||
|
|
||||||
public void indexSimple(Client client) throws Exception {
|
public void indexSimple(Client client) throws Exception {
|
||||||
client.deleteByQuery("*:*", null);
|
client.deleteByQuery("*:*", null);
|
||||||
client.add(sdoc("id", "1", "cat_s", "A", "where_s", "NY", "num_d", "4", "num_i", "2", "val_b", "true", "sparse_s", "one"), null);
|
client.add(sdoc("id", "1", "cat_s", "A", "where_s", "NY", "num_d", "4", "num_i", "2",
|
||||||
client.add(sdoc("id", "2", "cat_s", "B", "where_s", "NJ", "num_d", "-9", "num_i", "-5", "val_b", "false"), null);
|
"num_is", "4", "num_is", "2",
|
||||||
|
"val_b", "true", "sparse_s", "one"), null);
|
||||||
|
client.add(sdoc("id", "2", "cat_s", "B", "where_s", "NJ", "num_d", "-9", "num_i", "-5",
|
||||||
|
"num_is", "-9", "num_is", "-5",
|
||||||
|
"val_b", "false"), null);
|
||||||
client.add(sdoc("id", "3"), null);
|
client.add(sdoc("id", "3"), null);
|
||||||
client.commit();
|
client.commit();
|
||||||
client.add(sdoc("id", "4", "cat_s", "A", "where_s", "NJ", "num_d", "2", "num_i", "3"), null);
|
client.add(sdoc("id", "4", "cat_s", "A", "where_s", "NJ", "num_d", "2", "num_i", "3",
|
||||||
client.add(sdoc("id", "5", "cat_s", "B", "where_s", "NJ", "num_d", "11", "num_i", "7", "sparse_s", "two"),null);
|
"num_is", "2", "num_is", "3"), null);
|
||||||
|
client.add(sdoc("id", "5", "cat_s", "B", "where_s", "NJ", "num_d", "11", "num_i", "7",
|
||||||
|
"num_is", "11", "num_is", "7",
|
||||||
|
"sparse_s", "two"),null);
|
||||||
client.commit();
|
client.commit();
|
||||||
client.add(sdoc("id", "6", "cat_s", "B", "where_s", "NY", "num_d", "-5", "num_i", "-5"),null);
|
client.add(sdoc("id", "6", "cat_s", "B", "where_s", "NY", "num_d", "-5", "num_i", "-5",
|
||||||
|
"num_is", "-5"),null);
|
||||||
client.commit();
|
client.commit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -874,12 +882,20 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
||||||
Client client = Client.localClient();
|
Client client = Client.localClient();
|
||||||
indexSimple(client);
|
indexSimple(client);
|
||||||
|
|
||||||
|
assertJQ(req("q", "*:*", "rows", "0", "json.facet", "{x:'sum(num_is)'}")
|
||||||
|
, "facets=={count:6 , x:,10.0}"
|
||||||
|
);
|
||||||
|
assertJQ(req("q", "*:*", "rows", "0", "json.facet", "{x:'min(num_is)'}")
|
||||||
|
, "facets=={count:6 , x:,-9}"
|
||||||
|
);
|
||||||
|
|
||||||
// test multiple json.facet commands
|
// test multiple json.facet commands
|
||||||
assertJQ(req("q", "*:*", "rows", "0"
|
assertJQ(req("q", "*:*", "rows", "0"
|
||||||
, "json.facet", "{x:'sum(num_d)'}"
|
, "json.facet", "{x:'sum(num_d)'}"
|
||||||
, "json.facet", "{y:'min(num_d)'}"
|
, "json.facet", "{y:'min(num_d)'}"
|
||||||
|
, "json.facet", "{z:'min(num_is)'}"
|
||||||
)
|
)
|
||||||
, "facets=={count:6 , x:3.0, y:-9.0 }"
|
, "facets=={count:6 , x:3.0, y:-9.0, z:-9 }"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
@ -922,10 +938,11 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
||||||
|
|
||||||
// test nested streaming with stats under streaming
|
// test nested streaming with stats under streaming
|
||||||
assertJQ(req("q", "*:*", "rows", "0"
|
assertJQ(req("q", "*:*", "rows", "0"
|
||||||
, "json.facet", "{ cat:{terms:{field:'cat_s', method:stream,sort:'index asc', facet:{ where:{terms:{field:where_s,method:stream,sort:'index asc',sort:'index asc', facet:{x:'max(num_d)'} }}} }}}"
|
, "json.facet", "{ cat:{terms:{field:'cat_s', method:stream,sort:'index asc', facet:{ where:{terms:{field:where_s,method:stream,sort:'index asc',sort:'index asc', facet:{x:'max(num_d)', y:'sum(num_is)'} }}} }}}"
|
||||||
)
|
)
|
||||||
, "facets=={count:6 " +
|
, "facets=={count:6 " +
|
||||||
", cat :{buckets:[{val:A, count:2, where:{buckets:[{val:NJ,count:1,x:2.0},{val:NY,count:1,x:4.0}]} },{val:B, count:3, where:{buckets:[{val:NJ,count:2,x:11.0},{val:NY,count:1,x:-5.0}]} }]}"
|
", cat :{buckets:[{val:A, count:2, where:{buckets:[{val:NJ,count:1,x:2.0,y:5.0},{val:NY,count:1,x:4.0,y:6.0}]} }," +
|
||||||
|
"{val:B, count:3, where:{buckets:[{val:NJ,count:2,x:11.0,y:4.0},{val:NY,count:1,x:-5.0,y:-5.0}]} }]}"
|
||||||
+ "}"
|
+ "}"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -1340,7 +1357,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
||||||
", f2:{ 'buckets':[{ val:'A', count:2, n1:2}, { val:'B', count:3, n1:0 }]} }"
|
", f2:{ 'buckets':[{ val:'A', count:2, n1:2}, { val:'B', count:3, n1:0 }]} }"
|
||||||
);
|
);
|
||||||
|
|
||||||
// test sorting by missing stat with domain query
|
// test sorting by countvals stat with domain query
|
||||||
client.testJQ(params(p, "q", "-id:*"
|
client.testJQ(params(p, "q", "-id:*"
|
||||||
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'countvals(field(${sparse_num_d}))'} }}" +
|
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'countvals(field(${sparse_num_d}))'} }}" +
|
||||||
" , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'countvals(field(${sparse_num_d}))'} }} }"
|
" , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'countvals(field(${sparse_num_d}))'} }} }"
|
||||||
|
@ -1779,8 +1796,6 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// stats at top level
|
// stats at top level
|
||||||
client.testJQ(params(p, "q", "*:*"
|
client.testJQ(params(p, "q", "*:*"
|
||||||
, "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', mind:'min(${num_d})', maxd:'max(${num_d})'" +
|
, "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', mind:'min(${num_d})', maxd:'max(${num_d})'" +
|
||||||
|
@ -1799,6 +1814,73 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
||||||
"}"
|
"}"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// stats at top level on multi-valued fields
|
||||||
|
client.testJQ(params(p, "q", "*:*"
|
||||||
|
, "json.facet", "{ sum1:'sum(${num_fs})', sumsq1:'sumsq(${num_fs})', avg1:'avg(${num_fs})', mind:'min(${num_fs})', maxd:'max(${num_fs})'" +
|
||||||
|
", mini:'min(${num_is})', maxi:'max(${num_is})', mins:'min(${multi_ss})', maxs:'max(${multi_ss})'" +
|
||||||
|
", stddev:'stddev(${num_fs})', variance:'variance(${num_fs})', median:'percentile(${num_fs}, 50)'" +
|
||||||
|
", perc:'percentile(${num_fs}, 0,75,100)'" +
|
||||||
|
" }"
|
||||||
|
)
|
||||||
|
, "facets=={ 'count':6, " +
|
||||||
|
"sum1:0.0, sumsq1:51.5, avg1:0.0, mind:-5.0, maxd:3.0" +
|
||||||
|
", mini:-5, maxi:3, mins:'a', maxs:'b'" +
|
||||||
|
", stddev:2.537222891273055, variance:6.4375, median:0.0, perc:[-5.0,2.25,3.0]" +
|
||||||
|
"}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// test sorting by multi-valued
|
||||||
|
client.testJQ(params(p, "q", "*:*"
|
||||||
|
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'avg(${num_is})'} }}" +
|
||||||
|
" , f2:{terms:{${terms} field:'${cat_s}', sort:'n1 asc', facet:{n1:'avg(${num_is})'} }} }"
|
||||||
|
)
|
||||||
|
, "facets=={ 'count':6, " +
|
||||||
|
" f1:{ 'buckets':[{ val:'B', count:3, n1: 0.25}, { val:'A', count:2, n1:0.0}]}" +
|
||||||
|
", f2:{ 'buckets':[{ val:'A', count:2, n1:0.0}, { val:'B', count:3, n1:0.25 }]} }"
|
||||||
|
);
|
||||||
|
|
||||||
|
// test sorting by percentile
|
||||||
|
client.testJQ(params(p, "q", "*:*"
|
||||||
|
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 asc', facet:{n1:'percentile(${num_is}, 50)'} }}" +
|
||||||
|
" , f2:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'percentile(${num_is}, 50)'} }} }"
|
||||||
|
)
|
||||||
|
, "facets=={ 'count':6, " +
|
||||||
|
" f1:{ 'buckets':[{ val:'B', count:3, n1: -0.50}, { val:'A', count:2, n1:1.0}]}" +
|
||||||
|
", f2:{ 'buckets':[{ val:'A', count:2, n1:1.0}, { val:'B', count:3, n1:-0.50 }]} }"
|
||||||
|
);
|
||||||
|
|
||||||
|
// test sorting by multi-valued field with domain query
|
||||||
|
client.testJQ(params(p, "q", "-id:*"
|
||||||
|
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'sum(${num_is})'} }}" +
|
||||||
|
" , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'sum(${num_is})'} }} }"
|
||||||
|
)
|
||||||
|
, "facets=={ 'count':0, " +
|
||||||
|
" f1:{ 'buckets':[{ val:'B', count:3, n1:1.0 }, { val:'A', count:2, n1:0.0}]}" +
|
||||||
|
", f2:{ 'buckets':[{ val:'A', count:2, n1:0.0}, { val:'B', count:3, n1:1.0 }]} }"
|
||||||
|
);
|
||||||
|
|
||||||
|
client.testJQ(params(p, "q", "*:*"
|
||||||
|
, "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " +
|
||||||
|
"facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " +
|
||||||
|
"facet:{n1:'min(${multi_ss})'}}}}}}}"
|
||||||
|
)
|
||||||
|
, "facets=={ 'count':6, " +
|
||||||
|
" f1:{ 'buckets':[{ val:'B', count:3, f2:{'buckets':[{val:'NJ', count:2, n1:'a'},{val:'NY', count:1, n1:'a'}]} }," +
|
||||||
|
" { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:'b'},{val:'NY', count:1}]}}]}" +
|
||||||
|
"}"
|
||||||
|
);
|
||||||
|
|
||||||
|
client.testJQ(params(p, "q", "*:*"
|
||||||
|
, "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " +
|
||||||
|
"facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " +
|
||||||
|
"facet:{n1:'max(${multi_ss})'}}}}}}}"
|
||||||
|
)
|
||||||
|
, "facets=={ 'count':6, " +
|
||||||
|
" f1:{ 'buckets':[{ val:'B', count:3, f2:{'buckets':[{val:'NJ', count:2, n1:'b'},{val:'NY', count:1, n1:'b'}]} }," +
|
||||||
|
" { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:'b'},{val:'NY', count:1}]}}]}" +
|
||||||
|
"}"
|
||||||
|
);
|
||||||
|
|
||||||
// stats at top level, no matches
|
// stats at top level, no matches
|
||||||
client.testJQ(params(p, "q", "id:DOESNOTEXIST"
|
client.testJQ(params(p, "q", "id:DOESNOTEXIST"
|
||||||
, "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})'" +
|
, "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})'" +
|
||||||
|
|
Loading…
Reference in New Issue