From 12e8cca6442e6c64a414d832b3982cc77354c6ad Mon Sep 17 00:00:00 2001 From: Munendra S N Date: Thu, 5 Dec 2019 10:48:22 +0530 Subject: [PATCH] SOLR-11706: add support for aggregation on multivalued fields * min, max, sum, sumsq, avg, stddev, variance, percentile aggregations in JSON facets now supports multivalued fields --- solr/CHANGES.txt | 3 + .../apache/solr/search/ValueSourceParser.java | 31 +- .../org/apache/solr/search/facet/AggUtil.java | 53 ++++ .../org/apache/solr/search/facet/AvgAgg.java | 200 +++++++++++- .../solr/search/facet/DocValuesAcc.java | 211 +++++++++++++ .../apache/solr/search/facet/MinMaxAgg.java | 185 ++++++++++- .../solr/search/facet/PercentileAgg.java | 293 +++++++++++++++++- .../org/apache/solr/search/facet/SlotAcc.java | 21 +- .../apache/solr/search/facet/StddevAgg.java | 72 ++++- .../org/apache/solr/search/facet/SumAgg.java | 86 ++++- .../apache/solr/search/facet/SumsqAgg.java | 86 ++++- .../solr/search/facet/UnInvertedFieldAcc.java | 113 +++++++ .../apache/solr/search/facet/VarianceAgg.java | 72 ++++- .../solr/collection1/conf/schema.xml | 1 + .../handler/component/StatsComponentTest.java | 143 ++++----- ...utedFacetSimpleRefinementLongTailTest.java | 15 +- .../solr/search/facet/TestJsonFacets.java | 108 ++++++- 17 files changed, 1554 insertions(+), 139 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/search/facet/AggUtil.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 4e213d32b45..9f9bc9f69a1 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -160,6 +160,9 @@ Improvements * SOLR-13968: Support postingsFormat and docValuesFormat in schema fields. (Bruno Roustant) +* SOLR-11706: Add support for aggregation on multivalued fields in JSON facets. min, max, avg, sum, sumsq, stddev, + variance, percentile aggregations now have support for multivalued fields. 
(hossman, Munendra S N) + Optimizations --------------------- (No changes) diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java index 6eeff2b9c4e..64cadb6843f 100644 --- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java +++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java @@ -985,35 +985,35 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin { addParser("agg_sum", new ValueSourceParser() { @Override public ValueSource parse(FunctionQParser fp) throws SyntaxError { - return new SumAgg(fp.parseValueSource()); + return new SumAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE)); } }); addParser("agg_avg", new ValueSourceParser() { @Override public ValueSource parse(FunctionQParser fp) throws SyntaxError { - return new AvgAgg(fp.parseValueSource()); + return new AvgAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE)); } }); addParser("agg_sumsq", new ValueSourceParser() { @Override public ValueSource parse(FunctionQParser fp) throws SyntaxError { - return new SumsqAgg(fp.parseValueSource()); + return new SumsqAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE)); } }); addParser("agg_variance", new ValueSourceParser() { @Override public ValueSource parse(FunctionQParser fp) throws SyntaxError { - return new VarianceAgg(fp.parseValueSource()); + return new VarianceAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE)); } }); addParser("agg_stddev", new ValueSourceParser() { @Override public ValueSource parse(FunctionQParser fp) throws SyntaxError { - return new StddevAgg(fp.parseValueSource()); + return new StddevAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE)); } }); @@ -1054,7 +1054,26 @@ 
public abstract class ValueSourceParser implements NamedListInitializedPlugin { } }); - addParser("agg_percentile", new PercentileAgg.Parser()); + addParser("agg_percentile", new ValueSourceParser() { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + List percentiles = new ArrayList<>(); + ValueSource vs = fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE); + while (fp.hasMoreArguments()) { + double val = fp.parseDouble(); + if (val<0 || val>100) { + throw new SyntaxError("requested percentile must be between 0 and 100. got " + val); + } + percentiles.add(val); + } + + if (percentiles.isEmpty()) { + throw new SyntaxError("expected percentile(valsource,percent1[,percent2]*) EXAMPLE:percentile(myfield,50)"); + } + + return new PercentileAgg(vs, percentiles); + } + }); addParser("agg_" + RelatednessAgg.NAME, new ValueSourceParser() { @Override diff --git a/solr/core/src/java/org/apache/solr/search/facet/AggUtil.java b/solr/core/src/java/org/apache/solr/search/facet/AggUtil.java new file mode 100644 index 00000000000..3370fd9412a --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/AggUtil.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search.facet; + + +public class AggUtil { + + private AggUtil() { + } + + /** + * Computes and returns average for given sum and count + */ + public static double avg(double sum, long count) { + // todo: should we return NAN when count==0? + return count == 0? 0.0d: sum / count; + } + + /** + * Computes and returns uncorrected standard deviation for given values + */ + public static double stdDev(double sumSq, double sum, long count) { + // todo: switch to corrected stddev SOLR-11725 + // todo: should we return NAN when count==0? + double val = count == 0 ? 0 : Math.sqrt((sumSq / count) - Math.pow(sum / count, 2)); + return val; + } + + /** + * Computes and returns uncorrected variance for given values + */ + public static double variance(double sumSq, double sum, long count) { + // todo: switch to corrected variance SOLR-11725 + // todo: should we return NAN when count==0? + double val = count == 0 ? 
0 : (sumSq / count) - Math.pow(sum / count, 2); + return val; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java b/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java index ebc6459dbab..716bb805e41 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java @@ -17,9 +17,18 @@ package org.apache.solr.search.facet; import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; import java.util.List; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrException; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.function.FieldNameValueSource; public class AvgAgg extends SimpleAggValueSource { @@ -29,7 +38,31 @@ public class AvgAgg extends SimpleAggValueSource { @Override public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { - return new AvgSlotAcc(getArg(), fcontext, numSlots); + ValueSource vs = getArg(); + + if (vs instanceof FieldNameValueSource) { + String field = ((FieldNameValueSource) vs).getFieldName(); + SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field); + if (sf.getType().getNumberType() == null) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for " + sf.getType().getTypeName()); + } + if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { + if (sf.hasDocValues()) { + if (sf.getType().isPointField()) { + return new AvgSortedNumericAcc(fcontext, sf, numSlots); + } + return new AvgSortedSetAcc(fcontext, sf, numSlots); + } + if (sf.getType().isPointField()) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for PointField w/o 
docValues"); + } + return new AvgUnInvertedFieldAcc(fcontext, sf, numSlots); + } + vs = sf.getType().getValueSource(sf, null); + } + return new AvgSlotAcc(vs, fcontext, numSlots); } @Override @@ -43,7 +76,7 @@ public class AvgAgg extends SimpleAggValueSource { @Override public void merge(Object facetResult, Context mcontext1) { - List numberList = (List)facetResult; + List numberList = (List) facetResult; num += numberList.get(0).longValue(); sum += numberList.get(1).doubleValue(); } @@ -51,8 +84,167 @@ public class AvgAgg extends SimpleAggValueSource { @Override protected double getDouble() { // TODO: is it worth to try and cache? - return num==0 ? 0.0d : sum/num; + return AggUtil.avg(sum, num); + } + } + + class AvgSortedNumericAcc extends DoubleSortedNumericDVAcc { + int[] counts; + + public AvgSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + this.counts = new int[numSlots]; } - }; + @Override + protected void collectValues(int doc, int slot) throws IOException { + for (int i = 0, count = values.docValueCount(); i < count; i++) { + result[slot]+=getDouble(values.nextValue()); + counts[slot]++; + } + } + + private double avg(int slot) { + return AggUtil.avg(result[slot], counts[slot]); // calc once and cache in result? 
+ } + + @Override + public int compare(int slotA, int slotB) { + return Double.compare(avg(slotA), avg(slotB)); + } + + @Override + public Object getValue(int slot) { + if (fcontext.isShard()) { + ArrayList lst = new ArrayList(2); + lst.add(counts[slot]); + lst.add(result[slot]); + return lst; + } else { + return avg(slot); + } + } + + @Override + public void reset() throws IOException { + super.reset(); + Arrays.fill(counts, 0); + } + + @Override + public void resize(Resizer resizer) { + super.resize(resizer); + resizer.resize(counts, 0); + } + } + + class AvgSortedSetAcc extends DoubleSortedSetDVAcc { + int[] counts; + + public AvgSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + this.counts = new int[numSlots]; + } + + @Override + protected void collectValues(int doc, int slot) throws IOException { + long ord; + while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + BytesRef term = values.lookupOrd(ord); + Object obj = sf.getType().toObject(sf, term); + double val = obj instanceof Date ? 
((Date)obj).getTime(): ((Number)obj).doubleValue(); + result[slot] += val; + counts[slot]++; + } + } + + private double avg(int slot) { + return AggUtil.avg(result[slot], counts[slot]); + } + + @Override + public int compare(int slotA, int slotB) { + return Double.compare(avg(slotA), avg(slotB)); + } + + @Override + public Object getValue(int slot) { + if (fcontext.isShard()) { + ArrayList lst = new ArrayList(2); + lst.add(counts[slot]); + lst.add(result[slot]); + return lst; + } else { + return avg(slot); + } + } + + @Override + public void reset() throws IOException { + super.reset(); + Arrays.fill(counts, 0); + } + + @Override + public void resize(Resizer resizer) { + super.resize(resizer); + resizer.resize(counts, 0); + } + } + + class AvgUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc { + int[] counts; + + public AvgUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + this.counts = new int[numSlots]; + } + + @Override + public void call(int termNum) { + try { + BytesRef term = docToTerm.lookupOrd(termNum); + Object obj = sf.getType().toObject(sf, term); + double val = obj instanceof Date? 
((Date)obj).getTime(): ((Number)obj).doubleValue(); + result[currentSlot] += val; + counts[currentSlot]++; + } catch (IOException e) { + // find a better way to do it + throw new UncheckedIOException(e); + } + } + + private double avg(int slot) { + return AggUtil.avg(result[slot], counts[slot]); + } + + @Override + public int compare(int slotA, int slotB) { + return Double.compare(avg(slotA), avg(slotB)); + } + + @Override + public Object getValue(int slot) { + if (fcontext.isShard()) { + ArrayList lst = new ArrayList(2); + lst.add(counts[slot]); + lst.add(result[slot]); + return lst; + } else { + return avg(slot); + } + } + + @Override + public void reset() throws IOException { + super.reset(); + Arrays.fill(counts, 0); + } + + @Override + public void resize(Resizer resizer) { + super.resize(resizer); + resizer.resize(counts, 0); + } + } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/DocValuesAcc.java b/solr/core/src/java/org/apache/solr/search/facet/DocValuesAcc.java index e3740e7485b..2bd07a4dfc6 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/DocValuesAcc.java +++ b/solr/core/src/java/org/apache/solr/search/facet/DocValuesAcc.java @@ -18,7 +18,9 @@ package org.apache.solr.search.facet; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Date; import java.util.function.IntFunction; import org.apache.lucene.index.DocValues; @@ -28,6 +30,8 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; import org.apache.solr.schema.SchemaField; /** @@ -139,6 +143,118 @@ abstract class LongSortedNumericDVAcc extends SortedNumericDVAcc { } +abstract class DoubleSortedNumericDVAcc extends SortedNumericDVAcc { + double[] result; + double initialValue; + + public 
DoubleSortedNumericDVAcc(FacetContext fcontext, SchemaField sf, int numSlots, double initialValue) throws IOException { + super(fcontext, sf, numSlots); + this.result = new double[numSlots]; + this.initialValue = initialValue; + if (initialValue != 0) { + Arrays.fill(result, initialValue); + } + } + + @Override + public int compare(int slotA, int slotB) { + return Double.compare(result[slotA], result[slotB]); + } + + @Override + public Object getValue(int slotNum) throws IOException { + return result[slotNum]; + } + + @Override + public void reset() throws IOException { + Arrays.fill(result, initialValue); + } + + @Override + public void resize(Resizer resizer) { + resizer.resize(result, initialValue); + } + + /** + * converts given long value to double based on field type + */ + protected double getDouble(long val) { + switch (sf.getType().getNumberType()) { + case INTEGER: + case LONG: + case DATE: + return val; + case FLOAT: + return NumericUtils.sortableIntToFloat((int) val); + case DOUBLE: + return NumericUtils.sortableLongToDouble(val); + default: + // this would never happen + return 0.0d; + } + } + +} + +/** + * Base class for standard deviation and variance computation for fields with {@link SortedNumericDocValues} + */ +abstract class SDVSortedNumericAcc extends DoubleSortedNumericDVAcc { + int[] counts; + double[] sum; + + public SDVSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + this.counts = new int[numSlots]; + this.sum = new double[numSlots]; + } + + @Override + protected void collectValues(int doc, int slot) throws IOException { + for (int i = 0, count = values.docValueCount(); i < count; i++) { + double val = getDouble(values.nextValue()); + result[slot]+= val * val; + sum[slot]+= val; + counts[slot]++; + } + } + + protected abstract double computeVal(int slot); + + @Override + public int compare(int slotA, int slotB) { + return Double.compare(computeVal(slotA), 
computeVal(slotB)); + } + + @Override + public Object getValue(int slot) { + if (fcontext.isShard()) { + ArrayList lst = new ArrayList(3); + lst.add(counts[slot]); + lst.add(result[slot]); + lst.add(sum[slot]); + return lst; + } else { + return computeVal(slot); + } + } + + @Override + public void reset() throws IOException { + super.reset(); + Arrays.fill(counts, 0); + Arrays.fill(sum, 0); + } + + @Override + public void resize(Resizer resizer) { + super.resize(resizer); + resizer.resize(counts, 0); + resizer.resize(sum, 0); + } +} + /** * Accumulator for {@link SortedDocValues} */ @@ -216,3 +332,98 @@ abstract class LongSortedSetDVAcc extends SortedSetDVAcc { resizer.resize(result, initialValue); } } + +abstract class DoubleSortedSetDVAcc extends SortedSetDVAcc { + double[] result; + double initialValue; + + public DoubleSortedSetDVAcc(FacetContext fcontext, SchemaField sf, int numSlots, long initialValue) throws IOException { + super(fcontext, sf, numSlots); + result = new double[numSlots]; + this.initialValue = initialValue; + if (initialValue != 0) { + Arrays.fill(result, initialValue); + } + } + + @Override + public int compare(int slotA, int slotB) { + return Double.compare(result[slotA], result[slotB]); + } + + @Override + public Object getValue(int slotNum) throws IOException { + return result[slotNum]; + } + + @Override + public void reset() throws IOException { + Arrays.fill(result, initialValue); + } + + @Override + public void resize(Resizer resizer) { + resizer.resize(result, initialValue); + } +} + +/** + * Base class for standard deviation and variance computation for fields with {@link SortedSetDocValues} + */ +abstract class SDVSortedSetAcc extends DoubleSortedSetDVAcc { + int[] counts; + double[] sum; + + public SDVSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + this.counts = new int[numSlots]; + this.sum = new double[numSlots]; + } + + @Override + protected void 
collectValues(int doc, int slot) throws IOException { + long ord; + while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + BytesRef term = values.lookupOrd(ord); + Object obj = sf.getType().toObject(sf, term); + double val = obj instanceof Date ? ((Date)obj).getTime(): ((Number)obj).doubleValue(); + result[slot] += val * val; + sum[slot] += val; + counts[slot]++; + } + } + + protected abstract double computeVal(int slot); + + @Override + public int compare(int slotA, int slotB) { + return Double.compare(computeVal(slotA), computeVal(slotB)); + } + + @Override + public Object getValue(int slot) { + if (fcontext.isShard()) { + ArrayList lst = new ArrayList(3); + lst.add(counts[slot]); + lst.add(result[slot]); + lst.add(sum[slot]); + return lst; + } else { + return computeVal(slot); + } + } + + @Override + public void reset() throws IOException { + super.reset(); + Arrays.fill(counts, 0); + Arrays.fill(sum, 0); + } + + @Override + public void resize(Resizer resizer) { + super.resize(resizer); + resizer.resize(counts, 0); + resizer.resize(sum, 0); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java index 1c961e06279..0d7d86383c2 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java @@ -25,11 +25,15 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LongValues; import org.apache.solr.common.SolrException; +import org.apache.solr.schema.FieldType; +import 
org.apache.solr.schema.NumberType; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.StrFieldSource; import org.apache.solr.search.function.FieldNameValueSource; @@ -53,8 +57,21 @@ public class MinMaxAgg extends SimpleAggValueSource { sf = fcontext.qcontext.searcher().getSchema().getField(field); if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { - vs = null; - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "min/max aggregations can't be used on multi-valued field " + field); + if (sf.hasDocValues()) { + if(sf.getType().getNumberType() != null) { + FieldType.MultiValueSelector choice = minmax == 1 ? FieldType.MultiValueSelector.MIN : FieldType.MultiValueSelector.MAX; + vs = sf.getType().getSingleValueSource(choice, sf, null); + } else { + // multi-valued strings + return new MinMaxSortedSetDVAcc(fcontext, sf, numSlots); + } + } else { + if (sf.getType().isPointField()) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "min/max aggregations can't be used on PointField w/o DocValues"); + } + return new MinMaxUnInvertedFieldAcc(fcontext, sf, numSlots); + } } else { vs = sf.getType().getValueSource(sf, null); } @@ -137,6 +154,80 @@ public class MinMaxAgg extends SimpleAggValueSource { } } + class MinMaxUnInvertedFieldAcc extends UnInvertedFieldAcc { + final static int MISSING = -1; + private int currentSlot; + int[] result; + + public MinMaxUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots); + result = new int[numSlots]; + Arrays.fill(result, MISSING); + } + + @Override + public void collect(int doc, int slot, IntFunction slotContext) throws IOException { + this.currentSlot = slot; + docToTerm.getBigTerms(doc + currentDocBase, this); + docToTerm.getSmallTerms(doc + currentDocBase, this); + } + + @Override + public int compare(int slotA, int slotB) { + int a = result[slotA]; + int b = result[slotB]; + return a == MISSING ? 
-1: (b == MISSING? 1: Integer.compare(a, b)); + } + + @Override + public Object getValue(int slotNum) throws IOException { + int ord = result[slotNum]; + if (ord == MISSING) return null; + BytesRef term = docToTerm.lookupOrd(ord); + return getObject(term); + } + + /** + * Wrapper to convert stored format to external format. + *
<p>
+ * This ensures consistent behavior like other accumulators where + * long is returned for integer field types and double is returned for float field types + *
</p>
+ */ + private Object getObject(BytesRef term) { + Object obj = sf.getType().toObject(sf, term); + NumberType type = sf.getType().getNumberType(); + if (type == null) { + return obj; + } else if (type == NumberType.INTEGER) { + // this is to ensure consistent behavior with other accumulators + // where long is returned for integer field types + return ((Number)obj).longValue(); + } else if (type == NumberType.FLOAT) { + return ((Number)obj).floatValue(); + } + return obj; + } + + @Override + public void reset() throws IOException { + Arrays.fill(result, MISSING); + } + + @Override + public void resize(Resizer resizer) { + resizer.resize(result, MISSING); + } + + @Override + public void call(int termNum) { + int currOrd = result[currentSlot]; + if (currOrd == MISSING || Integer.compare(termNum, currOrd) * minmax < 0) { + result[currentSlot] = termNum; + } + } + } + class DFuncAcc extends DoubleFuncSlotAcc { public DFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) { super(values, fcontext, numSlots, Double.NaN); @@ -291,7 +382,6 @@ public class MinMaxAgg extends SimpleAggValueSource { } } - class SingleValuedOrdAcc extends OrdAcc { SortedDocValues topLevel; SortedDocValues[] subDvs; @@ -346,5 +436,94 @@ public class MinMaxAgg extends SimpleAggValueSource { } } + class MinMaxSortedSetDVAcc extends DocValuesAcc { + final static int MISSING = -1; + SortedSetDocValues topLevel; + SortedSetDocValues[] subDvs; + OrdinalMap ordMap; + LongValues toGlobal; + SortedSetDocValues subDv; + long[] slotOrd; + public MinMaxSortedSetDVAcc(FacetContext fcontext, SchemaField field, int numSlots) throws IOException { + super(fcontext, field); + this.slotOrd = new long[numSlots]; + Arrays.fill(slotOrd, MISSING); + } + + @Override + public void resetIterators() throws IOException { + super.resetIterators(); + topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null); + if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) { + ordMap = 
((MultiDocValues.MultiSortedSetDocValues)topLevel).mapping; + subDvs = ((MultiDocValues.MultiSortedSetDocValues)topLevel).values; + } else { + ordMap = null; + subDvs = null; + } + } + + @Override + public void setNextReader(LeafReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + if (subDvs != null) { + subDv = subDvs[readerContext.ord]; + toGlobal = ordMap.getGlobalOrds(readerContext.ord); + assert toGlobal != null; + } else { + assert readerContext.ord==0 || topLevel.getValueCount() == 0; + subDv = topLevel; + } + } + + @Override + public int compare(int slotA, int slotB) { + long a = slotOrd[slotA]; + long b = slotOrd[slotB]; + return a == MISSING ? -1: (b == MISSING? 1: Long.compare(a, b)); + } + + @Override + public Object getValue(int slotNum) throws IOException { + long ord = slotOrd[slotNum]; + if (ord == MISSING) return null; + BytesRef term = topLevel.lookupOrd(ord); + return sf.getType().toObject(sf, term); + } + + @Override + public void reset() throws IOException { + Arrays.fill(slotOrd, MISSING); + } + + @Override + public void resize(Resizer resizer) { + resizer.resize(slotOrd, MISSING); + } + + @Override + public void collectValues(int doc, int slotNum) throws IOException { + long newOrd = MISSING; + if (minmax == 1) {// min + newOrd = subDv.nextOrd(); + } else { // max + long ord; + while ((ord = subDv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + newOrd = ord; + } + } + + long currOrd = slotOrd[slotNum]; + long finalOrd = toGlobal==null ? 
newOrd : toGlobal.get(newOrd); + if (currOrd == MISSING || Long.compare(finalOrd, currOrd) * minmax < 0) { + slotOrd[slotNum] = finalOrd; + } + } + + @Override + protected DocIdSetIterator docIdSetIterator() { + return subDv; + } + } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java b/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java index efdef553a80..c298fd1dec5 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java @@ -17,17 +17,29 @@ package org.apache.solr.search.facet; import java.io.IOException; +import java.io.UncheckedIOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; +import java.util.Date; import java.util.List; import java.util.function.IntFunction; import com.tdunning.math.stats.AVLTreeDigest; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; +import org.apache.solr.common.SolrException; +import org.apache.solr.schema.SchemaField; import org.apache.solr.search.FunctionQParser; import org.apache.solr.search.SyntaxError; import org.apache.solr.search.ValueSourceParser; +import org.apache.solr.search.function.FieldNameValueSource; public class PercentileAgg extends SimpleAggValueSource { List percentiles; @@ -39,7 +51,31 @@ public class PercentileAgg extends SimpleAggValueSource { @Override public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { - return new Acc(getArg(), fcontext, numSlots); + ValueSource vs = getArg(); + + if (vs instanceof FieldNameValueSource) { + String field = 
((FieldNameValueSource) vs).getFieldName(); + SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field); + if (sf.getType().getNumberType() == null) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for " + sf.getType().getTypeName()); + } + if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { + if (sf.hasDocValues()) { + if (sf.getType().isPointField()) { + return new PercentileSortedNumericAcc(fcontext, sf, numSlots); + } + return new PercentileSortedSetAcc(fcontext, sf, numSlots); + } + if (sf.getType().isPointField()) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for PointField w/o docValues"); + } + return new PercentileUnInvertedFieldAcc(fcontext, sf, numSlots); + } + vs = sf.getType().getValueSource(sf, null); + } + return new Acc(vs, fcontext, numSlots); } @Override @@ -80,7 +116,6 @@ public class PercentileAgg extends SimpleAggValueSource { } } - protected Object getValueFromDigest(AVLTreeDigest digest) { if (digest == null) { return null; @@ -90,7 +125,7 @@ public class PercentileAgg extends SimpleAggValueSource { return digest.quantile( percentiles.get(0) * 0.01 ); } - List lst = new ArrayList(percentiles.size()); + List lst = new ArrayList<>(percentiles.size()); for (Double percentile : percentiles) { double val = digest.quantile( percentile * 0.01 ); lst.add( val ); @@ -98,8 +133,6 @@ public class PercentileAgg extends SimpleAggValueSource { return lst; } - - class Acc extends FuncSlotAcc { protected AVLTreeDigest[] digests; protected ByteBuffer buf; @@ -155,6 +188,76 @@ public class PercentileAgg extends SimpleAggValueSource { return getValueFromDigest( digests[slotNum] ); } + public Object getShardValue(int slot) throws IOException { + AVLTreeDigest digest = digests[slot]; + if (digest == null) return null; // no values for this slot + + digest.compress(); + int sz = digest.byteSize(); + if (buf == null || 
buf.capacity() < sz) { + buf = ByteBuffer.allocate(sz+(sz>>1)); // oversize by 50% + } else { + buf.clear(); + } + digest.asSmallBytes(buf); + byte[] arr = Arrays.copyOf(buf.array(), buf.position()); + return arr; + } + + @Override + public void reset() { + digests = new AVLTreeDigest[digests.length]; + sortvals = null; + } + + @Override + public void resize(Resizer resizer) { + digests = resizer.resize(digests, null); + } + } + + abstract class BasePercentileDVAcc extends DocValuesAcc { + AVLTreeDigest[] digests; + protected ByteBuffer buf; + double[] sortvals; + + public BasePercentileDVAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf); + digests = new AVLTreeDigest[numSlots]; + } + + @Override + public int compare(int slotA, int slotB) { + if (sortvals == null) { + fillSortVals(); + } + return Double.compare(sortvals[slotA], sortvals[slotB]); + } + + private void fillSortVals() { + sortvals = new double[ digests.length ]; + double sortp = percentiles.get(0) * 0.01; + for (int i=0; i slotContext) throws IOException { + this.currentSlot = slot; + docToTerm.getBigTerms(doc + currentDocBase, this); + docToTerm.getSmallTerms(doc + currentDocBase, this); + } + + @Override + public int compare(int slotA, int slotB) { + if (sortvals == null) { + fillSortVals(); + } + return Double.compare(sortvals[slotA], sortvals[slotB]); + } + + private void fillSortVals() { + sortvals = new double[ digests.length ]; + double sortp = percentiles.get(0) * 0.01; + for (int i=0; i>1)); // oversize by 50% + } else { + buf.clear(); + } + digest.asSmallBytes(buf); + byte[] arr = Arrays.copyOf(buf.array(), buf.position()); + return arr; + } + + @Override + public void reset() { + digests = new AVLTreeDigest[digests.length]; + sortvals = null; + } + + @Override + public void resize(Resizer resizer) { + digests = resizer.resize(digests, null); + } + + @Override + public void call(int ord) { + AVLTreeDigest digest = digests[currentSlot]; + if 
(digest == null) { + digests[currentSlot] = digest = new AVLTreeDigest(100); + } + try { + BytesRef term = docToTerm.lookupOrd(ord); + Object obj = sf.getType().toObject(sf, term); + double val = obj instanceof Date ? ((Date) obj).getTime() : ((Number) obj).doubleValue(); + digest.add(val); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + } class Merger extends FacetSortableMerger { protected AVLTreeDigest digest; @@ -221,4 +501,3 @@ public class PercentileAgg extends SimpleAggValueSource { } } } - diff --git a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java index 244485e4991..2a9234839dd 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java +++ b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java @@ -431,13 +431,8 @@ class AvgSlotAcc extends DoubleFuncSlotAcc { } } - private double avg(double tot, int count) { - return count == 0 ? 0 : tot / count; // returns 0 instead of NaN.. todo - make configurable? if NaN, we need to - // handle comparisons though... - } - private double avg(int slot) { - return avg(result[slot], counts[slot]); // calc once and cache in result? + return AggUtil.avg(result[slot], counts[slot]); // calc once and cache in result? } @Override @@ -488,13 +483,8 @@ class VarianceSlotAcc extends DoubleFuncSlotAcc { this.sum = resizer.resize(this.sum, 0); } - private double variance(double sumSq, double sum, int count) { - double val = count == 0 ? 0 : (sumSq / count) - Math.pow(sum / count, 2); - return val; - } - private double variance(int slot) { - return variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result? + return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result? 
} @Override @@ -550,13 +540,8 @@ class StddevSlotAcc extends DoubleFuncSlotAcc { this.result = resizer.resize(this.result, 0); } - private double stdDev(double sumSq, double sum, int count) { - double val = count == 0 ? 0 : Math.sqrt((sumSq / count) - Math.pow(sum / count, 2)); - return val; - } - private double stdDev(int slot) { - return stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result? + return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result? } @Override diff --git a/solr/core/src/java/org/apache/solr/search/facet/StddevAgg.java b/solr/core/src/java/org/apache/solr/search/facet/StddevAgg.java index 917df6e2872..d7237d39108 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/StddevAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/StddevAgg.java @@ -21,6 +21,9 @@ import java.io.IOException; import java.util.List; import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.common.SolrException; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.function.FieldNameValueSource; public class StddevAgg extends SimpleAggValueSource { @@ -30,7 +33,31 @@ public class StddevAgg extends SimpleAggValueSource { @Override public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { - return new StddevSlotAcc(getArg(), fcontext, numSlots); + ValueSource vs = getArg(); + + if (vs instanceof FieldNameValueSource) { + String field = ((FieldNameValueSource) vs).getFieldName(); + SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field); + if (sf.getType().getNumberType() == null) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for " + sf.getType().getTypeName()); + } + if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { + if (sf.hasDocValues()) { + if (sf.getType().isPointField()) { + return new StddevSortedNumericAcc(fcontext, 
sf, numSlots); + } + return new StddevSortedSetAcc(fcontext, sf, numSlots); + } + if (sf.getType().isPointField()) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for PointField w/o docValues"); + } + return new StddevUnInvertedFieldAcc(fcontext, sf, numSlots); + } + vs = sf.getType().getValueSource(sf, null); + } + return new StddevSlotAcc(vs, fcontext, numSlots); } @Override @@ -58,9 +85,44 @@ public class StddevAgg extends SimpleAggValueSource { } @Override - protected double getDouble() { - double val = count == 0 ? 0.0d : Math.sqrt((sumSq/count)-Math.pow(sum/count, 2)); - return val; + protected double getDouble() { + return AggUtil.stdDev(sumSq, sum, count); } - }; + } + + class StddevSortedNumericAcc extends SDVSortedNumericAcc { + + public StddevSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots); + } + + @Override + protected double computeVal(int slot) { + return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result? + } + } + + class StddevSortedSetAcc extends SDVSortedSetAcc { + + public StddevSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots); + } + + @Override + protected double computeVal(int slot) { + return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result? + } + } + + class StddevUnInvertedFieldAcc extends SDVUnInvertedFieldAcc { + + public StddevUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots); + } + + @Override + protected double computeVal(int slot) { + return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result? 
+ } + } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java b/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java index 7b7f34b5515..7cd4b9d441d 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java @@ -17,8 +17,15 @@ package org.apache.solr.search.facet; import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Date; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrException; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.function.FieldNameValueSource; public class SumAgg extends SimpleAggValueSource { @@ -28,7 +35,31 @@ public class SumAgg extends SimpleAggValueSource { @Override public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { - return new SumSlotAcc(getArg(), fcontext, numSlots); + ValueSource vs = getArg(); + + if (vs instanceof FieldNameValueSource) { + String field = ((FieldNameValueSource)vs).getFieldName(); + SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field); + if (sf.getType().getNumberType() == null) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for " + sf.getType().getTypeName()); + } + if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { + if (sf.hasDocValues()) { + if (sf.getType().isPointField()) { + return new SumSortedNumericAcc(fcontext, sf, numSlots); + } + return new SumSortedSetAcc(fcontext, sf, numSlots); + } + if (sf.getType().isPointField()) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for PointField w/o docValues"); + } + return new SumUnInvertedFieldAcc(fcontext, sf, numSlots); + } + vs = sf.getType().getValueSource(sf, null); + } + return 
new SumSlotAcc(vs, fcontext, numSlots); } @Override @@ -48,5 +79,58 @@ public class SumAgg extends SimpleAggValueSource { return val; } } + + class SumSortedNumericAcc extends DoubleSortedNumericDVAcc { + + public SumSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + } + + @Override + protected void collectValues(int doc, int slot) throws IOException { + for (int i = 0, count = values.docValueCount(); i < count; i++) { + result[slot]+=getDouble(values.nextValue()); + } + } + + } + + class SumSortedSetAcc extends DoubleSortedSetDVAcc { + + public SumSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + } + + @Override + protected void collectValues(int doc, int slot) throws IOException { + long ord; + while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + BytesRef term = values.lookupOrd(ord); + Object obj = sf.getType().toObject(sf, term); + double val = obj instanceof Date? ((Date)obj).getTime(): ((Number)obj).doubleValue(); + result[slot] += val; + } + } + } + + class SumUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc { + + public SumUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + } + + @Override + public void call(int termNum) { + try { + BytesRef term = docToTerm.lookupOrd(termNum); + Object obj = sf.getType().toObject(sf, term); + double val = obj instanceof Date? 
((Date)obj).getTime(): ((Number)obj).doubleValue(); + result[currentSlot] += val; + } catch (IOException e) { + // find a better way to do it + throw new UncheckedIOException(e); + } + } + } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java b/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java index 732ab14c1e0..133e39c4b75 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java @@ -17,8 +17,15 @@ package org.apache.solr.search.facet; import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Date; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrException; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.function.FieldNameValueSource; public class SumsqAgg extends SimpleAggValueSource { public SumsqAgg(ValueSource vs) { @@ -27,11 +34,88 @@ public class SumsqAgg extends SimpleAggValueSource { @Override public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { - return new SumsqSlotAcc(getArg(), fcontext, numSlots); + ValueSource vs = getArg(); + + if (vs instanceof FieldNameValueSource) { + String field = ((FieldNameValueSource)vs).getFieldName(); + SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field); + if (sf.getType().getNumberType() == null) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for " + sf.getType().getTypeName()); + } + if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { + if (sf.hasDocValues()) { + if (sf.getType().isPointField()) { + return new SumSqSortedNumericAcc(fcontext, sf, numSlots); + } + return new SumSqSortedSetAcc(fcontext, sf, numSlots); + } + if (sf.getType().isPointField()) { + throw new 
SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for PointField w/o docValues"); + } + return new SumSqUnInvertedFieldAcc(fcontext, sf, numSlots); + } + vs = sf.getType().getValueSource(sf, null); + } + return new SumsqSlotAcc(vs, fcontext, numSlots); } @Override public FacetMerger createFacetMerger(Object prototype) { return new SumAgg.Merger(); } + + class SumSqSortedNumericAcc extends DoubleSortedNumericDVAcc { + + public SumSqSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + } + + @Override + protected void collectValues(int doc, int slot) throws IOException { + for (int i = 0, count = values.docValueCount(); i < count; i++) { + double val = getDouble(values.nextValue()); + result[slot]+= val * val; + } + } + } + + class SumSqSortedSetAcc extends DoubleSortedSetDVAcc { + + public SumSqSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + } + + @Override + protected void collectValues(int doc, int slot) throws IOException { + long ord; + while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + BytesRef term = values.lookupOrd(ord); + Object obj = sf.getType().toObject(sf, term); + double val = obj instanceof Date ? ((Date)obj).getTime(): ((Number)obj).doubleValue(); + result[slot] += val * val; + } + } + } + + class SumSqUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc { + + public SumSqUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + } + + @Override + public void call(int termNum) { + try { + BytesRef term = docToTerm.lookupOrd(termNum); + Object obj = sf.getType().toObject(sf, term); + double val = obj instanceof Date? 
((Date)obj).getTime(): ((Number)obj).doubleValue(); + result[currentSlot] += val * val; + } catch (IOException e) { + // find a better way to do it + throw new UncheckedIOException(e); + } + } + } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedFieldAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedFieldAcc.java index 3230d385d84..7f2d9eb56da 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedFieldAcc.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedFieldAcc.java @@ -18,7 +18,13 @@ package org.apache.solr.search.facet; import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.function.IntFunction; +import org.apache.lucene.util.BytesRef; import org.apache.solr.schema.SchemaField; /** @@ -28,9 +34,11 @@ public abstract class UnInvertedFieldAcc extends SlotAcc implements UnInvertedFi UnInvertedField uif; UnInvertedField.DocToTerm docToTerm; + SchemaField sf; public UnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { super(fcontext); + this.sf = sf; uif = UnInvertedField.getUnInvertedField(sf.getName(), fcontext.qcontext.searcher()); docToTerm = uif.new DocToTerm(); fcontext.qcontext.addCloseHook(this); @@ -44,3 +52,108 @@ public abstract class UnInvertedFieldAcc extends SlotAcc implements UnInvertedFi } } } + +abstract class DoubleUnInvertedFieldAcc extends UnInvertedFieldAcc { + double[] result; + int currentSlot; + double initialValue; + + public DoubleUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots, double initialValue) throws IOException { + super(fcontext, sf, numSlots); + result = new double[numSlots]; + if (initialValue != 0) { + this.initialValue = initialValue; + Arrays.fill(result, initialValue); + } + } + + @Override + public void collect(int doc, int slot, IntFunction slotContext) throws IOException { + 
this.currentSlot = slot; + docToTerm.getBigTerms(doc + currentDocBase, this); + docToTerm.getSmallTerms(doc + currentDocBase, this); + } + + @Override + public int compare(int slotA, int slotB) { + return Double.compare(result[slotA], result[slotB]); + } + + @Override + public Object getValue(int slotNum) throws IOException { + return result[slotNum]; + } + + @Override + public void reset() throws IOException { + Arrays.fill(result, initialValue); + } + + @Override + public void resize(Resizer resizer) { + this.result = resizer.resize(result, initialValue); /* keep the array returned by the resizer, as the other accumulators in this patch do; previously the return value was dropped */ + } +} + +/** + * Base accumulator to compute standard deviation and variance for uninvertible fields + */ +abstract class SDVUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc { + int[] counts; + double[] sum; + + public SDVUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots, 0); + this.counts = new int[numSlots]; + this.sum = new double[numSlots]; + } + + @Override + public void call(int termNum) { + try { + BytesRef term = docToTerm.lookupOrd(termNum); + Object obj = sf.getType().toObject(sf, term); + double val = obj instanceof Date ? 
((Date)obj).getTime(): ((Number)obj).doubleValue(); + result[currentSlot] += val * val; + sum[currentSlot]+= val; + counts[currentSlot]++; + } catch (IOException e) { + // find a better way to do it + throw new UncheckedIOException(e); + } + } + + protected abstract double computeVal(int slot); + + @Override + public int compare(int slotA, int slotB) { + return Double.compare(computeVal(slotA), computeVal(slotB)); + } + + @Override + public Object getValue(int slot) { + if (fcontext.isShard()) { + ArrayList lst = new ArrayList(3); + lst.add(counts[slot]); + lst.add(result[slot]); + lst.add(sum[slot]); + return lst; + } else { + return computeVal(slot); + } + } + + @Override + public void reset() throws IOException { + super.reset(); + Arrays.fill(counts, 0); + Arrays.fill(sum, 0); + } + + @Override + public void resize(Resizer resizer) { + super.resize(resizer); + this.counts = resizer.resize(counts, 0); /* keep the arrays returned by the resizer, as the other accumulators in this patch do; previously both return values were dropped */ + this.sum = resizer.resize(sum, 0); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/VarianceAgg.java b/solr/core/src/java/org/apache/solr/search/facet/VarianceAgg.java index ec6955f4663..f04a073e3ff 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/VarianceAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/VarianceAgg.java @@ -20,6 +20,9 @@ import java.io.IOException; import java.util.List; import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.common.SolrException; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.function.FieldNameValueSource; public class VarianceAgg extends SimpleAggValueSource { @@ -29,7 +32,31 @@ public class VarianceAgg extends SimpleAggValueSource { @Override public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { - return new VarianceSlotAcc(getArg(), fcontext, numSlots); + ValueSource vs = getArg(); + + if (vs instanceof FieldNameValueSource) { + String field = ((FieldNameValueSource) vs).getFieldName(); + SchemaField sf = 
fcontext.qcontext.searcher().getSchema().getField(field); + if (sf.getType().getNumberType() == null) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for " + sf.getType().getTypeName()); + } + if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { + if (sf.hasDocValues()) { + if (sf.getType().isPointField()) { + return new VarianceSortedNumericAcc(fcontext, sf, numSlots); + } + return new VarianceSortedSetAcc(fcontext, sf, numSlots); + } + if (sf.getType().isPointField()) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + name() + " aggregation not supported for PointField w/o docValues"); + } + return new VarianceUnInvertedFieldAcc(fcontext, sf, numSlots); + } + vs = sf.getType().getValueSource(sf, null); + } + return new VarianceSlotAcc(vs, fcontext, numSlots); } @Override @@ -57,9 +84,44 @@ public class VarianceAgg extends SimpleAggValueSource { } @Override - protected double getDouble() { - double val = count == 0 ? 0.0d : (sumSq/count)-Math.pow(sum/count, 2); - return val; + protected double getDouble() { + return AggUtil.variance(sumSq, sum, count); } - }; + } + + class VarianceSortedNumericAcc extends SDVSortedNumericAcc { + + public VarianceSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots); + } + + @Override + protected double computeVal(int slot) { + return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result? + } + } + + class VarianceSortedSetAcc extends SDVSortedSetAcc { + + public VarianceSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots); + } + + @Override + protected double computeVal(int slot) { + return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result? 
+ } + } + + class VarianceUnInvertedFieldAcc extends SDVUnInvertedFieldAcc { + + public VarianceUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException { + super(fcontext, sf, numSlots); + } + + @Override + protected double computeVal(int slot) { + return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result? + } + } } diff --git a/solr/core/src/test-files/solr/collection1/conf/schema.xml b/solr/core/src/test-files/solr/collection1/conf/schema.xml index d5cf09035f4..5c3d48303a6 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema.xml @@ -694,6 +694,7 @@ --> + diff --git a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java index 3fffc30900c..4df839be339 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java @@ -18,22 +18,26 @@ package org.apache.solr.handler.component; import java.nio.ByteBuffer; import java.text.DateFormat; import java.text.SimpleDateFormat; -import java.util.Arrays; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Date; -import java.util.Iterator; import java.util.EnumSet; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.TimeZone; +import com.google.common.hash.HashFunction; +import com.tdunning.math.stats.AVLTreeDigest; +import org.apache.commons.math3.util.Combinations; import org.apache.lucene.index.Term; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.queries.function.valuesource.QueryValueSource; +import org.apache.lucene.search.TermQuery; +import org.apache.solr.SolrTestCaseJ4; 
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.CommonParams; @@ -44,23 +48,17 @@ import org.apache.solr.common.util.Base64; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.StrUtils; import org.apache.solr.core.SolrCore; -import org.apache.solr.handler.component.StatsField.Stat; import org.apache.solr.handler.component.StatsField.HllOptions; +import org.apache.solr.handler.component.StatsField.Stat; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.SchemaField; -import org.apache.solr.SolrTestCaseJ4; - -import org.apache.commons.math3.util.Combinations; -import com.tdunning.math.stats.AVLTreeDigest; -import com.google.common.hash.HashFunction; import org.apache.solr.util.hll.HLL; - import org.junit.BeforeClass; /** - * Statistics Component Test + * Statistics Component Test (which also checks some equivalent json.facet functionality) */ public class StatsComponentTest extends SolrTestCaseJ4 { @@ -116,7 +114,7 @@ public class StatsComponentTest extends SolrTestCaseJ4 { "stats_tis_ni_dv","stats_tfs_ni_dv","stats_tls_ni_dv","stats_tds_ni_dv", // Doc Values Not indexed "stats_is_p", "stats_fs_p", "stats_ls_p", "stats_ds_p", // Point Fields "stats_is_ni_p","stats_fs_ni_p","stats_ls_ni_p" // Point Doc Values Not indexed - }) { + }) { doTestMVFieldStatisticsResult(f); clearIndex(); @@ -298,87 +296,90 @@ public class StatsComponentTest extends SolrTestCaseJ4 { "fq", "{!tag=fq1}id:1"), params("stats.field", "{!ex=fq1,fq2}"+f, "stats", "true", "fq", "{!tag=fq1}-id_i:[0 TO 2]", - "fq", "{!tag=fq2}-id_i:[2 TO 1000]") }) { - - + "fq", "{!tag=fq2}-id_i:[2 TO 1000]"), + params("json.facet", // note: no distinctValues support and not comparing min/max values + 
"{min:'min("+f+")',count:'countvals("+f+")',missing:'missing("+f+")',max:'max("+f+")', sum:'sum("+f+")', " + + " countDistinct:'unique("+f+")', sumOfSquares:'sumsq("+f+")', mean:'avg("+f+")', stddev:'stddev("+f+")' }") + }) { + // easy switch to know if/when we are using json.facet which doesn't support some options + final boolean json = (null != baseParams.get("json.facet")); assertQ("test statistics values", req(baseParams, "q", "*:*", "stats.calcdistinct", "true") - , "//double[@name='min'][.='-100.0']" - , "//double[@name='max'][.='200.0']" + , json ? "//*" : "//double[@name='min'][.='-100.0']" + , json ? "//*" : "//double[@name='max'][.='200.0']" , "//double[@name='sum'][.='9.0']" , "//long[@name='count'][.='8']" , "//long[@name='missing'][.='3']" - , "//long[@name='countDistinct'][.='8']" - , "count(//arr[@name='distinctValues']/*)=8" + , json ? "//int[@name='countDistinct'][.='8']": "//long[@name='countDistinct'][.='8']" // SOLR-11775 + , json ? "//*" : "count(//arr[@name='distinctValues']/*)=8" , "//double[@name='sumOfSquares'][.='53101.0']" , "//double[@name='mean'][.='1.125']" - , "//double[@name='stddev'][.='87.08852228787508']" + ,json ? "//*" : "//double[@name='stddev'][.='87.08852228787508']" // SOLR-11725 ); assertQ("test statistics values w/fq", req(baseParams, "fq", "-id:1", "q", "*:*", "stats.calcdistinct", "true") - , "//double[@name='min'][.='-40.0']" - , "//double[@name='max'][.='200.0']" + , json ? "//*" : "//double[@name='min'][.='-40.0']" + , json ? "//*" : "//double[@name='max'][.='200.0']" , "//double[@name='sum'][.='119.0']" , "//long[@name='count'][.='6']" , "//long[@name='missing'][.='3']" - , "//long[@name='countDistinct'][.='6']" - , "count(//arr[@name='distinctValues']/*)=6" + , json? "//int[@name='countDistinct'][.='6']" :"//long[@name='countDistinct'][.='6']" // SOLR-11775 + , json ? 
"//*" : "count(//arr[@name='distinctValues']/*)=6" , "//double[@name='sumOfSquares'][.='43001.0']" , "//double[@name='mean'][.='19.833333333333332']" - , "//double[@name='stddev'][.='90.15634568163611']" + , json ? "//*" : "//double[@name='stddev'][.='90.15634568163611']" // SOLR-11725 ); - // TODO: why are there 3 identical requests below? - - assertQ("test statistics values", - req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s") - , "//double[@name='min'][.='-100.0']" - , "//double[@name='max'][.='200.0']" - , "//double[@name='sum'][.='9.0']" - , "//long[@name='count'][.='8']" - , "//long[@name='missing'][.='3']" - , "//long[@name='countDistinct'][.='8']" - , "count(//lst[@name='" + f + "']/arr[@name='distinctValues']/*)=8" - , "//double[@name='sumOfSquares'][.='53101.0']" - , "//double[@name='mean'][.='1.125']" - , "//double[@name='stddev'][.='87.08852228787508']" - ); - - assertQ("test value for active_s=true", - req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s") - , "//lst[@name='true']/double[@name='min'][.='-100.0']" - , "//lst[@name='true']/double[@name='max'][.='200.0']" - , "//lst[@name='true']/double[@name='sum'][.='70.0']" - , "//lst[@name='true']/long[@name='count'][.='4']" - , "//lst[@name='true']/long[@name='missing'][.='1']" - , "//lst[@name='true']//long[@name='countDistinct'][.='4']" - , "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4" - , "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']" - , "//lst[@name='true']/double[@name='mean'][.='17.5']" - , "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']" - ); - - assertQ("test value for active_s=false", - req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s") - , "//lst[@name='false']/double[@name='min'][.='-40.0']" - , "//lst[@name='false']/double[@name='max'][.='10.0']" - , "//lst[@name='false']/double[@name='sum'][.='-61.0']" - , 
"//lst[@name='false']/long[@name='count'][.='4']" - , "//lst[@name='false']/long[@name='missing'][.='2']" - , "//lst[@name='true']//long[@name='countDistinct'][.='4']" - , "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4" - , "//lst[@name='false']/double[@name='sumOfSquares'][.='2601.0']" - , "//lst[@name='false']/double[@name='mean'][.='-15.25']" - , "//lst[@name='false']/double[@name='stddev'][.='23.59908190304586']" - ); + if (!json) { // checking stats.facet makes no sense for json faceting + assertQ("test stats.facet (using boolean facet field)", + req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s") + // baseline + , "//lst[@name='"+f+"']/double[@name='min'][.='-100.0']" + , "//lst[@name='"+f+"']/double[@name='max'][.='200.0']" + , "//lst[@name='"+f+"']/double[@name='sum'][.='9.0']" + , "//lst[@name='"+f+"']/long[@name='count'][.='8']" + , "//lst[@name='"+f+"']/long[@name='missing'][.='3']" + , "//lst[@name='"+f+"']/long[@name='countDistinct'][.='8']" + , "count(//lst[@name='" + f + "']/arr[@name='distinctValues']/*)=8" + , "//lst[@name='"+f+"']/double[@name='sumOfSquares'][.='53101.0']" + , "//lst[@name='"+f+"']/double[@name='mean'][.='1.125']" + , "//lst[@name='"+f+"']/double[@name='stddev'][.='87.08852228787508']" + // facet 'true' + , "//lst[@name='true']/double[@name='min'][.='-100.0']" + , "//lst[@name='true']/double[@name='max'][.='200.0']" + , "//lst[@name='true']/double[@name='sum'][.='70.0']" + , "//lst[@name='true']/long[@name='count'][.='4']" + , "//lst[@name='true']/long[@name='missing'][.='1']" + , "//lst[@name='true']//long[@name='countDistinct'][.='4']" + , "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4" + , "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']" + , "//lst[@name='true']/double[@name='mean'][.='17.5']" + , "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']" + // facet 'false' + , "//lst[@name='false']/double[@name='min'][.='-40.0']" + , 
"//lst[@name='false']/double[@name='max'][.='10.0']" + , "//lst[@name='false']/double[@name='sum'][.='-61.0']" + , "//lst[@name='false']/long[@name='count'][.='4']" + , "//lst[@name='false']/long[@name='missing'][.='2']" + , "//lst[@name='true']//long[@name='countDistinct'][.='4']" + , "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4" + , "//lst[@name='false']/double[@name='sumOfSquares'][.='2601.0']" + , "//lst[@name='false']/double[@name='mean'][.='-15.25']" + , "//lst[@name='false']/double[@name='stddev'][.='23.59908190304586']" + ); + } } assertQ("cardinality" - , req("q", "*:*", "rows", "0", "stats", "true", "stats.field", "{!cardinality=true}" + f) - , "//long[@name='cardinality'][.='8']" - ); + , req("q", "*:*", "rows", "0", "stats", "true", "stats.field", "{!cardinality=true}" + f) + , "//long[@name='cardinality'][.='8']" + ); + assertQ("json cardinality" + , req("q", "*:*", "rows", "0", "json.facet", "{cardinality:'hll("+f+")'}") + , "//int[@name='cardinality'][.='8']" // SOLR-11775 + ); } public void testFieldStatisticsResultsStringField() throws Exception { diff --git a/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java b/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java index 450745b54a3..6c019b72fd1 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java +++ b/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java @@ -43,20 +43,21 @@ import org.junit.Test; public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistributedSearchTestCase { // TODO: add hll & variance - update all assertions to test their values (right after any mention of 'stddev') - private static List ALL_STATS = Arrays.asList("min", "max", "sum", "stddev", "avg", "sumsq", "unique", "missing", "countvals"); + private static List ALL_STATS = Arrays.asList("min", "max", 
"sum", "stddev", "avg", "sumsq", "unique", + "missing", "countvals", "percentile"); - private String STAT_FIELD = "stat_i1"; + private final String STAT_FIELD; private String ALL_STATS_JSON = ""; public DistributedFacetSimpleRefinementLongTailTest() { // we need DVs on point fields to compute stats & facets if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true"); - // TODO: randomizing STAT_FIELD to be multiValued=true blocked by SOLR-11706 - // STAT_FIELD = random().nextBoolean() ? "stat_i1" : "stat_i"; + STAT_FIELD = random().nextBoolean() ? "stat_is" : "stat_i"; for (String stat : ALL_STATS) { - ALL_STATS_JSON += stat + ":'" + stat + "(" + STAT_FIELD + ")',"; + String val = stat.equals("percentile")? STAT_FIELD+",90": STAT_FIELD; + ALL_STATS_JSON += stat + ":'" + stat + "(" + val + ")',"; } } @@ -232,6 +233,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute assertEquals(101L, bucket.get("countvals")); assertEquals(0L, bucket.get("missing")); assertEquals(48.0D, bucket.get("sum")); + assertEquals(1.0D, bucket.get("percentile")); assertEquals(0.475247524752475D, (double) bucket.get("avg"), 0.1E-7); assertEquals(54.0D, (double) bucket.get("sumsq"), 0.1E-7); // assertEquals(0.55846323792D, bucket.getStddev(), 0.1E-7); // TODO: SOLR-11725 @@ -391,6 +393,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute assertEquals(300L, aaa0_Bucket.get("countvals")); assertEquals(0L, aaa0_Bucket.get("missing")); assertEquals(34650.0D, aaa0_Bucket.get("sum")); + assertEquals(483.70000000000016D, (double)aaa0_Bucket.get("percentile"), 0.1E-7); assertEquals(115.5D, (double) aaa0_Bucket.get("avg"), 0.1E-7); assertEquals(1.674585E7D, (double) aaa0_Bucket.get("sumsq"), 0.1E-7); // assertEquals(206.4493184076D, (double) aaa0_Bucket.get("stddev"), 0.1E-7); // TODO: SOLR-11725 @@ -403,6 +406,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute 
assertEquals(0L, tail_Bucket.get("min")); assertEquals(44L, tail_Bucket.get("max")); assertEquals(90L, tail_Bucket.get("countvals")); + assertEquals(40.0D, tail_Bucket.get("percentile")); assertEquals(45L, tail_Bucket.get("missing")); assertEquals(1980.0D, tail_Bucket.get("sum")); assertEquals(22.0D, (double) tail_Bucket.get("avg"), 0.1E-7); @@ -419,6 +423,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute assertEquals(35L, tailB_Bucket.get("min")); assertEquals(40L, tailB_Bucket.get("max")); assertEquals(12L, tailB_Bucket.get("countvals")); + assertEquals(39.9D, tailB_Bucket.get("percentile")); assertEquals(5L, tailB_Bucket.get("missing")); assertEquals(450.0D, tailB_Bucket.get("sum")); assertEquals(37.5D, (double) tailB_Bucket.get("avg"), 0.1E-7); diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java index 9f0f7bfdfe3..06c13be4c80 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java @@ -217,14 +217,22 @@ public class TestJsonFacets extends SolrTestCaseHS { public void indexSimple(Client client) throws Exception { client.deleteByQuery("*:*", null); - client.add(sdoc("id", "1", "cat_s", "A", "where_s", "NY", "num_d", "4", "num_i", "2", "val_b", "true", "sparse_s", "one"), null); - client.add(sdoc("id", "2", "cat_s", "B", "where_s", "NJ", "num_d", "-9", "num_i", "-5", "val_b", "false"), null); + client.add(sdoc("id", "1", "cat_s", "A", "where_s", "NY", "num_d", "4", "num_i", "2", + "num_is", "4", "num_is", "2", + "val_b", "true", "sparse_s", "one"), null); + client.add(sdoc("id", "2", "cat_s", "B", "where_s", "NJ", "num_d", "-9", "num_i", "-5", + "num_is", "-9", "num_is", "-5", + "val_b", "false"), null); client.add(sdoc("id", "3"), null); client.commit(); - client.add(sdoc("id", "4", "cat_s", "A", "where_s", "NJ", "num_d", "2", 
"num_i", "3"), null); - client.add(sdoc("id", "5", "cat_s", "B", "where_s", "NJ", "num_d", "11", "num_i", "7", "sparse_s", "two"),null); + client.add(sdoc("id", "4", "cat_s", "A", "where_s", "NJ", "num_d", "2", "num_i", "3", + "num_is", "2", "num_is", "3"), null); + client.add(sdoc("id", "5", "cat_s", "B", "where_s", "NJ", "num_d", "11", "num_i", "7", + "num_is", "11", "num_is", "7", + "sparse_s", "two"),null); client.commit(); - client.add(sdoc("id", "6", "cat_s", "B", "where_s", "NY", "num_d", "-5", "num_i", "-5"),null); + client.add(sdoc("id", "6", "cat_s", "B", "where_s", "NY", "num_d", "-5", "num_i", "-5", + "num_is", "-5"),null); client.commit(); } @@ -874,12 +882,20 @@ public class TestJsonFacets extends SolrTestCaseHS { Client client = Client.localClient(); indexSimple(client); + assertJQ(req("q", "*:*", "rows", "0", "json.facet", "{x:'sum(num_is)'}") + , "facets=={count:6 , x:10.0}" + ); + assertJQ(req("q", "*:*", "rows", "0", "json.facet", "{x:'min(num_is)'}") + , "facets=={count:6 , x:-9}" + ); + // test multiple json.facet commands assertJQ(req("q", "*:*", "rows", "0" - , "json.facet", "{x:'sum(num_d)'}" - , "json.facet", "{y:'min(num_d)'}" + , "json.facet", "{x:'sum(num_d)'}" + , "json.facet", "{y:'min(num_d)'}" + , "json.facet", "{z:'min(num_is)'}" ) - , "facets=={count:6 , x:3.0, y:-9.0 }" + , "facets=={count:6 , x:3.0, y:-9.0, z:-9 }" ); @@ -922,10 +938,11 @@ public class TestJsonFacets extends SolrTestCaseHS { // test nested streaming with stats under streaming assertJQ(req("q", "*:*", "rows", "0" - , "json.facet", "{ cat:{terms:{field:'cat_s', method:stream,sort:'index asc', facet:{ where:{terms:{field:where_s,method:stream,sort:'index asc',sort:'index asc', facet:{x:'max(num_d)'} }}} }}}" + , "json.facet", "{ cat:{terms:{field:'cat_s', method:stream,sort:'index asc', facet:{ where:{terms:{field:where_s,method:stream,sort:'index asc',sort:'index asc', facet:{x:'max(num_d)', y:'sum(num_is)'} }}} }}}" ) , "facets=={count:6 " + - ", cat 
:{buckets:[{val:A, count:2, where:{buckets:[{val:NJ,count:1,x:2.0},{val:NY,count:1,x:4.0}]} },{val:B, count:3, where:{buckets:[{val:NJ,count:2,x:11.0},{val:NY,count:1,x:-5.0}]} }]}" + ", cat :{buckets:[{val:A, count:2, where:{buckets:[{val:NJ,count:1,x:2.0,y:5.0},{val:NY,count:1,x:4.0,y:6.0}]} }," + + "{val:B, count:3, where:{buckets:[{val:NJ,count:2,x:11.0,y:4.0},{val:NY,count:1,x:-5.0,y:-5.0}]} }]}" + "}" ); @@ -1340,7 +1357,7 @@ public class TestJsonFacets extends SolrTestCaseHS { ", f2:{ 'buckets':[{ val:'A', count:2, n1:2}, { val:'B', count:3, n1:0 }]} }" ); - // test sorting by missing stat with domain query + // test sorting by countvals stat with domain query client.testJQ(params(p, "q", "-id:*" , "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'countvals(field(${sparse_num_d}))'} }}" + " , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'countvals(field(${sparse_num_d}))'} }} }" @@ -1779,8 +1796,6 @@ public class TestJsonFacets extends SolrTestCaseHS { ); } - - // stats at top level client.testJQ(params(p, "q", "*:*" , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', mind:'min(${num_d})', maxd:'max(${num_d})'" + @@ -1799,6 +1814,73 @@ public class TestJsonFacets extends SolrTestCaseHS { "}" ); + // stats at top level on multi-valued fields + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{ sum1:'sum(${num_fs})', sumsq1:'sumsq(${num_fs})', avg1:'avg(${num_fs})', mind:'min(${num_fs})', maxd:'max(${num_fs})'" + + ", mini:'min(${num_is})', maxi:'max(${num_is})', mins:'min(${multi_ss})', maxs:'max(${multi_ss})'" + + ", stddev:'stddev(${num_fs})', variance:'variance(${num_fs})', median:'percentile(${num_fs}, 50)'" + + ", perc:'percentile(${num_fs}, 0,75,100)'" + + " }" + ) + , "facets=={ 'count':6, " + + "sum1:0.0, sumsq1:51.5, avg1:0.0, mind:-5.0, maxd:3.0" + + ", mini:-5, maxi:3, mins:'a', 
maxs:'b'" + + ", stddev:2.537222891273055, variance:6.4375, median:0.0, perc:[-5.0,2.25,3.0]" + + "}" + ); + + // test sorting by multi-valued + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'avg(${num_is})'} }}" + + " , f2:{terms:{${terms} field:'${cat_s}', sort:'n1 asc', facet:{n1:'avg(${num_is})'} }} }" + ) + , "facets=={ 'count':6, " + + " f1:{ 'buckets':[{ val:'B', count:3, n1: 0.25}, { val:'A', count:2, n1:0.0}]}" + + ", f2:{ 'buckets':[{ val:'A', count:2, n1:0.0}, { val:'B', count:3, n1:0.25 }]} }" + ); + + // test sorting by percentile + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 asc', facet:{n1:'percentile(${num_is}, 50)'} }}" + + " , f2:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'percentile(${num_is}, 50)'} }} }" + ) + , "facets=={ 'count':6, " + + " f1:{ 'buckets':[{ val:'B', count:3, n1: -0.50}, { val:'A', count:2, n1:1.0}]}" + + ", f2:{ 'buckets':[{ val:'A', count:2, n1:1.0}, { val:'B', count:3, n1:-0.50 }]} }" + ); + + // test sorting by multi-valued field with domain query + client.testJQ(params(p, "q", "-id:*" + , "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'sum(${num_is})'} }}" + + " , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'sum(${num_is})'} }} }" + ) + , "facets=={ 'count':0, " + + " f1:{ 'buckets':[{ val:'B', count:3, n1:1.0 }, { val:'A', count:2, n1:0.0}]}" + + ", f2:{ 'buckets':[{ val:'A', count:2, n1:0.0}, { val:'B', count:3, n1:1.0 }]} }" + ); + + client.testJQ(params(p, "q", "*:*" + , "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " + + "facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " + + "facet:{n1:'min(${multi_ss})'}}}}}}}" + ) + , "facets=={ 'count':6, " + + " f1:{ 'buckets':[{ val:'B', count:3, f2:{'buckets':[{val:'NJ', count:2, n1:'a'},{val:'NY', count:1, 
n1:'a'}]} }," + + " { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:'b'},{val:'NY', count:1}]}}]}" + + "}" + ); + + client.testJQ(params(p, "q", "*:*" + , "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " + + "facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " + + "facet:{n1:'max(${multi_ss})'}}}}}}}" + ) + , "facets=={ 'count':6, " + + " f1:{ 'buckets':[{ val:'B', count:3, f2:{'buckets':[{val:'NJ', count:2, n1:'b'},{val:'NY', count:1, n1:'b'}]} }," + + " { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:'b'},{val:'NY', count:1}]}}]}" + + "}" + ); + // stats at top level, no matches client.testJQ(params(p, "q", "id:DOESNOTEXIST" , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})'" +