SOLR-11706: minor refactors

* rename stdDev, variance methods to reflect the functionality
* add util functions to compute corrected stdDev and variance
* use DocValuesIterator#advanceExact to check whether a value exists for the doc
This commit is contained in:
Munendra S N 2019-12-07 11:16:25 +05:30
parent 7d5d44a3c4
commit a1e51cd777
9 changed files with 71 additions and 49 deletions

View File

@ -34,20 +34,32 @@ public class AggUtil {
/**
* Computes and returns uncorrected standard deviation for given values
*/
public static double stdDev(double sumSq, double sum, long count) {
// todo: switch to corrected stddev SOLR-11725
public static double uncorrectedStdDev(double sumSq, double sum, long count) {
// todo: should we return NAN when count==0?
double val = count == 0 ? 0 : Math.sqrt((sumSq / count) - Math.pow(sum / count, 2));
return val;
return count == 0 ? 0 : Math.sqrt((sumSq / count) - Math.pow(sum / count, 2));
}
/**
* Computes and returns the corrected standard deviation for the given values,
* i.e. the square root of {@code ((count * sumSq) - (sum * sum)) / (count * (count - 1))}.
*
* @param sumSq sum of the squares of the values
* @param sum sum of the values
* @param count number of values
* @return the corrected standard deviation, or {@code 0} when {@code count <= 1}
*/
public static double stdDev(double sumSq, double sum, long count) {
  // todo: should we return NAN when count==0?
  // With fewer than two values the denominator count * (count - 1) is zero, and the
  // division would be 0/0 = NaN for count == 1; report 0, as is done for count == 0.
  if (count <= 1) {
    return 0;
  }
  // Floating-point cancellation can leave the numerator marginally below zero for
  // (near-)identical values; clamp it so Math.sqrt does not turn that into NaN.
  double numerator = Math.max(0.0D, (count * sumSq) - (sum * sum));
  return Math.sqrt(numerator / (count * (count - 1.0D)));
}
/**
* Computes and returns uncorrected variance for given values
*/
public static double variance(double sumSq, double sum, long count) {
// todo: switch to corrected variance SOLR-11725
public static double uncorrectedVariance(double sumSq, double sum, long count) {
// todo: should we return NAN when count==0?
double val = count == 0 ? 0 : (sumSq / count) - Math.pow(sum / count, 2);
return val;
return count == 0 ? 0 : (sumSq / count) - Math.pow(sum / count, 2);
}
/**
* Computes and returns the corrected variance for the given values,
* i.e. {@code ((count * sumSq) - (sum * sum)) / (count * (count - 1))}.
*
* @param sumSq sum of the squares of the values
* @param sum sum of the values
* @param count number of values
* @return the corrected variance, or {@code 0} when {@code count <= 1}
*/
public static double variance(double sumSq, double sum, long count) {
  // todo: should we return NAN when count==0?
  // With fewer than two values the denominator count * (count - 1) is zero, and the
  // division would be 0/0 = NaN for count == 1; report 0, as is done for count == 0.
  if (count <= 1) {
    return 0;
  }
  // Floating-point cancellation can leave the numerator marginally below zero for
  // (near-)identical values; clamp it so a negative variance is never reported.
  double numerator = Math.max(0.0D, (count * sumSq) - (sum * sum));
  return numerator / (count * (count - 1.0D));
}
}

View File

@ -29,7 +29,6 @@ import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.schema.SchemaField;
@ -47,21 +46,18 @@ public abstract class DocValuesAcc extends SlotAcc {
@Override
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
int valuesDocID = docIdSetIterator().docID();
if (valuesDocID < doc) {
valuesDocID = docIdSetIterator().advance(doc);
if (advanceExact(doc)) {
collectValues(doc, slot);
}
if (valuesDocID > doc) {
// missing
return;
}
assert valuesDocID == doc;
collectValues(doc, slot);
}
protected abstract void collectValues(int doc, int slot) throws IOException;
protected abstract DocIdSetIterator docIdSetIterator();
/**
* Wrapper around {@code org.apache.lucene.index.DocValuesIterator#advanceExact(int)}.
*
* @param doc the document to check
* @return whether or not the given {@code doc} has a value
* @throws IOException declared for implementations; presumably raised by the wrapped
*         doc values call - confirm against the implementations
*/
protected abstract boolean advanceExact(int doc) throws IOException;
}
/**
@ -81,8 +77,8 @@ abstract class NumericDVAcc extends DocValuesAcc {
}
@Override
protected DocIdSetIterator docIdSetIterator() {
return values;
protected boolean advanceExact(int doc) throws IOException {
return values.advanceExact(doc);
}
}
@ -103,8 +99,8 @@ abstract class SortedNumericDVAcc extends DocValuesAcc {
}
@Override
protected DocIdSetIterator docIdSetIterator() {
return values;
protected boolean advanceExact(int doc) throws IOException {
return values.advanceExact(doc);
}
}
@ -272,8 +268,8 @@ abstract class SortedDVAcc extends DocValuesAcc {
}
@Override
protected DocIdSetIterator docIdSetIterator() {
return values;
protected boolean advanceExact(int doc) throws IOException {
return values.advanceExact(doc);
}
}
@ -294,8 +290,8 @@ abstract class SortedSetDVAcc extends DocValuesAcc {
}
@Override
protected DocIdSetIterator docIdSetIterator() {
return values;
protected boolean advanceExact(int doc) throws IOException {
return values.advanceExact(doc);
}
}

View File

@ -190,11 +190,15 @@ public class HLLAgg extends StrAggValueSource {
values = DocValues.getNumeric(readerContext.reader(), sf.getName());
}
@Override
protected DocIdSetIterator docIdSetIterator() {
return values;
}
// Delegates to values.advanceExact(doc) and returns its result unchanged.
@Override
protected boolean advanceExact(int doc) throws IOException {
return values.advanceExact(doc);
}
@Override
protected void collectValues(int doc, HLL hll) throws IOException {
long val = values.longValue();
@ -216,11 +220,15 @@ public class HLLAgg extends StrAggValueSource {
values = DocValues.getSortedNumeric(readerContext.reader(), sf.getName());
}
@Override
protected DocIdSetIterator docIdSetIterator() {
return values;
}
// Delegates to values.advanceExact(doc) and returns its result unchanged.
@Override
protected boolean advanceExact(int doc) throws IOException {
return values.advanceExact(doc);
}
@Override
protected void collectValues(int doc, HLL hll) throws IOException {
for (int i = 0, count = values.docValueCount(); i < count; i++) {

View File

@ -27,7 +27,6 @@ import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues;
@ -522,8 +521,8 @@ public class MinMaxAgg extends SimpleAggValueSource {
}
@Override
protected DocIdSetIterator docIdSetIterator() {
return subDv;
protected boolean advanceExact(int doc) throws IOException {
return subDv.advanceExact(doc);
}
}
}

View File

@ -31,7 +31,6 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrException;
@ -313,8 +312,8 @@ public class PercentileAgg extends SimpleAggValueSource {
}
@Override
protected DocIdSetIterator docIdSetIterator() {
return values;
protected boolean advanceExact(int doc) throws IOException {
return values.advanceExact(doc);
}
/**
@ -366,8 +365,8 @@ public class PercentileAgg extends SimpleAggValueSource {
}
@Override
protected DocIdSetIterator docIdSetIterator() {
return values;
protected boolean advanceExact(int doc) throws IOException {
return values.advanceExact(doc);
}
}

View File

@ -484,7 +484,7 @@ class VarianceSlotAcc extends DoubleFuncSlotAcc {
}
private double variance(int slot) {
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
return AggUtil.uncorrectedVariance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
@Override
@ -541,7 +541,7 @@ class StddevSlotAcc extends DoubleFuncSlotAcc {
}
private double stdDev(int slot) {
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
return AggUtil.uncorrectedStdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
@Override

View File

@ -86,7 +86,7 @@ public class StddevAgg extends SimpleAggValueSource {
@Override
protected double getDouble() {
return AggUtil.stdDev(sumSq, sum, count);
return AggUtil.uncorrectedStdDev(sumSq, sum, count);
}
}
@ -98,7 +98,7 @@ public class StddevAgg extends SimpleAggValueSource {
@Override
protected double computeVal(int slot) {
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
return AggUtil.uncorrectedStdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
}
@ -110,7 +110,7 @@ public class StddevAgg extends SimpleAggValueSource {
@Override
protected double computeVal(int slot) {
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
return AggUtil.uncorrectedStdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
}
@ -122,7 +122,7 @@ public class StddevAgg extends SimpleAggValueSource {
@Override
protected double computeVal(int slot) {
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
return AggUtil.uncorrectedStdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
}
}

View File

@ -205,11 +205,15 @@ public class UniqueAgg extends StrAggValueSource {
super(fcontext, field, numSlots);
}
@Override
protected DocIdSetIterator docIdSetIterator() {
return values;
}
// Delegates to values.advanceExact(doc) and returns its result unchanged.
@Override
protected boolean advanceExact(int doc) throws IOException {
return values.advanceExact(doc);
}
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
values = DocValues.getNumeric(readerContext.reader(), sf.getName());
@ -228,11 +232,15 @@ public class UniqueAgg extends StrAggValueSource {
super(fcontext, field, numSlots);
}
@Override
protected DocIdSetIterator docIdSetIterator() {
return values;
}
// Delegates to values.advanceExact(doc) and returns its result unchanged.
@Override
protected boolean advanceExact(int doc) throws IOException {
return values.advanceExact(doc);
}
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
values = DocValues.getSortedNumeric(readerContext.reader(), sf.getName());

View File

@ -85,7 +85,7 @@ public class VarianceAgg extends SimpleAggValueSource {
@Override
protected double getDouble() {
return AggUtil.variance(sumSq, sum, count);
return AggUtil.uncorrectedVariance(sumSq, sum, count);
}
}
@ -97,7 +97,7 @@ public class VarianceAgg extends SimpleAggValueSource {
@Override
protected double computeVal(int slot) {
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
return AggUtil.uncorrectedVariance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
}
@ -109,7 +109,7 @@ public class VarianceAgg extends SimpleAggValueSource {
@Override
protected double computeVal(int slot) {
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
return AggUtil.uncorrectedVariance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
}
@ -121,7 +121,7 @@ public class VarianceAgg extends SimpleAggValueSource {
@Override
protected double computeVal(int slot) {
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
return AggUtil.uncorrectedVariance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
}
}