mirror of https://github.com/apache/lucene.git
SOLR-11706: minor refactors
* rename stdDev, variance methods to reflect the functionality
* add util functions to compute corrected stdDev and variance
* use DocValuesIterator#advanceExact to check if values exist for the doc
This commit is contained in:
parent
7d5d44a3c4
commit
a1e51cd777
|
@ -34,20 +34,32 @@ public class AggUtil {
|
|||
/**
|
||||
* Computes and returns uncorrected standard deviation for given values
|
||||
*/
|
||||
public static double stdDev(double sumSq, double sum, long count) {
|
||||
// todo: switch to corrected stddev SOLR-11725
|
||||
public static double uncorrectedStdDev(double sumSq, double sum, long count) {
|
||||
// todo: should we return NAN when count==0?
|
||||
double val = count == 0 ? 0 : Math.sqrt((sumSq / count) - Math.pow(sum / count, 2));
|
||||
return val;
|
||||
return count == 0 ? 0 : Math.sqrt((sumSq / count) - Math.pow(sum / count, 2));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes and returns corrected standard deviation for given values
|
||||
*/
|
||||
public static double stdDev(double sumSq, double sum, long count) {
|
||||
// todo: should we return NAN when count==0?
|
||||
return count == 0 ? 0 : Math.sqrt(((count * sumSq) - (sum * sum)) / (count * (count - 1.0D)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes and returns uncorrected variance for given values
|
||||
*/
|
||||
public static double variance(double sumSq, double sum, long count) {
|
||||
// todo: switch to corrected variance SOLR-11725
|
||||
public static double uncorrectedVariance(double sumSq, double sum, long count) {
|
||||
// todo: should we return NAN when count==0?
|
||||
double val = count == 0 ? 0 : (sumSq / count) - Math.pow(sum / count, 2);
|
||||
return val;
|
||||
return count == 0 ? 0 : (sumSq / count) - Math.pow(sum / count, 2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes and returns corrected variance for given values
|
||||
*/
|
||||
public static double variance(double sumSq, double sum, long count) {
|
||||
// todo: should we return NAN when count==0?
|
||||
return count == 0 ? 0 : ((count * sumSq) - (sum * sum)) / (count * (count - 1.0D));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,7 +29,6 @@ import org.apache.lucene.index.NumericDocValues;
|
|||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
|
@ -47,21 +46,18 @@ public abstract class DocValuesAcc extends SlotAcc {
|
|||
|
||||
@Override
|
||||
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
|
||||
int valuesDocID = docIdSetIterator().docID();
|
||||
if (valuesDocID < doc) {
|
||||
valuesDocID = docIdSetIterator().advance(doc);
|
||||
if (advanceExact(doc)) {
|
||||
collectValues(doc, slot);
|
||||
}
|
||||
if (valuesDocID > doc) {
|
||||
// missing
|
||||
return;
|
||||
}
|
||||
assert valuesDocID == doc;
|
||||
collectValues(doc, slot);
|
||||
}
|
||||
|
||||
protected abstract void collectValues(int doc, int slot) throws IOException;
|
||||
|
||||
protected abstract DocIdSetIterator docIdSetIterator();
|
||||
/**
|
||||
* Wrapper around {@code org.apache.lucene.index.DocValuesIterator#advanceExact(int)}.
|
||||
* Returns whether or not the given {@code doc} has a value.
|
||||
*/
|
||||
protected abstract boolean advanceExact(int doc) throws IOException;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -81,8 +77,8 @@ abstract class NumericDVAcc extends DocValuesAcc {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSetIterator docIdSetIterator() {
|
||||
return values;
|
||||
protected boolean advanceExact(int doc) throws IOException {
|
||||
return values.advanceExact(doc);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -103,8 +99,8 @@ abstract class SortedNumericDVAcc extends DocValuesAcc {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSetIterator docIdSetIterator() {
|
||||
return values;
|
||||
protected boolean advanceExact(int doc) throws IOException {
|
||||
return values.advanceExact(doc);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -272,8 +268,8 @@ abstract class SortedDVAcc extends DocValuesAcc {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSetIterator docIdSetIterator() {
|
||||
return values;
|
||||
protected boolean advanceExact(int doc) throws IOException {
|
||||
return values.advanceExact(doc);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -294,8 +290,8 @@ abstract class SortedSetDVAcc extends DocValuesAcc {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSetIterator docIdSetIterator() {
|
||||
return values;
|
||||
protected boolean advanceExact(int doc) throws IOException {
|
||||
return values.advanceExact(doc);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -190,11 +190,15 @@ public class HLLAgg extends StrAggValueSource {
|
|||
values = DocValues.getNumeric(readerContext.reader(), sf.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSetIterator docIdSetIterator() {
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean advanceExact(int doc) throws IOException {
|
||||
return values.advanceExact(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void collectValues(int doc, HLL hll) throws IOException {
|
||||
long val = values.longValue();
|
||||
|
@ -216,11 +220,15 @@ public class HLLAgg extends StrAggValueSource {
|
|||
values = DocValues.getSortedNumeric(readerContext.reader(), sf.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSetIterator docIdSetIterator() {
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean advanceExact(int doc) throws IOException {
|
||||
return values.advanceExact(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void collectValues(int doc, HLL hll) throws IOException {
|
||||
for (int i = 0, count = values.docValueCount(); i < count; i++) {
|
||||
|
|
|
@ -27,7 +27,6 @@ import org.apache.lucene.index.OrdinalMap;
|
|||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
|
@ -522,8 +521,8 @@ public class MinMaxAgg extends SimpleAggValueSource {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSetIterator docIdSetIterator() {
|
||||
return subDv;
|
||||
protected boolean advanceExact(int doc) throws IOException {
|
||||
return subDv.advanceExact(doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,7 +31,6 @@ import org.apache.lucene.index.LeafReaderContext;
|
|||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -313,8 +312,8 @@ public class PercentileAgg extends SimpleAggValueSource {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSetIterator docIdSetIterator() {
|
||||
return values;
|
||||
protected boolean advanceExact(int doc) throws IOException {
|
||||
return values.advanceExact(doc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -366,8 +365,8 @@ public class PercentileAgg extends SimpleAggValueSource {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSetIterator docIdSetIterator() {
|
||||
return values;
|
||||
protected boolean advanceExact(int doc) throws IOException {
|
||||
return values.advanceExact(doc);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -484,7 +484,7 @@ class VarianceSlotAcc extends DoubleFuncSlotAcc {
|
|||
}
|
||||
|
||||
private double variance(int slot) {
|
||||
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
return AggUtil.uncorrectedVariance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -541,7 +541,7 @@ class StddevSlotAcc extends DoubleFuncSlotAcc {
|
|||
}
|
||||
|
||||
private double stdDev(int slot) {
|
||||
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
return AggUtil.uncorrectedStdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -86,7 +86,7 @@ public class StddevAgg extends SimpleAggValueSource {
|
|||
|
||||
@Override
|
||||
protected double getDouble() {
|
||||
return AggUtil.stdDev(sumSq, sum, count);
|
||||
return AggUtil.uncorrectedStdDev(sumSq, sum, count);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -98,7 +98,7 @@ public class StddevAgg extends SimpleAggValueSource {
|
|||
|
||||
@Override
|
||||
protected double computeVal(int slot) {
|
||||
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
return AggUtil.uncorrectedStdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -110,7 +110,7 @@ public class StddevAgg extends SimpleAggValueSource {
|
|||
|
||||
@Override
|
||||
protected double computeVal(int slot) {
|
||||
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
return AggUtil.uncorrectedStdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -122,7 +122,7 @@ public class StddevAgg extends SimpleAggValueSource {
|
|||
|
||||
@Override
|
||||
protected double computeVal(int slot) {
|
||||
return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
return AggUtil.uncorrectedStdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -205,11 +205,15 @@ public class UniqueAgg extends StrAggValueSource {
|
|||
super(fcontext, field, numSlots);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSetIterator docIdSetIterator() {
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean advanceExact(int doc) throws IOException {
|
||||
return values.advanceExact(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
values = DocValues.getNumeric(readerContext.reader(), sf.getName());
|
||||
|
@ -228,11 +232,15 @@ public class UniqueAgg extends StrAggValueSource {
|
|||
super(fcontext, field, numSlots);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DocIdSetIterator docIdSetIterator() {
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean advanceExact(int doc) throws IOException {
|
||||
return values.advanceExact(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
values = DocValues.getSortedNumeric(readerContext.reader(), sf.getName());
|
||||
|
|
|
@ -85,7 +85,7 @@ public class VarianceAgg extends SimpleAggValueSource {
|
|||
|
||||
@Override
|
||||
protected double getDouble() {
|
||||
return AggUtil.variance(sumSq, sum, count);
|
||||
return AggUtil.uncorrectedVariance(sumSq, sum, count);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -97,7 +97,7 @@ public class VarianceAgg extends SimpleAggValueSource {
|
|||
|
||||
@Override
|
||||
protected double computeVal(int slot) {
|
||||
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
return AggUtil.uncorrectedVariance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -109,7 +109,7 @@ public class VarianceAgg extends SimpleAggValueSource {
|
|||
|
||||
@Override
|
||||
protected double computeVal(int slot) {
|
||||
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
return AggUtil.uncorrectedVariance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -121,7 +121,7 @@ public class VarianceAgg extends SimpleAggValueSource {
|
|||
|
||||
@Override
|
||||
protected double computeVal(int slot) {
|
||||
return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
return AggUtil.uncorrectedVariance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue