From 874993557c334daeee0ca0b4378119521c623930 Mon Sep 17 00:00:00 2001 From: kimchy Date: Tue, 8 Jun 2010 10:39:45 +0300 Subject: [PATCH] add sum of squares, variance, and std deviation for statistical facet --- .../statistical/InternalStatisticalFacet.java | 53 +++++++++++++++---- .../facets/statistical/StatisticalFacet.java | 30 +++++++++++ .../StatisticalFacetCollector.java | 29 ++++++---- .../search/facets/SimpleFacetsTests.java | 1 + 4 files changed, 92 insertions(+), 21 deletions(-) diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/InternalStatisticalFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/InternalStatisticalFacet.java index 3730d3ac62f..0f8b354e894 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/InternalStatisticalFacet.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/InternalStatisticalFacet.java @@ -40,16 +40,19 @@ public class InternalStatisticalFacet implements StatisticalFacet, InternalFacet private double total; + private double sumOfSquares; + private long count; private InternalStatisticalFacet() { } - public InternalStatisticalFacet(String name, double min, double max, double total, long count) { + public InternalStatisticalFacet(String name, double min, double max, double total, double sumOfSquares, long count) { this.name = name; this.min = min; this.max = max; this.total = total; + this.sumOfSquares = sumOfSquares; this.count = count; } @@ -85,6 +88,14 @@ public class InternalStatisticalFacet implements StatisticalFacet, InternalFacet return total(); } + @Override public double sumOfSquares() { + return this.sumOfSquares; + } + + @Override public double getSumOfSquares() { + return sumOfSquares(); + } + @Override public double mean() { return total / count; } @@ -109,10 +120,27 @@ public class InternalStatisticalFacet implements StatisticalFacet, InternalFacet return max(); } + public double variance() { + return (sumOfSquares - ((total * total) / count)) / count; + } + + public double getVariance() { + return variance(); + } + + public double stdDeviation() { + return Math.sqrt(variance()); + } + + public double getStdDeviation() { + return stdDeviation(); + } + @Override public Facet aggregate(Iterable facets) { - double min = Double.MAX_VALUE; - double max = Double.MIN_VALUE; + double min = Double.NaN; + double max = Double.NaN; double total = 0; + double sumOfSquares = 0; long count = 0; for (Facet facet : facets) { @@ -120,26 +148,31 @@ public class InternalStatisticalFacet implements StatisticalFacet, InternalFacet continue; } InternalStatisticalFacet statsFacet = (InternalStatisticalFacet) facet; - if (statsFacet.min() < min) { + if (statsFacet.min() < min || Double.isNaN(min)) { min = statsFacet.min(); } - if (statsFacet.max() > max) { + if (statsFacet.max() > max || Double.isNaN(max)) { max = statsFacet.max(); } total += statsFacet.total(); + sumOfSquares += statsFacet.sumOfSquares(); count += statsFacet.count(); } - return new InternalStatisticalFacet(name, min, max, total, count); + return new InternalStatisticalFacet(name, min, max, total, sumOfSquares, count); } @Override public void toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(name); builder.field("_type", "statistical"); - builder.field("count", count); - builder.field("total", total); - builder.field("min", min); - builder.field("max", max); + builder.field("count", count()); + builder.field("total", total()); + builder.field("min", min()); + builder.field("max", max()); + builder.field("mean", mean()); + builder.field("sum_of_squares", sumOfSquares()); + builder.field("variance", variance()); + builder.field("std_deviation", stdDeviation()); builder.endObject(); } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacet.java index 0ae58b3bcde..e5502f4641f 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacet.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacet.java @@ -48,6 +48,16 @@ public interface StatisticalFacet extends Facet { */ double getTotal(); + /** + * The sum of squares of the values. + */ + double sumOfSquares(); + + /** + * The sum of squares of the values. + */ + double getSumOfSquares(); + /** * The mean (average) of the values. */ @@ -77,4 +87,24 @@ public interface StatisticalFacet extends Facet { * The maximum value. */ double getMax(); + + /** + * Variance of the values. + */ + double variance(); + + /** + * Variance of the values. + */ + double getVariance(); + + /** + * Standard deviation of the values. + */ + double stdDeviation(); + + /** + * Standard deviation of the values. + */ + double getStdDeviation(); } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetCollector.java index 752771bbb85..6e14e6c120c 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facets/statistical/StatisticalFacetCollector.java @@ -79,44 +79,51 @@ public class StatisticalFacetCollector extends FacetCollector { } @Override public Facet facet() { - return new InternalStatisticalFacet(name, statsProc.min(), statsProc.max(), statsProc.total(), statsProc.count()); + return new InternalStatisticalFacet(name, statsProc.min(), statsProc.max(), statsProc.total(), statsProc.sumOfSquares(), statsProc.count()); } public static class StatsProc implements NumericFieldData.DoubleValueInDocProc { - private double min = Double.MAX_VALUE; + private double min = Double.NaN; - private double max = Double.MIN_VALUE; + private double max = Double.NaN; private double total = 0; + private double sumOfSquares = 0.0; + private long count; @Override public void onValue(int docId, double value) { - count++; - total += value; - if (value < min) { + if (value < min || Double.isNaN(min)) { min = value; } - if (value > max) { + if (value > max || Double.isNaN(max)) { max = value; } + sumOfSquares += value * value; + total += value; + count++; } - public double min() { + public final double min() { return min; } - public double max() { + public final double max() { return max; } - public double total() { + public final double total() { return total; } - public long count() { + public final long count() { return count; } + + public final double sumOfSquares() { + return sumOfSquares; + } } } diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facets/SimpleFacetsTests.java b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facets/SimpleFacetsTests.java index cfc42f4fc8d..a953a3de82c 100644 --- a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facets/SimpleFacetsTests.java +++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facets/SimpleFacetsTests.java @@ -124,6 +124,7 @@ public class SimpleFacetsTests extends AbstractNodesTests { assertThat(facet.min(), equalTo(1d)); assertThat(facet.max(), equalTo(2d)); assertThat(facet.mean(), equalTo(1.5d)); + assertThat(facet.sumOfSquares(), equalTo(5d)); facet = searchResponse.facets().facet(StatisticalFacet.class, "stats2"); assertThat(facet.name(), equalTo(facet.name()));