diff --git a/pom.xml b/pom.xml index e6024f74f..805232d15 100644 --- a/pom.xml +++ b/pom.xml @@ -178,6 +178,9 @@ Andreas Rieger + + Matthew Rowles + Gilles Sadowski diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatistic.java b/src/main/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatistic.java index 3a051fcf4..b23121605 100644 --- a/src/main/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatistic.java +++ b/src/main/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatistic.java @@ -46,7 +46,7 @@ public abstract class AbstractUnivariateStatistic * {@inheritDoc} */ public abstract double evaluate(final double[] values, final int begin, final int length); - + /** * {@inheritDoc} */ @@ -57,11 +57,11 @@ public abstract class AbstractUnivariateStatistic * to verify that the input parameters designate a subarray of positive length. *

*

* @@ -79,17 +79,17 @@ public abstract class AbstractUnivariateStatistic if (values == null) { throw MathRuntimeException.createIllegalArgumentException("input values array is null"); } - + if (begin < 0) { throw MathRuntimeException.createIllegalArgumentException( "start position cannot be negative ({0})", begin); } - + if (length < 0) { throw MathRuntimeException.createIllegalArgumentException( "length cannot be negative ({0})", length); } - + if (begin + length > values.length) { throw MathRuntimeException.createIllegalArgumentException( "subarray ends after array end"); @@ -102,4 +102,75 @@ public abstract class AbstractUnivariateStatistic return true; } -} \ No newline at end of file + + /** + * This method is used by evaluate(double[], double[], int, int) methods + * to verify that the begin and length parameters designate a subarray of positive length + * and the weights are all non-negative, non-NaN, finite, and not all zero. + *

+ *

+ * + * @param values the input array + * @param weights the weights array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return true if the parameters are valid and designate a subarray of positive length + * @throws IllegalArgumentException if the indices are invalid, either array is null, + * the arrays differ in length, a weight in the designated subarray is NaN, + * infinite or negative, or all weights in the designated subarray are zero + */ + protected boolean test( + final double[] values, + final double[] weights, + final int begin, + final int length) { + + // Validate values, begin and length before touching either array, so that a + // null values array or an out-of-range subarray is rejected by the three-argument + // test instead of failing on values.length or weights[i] below. + final boolean positiveLength = test(values, begin, length); + + if (weights == null) { + throw MathRuntimeException.createIllegalArgumentException("input weights array is null"); + } + + if (weights.length != values.length) { + throw MathRuntimeException.createIllegalArgumentException( + "Different number of weights and values"); + } + + // The three-argument test did not report a subarray of positive length, so there + // are no weights to inspect; pass that answer on to the caller rather than + // failing the at-least-one-positive-weight check. + if (!positiveLength) { + return false; + } + + boolean containsPositiveWeight = false; + for (int i = begin; i < begin + length; i++) { + if (Double.isNaN(weights[i])) { + throw MathRuntimeException.createIllegalArgumentException( + "NaN weight at index {0}", i); + } + if (Double.isInfinite(weights[i])) { + throw MathRuntimeException.createIllegalArgumentException( + "Infinite weight at index {0}", i); + } + if (weights[i] < 0) { + throw MathRuntimeException.createIllegalArgumentException( + "negative weight {0} at index {1} ", weights[i], i); + } + if (!containsPositiveWeight && weights[i] > 0.0) { + containsPositiveWeight = true; + } + } + + if (!containsPositiveWeight) { + throw MathRuntimeException.createIllegalArgumentException( + "weight array must contain at least one non-zero value"); + } + + return true; + } +} + diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/Mean.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Mean.java index 6325eaca8..657e63fbb 100644 --- a/src/main/java/org/apache/commons/math/stat/descriptive/moment/Mean.java +++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Mean.java @@ -22,7 +22,7 @@ import 
org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStati import org.apache.commons.math.stat.descriptive.summary.Sum; /** - *

Computes the arithmetic mean of a set of values. Uses the definitional + *

Computes the arithmetic mean of a set of values. Uses the definitional * formula:

*

* mean = sum(x_i) / n @@ -30,7 +30,7 @@ import org.apache.commons.math.stat.descriptive.summary.Sum; *

where n is the number of observations. *

*

When {@link #increment(double)} is used to add data incrementally from a - * stream of (unstored) values, the value of the statistic that + * stream of (unstored) values, the value of the statistic that * {@link #getResult()} returns is computed using the following recursive * updating algorithm:

*
    @@ -80,18 +80,18 @@ public class Mean extends AbstractStorelessUnivariateStatistic /** * Constructs a Mean with an External Moment. - * + * * @param m1 the moment */ public Mean(final FirstMoment m1) { this.moment = m1; incMoment = false; } - + /** * Copy constructor, creates a new {@code Mean} identical * to the {@code original} - * + * * @param original the {@code Mean} instance to copy */ public Mean(Mean original) { @@ -141,7 +141,7 @@ public class Mean extends AbstractStorelessUnivariateStatistic * Throws IllegalArgumentException if the array is null.

    *

    * See {@link Mean} for details on the computing algorithm.

    - * + * * @param values the input array * @param begin index of the first array element to include * @param length the number of elements to include @@ -154,10 +154,10 @@ public class Mean extends AbstractStorelessUnivariateStatistic if (test(values, begin, length)) { Sum sum = new Sum(); double sampleSize = length; - + // Compute initial estimate using definitional formula double xbar = sum.evaluate(values, begin, length) / sampleSize; - + // Compute correction factor in second pass double correction = 0; for (int i = begin; i < begin + length; i++) { @@ -167,7 +167,54 @@ public class Mean extends AbstractStorelessUnivariateStatistic } return Double.NaN; } - + + /** + * Returns the weighted arithmetic mean of the entries in the specified portion of + * the input array, or Double.NaN if the designated subarray + * is empty. + *

    + * Throws IllegalArgumentException if either array is null.

    + *

    + * See {@link Mean} for details on the computing algorithm. The two-pass algorithm + * described above is used here, with weights applied in computing both the original + * estimate and the correction factor.

    + *

    + * Throws IllegalArgumentException if any of the following are true: + *

    • the values array is null
    • + *
    • the weights array is null
    • + *
    • the weights array does not have the same length as the values array
    • + *
    • the weights array contains one or more infinite values
    • + *
    • the weights array contains one or more NaN values
    • + *
    • the weights array contains negative values
    • + *
    • the start and length arguments do not determine a valid array
    • + *

    + * + * @param values the input array + * @param weights the weights array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the mean of the values or Double.NaN if length = 0 + * @throws IllegalArgumentException if the parameters are not valid + */ + public double evaluate(final double[] values, final double[] weights, + final int begin, final int length) { + // test(...) throws on invalid input; a false result means there is nothing to average + if (!test(values, weights, begin, length)) { + return Double.NaN; + } + + final Sum summer = new Sum(); + + // First pass: definitional estimate, weighted sum divided by total weight + final double totalWeight = summer.evaluate(weights,begin,length); + final double estimate = summer.evaluate(values, weights, begin, length) / totalWeight; + + // Second pass: weighted residuals about the estimate form the correction term + double residual = 0; + for (int k = begin; k < begin + length; k++) { + residual += weights[k] * (values[k] - estimate); + } + return estimate + (residual/totalWeight); + } + /** * {@inheritDoc} */ @@ -177,12 +224,12 @@ public class Mean extends AbstractStorelessUnivariateStatistic copy(this, result); return result; } - - + + /** * Copies source to dest. *

    Neither source nor dest can be null.

    - * + * * @param source Mean to copy * @param dest Mean to copy to * @throws NullPointerException if either source or dest is null diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java index ed259e483..0a539f51d 100644 --- a/src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java +++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java @@ -20,10 +20,11 @@ import java.io.Serializable; import org.apache.commons.math.MathRuntimeException; import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math.stat.descriptive.summary.Sum; /** * Computes the variance of the available values. By default, the unbiased - * "sample variance" definitional formula is used: + * "sample variance" definitional formula is used: *

    * variance = sum((x_i - mean)^2) / (n - 1)

    *

    @@ -33,19 +34,19 @@ import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStati * The definitional formula does not have good numerical properties, so * this implementation does not compute the statistic using the definitional * formula.

      - *
    • The getResult method computes the variance using + *
    • The getResult method computes the variance using * updating formulas based on West's algorithm, as described in * Chan, T. F. and * J. G. Lewis 1979, Communications of the ACM, * vol. 22 no. 9, pp. 526-531.
    • *
    • The evaluate methods leverage the fact that they have the - * full array of values in memory to execute a two-pass algorithm. + * full array of values in memory to execute a two-pass algorithm. * Specifically, these methods use the "corrected two-pass algorithm" from * Chan, Golub, Levesque, Algorithms for Computing the Sample Variance, - * American Statistician, August 1983.
    - * Note that adding values using increment or + * American Statistician, vol. 37, no. 3 (1983) pp. 242-247. + * Note that adding values using increment or * incrementAll and then executing getResult will - * sometimes give a different, less accurate, result than executing + * sometimes give a different, less accurate, result than executing * evaluate with the full array of values. The former approach * should only be used when the full array of values is not available.

    *

    @@ -77,10 +78,10 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se * constructed with an external SecondMoment as a parameter. */ protected boolean incMoment = true; - + /** * Determines whether or not bias correction is applied when computing the - * value of the statisic. True means that bias is corrected. See + * value of the statisic. True means that bias is corrected. See * {@link Variance} for details on the formula. */ private boolean isBiasCorrected = true; @@ -95,7 +96,7 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se /** * Constructs a Variance based on an external second moment. - * + * * @param m2 the SecondMoment (Third or Fourth moments work * here as well.) */ @@ -103,11 +104,11 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se incMoment = false; this.moment = m2; } - + /** * Constructs a Variance with the specified isBiasCorrected * property - * + * * @param isBiasCorrected setting for bias correction - true means * bias will be corrected and is equivalent to using the argumentless * constructor @@ -116,11 +117,11 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se moment = new SecondMoment(); this.isBiasCorrected = isBiasCorrected; } - + /** * Constructs a Variance with the specified isBiasCorrected * property and the supplied external second moment. 
- * + * * @param isBiasCorrected setting for bias correction - true means * bias will be corrected * @param m2 the SecondMoment (Third or Fourth moments work @@ -129,26 +130,26 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se public Variance(boolean isBiasCorrected, SecondMoment m2) { incMoment = false; this.moment = m2; - this.isBiasCorrected = isBiasCorrected; + this.isBiasCorrected = isBiasCorrected; } - + /** * Copy constructor, creates a new {@code Variance} identical * to the {@code original} - * + * * @param original the {@code Variance} instance to copy */ public Variance(Variance original) { copy(original, this); - } - + } + /** - * {@inheritDoc} - *

    If all values are available, it is more accurate to use + * {@inheritDoc} + *

    If all values are available, it is more accurate to use * {@link #evaluate(double[])} rather than adding values one at a time * using this method and then executing {@link #getResult}, since - * evaluate leverages the fact that is has the full - * list of values together to execute a two-pass algorithm. + * evaluate leverages the fact that it has the full + * list of values together to execute a two-pass algorithm. * See {@link Variance}.

    */ @Override @@ -182,7 +183,7 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se public long getN() { return moment.getN(); } - + /** * {@inheritDoc} */ @@ -192,9 +193,9 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se moment.clear(); } } - + /** - * Returns the variance of the entries in the input array, or + * Returns the variance of the entries in the input array, or * Double.NaN if the array is empty. *

    * See {@link Variance} for details on the computing algorithm.

    @@ -204,7 +205,7 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se * Throws IllegalArgumentException if the array is null.

    *

    * Does not change the internal state of the statistic.

    - * + * * @param values the input array * @return the variance of the values or Double.NaN if length = 0 * @throws IllegalArgumentException if the array is null @@ -229,7 +230,7 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se * Does not change the internal state of the statistic.

    *

    * Throws IllegalArgumentException if the array is null.

    - * + * * @param values the input array * @param begin index of the first array element to include * @param length the number of elements to include @@ -254,10 +255,69 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se } return var; } - + + /** + *

    Returns the weighted variance of the entries in the specified portion of + * the input array, or Double.NaN if the designated subarray + * is empty.

    + *

    + * Uses the formula

    +     *   Σ(weights[i]*(values[i] - weightedMean)2)/(Σ(weights[i]) - 1)
    +     * 
    + * where weightedMean is the weighted mean

    + *

    + * This formula will not return the same result as the unweighted variance when all + * weights are equal, unless all weights are equal to 1. The formula assumes that + * weights are to be treated as "expansion values," as will be the case if for example + * the weights represent frequency counts. To normalize weights so that the denominator + * in the variance computation equals the length of the input vector minus one, use

    +     *   evaluate(values, MathUtils.normalizeArray(weights, values.length)); 
    +     * 
    + *

    + * Returns 0 for a single-value (i.e. length = 1) sample.

    + *

    + * Throws IllegalArgumentException if any of the following are true: + *

    • the values array is null
    • + *
    • the weights array is null
    • + *
    • the weights array does not have the same length as the values array
    • + *
    • the weights array contains one or more infinite values
    • + *
    • the weights array contains one or more NaN values
    • + *
    • the weights array contains negative values
    • + *
    • the start and length arguments do not determine a valid array
    • + *

    + *

    + * Does not change the internal state of the statistic.

    + *

    + * Throws IllegalArgumentException if either array is null.

    + * + * @param values the input array + * @param weights the weights array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the variance of the values or Double.NaN if length = 0 + * @throws IllegalArgumentException if the parameters are not valid + */ + public double evaluate(final double[] values, final double[] weights, + final int begin, final int length) { + + double var = Double.NaN; + + // No clear() here: this method is documented not to change the internal + // state of the statistic, and its result depends only on the arguments. + if (test(values, weights, begin, length)) { + if (length == 1) { + var = 0.0; + } else if (length > 1) { + // Compute the weighted mean first, then delegate to the overload + // that takes a precomputed mean. + Mean mean = new Mean(); + double m = mean.evaluate(values, weights, begin, length); + var = evaluate(values, weights, m, begin, length); + } + } + return var; + } + /** * Returns the variance of the entries in the specified portion of - * the input array, using the precomputed mean value. Returns + * the input array, using the precomputed mean value. Returns * Double.NaN if the designated subarray is empty. *

    * See {@link Variance} for details on the computing algorithm.

    @@ -272,7 +332,7 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se * Throws IllegalArgumentException if the array is null.

    *

    * Does not change the internal state of the statistic.

    - * + * * @param values the input array * @param mean the precomputed mean value * @param begin index of the first array element to include @@ -281,9 +341,9 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se * @throws IllegalArgumentException if the array is null or the array index * parameters are not valid */ - public double evaluate(final double[] values, final double mean, + public double evaluate(final double[] values, final double mean, final int begin, final int length) { - + double var = Double.NaN; if (test(values, begin, length)) { @@ -298,7 +358,7 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se accum += dev * dev; accum2 += dev; } - double len = length; + double len = length; if (isBiasCorrected) { var = (accum - (accum2 * accum2 / len)) / (len - 1.0); } else { @@ -308,7 +368,82 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se } return var; } - + + /** + * Returns the weighted variance of the entries in the specified portion of + * the input array, using the precomputed weighted mean value. Returns + * Double.NaN if the designated subarray is empty. + *

    + * Uses the formula

    +     *   Σ(weights[i]*(values[i] - mean)2)/(Σ(weights[i]) - 1)
    +     * 

    + *

    + * The formula used assumes that the supplied mean value is the weighted arithmetic + * mean of the sample data, not a known population parameter. This method + * is supplied only to save computation when the mean has already been + * computed.

    + *

    + * This formula will not return the same result as the unweighted variance when all + * weights are equal, unless all weights are equal to 1. The formula assumes that + * weights are to be treated as "expansion values," as will be the case if for example + * the weights represent frequency counts. To normalize weights so that the denominator + * in the variance computation equals the length of the input vector minus one, use

    +     *   evaluate(values, MathUtils.normalizeArray(weights, values.length)); 
    +     * 
    + *

    + * Returns 0 for a single-value (i.e. length = 1) sample.

    + *

    + * Throws IllegalArgumentException if any of the following are true: + *

    • the values array is null
    • + *
    • the weights array is null
    • + *
    • the weights array does not have the same length as the values array
    • + *
    • the weights array contains one or more infinite values
    • + *
    • the weights array contains one or more NaN values
    • + *
    • the weights array contains negative values
    • + *
    • the start and length arguments do not determine a valid array
    • + *

    + *

    + * Does not change the internal state of the statistic.

    + * + * @param values the input array + * @param weights the weights array + * @param mean the precomputed weighted mean value + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the variance of the values or Double.NaN if length = 0 + * @throws IllegalArgumentException if the parameters are not valid + */ + public double evaluate(final double[] values, final double[] weights, + final double mean, final int begin, final int length) { + + double var = Double.NaN; + + if (test(values, weights, begin, length)) { + if (length == 1) { + var = 0.0; + } else if (length > 1) { + // Accumulate the weighted squared deviations and the total weight over + // the designated subarray only, so that weights outside + // [begin, begin + length) cannot leak into the denominator. + double accum = 0.0; + double sumWts = 0; + for (int i = begin; i < begin + length; i++) { + final double dev = values[i] - mean; + accum += weights[i] * (dev * dev); + sumWts += weights[i]; + } + + // Bias correction uses (sum of weights - 1) as the denominator + if (isBiasCorrected) { + var = accum / (sumWts - 1); + } else { + var = accum / sumWts; + } + } + } + return var; + } + /** * Returns the variance of the entries in the input array, using the * precomputed mean value. Returns Double.NaN if the array @@ -328,7 +463,7 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se * Throws IllegalArgumentException if the array is null.

    *

    * Does not change the internal state of the statistic.

    - * + * * @param values the input array * @param mean the precomputed mean value * @return the variance of the values or Double.NaN if the array is empty @@ -351,7 +486,7 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se public void setBiasCorrected(boolean isBiasCorrected) { this.isBiasCorrected = isBiasCorrected; } - + /** * {@inheritDoc} */ @@ -361,12 +496,12 @@ public class Variance extends AbstractStorelessUnivariateStatistic implements Se copy(this, result); return result; } - - + + /** * Copies source to dest. *

    Neither source nor dest can be null.

    - * + * * @param source Variance to copy * @param dest Variance to copy to * @throws NullPointerException if either source or dest is null diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/summary/Product.java b/src/main/java/org/apache/commons/math/stat/descriptive/summary/Product.java index 3455c5420..d4ebda352 100644 --- a/src/main/java/org/apache/commons/math/stat/descriptive/summary/Product.java +++ b/src/main/java/org/apache/commons/math/stat/descriptive/summary/Product.java @@ -19,6 +19,7 @@ package org.apache.commons.math.stat.descriptive.summary; import java.io.Serializable; import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic; /** * Returns the product of the available values. @@ -53,17 +54,17 @@ public class Product extends AbstractStorelessUnivariateStatistic implements Ser n = 0; value = Double.NaN; } - + /** * Copy constructor, creates a new {@code Product} identical * to the {@code original} - * + * * @param original the {@code Product} instance to copy */ public Product(Product original) { copy(original, this); } - + /** * {@inheritDoc} */ @@ -91,7 +92,7 @@ public class Product extends AbstractStorelessUnivariateStatistic implements Ser public long getN() { return n; } - + /** * {@inheritDoc} */ @@ -107,7 +108,7 @@ public class Product extends AbstractStorelessUnivariateStatistic implements Ser * is empty. *

    * Throws IllegalArgumentException if the array is null.

    - * + * * @param values the input array * @param begin index of the first array element to include * @param length the number of elements to include @@ -126,7 +127,46 @@ public class Product extends AbstractStorelessUnivariateStatistic implements Ser } return product; } - + + /** + *

    Returns the weighted product of the entries in the specified portion of + * the input array, or Double.NaN if the designated subarray + * is empty.

    + * + *

    Throws IllegalArgumentException if any of the following are true: + *

    • the values array is null
    • + *
    • the weights array is null
    • + *
    • the weights array does not have the same length as the values array
    • + *
    • the weights array contains one or more infinite values
    • + *
    • the weights array contains one or more NaN values
    • + *
    • the weights array contains negative values
    • + *
    • the start and length arguments do not determine a valid array
    • + *

    + * + *

    Uses the formula,

    +     *    weighted product = ∏values[i]weights[i]
    +     * 
    + * that is, the weights are applied as exponents when computing the weighted product.

    + * + * @param values the input array + * @param weights the weights array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the product of the values or Double.NaN if length = 0 + * @throws IllegalArgumentException if the parameters are not valid + */ + public double evaluate(final double[] values, final double[] weights, + final int begin, final int length) { + // test(...) throws on invalid input; a false result yields NaN + if (!test(values, weights, begin, length)) { + return Double.NaN; + } + + // Multiply up values[k]^weights[k], starting from the empty-product value 1 + double result = 1.0; + for (int k = begin; k < begin + length; k++) { + result *= Math.pow(values[k], weights[k]); + } + return result; + } + /** * {@inheritDoc} */ @@ -136,11 +176,11 @@ public class Product extends AbstractStorelessUnivariateStatistic implements Ser copy(this, result); return result; } - + /** * Copies source to dest. *

    Neither source nor dest can be null.

    - * + * * @param source Product to copy * @param dest Product to copy to * @throws NullPointerException if either source or dest is null diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/summary/Sum.java b/src/main/java/org/apache/commons/math/stat/descriptive/summary/Sum.java index 5eb61f138..ef41ac7eb 100644 --- a/src/main/java/org/apache/commons/math/stat/descriptive/summary/Sum.java +++ b/src/main/java/org/apache/commons/math/stat/descriptive/summary/Sum.java @@ -53,17 +53,17 @@ public class Sum extends AbstractStorelessUnivariateStatistic implements Seriali n = 0; value = Double.NaN; } - + /** * Copy constructor, creates a new {@code Sum} identical * to the {@code original} - * + * * @param original the {@code Sum} instance to copy */ public Sum(Sum original) { copy(original, this); } - + /** * {@inheritDoc} */ @@ -91,7 +91,7 @@ public class Sum extends AbstractStorelessUnivariateStatistic implements Seriali public long getN() { return n; } - + /** * {@inheritDoc} */ @@ -107,7 +107,7 @@ public class Sum extends AbstractStorelessUnivariateStatistic implements Seriali * is empty. *

    * Throws IllegalArgumentException if the array is null.

    - * + * * @param values the input array * @param begin index of the first array element to include * @param length the number of elements to include @@ -126,7 +126,45 @@ public class Sum extends AbstractStorelessUnivariateStatistic implements Seriali } return sum; } - + + /** + * The weighted sum of the entries in the specified portion of + * the input array, or Double.NaN if the designated subarray + * is empty. + *

    + * Throws IllegalArgumentException if any of the following are true: + *

    • the values array is null
    • + *
    • the weights array is null
    • + *
    • the weights array does not have the same length as the values array
    • + *
    • the weights array contains one or more infinite values
    • + *
    • the weights array contains one or more NaN values
    • + *
    • the weights array contains negative values
    • + *
    • the start and length arguments do not determine a valid array
    • + *

    + *

    + * Uses the formula,

    +     *    weighted sum = Σ(values[i] * weights[i])
    +     * 

    + * + * @param values the input array + * @param weights the weights array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the sum of the values or Double.NaN if length = 0 + * @throws IllegalArgumentException if the parameters are not valid + */ + public double evaluate(final double[] values, final double[] weights, + final int begin, final int length) { + // test(...) throws on invalid input; a false result yields NaN + if (!test(values, weights, begin, length)) { + return Double.NaN; + } + + // Add up values[k] * weights[k] over the designated subarray + double total = 0.0; + for (int k = begin; k < begin + length; k++) { + total += (values[k] * weights[k]); + } + return total; + } + /** * {@inheritDoc} */ @@ -136,11 +174,11 @@ public class Sum extends AbstractStorelessUnivariateStatistic implements Seriali copy(this, result); return result; } - + /** * Copies source to dest. *

    Neither source nor dest can be null.

    - * + * * @param source Sum to copy * @param dest Sum to copy to * @throws NullPointerException if either source or dest is null diff --git a/src/site/xdoc/changes.xml b/src/site/xdoc/changes.xml index 933cfa4db..03d676d88 100644 --- a/src/site/xdoc/changes.xml +++ b/src/site/xdoc/changes.xml @@ -39,6 +39,9 @@ The type attribute can be add,update,fix,remove. + + Added support for weighted descriptive statistics. + Added normalizeArray method to MathUtils. diff --git a/src/test/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatisticTest.java b/src/test/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatisticTest.java index aa20a9c70..1767275d4 100644 --- a/src/test/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatisticTest.java +++ b/src/test/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatisticTest.java @@ -40,6 +40,8 @@ public class AbstractUnivariateStatisticTest extends TestCase { } protected double[] testArray = {0, 1, 2, 3, 4, 5}; + protected double[] testWeightsArray = {0.3, 0.2, 1.3, 1.1, 1.0, 1.8}; + protected double[] testNegativeWeightsArray = {-0.3, 0.2, -1.3, 1.1, 1.0, 1.8}; protected double[] nullArray = null; protected double[] singletonArray = {0}; protected Mean testStatistic = new Mean(); @@ -85,6 +87,24 @@ public class AbstractUnivariateStatisticTest extends TestCase { fail("Expecting IllegalArgumentException"); } catch (IllegalArgumentException ex) { // expected - } - } + } + try { + testStatistic.test(testArray, nullArray, 0, 1); // null weights array + fail("Expecting IllegalArgumentException"); + } catch (IllegalArgumentException ex) { + // expected + } + try { + testStatistic.test(singletonArray, testWeightsArray, 0, 1); // weights.length != value.length + fail("Expecting IllegalArgumentException"); + } catch (IllegalArgumentException ex) { + // expected + } + try { + testStatistic.test(testArray, testNegativeWeightsArray, 0, 6); // can't have negative weights + 
fail("Expecting IllegalArgumentException"); + } catch (IllegalArgumentException ex) { + // expected + } + } } diff --git a/src/test/java/org/apache/commons/math/stat/descriptive/UnivariateStatisticAbstractTest.java b/src/test/java/org/apache/commons/math/stat/descriptive/UnivariateStatisticAbstractTest.java index 6ba8f6b03..1845d1621 100644 --- a/src/test/java/org/apache/commons/math/stat/descriptive/UnivariateStatisticAbstractTest.java +++ b/src/test/java/org/apache/commons/math/stat/descriptive/UnivariateStatisticAbstractTest.java @@ -5,10 +5,10 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software + * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and @@ -16,8 +16,16 @@ */ package org.apache.commons.math.stat.descriptive; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.List; + import junit.framework.TestCase; +import org.apache.commons.math.TestUtils; +import org.apache.commons.math.random.RandomData; +import org.apache.commons.math.random.RandomDataImpl; + /** * Test cases for the {@link UnivariateStatistic} class. 
* @version $Revision$ $Date$ @@ -46,12 +54,35 @@ public abstract class UnivariateStatisticAbstractTest extends TestCase { protected double thirdMoment = 868.0906859504136; protected double fourthMoment = 9244.080993773481; + + protected double weightedMean = 12.366995073891626d; + protected double weightedVar = 9.974760968886391d; + protected double weightedStd = Math.sqrt(weightedVar); + protected double weightedProduct = 8517647448765288000000d; + protected double weightedSum = 251.05d; + protected double tolerance = 10E-12; protected double[] testArray = - {12.5, 12, 11.8, 14.2, 14.9, 14.5, 21, 8.2, 10.3, 11.3, - 14.1, 9.9, 12.2, 12, 12.1, 11, 19.8, 11, 10, 8.8, - 9, 12.3 }; + { 12.5, 12.0, 11.8, 14.2, 14.9, 14.5, 21.0, 8.2, 10.3, 11.3, + 14.1, 9.9, 12.2, 12.0, 12.1, 11.0, 19.8, 11.0, 10.0, 8.8, + 9.0, 12.3 }; + + protected double[] testWeightsArray = + { 1.5, 0.8, 1.2, 0.4, 0.8, 1.8, 1.2, 1.1, 1.0, 0.7, + 1.3, 0.6, 0.7, 1.3, 0.7, 1.0, 0.4, 0.1, 1.4, 0.9, + 1.1, 0.3 }; + + protected double[] identicalWeightsArray = + { 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, + 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, + 0.5, 0.5 }; + + protected double[] unitWeightsArray = + { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0 }; + public UnivariateStatisticAbstractTest(String name) { super(name); @@ -65,13 +96,13 @@ public abstract class UnivariateStatisticAbstractTest extends TestCase { return tolerance; } - public void testEvaluation() throws Exception { + public void testEvaluation() throws Exception { assertEquals( expectedValue(), getUnivariateStatistic().evaluate(testArray), getTolerance()); } - + public void testCopy() throws Exception { UnivariateStatistic original = getUnivariateStatistic(); UnivariateStatistic copy = original.copy(); @@ -81,4 +112,70 @@ public abstract class UnivariateStatisticAbstractTest extends TestCase { getTolerance()); } + /** + * Tests consistency of weighted 
statistic computation. + * For statistics that support weighted evaluation, this test case compares + * the result of direct computation on an array with repeated values with + * a weighted computation on the corresponding (shorter) array with each + * value appearing only once but with a weight value equal to its multiplicity + * in the repeating array. + */ + + public void testWeightedConsistency() throws Exception { + + // See if this statistic computes weighted statistics + // If not, skip this test + UnivariateStatistic statistic = getUnivariateStatistic(); + Method evaluateMethod = null; + try { + evaluateMethod = statistic.getClass().getDeclaredMethod("evaluate", + double[].class, double[].class, int.class, int.class); + } catch (NoSuchMethodException ex) { + return; // skip test + } + + // Create arrays of values and corresponding integral weights + // and longer array with values repeated according to the weights + final int len = 10; // length of values array + final double mu = 0; // mean of test data + final double sigma = 5; // std dev of test data + double[] values = new double[len]; + double[] weights = new double[len]; + RandomData randomData = new RandomDataImpl(); + + // Fill weights array with random int values between 1 and 5 + int[] intWeights = new int[len]; + for (int i = 0; i < len; i++) { + intWeights[i] = randomData.nextInt(1, 5); + weights[i] = intWeights[i]; + } + + // Fill values array with random data from N(mu, sigma) + // and fill valuesList with values from values array with + // values[i] repeated weights[i] times, each i + List<Double> valuesList = new ArrayList<Double>(); + for (int i = 0; i < len; i++) { + double value = randomData.nextGaussian(mu, sigma); + values[i] = value; + for (int j = 0; j < intWeights[i]; j++) { + valuesList.add(new Double(value)); + } + } + + // Dump valuesList into repeatedValues array + int sumWeights = valuesList.size(); + double[] repeatedValues = new double[sumWeights]; + for (int i = 0; i < sumWeights; i++) { + 
repeatedValues[i] = valuesList.get(i); + } + + // Compare result of weighted statistic computation with direct computation + // on array of repeated values + double weightedResult = (Double) evaluateMethod.invoke( + statistic, values, weights, 0, values.length); + TestUtils.assertRelativelyEquals( + statistic.evaluate(repeatedValues), weightedResult, 10E-14); + + } + } diff --git a/src/test/java/org/apache/commons/math/stat/descriptive/moment/MeanTest.java b/src/test/java/org/apache/commons/math/stat/descriptive/moment/MeanTest.java index 90d481a9b..954908667 100644 --- a/src/test/java/org/apache/commons/math/stat/descriptive/moment/MeanTest.java +++ b/src/test/java/org/apache/commons/math/stat/descriptive/moment/MeanTest.java @@ -5,9 +5,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -29,7 +29,7 @@ import org.apache.commons.math.stat.descriptive.UnivariateStatistic; public class MeanTest extends StorelessUnivariateStatisticAbstractTest{ protected Mean stat; - + /** * @param name */ @@ -42,7 +42,7 @@ public class MeanTest extends StorelessUnivariateStatisticAbstractTest{ suite.setName("Mean Tests"); return suite; } - + /** * {@inheritDoc} */ @@ -58,7 +58,12 @@ public class MeanTest extends StorelessUnivariateStatisticAbstractTest{ public double expectedValue() { return this.mean; } - + + /**Expected value for the testArray defined in UnivariateStatisticAbstractTest */ + public double expectedWeightedValue() { + return this.weightedMean; + } + public void testSmallSamples() { Mean mean = new Mean(); assertTrue(Double.isNaN(mean.getResult())); @@ -66,4 +71,10 @@ public class MeanTest extends StorelessUnivariateStatisticAbstractTest{ assertEquals(1d, mean.getResult(), 0); } + public void testWeightedMean() { + Mean mean = new Mean(); + assertEquals(expectedWeightedValue(), mean.evaluate(testArray, testWeightsArray, 0, testArray.length), getTolerance()); + assertEquals(expectedValue(), mean.evaluate(testArray, identicalWeightsArray, 0, testArray.length), getTolerance()); + } + } diff --git a/src/test/java/org/apache/commons/math/stat/descriptive/moment/VarianceTest.java b/src/test/java/org/apache/commons/math/stat/descriptive/moment/VarianceTest.java index 143df1f86..29846b62d 100644 --- a/src/test/java/org/apache/commons/math/stat/descriptive/moment/VarianceTest.java +++ b/src/test/java/org/apache/commons/math/stat/descriptive/moment/VarianceTest.java @@ -5,9 +5,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,6 +21,7 @@ import junit.framework.TestSuite; import org.apache.commons.math.stat.descriptive.StorelessUnivariateStatisticAbstractTest; import org.apache.commons.math.stat.descriptive.UnivariateStatistic; +import org.apache.commons.math.util.MathUtils; /** * Test cases for the {@link UnivariateStatistic} class. @@ -51,7 +52,7 @@ public class VarianceTest extends StorelessUnivariateStatisticAbstractTest{ suite.setName("Variance Tests"); return suite; } - + /** * {@inheritDoc} */ @@ -59,7 +60,12 @@ public class VarianceTest extends StorelessUnivariateStatisticAbstractTest{ public double expectedValue() { return this.var; } - + + /**Expected value for the testArray defined in UnivariateStatisticAbstractTest */ + public double expectedWeightedValue() { + return this.weightedVar; + } + /** * Make sure Double.NaN is returned iff n = 0 * @@ -70,10 +76,10 @@ public class VarianceTest extends StorelessUnivariateStatisticAbstractTest{ std.increment(1d); assertEquals(0d, std.getResult(), 0); } - + /** * Test population version of variance - */ + */ public void testPopulation() { double[] values = {-1.0d, 3.1d, 4.0d, -2.1d, 22d, 11.7d, 3d, 14d}; SecondMoment m = new SecondMoment(); @@ -84,13 +90,13 @@ public class VarianceTest extends StorelessUnivariateStatisticAbstractTest{ v1.incrementAll(values); assertEquals(populationVariance(values), v1.getResult(), 1E-14); v1 = new Variance(false, m); - assertEquals(populationVariance(values), v1.getResult(), 1E-14); + assertEquals(populationVariance(values), v1.getResult(), 1E-14); v1 = new Variance(false); assertEquals(populationVariance(values), v1.evaluate(values), 1E-14); v1.incrementAll(values); - 
assertEquals(populationVariance(values), v1.getResult(), 1E-14); + assertEquals(populationVariance(values), v1.getResult(), 1E-14); } - + /** * Definitional formula for population variance */ @@ -98,9 +104,26 @@ public class VarianceTest extends StorelessUnivariateStatisticAbstractTest{ double mean = new Mean().evaluate(v); double sum = 0; for (int i = 0; i < v.length; i++) { - sum += (v[i] - mean) * (v[i] - mean); + sum += (v[i] - mean) * (v[i] - mean); } return sum / v.length; } + public void testWeightedVariance() { + Variance variance = new Variance(); + assertEquals(expectedWeightedValue(), + variance.evaluate(testArray, testWeightsArray, 0, testArray.length), getTolerance()); + + // All weights = 1 -> weighted variance = unweighted variance + assertEquals(expectedValue(), + variance.evaluate(testArray, unitWeightsArray, 0, testArray.length), getTolerance()); + + // All weights the same -> when weights are normalized to sum to the length of the values array, + // weighted variance = unweighted value + assertEquals(expectedValue(), + variance.evaluate(testArray, MathUtils.normalizeArray(identicalWeightsArray, testArray.length), + 0, testArray.length), getTolerance()); + + } + } diff --git a/src/test/java/org/apache/commons/math/stat/descriptive/summary/ProductTest.java b/src/test/java/org/apache/commons/math/stat/descriptive/summary/ProductTest.java index d6c8b709c..ef379b69a 100644 --- a/src/test/java/org/apache/commons/math/stat/descriptive/summary/ProductTest.java +++ b/src/test/java/org/apache/commons/math/stat/descriptive/summary/ProductTest.java @@ -5,9 +5,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -29,7 +29,7 @@ import org.apache.commons.math.stat.descriptive.UnivariateStatistic; public class ProductTest extends StorelessUnivariateStatisticAbstractTest{ protected Product stat; - + /** * @param name */ @@ -42,7 +42,7 @@ public class ProductTest extends StorelessUnivariateStatisticAbstractTest{ suite.setName("Product Tests"); return suite; } - + /** * {@inheritDoc} */ @@ -58,7 +58,7 @@ public class ProductTest extends StorelessUnivariateStatisticAbstractTest{ public double getTolerance() { return 10E8; //sic -- big absolute error due to only 15 digits of accuracy in double } - + /** * {@inheritDoc} */ @@ -66,7 +66,12 @@ public class ProductTest extends StorelessUnivariateStatisticAbstractTest{ public double expectedValue() { return this.product; } - + + /**Expected value for the testArray defined in UnivariateStatisticAbstractTest */ + public double expectedWeightedValue() { + return this.weightedProduct; + } + public void testSpecialValues() { Product product = new Product(); assertTrue(Double.isNaN(product.getResult())); @@ -77,9 +82,15 @@ public class ProductTest extends StorelessUnivariateStatisticAbstractTest{ product.increment(Double.NEGATIVE_INFINITY); assertEquals(Double.NEGATIVE_INFINITY, product.getResult(), 0); product.increment(Double.NaN); - assertTrue(Double.isNaN(product.getResult())); + assertTrue(Double.isNaN(product.getResult())); product.increment(1); - assertTrue(Double.isNaN(product.getResult())); + assertTrue(Double.isNaN(product.getResult())); + } + + public void testWeightedProduct() { + Product product = new Product(); + assertEquals(expectedWeightedValue(), product.evaluate(testArray, testWeightsArray, 0, 
testArray.length),getTolerance()); + assertEquals(expectedValue(), product.evaluate(testArray, unitWeightsArray, 0, testArray.length), getTolerance()); } } diff --git a/src/test/java/org/apache/commons/math/stat/descriptive/summary/SumTest.java b/src/test/java/org/apache/commons/math/stat/descriptive/summary/SumTest.java index c31fc4d2d..d7cd2493b 100644 --- a/src/test/java/org/apache/commons/math/stat/descriptive/summary/SumTest.java +++ b/src/test/java/org/apache/commons/math/stat/descriptive/summary/SumTest.java @@ -5,9 +5,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -29,7 +29,7 @@ import org.apache.commons.math.stat.descriptive.UnivariateStatistic; public class SumTest extends StorelessUnivariateStatisticAbstractTest{ protected Sum stat; - + /** * @param name */ @@ -42,13 +42,13 @@ public class SumTest extends StorelessUnivariateStatisticAbstractTest{ suite.setName("Sum Tests"); return suite; } - + /** * {@inheritDoc} */ @Override public UnivariateStatistic getUnivariateStatistic() { - return new Sum(); + return new Sum(); } /** @@ -58,7 +58,12 @@ public class SumTest extends StorelessUnivariateStatisticAbstractTest{ public double expectedValue() { return this.sum; } - + + /**Expected value for the testArray defined in UnivariateStatisticAbstractTest */ + public double expectedWeightedValue() { + return this.weightedSum; + } + public void testSpecialValues() { Sum sum = new Sum(); assertTrue(Double.isNaN(sum.getResult())); @@ -69,7 +74,13 @@ public class SumTest extends StorelessUnivariateStatisticAbstractTest{ sum.increment(Double.NEGATIVE_INFINITY); assertTrue(Double.isNaN(sum.getResult())); sum.increment(1); - assertTrue(Double.isNaN(sum.getResult())); + assertTrue(Double.isNaN(sum.getResult())); + } + + public void testWeightedSum() { + Sum sum = new Sum(); + assertEquals(expectedWeightedValue(), sum.evaluate(testArray, testWeightsArray, 0, testArray.length), getTolerance()); + assertEquals(expectedValue(), sum.evaluate(testArray, unitWeightsArray, 0, testArray.length), getTolerance()); } }