From 78ec9dc5b58501b990364d3a9803d2d9a68e873b Mon Sep 17 00:00:00 2001 From: Phil Steitz Date: Sat, 19 Jun 2004 21:14:54 +0000 Subject: [PATCH] Enforced bounds on quantile, improved javadoc. git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@141324 13f79535-47bb-0310-9956-ffa450edef68 --- .../math/stat/univariate/rank/Percentile.java | 137 +++++++++++++----- 1 file changed, 97 insertions(+), 40 deletions(-) diff --git a/src/java/org/apache/commons/math/stat/univariate/rank/Percentile.java b/src/java/org/apache/commons/math/stat/univariate/rank/Percentile.java index 38509e9b3..a50128c06 100644 --- a/src/java/org/apache/commons/math/stat/univariate/rank/Percentile.java +++ b/src/java/org/apache/commons/math/stat/univariate/rank/Percentile.java @@ -22,20 +22,49 @@ import org.apache.commons.math.stat.univariate.AbstractUnivariateStatistic; /** * Provides percentile computation. *

- * There are several commonly used methods for estimating percentiles (a.k.a. quantiles) based - * on sample data. For large samples, the different methods agree closely, but when sample sizes - * are small, different methods will give significantly different results. The implementation provided here - * follows the first estimation procedure presented - * here. + * There are several commonly used methods for estimating percentiles (a.k.a. + * quantiles) based on sample data. For large samples, the different methods + * agree closely, but when sample sizes are small, different methods will give + * significantly different results. The algorithm implemented here works as follows: + *

    + *
  1. Let n be the length of the (sorted) array and + * 0 < p <= 100 be the desired percentile.
  2. + *
  3. If n = 1 return the unique array element (regardless of + * the value of p); otherwise
  4. + *
  5. Compute the estimated percentile position + * pos = p * (n + 1) / 100 and the difference, d, + * between pos and floor(pos) (i.e. the fractional + * part of pos). If pos >= n return the largest + * element in the array; otherwise
  6. + *
  7. Let lower be the element in position + * floor(pos) in the array and let upper be the + * next element in the array. Return lower + d * (upper - lower) + *
  8. + *
+ *

* - * @version $Revision: 1.19 $ $Date: 2004/05/19 14:16:32 $ + * To compute percentiles, the data must be (totally) ordered. Input arrays + * are copied and then sorted using {@link java.util.Arrays#sort(double[])}. + * The ordering used by Arrays.sort(double[]) is the one determined + * by {@link java.lang.Double#compareTo(Double)}. This ordering makes + * Double.NaN larger than any other value (including + * Double.POSITIVE_INFINITY). Therefore, for example, the median + * (50th percentile) of + * {0, 1, 2, 3, 4, Double.NaN} evaluates to 2.5. + *

+ * Since percentile estimation usually involves interpolation between array + * elements, arrays containing NaN or infinite values will often + * result in NaN or infinite values returned. + * + * @version $Revision: 1.20 $ $Date: 2004/06/19 21:14:54 $ */ public class Percentile extends AbstractUnivariateStatistic implements Serializable { /** Serializable version identifier */ static final long serialVersionUID = -8091216485095130416L; - /** Determines what percentile is computed when evaluate() is activated with no quantile argument */ + /** Determines what percentile is computed when evaluate() is activated + * with no quantile argument */ private double quantile = 0.0; /** @@ -49,9 +78,11 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa /** * Constructs a Percentile with the specific quantile value. * @param p the quantile + * @throws IllegalArgumentException if p is not greater than 0 and less + * than or equal to 100 */ public Percentile(final double p) { - this.quantile = p; + setQuantile(p); } /** @@ -61,12 +92,22 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa * Calls to this method do not modify the internal quantile * state of this statistic. *

- * See {@link Percentile} for a description of the percentile estimation algorithm used. + *

+ *

+ * See {@link Percentile} for a description of the percentile estimation + * algorithm used. * * @param values Is a double[] containing the values * @param p Is the quantile to evaluate to. - * @return the result of the evaluation or Double.NaN - * if the array is empty + * @return the result of the evaluation or Double.NaN if the array is empty + * @throws IllegalArgumentException if values is null */ public double evaluate(final double[] values, final double p) { test(values, 0, 0); @@ -74,24 +115,30 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa } /** - * Returns an estimate of the quantileth percentile of the values - * in the values array. The quantile estimated is determined by - * the quantile property. + * Returns an estimate of the quantileth percentile of the + * values in the values array. The quantile estimated is + * determined by the quantile property. *

- * See {@link Percentile} for a description of the percentile estimation algorithm used. + *

    + *
  • Returns Double.NaN if length = 0
  • + *
  • Returns (for any value of quantile) + * values[start] if length = 1
  • + *
  • Throws IllegalArgumentException if values + * is null, or start or length + * is invalid
  • + *
+ *

+ * See {@link Percentile} for a description of the percentile estimation + * algorithm used. * * @param values array of input values * @param start the first (0-based) element to include in the computation * @param length the number of array elements to include - * @return the result of the evaluation or Double.NaN - * if the array is empty + * @return the percentile value + * @throws IllegalArgumentException if the parameters are not valid * */ - public double evaluate( - final double[] values, - final int start, - final int length) { - + public double evaluate( final double[] values, final int start, final int length) { return evaluate(values, start, length, quantile); } @@ -104,27 +151,32 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa * Calls to this method do not modify the internal quantile * state of this statistic. *

- * See {@link Percentile} for a description of the percentile estimation algorithm used. + *

    + *
  • Returns Double.NaN if length = 0
  • + *
  • Returns (for any value of p) values[begin] + * if length = 1
  • + *
  • Throws IllegalArgumentException if values + * is null, begin or length is invalid, or + * p is not a valid quantile value
  • + *
+ *

+ * See {@link Percentile} for a description of the percentile estimation + * algorithm used. * - * @param values Is a double[] containing the values - * @param p Is the quantile to evaluate to. + * @param values array of input values + * @param p the percentile to compute * @param begin the first (0-based) element to include in the computation * @param length the number of array elements to include - * @return the result of the evaluation or Double.NaN - * if the array is empty - * @throws IllegalArgumentException if p is not a valid - * quantile. + * @return the percentile value + * @throws IllegalArgumentException if the parameters are not valid */ - public double evaluate( - final double[] values, - final int begin, - final int length, - final double p) { + public double evaluate(final double[] values, final int begin, + final int length, final double p) { test(values, begin, length); if ((p > 100) || (p <= 0)) { - throw new IllegalArgumentException("invalid quantile value"); + throw new IllegalArgumentException("invalid quantile value: " + p); } double n = (double) length; if (n == 0) { @@ -153,8 +205,8 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa } /** - * Returns the value of the quantile field (determines what percentile is computed when evaluate() - * is called with no quantile argument) + * Returns the value of the quantile field (determines what percentile is + * computed when evaluate() is called with no quantile argument). * * @return quantile */ @@ -163,12 +215,17 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa } /** - * Sets the value of the quantile field (determines what percentile is computed when evaluate() - * is called with no quantile argument) + * Sets the value of the quantile field (determines what percentile is + * computed when evaluate() is called with no quantile argument). 
* - * @param p a value between 0 <= p <= 100 + * @param p a value between 0 < p <= 100 + * @throws IllegalArgumentException if p is not greater than 0 and less + * than or equal to 100 */ public void setQuantile(final double p) { + if (p <= 0 || p > 100) { + throw new IllegalArgumentException("Illegal quantile value: " + p); + } quantile = p; }