Enforced bounds on quantile, improved javadoc.

git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@141324 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Phil Steitz 2004-06-19 21:14:54 +00:00
parent a39d48f1e5
commit 78ec9dc5b5
1 changed files with 97 additions and 40 deletions

View File

@ -22,20 +22,49 @@ import org.apache.commons.math.stat.univariate.AbstractUnivariateStatistic;
/** /**
* Provides percentile computation. * Provides percentile computation.
* <p> * <p>
* There are several commonly used methods for estimating percentiles (a.k.a. quantiles) based * There are several commonly used methods for estimating percentiles (a.k.a.
* on sample data. For large samples, the different methods agree closely, but when sample sizes * quantiles) based on sample data. For large samples, the different methods
* are small, different methods will give significantly different results. The implementation provided here * agree closely, but when sample sizes are small, different methods will give
* follows the first estimation procedure presented * significantly different results. The algorithm implemented here works as follows:
* <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> * <ol>
* <li>Let <code>n</code> be the length of the (sorted) array and
* <code>0 &lt; p &lt;= 100</code> be the desired percentile.</li>
* <li>If <code> n = 1 </code> return the unique array element (regardless of
* the value of <code>p</code>); otherwise </li>
* <li>Compute the estimated percentile position
* <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
* between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
* part of <code>pos</code>). If <code>pos &gt;= n</code> return the largest
* element in the array; otherwise</li>
* <li>Let <code>lower</code> be the element in position
* <code>floor(pos)</code> in the array and let <code>upper</code> be the
* next element in the array. Return <code>lower + d * (upper - lower)</code>
* </li>
* </ol>
* <p>
* *
* @version $Revision: 1.19 $ $Date: 2004/05/19 14:16:32 $ * To compute percentiles, the data must be (totally) ordered. Input arrays
* are copied and then sorted using {@link java.util.Arrays#sort(double[])}.
* The ordering used by <code>Arrays.sort(double[])</code> is the one determined
* by {@link java.lang.Double#compareTo(Double)}. This ordering makes
* <code>Double.NaN</code> larger than any other value (including
* <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median
* (50th percentile) of
* <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5</code>.
* <p>
* Since percentile estimation usually involves interpolation between array
* elements, arrays containing <code>NaN</code> or infinite values will often
* result in <code>NaN</code> or infinite values returned.
*
* @version $Revision: 1.20 $ $Date: 2004/06/19 21:14:54 $
*/ */
public class Percentile extends AbstractUnivariateStatistic implements Serializable { public class Percentile extends AbstractUnivariateStatistic implements Serializable {
/** Serializable version identifier */ /** Serializable version identifier */
static final long serialVersionUID = -8091216485095130416L; static final long serialVersionUID = -8091216485095130416L;
/** Determines what percentile is computed when evaluate() is activated with no quantile argument */ /** Determines what percentile is computed when evaluate() is activated
* with no quantile argument */
private double quantile = 0.0; private double quantile = 0.0;
/** /**
@ -49,9 +78,11 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa
/** /**
* Constructs a Percentile with the specific quantile value. * Constructs a Percentile with the specific quantile value.
* @param p the quantile * @param p the quantile
* @throws IllegalArgumentException if p is not greater than 0 and less
* than or equal to 100
*/ */
public Percentile(final double p) { public Percentile(final double p) {
this.quantile = p; setQuantile(p);
} }
/** /**
@ -61,12 +92,22 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa
* Calls to this method do not modify the internal <code>quantile</code> * Calls to this method do not modify the internal <code>quantile</code>
* state of this statistic. * state of this statistic.
* <p> * <p>
* See {@link Percentile} for a description of the percentile estimation algorithm used. * <ul>
* <li>Returns <code>Double.NaN</code> if <code>values</code> has length
* <code>0</code></li>
* <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
* if <code>values</code> has length <code>1</code></li>
* <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
* is null </li>
* </ul>
* <p>
* See {@link Percentile} for a description of the percentile estimation
* algorithm used.
* *
* @param values Is a double[] containing the values * @param values Is a double[] containing the values
* @param p Is the quantile to evaluate to. * @param p Is the quantile to evaluate to.
* @return the result of the evaluation or Double.NaN * @return the result of the evaluation or Double.NaN if the array is empty
* if the array is empty * @throws IllegalArgumentException if <code>values</code> is null
*/ */
public double evaluate(final double[] values, final double p) { public double evaluate(final double[] values, final double p) {
test(values, 0, 0); test(values, 0, 0);
@ -74,24 +115,30 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa
} }
/** /**
* Returns an estimate of the <code>quantile</code>th percentile of the values * Returns an estimate of the <code>quantile</code>th percentile of the
* in the <code>values</code> array. The quantile estimated is determined by * values in the <code>values</code> array. The quantile estimated is
* the <code>quantile</code> property. * determined by the <code>quantile</code> property.
* <p> * <p>
* See {@link Percentile} for a description of the percentile estimation algorithm used. * <ul>
* <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
* <li>Returns (for any value of <code>quantile</code>)
* <code>values[start]</code> if <code>length = 1 </code></li>
* <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
* is null, or <code>start</code> or <code>length</code>
* is invalid</li>
* </ul>
* <p>
* See {@link Percentile} for a description of the percentile estimation
* algorithm used.
* *
* @param values array of input values * @param values array of input values
* @param start the first (0-based) element to include in the computation * @param start the first (0-based) element to include in the computation
* @param length the number of array elements to include * @param length the number of array elements to include
* @return the result of the evaluation or Double.NaN * @return the percentile value
* if the array is empty * @throws IllegalArgumentException if the parameters are not valid
* *
*/ */
public double evaluate( public double evaluate( final double[] values, final int start, final int length) {
final double[] values,
final int start,
final int length) {
return evaluate(values, start, length, quantile); return evaluate(values, start, length, quantile);
} }
@ -104,27 +151,32 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa
* Calls to this method do not modify the internal <code>quantile</code> * Calls to this method do not modify the internal <code>quantile</code>
* state of this statistic. * state of this statistic.
* <p> * <p>
* See {@link Percentile} for a description of the percentile estimation algorithm used. * <ul>
* <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
* <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
* if <code>length = 1 </code></li>
* <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
* is null, <code>begin</code> or <code>length</code> is invalid, or
* <code>p</code> is not a valid quantile value</li>
* </ul>
* <p>
* See {@link Percentile} for a description of the percentile estimation
* algorithm used.
* *
* @param values Is a double[] containing the values * @param values array of input values
* @param p Is the quantile to evaluate to. * @param p the percentile to compute
* @param begin the first (0-based) element to include in the computation * @param begin the first (0-based) element to include in the computation
* @param length the number of array elements to include * @param length the number of array elements to include
* @return the result of the evaluation or Double.NaN * @return the percentile value
* if the array is empty * @throws IllegalArgumentException if the parameters are not valid
* @throws IllegalArgumentException if <code>p</code> is not a valid
* quantile.
*/ */
public double evaluate( public double evaluate(final double[] values, final int begin,
final double[] values, final int length, final double p) {
final int begin,
final int length,
final double p) {
test(values, begin, length); test(values, begin, length);
if ((p > 100) || (p <= 0)) { if ((p > 100) || (p <= 0)) {
throw new IllegalArgumentException("invalid quantile value"); throw new IllegalArgumentException("invalid quantile value: " + p);
} }
double n = (double) length; double n = (double) length;
if (n == 0) { if (n == 0) {
@ -153,8 +205,8 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa
} }
/** /**
* Returns the value of the quantile field (determines what percentile is computed when evaluate() * Returns the value of the quantile field (determines what percentile is
* is called with no quantile argument) * computed when evaluate() is called with no quantile argument).
* *
* @return quantile * @return quantile
*/ */
@ -163,12 +215,17 @@ public class Percentile extends AbstractUnivariateStatistic implements Serializa
} }
/** /**
* Sets the value of the quantile field (determines what percentile is computed when evaluate() * Sets the value of the quantile field (determines what percentile is
* is called with no quantile argument) * computed when evaluate() is called with no quantile argument).
* *
* @param p a value between 0 <= p <= 100 * @param p the quantile; must satisfy 0 &lt; p &lt;= 100
* @throws IllegalArgumentException if p is not greater than 0 and less
* than or equal to 100
*/ */
public void setQuantile(final double p) { public void setQuantile(final double p) {
if (p <= 0 || p > 100) {
throw new IllegalArgumentException("Illegal quantile value: " + p);
}
quantile = p; quantile = p;
} }