Added implementation of PSquare algorithm to estimate percentiles without
storing data in memory (i.e. as StorelessUnivariateStatistic).

JIRA: MATH-418
Contributed by: Venkatesha Murthy



git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1604443 13f79535-47bb-0310-9956-ffa450edef68
Phil Steitz 2014-06-21 18:14:14 +00:00
parent 44b5c55e07
commit 7b3e0d4297
4 changed files with 1764 additions and 0 deletions

@@ -257,6 +257,9 @@
<contributor>
<name>J. Lewis Muir</name>
</contributor>
<contributor>
<name>Venkatesha Murthy</name>
</contributor>
<contributor>
<name>Christopher Nix</name>
</contributor>

@@ -73,6 +73,10 @@ Users are encouraged to upgrade to this version as this release not
2. A few methods in the FastMath class are in fact slower than their
counterpart in either Math or StrictMath (cf. MATH-740 and MATH-901).
">
<action dev="psteitz" type="add" issue="MATH-418" due-to="Venkatesha Murthy">
Added implementation of PSquare algorithm to estimate percentiles without
storing data in memory (i.e. as StorelessUnivariateStatistic).
</action>
<action dev="erans" type="fix" issue="MATH-1129">
"Percentile": wrong sorting in the presence of NaN.
</action>

@@ -0,0 +1,997 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math3.stat.descriptive.rank;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.commons.math3.analysis.UnivariateFunction;
import org.apache.commons.math3.analysis.interpolation.LinearInterpolator;
import org.apache.commons.math3.analysis.interpolation.NevilleInterpolator;
import org.apache.commons.math3.analysis.interpolation.UnivariateInterpolator;
import org.apache.commons.math3.exception.InsufficientDataException;
import org.apache.commons.math3.exception.OutOfRangeException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic;
import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic;
import org.apache.commons.math3.util.MathArrays;
import org.apache.commons.math3.util.MathUtils;
import org.apache.commons.math3.util.Precision;
/**
* A {@link StorelessUnivariateStatistic} estimating percentiles using the
* <a href=http://www.cs.wustl.edu/~jain/papers/ftp/psqr.pdf>P<SUP>2</SUP></a>
* Algorithm as explained by <a href=http://www.cse.wustl.edu/~jain/>Raj
* Jain</a> and Imrich Chlamtac in
* <a href=http://www.cse.wustl.edu/~jain/papers/psqr.htm>P<SUP>2</SUP> Algorithm
* for Dynamic Calculation of Quantiles and Histogram Without Storing
* Observations</a>.
* <p>
* Note: This implementation is not synchronized and produces an approximate
* result. For small samples, where data can be stored and processed in memory,
* {@link Percentile} should be used.</p>
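* <p>
* A minimal usage sketch (the variable names are illustrative only):
* </p>
* <pre>
* PSquarePercentile p95 = new PSquarePercentile(95);
* for (double observation : observations) {
*     p95.increment(observation);
* }
* double estimate = p95.getResult(); // running estimate of the 95th percentile
* </pre>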
*
* @version $Id$
*/
public class PSquarePercentile extends AbstractStorelessUnivariateStatistic
implements StorelessUnivariateStatistic, Serializable {
/**
* The maximum array size used for psquare algorithm
*/
private static final int PSQUARE_CONSTANT = 5;
/**
* The default quantile used when the no-argument constructor is invoked.
*/
private static final double DEFAULT_QUANTILE_DESIRED = 50d;
/**
* Serial ID
*/
private static final long serialVersionUID = 2283912083175715479L;
/**
* A decimal formatter for print convenience
*/
private static final DecimalFormat DECIMAL_FORMAT = new DecimalFormat(
"00.00");
/**
* Initial list of 5 numbers corresponding to 5 markers. <b>NOTE:</b> the
* overloaded {@code add} methods of the backing {@link FixedCapacityList}
* become no-ops once the capacity is reached.
*/
private final List<Double> initialFive = new FixedCapacityList<Double>(
PSQUARE_CONSTANT);
/**
* The quantile to be computed, in the range [0, 1]. The constructor
* {@link #PSquarePercentile(double)} divides the supplied percentile by 100.
*/
private final double quantile;
/**
* The last observation (input sample) received. No need to serialize.
*/
private transient double lastObservation;
/**
* The marker collection, which takes effect only after 5 values have
* been inserted.
*/
private PSquareMarkers markers = null;
/**
* Computed p value (i.e. the percentile value of the data set received so far)
*/
private double pValue = Double.NaN;
/**
* Counter to count the values/observations accepted into this data set
*/
private long countOfObservations;
/**
* Constructs a PSquarePercentile with the specific percentile value.
* @param p the percentile
* @throws OutOfRangeException if p is outside the [0, 100] range
*/
public PSquarePercentile(final double p) {
if (p > 100 || p < 0) {
throw new OutOfRangeException(LocalizedFormats.OUT_OF_RANGE,
p, 0, 100);
}
this.quantile = p / 100d; // always set it within [0, 1]
}
/**
* Default constructor that uses the {@link #DEFAULT_QUANTILE_DESIRED
* default quantile}.
*/
PSquarePercentile() {
this(DEFAULT_QUANTILE_DESIRED);
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode() {
double result = getResult();
result = Double.isNaN(result) ? 37 : result;
final double markersHash = markers == null ? 0 : markers.hashCode();
final double[] toHash = {result, quantile, markersHash, countOfObservations};
return Arrays.hashCode(toHash);
}
/**
* Returns true iff {@code o} is a {@code PSquarePercentile} returning the
* same values as this for {@code getResult()} and {@code getN()} and also
* having equal markers
*
* @param o object to compare
* @return true if {@code o} is a {@code PSquarePercentile} with
* equivalent internal state
*/
@Override
public boolean equals(Object o) {
boolean result = false;
if (this == o) {
result = true;
} else if (o != null && o instanceof PSquarePercentile) {
PSquarePercentile that = (PSquarePercentile) o;
boolean isNotNull = markers != null && that.markers != null;
boolean isNull = markers == null && that.markers == null;
result = isNotNull ? markers.equals(that.markers) : isNull;
// both may still have null markers during the
// first five observations
result = result && getN() == that.getN();
}
return result;
}
/**
* {@inheritDoc} The internal state updated by the new value consists of
* the marker positions and the computed approximate quantile.
*
* @param observation the observation currently being added.
*/
@Override
public void increment(final double observation) {
// Increment counter
countOfObservations++;
// Store last observation
this.lastObservation = observation;
// 0. Use Brute force for <5
if (markers == null) {
if (initialFive.add(observation)) {
Collections.sort(initialFive);
pValue =
initialFive
.get((int) (quantile * (initialFive.size() - 1)));
return;
}
// 1. Initialize once after 5th observation
markers = newMarkers(initialFive, quantile);
}
// 2. process a Data Point and return pValue
pValue = markers.processDataPoint(observation);
}
/**
* Returns a string containing the last observation, the current estimate
* of the quantile and all markers.
*
* @return string representation of state data
*/
@Override
public String toString() {
if (markers == null) {
return String.format("obs=%s pValue=%s",
DECIMAL_FORMAT.format(lastObservation),
DECIMAL_FORMAT.format(pValue));
} else {
return String.format("obs=%s markers=%s",
DECIMAL_FORMAT.format(lastObservation), markers.toString());
}
}
/**
* {@inheritDoc}
*/
public long getN() {
return countOfObservations;
}
/**
* {@inheritDoc}
*/
@Override
public StorelessUnivariateStatistic copy() {
// multiply quantile by 100 since the constructor divides it by 100
PSquarePercentile copy = new PSquarePercentile(100d * quantile);
if (markers != null) {
copy.markers = (PSquareMarkers) markers.clone();
}
copy.countOfObservations = countOfObservations;
copy.pValue = pValue;
copy.initialFive.clear();
copy.initialFive.addAll(initialFive);
return copy;
}
/**
* Returns the quantile whose value is estimated by this statistic, in the
* range [0.0, 1.0].
*
* @return the quantile estimated by {@link #getResult()}
*/
public double quantile() {
return quantile;
}
/**
* {@inheritDoc} Clears the markers and the initialFive list, and resets
* countOfObservations to 0.
*/
@Override
public void clear() {
markers = null;
initialFive.clear();
countOfObservations = 0L;
pValue = Double.NaN;
}
/**
* {@inheritDoc}
*/
@Override
public double getResult() {
if (Double.compare(quantile, 1d) == 0) {
pValue = maximum();
} else if (Double.compare(quantile, 0d) == 0) {
pValue = minimum();
}
return pValue;
}
/**
* @return maximum in the data set added to this statistic
*/
private double maximum() {
double val = Double.NaN;
if (markers != null) {
val = markers.height(PSQUARE_CONSTANT);
} else if (!initialFive.isEmpty()) {
val = initialFive.get(initialFive.size() - 1);
}
return val;
}
/**
* @return minimum in the data set added to this statistic
*/
private double minimum() {
double val = Double.NaN;
if (markers != null) {
val = markers.height(1);
} else if (!initialFive.isEmpty()) {
val = initialFive.get(0);
}
return val;
}
/**
* Markers is an encapsulation of the five markers/buckets as indicated in
* the original works.
*/
private static class Markers implements PSquareMarkers, Serializable {
/**
* Serial version id
*/
private static final long serialVersionUID = 1L;
/** Low marker index */
private static final int LOW = 2;
/** High marker index */
private static final int HIGH = 4;
/**
* Array of 5+1 markers (the first marker is a dummy so that the
* remaining indexes [1-5] match the one-based indexing used in the
* original paper)
*/
private final Marker[] markerArray;
/**
* Kth cell belonging to [1-5] of the markerArray. No need for
* this to be serialized
*/
private transient int k = -1;
/**
* Constructor
*
* @param theMarkerArray marker array to be used
*/
private Markers(final Marker[] theMarkerArray) {
MathUtils.checkNotNull(theMarkerArray);
markerArray = theMarkerArray;
for (int i = 1; i < PSQUARE_CONSTANT; i++) {
markerArray[i].previous(markerArray[i - 1])
.next(markerArray[i + 1]).index(i);
}
markerArray[0].previous(markerArray[0]).next(markerArray[1])
.index(0);
markerArray[5].previous(markerArray[4]).next(markerArray[5])
.index(5);
}
/**
* Constructor
*
* @param initialFive elements required to build Marker
* @param p quantile required to be computed
*/
private Markers(final List<Double> initialFive, final double p) {
this(createMarkerArray(initialFive, p));
}
/**
* Creates a marker array using the initial five elements and a quantile.
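* The marker heights are seeded with the five sorted observations; the
* desired positions 1, 1 + 2p, 1 + 4p, 3 + 2p, 5 and the desired
* increments 0, p/2, p, (1 + p)/2, 1 mirror the initialization step of
* the original paper (and the array built below).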
*
* @param initialFive list of initial five elements
* @param p the pth quantile
* @return Marker array
*/
private static Marker[] createMarkerArray(
final List<Double> initialFive, final double p) {
final int countObserved =
initialFive == null ? -1 : initialFive.size();
if (countObserved < PSQUARE_CONSTANT) {
throw new InsufficientDataException(
LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE,
countObserved, PSQUARE_CONSTANT);
}
Collections.sort(initialFive);
return new Marker[] {
new Marker(),// Null Marker
new Marker(initialFive.get(0), 1, 0, 1),
new Marker(initialFive.get(1), 1 + 2 * p, p / 2, 2),
new Marker(initialFive.get(2), 1 + 4 * p, p, 3),
new Marker(initialFive.get(3), 3 + 2 * p, (1 + p) / 2, 4),
new Marker(initialFive.get(4), 5, 1, 5) };
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode() {
return Arrays.deepHashCode(markerArray);
}
/**
* {@inheritDoc} This equals method checks whether the marker arrays are
* deeply equal.
*
* @param o the other object
* @return true if the other object is equivalent to this instance
*/
@Override
public boolean equals(Object o) {
boolean result = false;
if (this == o) {
result = true;
} else if (o != null && o instanceof Markers) {
Markers that = (Markers) o;
result = Arrays.deepEquals(markerArray, that.markerArray);
}
return result;
}
/**
* Process a data point
*
* @param inputDataPoint is the data point passed
* @return computed percentile
*/
public double processDataPoint(final double inputDataPoint) {
// 1. Find cell and update minima and maxima
final int kthCell = findCellAndUpdateMinMax(inputDataPoint);
// 2. Increment positions
incrementPositions(1, kthCell + 1, 5);
// 2a. Update desired position with increments
updateDesiredPositions();
// 3. Adjust heights of m[2-4] if necessary
adjustHeightsOfMarkers();
// 4. Return percentile
return getPercentileValue();
}
/**
* Returns the percentile computed thus far.
*
* @return height of mid point marker
*/
public double getPercentileValue() {
return height(3);
}
/**
* Finds the cell where the input observation / value fits.
*
* @param observation the input value to be checked for
* @return kth cell (of the markers ranging from 1-5) where observed
* sample fits
*/
private int findCellAndUpdateMinMax(final double observation) {
k = -1;
if (observation < height(1)) {
markerArray[1].markerHeight = observation;
k = 1;
} else if (observation < height(2)) {
k = 1;
} else if (observation < height(3)) {
k = 2;
} else if (observation < height(4)) {
k = 3;
} else if (observation <= height(5)) {
k = 4;
} else {
markerArray[5].markerHeight = observation;
k = 4;
}
return k;
}
/**
* Adjust the heights of the middle markers (m2 to m4), if necessary,
* using the quantile estimates.
*/
private void adjustHeightsOfMarkers() {
for (int i = LOW; i <= HIGH; i++) {
estimate(i);
}
}
/**
* {@inheritDoc}
*/
public double estimate(final int index) {
if (index < LOW || index > HIGH) {
throw new OutOfRangeException(index, LOW, HIGH);
}
return markerArray[index].estimate();
}
/**
* Increment positions by d. Refer to algorithm paper for the
* definition of d.
*
* @param d The increment value for the position
* @param startIndex start index of the marker array
* @param endIndex end index of the marker array
*/
private void incrementPositions(final int d, final int startIndex,
final int endIndex) {
for (int i = startIndex; i <= endIndex; i++) {
markerArray[i].incrementPosition(d);
}
}
/**
* Increments each marker's desired position by its desired increment
* (the bucket width).
*/
private void updateDesiredPositions() {
for (int i = 1; i < markerArray.length; i++) {
markerArray[i].updateDesiredPosition();
}
}
/**
* Sets previous and next markers after default read is done.
*
* @param anInputStream the input stream to be deserialized
* @throws ClassNotFoundException thrown when a desired class not found
* @throws IOException thrown due to any io errors
*/
private void readObject(ObjectInputStream anInputStream)
throws ClassNotFoundException, IOException {
// always perform the default de-serialization first
anInputStream.defaultReadObject();
// Build links
for (int i = 1; i < PSQUARE_CONSTANT; i++) {
markerArray[i].previous(markerArray[i - 1])
.next(markerArray[i + 1]).index(i);
}
markerArray[0].previous(markerArray[0]).next(markerArray[1])
.index(0);
markerArray[5].previous(markerArray[4]).next(markerArray[5])
.index(5);
}
/**
* Returns the marker height for the given index.
*
* @param markerIndex index of the marker, in [1, 5]
* @return marker height
*/
public double height(final int markerIndex) {
if (markerIndex >= markerArray.length || markerIndex <= 0) {
throw new OutOfRangeException(markerIndex, 1,
markerArray.length);
}
return markerArray[markerIndex].markerHeight;
}
/**
* {@inheritDoc} Clones this Markers instance.
*
* @return cloned object
*/
@Override
public Object clone() {
return new Markers(new Marker[] { new Marker(),
(Marker) markerArray[1].clone(),
(Marker) markerArray[2].clone(),
(Marker) markerArray[3].clone(),
(Marker) markerArray[4].clone(),
(Marker) markerArray[5].clone() });
}
/**
* Returns string representation of the Marker array.
*
* @return Markers as a string
*/
@Override
public String toString() {
return String.format("m1=[%s],m2=[%s],m3=[%s],m4=[%s],m5=[%s]",
markerArray[1].toString(), markerArray[2].toString(),
markerArray[3].toString(), markerArray[4].toString(),
markerArray[5].toString());
}
}
/**
* The class modeling the attributes of the marker of the P-square algorithm
*/
private static class Marker implements Serializable, Cloneable {
/**
* Serial Version ID
*/
private static final long serialVersionUID = -3575879478288538431L;
/**
* The marker index which is just a serial number for the marker in the
* marker array of 5+1.
*/
private int index;
/**
* The integral marker position. Refer to the variable n in the original
* works.
*/
private double intMarkerPosition;
/**
* Desired marker position. Refer to the variable n' in the original
* works.
*/
private double desiredMarkerPosition;
/**
* Marker height or the quantile. Refer to the variable q in the
* original works.
*/
private double markerHeight;
/**
* Desired marker increment. Refer to the variable dn' in the original
* works.
*/
private double desiredMarkerIncrement;
/**
* The next and previous markers allow easy linked navigation in loops.
* They are not serialized as they can be rebuilt during deserialization.
*/
private transient Marker next;
/**
* The previous marker links
*/
private transient Marker previous;
/**
* Nonlinear interpolator
*/
private final UnivariateInterpolator nonLinear =
new NevilleInterpolator();
/**
* Linear interpolator which is not serializable
*/
private transient UnivariateInterpolator linear =
new LinearInterpolator();
/**
* Default constructor
*/
private Marker() {
this.next = this.previous = this;
}
/**
* Constructor of the marker with parameters
*
* @param heightOfMarker the quantile value (marker height)
* @param markerPositionDesired the desired marker position
* @param markerPositionIncrement the increment for the desired position
* @param markerPositionNumber the position number of the marker
*/
private Marker(double heightOfMarker, double markerPositionDesired,
double markerPositionIncrement, double markerPositionNumber) {
this();
this.markerHeight = heightOfMarker;
this.desiredMarkerPosition = markerPositionDesired;
this.desiredMarkerIncrement = markerPositionIncrement;
this.intMarkerPosition = markerPositionNumber;
}
/**
* Sets the previous marker.
*
* @param previousMarker the previous marker to the current marker in
* the array of markers
* @return this instance
*/
private Marker previous(final Marker previousMarker) {
MathUtils.checkNotNull(previousMarker);
this.previous = previousMarker;
return this;
}
/**
* Sets the next marker.
*
* @param nextMarker the next marker to the current marker in the array
* of markers
* @return this instance
*/
private Marker next(final Marker nextMarker) {
MathUtils.checkNotNull(nextMarker);
this.next = nextMarker;
return this;
}
/**
* Sets the index of the marker.
*
* @param indexOfMarker the array index of the marker in marker array
* @return this instance
*/
private Marker index(final int indexOfMarker) {
this.index = indexOfMarker;
return this;
}
/**
* Update desired Position with increment.
*/
private void updateDesiredPosition() {
desiredMarkerPosition += desiredMarkerIncrement;
}
/**
* Increment Position by d.
*
* @param d a delta value to increment
*/
private void incrementPosition(final int d) {
intMarkerPosition += d;
}
/**
* Difference between desired and actual position
*
* @return difference between desired and actual position
*/
private double difference() {
return desiredMarkerPosition - intMarkerPosition;
}
/**
* Estimate the quantile for the current marker.
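* The new height follows the piecewise-parabolic (P<SUP>2</SUP>)
* prediction of the original paper: the parabola through the previous,
* current and next markers evaluated at position n + d (computed here
* via Neville interpolation), with a fall back to linear interpolation
* between the two nearest markers whenever the parabolic estimate lies
* outside the neighbouring heights.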
*
* @return estimated quantile
*/
private double estimate() {
final double di = difference();
final boolean isNextHigher =
next.intMarkerPosition - intMarkerPosition > 1;
final boolean isPreviousLower =
previous.intMarkerPosition - intMarkerPosition < -1;
if (di >= 1 && isNextHigher || di <= -1 && isPreviousLower) {
final int d = di >= 0 ? 1 : -1;
final double[] xval =
new double[] { previous.intMarkerPosition,
intMarkerPosition, next.intMarkerPosition };
final double[] yval =
new double[] { previous.markerHeight, markerHeight,
next.markerHeight };
final double xD = intMarkerPosition + d;
UnivariateFunction univariateFunction =
nonLinear.interpolate(xval, yval);
markerHeight = univariateFunction.value(xD);
// If parabolic estimate is bad then turn linear
if (isEstimateBad(yval, markerHeight)) {
int delta = xD - xval[1] > 0 ? 1 : -1;
final double[] xBad =
new double[] { xval[1], xval[1 + delta] };
final double[] yBad =
new double[] { yval[1], yval[1 + delta] };
MathArrays.sortInPlace(xBad, yBad);// since d can be +/- 1
univariateFunction = linear.interpolate(xBad, yBad);
markerHeight = univariateFunction.value(xD);
}
incrementPosition(d);
}
return markerHeight;
}
/**
* Checks whether the parabolic/nonlinear estimate is bad, i.e. whether
* the computed ordinate lies outside the interval [y[0], y[2]].
*
* @param y the array to get the bounds
* @param yD the estimate
* @return true if yD is a bad estimate
*/
private boolean isEstimateBad(final double[] y, final double yD) {
return yD <= y[0] || yD >= y[2];
}
/**
* {@inheritDoc} <i>This equals method checks the marker attributes and
* also verifies that the navigation pointers (next and previous) refer
* to markers with the same index in both objects.</i>
*
* @param o the other object
* @return true if this instance is equal to the passed object o
*/
@Override
public boolean equals(Object o) {
boolean result = false;
if (this == o) {
result = true;
} else if (o != null && o instanceof Marker) {
Marker that = (Marker) o;
result = Double.compare(markerHeight, that.markerHeight) == 0;
result =
result &&
Double.compare(intMarkerPosition,
that.intMarkerPosition) == 0;
result =
result &&
Double.compare(desiredMarkerPosition,
that.desiredMarkerPosition) == 0;
result =
result &&
Double.compare(desiredMarkerIncrement,
that.desiredMarkerIncrement) == 0;
result = result && next.index == that.next.index;
result = result && previous.index == that.previous.index;
}
return result;
}
@Override
public int hashCode() {
return Arrays.hashCode(new double[] {markerHeight, intMarkerPosition,
desiredMarkerIncrement, desiredMarkerPosition, previous.index, next.index});
}
/**
* Read Object to deserialize.
*
* @param anInstream Stream Object data
* @throws IOException thrown for IO Errors
* @throws ClassNotFoundException thrown for class not being found
*/
private void readObject(ObjectInputStream anInstream)
throws ClassNotFoundException, IOException {
anInstream.defaultReadObject();
previous = next = this;
linear = new LinearInterpolator();
}
/**
* Clone this instance.
*
* @return cloned marker
*/
@Override
public Object clone() {
return new Marker(markerHeight, desiredMarkerPosition,
desiredMarkerIncrement, intMarkerPosition);
}
/**
* {@inheritDoc}
*/
@Override
public String toString() {
return String.format(
"index=%.0f,n=%.0f,np=%.2f,q=%.2f,dn=%.2f,prev=%d,next=%d",
(double) index, Precision.round(intMarkerPosition, 0),
Precision.round(desiredMarkerPosition, 2),
Precision.round(markerHeight, 2),
Precision.round(desiredMarkerIncrement, 2), previous.index,
next.index);
}
}
/**
* A simple fixed capacity list that has an upper bound to growth.
* Once its capacity is reached, {@code add} is a no-op, returning
* {@code false}.
*
* @param <E> the element type held by this list
*/
private static class FixedCapacityList<E> extends ArrayList<E> implements
Serializable {
/**
* Serialization Version Id
*/
private static final long serialVersionUID = 2283952083075725479L;
/**
* Capacity of the list
*/
private final int capacity;
/**
* Constructs the list with the given capacity and stores that capacity.
*
* @param fixedCapacity the capacity to be fixed for this list
*/
public FixedCapacityList(final int fixedCapacity) {
super(fixedCapacity);
this.capacity = fixedCapacity;
}
/**
* {@inheritDoc} In addition it checks that the current {@link #size()}
* is within the capacity; if so the element is added, otherwise the list
* contents are unchanged and {@code false} is returned.
*
* @return true if addition is successful and false otherwise
*/
@Override
public boolean add(final E e) {
return size() < capacity ? super.add(e) : false;
}
/**
* {@inheritDoc} In addition it checks that the sum of the collection's
* size and this instance's {@link #size()} is within the capacity; if so
* the collection is added, otherwise the list contents are unchanged and
* {@code false} is returned.
*
* @return true if addition is successful and false otherwise
*/
@Override
public boolean addAll(Collection<? extends E> collection) {
boolean isCollectionLess =
collection != null &&
collection.size() + size() <= capacity;
return isCollectionLess ? super.addAll(collection) : false;
}
}
/**
* A creation method to build Markers
*
* @param initialFive list of initial five elements
* @param p the quantile desired
* @return an instance of PSquareMarkers
*/
public static PSquareMarkers newMarkers(final List<Double> initialFive,
final double p) {
return new Markers(initialFive, p);
}
/**
* An interface that encapsulates abstractions of the
* P-square algorithm markers as is explained in the original works. This
* interface is exposed with protected access to help in testability.
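* <p>
* A small sketch of how a marker set can be exercised directly (the
* observation values below are illustrative only):
* </p>
* <pre>
* PSquareMarkers markers = PSquarePercentile.newMarkers(
*     Arrays.asList(0.02, 1.18, 9.15, 21.91, 38.62), 0.5);
* double estimate = markers.processDataPoint(39.0);
* </pre>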
*/
protected interface PSquareMarkers extends Cloneable {
/**
* Returns Percentile value computed thus far.
*
* @return percentile
*/
double getPercentileValue();
/**
* A clone function to clone the current instance. It's created as an
* interface method as well for convenience though Cloneable is just a
* marker interface.
*
* @return clone of this instance
*/
Object clone();
/**
* Returns the marker height (or percentile) of a given marker index.
*
* @param markerIndex is the index of marker in the marker array
* @return percentile value of the marker index passed
* @throws OutOfRangeException in case the index is not within [1-5]
*/
double height(final int markerIndex);
/**
* Process a data point by moving the marker heights based on estimator.
*
* @param inputDataPoint is the data point passed
* @return computed percentile
*/
double processDataPoint(final double inputDataPoint);
/**
* Estimates the percentile value of a given marker and updates its height.
*
* @param index the marker's index in the array of markers
* @return percentile estimate
* @throws OutOfRangeException if the index is not within [2-4]
*/
double estimate(final int index);
}
}

@@ -0,0 +1,760 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math3.stat.descriptive.rank;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import org.apache.commons.math3.distribution.LogNormalDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.RealDistribution;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.exception.OutOfRangeException;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;
import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic;
import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatisticAbstractTest;
import org.apache.commons.math3.stat.descriptive.UnivariateStatistic;
import org.apache.commons.math3.stat.descriptive.rank.PSquarePercentile.PSquareMarkers;
import org.apache.commons.math3.util.FastMath;
import org.junit.Assert;
import org.junit.Test;
/**
* Test cases for the {@link PSquarePercentile} class which naturally extends
* {@link StorelessUnivariateStatisticAbstractTest}.
*/
public class PSquarePercentileTest extends
StorelessUnivariateStatisticAbstractTest {
protected double percentile5 = 8.2299d;
protected double percentile95 = 16.72195;// 20.82d; this is approximation
protected double tolerance = 10E-12;
private final RandomGenerator randomGenerator = new Well19937c(1000);
@Override
public double getTolerance() {
return 1.0e-2; // tolerance is relaxed since this is an approximation
// algorithm that only becomes accurate after a few tens
// of samples
}
/**
* Verifies that copied statistics remain equal to originals when
* incremented the same way by making the copy after a majority of elements
* are incremented
*/
@Test
public void testCopyConsistencyWithInitialMostElements() {
StorelessUnivariateStatistic master =
(StorelessUnivariateStatistic) getUnivariateStatistic();
StorelessUnivariateStatistic replica = null;
// select a portion of testArray till 75 % of the length to load first
long index = FastMath.round(0.75 * testArray.length);
// Put the first 75% in master and copy master to replica
master.incrementAll(testArray, 0, (int) index);
replica = master.copy();
// Check same
Assert.assertTrue(replica.equals(master));
Assert.assertTrue(master.equals(replica));
// Now add second part to both and check again
master.incrementAll(testArray, (int) index,
(int) (testArray.length - index));
replica.incrementAll(testArray, (int) index,
(int) (testArray.length - index));
Assert.assertTrue(replica.equals(master));
Assert.assertTrue(master.equals(replica));
}
/**
* Verifies that copied statistics remain equal to originals when
* incremented the same way by way of copying original after just a few
* elements are incremented
*/
@Test
public void testCopyConsistencyWithInitialFirstFewElements() {
StorelessUnivariateStatistic master =
(StorelessUnivariateStatistic) getUnivariateStatistic();
StorelessUnivariateStatistic replica = null;
// select a portion of testArray which is 10% of the length to load
// first
long index = FastMath.round(0.1 * testArray.length);
// Put the first 10% in master and copy master to replica
master.incrementAll(testArray, 0, (int) index);
replica = master.copy();
// Check same
Assert.assertTrue(replica.equals(master));
Assert.assertTrue(master.equals(replica));
// Now add second part to both and check again
master.incrementAll(testArray, (int) index,
(int) (testArray.length - index));
replica.incrementAll(testArray, (int) index,
(int) (testArray.length - index));
Assert.assertTrue(master.equals(master));
Assert.assertTrue(replica.equals(replica));
Assert.assertTrue(replica.equals(master));
Assert.assertTrue(master.equals(replica));
}
@Test(expected = MathIllegalArgumentException.class)
public void testNullListInMarkers() {
// Markers cannot be instantiated with a null list; verify that the
// expected exception is thrown
PSquarePercentile.newMarkers(null, 0);
}
@Test
public void testMiscellaneousFunctionsInMarkers() {
double p = 0.5;
PSquareMarkers markers =
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91,
38.62 }), p);
// Markers equality
Assert.assertTrue(markers.equals(markers));
Assert.assertFalse(markers.equals(null));
Assert.assertFalse(markers.equals(new String()));
// Check equality while markers are still null
// (markers are not initialized until 5 elements have been added)
PSquarePercentile p1 = new PSquarePercentile(), p2 =
new PSquarePercentile();
Assert.assertEquals(p1, p2);
p1.evaluate(new double[] { 1.0, 2.0, 3.0 });
p2.evaluate(new double[] { 1.0, 2.0, 3.0 });
Assert.assertEquals(p1, p2);
// Advance p2 alone with more values so that its markers are
// initialized (non-null)
p2.incrementAll(new double[] { 5.0, 7.0, 11.0 });
Assert.assertFalse(p1.equals(p2));
Assert.assertFalse(p2.equals(p1));
// Next add different data to p1 so that the element counts match and
// its markers are initialized; the actual results will still differ
p1.incrementAll(new double[] { 20, 21, 22, 23 });
Assert.assertFalse(p1.equals(p2));// markers are non-null and N matches,
// but the results won't
}
@Test(expected = OutOfRangeException.class)
public void testMarkersOORLow() {
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91, 38.62 }),
0.5).estimate(0);
}
@Test(expected = OutOfRangeException.class)
public void testMarkersOORHigh() {
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91, 38.62 }),
0.5).estimate(5);
}
@Test
public void testMarkers2() {
double p = 0.5;
PSquareMarkers markers =
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91,
38.62 }), p);
PSquareMarkers markersNew =
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91,
38.62 }), p);
Assert.assertTrue(markers.equals(markersNew));
// If even one marker element has changed, equality no longer holds.
markersNew.processDataPoint(39);
Assert.assertFalse(markers.equals(markersNew));
}
@Test
public void testHashCodeInMarkers() {
PSquarePercentile p = new PSquarePercentile(95);
PSquarePercentile p2 = new PSquarePercentile(95);
Set<PSquarePercentile> s = new HashSet<PSquarePercentile>();
s.add(p);
s.add(p2);
Assert.assertEquals(1, s.size());
Assert.assertEquals(p, s.iterator().next());
double[] d =
new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442,
95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990,
95.1682 };
Assert.assertEquals(95.1981, p.evaluate(d), 1.0e-2); // change
Assert.assertEquals(95.1981, p2.evaluate(d), 1.0e-2); // change
s.clear();
s.add(p);
s.add(p2);
Assert.assertEquals(1, s.size());
Assert.assertEquals(p, s.iterator().next());
PSquareMarkers m1 =
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
95.1772, 95.0925, 95.1990, 95.1682 }), 0.0);
PSquareMarkers m2 =
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
95.1772, 95.0925, 95.1990, 95.1682 }), 0.0);
Assert.assertTrue(m1.equals(m2));
Set<PSquareMarkers> setMarkers = new LinkedHashSet<PSquareMarkers>();
Assert.assertTrue(setMarkers.add(m1));
Assert.assertFalse(setMarkers.add(m2));
Assert.assertEquals(1, setMarkers.size());
PSquareMarkers mThis =
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 195.1772, 195.1567,
195.1937, 195.1959, 95.1442, 195.0610,
195.1591, 195.1195, 195.1772, 95.0925, 95.1990,
195.1682 }), 0.50);
PSquareMarkers mThat =
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
95.1772, 95.0925, 95.1990, 95.1682 }), 0.50);
Assert.assertTrue(mThis.equals(mThis));
Assert.assertFalse(mThis.equals(mThat));
String s1="";
Assert.assertFalse(mThis.equals(s1));
for (int i = 0; i < testArray.length; i++) {
mThat.processDataPoint(testArray[i]);
}
setMarkers.add(mThat);
setMarkers.add(mThis);
Assert.assertTrue(mThat.equals(mThat));
Assert.assertTrue(setMarkers.contains(mThat));
Assert.assertTrue(setMarkers.contains(mThis));
Assert.assertEquals(3, setMarkers.size());
Iterator<PSquareMarkers> iterator=setMarkers.iterator();
Assert.assertEquals(m1, iterator.next());
Assert.assertEquals(mThat, iterator.next());
Assert.assertEquals(mThis, iterator.next());
}
@Test(expected = OutOfRangeException.class)
public void testMarkersWithLowerIndex() {
PSquareMarkers mThat =
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
95.1772, 95.0925, 95.1990, 95.1682 }), 0.50);
for (int i = 0; i < testArray.length; i++) {
mThat.processDataPoint(testArray[i]);
}
mThat.estimate(0);
}
@Test(expected = OutOfRangeException.class)
public void testMarkersWithHigherIndex() {
PSquareMarkers mThat =
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
95.1772, 95.0925, 95.1990, 95.1682 }), 0.50);
for (int i = 0; i < testArray.length; i++) {
mThat.processDataPoint(testArray[i]);
}
mThat.estimate(6);
}
@Test(expected = OutOfRangeException.class)
public void testMarkerHeightWithLowerIndex() {
PSquareMarkers mThat =
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
95.1772, 95.0925, 95.1990, 95.1682 }), 0.50);
mThat.height(0);
}
@Test(expected = OutOfRangeException.class)
public void testMarkerHeightWithHigherIndex() {
PSquareMarkers mThat =
PSquarePercentile.newMarkers(
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
95.1772, 95.0925, 95.1990, 95.1682 }), 0.50);
mThat.height(6);
}
@Test
public void testPSquaredEqualsAndMin() {
PSquarePercentile ptile = new PSquarePercentile(0);
Assert.assertEquals(ptile, ptile);
Assert.assertFalse(ptile.equals(null));
Assert.assertFalse(ptile.equals(new String()));
// With no data, getResult() for the 0th and 100th percentile
// returns NaN
Assert.assertTrue(Double.isNaN(ptile.getResult()));
Assert.assertTrue(Double.isNaN(new PSquarePercentile(100).getResult()));
double[] d = new double[] { 1, 3, 2, 4, 9, 10, 11 };
ptile.evaluate(d);
Assert.assertEquals(ptile, ptile);
Assert.assertEquals(1d, ptile.getResult(), 1e-02);// this calls min
}
@Test
public void testString() {
PSquarePercentile ptile = new PSquarePercentile(95);
Assert.assertNotNull(ptile.toString());
ptile.increment(1);
ptile.increment(2);
ptile.increment(3);
Assert.assertNotNull(ptile.toString());
Assert.assertEquals(expectedValue(), ptile.evaluate(testArray),
getTolerance());
Assert.assertNotNull(ptile.toString());
}
@Override
public UnivariateStatistic getUnivariateStatistic() {
PSquarePercentile ptile = new PSquarePercentile(95);
// Assert.assertNull(ptile.markers());
return ptile;
}
@Override
public double expectedValue() {
return this.percentile95;
}
@Test
public void testHighPercentile() {
double[] d = new double[] { 1, 2, 3 };
PSquarePercentile p = new PSquarePercentile(75.0);
Assert.assertEquals(2, p.evaluate(d), 1.0e-5);
PSquarePercentile p95 = new PSquarePercentile();
Assert.assertEquals(2, p95.evaluate(d), 1.0e-5);
}
@Test
public void testLowPercentile() {
double[] d = new double[] { 0, 1 };
PSquarePercentile p = new PSquarePercentile(25.0);
Assert.assertEquals(0d, p.evaluate(d), Double.MIN_VALUE);
}
@Test
public void testPercentile() {
double[] d = new double[] { 1, 3, 2, 4 };
PSquarePercentile p = new PSquarePercentile(30d);
Assert.assertEquals(1.0, p.evaluate(d), 1.0e-5);
p = new PSquarePercentile(25);
Assert.assertEquals(1.0, p.evaluate(d), 1.0e-5);
p = new PSquarePercentile(75);
Assert.assertEquals(3.0, p.evaluate(d), 1.0e-5);
p = new PSquarePercentile(50);
Assert.assertEquals(2d, p.evaluate(d), 1.0e-5);
}
@Test(expected = MathIllegalArgumentException.class)
public void testInitial() {
PSquarePercentile.newMarkers(new ArrayList<Double>(), 0.5);
Assert.fail();
}
@Test(expected = MathIllegalArgumentException.class)
public void testNegativeInvalidValues() {
double[] d =
new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442,
95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990,
95.1682 };
PSquarePercentile p = new PSquarePercentile(-1.0);
p.evaluate(d, 0, d.length);
Assert.fail("This method has had to throw exception..but it is not..");
}
@Test(expected = MathIllegalArgumentException.class)
public void testPositiveInvalidValues() {
double[] d =
new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442,
95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990,
95.1682 };
PSquarePercentile p = new PSquarePercentile(101.0);
p.evaluate(d, 0, d.length);
Assert.fail("This method has had to throw exception..but it is not..");
}
@Test
public void testNISTExample() {
double[] d =
new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442,
95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990,
95.1682 };
Assert.assertEquals(95.1981, new PSquarePercentile(90d).evaluate(d),
1.0e-2); // changed the accuracy to 1.0e-2
Assert.assertEquals(95.061, new PSquarePercentile(0d).evaluate(d), 0);
Assert.assertEquals(95.1990,
new PSquarePercentile(100d).evaluate(d, 0, d.length), 0);
}
@Test
public void test5() {
PSquarePercentile percentile = new PSquarePercentile(5d);
Assert.assertEquals(this.percentile5, percentile.evaluate(testArray),
1.0);// changed the accuracy to 1 instead of tolerance
}
@Test(expected = MathIllegalArgumentException.class)
public void testNull() {
PSquarePercentile percentile = new PSquarePercentile(50d);
double[] nullArray = null;
percentile.evaluate(nullArray);
}
@Test
public void testEmpty() {
PSquarePercentile percentile = new PSquarePercentile(50d);
double[] emptyArray = new double[] {};
Assert.assertTrue(Double.isNaN(percentile.evaluate(emptyArray)));
}
@Test
public void testSingleton() {
PSquarePercentile percentile = new PSquarePercentile(50d);
double[] singletonArray = new double[] { 1d };
Assert.assertEquals(1d, percentile.evaluate(singletonArray), 0);
Assert.assertEquals(1d, percentile.evaluate(singletonArray, 0, 1), 0);
percentile = new PSquarePercentile(5);
Assert.assertEquals(1d, percentile.evaluate(singletonArray, 0, 1), 0);
percentile = new PSquarePercentile(100);
Assert.assertEquals(1d, percentile.evaluate(singletonArray, 0, 1), 0);
percentile = new PSquarePercentile(100);
Assert.assertTrue(Double.isNaN(percentile
.evaluate(singletonArray, 0, 0)));
}
@Test
public void testSpecialValues() {
PSquarePercentile percentile = new PSquarePercentile(50d);
double[] specialValues =
new double[] { 0d, 1d, 2d, 3d, 4d, Double.NaN };
Assert.assertEquals(2d, percentile.evaluate(specialValues), 0);
specialValues =
new double[] { Double.NEGATIVE_INFINITY, 1d, 2d, 3d,
Double.NaN, Double.POSITIVE_INFINITY };
Assert.assertEquals(2d, percentile.evaluate(specialValues), 0);
specialValues =
new double[] { 1d, 1d, Double.POSITIVE_INFINITY,
Double.POSITIVE_INFINITY };
Assert.assertFalse(Double.isInfinite(percentile.evaluate(specialValues)));
specialValues = new double[] { 1d, 1d, Double.NaN, Double.NaN };
Assert.assertFalse(Double.isNaN(percentile.evaluate(specialValues)));
specialValues =
new double[] { 1d, 1d, Double.NEGATIVE_INFINITY,
Double.NEGATIVE_INFINITY };
percentile = new PSquarePercentile(50d);
// Interpolation results in NEGATIVE_INFINITY + POSITIVE_INFINITY
// changed the result check to infinity instead of NaN
Assert.assertTrue(Double.isInfinite(percentile.evaluate(specialValues)));
}
@Test
public void testArrayExample() {
Assert.assertEquals(expectedValue(),
new PSquarePercentile(95d).evaluate(testArray), getTolerance());
}
@Test
public void testSetQuantile() {
PSquarePercentile percentile = new PSquarePercentile(10d);
percentile = new PSquarePercentile(100); // OK
Assert.assertEquals(1.0, percentile.quantile(), 0);
try {
percentile = new PSquarePercentile(0);
// Assert.fail("Expecting MathIllegalArgumentException");
} catch (MathIllegalArgumentException ex) {
// expected
}
try {
new PSquarePercentile(0d);
// Assert.fail("Expecting MathIllegalArgumentException");
} catch (MathIllegalArgumentException ex) {
// expected
}
}
private Double[] randomTestData(int factor, int values) {
Double[] test = new Double[values];
for (int i = 0; i < test.length; i++) {
test[i] = Math.abs(randomGenerator.nextDouble() * factor);
}
return test;
}
@Test
public void testAccept() {
PSquarePercentile psquared = new PSquarePercentile(0.99);
Assert.assertTrue(Double.isNaN(psquared.getResult()));
Double[] test = randomTestData(100, 10000);
for (Double value : test) {
psquared.increment(value);
Assert.assertTrue(psquared.getResult() >= 0);
}
}
private void assertValues(Double a, Double b, double delta) {
if (Double.isNaN(a)) {
Assert.assertTrue("" + b + " is not NaN.", Double.isNaN(a));
} else {
double max = FastMath.max(a, b);
double percentage = FastMath.abs(a - b) / max;
double deviation = delta;
Assert.assertTrue(String.format(
"Deviated = %f and is beyond %f as a=%f, b=%f",
percentage, deviation, a, b), percentage < deviation);
}
}
private void doCalculatePercentile(Double percentile, Number[] test) {
doCalculatePercentile(percentile, test, Double.MAX_VALUE);
}
private void doCalculatePercentile(Double percentile, Number[] test,
double delta) {
PSquarePercentile psquared = new PSquarePercentile(percentile);
for (Number value : test) {
psquared.increment(value.doubleValue());
}
Percentile p2 = new Percentile(percentile * 100);
double[] dall = new double[test.length];
for (int i = 0; i < test.length; i++) {
dall[i] = test[i].doubleValue();
}
Double referenceValue = p2.evaluate(dall);
assertValues(psquared.getResult(), referenceValue, delta);
}
private void doCalculatePercentile(double percentile, double[] test,
double delta) {
PSquarePercentile psquared = new PSquarePercentile(percentile);
for (double value : test) {
psquared.increment(value);
}
Percentile p2 =
new Percentile(percentile < 1 ? percentile * 100 : percentile);
/*
* double[] dall = new double[test.length]; for (int i = 0; i <
* test.length; i++) dall[i] = test[i];
*/
Double referenceValue = p2.evaluate(test);
assertValues(psquared.getResult(), referenceValue, delta);
}
@Test
public void testCannedDataSet() {
// test.unoverride("dump");
Integer[] seedInput =
new Integer[] { 283, 285, 298, 304, 310, 31, 319, 32, 33, 339,
342, 348, 350, 354, 354, 357, 36, 36, 369, 37, 37, 375,
378, 383, 390, 396, 405, 408, 41, 414, 419, 416, 42,
420, 430, 430, 432, 444, 447, 447, 449, 45, 451, 456,
468, 470, 471, 474, 600, 695, 70, 83, 97, 109, 113, 128 };
Integer[] input = new Integer[seedInput.length * 100];
for (int i = 0; i < input.length; i++) {
input[i] = seedInput[i % seedInput.length] + i;
}
// Arrays.sort(input);
doCalculatePercentile(0.50d, input);
doCalculatePercentile(0.95d, input);
}
@Test
public void test99Percentile() {
Double[] test = randomTestData(100, 10000);
doCalculatePercentile(0.99d, test);
}
@Test
public void test90Percentile() {
Double[] test = randomTestData(100, 10000);
doCalculatePercentile(0.90d, test);
}
@Test
public void test20Percentile() {
Double[] test = randomTestData(100, 100000);
doCalculatePercentile(0.20d, test);
}
@Test
public void test5Percentile() {
Double[] test = randomTestData(50, 990000);
doCalculatePercentile(0.05d, test);
}
@Test
public void test99PercentileHighValues() {
Double[] test = randomTestData(100000, 10000);
doCalculatePercentile(0.99d, test);
}
@Test
public void test90PercentileHighValues() {
Double[] test = randomTestData(100000, 100000);
doCalculatePercentile(0.90d, test);
}
@Test
public void test20PercentileHighValues() {
Double[] test = randomTestData(100000, 100000);
doCalculatePercentile(0.20d, test);
}
@Test
public void test5PercentileHighValues() {
Double[] test = randomTestData(100000, 100000);
doCalculatePercentile(0.05d, test);
}
@Test
public void test0PercentileValuesWithFewerThan5Values() {
double[] test = { 1d, 2d, 3d, 4d };
PSquarePercentile p = new PSquarePercentile(0d);
Assert.assertEquals(1d, p.evaluate(test), 0);
Assert.assertNotNull(p.toString());
}
@Test
public void testPSQuaredEvalFuncWithPapersExampleData() throws IOException {
// This data as input is considered from
// http://www.cs.wustl.edu/~jain/papers/ftp/psqr.pdf
double[] data =
{ 0.02, 0.5, 0.74, 3.39, 0.83, 22.37, 10.15, 15.43, 38.62,
15.92, 34.6, 10.28, 1.47, 0.4, 0.05, 11.39, 0.27, 0.42,
0.09, 11.37,
11.39, 15.43, 15.92, 22.37, 34.6, 38.62, 18.9, 19.2,
27.6, 12.8, 13.7, 21.9
};
PSquarePercentile psquared = new PSquarePercentile(50);
Double p2value = 0d;
for (int i = 0; i < 20; i++) {
psquared.increment(data[i]);
p2value = psquared.getResult();
// System.out.println(psquared.toString());//uncomment here to see
// the papers example output
}
// System.out.println("p2value=" + p2value);
Double expected = 4.44d;// 13d; // From the paper
// http://www.cs.wustl.edu/~jain/papers/ftp/psqr.pdf,
// page 1061: the height of the middle marker.
// expected = new Percentile(50).evaluate(data,0,20);
// Our calculation deviates by about 0.25, giving 4.25 vs 4.44.
Assert.assertEquals(
String.format("Expected=%f, Actual=%f", expected, p2value),
expected, p2value, 0.25);
}
final int TINY = 10, SMALL = 50, NOMINAL = 100, MEDIUM = 500,
STANDARD = 1000, BIG = 10000, VERY_BIG = 50000, LARGE = 1000000,
VERY_LARGE = 10000000;
private void doDistributionTest(RealDistribution distribution) {
double data[];
data = distribution.sample(VERY_LARGE);
doCalculatePercentile(50, data, 0.0001);
doCalculatePercentile(95, data, 0.0001);
data = distribution.sample(LARGE);
doCalculatePercentile(50, data, 0.001);
doCalculatePercentile(95, data, 0.001);
data = distribution.sample(VERY_BIG);
doCalculatePercentile(50, data, 0.001);
doCalculatePercentile(95, data, 0.001);
data = distribution.sample(BIG);
doCalculatePercentile(50, data, 0.001);
doCalculatePercentile(95, data, 0.001);
data = distribution.sample(STANDARD);
doCalculatePercentile(50, data, 0.005);
doCalculatePercentile(95, data, 0.005);
data = distribution.sample(MEDIUM);
doCalculatePercentile(50, data, 0.005);
doCalculatePercentile(95, data, 0.005);
data = distribution.sample(NOMINAL);
doCalculatePercentile(50, data, 0.01);
doCalculatePercentile(95, data, 0.01);
data = distribution.sample(SMALL);
doCalculatePercentile(50, data, 0.01);
doCalculatePercentile(95, data, 0.01);
data = distribution.sample(TINY);
doCalculatePercentile(50, data, 0.05);
doCalculatePercentile(95, data, 0.05);
}
/**
* Tests various distributions.
*/
@Test
public void testDistribution() {
doDistributionTest(new NormalDistribution(4000, 50));
doDistributionTest(new LogNormalDistribution(4000, 50));
// doDistributionTest((new ExponentialDistribution(4000));
// doDistributionTest(new GammaDistribution(5d,1d),0.1);
}
}