From 7b3e0d4297a5a8257a8e33b28ad7f2d3a23edb05 Mon Sep 17 00:00:00 2001 From: Phil Steitz Date: Sat, 21 Jun 2014 18:14:14 +0000 Subject: [PATCH] Added implementation of PSquare algorithm to estimate percentiles without storing data in memory (i.e. as StorelessUnivariateStatistic). JIRA: MATH-418 Contributed by: Venkatesha Murthy git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1604443 13f79535-47bb-0310-9956-ffa450edef68 --- pom.xml | 3 + src/changes/changes.xml | 4 + .../descriptive/rank/PSquarePercentile.java | 997 ++++++++++++++++++ .../rank/PSquarePercentileTest.java | 760 +++++++++++++ 4 files changed, 1764 insertions(+) create mode 100644 src/main/java/org/apache/commons/math3/stat/descriptive/rank/PSquarePercentile.java create mode 100644 src/test/java/org/apache/commons/math3/stat/descriptive/rank/PSquarePercentileTest.java diff --git a/pom.xml b/pom.xml index ee8827290..0ad054763 100644 --- a/pom.xml +++ b/pom.xml @@ -257,6 +257,9 @@ J. Lewis Muir + + Venkatesha Murthy + Christopher Nix diff --git a/src/changes/changes.xml b/src/changes/changes.xml index fb3ed2eff..4f3715e34 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -73,6 +73,10 @@ Users are encouraged to upgrade to this version as this release not 2. A few methods in the FastMath class are in fact slower that their counterpart in either Math or StrictMath (cf. MATH-740 and MATH-901). "> + + Added implementation of PSquare algorithm to estimate percentiles without + storing data in memory (i.e. as StorelessUnivariateStatistic). + "Percentile": wrong sorting in the presence of NaN. diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/rank/PSquarePercentile.java b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/PSquarePercentile.java new file mode 100644 index 000000000..1ed12b04b --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/PSquarePercentile.java @@ -0,0 +1,997 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.rank; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.Serializable; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.math3.analysis.UnivariateFunction; +import org.apache.commons.math3.analysis.interpolation.LinearInterpolator; +import org.apache.commons.math3.analysis.interpolation.NevilleInterpolator; +import org.apache.commons.math3.analysis.interpolation.UnivariateInterpolator; +import org.apache.commons.math3.exception.InsufficientDataException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic; +import org.apache.commons.math3.util.MathArrays; +import org.apache.commons.math3.util.MathUtils; +import org.apache.commons.math3.util.Precision; + +/** + * A {@link StorelessUnivariateStatistic} estimating percentiles using the + * P2 + * Algorithm as explained by Raj + * Jain and Imrich Chlamtac in + * P2 Algorithm + * for Dynamic Calculation of Quantiles and Histogram Without Storing + * Observations. + *

+ * Note: This implementation is not synchronized and produces an approximate + * result. For small samples, where data can be stored and processed in memory, + * {@link Percentile} should be used.

+ * + * @version $Id$ + */ +public class PSquarePercentile extends AbstractStorelessUnivariateStatistic + implements StorelessUnivariateStatistic, Serializable { + + /** + * The maximum array size used for psquare algorithm + */ + private static final int PSQUARE_CONSTANT = 5; + + /** + * A Default quantile needed in case if user prefers to use default no + * argument constructor. + */ + private static final double DEFAULT_QUANTILE_DESIRED = 50d; + + /** + * Serial ID + */ + private static final long serialVersionUID = 2283912083175715479L; + + /** + * A decimal formatter for print convenience + */ + private static final DecimalFormat DECIMAL_FORMAT = new DecimalFormat( + "00.00"); + + /** + * Initial list of 5 numbers corresponding to 5 markers. NOTE:watch + * out for the add methods that are overloaded + */ + private final List initialFive = new FixedCapacityList( + PSQUARE_CONSTANT); + + /** + * The quantile needed should be in range of 0-1. The constructor + * {@link #PSquarePercentile(double)} ensures that passed in percentile is + * divide by 100 + */ + private final double quantile; + + /** + * lastObservation is the last observation value/input sample. No need to + * serialize + */ + private transient double lastObservation; + + /** + * Markers is the marker collection object which comes to effect + * only after 5 values are inserted + */ + private PSquareMarkers markers = null; + + /** + * Computed p value (i,e percentile value of data set hither to received) + */ + private double pValue = Double.NaN; + + /** + * Counter to count the values/observations accepted into this data set + */ + private long countOfObservations; + + /** + * Constructs a PSquarePercentile with the specific percentile value. + * @param p the percentile + * @throws OutOfRangeException if p is not greater than 0 and less + * than or equal to 100 + */ + public PSquarePercentile(final double p) { + if (p > 100 || p < 0) { + throw new OutOfRangeException(LocalizedFormats.OUT_OF_RANGE, + p, 0, 100); + } + this.quantile = p / 100d;// always set it within (0,1] + } + + /** + * Default constructor that assumes a {@link #DEFAULT_QUANTILE_DESIRED + * default quantile} needed + */ + PSquarePercentile() { + this(DEFAULT_QUANTILE_DESIRED); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() { + double result = getResult(); + result = Double.isNaN(result) ? 37 : result; + final double markersHash = markers == null ? 0 : markers.hashCode(); + final double[] toHash = {result, quantile, markersHash, countOfObservations}; + return Arrays.hashCode(toHash); + } + + /** + * Returns true iff {@code o} is a {@code PSquarePercentile} returning the + * same values as this for {@code getResult()} and {@code getN()} and also + * having equal markers + * + * @param o object to compare + * @return true if {@code o} is a {@code PSquarePercentile} with + * equivalent internal state + */ + @Override + public boolean equals(Object o) { + boolean result = false; + if (this == o) { + result = true; + } else if (o != null && o instanceof PSquarePercentile) { + PSquarePercentile that = (PSquarePercentile) o; + boolean isNotNull = markers != null && that.markers != null; + boolean isNull = markers == null && that.markers == null; + result = isNotNull ? markers.equals(that.markers) : isNull; + // markers as in the case of first + // five observations + result = result && getN() == that.getN(); + } + return result; + } + + /** + * {@inheritDoc}The internal state updated due to the new value in this + * context is basically of the marker positions and computation of the + * approximate quantile. + * + * @param observation the observation currently being added. + */ + @Override + public void increment(final double observation) { + // Increment counter + countOfObservations++; + + // Store last observation + this.lastObservation = observation; + + // 0. Use Brute force for <5 + if (markers == null) { + if (initialFive.add(observation)) { + Collections.sort(initialFive); + pValue = + initialFive + .get((int) (quantile * (initialFive.size() - 1))); + return; + } + // 1. Initialize once after 5th observation + markers = newMarkers(initialFive, quantile); + } + // 2. process a Data Point and return pValue + pValue = markers.processDataPoint(observation); + } + + /** + * Returns a string containing the last observation, the current estimate + * of the quantile and all markers. + * + * @return string representation of state data + */ + @Override + public String toString() { + + if (markers == null) { + return String.format("obs=%s pValue=%s", + DECIMAL_FORMAT.format(lastObservation), + DECIMAL_FORMAT.format(pValue)); + } else { + return String.format("obs=%s markers=%s", + DECIMAL_FORMAT.format(lastObservation), markers.toString()); + } + } + + /** + * {@inheritDoc} + */ + public long getN() { + return countOfObservations; + } + + /** + * {@inheritDoc} + */ + @Override + public StorelessUnivariateStatistic copy() { + // multiply quantile by 100 now as anyway constructor divides it by 100 + PSquarePercentile copy = new PSquarePercentile(100d * quantile); + + if (markers != null) { + copy.markers = (PSquareMarkers) markers.clone(); + } + copy.countOfObservations = countOfObservations; + copy.pValue = pValue; + copy.initialFive.clear(); + copy.initialFive.addAll(initialFive); + return copy; + } + + /** + * Returns the quantile estimated by this statistic in the range [0.0-1.0] + * + * @return quantile estimated by {@link #getResult()} + */ + public double quantile() { + return quantile; + } + + /** + * {@inheritDoc}. This basically clears all the markers, the + * initialFive list and sets countOfObservations to 0. + */ + @Override + public void clear() { + markers = null; + initialFive.clear(); + countOfObservations = 0L; + pValue = Double.NaN; + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + if (Double.compare(quantile, 1d) == 0) { + pValue = maximum(); + } else if (Double.compare(quantile, 0d) == 0) { + pValue = minimum(); + } + return pValue; + } + + /** + * @return maximum in the data set added to this statistic + */ + private double maximum() { + double val = Double.NaN; + if (markers != null) { + val = markers.height(PSQUARE_CONSTANT); + } else if (!initialFive.isEmpty()) { + val = initialFive.get(initialFive.size() - 1); + } + return val; + } + + /** + * @return minimum in the data set added to this statistic + */ + private double minimum() { + double val = Double.NaN; + if (markers != null) { + val = markers.height(1); + } else if (!initialFive.isEmpty()) { + val = initialFive.get(0); + } + return val; + } + + /** + * Markers is an encapsulation of the five markers/buckets as indicated in + * the original works. + */ + private static class Markers implements PSquareMarkers, Serializable { + /** + * Serial version id + */ + private static final long serialVersionUID = 1L; + + /** Low marker index */ + private static final int LOW = 2; + + /** High marker index */ + private static final int HIGH = 4; + + /** + * Array of 5+1 Markers (The first marker is dummy just so we + * can match the rest of indexes [1-5] indicated in the original works + * which follows unit based index) + */ + private final Marker[] markerArray; + + /** + * Kth cell belonging to [1-5] of the markerArray. No need for + * this to be serialized + */ + private transient int k = -1; + + /** + * Constructor + * + * @param theMarkerArray marker array to be used + */ + private Markers(final Marker[] theMarkerArray) { + MathUtils.checkNotNull(theMarkerArray); + markerArray = theMarkerArray; + for (int i = 1; i < PSQUARE_CONSTANT; i++) { + markerArray[i].previous(markerArray[i - 1]) + .next(markerArray[i + 1]).index(i); + } + markerArray[0].previous(markerArray[0]).next(markerArray[1]) + .index(0); + markerArray[5].previous(markerArray[4]).next(markerArray[5]) + .index(5); + } + + /** + * Constructor + * + * @param initialFive elements required to build Marker + * @param p quantile required to be computed + */ + private Markers(final List initialFive, final double p) { + this(createMarkerArray(initialFive, p)); + } + + /** + * Creates a marker array using initial five elements and a quantile + * + * @param initialFive list of initial five elements + * @param p the pth quantile + * @return Marker array + */ + private static Marker[] createMarkerArray( + final List initialFive, final double p) { + final int countObserved = + initialFive == null ? -1 : initialFive.size(); + if (countObserved < PSQUARE_CONSTANT) { + throw new InsufficientDataException( + LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, + countObserved, PSQUARE_CONSTANT); + } + Collections.sort(initialFive); + return new Marker[] { + new Marker(),// Null Marker + new Marker(initialFive.get(0), 1, 0, 1), + new Marker(initialFive.get(1), 1 + 2 * p, p / 2, 2), + new Marker(initialFive.get(2), 1 + 4 * p, p, 3), + new Marker(initialFive.get(3), 3 + 2 * p, (1 + p) / 2, 4), + new Marker(initialFive.get(4), 5, 1, 5) }; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() { + return Arrays.deepHashCode(markerArray); + } + + /** + * {@inheritDoc}.This equals method basically checks for marker array to + * be deep equals. + * + * @param o is the other object + * @return true if the object compares with this object are equivalent + */ + @Override + public boolean equals(Object o) { + boolean result = false; + if (this == o) { + result = true; + } else if (o != null && o instanceof Markers) { + Markers that = (Markers) o; + result = Arrays.deepEquals(markerArray, that.markerArray); + } + return result; + } + + /** + * Process a data point + * + * @param inputDataPoint is the data point passed + * @return computed percentile + */ + public double processDataPoint(final double inputDataPoint) { + + // 1. Find cell and update minima and maxima + final int kthCell = findCellAndUpdateMinMax(inputDataPoint); + + // 2. Increment positions + incrementPositions(1, kthCell + 1, 5); + + // 2a. Update desired position with increments + updateDesiredPositions(); + + // 3. Adjust heights of m[2-4] if necessary + adjustHeightsOfMarkers(); + + // 4. Return percentile + return getPercentileValue(); + } + + /** + * Returns the percentile computed thus far. + * + * @return height of mid point marker + */ + public double getPercentileValue() { + return height(3); + } + + /** + * Finds the cell where the input observation / value fits. + * + * @param observation the input value to be checked for + * @return kth cell (of the markers ranging from 1-5) where observed + * sample fits + */ + private int findCellAndUpdateMinMax(final double observation) { + k = -1; + if (observation < height(1)) { + markerArray[1].markerHeight = observation; + k = 1; + } else if (observation < height(2)) { + k = 1; + } else if (observation < height(3)) { + k = 2; + } else if (observation < height(4)) { + k = 3; + } else if (observation <= height(5)) { + k = 4; + } else { + markerArray[5].markerHeight = observation; + k = 4; + } + return k; + } + + /** + * Adjust marker heights by setting quantile estimates to middle markers. + */ + private void adjustHeightsOfMarkers() { + for (int i = LOW; i <= HIGH; i++) { + estimate(i); + } + } + + /** + * {@inheritDoc} + */ + public double estimate(final int index) { + if (index < LOW || index > HIGH) { + throw new OutOfRangeException(index, LOW, HIGH); + } + return markerArray[index].estimate(); + } + + /** + * Increment positions by d. Refer to algorithm paper for the + * definition of d. + * + * @param d The increment value for the position + * @param startIndex start index of the marker array + * @param endIndex end index of the marker array + */ + private void incrementPositions(final int d, final int startIndex, + final int endIndex) { + for (int i = startIndex; i <= endIndex; i++) { + markerArray[i].incrementPosition(d); + } + } + + /** + * Desired positions incremented by bucket width. The bucket width is + * basically the desired increments. + */ + private void updateDesiredPositions() { + for (int i = 1; i < markerArray.length; i++) { + markerArray[i].updateDesiredPosition(); + } + } + + /** + * Sets previous and next markers after default read is done. + * + * @param anInputStream the input stream to be deserialized + * @throws ClassNotFoundException thrown when a desired class not found + * @throws IOException thrown due to any io errors + */ + private void readObject(ObjectInputStream anInputStream) + throws ClassNotFoundException, IOException { + // always perform the default de-serialization first + anInputStream.defaultReadObject(); + // Build links + for (int i = 1; i < PSQUARE_CONSTANT; i++) { + markerArray[i].previous(markerArray[i - 1]) + .next(markerArray[i + 1]).index(i); + } + markerArray[0].previous(markerArray[0]).next(markerArray[1]) + .index(0); + markerArray[5].previous(markerArray[4]).next(markerArray[5]) + .index(5); + } + + /** + * Return marker height given index + * + * @param markerIndex index of marker within (1,6) + * @return marker height + */ + public double height(final int markerIndex) { + if (markerIndex >= markerArray.length || markerIndex <= 0) { + throw new OutOfRangeException(markerIndex, 1, + markerArray.length); + } + return markerArray[markerIndex].markerHeight; + } + + /** + * {@inheritDoc}.Clone Markers + * + * @return cloned object + */ + @Override + public Object clone() { + return new Markers(new Marker[] { new Marker(), + (Marker) markerArray[1].clone(), + (Marker) markerArray[2].clone(), + (Marker) markerArray[3].clone(), + (Marker) markerArray[4].clone(), + (Marker) markerArray[5].clone() }); + + } + + /** + * Returns string representation of the Marker array. + * + * @return Markers as a string + */ + @Override + public String toString() { + return String.format("m1=[%s],m2=[%s],m3=[%s],m4=[%s],m5=[%s]", + markerArray[1].toString(), markerArray[2].toString(), + markerArray[3].toString(), markerArray[4].toString(), + markerArray[5].toString()); + } + + } + + /** + * The class modeling the attributes of the marker of the P-square algorithm + */ + private static class Marker implements Serializable, Cloneable { + + /** + * Serial Version ID + */ + private static final long serialVersionUID = -3575879478288538431L; + + /** + * The marker index which is just a serial number for the marker in the + * marker array of 5+1. + */ + private int index; + + /** + * The integral marker position. Refer to the variable n in the original + * works. + */ + private double intMarkerPosition; + + /** + * Desired marker position. Refer to the variable n' in the original + * works. + */ + private double desiredMarkerPosition; + + /** + * Marker height or the quantile. Refer to the variable q in the + * original works. + */ + private double markerHeight; + + /** + * Desired marker increment. Refer to the variable dn' in the original + * works. + */ + private double desiredMarkerIncrement; + + /** + * Next and previous markers for easy linked navigation in loops. this + * is not serialized as they can be rebuilt during deserialization. + */ + private transient Marker next; + + /** + * The previous marker links + */ + private transient Marker previous; + + /** + * Nonlinear interpolator + */ + private final UnivariateInterpolator nonLinear = + new NevilleInterpolator(); + + /** + * Linear interpolator which is not serializable + */ + private transient UnivariateInterpolator linear = + new LinearInterpolator(); + + /** + * Default constructor + */ + private Marker() { + this.next = this.previous = this; + } + + /** + * Constructor of the marker with parameters + * + * @param heightOfMarker represent the quantile value + * @param makerPositionDesired represent the desired marker position + * @param markerPositionIncrement represent increments for position + * @param markerPositionNumber represent the position number of marker + */ + private Marker(double heightOfMarker, double makerPositionDesired, + double markerPositionIncrement, double markerPositionNumber) { + this(); + this.markerHeight = heightOfMarker; + this.desiredMarkerPosition = makerPositionDesired; + this.desiredMarkerIncrement = markerPositionIncrement; + this.intMarkerPosition = markerPositionNumber; + } + + /** + * Sets the previous marker. + * + * @param previousMarker the previous marker to the current marker in + * the array of markers + * @return this instance + */ + private Marker previous(final Marker previousMarker) { + MathUtils.checkNotNull(previousMarker); + this.previous = previousMarker; + return this; + } + + /** + * Sets the next marker. + * + * @param nextMarker the next marker to the current marker in the array + * of markers + * @return this instance + */ + private Marker next(final Marker nextMarker) { + MathUtils.checkNotNull(nextMarker); + this.next = nextMarker; + return this; + } + + /** + * Sets the index of the marker. + * + * @param indexOfMarker the array index of the marker in marker array + * @return this instance + */ + private Marker index(final int indexOfMarker) { + this.index = indexOfMarker; + return this; + } + + /** + * Update desired Position with increment. + */ + private void updateDesiredPosition() { + desiredMarkerPosition += desiredMarkerIncrement; + } + + /** + * Increment Position by d. + * + * @param d a delta value to increment + */ + private void incrementPosition(final int d) { + intMarkerPosition += d; + } + + /** + * Difference between desired and actual position + * + * @return difference between desired and actual position + */ + private double difference() { + return desiredMarkerPosition - intMarkerPosition; + } + + /** + * Estimate the quantile for the current marker. + * + * @return estimated quantile + */ + private double estimate() { + final double di = difference(); + final boolean isNextHigher = + next.intMarkerPosition - intMarkerPosition > 1; + final boolean isPreviousLower = + previous.intMarkerPosition - intMarkerPosition < -1; + + if (di >= 1 && isNextHigher || di <= -1 && isPreviousLower) { + final int d = di >= 0 ? 1 : -1; + final double[] xval = + new double[] { previous.intMarkerPosition, + intMarkerPosition, next.intMarkerPosition }; + final double[] yval = + new double[] { previous.markerHeight, markerHeight, + next.markerHeight }; + final double xD = intMarkerPosition + d; + + UnivariateFunction univariateFunction = + nonLinear.interpolate(xval, yval); + markerHeight = univariateFunction.value(xD); + + // If parabolic estimate is bad then turn linear + if (isEstimateBad(yval, markerHeight)) { + int delta = xD - xval[1] > 0 ? 1 : -1; + final double[] xBad = + new double[] { xval[1], xval[1 + delta] }; + final double[] yBad = + new double[] { yval[1], yval[1 + delta] }; + MathArrays.sortInPlace(xBad, yBad);// since d can be +/- 1 + univariateFunction = linear.interpolate(xBad, yBad); + markerHeight = univariateFunction.value(xD); + } + incrementPosition(d); + } + return markerHeight; + } + + /** + * Check if parabolic/nonlinear estimate is bad by checking if the + * ordinate found is beyond the y[0] and y[2]. + * + * @param y the array to get the bounds + * @param yD the estimate + * @return true if yD is a bad estimate + */ + private boolean isEstimateBad(final double[] y, final double yD) { + return yD <= y[0] || yD >= y[2]; + } + + /** + * {@inheritDoc}This equals method checks for marker attributes and + * as well checks if navigation pointers (next and previous) are the same + * between this and passed in object + * + * @param o Other object + * @return true if this equals passed in other object o + */ + @Override + public boolean equals(Object o) { + boolean result = false; + if (this == o) { + result = true; + } else if (o != null && o instanceof Marker) { + Marker that = (Marker) o; + + result = Double.compare(markerHeight, that.markerHeight) == 0; + result = + result && + Double.compare(intMarkerPosition, + that.intMarkerPosition) == 0; + result = + result && + Double.compare(desiredMarkerPosition, + that.desiredMarkerPosition) == 0; + result = + result && + Double.compare(desiredMarkerIncrement, + that.desiredMarkerIncrement) == 0; + + result = result && next.index == that.next.index; + result = result && previous.index == that.previous.index; + } + return result; + } + + @Override + public int hashCode() { + return Arrays.hashCode(new double[] {markerHeight, intMarkerPosition, + desiredMarkerIncrement, desiredMarkerPosition, previous.index, next.index}); + } + + /** + * Read Object to deserialize. + * + * @param anInstream Stream Object data + * @throws IOException thrown for IO Errors + * @throws ClassNotFoundException thrown for class not being found + */ + private void readObject(ObjectInputStream anInstream) + throws ClassNotFoundException, IOException { + anInstream.defaultReadObject(); + previous=next=this; + linear = new LinearInterpolator(); + } + + /** + * Clone this instance. + * + * @return cloned marker + */ + @Override + public Object clone() { + return new Marker(markerHeight, desiredMarkerPosition, + desiredMarkerIncrement, intMarkerPosition); + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() { + return String.format( + "index=%.0f,n=%.0f,np=%.2f,q=%.2f,dn=%.2f,prev=%d,next=%d", + (double) index, Precision.round(intMarkerPosition, 0), + Precision.round(desiredMarkerPosition, 2), + Precision.round(markerHeight, 2), + Precision.round(desiredMarkerIncrement, 2), previous.index, + next.index); + } + } + + /** + * A simple fixed capacity list that has an upper bound to growth. + * Once its capacity is reached, {@code add} is a no-op, returning + * {@code false}. + * + * @param + */ + private static class FixedCapacityList extends ArrayList implements + Serializable { + /** + * Serialization Version Id + */ + private static final long serialVersionUID = 2283952083075725479L; + /** + * Capacity of the list + */ + private final int capacity; + + /** + * This constructor constructs the list with given capacity and as well + * as stores the capacity + * + * @param fixedCapacity the capacity to be fixed for this list + */ + public FixedCapacityList(final int fixedCapacity) { + super(fixedCapacity); + this.capacity = fixedCapacity; + } + + /** + * {@inheritDoc} In addition it checks if the {@link #size()} returns a + * size that is within capacity and if true it adds; otherwise the list + * contents are unchanged and {@code false} is returned. + * + * @return true if addition is successful and false otherwise + */ + @Override + public boolean add(final E e) { + return size() < capacity ? super.add(e) : false; + } + + /** + * {@inheritDoc} In addition it checks if the sum of Collection size and + * this instance's {@link #size()} returns a value that is within + * capacity and if true it adds the collection; otherwise the list + * contents are unchanged and {@code false} is returned. + * + * @return true if addition is successful and false otherwise + */ + @Override + public boolean addAll(Collection collection) { + boolean isCollectionLess = + collection != null && + collection.size() + size() <= capacity; + return isCollectionLess ? super.addAll(collection) : false; + } + } + + /** + * A creation method to build Markers + * + * @param initialFive list of initial five elements + * @param p the quantile desired + * @return an instance of PSquareMarkers + */ + public static PSquareMarkers newMarkers(final List initialFive, + final double p) { + return new Markers(initialFive, p); + } + + /** + * An interface that encapsulates abstractions of the + * P-square algorithm markers as is explained in the original works. This + * interface is exposed with protected access to help in testability. + */ + protected interface PSquareMarkers extends Cloneable { + /** + * Returns Percentile value computed thus far. + * + * @return percentile + */ + double getPercentileValue(); + + /** + * A clone function to clone the current instance. It's created as an + * interface method as well for convenience though Cloneable is just a + * marker interface. + * + * @return clone of this instance + */ + Object clone(); + + /** + * Returns the marker height (or percentile) of a given marker index. + * + * @param markerIndex is the index of marker in the marker array + * @return percentile value of the marker index passed + * @throws OutOfRangeException in case the index is not within [1-5] + */ + double height(final int markerIndex); + + /** + * Process a data point by moving the marker heights based on estimator. + * + * @param inputDataPoint is the data point passed + * @return computed percentile + */ + double processDataPoint(final double inputDataPoint); + + /** + * An Estimate of the percentile value of a given Marker + * + * @param index the marker's index in the array of markers + * @return percentile estimate + * @throws OutOfRangeException in case if index is not within [1-5] + */ + double estimate(final int index); + } +} diff --git a/src/test/java/org/apache/commons/math3/stat/descriptive/rank/PSquarePercentileTest.java b/src/test/java/org/apache/commons/math3/stat/descriptive/rank/PSquarePercentileTest.java new file mode 100644 index 000000000..adc5a95c9 --- /dev/null +++ b/src/test/java/org/apache/commons/math3/stat/descriptive/rank/PSquarePercentileTest.java @@ -0,0 +1,760 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.rank; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; + +import org.apache.commons.math3.distribution.LogNormalDistribution; +import org.apache.commons.math3.distribution.NormalDistribution; +import org.apache.commons.math3.distribution.RealDistribution; +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic; +import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatisticAbstractTest; +import org.apache.commons.math3.stat.descriptive.UnivariateStatistic; +import org.apache.commons.math3.stat.descriptive.rank.PSquarePercentile.PSquareMarkers; +import org.apache.commons.math3.util.FastMath; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test cases for the {@link PSquarePercentile} class which naturally extends + * {@link StorelessUnivariateStatisticAbstractTest}. + */ +public class PSquarePercentileTest extends + StorelessUnivariateStatisticAbstractTest { + + protected double percentile5 = 8.2299d; + protected double percentile95 = 16.72195;// 20.82d; this is approximation + protected double tolerance = 10E-12; + + private final RandomGenerator randomGenerator = new Well19937c(1000); + + @Override + public double getTolerance() { + return 1.0e-2;// tolerance limit changed as this is an approximation + // algorithm and also gets accurate after few tens of + // samples + } + + /** + * Verifies that copied statistics remain equal to originals when + * incremented the same way by making the copy after a majority of elements + * are incremented + */ + @Test + public void testCopyConsistencyWithInitialMostElements() { + + StorelessUnivariateStatistic master = + (StorelessUnivariateStatistic) getUnivariateStatistic(); + + StorelessUnivariateStatistic replica = null; + + // select a portion of testArray till 75 % of the length to load first + long index = FastMath.round(0.75 * testArray.length); + + // Put first half in master and copy master to replica + master.incrementAll(testArray, 0, (int) index); + replica = master.copy(); + + // Check same + Assert.assertTrue(replica.equals(master)); + Assert.assertTrue(master.equals(replica)); + + // Now add second part to both and check again + master.incrementAll(testArray, (int) index, + (int) (testArray.length - index)); + replica.incrementAll(testArray, (int) index, + (int) (testArray.length - index)); + Assert.assertTrue(replica.equals(master)); + Assert.assertTrue(master.equals(replica)); + } + + /** + * Verifies that copied statistics remain equal to originals when + * incremented the same way by way of copying original after just a few + * elements are incremented + */ + @Test + public void testCopyConsistencyWithInitialFirstFewElements() { + + StorelessUnivariateStatistic master = + (StorelessUnivariateStatistic) getUnivariateStatistic(); + + StorelessUnivariateStatistic replica = null; + + // select a portion of testArray which is 10% of the length to load + // first + long index = FastMath.round(0.1 * testArray.length); + + // Put first half in master and copy master to replica + master.incrementAll(testArray, 0, (int) index); + replica = master.copy(); + + // Check same + Assert.assertTrue(replica.equals(master)); + Assert.assertTrue(master.equals(replica)); + // Now add second part to both and check again + master.incrementAll(testArray, (int) index, + (int) (testArray.length - index)); + replica.incrementAll(testArray, (int) index, + (int) (testArray.length - index)); + Assert.assertTrue(master.equals(master)); + Assert.assertTrue(replica.equals(replica)); + Assert.assertTrue(replica.equals(master)); + Assert.assertTrue(master.equals(replica)); + } + + @Test(expected = MathIllegalArgumentException.class) + public void testNullListInMarkers() { + // In case of null list Markers cannot be instantiated..is geting + // verified + // new Markers(null, 0, PSquarePercentile.newEstimator()); + PSquarePercentile.newMarkers(null, 0); + + } + + @Test + public void testMiscellaniousFunctionsInMarkers() { + double p = 0.5; + PSquareMarkers markers = + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91, + 38.62 }), p); + // Markers equality + Assert.assertTrue(markers.equals(markers)); + Assert.assertFalse(markers.equals(null)); + Assert.assertFalse(markers.equals(new String())); + // Check for null markers test during equality testing + // Until 5 elements markers are not initialized + PSquarePercentile p1 = new PSquarePercentile(), p2 = + new PSquarePercentile(); + Assert.assertEquals(p1, p2); + p1.evaluate(new double[] { 1.0, 2.0, 3.0 }); + p2.evaluate(new double[] { 1.0, 2.0, 3.0 }); + Assert.assertEquals(p1, p2); + // Move p2 alone with more values just to make sure markers are not null + // for p2 + p2.incrementAll(new double[] { 5.0, 7.0, 11.0 }); + Assert.assertFalse(p1.equals(p2)); + Assert.assertFalse(p2.equals(p1)); + // Next add different data to p1 to make number of elements match and + // markers are not null however actual results will vary + p1.incrementAll(new double[] { 20, 21, 22, 23 }); + Assert.assertFalse(p1.equals(p2));// though markers are non null, N + // matches, results wont + + } + + @Test(expected = OutOfRangeException.class) + public void testMarkersOORLow() { + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91, 38.62 }), + 0.5).estimate(0); + } + + @Test(expected = OutOfRangeException.class) + public void testMarkersOORHigh() { + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91, 38.62 }), + 0.5).estimate(5); + } + + @Test + public void testMarkers2() { + double p = 0.5; + PSquareMarkers markers = + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91, + 38.62 }), p); + + PSquareMarkers markersNew = + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91, + 38.62 }), p); + + Assert.assertTrue(markers.equals(markersNew)); + // If just one element of markers got changed then its still false. + markersNew.processDataPoint(39); + Assert.assertFalse(markers.equals(markersNew)); + + } + + @Test + public void testHashCodeInMarkers() { + PSquarePercentile p = new PSquarePercentile(95); + PSquarePercentile p2 = new PSquarePercentile(95); + Set s = new HashSet(); + s.add(p); + s.add(p2); + Assert.assertEquals(1, s.size()); + Assert.assertEquals(p, s.iterator().next()); + double[] d = + new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442, + 95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990, + 95.1682 }; + Assert.assertEquals(95.1981, p.evaluate(d), 1.0e-2); // change + Assert.assertEquals(95.1981, p2.evaluate(d), 1.0e-2); // change + s.clear(); + s.add(p); + s.add(p2); + Assert.assertEquals(1, s.size()); + Assert.assertEquals(p, s.iterator().next()); + + PSquareMarkers m1 = + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937, + 95.1959, 95.1442, 95.0610, 95.1591, 95.1195, + 95.1772, 95.0925, 95.1990, 95.1682 }), 0.0); + PSquareMarkers m2 = + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937, + 95.1959, 95.1442, 95.0610, 95.1591, 95.1195, + 95.1772, 95.0925, 95.1990, 95.1682 }), 0.0); + Assert.assertTrue(m1.equals(m2)); + Set setMarkers = new LinkedHashSet(); + Assert.assertTrue(setMarkers.add(m1)); + Assert.assertFalse(setMarkers.add(m2)); + Assert.assertEquals(1, setMarkers.size()); + + PSquareMarkers mThis = + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 195.1772, 195.1567, + 195.1937, 195.1959, 95.1442, 195.0610, + 195.1591, 195.1195, 195.1772, 95.0925, 95.1990, + 195.1682 }), 0.50); + PSquareMarkers mThat = + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937, + 95.1959, 95.1442, 95.0610, 95.1591, 95.1195, + 95.1772, 95.0925, 95.1990, 95.1682 }), 0.50); + Assert.assertTrue(mThis.equals(mThis)); + Assert.assertFalse(mThis.equals(mThat)); + String s1=""; + Assert.assertFalse(mThis.equals(s1)); + for (int i = 0; i < testArray.length; i++) { + mThat.processDataPoint(testArray[i]); + } + setMarkers.add(mThat); + setMarkers.add(mThis); + Assert.assertTrue(mThat.equals(mThat)); + Assert.assertTrue(setMarkers.contains(mThat)); + Assert.assertTrue(setMarkers.contains(mThis)); + Assert.assertEquals(3, setMarkers.size()); + Iterator iterator=setMarkers.iterator(); + Assert.assertEquals(m1, iterator.next()); + Assert.assertEquals(mThat, iterator.next()); + Assert.assertEquals(mThis, iterator.next()); + } + + @Test(expected = OutOfRangeException.class) + public void testMarkersWithLowerIndex() { + PSquareMarkers mThat = + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937, + 95.1959, 95.1442, 95.0610, 95.1591, 95.1195, + 95.1772, 95.0925, 95.1990, 95.1682 }), 0.50); + for (int i = 0; i < testArray.length; i++) { + mThat.processDataPoint(testArray[i]); + } + mThat.estimate(0); + } + + @Test(expected = OutOfRangeException.class) + public void testMarkersWithHigherIndex() { + PSquareMarkers mThat = + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937, + 95.1959, 95.1442, 95.0610, 95.1591, 95.1195, + 95.1772, 95.0925, 95.1990, 95.1682 }), 0.50); + for (int i = 0; i < testArray.length; i++) { + mThat.processDataPoint(testArray[i]); + } + mThat.estimate(6); + } + + @Test(expected = OutOfRangeException.class) + public void testMarkerHeightWithLowerIndex() { + PSquareMarkers mThat = + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937, + 95.1959, 95.1442, 95.0610, 95.1591, 95.1195, + 95.1772, 95.0925, 95.1990, 95.1682 }), 0.50); + mThat.height(0); + } + + @Test(expected = OutOfRangeException.class) + public void testMarkerHeightWithHigherIndex() { + PSquareMarkers mThat = + PSquarePercentile.newMarkers( + Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937, + 95.1959, 95.1442, 95.0610, 95.1591, 95.1195, + 95.1772, 95.0925, 95.1990, 95.1682 }), 0.50); + mThat.height(6); + } + + @Test + public void testPSquaredEqualsAndMin() { + PSquarePercentile ptile = new PSquarePercentile(0); + Assert.assertEquals(ptile, ptile); + Assert.assertFalse(ptile.equals(null)); + Assert.assertFalse(ptile.equals(new String())); + // Just to check if there is no data get result for zeroth and 100th + // ptile returns NAN + Assert.assertTrue(Double.isNaN(ptile.getResult())); + Assert.assertTrue(Double.isNaN(new PSquarePercentile(100).getResult())); + + double[] d = new double[] { 1, 3, 2, 4, 9, 10, 11 }; + ptile.evaluate(d); + Assert.assertEquals(ptile, ptile); + Assert.assertEquals(1d, ptile.getResult(), 1e-02);// this calls min + } + + @Test + public void testString() { + PSquarePercentile ptile = new PSquarePercentile(95); + Assert.assertNotNull(ptile.toString()); + ptile.increment(1); + ptile.increment(2); + ptile.increment(3); + Assert.assertNotNull(ptile.toString()); + Assert.assertEquals(expectedValue(), ptile.evaluate(testArray), + getTolerance()); + Assert.assertNotNull(ptile.toString()); + } + + @Override + public UnivariateStatistic getUnivariateStatistic() { + PSquarePercentile ptile = new PSquarePercentile(95); + // Assert.assertNull(ptile.markers()); + return ptile; + } + + @Override + public double expectedValue() { + return this.percentile95; + } + + @Test + public void testHighPercentile() { + double[] d = new double[] { 1, 2, 3 }; + PSquarePercentile p = new PSquarePercentile(75.0); + Assert.assertEquals(2, p.evaluate(d), 1.0e-5); + PSquarePercentile p95 = new PSquarePercentile(); + Assert.assertEquals(2, p95.evaluate(d), 1.0e-5); + } + + @Test + public void testLowPercentile() { + double[] d = new double[] { 0, 1 }; + PSquarePercentile p = new PSquarePercentile(25.0); + Assert.assertEquals(0d, p.evaluate(d), Double.MIN_VALUE); + } + + @Test + public void testPercentile() { + double[] d = new double[] { 1, 3, 2, 4 }; + PSquarePercentile p = new PSquarePercentile(30d); + Assert.assertEquals(1.0, p.evaluate(d), 1.0e-5); + p = new PSquarePercentile(25); + Assert.assertEquals(1.0, p.evaluate(d), 1.0e-5); + p = new PSquarePercentile(75); + Assert.assertEquals(3.0, p.evaluate(d), 1.0e-5); + p = new PSquarePercentile(50); + Assert.assertEquals(2d, p.evaluate(d), 1.0e-5); + + } + + @Test(expected = MathIllegalArgumentException.class) + public void testInitial() { + PSquarePercentile.newMarkers(new ArrayList(), 0.5); + Assert.fail(); + } + + @Test(expected = MathIllegalArgumentException.class) + public void testNegativeInvalidValues() { + double[] d = + new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442, + 95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990, + 95.1682 }; + PSquarePercentile p = new PSquarePercentile(-1.0); + p.evaluate(d, 0, d.length); + Assert.fail("This method has had to throw exception..but it is not.."); + + } + + @Test(expected = MathIllegalArgumentException.class) + public void testPositiveInvalidValues() { + double[] d = + new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442, + 95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990, + 95.1682 }; + PSquarePercentile p = new PSquarePercentile(101.0); + p.evaluate(d, 0, d.length); + Assert.fail("This method has had to throw exception..but it is not.."); + + } + + @Test + public void testNISTExample() { + double[] d = + new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442, + 95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990, + 95.1682 }; + Assert.assertEquals(95.1981, new PSquarePercentile(90d).evaluate(d), + 1.0e-2); // changed the accuracy to 1.0e-2 + Assert.assertEquals(95.061, new PSquarePercentile(0d).evaluate(d), 0); + Assert.assertEquals(95.1990, + new PSquarePercentile(100d).evaluate(d, 0, d.length), 0); + } + + @Test + public void test5() { + PSquarePercentile percentile = new PSquarePercentile(5d); + Assert.assertEquals(this.percentile5, percentile.evaluate(testArray), + 1.0);// changed the accuracy to 1 instead of tolerance + } + + @Test(expected = MathIllegalArgumentException.class) + public void testNull() { + PSquarePercentile percentile = new PSquarePercentile(50d); + double[] nullArray = null; + percentile.evaluate(nullArray); + } + + @Test + public void testEmpty() { + PSquarePercentile percentile = new PSquarePercentile(50d); + double[] emptyArray = new double[] {}; + Assert.assertTrue(Double.isNaN(percentile.evaluate(emptyArray))); + } + + @Test + public void testSingleton() { + PSquarePercentile percentile = new PSquarePercentile(50d); + double[] singletonArray = new double[] { 1d }; + Assert.assertEquals(1d, percentile.evaluate(singletonArray), 0); + Assert.assertEquals(1d, percentile.evaluate(singletonArray, 0, 1), 0); + percentile = new PSquarePercentile(5); + Assert.assertEquals(1d, percentile.evaluate(singletonArray, 0, 1), 0); + percentile = new PSquarePercentile(100); + Assert.assertEquals(1d, percentile.evaluate(singletonArray, 0, 1), 0); + percentile = new PSquarePercentile(100); + Assert.assertTrue(Double.isNaN(percentile + .evaluate(singletonArray, 0, 0))); + } + + @Test + public void testSpecialValues() { + PSquarePercentile percentile = new PSquarePercentile(50d); + double[] specialValues = + new double[] { 0d, 1d, 2d, 3d, 4d, Double.NaN }; + Assert.assertEquals(2d, percentile.evaluate(specialValues), 0); + specialValues = + new double[] { Double.NEGATIVE_INFINITY, 1d, 2d, 3d, + Double.NaN, Double.POSITIVE_INFINITY }; + Assert.assertEquals(2d, percentile.evaluate(specialValues), 0); + specialValues = + new double[] { 1d, 1d, Double.POSITIVE_INFINITY, + Double.POSITIVE_INFINITY }; + Assert.assertFalse(Double.isInfinite(percentile.evaluate(specialValues))); + specialValues = new double[] { 1d, 1d, Double.NaN, Double.NaN }; + Assert.assertFalse(Double.isNaN(percentile.evaluate(specialValues))); + specialValues = + new double[] { 1d, 1d, Double.NEGATIVE_INFINITY, + Double.NEGATIVE_INFINITY }; + percentile = new PSquarePercentile(50d); + // Interpolation results in NEGATIVE_INFINITY + POSITIVE_INFINITY + // changed the result check to infinity instead of NaN + Assert.assertTrue(Double.isInfinite(percentile.evaluate(specialValues))); + } + + @Test + public void testArrayExample() { + Assert.assertEquals(expectedValue(), + new PSquarePercentile(95d).evaluate(testArray), getTolerance()); + } + + @Test + public void testSetQuantile() { + PSquarePercentile percentile = new PSquarePercentile(10d); + + percentile = new PSquarePercentile(100); // OK + Assert.assertEquals(1.0, percentile.quantile(), 0); + try { + percentile = new PSquarePercentile(0); + // Assert.fail("Expecting MathIllegalArgumentException"); + } catch (MathIllegalArgumentException ex) { + // expected + } + try { + new PSquarePercentile(0d); + // Assert.fail("Expecting MathIllegalArgumentException"); + } catch (MathIllegalArgumentException ex) { + // expected + } + } + + private Double[] randomTestData(int factor, int values) { + Double[] test = new Double[values]; + for (int i = 0; i < test.length; i++) { + test[i] = Math.abs(randomGenerator.nextDouble() * factor); + } + return test; + } + + @Test + public void testAccept() { + PSquarePercentile psquared = new PSquarePercentile(0.99); + Assert.assertTrue(Double.isNaN(psquared.getResult())); + Double[] test = randomTestData(100, 10000); + + for (Double value : test) { + psquared.increment(value); + Assert.assertTrue(psquared.getResult() >= 0); + } + } + + private void assertValues(Double a, Double b, double delta) { + if (Double.isNaN(a)) { + Assert.assertTrue("" + b + " is not NaN.", Double.isNaN(a)); + } else { + double max = FastMath.max(a, b); + double percentage = FastMath.abs(a - b) / max; + double deviation = delta; + Assert.assertTrue(String.format( + "Deviated = %f and is beyond %f as a=%f, b=%f", + percentage, deviation, a, b), percentage < deviation); + } + } + + private void doCalculatePercentile(Double percentile, Number[] test) { + doCalculatePercentile(percentile, test, Double.MAX_VALUE); + } + + private void doCalculatePercentile(Double percentile, Number[] test, + double delta) { + PSquarePercentile psquared = new PSquarePercentile(percentile); + for (Number value : test) { + psquared.increment(value.doubleValue()); + } + + Percentile p2 = new Percentile(percentile * 100); + + double[] dall = new double[test.length]; + for (int i = 0; i < test.length; i++) { + dall[i] = test[i].doubleValue(); + } + + Double referenceValue = p2.evaluate(dall); + assertValues(psquared.getResult(), referenceValue, delta); + } + + private void doCalculatePercentile(double percentile, double[] test, + double delta) { + PSquarePercentile psquared = new PSquarePercentile(percentile); + for (double value : test) { + psquared.increment(value); + } + + Percentile p2 = + new Percentile(percentile < 1 ? percentile * 100 : percentile); + /* + * double[] dall = new double[test.length]; for (int i = 0; i < + * test.length; i++) dall[i] = test[i]; + */ + Double referenceValue = p2.evaluate(test); + assertValues(psquared.getResult(), referenceValue, delta); + } + + @Test + public void testCannedDataSet() { + // test.unoverride("dump"); + Integer[] seedInput = + new Integer[] { 283, 285, 298, 304, 310, 31, 319, 32, 33, 339, + 342, 348, 350, 354, 354, 357, 36, 36, 369, 37, 37, 375, + 378, 383, 390, 396, 405, 408, 41, 414, 419, 416, 42, + 420, 430, 430, 432, 444, 447, 447, 449, 45, 451, 456, + 468, 470, 471, 474, 600, 695, 70, 83, 97, 109, 113, 128 }; + Integer[] input = new Integer[seedInput.length * 100]; + for (int i = 0; i < input.length; i++) { + input[i] = seedInput[i % seedInput.length] + i; + } + // Arrays.sort(input); + doCalculatePercentile(0.50d, input); + doCalculatePercentile(0.95d, input); + + } + + @Test + public void test99Percentile() { + Double[] test = randomTestData(100, 10000); + doCalculatePercentile(0.99d, test); + } + + @Test + public void test90Percentile() { + Double[] test = randomTestData(100, 10000); + doCalculatePercentile(0.90d, test); + } + + @Test + public void test20Percentile() { + Double[] test = randomTestData(100, 100000); + doCalculatePercentile(0.20d, test); + } + + @Test + public void test5Percentile() { + Double[] test = randomTestData(50, 990000); + doCalculatePercentile(0.50d, test); + } + + @Test + public void test99PercentileHighValues() { + Double[] test = randomTestData(100000, 10000); + doCalculatePercentile(0.99d, test); + } + + @Test + public void test90PercentileHighValues() { + Double[] test = randomTestData(100000, 100000); + doCalculatePercentile(0.90d, test); + } + + @Test + public void test20PercentileHighValues() { + Double[] test = randomTestData(100000, 100000); + doCalculatePercentile(0.20d, test); + } + + @Test + public void test5PercentileHighValues() { + Double[] test = randomTestData(100000, 100000); + doCalculatePercentile(0.05d, test); + } + + @Test + public void test0PercentileValuesWithFewerThan5Values() { + double[] test = { 1d, 2d, 3d, 4d }; + PSquarePercentile p = new PSquarePercentile(0d); + Assert.assertEquals(1d, p.evaluate(test), 0); + Assert.assertNotNull(p.toString()); + } + + @Test + public void testPSQuaredEvalFuncWithPapersExampleData() throws IOException { + + // This data as input is considered from + // http://www.cs.wustl.edu/~jain/papers/ftp/psqr.pdf + double[] data = + { 0.02, 0.5, 0.74, 3.39, 0.83, 22.37, 10.15, 15.43, 38.62, + 15.92, 34.6, 10.28, 1.47, 0.4, 0.05, 11.39, 0.27, 0.42, + 0.09, 11.37, + + 11.39, 15.43, 15.92, 22.37, 34.6, 38.62, 18.9, 19.2, + 27.6, 12.8, 13.7, 21.9 + + }; + + PSquarePercentile psquared = new PSquarePercentile(50); + + Double p2value = 0d; + for (int i = 0; i < 20; i++) { + psquared.increment(data[i]); + p2value = psquared.getResult(); + // System.out.println(psquared.toString());//uncomment here to see + // the papers example output + } + // System.out.println("p2value=" + p2value); + Double expected = 4.44d;// 13d; // From The Paper + // http://www.cs.wustl.edu/~jain/papers/ftp/psqr.pdf. + // Pl refer Pg 1061 Look at the mid marker + // height + // expected = new Percentile(50).evaluate(data,0,20); + // Well the values deviate in our calculation by 0.25 so its 4.25 vs + // 4.44 + Assert.assertEquals( + String.format("Expected=%f, Actual=%f", expected, p2value), + expected, p2value, 0.25); + + } + + final int TINY = 10, SMALL = 50, NOMINAL = 100, MEDIUM = 500, + STANDARD = 1000, BIG = 10000, VERY_BIG = 50000, LARGE = 1000000, + VERY_LARGE = 10000000; + + private void doDistributionTest(RealDistribution distribution) { + double data[]; + + data = distribution.sample(VERY_LARGE); + doCalculatePercentile(50, data, 0.0001); + doCalculatePercentile(95, data, 0.0001); + + data = distribution.sample(LARGE); + doCalculatePercentile(50, data, 0.001); + doCalculatePercentile(95, data, 0.001); + + data = distribution.sample(VERY_BIG); + doCalculatePercentile(50, data, 0.001); + doCalculatePercentile(95, data, 0.001); + + data = distribution.sample(BIG); + doCalculatePercentile(50, data, 0.001); + doCalculatePercentile(95, data, 0.001); + + data = distribution.sample(STANDARD); + doCalculatePercentile(50, data, 0.005); + doCalculatePercentile(95, data, 0.005); + + data = distribution.sample(MEDIUM); + doCalculatePercentile(50, data, 0.005); + doCalculatePercentile(95, data, 0.005); + + data = distribution.sample(NOMINAL); + doCalculatePercentile(50, data, 0.01); + doCalculatePercentile(95, data, 0.01); + + data = distribution.sample(SMALL); + doCalculatePercentile(50, data, 0.01); + doCalculatePercentile(95, data, 0.01); + + data = distribution.sample(TINY); + doCalculatePercentile(50, data, 0.05); + doCalculatePercentile(95, data, 0.05); + + } + + /** + * Test Various Dist + */ + @Test + public void testDistribution() { + doDistributionTest(new NormalDistribution(4000, 50)); + doDistributionTest(new LogNormalDistribution(4000, 50)); + // doDistributionTest((new ExponentialDistribution(4000)); + // doDistributionTest(new GammaDistribution(5d,1d),0.1); + } +}