Added implementation of PSquare algorithm to estimate percentiles without
storing data in memory (i.e. as StorelessUnivariateStatistic). JIRA: MATH-418 Contributed by: Venkatesha Murthy git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1604443 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
44b5c55e07
commit
7b3e0d4297
3
pom.xml
3
pom.xml
|
@ -257,6 +257,9 @@
|
||||||
<contributor>
|
<contributor>
|
||||||
<name>J. Lewis Muir</name>
|
<name>J. Lewis Muir</name>
|
||||||
</contributor>
|
</contributor>
|
||||||
|
<contributor>
|
||||||
|
<name>Venkatesha Murthy</name>
|
||||||
|
</contributor>
|
||||||
<contributor>
|
<contributor>
|
||||||
<name>Christopher Nix</name>
|
<name>Christopher Nix</name>
|
||||||
</contributor>
|
</contributor>
|
||||||
|
|
|
@ -73,6 +73,10 @@ Users are encouraged to upgrade to this version as this release not
|
||||||
2. A few methods in the FastMath class are in fact slower that their
|
2. A few methods in the FastMath class are in fact slower that their
|
||||||
counterpart in either Math or StrictMath (cf. MATH-740 and MATH-901).
|
counterpart in either Math or StrictMath (cf. MATH-740 and MATH-901).
|
||||||
">
|
">
|
||||||
|
<action dev="psteitz" type="add" issue="MATH-418" due-to="Venkatesha Murthy">
|
||||||
|
Added implementation of PSquare algorithm to estimate percentiles without
|
||||||
|
storing data in memory (i.e. as StorelessUnivariateStatistic).
|
||||||
|
</action>
|
||||||
<action dev="erans" type="fix" issue="MATH-1129">
|
<action dev="erans" type="fix" issue="MATH-1129">
|
||||||
"Percentile": wrong sorting in the presence of NaN.
|
"Percentile": wrong sorting in the presence of NaN.
|
||||||
</action>
|
</action>
|
||||||
|
|
|
@ -0,0 +1,997 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.commons.math3.stat.descriptive.rank;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.ObjectInputStream;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.text.DecimalFormat;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.math3.analysis.UnivariateFunction;
|
||||||
|
import org.apache.commons.math3.analysis.interpolation.LinearInterpolator;
|
||||||
|
import org.apache.commons.math3.analysis.interpolation.NevilleInterpolator;
|
||||||
|
import org.apache.commons.math3.analysis.interpolation.UnivariateInterpolator;
|
||||||
|
import org.apache.commons.math3.exception.InsufficientDataException;
|
||||||
|
import org.apache.commons.math3.exception.OutOfRangeException;
|
||||||
|
import org.apache.commons.math3.exception.util.LocalizedFormats;
|
||||||
|
import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic;
|
||||||
|
import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic;
|
||||||
|
import org.apache.commons.math3.util.MathArrays;
|
||||||
|
import org.apache.commons.math3.util.MathUtils;
|
||||||
|
import org.apache.commons.math3.util.Precision;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A {@link StorelessUnivariateStatistic} estimating percentiles using the
|
||||||
|
* <a href=http://www.cs.wustl.edu/~jain/papers/ftp/psqr.pdf>P<SUP>2</SUP></a>
|
||||||
|
* Algorithm as explained by <a href=http://www.cse.wustl.edu/~jain/>Raj
|
||||||
|
* Jain</a> and Imrich Chlamtac in
|
||||||
|
* <a href=http://www.cse.wustl.edu/~jain/papers/psqr.htm>P<SUP>2</SUP> Algorithm
|
||||||
|
* for Dynamic Calculation of Quantiles and Histogram Without Storing
|
||||||
|
* Observations</a>.
|
||||||
|
* <p>
|
||||||
|
* Note: This implementation is not synchronized and produces an approximate
|
||||||
|
* result. For small samples, where data can be stored and processed in memory,
|
||||||
|
* {@link Percentile} should be used.</p>
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class PSquarePercentile extends AbstractStorelessUnivariateStatistic
|
||||||
|
implements StorelessUnivariateStatistic, Serializable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The maximum array size used for psquare algorithm
|
||||||
|
*/
|
||||||
|
private static final int PSQUARE_CONSTANT = 5;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The default percentile, used when the caller invokes the no
|
||||||
|
* argument constructor.
|
||||||
|
*/
|
||||||
|
private static final double DEFAULT_QUANTILE_DESIRED = 50d;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serial ID
|
||||||
|
*/
|
||||||
|
private static final long serialVersionUID = 2283912083175715479L;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A decimal formatter for print convenience
|
||||||
|
*/
|
||||||
|
private static final DecimalFormat DECIMAL_FORMAT = new DecimalFormat(
|
||||||
|
"00.00");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initial list of 5 numbers corresponding to 5 markers. <b>NOTE:</b>watch
|
||||||
|
* out for the add methods that are overloaded
|
||||||
|
*/
|
||||||
|
private final List<Double> initialFive = new FixedCapacityList<Double>(
|
||||||
|
PSQUARE_CONSTANT);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The quantile needed should be in range of 0-1. The constructor
|
||||||
|
* {@link #PSquarePercentile(double)} ensures that passed in percentile is
|
||||||
|
* divided by 100.
|
||||||
|
*/
|
||||||
|
private final double quantile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* lastObservation is the last observation value/input sample. No need to
|
||||||
|
* serialize
|
||||||
|
*/
|
||||||
|
private transient double lastObservation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Markers is the marker collection object which comes to effect
|
||||||
|
* only after 5 values are inserted
|
||||||
|
*/
|
||||||
|
private PSquareMarkers markers = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computed p value (i.e. the percentile value of the data received so far)
|
||||||
|
*/
|
||||||
|
private double pValue = Double.NaN;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Counter to count the values/observations accepted into this data set
|
||||||
|
*/
|
||||||
|
private long countOfObservations;
|
||||||
|
|
||||||
|
/**
 * Constructs a PSquarePercentile with the specific percentile value.
 * <p>
 * The percentile is stored internally as a quantile, i.e. divided by 100,
 * so the stored value always lies within [0, 1].</p>
 *
 * @param p the percentile, between 0 and 100 inclusive
 * @throws OutOfRangeException if p is NaN, less than 0 or greater
 * than 100
 */
public PSquarePercentile(final double p) {
    // Written as a negated "in range" test so that NaN, for which every
    // ordered comparison is false, is rejected instead of silently accepted.
    if (!(p >= 0 && p <= 100)) {
        throw new OutOfRangeException(LocalizedFormats.OUT_OF_RANGE,
                p, 0, 100);
    }
    this.quantile = p / 100d; // always within [0, 1]
}
|
||||||
|
|
||||||
|
/**
 * Package-private no-argument constructor. Delegates to
 * {@link #PSquarePercentile(double)} with the
 * {@link #DEFAULT_QUANTILE_DESIRED default percentile}.
 */
PSquarePercentile() {
    this(DEFAULT_QUANTILE_DESIRED);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
double result = getResult();
|
||||||
|
result = Double.isNaN(result) ? 37 : result;
|
||||||
|
final double markersHash = markers == null ? 0 : markers.hashCode();
|
||||||
|
final double[] toHash = {result, quantile, markersHash, countOfObservations};
|
||||||
|
return Arrays.hashCode(toHash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true iff {@code o} is a {@code PSquarePercentile} returning the
|
||||||
|
* same values as this for {@code getResult()} and {@code getN()} and also
|
||||||
|
* having equal markers
|
||||||
|
*
|
||||||
|
* @param o object to compare
|
||||||
|
* @return true if {@code o} is a {@code PSquarePercentile} with
|
||||||
|
* equivalent internal state
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
boolean result = false;
|
||||||
|
if (this == o) {
|
||||||
|
result = true;
|
||||||
|
} else if (o != null && o instanceof PSquarePercentile) {
|
||||||
|
PSquarePercentile that = (PSquarePercentile) o;
|
||||||
|
boolean isNotNull = markers != null && that.markers != null;
|
||||||
|
boolean isNull = markers == null && that.markers == null;
|
||||||
|
result = isNotNull ? markers.equals(that.markers) : isNull;
|
||||||
|
// markers as in the case of first
|
||||||
|
// five observations
|
||||||
|
result = result && getN() == that.getN();
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}The internal state updated due to the new value in this
|
||||||
|
* context is basically of the marker positions and computation of the
|
||||||
|
* approximate quantile.
|
||||||
|
*
|
||||||
|
* @param observation the observation currently being added.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void increment(final double observation) {
|
||||||
|
// Increment counter
|
||||||
|
countOfObservations++;
|
||||||
|
|
||||||
|
// Store last observation
|
||||||
|
this.lastObservation = observation;
|
||||||
|
|
||||||
|
// 0. Use Brute force for <5
|
||||||
|
if (markers == null) {
|
||||||
|
if (initialFive.add(observation)) {
|
||||||
|
Collections.sort(initialFive);
|
||||||
|
pValue =
|
||||||
|
initialFive
|
||||||
|
.get((int) (quantile * (initialFive.size() - 1)));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// 1. Initialize once after 5th observation
|
||||||
|
markers = newMarkers(initialFive, quantile);
|
||||||
|
}
|
||||||
|
// 2. process a Data Point and return pValue
|
||||||
|
pValue = markers.processDataPoint(observation);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a string containing the last observation, the current estimate
|
||||||
|
* of the quantile and all markers.
|
||||||
|
*
|
||||||
|
* @return string representation of state data
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
|
||||||
|
if (markers == null) {
|
||||||
|
return String.format("obs=%s pValue=%s",
|
||||||
|
DECIMAL_FORMAT.format(lastObservation),
|
||||||
|
DECIMAL_FORMAT.format(pValue));
|
||||||
|
} else {
|
||||||
|
return String.format("obs=%s markers=%s",
|
||||||
|
DECIMAL_FORMAT.format(lastObservation), markers.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
public long getN() {
|
||||||
|
return countOfObservations;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public StorelessUnivariateStatistic copy() {
|
||||||
|
// multiply quantile by 100 now as anyway constructor divides it by 100
|
||||||
|
PSquarePercentile copy = new PSquarePercentile(100d * quantile);
|
||||||
|
|
||||||
|
if (markers != null) {
|
||||||
|
copy.markers = (PSquareMarkers) markers.clone();
|
||||||
|
}
|
||||||
|
copy.countOfObservations = countOfObservations;
|
||||||
|
copy.pValue = pValue;
|
||||||
|
copy.initialFive.clear();
|
||||||
|
copy.initialFive.addAll(initialFive);
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the quantile estimated by this statistic in the range [0.0-1.0]
|
||||||
|
*
|
||||||
|
* @return quantile estimated by {@link #getResult()}
|
||||||
|
*/
|
||||||
|
public double quantile() {
|
||||||
|
return quantile;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}. This basically clears all the markers, the
|
||||||
|
* initialFive list and sets countOfObservations to 0.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void clear() {
|
||||||
|
markers = null;
|
||||||
|
initialFive.clear();
|
||||||
|
countOfObservations = 0L;
|
||||||
|
pValue = Double.NaN;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public double getResult() {
|
||||||
|
if (Double.compare(quantile, 1d) == 0) {
|
||||||
|
pValue = maximum();
|
||||||
|
} else if (Double.compare(quantile, 0d) == 0) {
|
||||||
|
pValue = minimum();
|
||||||
|
}
|
||||||
|
return pValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return maximum in the data set added to this statistic
|
||||||
|
*/
|
||||||
|
private double maximum() {
|
||||||
|
double val = Double.NaN;
|
||||||
|
if (markers != null) {
|
||||||
|
val = markers.height(PSQUARE_CONSTANT);
|
||||||
|
} else if (!initialFive.isEmpty()) {
|
||||||
|
val = initialFive.get(initialFive.size() - 1);
|
||||||
|
}
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return minimum in the data set added to this statistic
|
||||||
|
*/
|
||||||
|
private double minimum() {
|
||||||
|
double val = Double.NaN;
|
||||||
|
if (markers != null) {
|
||||||
|
val = markers.height(1);
|
||||||
|
} else if (!initialFive.isEmpty()) {
|
||||||
|
val = initialFive.get(0);
|
||||||
|
}
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Markers is an encapsulation of the five markers/buckets as indicated in
|
||||||
|
* the original works.
|
||||||
|
*/
|
||||||
|
private static class Markers implements PSquareMarkers, Serializable {
|
||||||
|
/**
|
||||||
|
* Serial version id
|
||||||
|
*/
|
||||||
|
private static final long serialVersionUID = 1L;
|
||||||
|
|
||||||
|
/** Low marker index */
|
||||||
|
private static final int LOW = 2;
|
||||||
|
|
||||||
|
/** High marker index */
|
||||||
|
private static final int HIGH = 4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Array of 5+1 Markers (The first marker is dummy just so we
|
||||||
|
* can match the rest of indexes [1-5] indicated in the original works
|
||||||
|
* which follows unit based index)
|
||||||
|
*/
|
||||||
|
private final Marker[] markerArray;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Kth cell belonging to [1-5] of the markerArray. No need for
|
||||||
|
* this to be serialized
|
||||||
|
*/
|
||||||
|
private transient int k = -1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor
|
||||||
|
*
|
||||||
|
* @param theMarkerArray marker array to be used
|
||||||
|
*/
|
||||||
|
private Markers(final Marker[] theMarkerArray) {
|
||||||
|
MathUtils.checkNotNull(theMarkerArray);
|
||||||
|
markerArray = theMarkerArray;
|
||||||
|
for (int i = 1; i < PSQUARE_CONSTANT; i++) {
|
||||||
|
markerArray[i].previous(markerArray[i - 1])
|
||||||
|
.next(markerArray[i + 1]).index(i);
|
||||||
|
}
|
||||||
|
markerArray[0].previous(markerArray[0]).next(markerArray[1])
|
||||||
|
.index(0);
|
||||||
|
markerArray[5].previous(markerArray[4]).next(markerArray[5])
|
||||||
|
.index(5);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor
|
||||||
|
*
|
||||||
|
* @param initialFive elements required to build Marker
|
||||||
|
* @param p quantile required to be computed
|
||||||
|
*/
|
||||||
|
private Markers(final List<Double> initialFive, final double p) {
|
||||||
|
this(createMarkerArray(initialFive, p));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a marker array using initial five elements and a quantile
|
||||||
|
*
|
||||||
|
* @param initialFive list of initial five elements
|
||||||
|
* @param p the pth quantile
|
||||||
|
* @return Marker array
|
||||||
|
*/
|
||||||
|
private static Marker[] createMarkerArray(
|
||||||
|
final List<Double> initialFive, final double p) {
|
||||||
|
final int countObserved =
|
||||||
|
initialFive == null ? -1 : initialFive.size();
|
||||||
|
if (countObserved < PSQUARE_CONSTANT) {
|
||||||
|
throw new InsufficientDataException(
|
||||||
|
LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE,
|
||||||
|
countObserved, PSQUARE_CONSTANT);
|
||||||
|
}
|
||||||
|
Collections.sort(initialFive);
|
||||||
|
return new Marker[] {
|
||||||
|
new Marker(),// Null Marker
|
||||||
|
new Marker(initialFive.get(0), 1, 0, 1),
|
||||||
|
new Marker(initialFive.get(1), 1 + 2 * p, p / 2, 2),
|
||||||
|
new Marker(initialFive.get(2), 1 + 4 * p, p, 3),
|
||||||
|
new Marker(initialFive.get(3), 3 + 2 * p, (1 + p) / 2, 4),
|
||||||
|
new Marker(initialFive.get(4), 5, 1, 5) };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Arrays.deepHashCode(markerArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}.This equals method basically checks for marker array to
|
||||||
|
* be deep equals.
|
||||||
|
*
|
||||||
|
* @param o is the other object
|
||||||
|
* @return true if the object compares with this object are equivalent
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
boolean result = false;
|
||||||
|
if (this == o) {
|
||||||
|
result = true;
|
||||||
|
} else if (o != null && o instanceof Markers) {
|
||||||
|
Markers that = (Markers) o;
|
||||||
|
result = Arrays.deepEquals(markerArray, that.markerArray);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process a data point
|
||||||
|
*
|
||||||
|
* @param inputDataPoint is the data point passed
|
||||||
|
* @return computed percentile
|
||||||
|
*/
|
||||||
|
public double processDataPoint(final double inputDataPoint) {
|
||||||
|
|
||||||
|
// 1. Find cell and update minima and maxima
|
||||||
|
final int kthCell = findCellAndUpdateMinMax(inputDataPoint);
|
||||||
|
|
||||||
|
// 2. Increment positions
|
||||||
|
incrementPositions(1, kthCell + 1, 5);
|
||||||
|
|
||||||
|
// 2a. Update desired position with increments
|
||||||
|
updateDesiredPositions();
|
||||||
|
|
||||||
|
// 3. Adjust heights of m[2-4] if necessary
|
||||||
|
adjustHeightsOfMarkers();
|
||||||
|
|
||||||
|
// 4. Return percentile
|
||||||
|
return getPercentileValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the percentile computed thus far.
|
||||||
|
*
|
||||||
|
* @return height of mid point marker
|
||||||
|
*/
|
||||||
|
public double getPercentileValue() {
|
||||||
|
return height(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the cell where the input observation / value fits.
|
||||||
|
*
|
||||||
|
* @param observation the input value to be checked for
|
||||||
|
* @return kth cell (of the markers ranging from 1-5) where observed
|
||||||
|
* sample fits
|
||||||
|
*/
|
||||||
|
private int findCellAndUpdateMinMax(final double observation) {
|
||||||
|
k = -1;
|
||||||
|
if (observation < height(1)) {
|
||||||
|
markerArray[1].markerHeight = observation;
|
||||||
|
k = 1;
|
||||||
|
} else if (observation < height(2)) {
|
||||||
|
k = 1;
|
||||||
|
} else if (observation < height(3)) {
|
||||||
|
k = 2;
|
||||||
|
} else if (observation < height(4)) {
|
||||||
|
k = 3;
|
||||||
|
} else if (observation <= height(5)) {
|
||||||
|
k = 4;
|
||||||
|
} else {
|
||||||
|
markerArray[5].markerHeight = observation;
|
||||||
|
k = 4;
|
||||||
|
}
|
||||||
|
return k;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adjust marker heights by setting quantile estimates to middle markers.
|
||||||
|
*/
|
||||||
|
private void adjustHeightsOfMarkers() {
|
||||||
|
for (int i = LOW; i <= HIGH; i++) {
|
||||||
|
estimate(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
public double estimate(final int index) {
|
||||||
|
if (index < LOW || index > HIGH) {
|
||||||
|
throw new OutOfRangeException(index, LOW, HIGH);
|
||||||
|
}
|
||||||
|
return markerArray[index].estimate();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment positions by d. Refer to algorithm paper for the
|
||||||
|
* definition of d.
|
||||||
|
*
|
||||||
|
* @param d The increment value for the position
|
||||||
|
* @param startIndex start index of the marker array
|
||||||
|
* @param endIndex end index of the marker array
|
||||||
|
*/
|
||||||
|
private void incrementPositions(final int d, final int startIndex,
|
||||||
|
final int endIndex) {
|
||||||
|
for (int i = startIndex; i <= endIndex; i++) {
|
||||||
|
markerArray[i].incrementPosition(d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Desired positions incremented by bucket width. The bucket width is
|
||||||
|
* basically the desired increments.
|
||||||
|
*/
|
||||||
|
private void updateDesiredPositions() {
|
||||||
|
for (int i = 1; i < markerArray.length; i++) {
|
||||||
|
markerArray[i].updateDesiredPosition();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets previous and next markers after default read is done.
|
||||||
|
*
|
||||||
|
* @param anInputStream the input stream to be deserialized
|
||||||
|
* @throws ClassNotFoundException thrown when a desired class not found
|
||||||
|
* @throws IOException thrown due to any io errors
|
||||||
|
*/
|
||||||
|
private void readObject(ObjectInputStream anInputStream)
|
||||||
|
throws ClassNotFoundException, IOException {
|
||||||
|
// always perform the default de-serialization first
|
||||||
|
anInputStream.defaultReadObject();
|
||||||
|
// Build links
|
||||||
|
for (int i = 1; i < PSQUARE_CONSTANT; i++) {
|
||||||
|
markerArray[i].previous(markerArray[i - 1])
|
||||||
|
.next(markerArray[i + 1]).index(i);
|
||||||
|
}
|
||||||
|
markerArray[0].previous(markerArray[0]).next(markerArray[1])
|
||||||
|
.index(0);
|
||||||
|
markerArray[5].previous(markerArray[4]).next(markerArray[5])
|
||||||
|
.index(5);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return marker height given index
|
||||||
|
*
|
||||||
|
* @param markerIndex index of marker within (1,6)
|
||||||
|
* @return marker height
|
||||||
|
*/
|
||||||
|
public double height(final int markerIndex) {
|
||||||
|
if (markerIndex >= markerArray.length || markerIndex <= 0) {
|
||||||
|
throw new OutOfRangeException(markerIndex, 1,
|
||||||
|
markerArray.length);
|
||||||
|
}
|
||||||
|
return markerArray[markerIndex].markerHeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}.Clone Markers
|
||||||
|
*
|
||||||
|
* @return cloned object
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Object clone() {
|
||||||
|
return new Markers(new Marker[] { new Marker(),
|
||||||
|
(Marker) markerArray[1].clone(),
|
||||||
|
(Marker) markerArray[2].clone(),
|
||||||
|
(Marker) markerArray[3].clone(),
|
||||||
|
(Marker) markerArray[4].clone(),
|
||||||
|
(Marker) markerArray[5].clone() });
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns string representation of the Marker array.
|
||||||
|
*
|
||||||
|
* @return Markers as a string
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("m1=[%s],m2=[%s],m3=[%s],m4=[%s],m5=[%s]",
|
||||||
|
markerArray[1].toString(), markerArray[2].toString(),
|
||||||
|
markerArray[3].toString(), markerArray[4].toString(),
|
||||||
|
markerArray[5].toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The class modeling the attributes of the marker of the P-square algorithm
|
||||||
|
*/
|
||||||
|
private static class Marker implements Serializable, Cloneable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serial Version ID
|
||||||
|
*/
|
||||||
|
private static final long serialVersionUID = -3575879478288538431L;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The marker index which is just a serial number for the marker in the
|
||||||
|
* marker array of 5+1.
|
||||||
|
*/
|
||||||
|
private int index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The integral marker position. Refer to the variable n in the original
|
||||||
|
* works.
|
||||||
|
*/
|
||||||
|
private double intMarkerPosition;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Desired marker position. Refer to the variable n' in the original
|
||||||
|
* works.
|
||||||
|
*/
|
||||||
|
private double desiredMarkerPosition;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Marker height or the quantile. Refer to the variable q in the
|
||||||
|
* original works.
|
||||||
|
*/
|
||||||
|
private double markerHeight;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Desired marker increment. Refer to the variable dn' in the original
|
||||||
|
* works.
|
||||||
|
*/
|
||||||
|
private double desiredMarkerIncrement;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Next and previous markers for easy linked navigation in loops. this
|
||||||
|
* is not serialized as they can be rebuilt during deserialization.
|
||||||
|
*/
|
||||||
|
private transient Marker next;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The previous marker links
|
||||||
|
*/
|
||||||
|
private transient Marker previous;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Nonlinear interpolator
|
||||||
|
*/
|
||||||
|
private final UnivariateInterpolator nonLinear =
|
||||||
|
new NevilleInterpolator();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Linear interpolator which is not serializable
|
||||||
|
*/
|
||||||
|
private transient UnivariateInterpolator linear =
|
||||||
|
new LinearInterpolator();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default constructor
|
||||||
|
*/
|
||||||
|
private Marker() {
|
||||||
|
this.next = this.previous = this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor of the marker with parameters
|
||||||
|
*
|
||||||
|
* @param heightOfMarker represent the quantile value
|
||||||
|
* @param makerPositionDesired represent the desired marker position
|
||||||
|
* @param markerPositionIncrement represent increments for position
|
||||||
|
* @param markerPositionNumber represent the position number of marker
|
||||||
|
*/
|
||||||
|
private Marker(double heightOfMarker, double makerPositionDesired,
|
||||||
|
double markerPositionIncrement, double markerPositionNumber) {
|
||||||
|
this();
|
||||||
|
this.markerHeight = heightOfMarker;
|
||||||
|
this.desiredMarkerPosition = makerPositionDesired;
|
||||||
|
this.desiredMarkerIncrement = markerPositionIncrement;
|
||||||
|
this.intMarkerPosition = markerPositionNumber;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the previous marker.
|
||||||
|
*
|
||||||
|
* @param previousMarker the previous marker to the current marker in
|
||||||
|
* the array of markers
|
||||||
|
* @return this instance
|
||||||
|
*/
|
||||||
|
private Marker previous(final Marker previousMarker) {
|
||||||
|
MathUtils.checkNotNull(previousMarker);
|
||||||
|
this.previous = previousMarker;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the next marker.
|
||||||
|
*
|
||||||
|
* @param nextMarker the next marker to the current marker in the array
|
||||||
|
* of markers
|
||||||
|
* @return this instance
|
||||||
|
*/
|
||||||
|
private Marker next(final Marker nextMarker) {
|
||||||
|
MathUtils.checkNotNull(nextMarker);
|
||||||
|
this.next = nextMarker;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the index of the marker.
|
||||||
|
*
|
||||||
|
* @param indexOfMarker the array index of the marker in marker array
|
||||||
|
* @return this instance
|
||||||
|
*/
|
||||||
|
private Marker index(final int indexOfMarker) {
|
||||||
|
this.index = indexOfMarker;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update desired Position with increment.
|
||||||
|
*/
|
||||||
|
private void updateDesiredPosition() {
|
||||||
|
desiredMarkerPosition += desiredMarkerIncrement;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment Position by d.
|
||||||
|
*
|
||||||
|
* @param d a delta value to increment
|
||||||
|
*/
|
||||||
|
private void incrementPosition(final int d) {
|
||||||
|
intMarkerPosition += d;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Difference between desired and actual position
|
||||||
|
*
|
||||||
|
* @return difference between desired and actual position
|
||||||
|
*/
|
||||||
|
private double difference() {
|
||||||
|
return desiredMarkerPosition - intMarkerPosition;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Estimate the quantile for the current marker.
|
||||||
|
*
|
||||||
|
* @return estimated quantile
|
||||||
|
*/
|
||||||
|
private double estimate() {
|
||||||
|
final double di = difference();
|
||||||
|
final boolean isNextHigher =
|
||||||
|
next.intMarkerPosition - intMarkerPosition > 1;
|
||||||
|
final boolean isPreviousLower =
|
||||||
|
previous.intMarkerPosition - intMarkerPosition < -1;
|
||||||
|
|
||||||
|
if (di >= 1 && isNextHigher || di <= -1 && isPreviousLower) {
|
||||||
|
final int d = di >= 0 ? 1 : -1;
|
||||||
|
final double[] xval =
|
||||||
|
new double[] { previous.intMarkerPosition,
|
||||||
|
intMarkerPosition, next.intMarkerPosition };
|
||||||
|
final double[] yval =
|
||||||
|
new double[] { previous.markerHeight, markerHeight,
|
||||||
|
next.markerHeight };
|
||||||
|
final double xD = intMarkerPosition + d;
|
||||||
|
|
||||||
|
UnivariateFunction univariateFunction =
|
||||||
|
nonLinear.interpolate(xval, yval);
|
||||||
|
markerHeight = univariateFunction.value(xD);
|
||||||
|
|
||||||
|
// If parabolic estimate is bad then turn linear
|
||||||
|
if (isEstimateBad(yval, markerHeight)) {
|
||||||
|
int delta = xD - xval[1] > 0 ? 1 : -1;
|
||||||
|
final double[] xBad =
|
||||||
|
new double[] { xval[1], xval[1 + delta] };
|
||||||
|
final double[] yBad =
|
||||||
|
new double[] { yval[1], yval[1 + delta] };
|
||||||
|
MathArrays.sortInPlace(xBad, yBad);// since d can be +/- 1
|
||||||
|
univariateFunction = linear.interpolate(xBad, yBad);
|
||||||
|
markerHeight = univariateFunction.value(xD);
|
||||||
|
}
|
||||||
|
incrementPosition(d);
|
||||||
|
}
|
||||||
|
return markerHeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if parabolic/nonlinear estimate is bad by checking if the
|
||||||
|
* ordinate found is beyond the y[0] and y[2].
|
||||||
|
*
|
||||||
|
* @param y the array to get the bounds
|
||||||
|
* @param yD the estimate
|
||||||
|
* @return true if yD is a bad estimate
|
||||||
|
*/
|
||||||
|
private boolean isEstimateBad(final double[] y, final double yD) {
|
||||||
|
return yD <= y[0] || yD >= y[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}<i>This equals method checks for marker attributes and
|
||||||
|
* as well checks if navigation pointers (next and previous) are the same
|
||||||
|
* between this and passed in object</i>
|
||||||
|
*
|
||||||
|
* @param o Other object
|
||||||
|
* @return true if this equals passed in other object o
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
boolean result = false;
|
||||||
|
if (this == o) {
|
||||||
|
result = true;
|
||||||
|
} else if (o != null && o instanceof Marker) {
|
||||||
|
Marker that = (Marker) o;
|
||||||
|
|
||||||
|
result = Double.compare(markerHeight, that.markerHeight) == 0;
|
||||||
|
result =
|
||||||
|
result &&
|
||||||
|
Double.compare(intMarkerPosition,
|
||||||
|
that.intMarkerPosition) == 0;
|
||||||
|
result =
|
||||||
|
result &&
|
||||||
|
Double.compare(desiredMarkerPosition,
|
||||||
|
that.desiredMarkerPosition) == 0;
|
||||||
|
result =
|
||||||
|
result &&
|
||||||
|
Double.compare(desiredMarkerIncrement,
|
||||||
|
that.desiredMarkerIncrement) == 0;
|
||||||
|
|
||||||
|
result = result && next.index == that.next.index;
|
||||||
|
result = result && previous.index == that.previous.index;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Arrays.hashCode(new double[] {markerHeight, intMarkerPosition,
|
||||||
|
desiredMarkerIncrement, desiredMarkerPosition, previous.index, next.index});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read Object to deserialize.
|
||||||
|
*
|
||||||
|
* @param anInstream Stream Object data
|
||||||
|
* @throws IOException thrown for IO Errors
|
||||||
|
* @throws ClassNotFoundException thrown for class not being found
|
||||||
|
*/
|
||||||
|
private void readObject(ObjectInputStream anInstream)
|
||||||
|
throws ClassNotFoundException, IOException {
|
||||||
|
anInstream.defaultReadObject();
|
||||||
|
previous=next=this;
|
||||||
|
linear = new LinearInterpolator();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clone this instance.
|
||||||
|
*
|
||||||
|
* @return cloned marker
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Object clone() {
|
||||||
|
return new Marker(markerHeight, desiredMarkerPosition,
|
||||||
|
desiredMarkerIncrement, intMarkerPosition);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format(
|
||||||
|
"index=%.0f,n=%.0f,np=%.2f,q=%.2f,dn=%.2f,prev=%d,next=%d",
|
||||||
|
(double) index, Precision.round(intMarkerPosition, 0),
|
||||||
|
Precision.round(desiredMarkerPosition, 2),
|
||||||
|
Precision.round(markerHeight, 2),
|
||||||
|
Precision.round(desiredMarkerIncrement, 2), previous.index,
|
||||||
|
next.index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple fixed capacity list that has an upper bound to growth.
|
||||||
|
* Once its capacity is reached, {@code add} is a no-op, returning
|
||||||
|
* {@code false}.
|
||||||
|
*
|
||||||
|
* @param <E>
|
||||||
|
*/
|
||||||
|
private static class FixedCapacityList<E> extends ArrayList<E> implements
|
||||||
|
Serializable {
|
||||||
|
/**
|
||||||
|
* Serialization Version Id
|
||||||
|
*/
|
||||||
|
private static final long serialVersionUID = 2283952083075725479L;
|
||||||
|
/**
|
||||||
|
* Capacity of the list
|
||||||
|
*/
|
||||||
|
private final int capacity;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This constructor constructs the list with given capacity and as well
|
||||||
|
* as stores the capacity
|
||||||
|
*
|
||||||
|
* @param fixedCapacity the capacity to be fixed for this list
|
||||||
|
*/
|
||||||
|
public FixedCapacityList(final int fixedCapacity) {
|
||||||
|
super(fixedCapacity);
|
||||||
|
this.capacity = fixedCapacity;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc} In addition it checks if the {@link #size()} returns a
|
||||||
|
* size that is within capacity and if true it adds; otherwise the list
|
||||||
|
* contents are unchanged and {@code false} is returned.
|
||||||
|
*
|
||||||
|
* @return true if addition is successful and false otherwise
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean add(final E e) {
|
||||||
|
return size() < capacity ? super.add(e) : false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc} In addition it checks if the sum of Collection size and
|
||||||
|
* this instance's {@link #size()} returns a value that is within
|
||||||
|
* capacity and if true it adds the collection; otherwise the list
|
||||||
|
* contents are unchanged and {@code false} is returned.
|
||||||
|
*
|
||||||
|
* @return true if addition is successful and false otherwise
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean addAll(Collection<? extends E> collection) {
|
||||||
|
boolean isCollectionLess =
|
||||||
|
collection != null &&
|
||||||
|
collection.size() + size() <= capacity;
|
||||||
|
return isCollectionLess ? super.addAll(collection) : false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A creation method to build Markers
|
||||||
|
*
|
||||||
|
* @param initialFive list of initial five elements
|
||||||
|
* @param p the quantile desired
|
||||||
|
* @return an instance of PSquareMarkers
|
||||||
|
*/
|
||||||
|
public static PSquareMarkers newMarkers(final List<Double> initialFive,
|
||||||
|
final double p) {
|
||||||
|
return new Markers(initialFive, p);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An interface that encapsulates abstractions of the
|
||||||
|
* P-square algorithm markers as is explained in the original works. This
|
||||||
|
* interface is exposed with protected access to help in testability.
|
||||||
|
*/
|
||||||
|
protected interface PSquareMarkers extends Cloneable {
|
||||||
|
/**
|
||||||
|
* Returns Percentile value computed thus far.
|
||||||
|
*
|
||||||
|
* @return percentile
|
||||||
|
*/
|
||||||
|
double getPercentileValue();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A clone function to clone the current instance. It's created as an
|
||||||
|
* interface method as well for convenience though Cloneable is just a
|
||||||
|
* marker interface.
|
||||||
|
*
|
||||||
|
* @return clone of this instance
|
||||||
|
*/
|
||||||
|
Object clone();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the marker height (or percentile) of a given marker index.
|
||||||
|
*
|
||||||
|
* @param markerIndex is the index of marker in the marker array
|
||||||
|
* @return percentile value of the marker index passed
|
||||||
|
* @throws OutOfRangeException in case the index is not within [1-5]
|
||||||
|
*/
|
||||||
|
double height(final int markerIndex);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process a data point by moving the marker heights based on estimator.
|
||||||
|
*
|
||||||
|
* @param inputDataPoint is the data point passed
|
||||||
|
* @return computed percentile
|
||||||
|
*/
|
||||||
|
double processDataPoint(final double inputDataPoint);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An Estimate of the percentile value of a given Marker
|
||||||
|
*
|
||||||
|
* @param index the marker's index in the array of markers
|
||||||
|
* @return percentile estimate
|
||||||
|
* @throws OutOfRangeException in case if index is not within [1-5]
|
||||||
|
*/
|
||||||
|
double estimate(final int index);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,760 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.commons.math3.stat.descriptive.rank;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.math3.distribution.LogNormalDistribution;
|
||||||
|
import org.apache.commons.math3.distribution.NormalDistribution;
|
||||||
|
import org.apache.commons.math3.distribution.RealDistribution;
|
||||||
|
import org.apache.commons.math3.exception.MathIllegalArgumentException;
|
||||||
|
import org.apache.commons.math3.exception.OutOfRangeException;
|
||||||
|
import org.apache.commons.math3.random.RandomGenerator;
|
||||||
|
import org.apache.commons.math3.random.Well19937c;
|
||||||
|
import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic;
|
||||||
|
import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatisticAbstractTest;
|
||||||
|
import org.apache.commons.math3.stat.descriptive.UnivariateStatistic;
|
||||||
|
import org.apache.commons.math3.stat.descriptive.rank.PSquarePercentile.PSquareMarkers;
|
||||||
|
import org.apache.commons.math3.util.FastMath;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test cases for the {@link PSquarePercentile} class which naturally extends
|
||||||
|
* {@link StorelessUnivariateStatisticAbstractTest}.
|
||||||
|
*/
|
||||||
|
public class PSquarePercentileTest extends
|
||||||
|
StorelessUnivariateStatisticAbstractTest {
|
||||||
|
|
||||||
|
protected double percentile5 = 8.2299d;
|
||||||
|
protected double percentile95 = 16.72195;// 20.82d; this is approximation
|
||||||
|
protected double tolerance = 10E-12;
|
||||||
|
|
||||||
|
private final RandomGenerator randomGenerator = new Well19937c(1000);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double getTolerance() {
|
||||||
|
return 1.0e-2;// tolerance limit changed as this is an approximation
|
||||||
|
// algorithm and also gets accurate after few tens of
|
||||||
|
// samples
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that copied statistics remain equal to originals when
|
||||||
|
* incremented the same way by making the copy after a majority of elements
|
||||||
|
* are incremented
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testCopyConsistencyWithInitialMostElements() {
|
||||||
|
|
||||||
|
StorelessUnivariateStatistic master =
|
||||||
|
(StorelessUnivariateStatistic) getUnivariateStatistic();
|
||||||
|
|
||||||
|
StorelessUnivariateStatistic replica = null;
|
||||||
|
|
||||||
|
// select a portion of testArray till 75 % of the length to load first
|
||||||
|
long index = FastMath.round(0.75 * testArray.length);
|
||||||
|
|
||||||
|
// Put first half in master and copy master to replica
|
||||||
|
master.incrementAll(testArray, 0, (int) index);
|
||||||
|
replica = master.copy();
|
||||||
|
|
||||||
|
// Check same
|
||||||
|
Assert.assertTrue(replica.equals(master));
|
||||||
|
Assert.assertTrue(master.equals(replica));
|
||||||
|
|
||||||
|
// Now add second part to both and check again
|
||||||
|
master.incrementAll(testArray, (int) index,
|
||||||
|
(int) (testArray.length - index));
|
||||||
|
replica.incrementAll(testArray, (int) index,
|
||||||
|
(int) (testArray.length - index));
|
||||||
|
Assert.assertTrue(replica.equals(master));
|
||||||
|
Assert.assertTrue(master.equals(replica));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that copied statistics remain equal to originals when
|
||||||
|
* incremented the same way by way of copying original after just a few
|
||||||
|
* elements are incremented
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testCopyConsistencyWithInitialFirstFewElements() {
|
||||||
|
|
||||||
|
StorelessUnivariateStatistic master =
|
||||||
|
(StorelessUnivariateStatistic) getUnivariateStatistic();
|
||||||
|
|
||||||
|
StorelessUnivariateStatistic replica = null;
|
||||||
|
|
||||||
|
// select a portion of testArray which is 10% of the length to load
|
||||||
|
// first
|
||||||
|
long index = FastMath.round(0.1 * testArray.length);
|
||||||
|
|
||||||
|
// Put first half in master and copy master to replica
|
||||||
|
master.incrementAll(testArray, 0, (int) index);
|
||||||
|
replica = master.copy();
|
||||||
|
|
||||||
|
// Check same
|
||||||
|
Assert.assertTrue(replica.equals(master));
|
||||||
|
Assert.assertTrue(master.equals(replica));
|
||||||
|
// Now add second part to both and check again
|
||||||
|
master.incrementAll(testArray, (int) index,
|
||||||
|
(int) (testArray.length - index));
|
||||||
|
replica.incrementAll(testArray, (int) index,
|
||||||
|
(int) (testArray.length - index));
|
||||||
|
Assert.assertTrue(master.equals(master));
|
||||||
|
Assert.assertTrue(replica.equals(replica));
|
||||||
|
Assert.assertTrue(replica.equals(master));
|
||||||
|
Assert.assertTrue(master.equals(replica));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = MathIllegalArgumentException.class)
|
||||||
|
public void testNullListInMarkers() {
|
||||||
|
// In case of null list Markers cannot be instantiated..is geting
|
||||||
|
// verified
|
||||||
|
// new Markers(null, 0, PSquarePercentile.newEstimator());
|
||||||
|
PSquarePercentile.newMarkers(null, 0);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMiscellaniousFunctionsInMarkers() {
|
||||||
|
double p = 0.5;
|
||||||
|
PSquareMarkers markers =
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91,
|
||||||
|
38.62 }), p);
|
||||||
|
// Markers equality
|
||||||
|
Assert.assertTrue(markers.equals(markers));
|
||||||
|
Assert.assertFalse(markers.equals(null));
|
||||||
|
Assert.assertFalse(markers.equals(new String()));
|
||||||
|
// Check for null markers test during equality testing
|
||||||
|
// Until 5 elements markers are not initialized
|
||||||
|
PSquarePercentile p1 = new PSquarePercentile(), p2 =
|
||||||
|
new PSquarePercentile();
|
||||||
|
Assert.assertEquals(p1, p2);
|
||||||
|
p1.evaluate(new double[] { 1.0, 2.0, 3.0 });
|
||||||
|
p2.evaluate(new double[] { 1.0, 2.0, 3.0 });
|
||||||
|
Assert.assertEquals(p1, p2);
|
||||||
|
// Move p2 alone with more values just to make sure markers are not null
|
||||||
|
// for p2
|
||||||
|
p2.incrementAll(new double[] { 5.0, 7.0, 11.0 });
|
||||||
|
Assert.assertFalse(p1.equals(p2));
|
||||||
|
Assert.assertFalse(p2.equals(p1));
|
||||||
|
// Next add different data to p1 to make number of elements match and
|
||||||
|
// markers are not null however actual results will vary
|
||||||
|
p1.incrementAll(new double[] { 20, 21, 22, 23 });
|
||||||
|
Assert.assertFalse(p1.equals(p2));// though markers are non null, N
|
||||||
|
// matches, results wont
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = OutOfRangeException.class)
|
||||||
|
public void testMarkersOORLow() {
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91, 38.62 }),
|
||||||
|
0.5).estimate(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = OutOfRangeException.class)
|
||||||
|
public void testMarkersOORHigh() {
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91, 38.62 }),
|
||||||
|
0.5).estimate(5);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMarkers2() {
|
||||||
|
double p = 0.5;
|
||||||
|
PSquareMarkers markers =
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91,
|
||||||
|
38.62 }), p);
|
||||||
|
|
||||||
|
PSquareMarkers markersNew =
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 0.02, 1.18, 9.15, 21.91,
|
||||||
|
38.62 }), p);
|
||||||
|
|
||||||
|
Assert.assertTrue(markers.equals(markersNew));
|
||||||
|
// If just one element of markers got changed then its still false.
|
||||||
|
markersNew.processDataPoint(39);
|
||||||
|
Assert.assertFalse(markers.equals(markersNew));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHashCodeInMarkers() {
|
||||||
|
PSquarePercentile p = new PSquarePercentile(95);
|
||||||
|
PSquarePercentile p2 = new PSquarePercentile(95);
|
||||||
|
Set<PSquarePercentile> s = new HashSet<PSquarePercentile>();
|
||||||
|
s.add(p);
|
||||||
|
s.add(p2);
|
||||||
|
Assert.assertEquals(1, s.size());
|
||||||
|
Assert.assertEquals(p, s.iterator().next());
|
||||||
|
double[] d =
|
||||||
|
new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442,
|
||||||
|
95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990,
|
||||||
|
95.1682 };
|
||||||
|
Assert.assertEquals(95.1981, p.evaluate(d), 1.0e-2); // change
|
||||||
|
Assert.assertEquals(95.1981, p2.evaluate(d), 1.0e-2); // change
|
||||||
|
s.clear();
|
||||||
|
s.add(p);
|
||||||
|
s.add(p2);
|
||||||
|
Assert.assertEquals(1, s.size());
|
||||||
|
Assert.assertEquals(p, s.iterator().next());
|
||||||
|
|
||||||
|
PSquareMarkers m1 =
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
|
||||||
|
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
|
||||||
|
95.1772, 95.0925, 95.1990, 95.1682 }), 0.0);
|
||||||
|
PSquareMarkers m2 =
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
|
||||||
|
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
|
||||||
|
95.1772, 95.0925, 95.1990, 95.1682 }), 0.0);
|
||||||
|
Assert.assertTrue(m1.equals(m2));
|
||||||
|
Set<PSquareMarkers> setMarkers = new LinkedHashSet<PSquareMarkers>();
|
||||||
|
Assert.assertTrue(setMarkers.add(m1));
|
||||||
|
Assert.assertFalse(setMarkers.add(m2));
|
||||||
|
Assert.assertEquals(1, setMarkers.size());
|
||||||
|
|
||||||
|
PSquareMarkers mThis =
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 195.1772, 195.1567,
|
||||||
|
195.1937, 195.1959, 95.1442, 195.0610,
|
||||||
|
195.1591, 195.1195, 195.1772, 95.0925, 95.1990,
|
||||||
|
195.1682 }), 0.50);
|
||||||
|
PSquareMarkers mThat =
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
|
||||||
|
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
|
||||||
|
95.1772, 95.0925, 95.1990, 95.1682 }), 0.50);
|
||||||
|
Assert.assertTrue(mThis.equals(mThis));
|
||||||
|
Assert.assertFalse(mThis.equals(mThat));
|
||||||
|
String s1="";
|
||||||
|
Assert.assertFalse(mThis.equals(s1));
|
||||||
|
for (int i = 0; i < testArray.length; i++) {
|
||||||
|
mThat.processDataPoint(testArray[i]);
|
||||||
|
}
|
||||||
|
setMarkers.add(mThat);
|
||||||
|
setMarkers.add(mThis);
|
||||||
|
Assert.assertTrue(mThat.equals(mThat));
|
||||||
|
Assert.assertTrue(setMarkers.contains(mThat));
|
||||||
|
Assert.assertTrue(setMarkers.contains(mThis));
|
||||||
|
Assert.assertEquals(3, setMarkers.size());
|
||||||
|
Iterator<PSquareMarkers> iterator=setMarkers.iterator();
|
||||||
|
Assert.assertEquals(m1, iterator.next());
|
||||||
|
Assert.assertEquals(mThat, iterator.next());
|
||||||
|
Assert.assertEquals(mThis, iterator.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = OutOfRangeException.class)
|
||||||
|
public void testMarkersWithLowerIndex() {
|
||||||
|
PSquareMarkers mThat =
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
|
||||||
|
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
|
||||||
|
95.1772, 95.0925, 95.1990, 95.1682 }), 0.50);
|
||||||
|
for (int i = 0; i < testArray.length; i++) {
|
||||||
|
mThat.processDataPoint(testArray[i]);
|
||||||
|
}
|
||||||
|
mThat.estimate(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = OutOfRangeException.class)
|
||||||
|
public void testMarkersWithHigherIndex() {
|
||||||
|
PSquareMarkers mThat =
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
|
||||||
|
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
|
||||||
|
95.1772, 95.0925, 95.1990, 95.1682 }), 0.50);
|
||||||
|
for (int i = 0; i < testArray.length; i++) {
|
||||||
|
mThat.processDataPoint(testArray[i]);
|
||||||
|
}
|
||||||
|
mThat.estimate(6);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = OutOfRangeException.class)
|
||||||
|
public void testMarkerHeightWithLowerIndex() {
|
||||||
|
PSquareMarkers mThat =
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
|
||||||
|
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
|
||||||
|
95.1772, 95.0925, 95.1990, 95.1682 }), 0.50);
|
||||||
|
mThat.height(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = OutOfRangeException.class)
|
||||||
|
public void testMarkerHeightWithHigherIndex() {
|
||||||
|
PSquareMarkers mThat =
|
||||||
|
PSquarePercentile.newMarkers(
|
||||||
|
Arrays.asList(new Double[] { 95.1772, 95.1567, 95.1937,
|
||||||
|
95.1959, 95.1442, 95.0610, 95.1591, 95.1195,
|
||||||
|
95.1772, 95.0925, 95.1990, 95.1682 }), 0.50);
|
||||||
|
mThat.height(6);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPSquaredEqualsAndMin() {
|
||||||
|
PSquarePercentile ptile = new PSquarePercentile(0);
|
||||||
|
Assert.assertEquals(ptile, ptile);
|
||||||
|
Assert.assertFalse(ptile.equals(null));
|
||||||
|
Assert.assertFalse(ptile.equals(new String()));
|
||||||
|
// Just to check if there is no data get result for zeroth and 100th
|
||||||
|
// ptile returns NAN
|
||||||
|
Assert.assertTrue(Double.isNaN(ptile.getResult()));
|
||||||
|
Assert.assertTrue(Double.isNaN(new PSquarePercentile(100).getResult()));
|
||||||
|
|
||||||
|
double[] d = new double[] { 1, 3, 2, 4, 9, 10, 11 };
|
||||||
|
ptile.evaluate(d);
|
||||||
|
Assert.assertEquals(ptile, ptile);
|
||||||
|
Assert.assertEquals(1d, ptile.getResult(), 1e-02);// this calls min
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testString() {
|
||||||
|
PSquarePercentile ptile = new PSquarePercentile(95);
|
||||||
|
Assert.assertNotNull(ptile.toString());
|
||||||
|
ptile.increment(1);
|
||||||
|
ptile.increment(2);
|
||||||
|
ptile.increment(3);
|
||||||
|
Assert.assertNotNull(ptile.toString());
|
||||||
|
Assert.assertEquals(expectedValue(), ptile.evaluate(testArray),
|
||||||
|
getTolerance());
|
||||||
|
Assert.assertNotNull(ptile.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public UnivariateStatistic getUnivariateStatistic() {
|
||||||
|
PSquarePercentile ptile = new PSquarePercentile(95);
|
||||||
|
// Assert.assertNull(ptile.markers());
|
||||||
|
return ptile;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double expectedValue() {
|
||||||
|
return this.percentile95;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHighPercentile() {
|
||||||
|
double[] d = new double[] { 1, 2, 3 };
|
||||||
|
PSquarePercentile p = new PSquarePercentile(75.0);
|
||||||
|
Assert.assertEquals(2, p.evaluate(d), 1.0e-5);
|
||||||
|
PSquarePercentile p95 = new PSquarePercentile();
|
||||||
|
Assert.assertEquals(2, p95.evaluate(d), 1.0e-5);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLowPercentile() {
|
||||||
|
double[] d = new double[] { 0, 1 };
|
||||||
|
PSquarePercentile p = new PSquarePercentile(25.0);
|
||||||
|
Assert.assertEquals(0d, p.evaluate(d), Double.MIN_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPercentile() {
|
||||||
|
double[] d = new double[] { 1, 3, 2, 4 };
|
||||||
|
PSquarePercentile p = new PSquarePercentile(30d);
|
||||||
|
Assert.assertEquals(1.0, p.evaluate(d), 1.0e-5);
|
||||||
|
p = new PSquarePercentile(25);
|
||||||
|
Assert.assertEquals(1.0, p.evaluate(d), 1.0e-5);
|
||||||
|
p = new PSquarePercentile(75);
|
||||||
|
Assert.assertEquals(3.0, p.evaluate(d), 1.0e-5);
|
||||||
|
p = new PSquarePercentile(50);
|
||||||
|
Assert.assertEquals(2d, p.evaluate(d), 1.0e-5);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = MathIllegalArgumentException.class)
|
||||||
|
public void testInitial() {
|
||||||
|
PSquarePercentile.newMarkers(new ArrayList<Double>(), 0.5);
|
||||||
|
Assert.fail();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = MathIllegalArgumentException.class)
|
||||||
|
public void testNegativeInvalidValues() {
|
||||||
|
double[] d =
|
||||||
|
new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442,
|
||||||
|
95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990,
|
||||||
|
95.1682 };
|
||||||
|
PSquarePercentile p = new PSquarePercentile(-1.0);
|
||||||
|
p.evaluate(d, 0, d.length);
|
||||||
|
Assert.fail("This method has had to throw exception..but it is not..");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = MathIllegalArgumentException.class)
|
||||||
|
public void testPositiveInvalidValues() {
|
||||||
|
double[] d =
|
||||||
|
new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442,
|
||||||
|
95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990,
|
||||||
|
95.1682 };
|
||||||
|
PSquarePercentile p = new PSquarePercentile(101.0);
|
||||||
|
p.evaluate(d, 0, d.length);
|
||||||
|
Assert.fail("This method has had to throw exception..but it is not..");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNISTExample() {
|
||||||
|
double[] d =
|
||||||
|
new double[] { 95.1772, 95.1567, 95.1937, 95.1959, 95.1442,
|
||||||
|
95.0610, 95.1591, 95.1195, 95.1772, 95.0925, 95.1990,
|
||||||
|
95.1682 };
|
||||||
|
Assert.assertEquals(95.1981, new PSquarePercentile(90d).evaluate(d),
|
||||||
|
1.0e-2); // changed the accuracy to 1.0e-2
|
||||||
|
Assert.assertEquals(95.061, new PSquarePercentile(0d).evaluate(d), 0);
|
||||||
|
Assert.assertEquals(95.1990,
|
||||||
|
new PSquarePercentile(100d).evaluate(d, 0, d.length), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test5() {
|
||||||
|
PSquarePercentile percentile = new PSquarePercentile(5d);
|
||||||
|
Assert.assertEquals(this.percentile5, percentile.evaluate(testArray),
|
||||||
|
1.0);// changed the accuracy to 1 instead of tolerance
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = MathIllegalArgumentException.class)
|
||||||
|
public void testNull() {
|
||||||
|
PSquarePercentile percentile = new PSquarePercentile(50d);
|
||||||
|
double[] nullArray = null;
|
||||||
|
percentile.evaluate(nullArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEmpty() {
|
||||||
|
PSquarePercentile percentile = new PSquarePercentile(50d);
|
||||||
|
double[] emptyArray = new double[] {};
|
||||||
|
Assert.assertTrue(Double.isNaN(percentile.evaluate(emptyArray)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSingleton() {
|
||||||
|
PSquarePercentile percentile = new PSquarePercentile(50d);
|
||||||
|
double[] singletonArray = new double[] { 1d };
|
||||||
|
Assert.assertEquals(1d, percentile.evaluate(singletonArray), 0);
|
||||||
|
Assert.assertEquals(1d, percentile.evaluate(singletonArray, 0, 1), 0);
|
||||||
|
percentile = new PSquarePercentile(5);
|
||||||
|
Assert.assertEquals(1d, percentile.evaluate(singletonArray, 0, 1), 0);
|
||||||
|
percentile = new PSquarePercentile(100);
|
||||||
|
Assert.assertEquals(1d, percentile.evaluate(singletonArray, 0, 1), 0);
|
||||||
|
percentile = new PSquarePercentile(100);
|
||||||
|
Assert.assertTrue(Double.isNaN(percentile
|
||||||
|
.evaluate(singletonArray, 0, 0)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSpecialValues() {
|
||||||
|
PSquarePercentile percentile = new PSquarePercentile(50d);
|
||||||
|
double[] specialValues =
|
||||||
|
new double[] { 0d, 1d, 2d, 3d, 4d, Double.NaN };
|
||||||
|
Assert.assertEquals(2d, percentile.evaluate(specialValues), 0);
|
||||||
|
specialValues =
|
||||||
|
new double[] { Double.NEGATIVE_INFINITY, 1d, 2d, 3d,
|
||||||
|
Double.NaN, Double.POSITIVE_INFINITY };
|
||||||
|
Assert.assertEquals(2d, percentile.evaluate(specialValues), 0);
|
||||||
|
specialValues =
|
||||||
|
new double[] { 1d, 1d, Double.POSITIVE_INFINITY,
|
||||||
|
Double.POSITIVE_INFINITY };
|
||||||
|
Assert.assertFalse(Double.isInfinite(percentile.evaluate(specialValues)));
|
||||||
|
specialValues = new double[] { 1d, 1d, Double.NaN, Double.NaN };
|
||||||
|
Assert.assertFalse(Double.isNaN(percentile.evaluate(specialValues)));
|
||||||
|
specialValues =
|
||||||
|
new double[] { 1d, 1d, Double.NEGATIVE_INFINITY,
|
||||||
|
Double.NEGATIVE_INFINITY };
|
||||||
|
percentile = new PSquarePercentile(50d);
|
||||||
|
// Interpolation results in NEGATIVE_INFINITY + POSITIVE_INFINITY
|
||||||
|
// changed the result check to infinity instead of NaN
|
||||||
|
Assert.assertTrue(Double.isInfinite(percentile.evaluate(specialValues)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testArrayExample() {
|
||||||
|
Assert.assertEquals(expectedValue(),
|
||||||
|
new PSquarePercentile(95d).evaluate(testArray), getTolerance());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSetQuantile() {
|
||||||
|
PSquarePercentile percentile = new PSquarePercentile(10d);
|
||||||
|
|
||||||
|
percentile = new PSquarePercentile(100); // OK
|
||||||
|
Assert.assertEquals(1.0, percentile.quantile(), 0);
|
||||||
|
try {
|
||||||
|
percentile = new PSquarePercentile(0);
|
||||||
|
// Assert.fail("Expecting MathIllegalArgumentException");
|
||||||
|
} catch (MathIllegalArgumentException ex) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
new PSquarePercentile(0d);
|
||||||
|
// Assert.fail("Expecting MathIllegalArgumentException");
|
||||||
|
} catch (MathIllegalArgumentException ex) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Double[] randomTestData(int factor, int values) {
|
||||||
|
Double[] test = new Double[values];
|
||||||
|
for (int i = 0; i < test.length; i++) {
|
||||||
|
test[i] = Math.abs(randomGenerator.nextDouble() * factor);
|
||||||
|
}
|
||||||
|
return test;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAccept() {
|
||||||
|
PSquarePercentile psquared = new PSquarePercentile(0.99);
|
||||||
|
Assert.assertTrue(Double.isNaN(psquared.getResult()));
|
||||||
|
Double[] test = randomTestData(100, 10000);
|
||||||
|
|
||||||
|
for (Double value : test) {
|
||||||
|
psquared.increment(value);
|
||||||
|
Assert.assertTrue(psquared.getResult() >= 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertValues(Double a, Double b, double delta) {
|
||||||
|
if (Double.isNaN(a)) {
|
||||||
|
Assert.assertTrue("" + b + " is not NaN.", Double.isNaN(a));
|
||||||
|
} else {
|
||||||
|
double max = FastMath.max(a, b);
|
||||||
|
double percentage = FastMath.abs(a - b) / max;
|
||||||
|
double deviation = delta;
|
||||||
|
Assert.assertTrue(String.format(
|
||||||
|
"Deviated = %f and is beyond %f as a=%f, b=%f",
|
||||||
|
percentage, deviation, a, b), percentage < deviation);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doCalculatePercentile(Double percentile, Number[] test) {
|
||||||
|
doCalculatePercentile(percentile, test, Double.MAX_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doCalculatePercentile(Double percentile, Number[] test,
|
||||||
|
double delta) {
|
||||||
|
PSquarePercentile psquared = new PSquarePercentile(percentile);
|
||||||
|
for (Number value : test) {
|
||||||
|
psquared.increment(value.doubleValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
Percentile p2 = new Percentile(percentile * 100);
|
||||||
|
|
||||||
|
double[] dall = new double[test.length];
|
||||||
|
for (int i = 0; i < test.length; i++) {
|
||||||
|
dall[i] = test[i].doubleValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
Double referenceValue = p2.evaluate(dall);
|
||||||
|
assertValues(psquared.getResult(), referenceValue, delta);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doCalculatePercentile(double percentile, double[] test,
|
||||||
|
double delta) {
|
||||||
|
PSquarePercentile psquared = new PSquarePercentile(percentile);
|
||||||
|
for (double value : test) {
|
||||||
|
psquared.increment(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
Percentile p2 =
|
||||||
|
new Percentile(percentile < 1 ? percentile * 100 : percentile);
|
||||||
|
/*
|
||||||
|
* double[] dall = new double[test.length]; for (int i = 0; i <
|
||||||
|
* test.length; i++) dall[i] = test[i];
|
||||||
|
*/
|
||||||
|
Double referenceValue = p2.evaluate(test);
|
||||||
|
assertValues(psquared.getResult(), referenceValue, delta);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCannedDataSet() {
|
||||||
|
// test.unoverride("dump");
|
||||||
|
Integer[] seedInput =
|
||||||
|
new Integer[] { 283, 285, 298, 304, 310, 31, 319, 32, 33, 339,
|
||||||
|
342, 348, 350, 354, 354, 357, 36, 36, 369, 37, 37, 375,
|
||||||
|
378, 383, 390, 396, 405, 408, 41, 414, 419, 416, 42,
|
||||||
|
420, 430, 430, 432, 444, 447, 447, 449, 45, 451, 456,
|
||||||
|
468, 470, 471, 474, 600, 695, 70, 83, 97, 109, 113, 128 };
|
||||||
|
Integer[] input = new Integer[seedInput.length * 100];
|
||||||
|
for (int i = 0; i < input.length; i++) {
|
||||||
|
input[i] = seedInput[i % seedInput.length] + i;
|
||||||
|
}
|
||||||
|
// Arrays.sort(input);
|
||||||
|
doCalculatePercentile(0.50d, input);
|
||||||
|
doCalculatePercentile(0.95d, input);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test99Percentile() {
|
||||||
|
Double[] test = randomTestData(100, 10000);
|
||||||
|
doCalculatePercentile(0.99d, test);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test90Percentile() {
|
||||||
|
Double[] test = randomTestData(100, 10000);
|
||||||
|
doCalculatePercentile(0.90d, test);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test20Percentile() {
|
||||||
|
Double[] test = randomTestData(100, 100000);
|
||||||
|
doCalculatePercentile(0.20d, test);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test5Percentile() {
|
||||||
|
Double[] test = randomTestData(50, 990000);
|
||||||
|
doCalculatePercentile(0.50d, test);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test99PercentileHighValues() {
|
||||||
|
Double[] test = randomTestData(100000, 10000);
|
||||||
|
doCalculatePercentile(0.99d, test);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test90PercentileHighValues() {
|
||||||
|
Double[] test = randomTestData(100000, 100000);
|
||||||
|
doCalculatePercentile(0.90d, test);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test20PercentileHighValues() {
|
||||||
|
Double[] test = randomTestData(100000, 100000);
|
||||||
|
doCalculatePercentile(0.20d, test);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test5PercentileHighValues() {
|
||||||
|
Double[] test = randomTestData(100000, 100000);
|
||||||
|
doCalculatePercentile(0.05d, test);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test0PercentileValuesWithFewerThan5Values() {
|
||||||
|
double[] test = { 1d, 2d, 3d, 4d };
|
||||||
|
PSquarePercentile p = new PSquarePercentile(0d);
|
||||||
|
Assert.assertEquals(1d, p.evaluate(test), 0);
|
||||||
|
Assert.assertNotNull(p.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPSQuaredEvalFuncWithPapersExampleData() throws IOException {
|
||||||
|
|
||||||
|
// This data as input is considered from
|
||||||
|
// http://www.cs.wustl.edu/~jain/papers/ftp/psqr.pdf
|
||||||
|
double[] data =
|
||||||
|
{ 0.02, 0.5, 0.74, 3.39, 0.83, 22.37, 10.15, 15.43, 38.62,
|
||||||
|
15.92, 34.6, 10.28, 1.47, 0.4, 0.05, 11.39, 0.27, 0.42,
|
||||||
|
0.09, 11.37,
|
||||||
|
|
||||||
|
11.39, 15.43, 15.92, 22.37, 34.6, 38.62, 18.9, 19.2,
|
||||||
|
27.6, 12.8, 13.7, 21.9
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
PSquarePercentile psquared = new PSquarePercentile(50);
|
||||||
|
|
||||||
|
Double p2value = 0d;
|
||||||
|
for (int i = 0; i < 20; i++) {
|
||||||
|
psquared.increment(data[i]);
|
||||||
|
p2value = psquared.getResult();
|
||||||
|
// System.out.println(psquared.toString());//uncomment here to see
|
||||||
|
// the papers example output
|
||||||
|
}
|
||||||
|
// System.out.println("p2value=" + p2value);
|
||||||
|
Double expected = 4.44d;// 13d; // From The Paper
|
||||||
|
// http://www.cs.wustl.edu/~jain/papers/ftp/psqr.pdf.
|
||||||
|
// Pl refer Pg 1061 Look at the mid marker
|
||||||
|
// height
|
||||||
|
// expected = new Percentile(50).evaluate(data,0,20);
|
||||||
|
// Well the values deviate in our calculation by 0.25 so its 4.25 vs
|
||||||
|
// 4.44
|
||||||
|
Assert.assertEquals(
|
||||||
|
String.format("Expected=%f, Actual=%f", expected, p2value),
|
||||||
|
expected, p2value, 0.25);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
final int TINY = 10, SMALL = 50, NOMINAL = 100, MEDIUM = 500,
|
||||||
|
STANDARD = 1000, BIG = 10000, VERY_BIG = 50000, LARGE = 1000000,
|
||||||
|
VERY_LARGE = 10000000;
|
||||||
|
|
||||||
|
private void doDistributionTest(RealDistribution distribution) {
|
||||||
|
double data[];
|
||||||
|
|
||||||
|
data = distribution.sample(VERY_LARGE);
|
||||||
|
doCalculatePercentile(50, data, 0.0001);
|
||||||
|
doCalculatePercentile(95, data, 0.0001);
|
||||||
|
|
||||||
|
data = distribution.sample(LARGE);
|
||||||
|
doCalculatePercentile(50, data, 0.001);
|
||||||
|
doCalculatePercentile(95, data, 0.001);
|
||||||
|
|
||||||
|
data = distribution.sample(VERY_BIG);
|
||||||
|
doCalculatePercentile(50, data, 0.001);
|
||||||
|
doCalculatePercentile(95, data, 0.001);
|
||||||
|
|
||||||
|
data = distribution.sample(BIG);
|
||||||
|
doCalculatePercentile(50, data, 0.001);
|
||||||
|
doCalculatePercentile(95, data, 0.001);
|
||||||
|
|
||||||
|
data = distribution.sample(STANDARD);
|
||||||
|
doCalculatePercentile(50, data, 0.005);
|
||||||
|
doCalculatePercentile(95, data, 0.005);
|
||||||
|
|
||||||
|
data = distribution.sample(MEDIUM);
|
||||||
|
doCalculatePercentile(50, data, 0.005);
|
||||||
|
doCalculatePercentile(95, data, 0.005);
|
||||||
|
|
||||||
|
data = distribution.sample(NOMINAL);
|
||||||
|
doCalculatePercentile(50, data, 0.01);
|
||||||
|
doCalculatePercentile(95, data, 0.01);
|
||||||
|
|
||||||
|
data = distribution.sample(SMALL);
|
||||||
|
doCalculatePercentile(50, data, 0.01);
|
||||||
|
doCalculatePercentile(95, data, 0.01);
|
||||||
|
|
||||||
|
data = distribution.sample(TINY);
|
||||||
|
doCalculatePercentile(50, data, 0.05);
|
||||||
|
doCalculatePercentile(95, data, 0.05);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test Various Dist
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDistribution() {
|
||||||
|
doDistributionTest(new NormalDistribution(4000, 50));
|
||||||
|
doDistributionTest(new LogNormalDistribution(4000, 50));
|
||||||
|
// doDistributionTest((new ExponentialDistribution(4000));
|
||||||
|
// doDistributionTest(new GammaDistribution(5d,1d),0.1);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue