Added rank transformations.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@776311 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e52b2a3ecf
commit
bcccc34cba
|
@ -170,4 +170,10 @@
|
|||
<Bug pattern="NP_NULL_PARAM_DEREF_ALL_TARGETS_DANGEROUS" />
|
||||
</Match>
|
||||
|
||||
<!-- IntDoublePair intentionally implements Comparable inconsistently with equals -->
|
||||
<Match>
|
||||
<Class name="org.apache.commons.math.stat.ranking.NaturalRanking$IntDoublePair" />
|
||||
<Bug pattern="EQ_COMPARETO_USE_OBJECT_EQUALS" />
|
||||
</Match>
|
||||
|
||||
</FindBugsFilter>
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.math.stat.ranking;
|
||||
|
||||
/**
|
||||
* Strategies for handling NaN values in rank transformations.
|
||||
* <ul>
|
||||
* <li>MINIMAL - NaNs are treated as minimal in the ordering, equivalent to
|
||||
* (that is, tied with) <code>Double.NEGATIVE_INFINITY</code>.</li>
|
||||
* <li>MAXIMAL - NaNs are treated as maximal in the ordering, equivalent to
|
||||
* <code>Double.POSITIVE_INFINITY</code></li>
|
||||
* <li>REMOVED - NaNs are removed before the rank transform is applied</li>
|
||||
* <li>FIXED - NaNs are left "in place," that is the rank transformation is
|
||||
* applied to the other elements in the input array, but the NaN elements
|
||||
* are returned unchanged.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since 2.0
|
||||
* @version $Revision:$ $Date:$
|
||||
*/
|
||||
public enum NaNStrategy {
|
||||
|
||||
/** NaNs are considered minimal in the ordering */
|
||||
MINIMAL,
|
||||
|
||||
/** NaNs are considered maximal in the ordering */
|
||||
MAXIMAL,
|
||||
|
||||
/** NaNs are removed before computing ranks */
|
||||
REMOVED,
|
||||
|
||||
/** NaNs are left in place */
|
||||
FIXED
|
||||
}
|
|
@ -0,0 +1,458 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.math.stat.ranking;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.math.random.RandomData;
|
||||
import org.apache.commons.math.random.RandomDataImpl;
|
||||
import org.apache.commons.math.random.RandomGenerator;
|
||||
|
||||
|
||||
/**
|
||||
* <p> Ranking based on the natural ordering on doubles.</p>
|
||||
* <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
|
||||
* are handled using the selected {@link TiesStrategy}.
|
||||
* Configuration settings are supplied in optional constructor arguments.
|
||||
* Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE},
|
||||
* respectively. When using {@link TiesStrategy#RANDOM}, a
|
||||
* {@link RandomGenerator} may be supplied as a constructor argument.</p>
|
||||
* <p>Examples:
|
||||
* <table border="1" cellpadding="3">
|
||||
* <tr><th colspan="3">
|
||||
* Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
|
||||
* </th></tr>
|
||||
* <tr><th>NaNStrategy</th><th>TiesStrategy</th>
|
||||
* <th><code>rank(data)</code></th>
|
||||
* <tr>
|
||||
* <td>default (NaNs maximal)</td>
|
||||
* <td>default (ties averaged)</td>
|
||||
* <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
|
||||
* <tr>
|
||||
* <td>default (NaNs maximal)</td>
|
||||
* <td>MINIMUM</td>
|
||||
* <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
|
||||
* <tr>
|
||||
* <td>MINIMAL</td>
|
||||
* <td>default (ties averaged)</td>
|
||||
* <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
|
||||
* <tr>
|
||||
* <td>REMOVED</td>
|
||||
* <td>SEQUENTIAL</td>
|
||||
* <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
|
||||
* <tr>
|
||||
* <td>MINIMAL</td>
|
||||
* <td>MAXIMUM</td>
|
||||
* <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
|
||||
*
|
||||
* @since 2.0
|
||||
* @version $Revision:$ $Date:$
|
||||
*/
|
||||
public class NaturalRanking implements RankingAlgorithm {
|
||||
|
||||
/** NaN strategy - defaults to NaNs maximal */
|
||||
private final NaNStrategy nanStrategy;
|
||||
|
||||
/** Ties strategy - defaults to ties averaged */
|
||||
private final TiesStrategy tiesStrategy;
|
||||
|
||||
/** Source of random data - used only when ties strategy is RANDOM */
|
||||
private final RandomData randomData;
|
||||
|
||||
/** default NaN strategy */
|
||||
public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL;
|
||||
|
||||
/** default ties strategy */
|
||||
public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
|
||||
|
||||
/**
|
||||
* Create a NaturalRanking with default strategies for handling ties and NaNs.
|
||||
*/
|
||||
public NaturalRanking() {
|
||||
super();
|
||||
tiesStrategy = DEFAULT_TIES_STRATEGY;
|
||||
nanStrategy = DEFAULT_NAN_STRATEGY;
|
||||
randomData = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a NaturalRanking with the given TiesStrategy.
|
||||
*
|
||||
* @param tiesStrategy the TiesStrategy to use
|
||||
*/
|
||||
public NaturalRanking(TiesStrategy tiesStrategy) {
|
||||
super();
|
||||
this.tiesStrategy = tiesStrategy;
|
||||
nanStrategy = DEFAULT_NAN_STRATEGY;
|
||||
randomData = new RandomDataImpl();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a NaturalRanking with the given NaNStrategy.
|
||||
*
|
||||
* @param nanStrategy the NaNStrategy to use
|
||||
*/
|
||||
public NaturalRanking(NaNStrategy nanStrategy) {
|
||||
super();
|
||||
this.nanStrategy = nanStrategy;
|
||||
tiesStrategy = DEFAULT_TIES_STRATEGY;
|
||||
randomData = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
|
||||
*
|
||||
* @param nanStrategy NaNStrategy to use
|
||||
* @param tiesStrategy TiesStrategy to use
|
||||
*/
|
||||
public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
|
||||
super();
|
||||
this.nanStrategy = nanStrategy;
|
||||
this.tiesStrategy = tiesStrategy;
|
||||
randomData = new RandomDataImpl();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a NaturalRanking with TiesStrategy.RANDOM and the given
|
||||
* RandomGenerator as the source of random data.
|
||||
*
|
||||
* @param randomGenerator source of random data
|
||||
*/
|
||||
public NaturalRanking(RandomGenerator randomGenerator) {
|
||||
super();
|
||||
this.tiesStrategy = TiesStrategy.RANDOM;
|
||||
nanStrategy = DEFAULT_NAN_STRATEGY;
|
||||
randomData = new RandomDataImpl(randomGenerator);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
|
||||
* and the given source of random data.
|
||||
*
|
||||
* @param nanStrategy NaNStrategy to use
|
||||
* @param randomGenerator source of random data
|
||||
*/
|
||||
public NaturalRanking(NaNStrategy nanStrategy,
|
||||
RandomGenerator randomGenerator) {
|
||||
super();
|
||||
this.nanStrategy = nanStrategy;
|
||||
this.tiesStrategy = TiesStrategy.RANDOM;
|
||||
randomData = new RandomDataImpl(randomGenerator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the NaNStrategy
|
||||
*
|
||||
* @return returns the NaNStrategy
|
||||
*/
|
||||
public NaNStrategy getNanStrategy() {
|
||||
return nanStrategy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the TiesStrategy
|
||||
*
|
||||
* @return the TiesStrategy
|
||||
*/
|
||||
public TiesStrategy getTiesStrategy() {
|
||||
return tiesStrategy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Rank <code>data</code> using the natural ordering on Doubles, with
|
||||
* NaN values handled according to <code>nanStrategy</code> and ties
|
||||
* resolved using <code>tiesStrategy.</code>
|
||||
*
|
||||
* @param data array to be ranked
|
||||
* @return array of ranks
|
||||
*/
|
||||
public double[] rank(double[] data) {
|
||||
|
||||
// Array recording initial positions of data to be ranked
|
||||
IntDoublePair[] ranks = new IntDoublePair[data.length];
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
ranks[i] = new IntDoublePair(data[i], i);
|
||||
}
|
||||
|
||||
// Recode, remove or record positions of NaNs
|
||||
List<Integer> nanPositions = null;
|
||||
switch (nanStrategy) {
|
||||
case MAXIMAL: // Replace NaNs with +INFs
|
||||
recodeNaNs(ranks, Double.POSITIVE_INFINITY);
|
||||
break;
|
||||
case MINIMAL: // Replace NaNs with -INFs
|
||||
recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
|
||||
break;
|
||||
case REMOVED: // Drop NaNs from data
|
||||
ranks = removeNaNs(ranks);
|
||||
break;
|
||||
case FIXED: // Record positions of NaNs
|
||||
nanPositions = getNanPositions(ranks);
|
||||
break;
|
||||
}
|
||||
|
||||
// Sort the IntDoublePairs
|
||||
Arrays.sort(ranks);
|
||||
|
||||
// Walk the sorted array, filling output array using sorted positions,
|
||||
// resolving ties as we go
|
||||
double[] out = new double[ranks.length];
|
||||
int pos = 1; // position in sorted array
|
||||
out[ranks[0].getPosition()] = pos;
|
||||
List<Integer> tiesTrace = new ArrayList<Integer>();
|
||||
tiesTrace.add(ranks[0].getPosition());
|
||||
for (int i = 1; i < ranks.length; i++) {
|
||||
if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
|
||||
// tie sequence has ended (or had length 1)
|
||||
pos = i + 1;
|
||||
if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve
|
||||
resolveTie(out, tiesTrace);
|
||||
}
|
||||
tiesTrace = new ArrayList<Integer>();
|
||||
tiesTrace.add(ranks[i].getPosition());
|
||||
} else {
|
||||
// tie sequence continues
|
||||
tiesTrace.add(ranks[i].getPosition());
|
||||
}
|
||||
out[ranks[i].getPosition()] = pos;
|
||||
}
|
||||
if (tiesTrace.size() > 1) { // handle tie sequence at end
|
||||
resolveTie(out, tiesTrace);
|
||||
}
|
||||
if (nanStrategy == NaNStrategy.FIXED) {
|
||||
restoreNaNs(out, nanPositions);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array that is a copy of the input array with IntDoublePairs
|
||||
* having NaN values removed.
|
||||
*
|
||||
* @param ranks input array
|
||||
* @return array with NaN-valued entries removed
|
||||
*/
|
||||
private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
|
||||
if (!containsNaNs(ranks)) {
|
||||
return ranks;
|
||||
}
|
||||
IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
|
||||
int j = 0;
|
||||
for (int i = 0; i < ranks.length; i++) {
|
||||
if (Double.isNaN(ranks[i].getValue())) {
|
||||
// drop, but adjust original ranks of later elements
|
||||
for (int k = i + 1; k < ranks.length; k++) {
|
||||
ranks[k] = new IntDoublePair(
|
||||
ranks[k].getValue(), ranks[k].getPosition() - 1);
|
||||
}
|
||||
} else {
|
||||
outRanks[j] = new IntDoublePair(
|
||||
ranks[i].getValue(), ranks[i].getPosition());
|
||||
j++;
|
||||
}
|
||||
}
|
||||
IntDoublePair[] returnRanks = new IntDoublePair[j];
|
||||
System.arraycopy(outRanks, 0, returnRanks, 0, j);
|
||||
return returnRanks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recodes NaN values to the given value.
|
||||
*
|
||||
* @param ranks array to recode
|
||||
* @param value the value to replace NaNs with
|
||||
*/
|
||||
private void recodeNaNs(IntDoublePair[] ranks, double value) {
|
||||
for (int i = 0; i < ranks.length; i++) {
|
||||
if (Double.isNaN(ranks[i].getValue())) {
|
||||
ranks[i] = new IntDoublePair(
|
||||
value, ranks[i].getPosition());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks for presence of NaNs in <code>ranks.</code>
|
||||
*
|
||||
* @param ranks array to be searched for NaNs
|
||||
* @return true iff ranks contains one or more NaNs
|
||||
*/
|
||||
private boolean containsNaNs(IntDoublePair[] ranks) {
|
||||
for (int i = 0; i < ranks.length; i++) {
|
||||
if (Double.isNaN(ranks[i].getValue())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a sequence of ties, using the cconfigured {@link TiesStrategy}.
|
||||
* The input <code>ranks</code> array is expected to take the same value
|
||||
* for all indices in <code>tiesTrace</code>. The common value is recoded
|
||||
* according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
|
||||
* tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
|
||||
* The same array and trace with tiesStrategy AVERAGE will come out
|
||||
* <5,8,3,6,3,7,1,3>.
|
||||
*
|
||||
* @param ranks array of ranks
|
||||
* @param tiesTrace list of indices where <code>ranks</code> is constant
|
||||
* -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
|
||||
* </code>
|
||||
*/
|
||||
private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
|
||||
|
||||
// constant value of ranks over tiesTrace
|
||||
final double c = ranks[tiesTrace.get(0)];
|
||||
|
||||
// length of sequence of tied ranks
|
||||
final int length = tiesTrace.size();
|
||||
|
||||
switch (tiesStrategy) {
|
||||
case AVERAGE: // Replace ranks with average
|
||||
fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
|
||||
break;
|
||||
case MAXIMUM: // Replace ranks with maximum values
|
||||
fill(ranks, tiesTrace, c + length - 1);
|
||||
break;
|
||||
case MINIMUM: // Replace ties with minimum
|
||||
fill(ranks, tiesTrace, c);
|
||||
break;
|
||||
case RANDOM: // Fill with random integral values in [c, c + length - 1]
|
||||
Iterator<Integer> iterator = tiesTrace.iterator();
|
||||
long f = Math.round(c);
|
||||
while (iterator.hasNext()) {
|
||||
ranks[iterator.next()] =
|
||||
randomData.nextLong(f, f + length - 1);
|
||||
}
|
||||
break;
|
||||
case SEQUENTIAL: // Fill sequentially from c to c + length - 1
|
||||
// walk and fill
|
||||
iterator = tiesTrace.iterator();
|
||||
f = Math.round(c);
|
||||
int i = 0;
|
||||
while (iterator.hasNext()) {
|
||||
ranks[iterator.next()] = f + i++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
|
||||
*
|
||||
* @param data array to modify
|
||||
* @param tiesTrace list of index values to set
|
||||
* @param value value to set
|
||||
*/
|
||||
private void fill(double[] data, List<Integer> tiesTrace, double value) {
|
||||
Iterator<Integer> iterator = tiesTrace.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
data[iterator.next()] = value;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
|
||||
*
|
||||
* @param ranks array to modify
|
||||
* @param nanPositions list of index values to set to <code>Double.NaN</code>
|
||||
*/
|
||||
private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
|
||||
if (nanPositions.size() == 0) {
|
||||
return;
|
||||
}
|
||||
Iterator<Integer> iterator = nanPositions.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
ranks[iterator.next().intValue()] = Double.NaN;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
|
||||
*
|
||||
* @param ranks array to search for <code>NaNs</code>
|
||||
* @return list of indexes i such that <code>ranks[i] = NaN</code>
|
||||
*/
|
||||
private List<Integer> getNanPositions(IntDoublePair[] ranks) {
|
||||
ArrayList<Integer> out = new ArrayList<Integer>();
|
||||
for (int i = 0; i < ranks.length; i++) {
|
||||
if (Double.isNaN(ranks[i].getValue())) {
|
||||
out.add(Integer.valueOf(i));
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents the position of a double value in an ordering.
|
||||
* Comparable interface is implemented so Arrays.sort can be used
|
||||
* to sort an array of IntDoublePairs by value. Note that the
|
||||
* implicitly defined natural ordering is NOT consistent with equals.
|
||||
*/
|
||||
private static class IntDoublePair implements Comparable<IntDoublePair> {
|
||||
|
||||
/** Value of the pair */
|
||||
final private double value;
|
||||
|
||||
/** Original position of the pair */
|
||||
final private int position;
|
||||
|
||||
/**
|
||||
* Construct an IntDoublePair with the given value and position.
|
||||
* @param value the value of the pair
|
||||
* @param position the original position
|
||||
*/
|
||||
public IntDoublePair(double value, int position) {
|
||||
this.value = value;
|
||||
this.position = position;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare this IntDoublePair to another pair.
|
||||
* Only the <strong>values</strong> are compared.
|
||||
*
|
||||
* @param other the other pair to compare this to
|
||||
* @return result of <code>Double.compare(value, other.value)</code>
|
||||
*/
|
||||
public int compareTo(IntDoublePair other) {
|
||||
return Double.compare(value, other.value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value of the pair.
|
||||
* @return value
|
||||
*/
|
||||
public double getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the original position of the pair.
|
||||
* @return position
|
||||
*/
|
||||
public int getPosition() {
|
||||
return position;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.math.stat.ranking;
|
||||
|
||||
/**
|
||||
* Interface representing a rank transformation.
|
||||
*
|
||||
* @since 2.0
|
||||
* @version $Revision:$ $Date:$
|
||||
*/
|
||||
public interface RankingAlgorithm {
|
||||
/**
|
||||
* <p>Performs a rank transformation on the input data, returning an array
|
||||
* of ranks.</p>
|
||||
*
|
||||
* <p>Ranks should be 1-based - that is, the smallest value
|
||||
* returned in an array of ranks should be greater than or equal to one,
|
||||
* rather than 0. Ranks should in general take integer values, though
|
||||
* implementations may return averages or other floating point values
|
||||
* to resolve ties in the input data.</p>
|
||||
*
|
||||
* @param data array of data to be ranked
|
||||
* @return an array of ranks corresponding to the elements of the input array
|
||||
*/
|
||||
double[] rank (double[] data);
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.math.stat.ranking;
|
||||
|
||||
/**
|
||||
* Strategies for handling tied values in rank transformations.
|
||||
* <ul>
|
||||
* <li>SEQUENTIAL - Ties are assigned ranks in order of occurrence in the original array,
|
||||
* for example (1,3,4,3) is ranked as (1,2,4,3)</li>
|
||||
* <li>MINIMUM - Tied values are assigned the minimum applicable rank, or the rank
|
||||
* of the first occurrence. For example, (1,3,4,3) is ranked as (1,2,4,2)</li>
|
||||
* <li>MAXIMUM - Tied values are assigned the maximum applicable rank, or the rank
|
||||
* of the last occurrence. For example, (1,3,4,3) is ranked as (1,3,4,3)</li>
|
||||
* <li>AVERAGE - Tied values are assigned the average of the applicable ranks.
|
||||
* For example, (1,3,4,3) is ranked as (1,2.5,4,2.5)</li>
|
||||
* <li>AVERAGE - Tied values are assigned a random integer rank from among the
|
||||
* applicable values. The assigned rank will always be an integer, (inclusively)
|
||||
* between the values retured by the MINIMUM and MAXIMUM strategies.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since 2.0
|
||||
* @version $Revision:$ $Date:$
|
||||
*/
|
||||
public enum TiesStrategy {
|
||||
|
||||
/** Ties assigned sequential ranks in order of occurrence */
|
||||
SEQUENTIAL,
|
||||
|
||||
/** Ties get the minimum applicable rank */
|
||||
MINIMUM,
|
||||
|
||||
/** Ties get the maximum applicable rank */
|
||||
MAXIMUM,
|
||||
|
||||
/** Ties get the average of applicable ranks */
|
||||
AVERAGE,
|
||||
|
||||
/** Ties get a random integral value from among applicable ranks */
|
||||
RANDOM
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
<html>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!-- $Revision:$ $Date:$ -->
|
||||
<body>
|
||||
Classes providing rank transformations.
|
||||
</body>
|
||||
</html>
|
|
@ -39,7 +39,10 @@ The <action> type attribute can be add,update,fix,remove.
|
|||
</properties>
|
||||
<body>
|
||||
<release version="2.0" date="TBD" description="TBD">
|
||||
<action dev="luc" type="update" >
|
||||
<action dev="psteitz" type="add">
|
||||
Added support for rank transformations.
|
||||
</action>
|
||||
<action dev="luc" type="update">
|
||||
Completed internationalization of all error messages
|
||||
</action>
|
||||
<action dev="luc" type="add" issue="MATH-266" due-to="Benjamin McCann">
|
||||
|
|
|
@ -0,0 +1,188 @@
|
|||
/*
|
||||
* TiesEquivalentRankTest.java created on Aug 24, 2005 by jdgant01
|
||||
*
|
||||
*/
|
||||
package org.apache.commons.math.stat.ranking;
|
||||
|
||||
import org.apache.commons.math.TestUtils;
|
||||
import org.apache.commons.math.random.JDKRandomGenerator;
|
||||
import org.apache.commons.math.random.RandomGenerator;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Test cases for NaturalRanking class
|
||||
*
|
||||
* @since 2.0
|
||||
* @version $Revision:$ $Date:$
|
||||
*/
|
||||
public class NaturalRankingTest extends TestCase {
|
||||
|
||||
private final double[] exampleData =
|
||||
{20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17};
|
||||
private final double[] tiesFirst = {0, 0, 2, 1, 4};
|
||||
private final double[] tiesLast = {4, 4, 1, 0};
|
||||
private final double[] multipleNaNs = {0, 1, Double.NaN, Double.NaN};
|
||||
private final double[] multipleTies = {3, 2, 5, 5, 6, 6, 1};
|
||||
private final double[] allSame = {0, 0, 0, 0};
|
||||
|
||||
public NaturalRankingTest(String arg0) {
|
||||
super(arg0);
|
||||
}
|
||||
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
protected void tearDown() throws Exception {
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
public void testDefault() { // Ties averaged, NaNs maximal
|
||||
NaturalRanking ranking = new NaturalRanking();
|
||||
double[] ranks = ranking.rank(exampleData);
|
||||
double[] correctRanks = {5, 3, 6, 7, 3, 8, 9, 1, 3};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesFirst);
|
||||
correctRanks = new double[] {1.5, 1.5, 4, 3, 5};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesLast);
|
||||
correctRanks = new double[] {3.5, 3.5, 2, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleNaNs);
|
||||
correctRanks = new double[] {1, 2, 3.5, 3.5};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleTies);
|
||||
correctRanks = new double[] {3, 2, 4.5, 4.5, 6.5, 6.5, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(allSame);
|
||||
correctRanks = new double[] {2.5, 2.5, 2.5, 2.5};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
}
|
||||
|
||||
public void testNaNsMaximalTiesMinimum() {
|
||||
NaturalRanking ranking = new NaturalRanking(TiesStrategy.MINIMUM);
|
||||
double[] ranks = ranking.rank(exampleData);
|
||||
double[] correctRanks = {5, 2, 6, 7, 2, 8, 9, 1, 2};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesFirst);
|
||||
correctRanks = new double[] {1, 1, 4, 3, 5};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesLast);
|
||||
correctRanks = new double[] {3, 3, 2, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleNaNs);
|
||||
correctRanks = new double[] {1, 2, 3, 3};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleTies);
|
||||
correctRanks = new double[] {3, 2, 4, 4, 6, 6, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(allSame);
|
||||
correctRanks = new double[] {1, 1, 1, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
}
|
||||
|
||||
public void testNaNsRemovedTiesSequential() {
|
||||
NaturalRanking ranking = new NaturalRanking(NaNStrategy.REMOVED,
|
||||
TiesStrategy.SEQUENTIAL);
|
||||
double[] ranks = ranking.rank(exampleData);
|
||||
double[] correctRanks = {5, 2, 6, 7, 3, 8, 1, 4};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesFirst);
|
||||
correctRanks = new double[] {1, 2, 4, 3, 5};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesLast);
|
||||
correctRanks = new double[] {3, 4, 2, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleNaNs);
|
||||
correctRanks = new double[] {1, 2};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleTies);
|
||||
correctRanks = new double[] {3, 2, 4, 5, 6, 7, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(allSame);
|
||||
correctRanks = new double[] {1, 2, 3, 4};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
}
|
||||
|
||||
public void testNaNsMinimalTiesMaximum() {
|
||||
NaturalRanking ranking = new NaturalRanking(NaNStrategy.MINIMAL,
|
||||
TiesStrategy.MAXIMUM);
|
||||
double[] ranks = ranking.rank(exampleData);
|
||||
double[] correctRanks = {6, 5, 7, 8, 5, 9, 2, 2, 5};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesFirst);
|
||||
correctRanks = new double[] {2, 2, 4, 3, 5};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesLast);
|
||||
correctRanks = new double[] {4, 4, 2, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleNaNs);
|
||||
correctRanks = new double[] {3, 4, 2, 2};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleTies);
|
||||
correctRanks = new double[] {3, 2, 5, 5, 7, 7, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(allSame);
|
||||
correctRanks = new double[] {4, 4, 4, 4};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
}
|
||||
|
||||
public void testNaNsMinimalTiesAverage() {
|
||||
NaturalRanking ranking = new NaturalRanking(NaNStrategy.MINIMAL);
|
||||
double[] ranks = ranking.rank(exampleData);
|
||||
double[] correctRanks = {6, 4, 7, 8, 4, 9, 1.5, 1.5, 4};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesFirst);
|
||||
correctRanks = new double[] {1.5, 1.5, 4, 3, 5};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesLast);
|
||||
correctRanks = new double[] {3.5, 3.5, 2, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleNaNs);
|
||||
correctRanks = new double[] {3, 4, 1.5, 1.5};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleTies);
|
||||
correctRanks = new double[] {3, 2, 4.5, 4.5, 6.5, 6.5, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(allSame);
|
||||
correctRanks = new double[] {2.5, 2.5, 2.5, 2.5};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
}
|
||||
|
||||
public void testNaNsFixedTiesRandom() {
|
||||
RandomGenerator randomGenerator = new JDKRandomGenerator();
|
||||
randomGenerator.setSeed(1000);
|
||||
NaturalRanking ranking = new NaturalRanking(NaNStrategy.FIXED,
|
||||
randomGenerator);
|
||||
double[] ranks = ranking.rank(exampleData);
|
||||
double[] correctRanks = {5, 4, 6, 7, 3, 8, Double.NaN, 1, 4};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesFirst);
|
||||
correctRanks = new double[] {1, 1, 4, 3, 5};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(tiesLast);
|
||||
correctRanks = new double[] {3, 4, 2, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleNaNs);
|
||||
correctRanks = new double[] {1, 2, Double.NaN, Double.NaN};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(multipleTies);
|
||||
correctRanks = new double[] {3, 2, 5, 5, 7, 6, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranks = ranking.rank(allSame);
|
||||
correctRanks = new double[] {1, 3, 4, 4};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
}
|
||||
|
||||
public void testNaNsAndInfs() {
|
||||
double[] data = {0, Double.POSITIVE_INFINITY, Double.NaN, Double.NEGATIVE_INFINITY};
|
||||
NaturalRanking ranking = new NaturalRanking(NaNStrategy.MAXIMAL);
|
||||
double[] ranks = ranking.rank(data);
|
||||
double[] correctRanks = new double[] {2, 3.5, 3.5, 1};
|
||||
TestUtils.assertEquals(correctRanks, ranks, 0d);
|
||||
ranking = new NaturalRanking(NaNStrategy.MINIMAL);
|
||||
ranks = ranking.rank(data);
|
||||
correctRanks = new double[] {3, 4, 1.5, 1.5};
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue