Clarified contracts re NaNs, IAEs and when constructor arguments are necessary.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1540395 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
eca87abcee
commit
b72f31c6bd
|
@ -34,9 +34,17 @@ import org.apache.commons.math3.util.FastMath;
|
||||||
* <code>double[][]</code> arguments generate correlation matrices. The
|
* <code>double[][]</code> arguments generate correlation matrices. The
|
||||||
* columns of the input matrices are assumed to represent variable values.
|
* columns of the input matrices are assumed to represent variable values.
|
||||||
* Correlations are given by the formula</p>
|
* Correlations are given by the formula</p>
|
||||||
* <code>cor(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code>
|
*
|
||||||
|
* <p><code>cor(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code>
|
||||||
* where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code>
|
* where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code>
|
||||||
* is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.
|
* is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.</p>
|
||||||
|
*
|
||||||
|
* <p>To compute the correlation coefficient for a single pair of arrays, use {@link #PearsonsCorrelation()}
|
||||||
|
* to construct an instance with no data and then {@link #correlation(double[], double[])}.
|
||||||
|
* Correlation matrices can also be computed directly from an instance with no data using
|
||||||
|
* {@link #computeCorrelationMatrix(double[][])}. In order to use {@link #getCorrelationMatrix()},
|
||||||
|
* {@link #getCorrelationPValues()}, or {@link #getCorrelationStandardErrors()}, however, one of the
|
||||||
|
* constructors supplying data or a covariance matrix must be used to create the instance.</p>
|
||||||
*
|
*
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
* @since 2.0
|
* @since 2.0
|
||||||
|
@ -50,7 +58,7 @@ public class PearsonsCorrelation {
|
||||||
private final int nObs;
|
private final int nObs;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a PearsonsCorrelation instance without data
|
* Create a PearsonsCorrelation instance without data.
|
||||||
*/
|
*/
|
||||||
public PearsonsCorrelation() {
|
public PearsonsCorrelation() {
|
||||||
super();
|
super();
|
||||||
|
@ -62,9 +70,14 @@ public class PearsonsCorrelation {
|
||||||
* Create a PearsonsCorrelation from a rectangular array
|
* Create a PearsonsCorrelation from a rectangular array
|
||||||
* whose columns represent values of variables to be correlated.
|
* whose columns represent values of variables to be correlated.
|
||||||
*
|
*
|
||||||
|
* Throws MathIllegalArgumentException if the input array does not have at least
|
||||||
|
* two columns and two rows. Pairwise correlations are set to NaN if one
|
||||||
|
* of the correlates has zero variance.
|
||||||
|
*
|
||||||
* @param data rectangular array with columns representing variables
|
* @param data rectangular array with columns representing variables
|
||||||
* @throws IllegalArgumentException if the input data array is not
|
* @throws MathIllegalArgumentException if the input data array is not
|
||||||
* rectangular with at least two rows and two columns.
|
* rectangular with at least two rows and two columns.
|
||||||
|
* @see #correlation(double[], double[])
|
||||||
*/
|
*/
|
||||||
public PearsonsCorrelation(double[][] data) {
|
public PearsonsCorrelation(double[][] data) {
|
||||||
this(new BlockRealMatrix(data));
|
this(new BlockRealMatrix(data));
|
||||||
|
@ -74,10 +87,15 @@ public class PearsonsCorrelation {
|
||||||
* Create a PearsonsCorrelation from a RealMatrix whose columns
|
* Create a PearsonsCorrelation from a RealMatrix whose columns
|
||||||
* represent variables to be correlated.
|
* represent variables to be correlated.
|
||||||
*
|
*
|
||||||
|
* Throws MathIllegalArgumentException if the matrix does not have at least
|
||||||
|
* two columns and two rows. Pairwise correlations are set to NaN if one
|
||||||
|
* of the correlates has zero variance.
|
||||||
|
*
|
||||||
* @param matrix matrix with columns representing variables to correlate
|
* @param matrix matrix with columns representing variables to correlate
|
||||||
|
* @throws MathIllegalArgumentException if the matrix does not contain sufficient data
|
||||||
|
* @see #correlation(double[], double[])
|
||||||
*/
|
*/
|
||||||
public PearsonsCorrelation(RealMatrix matrix) {
|
public PearsonsCorrelation(RealMatrix matrix) {
|
||||||
checkSufficientData(matrix);
|
|
||||||
nObs = matrix.getRowDimension();
|
nObs = matrix.getRowDimension();
|
||||||
correlationMatrix = computeCorrelationMatrix(matrix);
|
correlationMatrix = computeCorrelationMatrix(matrix);
|
||||||
}
|
}
|
||||||
|
@ -100,7 +118,7 @@ public class PearsonsCorrelation {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a PearsonsCorrelation from a covariance matrix. The correlation
|
* Create a PearsonsCorrelation from a covariance matrix. The correlation
|
||||||
* matrix is computed by scaling the covariance matrix.
|
* matrix is computed by scaling the covariance matrix.
|
||||||
*
|
*
|
||||||
* @param covarianceMatrix covariance matrix
|
* @param covarianceMatrix covariance matrix
|
||||||
|
@ -110,11 +128,14 @@ public class PearsonsCorrelation {
|
||||||
public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) {
|
public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) {
|
||||||
nObs = numberOfObservations;
|
nObs = numberOfObservations;
|
||||||
correlationMatrix = covarianceToCorrelation(covarianceMatrix);
|
correlationMatrix = covarianceToCorrelation(covarianceMatrix);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the correlation matrix
|
* Returns the correlation matrix.
|
||||||
|
*
|
||||||
|
* <p>This method will return null if the argumentless constructor was used
|
||||||
|
* to create this instance, even if {@link #computeCorrelationMatrix(double[][])}
|
||||||
|
* has been called before this method is invoked.</p>
|
||||||
*
|
*
|
||||||
* @return correlation matrix
|
* @return correlation matrix
|
||||||
*/
|
*/
|
||||||
|
@ -127,12 +148,17 @@ public class PearsonsCorrelation {
|
||||||
* in the correlation matrix.<br/>
|
* in the correlation matrix.<br/>
|
||||||
* <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard
|
* <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard
|
||||||
* error associated with <code>getCorrelationMatrix().getEntry(i,j)</code>
|
* error associated with <code>getCorrelationMatrix().getEntry(i,j)</code>
|
||||||
|
*
|
||||||
* <p>The formula used to compute the standard error is <br/>
|
* <p>The formula used to compute the standard error is <br/>
|
||||||
* <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code>
|
* <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code>
|
||||||
* where <code>r</code> is the estimated correlation coefficient and
|
* where <code>r</code> is the estimated correlation coefficient and
|
||||||
* <code>n</code> is the number of observations in the source dataset.</p>
|
* <code>n</code> is the number of observations in the source dataset.</p>
|
||||||
*
|
*
|
||||||
|
* <p>To use this method, one of the constructors that supply an input
|
||||||
|
* matrix must have been used to create this instance.</p>
|
||||||
|
*
|
||||||
* @return matrix of correlation standard errors
|
* @return matrix of correlation standard errors
|
||||||
|
* @throws NullPointerException if this instance was created with no data
|
||||||
*/
|
*/
|
||||||
public RealMatrix getCorrelationStandardErrors() {
|
public RealMatrix getCorrelationStandardErrors() {
|
||||||
int nVars = correlationMatrix.getColumnDimension();
|
int nVars = correlationMatrix.getColumnDimension();
|
||||||
|
@ -149,16 +175,22 @@ public class PearsonsCorrelation {
|
||||||
/**
|
/**
|
||||||
* Returns a matrix of p-values associated with the (two-sided) null
|
* Returns a matrix of p-values associated with the (two-sided) null
|
||||||
* hypothesis that the corresponding correlation coefficient is zero.
|
* hypothesis that the corresponding correlation coefficient is zero.
|
||||||
|
*
|
||||||
* <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
|
* <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
|
||||||
* that a random variable distributed as <code>t<sub>n-2</sub></code> takes
|
* that a random variable distributed as <code>t<sub>n-2</sub></code> takes
|
||||||
* a value with absolute value greater than or equal to <br>
|
* a value with absolute value greater than or equal to <br>
|
||||||
* <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
|
* <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
|
||||||
|
*
|
||||||
* <p>The values in the matrix are sometimes referred to as the
|
* <p>The values in the matrix are sometimes referred to as the
|
||||||
* <i>significance</i> of the corresponding correlation coefficients.</p>
|
* <i>significance</i> of the corresponding correlation coefficients.</p>
|
||||||
*
|
*
|
||||||
|
* <p>To use this method, one of the constructors that supply an input
|
||||||
|
* matrix must have been used to create this instance.</p>
|
||||||
|
*
|
||||||
* @return matrix of p-values
|
* @return matrix of p-values
|
||||||
* @throws org.apache.commons.math3.exception.MaxCountExceededException
|
* @throws org.apache.commons.math3.exception.MaxCountExceededException
|
||||||
* if an error occurs estimating probabilities
|
* if an error occurs estimating probabilities
|
||||||
|
* @throws NullPointerException if this instance was created with no data
|
||||||
*/
|
*/
|
||||||
public RealMatrix getCorrelationPValues() {
|
public RealMatrix getCorrelationPValues() {
|
||||||
TDistribution tDistribution = new TDistribution(nObs - 2);
|
TDistribution tDistribution = new TDistribution(nObs - 2);
|
||||||
|
@ -181,12 +213,19 @@ public class PearsonsCorrelation {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the correlation matrix for the columns of the
|
* Computes the correlation matrix for the columns of the
|
||||||
* input matrix.
|
* input matrix, using {@link #correlation(double[], double[])}.
|
||||||
|
*
|
||||||
|
* Throws MathIllegalArgumentException if the matrix does not have at least
|
||||||
|
* two columns and two rows. Pairwise correlations are set to NaN if one
|
||||||
|
* of the correlates has zero variance.
|
||||||
*
|
*
|
||||||
* @param matrix matrix with columns representing variables to correlate
|
* @param matrix matrix with columns representing variables to correlate
|
||||||
* @return correlation matrix
|
* @return correlation matrix
|
||||||
|
* @throws MathIllegalArgumentException if the matrix does not contain sufficient data
|
||||||
|
* @see #correlation(double[], double[])
|
||||||
*/
|
*/
|
||||||
public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
|
public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
|
||||||
|
checkSufficientData(matrix);
|
||||||
int nVars = matrix.getColumnDimension();
|
int nVars = matrix.getColumnDimension();
|
||||||
RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
|
RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
|
||||||
for (int i = 0; i < nVars; i++) {
|
for (int i = 0; i < nVars; i++) {
|
||||||
|
@ -202,21 +241,29 @@ public class PearsonsCorrelation {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the correlation matrix for the columns of the
|
* Computes the correlation matrix for the columns of the
|
||||||
* input rectangular array. The colums of the array represent values
|
* input rectangular array. The columns of the array represent values
|
||||||
* of variables to be correlated.
|
* of variables to be correlated.
|
||||||
*
|
*
|
||||||
|
* Throws MathIllegalArgumentException if the input array does not have at least
|
||||||
|
* two columns and two rows or if the array is not rectangular. Pairwise
|
||||||
|
* correlations are set to NaN if one of the correlates has zero variance.
|
||||||
|
*
|
||||||
* @param data matrix with columns representing variables to correlate
|
* @param data matrix with columns representing variables to correlate
|
||||||
* @return correlation matrix
|
* @return correlation matrix
|
||||||
|
* @throws MathIllegalArgumentException if the array does not contain sufficient data
|
||||||
|
* @see #correlation(double[], double[])
|
||||||
*/
|
*/
|
||||||
public RealMatrix computeCorrelationMatrix(double[][] data) {
|
public RealMatrix computeCorrelationMatrix(double[][] data) {
|
||||||
return computeCorrelationMatrix(new BlockRealMatrix(data));
|
return computeCorrelationMatrix(new BlockRealMatrix(data));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the Pearson's product-moment correlation coefficient between the two arrays.
|
* Computes the Pearson's product-moment correlation coefficient between two arrays.
|
||||||
*
|
*
|
||||||
* </p>Throws IllegalArgumentException if the arrays do not have the same length
|
* <p>Throws MathIllegalArgumentException if the arrays do not have the same length
|
||||||
* or their common length is less than 2</p>
|
* or their common length is less than 2. Returns {@code NaN} if either of the arrays
|
||||||
|
* has zero variance (i.e., if one of the arrays does not contain at least two distinct
|
||||||
|
* values).</p>
|
||||||
*
|
*
|
||||||
* @param xArray first data array
|
* @param xArray first data array
|
||||||
* @param yArray second data array
|
* @param yArray second data array
|
||||||
|
@ -267,8 +314,8 @@ public class PearsonsCorrelation {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Throws IllegalArgumentException of the matrix does not have at least
|
* Throws MathIllegalArgumentException if the matrix does not have at least
|
||||||
* two columns and two rows
|
* two columns and two rows.
|
||||||
*
|
*
|
||||||
* @param matrix matrix to check for sufficiency
|
* @param matrix matrix to check for sufficiency
|
||||||
* @throws MathIllegalArgumentException if there is insufficient data
|
* @throws MathIllegalArgumentException if there is insufficient data
|
||||||
|
|
Loading…
Reference in New Issue