From b72f31c6bdc26cea7e29248e84556d4804bf81bf Mon Sep 17 00:00:00 2001 From: Phil Steitz Date: Sat, 9 Nov 2013 21:32:06 +0000 Subject: [PATCH] Clarified contracts re NaNs, IAEs and when constructor arguments are necessary. git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1540395 13f79535-47bb-0310-9956-ffa450edef68 --- .../stat/correlation/PearsonsCorrelation.java | 77 +++++++++++++++---- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java b/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java index 0234ec435..695b2adca 100644 --- a/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java +++ b/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java @@ -34,9 +34,17 @@ import org.apache.commons.math3.util.FastMath; * double[][] arguments generate correlation matrices. The * columns of the input matrices are assumed to represent variable values. * Correlations are given by the formula

- * cor(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)] + * + *

cor(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)] * where E(X) is the mean of X, E(Y) - * is the mean of the Y values and s(X), s(Y) are standard deviations. + * is the mean of the Y values and s(X), s(Y) are standard deviations.

+ * + *

To compute the correlation coefficient for a single pair of arrays, use {@link #PearsonsCorrelation()} + * to construct an instance with no data and then {@link #correlation(double[], double[])}. + * Correlation matrices can also be computed directly from an instance with no data using + * {@link #computeCorrelationMatrix(double[][])}. In order to use {@link #getCorrelationMatrix()}, + * {@link #getCorrelationPValues()}, or {@link #getCorrelationStandardErrors()}, however, one of the + * constructors supplying data or a covariance matrix must be used to create the instance.

* * @version $Id$ * @since 2.0 @@ -50,7 +58,7 @@ public class PearsonsCorrelation { private final int nObs; /** - * Create a PearsonsCorrelation instance without data + * Create a PearsonsCorrelation instance without data. */ public PearsonsCorrelation() { super(); @@ -62,9 +70,14 @@ public class PearsonsCorrelation { * Create a PearsonsCorrelation from a rectangular array * whose columns represent values of variables to be correlated. * + * Throws MathIllegalArgumentException if the input array does not have at least + * two columns and two rows. Pairwise correlations are set to NaN if one + * of the correlates has zero variance. + * * @param data rectangular array with columns representing variables - * @throws IllegalArgumentException if the input data array is not + * @throws MathIllegalArgumentException if the input data array is not * rectangular with at least two rows and two columns. + * @see #correlation(double[], double[]) */ public PearsonsCorrelation(double[][] data) { this(new BlockRealMatrix(data)); @@ -74,10 +87,15 @@ public class PearsonsCorrelation { * Create a PearsonsCorrelation from a RealMatrix whose columns * represent variables to be correlated. * + * Throws MathIllegalArgumentException if the matrix does not have at least + * two columns and two rows. Pairwise correlations are set to NaN if one + * of the correlates has zero variance. + * * @param matrix matrix with columns representing variables to correlate + * @throws MathIllegalArgumentException if the matrix does not contain sufficient data + * @see #correlation(double[], double[]) */ public PearsonsCorrelation(RealMatrix matrix) { - checkSufficientData(matrix); nObs = matrix.getRowDimension(); correlationMatrix = computeCorrelationMatrix(matrix); } @@ -100,7 +118,7 @@ public class PearsonsCorrelation { } /** - * Create a PearsonsCorrelation from a covariance matrix. The correlation + * Create a PearsonsCorrelation from a covariance matrix. 
The correlation * matrix is computed by scaling the covariance matrix. * * @param covarianceMatrix covariance matrix @@ -110,11 +128,14 @@ public class PearsonsCorrelation { public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) { nObs = numberOfObservations; correlationMatrix = covarianceToCorrelation(covarianceMatrix); - } /** - * Returns the correlation matrix + * Returns the correlation matrix. + * + *

This method will return null if the argumentless constructor was used + * to create this instance, even if {@link #computeCorrelationMatrix(double[][])} + * has been called before this method is invoked.

* * @return correlation matrix */ @@ -127,12 +148,17 @@ public class PearsonsCorrelation { * in the correlation matrix.
* getCorrelationStandardErrors().getEntry(i,j) is the standard * error associated with getCorrelationMatrix().getEntry(i,j) + * *

The formula used to compute the standard error is
* SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup> * where r is the estimated correlation coefficient and * n is the number of observations in the source dataset.

* + *

To use this method, one of the constructors that supply an input + * matrix must have been used to create this instance.

+ * * @return matrix of correlation standard errors + * @throws NullPointerException if this instance was created with no data */ public RealMatrix getCorrelationStandardErrors() { int nVars = correlationMatrix.getColumnDimension(); @@ -149,16 +175,22 @@ public class PearsonsCorrelation { /** * Returns a matrix of p-values associated with the (two-sided) null * hypothesis that the corresponding correlation coefficient is zero. + * *

getCorrelationPValues().getEntry(i,j) is the probability * that a random variable distributed as t<sub>n-2</sub> takes * a value with absolute value greater than or equal to
* |r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup>

+ * *

The values in the matrix are sometimes referred to as the * significance of the corresponding correlation coefficients.

* + *

To use this method, one of the constructors that supply an input + * matrix must have been used to create this instance.

+ * * @return matrix of p-values * @throws org.apache.commons.math3.exception.MaxCountExceededException * if an error occurs estimating probabilities + * @throws NullPointerException if this instance was created with no data */ public RealMatrix getCorrelationPValues() { TDistribution tDistribution = new TDistribution(nObs - 2); @@ -181,12 +213,19 @@ public class PearsonsCorrelation { /** * Computes the correlation matrix for the columns of the - * input matrix. + * input matrix, using {@link #correlation(double[], double[])}. + * + * Throws MathIllegalArgumentException if the matrix does not have at least + * two columns and two rows. Pairwise correlations are set to NaN if one + * of the correlates has zero variance. * * @param matrix matrix with columns representing variables to correlate * @return correlation matrix + * @throws MathIllegalArgumentException if the matrix does not contain sufficient data + * @see #correlation(double[], double[]) */ public RealMatrix computeCorrelationMatrix(RealMatrix matrix) { + checkSufficientData(matrix); int nVars = matrix.getColumnDimension(); RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars); for (int i = 0; i < nVars; i++) { @@ -202,21 +241,29 @@ public class PearsonsCorrelation { /** * Computes the correlation matrix for the columns of the - * input rectangular array. The colums of the array represent values + * input rectangular array. The columns of the array represent values * of variables to be correlated. * + * Throws MathIllegalArgumentException if the matrix does not have at least + * two columns and two rows or if the array is not rectangular. Pairwise + * correlations are set to NaN if one of the correlates has zero variance. 
+ * * @param data matrix with columns representing variables to correlate * @return correlation matrix + * @throws MathIllegalArgumentException if the array does not contain sufficient data + * @see #correlation(double[], double[]) */ public RealMatrix computeCorrelationMatrix(double[][] data) { return computeCorrelationMatrix(new BlockRealMatrix(data)); } /** - * Computes the Pearson's product-moment correlation coefficient between the two arrays. + * Computes the Pearson's product-moment correlation coefficient between two arrays. * - *

Throws IllegalArgumentException if the arrays do not have the same length - * or their common length is less than 2

+ *

Throws MathIllegalArgumentException if the arrays do not have the same length + * or their common length is less than 2. Returns {@code NaN} if either of the arrays + * has zero variance (i.e., if one of the arrays does not contain at least two distinct + * values).

* * @param xArray first data array * @param yArray second data array @@ -267,8 +314,8 @@ public class PearsonsCorrelation { } /** - * Throws IllegalArgumentException of the matrix does not have at least - * two columns and two rows + * Throws MathIllegalArgumentException if the matrix does not have at least + * two columns and two rows. * * @param matrix matrix to check for sufficiency * @throws MathIllegalArgumentException if there is insufficient data