001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.correlation;
018
019 import org.apache.commons.math.MathException;
020 import org.apache.commons.math.MathRuntimeException;
021 import org.apache.commons.math.distribution.TDistribution;
022 import org.apache.commons.math.distribution.TDistributionImpl;
023 import org.apache.commons.math.exception.util.LocalizedFormats;
024 import org.apache.commons.math.exception.NullArgumentException;
025 import org.apache.commons.math.exception.DimensionMismatchException;
026 import org.apache.commons.math.linear.RealMatrix;
027 import org.apache.commons.math.linear.BlockRealMatrix;
028 import org.apache.commons.math.stat.regression.SimpleRegression;
029 import org.apache.commons.math.util.FastMath;
030
031 /**
032 * Computes Pearson's product-moment correlation coefficients for pairs of arrays
033 * or columns of a matrix.
034 *
035 * <p>The constructors that take <code>RealMatrix</code> or
036 * <code>double[][]</code> arguments generate correlation matrices. The
037 * columns of the input matrices are assumed to represent variable values.
038 * Correlations are given by the formula</p>
039 * <code>cor(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code>
040 * where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code>
041 * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.
042 *
043 * @version $Revision: 990655 $ $Date: 2010-08-29 23:49:40 +0200 (dim. 29 ao??t 2010) $
044 * @since 2.0
045 */
046 public class PearsonsCorrelation {
047
048 /** correlation matrix */
049 private final RealMatrix correlationMatrix;
050
051 /** number of observations */
052 private final int nObs;
053
054 /**
055 * Create a PearsonsCorrelation instance without data
056 */
057 public PearsonsCorrelation() {
058 super();
059 correlationMatrix = null;
060 nObs = 0;
061 }
062
063 /**
064 * Create a PearsonsCorrelation from a rectangular array
065 * whose columns represent values of variables to be correlated.
066 *
067 * @param data rectangular array with columns representing variables
068 * @throws IllegalArgumentException if the input data array is not
069 * rectangular with at least two rows and two columns.
070 */
071 public PearsonsCorrelation(double[][] data) {
072 this(new BlockRealMatrix(data));
073 }
074
075 /**
076 * Create a PearsonsCorrelation from a RealMatrix whose columns
077 * represent variables to be correlated.
078 *
079 * @param matrix matrix with columns representing variables to correlate
080 */
081 public PearsonsCorrelation(RealMatrix matrix) {
082 checkSufficientData(matrix);
083 nObs = matrix.getRowDimension();
084 correlationMatrix = computeCorrelationMatrix(matrix);
085 }
086
087 /**
088 * Create a PearsonsCorrelation from a {@link Covariance}. The correlation
089 * matrix is computed by scaling the Covariance's covariance matrix.
090 * The Covariance instance must have been created from a data matrix with
091 * columns representing variable values.
092 *
093 * @param covariance Covariance instance
094 */
095 public PearsonsCorrelation(Covariance covariance) {
096 RealMatrix covarianceMatrix = covariance.getCovarianceMatrix();
097 if (covarianceMatrix == null) {
098 throw new NullArgumentException(LocalizedFormats.COVARIANCE_MATRIX);
099 }
100 nObs = covariance.getN();
101 correlationMatrix = covarianceToCorrelation(covarianceMatrix);
102 }
103
104 /**
105 * Create a PearsonsCorrelation from a covariance matrix. The correlation
106 * matrix is computed by scaling the covariance matrix.
107 *
108 * @param covarianceMatrix covariance matrix
109 * @param numberOfObservations the number of observations in the dataset used to compute
110 * the covariance matrix
111 */
112 public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) {
113 nObs = numberOfObservations;
114 correlationMatrix = covarianceToCorrelation(covarianceMatrix);
115
116 }
117
118 /**
119 * Returns the correlation matrix
120 *
121 * @return correlation matrix
122 */
123 public RealMatrix getCorrelationMatrix() {
124 return correlationMatrix;
125 }
126
127 /**
128 * Returns a matrix of standard errors associated with the estimates
129 * in the correlation matrix.<br/>
130 * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard
131 * error associated with <code>getCorrelationMatrix.getEntry(i,j)</code>
132 * <p>The formula used to compute the standard error is <br/>
133 * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code>
134 * where <code>r</code> is the estimated correlation coefficient and
135 * <code>n</code> is the number of observations in the source dataset.</p>
136 *
137 * @return matrix of correlation standard errors
138 */
139 public RealMatrix getCorrelationStandardErrors() {
140 int nVars = correlationMatrix.getColumnDimension();
141 double[][] out = new double[nVars][nVars];
142 for (int i = 0; i < nVars; i++) {
143 for (int j = 0; j < nVars; j++) {
144 double r = correlationMatrix.getEntry(i, j);
145 out[i][j] = FastMath.sqrt((1 - r * r) /(nObs - 2));
146 }
147 }
148 return new BlockRealMatrix(out);
149 }
150
151 /**
152 * Returns a matrix of p-values associated with the (two-sided) null
153 * hypothesis that the corresponding correlation coefficient is zero.
154 * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
155 * that a random variable distributed as <code>t<sub>n-2</sub></code> takes
156 * a value with absolute value greater than or equal to <br>
157 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
158 * <p>The values in the matrix are sometimes referred to as the
159 * <i>significance</i> of the corresponding correlation coefficients.</p>
160 *
161 * @return matrix of p-values
162 * @throws MathException if an error occurs estimating probabilities
163 */
164 public RealMatrix getCorrelationPValues() throws MathException {
165 TDistribution tDistribution = new TDistributionImpl(nObs - 2);
166 int nVars = correlationMatrix.getColumnDimension();
167 double[][] out = new double[nVars][nVars];
168 for (int i = 0; i < nVars; i++) {
169 for (int j = 0; j < nVars; j++) {
170 if (i == j) {
171 out[i][j] = 0d;
172 } else {
173 double r = correlationMatrix.getEntry(i, j);
174 double t = FastMath.abs(r * FastMath.sqrt((nObs - 2)/(1 - r * r)));
175 out[i][j] = 2 * tDistribution.cumulativeProbability(-t);
176 }
177 }
178 }
179 return new BlockRealMatrix(out);
180 }
181
182
183 /**
184 * Computes the correlation matrix for the columns of the
185 * input matrix.
186 *
187 * @param matrix matrix with columns representing variables to correlate
188 * @return correlation matrix
189 */
190 public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
191 int nVars = matrix.getColumnDimension();
192 RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
193 for (int i = 0; i < nVars; i++) {
194 for (int j = 0; j < i; j++) {
195 double corr = correlation(matrix.getColumn(i), matrix.getColumn(j));
196 outMatrix.setEntry(i, j, corr);
197 outMatrix.setEntry(j, i, corr);
198 }
199 outMatrix.setEntry(i, i, 1d);
200 }
201 return outMatrix;
202 }
203
204 /**
205 * Computes the correlation matrix for the columns of the
206 * input rectangular array. The colums of the array represent values
207 * of variables to be correlated.
208 *
209 * @param data matrix with columns representing variables to correlate
210 * @return correlation matrix
211 */
212 public RealMatrix computeCorrelationMatrix(double[][] data) {
213 return computeCorrelationMatrix(new BlockRealMatrix(data));
214 }
215
216 /**
217 * Computes the Pearson's product-moment correlation coefficient between the two arrays.
218 *
219 * </p>Throws IllegalArgumentException if the arrays do not have the same length
220 * or their common length is less than 2</p>
221 *
222 * @param xArray first data array
223 * @param yArray second data array
224 * @return Returns Pearson's correlation coefficient for the two arrays
225 * @throws IllegalArgumentException if the arrays lengths do not match or
226 * there is insufficient data
227 */
228 public double correlation(final double[] xArray, final double[] yArray) throws IllegalArgumentException {
229 SimpleRegression regression = new SimpleRegression();
230 if (xArray.length != yArray.length) {
231 throw new DimensionMismatchException(xArray.length, yArray.length);
232 } else if (xArray.length < 2) {
233 throw MathRuntimeException.createIllegalArgumentException(
234 LocalizedFormats.INSUFFICIENT_DIMENSION, xArray.length, 2);
235 } else {
236 for(int i=0; i<xArray.length; i++) {
237 regression.addData(xArray[i], yArray[i]);
238 }
239 return regression.getR();
240 }
241 }
242
243 /**
244 * Derives a correlation matrix from a covariance matrix.
245 *
246 * <p>Uses the formula <br/>
247 * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where
248 * <code>r(·,·)</code> is the correlation coefficient and
249 * <code>s(·)</code> means standard deviation.</p>
250 *
251 * @param covarianceMatrix the covariance matrix
252 * @return correlation matrix
253 */
254 public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) {
255 int nVars = covarianceMatrix.getColumnDimension();
256 RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
257 for (int i = 0; i < nVars; i++) {
258 double sigma = FastMath.sqrt(covarianceMatrix.getEntry(i, i));
259 outMatrix.setEntry(i, i, 1d);
260 for (int j = 0; j < i; j++) {
261 double entry = covarianceMatrix.getEntry(i, j) /
262 (sigma * FastMath.sqrt(covarianceMatrix.getEntry(j, j)));
263 outMatrix.setEntry(i, j, entry);
264 outMatrix.setEntry(j, i, entry);
265 }
266 }
267 return outMatrix;
268 }
269
270 /**
271 * Throws IllegalArgumentException of the matrix does not have at least
272 * two columns and two rows
273 *
274 * @param matrix matrix to check for sufficiency
275 */
276 private void checkSufficientData(final RealMatrix matrix) {
277 int nRows = matrix.getRowDimension();
278 int nCols = matrix.getColumnDimension();
279 if (nRows < 2 || nCols < 2) {
280 throw MathRuntimeException.createIllegalArgumentException(
281 LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
282 nRows, nCols);
283 }
284 }
285 }