View Javadoc
1   /*
2    * The baseCode project
3    * 
4    * Copyright (c) 2006 University of British Columbia
5    * 
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.math;
20  
21  import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix;
22  import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix1D;
23  import ubic.basecode.dataStructure.matrix.DoubleMatrix;
24  import ubic.basecode.dataStructure.matrix.MatrixUtil;
25  import ubic.basecode.dataStructure.matrix.SparseDoubleMatrix;
26  import cern.colt.function.DoubleFunction;
27  import cern.colt.function.DoubleProcedure;
28  import cern.colt.list.DoubleArrayList;
29  import cern.colt.matrix.DoubleFactory2D;
30  import cern.colt.matrix.DoubleMatrix1D;
31  import cern.colt.matrix.DoubleMatrix2D;
32  import cern.colt.matrix.linalg.Algebra;
33  import cern.jet.math.Functions;
34  
35  /**
36   * @author pavlidis
37   */
38  public class MatrixStats {
39  
40      /**
41       * NaN values are omitted from calculations.
42       * 
43       * @param <R>
44       * @param <C>
45       * @param data
46       * @return column sums
47       */
48      public static <R, C> DoubleMatrix1D colSums( DoubleMatrix<R, C> data ) {
49          assert data != null;
50          DoubleMatrix1D librarySize = new DenseDoubleMatrix1D( data.columns() );
51          for ( int i = 0; i < librarySize.size(); i++ ) {
52              librarySize.set( i, DescriptiveWithMissing.sum( new DoubleArrayList( data.getColumn( i ) ) ) );
53          }
54          return librarySize;
55      }
56  
57      /**
58       * Convert a log_b-transformed data set to log 2.
59       * 
60       * @param matrix
61       * @param base the current base
62       */
63      public static <R, C> void convertToLog2( DoubleMatrix<R, C> matrix, double base ) {
64          double v = Math.log( 2.0 ) / Math.log( base );
65          for ( int j = 0; j < matrix.rows(); j++ ) {
66              DoubleMatrix1D row = matrix.viewRow( j );
67              row.assign( Functions.div( v ) );
68              for ( int i = 0; i < row.size(); i++ ) {
69                  matrix.set( j, i, row.get( i ) );
70              }
71          }
72      }
73  
74      /**
75       * Convert a count matrix to log2 counts per million. Equivalent to
76       * <code>t(log2(t(counts+0.5)/(lib.size+1)*1e6))</code>
77       * in R. (NOTE: originally this did the operation in place)
78       * 
79       * @param matrix
80       * @param librarySize if null, it will default to <code>colSums(matrix)</code>.
81       * @return
82       * @return transformed matrix
83       */
84      public static <R, C> DoubleMatrix<R, C> convertToLog2Cpm( DoubleMatrix<R, C> matrix, DoubleMatrix1D librarySize ) {
85  
86          if ( librarySize == null ) {
87              librarySize = new DenseDoubleMatrix1D( matrix.columns() );
88              for ( int i = 0; i < librarySize.size(); i++ ) {
89                  librarySize.set( i, DescriptiveWithMissing.sum( new DoubleArrayList( matrix.getColumn( i ) ) ) );
90              }
91          }
92          DoubleMatrix<R, C> newmatrix = matrix.copy();
93          assert librarySize.size() == newmatrix.columns();
94  
95          for ( int j = 0; j < newmatrix.rows(); j++ ) {
96              DoubleMatrix1D row = newmatrix.viewRow( j );
97              for ( int i = 0; i < row.size(); i++ ) {
98                  double val = newmatrix.get( j, i );
99                  val = ( val + 0.5 ) / ( librarySize.get( i ) + 1.0 ) * Math.pow( 10, 6 );
100                 val = Math.log( val ) / Math.log( 2.0 );
101                 newmatrix.set( j, i, val );
102             }
103         }
104 
105         return newmatrix;
106     }
107 
108     /**
109      * Compute the correlation matrix of the rows of a matrix.
110      * 
111      * @param data
112      * @return a symmetric matrix that has the rows and columns set to be the names of the rows of the input.
113      */
114     public static <R, C> DoubleMatrix<R, R> correlationMatrix( DoubleMatrix<R, C> data ) {
115         DoubleMatrix<R, R> result = new DenseDoubleMatrix<>( data.rows(), data.rows() );
116 
117         for ( int i = 0; i < data.rows(); i++ ) {
118             DoubleArrayList irow = new DoubleArrayList( data.getRow( i ) );
119             for ( int j = i; j < data.rows(); j++ ) {
120                 if ( j == i ) {
121                     result.set( i, j, 1.0 );
122                     continue;
123                 }
124                 DoubleArrayList jrow = new DoubleArrayList( data.getRow( j ) );
125                 double c = DescriptiveWithMissing.correlation( irow, jrow );
126                 result.set( i, j, c );
127                 result.set( j, i, c );
128             }
129         }
130         result.setRowNames( data.getRowNames() );
131         result.setColumnNames( data.getRowNames() );
132 
133         return result;
134     }
135 
136     /**
137      * @param data DenseDoubleMatrix2DNamed
138      * @param threshold only correlations with absolute values above this level are stored (others are Double.NaN)
139      * @return a sparse symmetric matrix that has the rows and columns set to be the names of the rows of the input. The
140      *         diagonal is set to Double.NaN
141      */
142     public static <R, C> SparseDoubleMatrix<R, R> correlationMatrix( DoubleMatrix<R, C> data, double threshold ) {
143         SparseDoubleMatrix<R, R> result = new SparseDoubleMatrix<>( data.rows(), data.rows() );
144 
145         for ( int i = 0; i < data.rows(); i++ ) {
146             DoubleArrayList irow = new DoubleArrayList( data.getRow( i ) );
147             result.set( i, i, Double.NaN );
148             for ( int j = i + 1; j < data.rows(); j++ ) {
149                 DoubleArrayList jrow = new DoubleArrayList( data.getRow( j ) );
150                 double c = DescriptiveWithMissing.correlation( irow, jrow );
151                 if ( Math.abs( c ) > threshold ) {
152                     result.set( i, j, c );
153                     result.set( j, i, c );
154                 } else {
155                     result.set( i, j, Double.NaN );
156                     result.set( j, i, Double.NaN );
157                 }
158             }
159         }
160         result.setRowNames( data.getRowNames() );
161         result.setColumnNames( data.getRowNames() );
162 
163         return result;
164     }
165 
166     /**
167      * Iteratively standardize the columns and rows of the matrix.
168      * 
169      * @param data
170      */
171     public static <R, C> DoubleMatrix<R, C> doubleStandardize( DoubleMatrix<R, C> matrix ) {
172         DoubleMatrix<R, C> newMatrix = matrix.copy();
173 
174         int ITERS = 5;
175         for ( int i = 0; i < ITERS; i++ ) {
176             // scale columns, then rows.
177             newMatrix = standardize( standardize( newMatrix.transpose() /* columns */ ).transpose() /* rows */ );
178         }
179 
180         /*
181          * Check convergence.DEBUG CODE
182          */
183         MatrixRowStats.means( newMatrix ).forEach( new DoubleProcedure() {
184             @Override
185             public boolean apply( double element ) {
186                 if ( Math.abs( element ) > 0.02 ) {
187                     // throw new IllegalStateException( "Row mean was: " + Math.abs( element ) );
188                 }
189                 return true;
190             }
191         } );
192 
193         MatrixRowStats.means( newMatrix.transpose() ).forEach( new DoubleProcedure() {
194             @Override
195             public boolean apply( double element ) {
196                 if ( Math.abs( element ) > 0.1 ) {
197                     // throw new IllegalStateException( "Column mean was: " + Math.abs( element ) );
198                 }
199                 return true;
200             }
201         } );
202 
203         MatrixRowStats.sampleStandardDeviations( newMatrix ).forEach( new DoubleProcedure() {
204             @Override
205             public boolean apply( double element ) {
206                 if ( Math.abs( element - 1.0 ) > 0.1 ) {
207                     // throw new IllegalStateException();
208                 }
209                 return true;
210             }
211         } );
212 
213         MatrixRowStats.sampleStandardDeviations( newMatrix.transpose() ).forEach( new DoubleProcedure() {
214             @Override
215             public boolean apply( double element ) {
216                 if ( Math.abs( element - 1.0 ) > 0.1 ) {
217                     // throw new IllegalStateException();
218                 }
219                 return true;
220             }
221         } );
222 
223         return newMatrix;
224     }
225 
226     /**
227      * Log-transform the values in a matrix (log base 2). Values that are less than or equal to zero are left as
228      * Double.NaN.
229      * 
230      * @param matrixToNormalize
231      */
232     public static <R, C> void logTransform( DoubleMatrix<R, C> matrix ) {
233         for ( int j = 0; j < matrix.rows(); j++ ) {
234             DoubleMatrix1D row = matrix.viewRow( j );
235             row.assign( Functions.log2 );
236             for ( int i = 0; i < row.size(); i++ ) {
237                 matrix.set( j, i, row.get( i ) );
238             }
239         }
240     }
241 
242     /**
243      * Compute the maximum value in the matrix.
244      * 
245      * @param matrix DenseDoubleMatrix2DNamed
246      * @return the largest value in the matrix
247      */
248     public static <R, C> double max( DoubleMatrix<R, C> matrix ) {
249 
250         int totalRows = matrix.rows();
251         int totalColumns = matrix.columns();
252 
253         double max = -Double.MAX_VALUE;
254 
255         for ( int i = 0; i < totalRows; i++ ) {
256             for ( int j = 0; j < totalColumns; j++ ) {
257                 double val = matrix.get( i, j );
258                 if ( Double.isNaN( val ) ) {
259                     continue;
260                 }
261 
262                 if ( val > max ) {
263                     max = val;
264                 }
265             }
266         }
267 
268         if ( max == -Double.MAX_VALUE ) {
269             return Double.NaN;
270         }
271 
272         return max; // might be NaN if all values are missing
273 
274     }
275 
276     /**
277      * Find the minimum of the entire matrix.
278      * 
279      * @param matrix DenseDoubleMatrix2DNamed
280      * @return the smallest value in the matrix
281      */
282     public static <R, C> double min( DoubleMatrix<R, C> matrix ) {
283 
284         int totalRows = matrix.rows();
285         int totalColumns = matrix.columns();
286 
287         double min = Double.MAX_VALUE;
288 
289         for ( int i = 0; i < totalRows; i++ ) {
290             for ( int j = 0; j < totalColumns; j++ ) {
291                 double val = matrix.get( i, j );
292                 if ( Double.isNaN( val ) ) {
293                     continue;
294                 }
295 
296                 if ( val < min ) {
297                     min = val;
298                 }
299 
300             }
301         }
302         if ( min == Double.MAX_VALUE ) {
303             return Double.NaN;
304         }
305         return min; // might be NaN if all values are missing
306 
307     } // end min
308 
309     /**
310      * @param data
311      * @return matrix indicating whether each value in the input matix is NaN.
312      */
313     public static boolean[][] nanStatusMatrix( double[][] data ) {
314         boolean[][] result = new boolean[data.length][];
315         for ( int i = 0; i < data.length; i++ ) {
316             double[] row = data[i];
317             result[i] = new boolean[data[i].length];
318             for ( int j = 0; j < row.length; j++ ) {
319                 result[i][j] = Double.isNaN( data[i][j] );
320             }
321         }
322         return result;
323     }
324 
325     /**
326      * Normalize a matrix in place to be a transition matrix. Assumes that values operate such that small values like p
327      * values represent closer distances, and the values are probabilities.
328      * <p>
329      * Each point is first transformed via v' = exp(-v/sigma). Then the values for each node's edges are adjusted to sum
330      * to 1.
331      * 
332      * @param matrixToNormalize
333      * @param sigma a scaling factor for the input values.
334      */
335     public static <R, C> void rbfNormalize( DoubleMatrix<R, C> matrixToNormalize, final double sigma ) {
336 
337         // define the function we will use.
338         DoubleFunction f = new DoubleFunction() {
339             @Override
340             public double apply( double value ) {
341                 return Math.exp( -value / sigma );
342             }
343         };
344 
345         for ( int j = 0; j < matrixToNormalize.rows(); j++ ) {
346 
347             DoubleMatrix1D row = matrixToNormalize.viewRow( j );
348             row.assign( f );
349             double sum = row.zSum();
350             row.assign( Functions.div( sum ) );
351 
352         }
353     }
354 
355     /**
356      * @param input raw double 2-d matrix
357      * @return the element-by-element product (not matrix product) of the matrix.
358      */
359     public static double[][] selfSquaredMatrix( double[][] input ) {
360         double[][] returnValue = new double[input.length][];
361         for ( int i = 0; i < returnValue.length; i++ ) {
362             returnValue[i] = new double[input[i].length];
363 
364             for ( int j = 0; j < returnValue[i].length; j++ ) {
365                 returnValue[i][j] = input[i][j] * input[i][j];
366             }
367 
368         }
369         return returnValue;
370     }
371 
372     /**
373      * Scale the rows of the matrix; returns a new matrix.
374      * 
375      * @param <R>
376      * @param <C>
377      * @param data
378      * @return
379      */
380     public static <R, C> DoubleMatrix<R, C> standardize( DoubleMatrix<R, C> matrix ) {
381         DoubleMatrix<R, C> newMatrix = new DenseDoubleMatrix<>( matrix.rows(), matrix.columns() );
382         newMatrix.setRowNames( matrix.getRowNames() );
383         newMatrix.setColumnNames( matrix.getColNames() );
384         for ( int i = 0; i < matrix.rows(); i++ ) {
385             double[] row = matrix.getRow( i );
386             DoubleArrayList li = new DoubleArrayList( row );
387             DescriptiveWithMissing.standardize( li );
388 
389             /*
390              * DEBUG CODE
391              */
392             if ( Math.abs( DescriptiveWithMissing.mean( li ) ) > 0.001 ) {
393                 throw new IllegalStateException( "NOT CENTERED" );
394             }
395             if ( Math.abs(
396                     DescriptiveWithMissing.sampleVariance( li, DescriptiveWithMissing.mean( li ) ) - 1.0 ) > 0.001 ) {
397                 throw new IllegalStateException( "NOT SCALED" );
398             }
399 
400             for ( int j = 0; j < matrix.columns(); j++ ) {
401                 newMatrix.set( i, j, li.getQuick( j ) );
402             }
403         }
404         return newMatrix;
405     }
406 
407     /**
408      * Scale a covariance matrix to the corresponding correlation matrix.
409      * 
410      * @param cov a symmetric matrix of covariances
411      * @return
412      */
413     public static DoubleMatrix2D cov2cor( DoubleMatrix2D cov ) {
414         assert cov.rows() == cov.columns();
415         DoubleMatrix1D v = MatrixUtil.diagonal( cov ).assign( Functions.sqrt ).assign( Functions.inv );
416         Algebra solver = new Algebra();
417         DoubleFactory2D f = DoubleFactory2D.sparse;
418         DoubleMatrix2D vd = f.diagonal( v );
419         DoubleMatrix2D cormatrix = solver.mult( solver.mult( vd, cov ), vd );
420         return cormatrix;
421     }
422 
423     /**
424      * Undo log2 transform.
425      * 
426      * @param <R>
427      * @param <C>
428      * @param matrix
429      */
430     public static <R, C> void unLogTransform( DoubleMatrix<R, C> matrix ) {
431         for ( int j = 0; j < matrix.rows(); j++ ) {
432             DoubleMatrix1D row = matrix.viewRow( j );
433             for ( int i = 0; i < row.size(); i++ ) {
434                 row.setQuick( i, Math.pow( 2.0, row.getQuick( i ) ) );
435             }
436         }
437 
438     }
439 
440 }