1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package ubic.basecode.math;
20
21 import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix;
22 import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix1D;
23 import ubic.basecode.dataStructure.matrix.DoubleMatrix;
24 import ubic.basecode.dataStructure.matrix.MatrixUtil;
25 import ubic.basecode.dataStructure.matrix.SparseDoubleMatrix;
26 import cern.colt.function.DoubleFunction;
27 import cern.colt.function.DoubleProcedure;
28 import cern.colt.list.DoubleArrayList;
29 import cern.colt.matrix.DoubleFactory2D;
30 import cern.colt.matrix.DoubleMatrix1D;
31 import cern.colt.matrix.DoubleMatrix2D;
32 import cern.colt.matrix.linalg.Algebra;
33 import cern.jet.math.Functions;
34
35
36
37
38 public class MatrixStats {
39
40
41
42
43
44
45
46
47
48 public static <R, C> DoubleMatrix1D colSums( DoubleMatrix<R, C> data ) {
49 assert data != null;
50 DoubleMatrix1D librarySize = new DenseDoubleMatrix1D( data.columns() );
51 for ( int i = 0; i < librarySize.size(); i++ ) {
52 librarySize.set( i, DescriptiveWithMissing.sum( new DoubleArrayList( data.getColumn( i ) ) ) );
53 }
54 return librarySize;
55 }
56
57
58
59
60
61
62
63 public static <R, C> void convertToLog2( DoubleMatrix<R, C> matrix, double base ) {
64 double v = Math.log( 2.0 ) / Math.log( base );
65 for ( int j = 0; j < matrix.rows(); j++ ) {
66 DoubleMatrix1D row = matrix.viewRow( j );
67 row.assign( Functions.div( v ) );
68 for ( int i = 0; i < row.size(); i++ ) {
69 matrix.set( j, i, row.get( i ) );
70 }
71 }
72 }
73
74
75
76
77
78
79
80
81
82
83
84 public static <R, C> DoubleMatrix<R, C> convertToLog2Cpm( DoubleMatrix<R, C> matrix, DoubleMatrix1D librarySize ) {
85
86 if ( librarySize == null ) {
87 librarySize = new DenseDoubleMatrix1D( matrix.columns() );
88 for ( int i = 0; i < librarySize.size(); i++ ) {
89 librarySize.set( i, DescriptiveWithMissing.sum( new DoubleArrayList( matrix.getColumn( i ) ) ) );
90 }
91 }
92 DoubleMatrix<R, C> newmatrix = matrix.copy();
93 assert librarySize.size() == newmatrix.columns();
94
95 for ( int j = 0; j < newmatrix.rows(); j++ ) {
96 DoubleMatrix1D row = newmatrix.viewRow( j );
97 for ( int i = 0; i < row.size(); i++ ) {
98 double val = newmatrix.get( j, i );
99 val = ( val + 0.5 ) / ( librarySize.get( i ) + 1.0 ) * Math.pow( 10, 6 );
100 val = Math.log( val ) / Math.log( 2.0 );
101 newmatrix.set( j, i, val );
102 }
103 }
104
105 return newmatrix;
106 }
107
108
109
110
111
112
113
114 public static <R, C> DoubleMatrix<R, R> correlationMatrix( DoubleMatrix<R, C> data ) {
115 DoubleMatrix<R, R> result = new DenseDoubleMatrix<>( data.rows(), data.rows() );
116
117 for ( int i = 0; i < data.rows(); i++ ) {
118 DoubleArrayList irow = new DoubleArrayList( data.getRow( i ) );
119 for ( int j = i; j < data.rows(); j++ ) {
120 if ( j == i ) {
121 result.set( i, j, 1.0 );
122 continue;
123 }
124 DoubleArrayList jrow = new DoubleArrayList( data.getRow( j ) );
125 double c = DescriptiveWithMissing.correlation( irow, jrow );
126 result.set( i, j, c );
127 result.set( j, i, c );
128 }
129 }
130 result.setRowNames( data.getRowNames() );
131 result.setColumnNames( data.getRowNames() );
132
133 return result;
134 }
135
136
137
138
139
140
141
142 public static <R, C> SparseDoubleMatrix<R, R> correlationMatrix( DoubleMatrix<R, C> data, double threshold ) {
143 SparseDoubleMatrix<R, R> result = new SparseDoubleMatrix<>( data.rows(), data.rows() );
144
145 for ( int i = 0; i < data.rows(); i++ ) {
146 DoubleArrayList irow = new DoubleArrayList( data.getRow( i ) );
147 result.set( i, i, Double.NaN );
148 for ( int j = i + 1; j < data.rows(); j++ ) {
149 DoubleArrayList jrow = new DoubleArrayList( data.getRow( j ) );
150 double c = DescriptiveWithMissing.correlation( irow, jrow );
151 if ( Math.abs( c ) > threshold ) {
152 result.set( i, j, c );
153 result.set( j, i, c );
154 } else {
155 result.set( i, j, Double.NaN );
156 result.set( j, i, Double.NaN );
157 }
158 }
159 }
160 result.setRowNames( data.getRowNames() );
161 result.setColumnNames( data.getRowNames() );
162
163 return result;
164 }
165
166
167
168
169
170
171 public static <R, C> DoubleMatrix<R, C> doubleStandardize( DoubleMatrix<R, C> matrix ) {
172 DoubleMatrix<R, C> newMatrix = matrix.copy();
173
174 int ITERS = 5;
175 for ( int i = 0; i < ITERS; i++ ) {
176
177 newMatrix = standardize( standardize( newMatrix.transpose() ).transpose() );
178 }
179
180
181
182
183 MatrixRowStats.means( newMatrix ).forEach( new DoubleProcedure() {
184 @Override
185 public boolean apply( double element ) {
186 if ( Math.abs( element ) > 0.02 ) {
187
188 }
189 return true;
190 }
191 } );
192
193 MatrixRowStats.means( newMatrix.transpose() ).forEach( new DoubleProcedure() {
194 @Override
195 public boolean apply( double element ) {
196 if ( Math.abs( element ) > 0.1 ) {
197
198 }
199 return true;
200 }
201 } );
202
203 MatrixRowStats.sampleStandardDeviations( newMatrix ).forEach( new DoubleProcedure() {
204 @Override
205 public boolean apply( double element ) {
206 if ( Math.abs( element - 1.0 ) > 0.1 ) {
207
208 }
209 return true;
210 }
211 } );
212
213 MatrixRowStats.sampleStandardDeviations( newMatrix.transpose() ).forEach( new DoubleProcedure() {
214 @Override
215 public boolean apply( double element ) {
216 if ( Math.abs( element - 1.0 ) > 0.1 ) {
217
218 }
219 return true;
220 }
221 } );
222
223 return newMatrix;
224 }
225
226
227
228
229
230
231
232 public static <R, C> void logTransform( DoubleMatrix<R, C> matrix ) {
233 for ( int j = 0; j < matrix.rows(); j++ ) {
234 DoubleMatrix1D row = matrix.viewRow( j );
235 row.assign( Functions.log2 );
236 for ( int i = 0; i < row.size(); i++ ) {
237 matrix.set( j, i, row.get( i ) );
238 }
239 }
240 }
241
242
243
244
245
246
247
248 public static <R, C> double max( DoubleMatrix<R, C> matrix ) {
249
250 int totalRows = matrix.rows();
251 int totalColumns = matrix.columns();
252
253 double max = -Double.MAX_VALUE;
254
255 for ( int i = 0; i < totalRows; i++ ) {
256 for ( int j = 0; j < totalColumns; j++ ) {
257 double val = matrix.get( i, j );
258 if ( Double.isNaN( val ) ) {
259 continue;
260 }
261
262 if ( val > max ) {
263 max = val;
264 }
265 }
266 }
267
268 if ( max == -Double.MAX_VALUE ) {
269 return Double.NaN;
270 }
271
272 return max;
273
274 }
275
276
277
278
279
280
281
282 public static <R, C> double min( DoubleMatrix<R, C> matrix ) {
283
284 int totalRows = matrix.rows();
285 int totalColumns = matrix.columns();
286
287 double min = Double.MAX_VALUE;
288
289 for ( int i = 0; i < totalRows; i++ ) {
290 for ( int j = 0; j < totalColumns; j++ ) {
291 double val = matrix.get( i, j );
292 if ( Double.isNaN( val ) ) {
293 continue;
294 }
295
296 if ( val < min ) {
297 min = val;
298 }
299
300 }
301 }
302 if ( min == Double.MAX_VALUE ) {
303 return Double.NaN;
304 }
305 return min;
306
307 }
308
309
310
311
312
313 public static boolean[][] nanStatusMatrix( double[][] data ) {
314 boolean[][] result = new boolean[data.length][];
315 for ( int i = 0; i < data.length; i++ ) {
316 double[] row = data[i];
317 result[i] = new boolean[data[i].length];
318 for ( int j = 0; j < row.length; j++ ) {
319 result[i][j] = Double.isNaN( data[i][j] );
320 }
321 }
322 return result;
323 }
324
325
326
327
328
329
330
331
332
333
334
335 public static <R, C> void rbfNormalize( DoubleMatrix<R, C> matrixToNormalize, final double sigma ) {
336
337
338 DoubleFunction f = new DoubleFunction() {
339 @Override
340 public double apply( double value ) {
341 return Math.exp( -value / sigma );
342 }
343 };
344
345 for ( int j = 0; j < matrixToNormalize.rows(); j++ ) {
346
347 DoubleMatrix1D row = matrixToNormalize.viewRow( j );
348 row.assign( f );
349 double sum = row.zSum();
350 row.assign( Functions.div( sum ) );
351
352 }
353 }
354
355
356
357
358
359 public static double[][] selfSquaredMatrix( double[][] input ) {
360 double[][] returnValue = new double[input.length][];
361 for ( int i = 0; i < returnValue.length; i++ ) {
362 returnValue[i] = new double[input[i].length];
363
364 for ( int j = 0; j < returnValue[i].length; j++ ) {
365 returnValue[i][j] = input[i][j] * input[i][j];
366 }
367
368 }
369 return returnValue;
370 }
371
372
373
374
375
376
377
378
379
380 public static <R, C> DoubleMatrix<R, C> standardize( DoubleMatrix<R, C> matrix ) {
381 DoubleMatrix<R, C> newMatrix = new DenseDoubleMatrix<>( matrix.rows(), matrix.columns() );
382 newMatrix.setRowNames( matrix.getRowNames() );
383 newMatrix.setColumnNames( matrix.getColNames() );
384 for ( int i = 0; i < matrix.rows(); i++ ) {
385 double[] row = matrix.getRow( i );
386 DoubleArrayList li = new DoubleArrayList( row );
387 DescriptiveWithMissing.standardize( li );
388
389
390
391
392 if ( Math.abs( DescriptiveWithMissing.mean( li ) ) > 0.001 ) {
393 throw new IllegalStateException( "NOT CENTERED" );
394 }
395 if ( Math.abs(
396 DescriptiveWithMissing.sampleVariance( li, DescriptiveWithMissing.mean( li ) ) - 1.0 ) > 0.001 ) {
397 throw new IllegalStateException( "NOT SCALED" );
398 }
399
400 for ( int j = 0; j < matrix.columns(); j++ ) {
401 newMatrix.set( i, j, li.getQuick( j ) );
402 }
403 }
404 return newMatrix;
405 }
406
407
408
409
410
411
412
413 public static DoubleMatrix2D cov2cor( DoubleMatrix2D cov ) {
414 assert cov.rows() == cov.columns();
415 DoubleMatrix1D v = MatrixUtil.diagonal( cov ).assign( Functions.sqrt ).assign( Functions.inv );
416 Algebra solver = new Algebra();
417 DoubleFactory2D f = DoubleFactory2D.sparse;
418 DoubleMatrix2D vd = f.diagonal( v );
419 DoubleMatrix2D cormatrix = solver.mult( solver.mult( vd, cov ), vd );
420 return cormatrix;
421 }
422
423
424
425
426
427
428
429
430 public static <R, C> void unLogTransform( DoubleMatrix<R, C> matrix ) {
431 for ( int j = 0; j < matrix.rows(); j++ ) {
432 DoubleMatrix1D row = matrix.viewRow( j );
433 for ( int i = 0; i < row.size(); i++ ) {
434 row.setQuick( i, Math.pow( 2.0, row.getQuick( i ) ) );
435 }
436 }
437
438 }
439
440 }