1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package ubic.basecode.math.linearmodels;
20
21 import java.io.Serializable;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
27 import java.util.Map;
28
29 import org.apache.commons.lang3.ArrayUtils;
30 import org.apache.commons.lang3.StringUtils;
31 import org.apache.commons.math3.distribution.FDistribution;
32 import org.rosuda.REngine.REXP;
33 import org.rosuda.REngine.REXPDouble;
34 import org.rosuda.REngine.REXPMismatchException;
35 import org.rosuda.REngine.RList;
36
37 import ubic.basecode.dataStructure.matrix.DoubleMatrix;
38 import ubic.basecode.dataStructure.matrix.DoubleMatrixFactory;
39
40
41
42
43
44
45
46
47 public class LinearModelSummary implements Serializable {
48
49 public static final String INTERCEPT_COEFFICIENT_NAME = "(Intercept)";
50
51 private static final long serialVersionUID = 1L;
52
53 private Double adjRSquared = Double.NaN;
54
55 private GenericAnovaResult anovaResult;
56
57 private Double[] coefficients = null;
58
59
60
61
62 private DoubleMatrix<String, String> contrastCoefficients = null;
63
64 private Integer denominatorDof = null;
65
66
67
68
69 private Double[] effects = null;
70
71 private List<String> factorNames = null;
72
73 private Map<String, List<String>> factorValueNames = new HashMap<>();
74
75 private String formula = null;
76
77 private Double fStat = Double.NaN;
78
79
80
81
82 private String key = null;
83
84 private Integer numeratorDof = null;
85
86
87
88
89 private Double priorDof = null;
90
91 private Double[] residuals = null;
92
93 private Double rSquared = Double.NaN;
94
95
96
97
98 private boolean shrunken = false;
99
100 private Double sigma = null;
101
102 private Double[] stdevUnscaled;
103
104 private Map<String, Collection<String>> term2CoefficientNames = new HashMap<>();
105
106
107
108
109
110
111
112
113
114 public LinearModelSummary( REXP summaryLm, REXP anova, String[] factorNames ) {
115 try {
116 basicSetup( summaryLm );
117
118 this.factorNames = Arrays.asList( factorNames );
119
120 if ( anova != null ) {
121 this.anovaResult = new GenericAnovaResult( anova );
122 }
123
124 setupCoefficientNames();
125
126 } catch ( REXPMismatchException e ) {
127 throw new RuntimeException( e );
128 }
129
130 }
131
132
133
134
135
136
137 public LinearModelSummary( String key ) {
138 this();
139 this.key = key;
140 }
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158 public LinearModelSummary( String k, Double[] coefficients, Double[] residuals, List<String> terms,
159 DoubleMatrix<String, String> contrastCoefficients, Double[] effects, Double[] stdevUnscaled,
160 double rsquared,
161 double adjRsquared,
162 double fstat, Integer ndof,
163 Integer ddof, GenericAnovaResult anovaResult, double sigma, boolean isShrunken ) {
164
165 this.residuals = residuals;
166 this.coefficients = coefficients;
167 this.contrastCoefficients = contrastCoefficients;
168 this.rSquared = rsquared;
169 this.adjRSquared = adjRsquared;
170 this.fStat = fstat;
171 this.numeratorDof = ndof;
172 this.denominatorDof = ddof;
173 this.key = k;
174 this.anovaResult = anovaResult;
175 this.effects = effects;
176 this.stdevUnscaled = stdevUnscaled;
177 this.factorNames = terms;
178 this.sigma = sigma;
179 this.shrunken = isShrunken;
180
181 if ( anovaResult != null ) {
182 if ( anovaResult.getKey() == null ) {
183 anovaResult.setKey( key );
184 } else {
185 if ( !anovaResult.getKey().equals( key ) ) {
186 throw new IllegalArgumentException( "Keys of ANOVA and holding LM must match" );
187 }
188 }
189 }
190 this.setupCoefficientNames();
191 }
192
193
194
195
196 protected LinearModelSummary() {
197 }
198
199
200
201
202 public Double getAdjRSquared() {
203 return adjRSquared;
204 }
205
206
207
208
209 public GenericAnovaResult getAnova() {
210 return this.anovaResult;
211 }
212
213 public Double[] getCoefficients() {
214 return coefficients;
215 }
216
217
218
219
220
221
222
223
224
225
226 public DoubleMatrix<String, String> getContrastCoefficients() {
227 return contrastCoefficients;
228 }
229
230
231
232
233
234
235 public Map<String, Double> getContrastCoefficients( String factorName ) {
236 Collection<String> terms = term2CoefficientNames.get( factorName );
237
238 if ( terms == null ) return null;
239
240 Map<String, Double> results = new HashMap<>();
241 for ( String term : terms ) {
242 results.put( term, contrastCoefficients.getByKeys( term, "Estimate" ) );
243 }
244
245 return results;
246 }
247
248
249
250
251
252
253
254
255 public Map<String, Double> getContrastCoefficientStderr( String factorName ) {
256 Collection<String> terms = term2CoefficientNames.get( factorName );
257
258 if ( terms == null ) return null;
259
260 Map<String, Double> results = new HashMap<>();
261 for ( String term : terms ) {
262 results.put( term, contrastCoefficients.getByKeys( term, "Std. Error" ) );
263 }
264
265 return results;
266 }
267
268
269
270
271
272
273
274 public Map<String, Double> getContrastPValues( String factorName ) {
275 Collection<String> terms = term2CoefficientNames.get( factorName );
276
277 if ( terms == null ) return null;
278
279 Map<String, Double> results = new HashMap<>();
280 for ( String term : terms ) {
281 results.put( term, contrastCoefficients.getByKeys( term, "Pr(>|t|)" ) );
282 }
283
284 return results;
285 }
286
287
288
289
290
291
292
293 public Map<String, Double> getContrastTStats( String factorName ) {
294 Collection<String> terms = term2CoefficientNames.get( factorName );
295
296 if ( terms == null ) return null;
297
298 Map<String, Double> results = new HashMap<>();
299 for ( String term : terms ) {
300 results.put( term, contrastCoefficients.getByKeys( term, "t value" ) );
301 }
302
303 return results;
304
305 }
306
307
308
309
310 public Double[] getEffects() {
311 return this.effects;
312 }
313
314
315
316
317 public Double getF() {
318 return this.fStat;
319 }
320
321
322
323
324 public List<String> getFactorNames() {
325 return factorNames;
326 }
327
328
329
330
331
332
333
334
335 public List<String> getFactorValueNames( String factorName ) {
336 return factorValueNames.get( factorName );
337 }
338
339
340
341
342 public String getFormula() {
343 return formula;
344 }
345
346
347
348
349
350
351 public Double getInteractionEffectP( String... fnames ) {
352 if ( anovaResult == null ) return Double.NaN;
353 return anovaResult.getInteractionEffectP( fnames );
354 }
355
356
357
358
359 public Double getInterceptCoeff() {
360 if ( contrastCoefficients != null ) {
361 if ( contrastCoefficients.hasRow( INTERCEPT_COEFFICIENT_NAME ) ) {
362 return contrastCoefficients.getByKeys( INTERCEPT_COEFFICIENT_NAME, "Estimate" );
363 } else if ( contrastCoefficients.rows() == 1 ) {
364
365
366
367
368
369 assert contrastCoefficients.getRowName( 0 ).equals( "x1" );
370 return contrastCoefficients.getByKeys( contrastCoefficients.getRowName( 0 ), "Estimate" );
371 }
372 }
373
374 return Double.NaN;
375 }
376
377
378
379
380 public Double getInterceptP() {
381 if ( contrastCoefficients != null ) {
382 if ( contrastCoefficients.hasRow( INTERCEPT_COEFFICIENT_NAME ) ) {
383 return contrastCoefficients.getByKeys( INTERCEPT_COEFFICIENT_NAME, "Pr(>|t|)" );
384 } else if ( contrastCoefficients.rows() == 1 ) {
385
386
387
388
389
390 assert contrastCoefficients.getRowName( 0 ).equals( "x1" );
391 return contrastCoefficients.getByKeys( contrastCoefficients.getRowName( 0 ), "Pr(>|t|)" );
392 }
393 }
394 return Double.NaN;
395 }
396
397
398
399
400 public Double getInterceptT() {
401 if ( contrastCoefficients != null ) {
402 if ( contrastCoefficients.hasRow( INTERCEPT_COEFFICIENT_NAME ) ) {
403 return contrastCoefficients.getByKeys( INTERCEPT_COEFFICIENT_NAME, "t value" );
404 } else if ( contrastCoefficients.rows() == 1 ) {
405
406
407
408
409
410 assert contrastCoefficients.getRowName( 0 ).equals( "x1" );
411 return contrastCoefficients.getByKeys( contrastCoefficients.getRowName( 0 ), "t value" );
412 }
413 }
414
415 return Double.NaN;
416 }
417
418 public String getKey() {
419 return key;
420 }
421
422
423
424
425
426 public Collection<String> getMainEffectFactorNames() {
427 if ( anovaResult == null ) return null;
428 return anovaResult.getMainEffectFactorNames();
429 }
430
431
432
433
434
435
436 public Double getMainEffectP( String factorName ) {
437 if ( anovaResult == null ) return Double.NaN;
438 return anovaResult.getMainEffectP( factorName );
439 }
440
441
442
443
444 public Integer getNumeratorDof() {
445 return this.numeratorDof;
446 }
447
448
449
450
451
452
453 public Double getP() {
454 if ( numeratorDof == null || denominatorDof == null || numeratorDof == 0 || denominatorDof == 0 )
455 return Double.NaN;
456
457 FDistribution f = new FDistribution( numeratorDof, denominatorDof );
458
459 return 1.0 - f.cumulativeProbability( this.getF() );
460
461 }
462
463
464
465
466 public Double getPriorDof() {
467 return priorDof;
468 }
469
470
471
472
473 public Integer getResidualDof() {
474 return this.denominatorDof;
475 }
476
477
478
479
480 public Double[] getResiduals() {
481 return residuals;
482 }
483
484
485
486
487 public Double getRSquared() {
488 return rSquared;
489 }
490
491
492
493
494
495
496 public Double getSigma() {
497 return sigma;
498 }
499
500
501
502
503
504 public Double[] getStdevUnscaled() {
505 return stdevUnscaled;
506 }
507
508
509
510
511
512 public boolean hasInteractions() {
513 if ( anovaResult == null ) return false;
514 return anovaResult.hasInteractions();
515 }
516
517 public boolean isBaseline( String factorValueName ) {
518 return !contrastCoefficients.hasRow( factorValueName );
519 }
520
521
522
523
524
525
526 public boolean isShrunken() {
527 return shrunken;
528 }
529
530
531
532
533 public void setAnova( GenericAnovaResult genericAnovaResult ) {
534 this.anovaResult = genericAnovaResult;
535 }
536
537 public void setKey( String key ) {
538 this.key = key;
539 }
540
541
542
543
544 public void setPriorDof( Double priorDof ) {
545 this.priorDof = priorDof;
546 }
547
548
549
550
551
552
553 @Override
554 public String toString() {
555 StringBuilder buf = new StringBuilder();
556 if ( StringUtils.isNotBlank( this.key ) ) {
557 buf.append( this.key + "\n" );
558 }
559 buf.append( "F=" + String.format( "%.2f", this.fStat ) + " Rsquare=" + String.format( "%.2f", this.rSquared )
560 + "\n" );
561
562 buf.append( "Residuals:\n" );
563 if ( residuals != null ) {
564 for ( Double d : residuals ) {
565 buf.append( String.format( "%.2f ", d ) );
566 }
567 } else {
568 buf.append( "Residuals are null" );
569 }
570
571 buf.append( "\n\nCoefficients:\n" + contrastCoefficients + "\n" );
572 if ( this.anovaResult != null ) {
573 buf.append( this.anovaResult.toString() + "\n" );
574 }
575
576 return buf.toString();
577 }
578
579
580
581
582
583
584 private RList basicSetup( REXP summaryLm ) throws REXPMismatchException {
585 RList li = summaryLm.asList();
586
587 extractCoefficients( li );
588
589 this.residuals = ArrayUtils.toObject( ( ( REXP ) li.get( "residuals" ) ).asDoubles() );
590
591 this.rSquared = ( ( REXP ) li.get( "r.squared" ) ).asDouble();
592
593 this.adjRSquared = ( ( REXP ) li.get( "adj.r.squared" ) ).asDouble();
594
595 REXP fstats = ( REXP ) li.get( "fstatistic" );
596 if ( fstats != null ) {
597 Double[] ff = ArrayUtils.toObject( fstats.asDoubles() );
598 this.fStat = ff[0];
599 this.numeratorDof = ff[1].intValue();
600 this.denominatorDof = ff[2].intValue();
601 }
602
603
604
605
606
607 return li;
608 }
609
610
611
612
613
614 private void extractCoefficients( RList li ) throws REXPMismatchException {
615 REXPDouble coraw = ( REXPDouble ) li.get( "coefficients" );
616
617 RList dimnames = coraw.getAttribute( "dimnames" ).asList();
618 String[] itemnames = ( ( REXP ) dimnames.get( 0 ) ).asStrings();
619 String[] colNames = ( ( REXP ) dimnames.get( 1 ) ).asStrings();
620
621 double[][] coef = coraw.asDoubleMatrix();
622 contrastCoefficients = DoubleMatrixFactory.dense( coef );
623 contrastCoefficients.setRowNames( Arrays.asList( itemnames ) );
624 contrastCoefficients.setColumnNames( Arrays.asList( colNames ) );
625 }
626
627
628
629 private void setupCoefficientNames() {
630
631 for ( String string : factorNames ) {
632 term2CoefficientNames.put( string, new HashSet<String>() );
633 }
634
635 assert this.contrastCoefficients != null;
636
637 List<String> coefRowNames = contrastCoefficients.getRowNames();
638
639 for ( String coefNameFromR : coefRowNames ) {
640
641 if ( coefNameFromR.equals( INTERCEPT_COEFFICIENT_NAME ) ) {
642 continue;
643 } else if ( coefNameFromR.contains( ":" ) ) {
644
645
646
647
648
649
650
651 String cleanedInterationTermName = coefNameFromR.replaceAll( "fv_[0-9]+(?=(:|$))", "" );
652
653 for ( String factorName : factorNames ) {
654 if ( !factorName.contains( ":" ) ) continue;
655
656 if ( factorName.equals( cleanedInterationTermName ) ) {
657 assert term2CoefficientNames.containsKey( factorName );
658 term2CoefficientNames.get( factorName ).add( coefNameFromR );
659 }
660
661 }
662
663 } else {
664
665 for ( String factorName : factorNames ) {
666 if ( coefNameFromR.startsWith( factorName ) ) {
667 assert term2CoefficientNames.containsKey( factorName );
668 term2CoefficientNames.get( factorName ).add( coefNameFromR );
669
670 }
671 }
672 }
673 }
674 }
675
676 }