1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package ubic.basecode.math.linearmodels;
16
17 import cern.colt.bitvector.BitVector;
18 import cern.colt.list.DoubleArrayList;
19 import cern.colt.matrix.DoubleMatrix1D;
20 import cern.colt.matrix.DoubleMatrix2D;
21 import cern.colt.matrix.impl.DenseDoubleMatrix1D;
22 import cern.colt.matrix.impl.DenseDoubleMatrix2D;
23 import cern.colt.matrix.linalg.Algebra;
24 import cern.jet.math.Functions;
25 import cern.jet.stat.Descriptive;
26 import org.apache.commons.lang3.ArrayUtils;
27 import org.apache.commons.lang3.StringUtils;
28 import org.apache.commons.lang3.time.StopWatch;
29 import org.apache.commons.math3.distribution.FDistribution;
30 import org.apache.commons.math3.distribution.TDistribution;
31 import org.apache.commons.math3.exception.NotStrictlyPositiveException;
32 import org.slf4j.Logger;
33 import org.slf4j.LoggerFactory;
34 import ubic.basecode.dataStructure.matrix.DoubleMatrix;
35 import ubic.basecode.dataStructure.matrix.DoubleMatrixFactory;
36 import ubic.basecode.dataStructure.matrix.MatrixUtil;
37 import ubic.basecode.dataStructure.matrix.ObjectMatrix;
38 import ubic.basecode.math.Constants;
39 import ubic.basecode.math.linalg.QRDecomposition;
40
41 import java.util.*;
42
43
44
45
46
47
48
49
50
51
52 public class LeastSquaresFit {
53
54 private static Logger log = LoggerFactory.getLogger(LeastSquaresFit.class);
55
56
57
58
59 boolean hasBeenShrunken = false;
60
61
62
63
64 private DoubleMatrix2D A;
65
66
67
68
69
70 private List<Integer> assign = new ArrayList<>();
71
72
73
74
75 private List<List<Integer>> assigns = new ArrayList<>();
76
77
78
79
80 private DoubleMatrix2D b;
81
82
83
84
85 private DoubleMatrix2D coefficients = null;
86
87
88
89
90 private DesignMatrix designMatrix;
91
92
93
94
95 private double dfPrior = 0;
96
97
98
99
100 private DoubleMatrix2D fitted;
101
102
103
104
105 private boolean hasIntercept = true;
106
107
108
109
110 private boolean hasMissing = false;
111
112
113
114
115 private QRDecomposition qr = null;
116
117
118
119
120
121
122 private Map<BitVector, QRDecomposition> qrs = new HashMap<>();
123
124
125
126
127
128 private Map<Integer, QRDecomposition> qrsForWeighted = new HashMap<>();
129
130 private int residualDof = -1;
131
132
133
134
135 private List<Integer> residualDofs = new ArrayList<>();
136
137
138
139
140 private DoubleMatrix2D residuals = null;
141
142
143
144
145 private List<String> rowNames;
146
147
148
149
150 private Map<Integer, DoubleMatrix1D> stdevUnscaled = new TreeMap<>();
151
152
153
154
155 private List<String> terms;
156
157
158
159
160 private Map<Integer, BitVector> valuesPresentMap = new HashMap<>();
161
162
163
164
165 private DoubleMatrix1D varPost = null;
166
167
168
169
170 private Double varPrior = null;
171
172
173
174
175 private DoubleMatrix2D weights = null;
176
177
178
179
180
181
182
183 public LeastSquaresFit(DesignMatrix designMatrix, DoubleMatrix<String, String> data) {
184 this.designMatrix = designMatrix;
185 this.A = designMatrix.getDoubleMatrix();
186 this.assign = designMatrix.getAssign();
187 this.terms = designMatrix.getTerms();
188
189 this.rowNames = data.getRowNames();
190 this.b = new DenseDoubleMatrix2D(data.asArray());
191 boolean hasInterceptTerm = this.terms.contains( LinearModelSummary.INTERCEPT_COEFFICIENT_NAME);
192 this.hasIntercept = designMatrix.hasIntercept();
193 assert hasInterceptTerm == this.hasIntercept : diagnosis(null);
194 fit();
195 }
196
197
198
199
200
201
202
203
204 public LeastSquaresFit(DesignMatrix designMatrix, DoubleMatrix<String, String> data,
205 final DoubleMatrix2D weights) {
206 this.designMatrix = designMatrix;
207 DoubleMatrix2D X = designMatrix.getDoubleMatrix();
208 this.assign = designMatrix.getAssign();
209 this.terms = designMatrix.getTerms();
210 this.A = X;
211 this.rowNames = data.getRowNames();
212 this.b = new DenseDoubleMatrix2D(data.asArray());
213 boolean hasInterceptTerm = this.terms.contains( LinearModelSummary.INTERCEPT_COEFFICIENT_NAME);
214 this.hasIntercept = designMatrix.hasIntercept();
215 assert hasInterceptTerm == this.hasIntercept : diagnosis(null);
216 this.weights = weights;
217 fit();
218 }
219
220
221
222
223
224
225
226
227 public LeastSquaresFit(DesignMatrix designMatrix, DoubleMatrix2D b, final DoubleMatrix2D weights) {
228
229 this.designMatrix = designMatrix;
230 DoubleMatrix2D X = designMatrix.getDoubleMatrix();
231 this.assign = designMatrix.getAssign();
232 this.terms = designMatrix.getTerms();
233 this.A = X;
234 this.b = b;
235 boolean hasInterceptTerm = this.terms.contains( LinearModelSummary.INTERCEPT_COEFFICIENT_NAME);
236 this.hasIntercept = designMatrix.hasIntercept();
237 assert hasInterceptTerm == this.hasIntercept : diagnosis(null);
238
239 this.weights = weights;
240
241 fit();
242
243 }
244
245
246
247
248
249
250
251 public LeastSquaresFit(DoubleMatrix1D vectorA, DoubleMatrix1D vectorB) {
252 assert vectorA.size() == vectorB.size();
253
254 this.A = new DenseDoubleMatrix2D(vectorA.size(), 2);
255 this.b = new DenseDoubleMatrix2D(1, vectorB.size());
256
257 for (int i = 0; i < vectorA.size(); i++) {
258 A.set(i, 0, 1);
259 A.set(i, 1, vectorA.get(i));
260 b.set(0, i, vectorB.get(i));
261 }
262
263 fit();
264 }
265
266
267
268
269
270
271
272
273 public LeastSquaresFit(DoubleMatrix1D vectorA, DoubleMatrix1D vectorB, final DoubleMatrix1D weights) {
274
275 assert vectorA.size() == vectorB.size();
276 assert vectorA.size() == weights.size();
277
278 this.A = new DenseDoubleMatrix2D(vectorA.size(), 2);
279 this.b = new DenseDoubleMatrix2D(1, vectorB.size());
280 this.weights = new DenseDoubleMatrix2D(1, weights.size());
281
282 for (int i = 0; i < vectorA.size(); i++) {
283
284 A.set(i, 0, 1);
285 A.set(i, 1, vectorA.get(i));
286 b.set(0, i, vectorB.get(i));
287 this.weights.set(0, i, weights.get(i));
288 }
289
290 fit();
291 }
292
293
294
295
296
297
298
299 public LeastSquaresFit(DoubleMatrix2D A, DoubleMatrix2D b) {
300 this.A = A;
301 this.b = b;
302 fit();
303 }
304
305
306
307
308
309
310
311
312 public LeastSquaresFit(DoubleMatrix2D A, DoubleMatrix2D b, final DoubleMatrix2D weights) {
313 assert A != null;
314 assert b != null;
315 assert A.rows() == b.columns();
316 assert weights == null || b.columns() == weights.columns();
317 assert weights == null || b.rows() == weights.rows();
318
319 this.A = A;
320 this.b = b;
321 this.weights = weights;
322
323 fit();
324
325 }
326
327
328
329
330
331 public LeastSquaresFit(ObjectMatrix<String, String, Object> sampleInfo, DenseDoubleMatrix2D data) {
332
333 this.designMatrix = new DesignMatrix(sampleInfo, true);
334
335 this.hasIntercept = true;
336 this.A = designMatrix.getDoubleMatrix();
337 this.assign = designMatrix.getAssign();
338 this.terms = designMatrix.getTerms();
339
340 this.b = data;
341 fit();
342 }
343
344
345
346
347
348
349 public LeastSquaresFit(ObjectMatrix<String, String, Object> sampleInfo, DenseDoubleMatrix2D data,
350 boolean interactions) {
351 this.designMatrix = new DesignMatrix(sampleInfo, true);
352
353 if (interactions) {
354 addInteraction();
355 }
356
357 this.A = designMatrix.getDoubleMatrix();
358 this.assign = designMatrix.getAssign();
359 this.terms = designMatrix.getTerms();
360
361 this.b = data;
362 fit();
363 }
364
365
366
367
368
369
370
371 public LeastSquaresFit(ObjectMatrix<String, String, Object> design, DoubleMatrix<String, String> b) {
372 this.designMatrix = new DesignMatrix(design, true);
373
374 this.A = designMatrix.getDoubleMatrix();
375 this.assign = designMatrix.getAssign();
376 this.terms = designMatrix.getTerms();
377
378 this.b = new DenseDoubleMatrix2D(b.asArray());
379 this.rowNames = b.getRowNames();
380 fit();
381 }
382
383
384
385
386
387
388
389 public LeastSquaresFit(ObjectMatrix<String, String, Object> design, DoubleMatrix<String, String> data,
390 boolean interactions) {
391 this.designMatrix = new DesignMatrix(design, true);
392
393 if (interactions) {
394 addInteraction();
395 }
396
397 DoubleMatrix2D X = designMatrix.getDoubleMatrix();
398 this.assign = designMatrix.getAssign();
399 this.terms = designMatrix.getTerms();
400 this.A = X;
401 this.b = new DenseDoubleMatrix2D(data.asArray());
402 fit();
403 }
404
405
406
407
408
409
410
411 public DoubleMatrix2D getCoefficients() {
412 return coefficients;
413 }
414
415 public double getDfPrior() {
416 return dfPrior;
417 }
418
419 public DoubleMatrix2D getFitted() {
420 return fitted;
421 }
422
423 public int getResidualDof() {
424 return residualDof;
425 }
426
427 public List<Integer> getResidualDofs() {
428 return residualDofs;
429 }
430
431 public DoubleMatrix2D getResiduals() {
432 return residuals;
433 }
434
435
436
437
438 public DoubleMatrix2D getStudentizedResiduals() {
439 int dof = this.residualDof - 1;
440
441 assert dof > 0;
442
443 if (this.hasMissing) {
444 throw new UnsupportedOperationException("Studentizing not supported with missing values");
445 }
446
447 DoubleMatrix2D result = this.residuals.like();
448
449
450
451
452 DoubleMatrix2D q = this.getQR(0).getQ();
453
454 DoubleMatrix1D hatdiag = new DenseDoubleMatrix1D(residuals.columns());
455 for (int j = 0; j < residuals.columns(); j++) {
456 double hj = q.viewRow(j).aggregate(Functions.plus, Functions.square);
457 if (1.0 - hj < Constants.TINY) {
458 hj = 1.0;
459 }
460 hatdiag.set(j, hj);
461 }
462
463
464
465
466 for (int i = 0; i < residuals.rows(); i++) {
467
468
469
470
471 DoubleMatrix1D residualRow = residuals.viewRow(i);
472
473 if (this.weights != null) {
474
475 DoubleMatrix1D w = weights.viewRow(i).copy().assign(Functions.sqrt);
476 residualRow = residualRow.copy().assign(w, Functions.mult);
477 }
478
479 double sum = residualRow.aggregate(Functions.plus, Functions.square);
480
481 for (int j = 0; j < residualRow.size(); j++) {
482
483 double hj = hatdiag.get(j);
484
485
486 double sigma;
487
488 if (hj < 1.0) {
489 sigma = Math.sqrt((sum - Math.pow(residualRow.get(j), 2) / (1.0 - hj)) / dof);
490 } else {
491 sigma = Math.sqrt(sum / dof);
492 }
493
494 double res = residualRow.getQuick(j);
495 double studres = res / (sigma * Math.sqrt(1.0 - hj));
496
497 if (log.isDebugEnabled()) log.debug("sigma=" + sigma + " hj=" + hj + " stres=" + studres);
498
499 result.set(i, j, studres);
500 }
501 }
502 return result;
503 }
504
505 public DoubleMatrix1D getVarPost() {
506 return varPost;
507 }
508
509 public double getVarPrior() {
510 return varPrior;
511 }
512
513 public DoubleMatrix2D getWeights() {
514 return weights;
515 }
516
517 public boolean isHasBeenShrunken() {
518 return hasBeenShrunken;
519 }
520
521 public boolean isHasMissing() {
522 return hasMissing;
523 }
524
525
526
527
528
529 public List<LinearModelSummary> summarize() {
530 return this.summarize(false);
531 }
532
533
534
535
536
537 public List<LinearModelSummary> summarize(boolean anova) {
538 List<LinearModelSummary> lmsresults = new ArrayList<>();
539
540 List<GenericAnovaResult> anovas = null;
541 if (anova) {
542 anovas = this.anova();
543 }
544
545 StopWatch timer = new StopWatch();
546 timer.start();
547 log.info("Summarizing");
548 for (int i = 0; i < this.coefficients.columns(); i++) {
549 LinearModelSummaryImpl lms = summarize(i);
550 lms.setAnova(anovas != null ? anovas.get(i) : null);
551 lmsresults.add(lms);
552 if (timer.getTime() > 10000 && i > 0 && i % 10000 == 0) {
553 log.info("Summarized " + i);
554 }
555 }
556 log.info("Summzarized " + this.coefficients.columns() + " results");
557
558 return lmsresults;
559 }
560
561
562
563
564
565
566
567 public Map<String, LinearModelSummary> summarizeByKeys(boolean anova) {
568 List<LinearModelSummary> summaries = this.summarize(anova);
569 Map<String, LinearModelSummary> result = new LinkedHashMap<>();
570 for (LinearModelSummary lms : summaries) {
571 if (StringUtils.isBlank(lms.getKey())) {
572
573
574
575 throw new IllegalStateException("Key must not be blank");
576 }
577
578 if (result.containsKey(lms.getKey())) {
579 throw new IllegalStateException("Duplicate key " + lms.getKey());
580 }
581 result.put(lms.getKey(), lms);
582 }
583 return result;
584 }
585
586
587
588
589
590
591
592
593
594
595 protected List<GenericAnovaResult> anova() {
596
597 DoubleMatrix1D ones = new DenseDoubleMatrix1D(residuals.columns());
598 ones.assign(1.0);
599
600
601
602
603 DoubleMatrix1D residualSumsOfSquares;
604
605 if (this.weights == null) {
606 residualSumsOfSquares = MatrixUtil.multWithMissing(residuals.copy().assign(Functions.square),
607 ones);
608 } else {
609 residualSumsOfSquares = MatrixUtil.multWithMissing(
610 residuals.copy().assign(this.weights.copy().assign(Functions.sqrt), Functions.mult).assign(Functions.square),
611 ones);
612 }
613
614 DoubleMatrix2D effects = null;
615 if (this.hasMissing || this.weights != null) {
616 effects = new DenseDoubleMatrix2D(this.b.rows(), this.A.columns());
617 effects.assign(Double.NaN);
618 for (int i = 0; i < this.b.rows(); i++) {
619 QRDecomposition qrd = this.getQR(i);
620 if (qrd == null) {
621
622 for (int j = 0; j < effects.columns(); j++) {
623 effects.set(i, j, Double.NaN);
624 }
625 continue;
626 }
627
628
629
630
631 DoubleMatrix1D brow = b.viewRow(i);
632 DoubleMatrix1D browWithoutMissing = MatrixUtil.removeMissingOrInfinite(brow);
633
634 DoubleMatrix1D tqty;
635 if (weights != null) {
636 DoubleMatrix1D w = MatrixUtil.removeMissingOrInfinite(brow, this.weights.viewRow(i).copy().assign(Functions.sqrt));
637 assert w.size() == browWithoutMissing.size();
638 DoubleMatrix1D bw = browWithoutMissing.copy().assign(w, Functions.mult);
639 tqty = qrd.effects(bw);
640 } else {
641 tqty = qrd.effects(browWithoutMissing);
642 }
643
644
645 for (int j = 0; j < qrd.getRank(); j++) {
646 effects.set(i, j, tqty.get(j));
647 }
648 }
649
650 } else {
651 assert this.qr != null;
652 effects = qr.effects(this.b.viewDice().copy()).viewDice();
653 }
654
655
656 effects.assign(Functions.square);
657
658
659
660
661 Set<Integer> facs = new TreeSet<>();
662 facs.addAll(assign);
663
664 DoubleMatrix2D ssq = new DenseDoubleMatrix2D(effects.rows(), facs.size() + 1);
665 DoubleMatrix2D dof = new DenseDoubleMatrix2D(effects.rows(), facs.size() + 1);
666 dof.assign(0.0);
667 ssq.assign(0.0);
668 List<Integer> assignToUse = assign;
669
670 for (int i = 0; i < ssq.rows(); i++) {
671
672 ssq.set(i, facs.size(), residualSumsOfSquares.get(i));
673 int rdof;
674 if (this.residualDofs.isEmpty()) {
675 rdof = this.residualDof;
676 } else {
677 rdof = this.residualDofs.get(i);
678 }
679
680
681
682 dof.set(i, facs.size(), rdof);
683
684 if (!assigns.isEmpty()) {
685
686 assignToUse = assigns.get(i);
687 }
688
689
690 DoubleMatrix1D effectsForRow = effects.viewRow(i);
691
692 if (assignToUse.size() != effectsForRow.size()) {
693
694
695
696 log.debug("Check me: effects has missing values");
697 }
698
699 for (int j = 0; j < assignToUse.size(); j++) {
700
701 double valueToAdd = effectsForRow.get(j);
702 int col = assignToUse.get(j);
703 if (col > 0 && !this.hasIntercept) {
704 col = col - 1;
705 }
706
707
708
709
710
711
712 if (!Double.isNaN(valueToAdd) && valueToAdd > Constants.SMALL) {
713 ssq.set(i, col, ssq.get(i, col) + valueToAdd);
714 dof.set(i, col, dof.get(i, col) + 1);
715 }
716 }
717 }
718
719 DoubleMatrix1D denominator;
720 if (this.hasBeenShrunken) {
721 denominator = this.varPost.copy();
722 } else {
723 if (this.residualDofs.isEmpty()) {
724
725 denominator = residualSumsOfSquares.copy().assign(Functions.div(residualDof));
726 } else {
727 denominator = new DenseDoubleMatrix1D(residualSumsOfSquares.size());
728 for (int i = 0; i < residualSumsOfSquares.size(); i++) {
729 denominator.set(i, residualSumsOfSquares.get(i) / residualDofs.get(i));
730 }
731 }
732 }
733
734
735 DoubleMatrix2D fStats = ssq.copy().assign(dof, Functions.div);
736 DoubleMatrix2D pvalues = fStats.like();
737 computeStats(dof, fStats, denominator, pvalues);
738
739 return summarizeAnova(ssq, dof, fStats, pvalues);
740 }
741
742
743
744
745
746
747
748
749 protected void ebayesUpdate(double d, double v, DoubleMatrix1D vp) {
750 this.dfPrior = d;
751 this.varPrior = v;
752 this.varPost = vp;
753 this.hasBeenShrunken = true;
754 }
755
756
757
758
759
760
761
762
763
764
765
766
767 LinearModelSummaryImpl summarize(int i) {
768
769 String key = null;
770 if (this.rowNames != null) {
771 key = this.rowNames.get(i);
772 if (key == null) log.warn("Key null at " + i);
773 }
774
775 QRDecomposition qrd = null;
776 qrd = this.getQR(i);
777
778 if (qrd == null) {
779 log.debug("QR was null for item " + i);
780 return new LinearModelSummaryImpl(key);
781 }
782
783 int rdf;
784 if (this.residualDofs.isEmpty()) {
785 rdf = this.residualDof;
786 } else {
787 rdf = this.residualDofs.get(i);
788 }
789 assert !Double.isNaN(rdf);
790
791 if (rdf == 0) {
792 return new LinearModelSummaryImpl(key);
793 }
794
795 DoubleMatrix1D resid = MatrixUtil.removeMissingOrInfinite(this.residuals.viewRow(i));
796 DoubleMatrix1D f = MatrixUtil.removeMissingOrInfinite(fitted.viewRow(i));
797
798 DoubleMatrix1D rweights = null;
799 DoubleMatrix1D sqrtweights = null;
800 if (this.weights != null) {
801 rweights = MatrixUtil.removeMissingOrInfinite(fitted.viewRow(i), this.weights.viewRow(i).copy());
802 sqrtweights = rweights.copy().assign(Functions.sqrt);
803 } else {
804 rweights = new DenseDoubleMatrix1D(f.size()).assign(1.0);
805 sqrtweights = rweights.copy();
806 }
807
808 DoubleMatrix1D allCoef = coefficients.viewColumn(i);
809 DoubleMatrix1D estCoef = MatrixUtil.removeMissingOrInfinite(allCoef);
810
811 if (estCoef.size() == 0) {
812 log.warn("No coefficients estimated for row " + i + this.diagnosis(qrd));
813 log.info("Data for this row:\n" + this.b.viewRow(i));
814 return new LinearModelSummaryImpl(key);
815 }
816
817 int rank = qrd.getRank();
818 int n = qrd.getQ().rows();
819 assert rdf == n - rank : "Rank was not correct, expected " + rdf + " but got Q rows=" + n + ", #Coef=" + rank
820 + diagnosis(qrd);
821
822
823
824
825
826
827
828
829
830
831
832
833
834 double mss;
835
836 if (weights != null) {
837
838 if (hasIntercept) {
839
840 double m = f.copy().assign(Functions.div(rweights.zSum())).assign(rweights, Functions.mult).zSum();
841
842 mss = f.copy().assign(Functions.minus(m)).assign(Functions.square).assign(rweights, Functions.mult).zSum();
843 } else {
844 mss = f.copy().assign(Functions.square).assign(rweights, Functions.mult).zSum();
845 }
846
847 assert resid.size() == rweights.size();
848 } else {
849 if (hasIntercept) {
850 mss = f.copy().assign(Functions.minus(Descriptive.mean(new DoubleArrayList(f.toArray()))))
851 .assign(Functions.square).zSum();
852 } else {
853 mss = f.copy().assign(Functions.square).zSum();
854 }
855 }
856
857 double rss = resid.copy().assign(Functions.square).assign(rweights, Functions.mult).zSum();
858 if (weights != null) resid = resid.copy().assign(sqrtweights, Functions.mult);
859
860 double resvar = rss / rdf;
861
862
863 DoubleMatrix<String, String> summaryTable = DoubleMatrixFactory.dense(allCoef.size(), 4);
864 summaryTable.assign(Double.NaN);
865 summaryTable
866 .setColumnNames(Arrays.asList(new String[]{"Estimate", "Std. Error", "t value", "Pr(>|t|)"}));
867
868
869
870 DoubleMatrix2D XtXi = qrd.chol2inv();
871
872
873 DoubleMatrix1D sdUnscaled = MatrixUtil.diagonal(XtXi).assign(Functions.sqrt);
874
875
876
877
878 this.stdevUnscaled.put(i, sdUnscaled);
879
880 DoubleMatrix1D sdScaled = MatrixUtil
881 .removeMissingOrInfinite(MatrixUtil.diagonal(XtXi).assign(Functions.mult(resvar))
882 .assign(Functions.sqrt));
883
884
885
886 DoubleMatrix1D effects = qrd.effects(MatrixUtil.removeMissingOrInfinite(this.b.viewRow(i).copy()).assign(sqrtweights, Functions.mult));
887
888
889
890
891
892
893
894
895
896 double sigma = Math.sqrt(
897 effects.copy().viewPart(rank, effects.size() - rank).aggregate(Functions.plus, Functions.square) / (effects.size() - rank));
898
899
900
901
902
903 DoubleMatrix1D tstats;
904 TDistribution tdist;
905 if (this.hasBeenShrunken) {
906
907
908
909
910 tstats = estCoef.copy().assign(sdUnscaled, Functions.div).assign(
911 Functions.div(Math.sqrt(this.varPost.get(i))));
912
913
914
915
916
917
918
919
920
921 double dfTotal = rdf + this.dfPrior;
922
923 assert !Double.isNaN(dfTotal);
924 tdist = new TDistribution(dfTotal);
925 } else {
926
927
928
929
930
931 tstats = estCoef.copy().assign(sdScaled, Functions.div);
932 tdist = new TDistribution(rdf);
933 }
934
935 int j = 0;
936 for (int ti = 0; ti < allCoef.size(); ti++) {
937 double c = allCoef.get(ti);
938 assert this.designMatrix != null;
939 List<String> colNames = this.designMatrix.getMatrix().getColNames();
940
941 String dmcolname;
942 if (colNames == null) {
943 dmcolname = "Column_" + ti;
944 } else {
945 dmcolname = colNames.get(ti);
946 }
947
948 summaryTable.addRowName(dmcolname);
949 if (Double.isNaN(c)) {
950 continue;
951 }
952
953 summaryTable.set(ti, 0, estCoef.get(j));
954 summaryTable.set(ti, 1, sdUnscaled.get(j));
955 summaryTable.set(ti, 2, tstats.get(j));
956
957 double pval = 2.0 * (1.0 - tdist.cumulativeProbability(Math.abs(tstats.get(j))));
958 summaryTable.set(ti, 3, pval);
959
960 j++;
961
962 }
963
964 double rsquared = 0.0;
965 double adjRsquared = 0.0;
966 double fstatistic = 0.0;
967 int numdf = 0;
968 int dendf = 0;
969
970 if (terms.size() > 1 || !hasIntercept) {
971 int dfint = hasIntercept ? 1 : 0;
972 rsquared = mss / (mss + rss);
973 adjRsquared = 1 - (1 - rsquared) * ((n - dfint) / (double) rdf);
974
975 fstatistic = mss / (rank - dfint) / resvar;
976
977
978 numdf = rank - dfint;
979 dendf = rdf;
980
981 } else {
982
983 rsquared = 0.0;
984 adjRsquared = 0.0;
985 }
986
987
988
989 LinearModelSummaryImplelSummaryImpl.html#LinearModelSummaryImpl">LinearModelSummaryImpl lms = new LinearModelSummaryImpl( key, allCoef.toArray(), resid.toArray(), terms,
990 summaryTable, effects.toArray(), sdUnscaled.toArray(), rsquared, adjRsquared, fstatistic, numdf, dendf,
991 null, sigma, this.hasBeenShrunken, this.dfPrior );
992
993 return lms;
994 }
995
996
997
998
999 private void addInteraction() {
1000 if (designMatrix.getTerms().size() == 1) {
1001 throw new IllegalArgumentException("Need at least two factors for interactions");
1002 }
1003 if (designMatrix.getTerms().size() != 2) {
1004 throw new UnsupportedOperationException("Interactions not supported for more than two factors");
1005 }
1006 this.designMatrix.addInteraction(designMatrix.getTerms().get(0), designMatrix.getTerms().get(1));
1007 }
1008
1009
1010
1011
1012
1013
1014
1015
1016 private void addQR(Integer row, BitVector valuesPresent, QRDecomposition newQR) {
1017
1018
1019
1020
1021
1022
1023 if (this.weights != null) {
1024 this.qrsForWeighted.put(row, newQR);
1025 return;
1026 }
1027
1028 assert row != null;
1029
1030 if (valuesPresent == null) {
1031 valuesPresentMap.put(row, null);
1032 }
1033
1034 QRDecomposition cachedQr = qrs.get(valuesPresent);
1035 if (cachedQr == null) {
1036 qrs.put(valuesPresent, newQR);
1037 }
1038 valuesPresentMap.put(row, valuesPresent);
1039 }
1040
1041
1042
1043
1044 private void checkForMissingValues() {
1045 for (int i = 0; i < b.rows(); i++) {
1046 for (int j = 0; j < b.columns(); j++) {
1047 double v = b.get(i, j);
1048 if (Double.isNaN(v) || Double.isInfinite(v)) {
1049 this.hasMissing = true;
1050 log.info("Data has missing values (at row=" + (i + 1) + " column=" + (j + 1));
1051 break;
1052 }
1053 }
1054 if (this.hasMissing) break;
1055 }
1056 }
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071 private DoubleMatrix2D cleanDesign(final DoubleMatrix2D design, int ypsize, List<Integer> droppedColumns) {
1072
1073
1074
1075
1076 for (int j = 0; j < design.columns(); j++) {
1077 if (j == 0 && this.hasIntercept) continue;
1078 double lastValue = Double.NaN;
1079 boolean constant = true;
1080 for (int i = 0; i < design.rows(); i++) {
1081 double thisvalue = design.get(i, j);
1082 if (i > 0 && thisvalue != lastValue) {
1083 constant = false;
1084 break;
1085 }
1086 lastValue = thisvalue;
1087 }
1088 if (constant) {
1089 log.debug("Dropping constant column " + j);
1090 droppedColumns.add(j);
1091 continue;
1092 }
1093
1094 DoubleMatrix1D col = design.viewColumn(j);
1095
1096 for (int p = 0; p < j; p++) {
1097 boolean redundant = true;
1098 DoubleMatrix1D otherCol = design.viewColumn(p);
1099 for (int v = 0; v < col.size(); v++) {
1100 if (col.get(v) != otherCol.get(v)) {
1101 redundant = false;
1102 break;
1103 }
1104 }
1105 if (redundant) {
1106 log.debug("Dropping redundant column " + j);
1107 droppedColumns.add(j);
1108 break;
1109 }
1110 }
1111
1112 }
1113
1114 DoubleMatrix2D returnValue = MatrixUtil.dropColumns(design, droppedColumns);
1115
1116 return returnValue;
1117 }
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127 private void computeStats(DoubleMatrix2D dof, DoubleMatrix2D fStats, DoubleMatrix1D denominator,
1128 DoubleMatrix2D pvalues) {
1129 pvalues.assign(Double.NaN);
1130 int timesWarned = 0;
1131 for (int i = 0; i < fStats.rows(); i++) {
1132
1133 int rdof;
1134 if (this.residualDofs.isEmpty()) {
1135 rdof = residualDof;
1136 } else {
1137 rdof = this.residualDofs.get(i);
1138 }
1139
1140 for (int j = 0; j < fStats.columns(); j++) {
1141
1142 double ndof = dof.get(i, j);
1143
1144 if (ndof <= 0 || rdof <= 0) {
1145 pvalues.set(i, j, Double.NaN);
1146 fStats.set(i, j, Double.NaN);
1147 continue;
1148 }
1149
1150 if (j == fStats.columns() - 1) {
1151
1152 pvalues.set(i, j, Double.NaN);
1153 fStats.set(i, j, Double.NaN);
1154 continue;
1155 }
1156
1157
1158
1159
1160 if (fStats.get(i, j) < Constants.SMALLISH && denominator.get(i) < Constants.SMALLISH) {
1161 pvalues.set(i, j, Double.NaN);
1162 fStats.set(i, j, Double.NaN);
1163 continue;
1164 }
1165
1166 fStats.set(i, j, fStats.get(i, j) / denominator.get(i));
1167 try {
1168 FDistribution pf = new FDistribution(ndof, rdof + this.dfPrior);
1169 pvalues.set(i, j, 1.0 - pf.cumulativeProbability(fStats.get(i, j)));
1170 } catch (NotStrictlyPositiveException e) {
1171 if (timesWarned < 10) {
1172 log.warn("Pvalue could not be computed for F=" + fStats.get(i, j) + "; denominator was="
1173 + denominator.get(i) + "; Error: " + e.getMessage()
1174 + " (limited warnings of this type will be given)");
1175 timesWarned++;
1176 }
1177 pvalues.set(i, j, Double.NaN);
1178 }
1179
1180 }
1181 }
1182 }
1183
1184
1185
1186
1187
1188 private String diagnosis(QRDecomposition qrd) {
1189 StringBuilder buf = new StringBuilder();
1190 buf.append("\n--------\nLM State\n--------\n");
1191 buf.append("hasMissing=" + this.hasMissing + "\n");
1192 buf.append("hasIntercept=" + this.hasIntercept + "\n");
1193 buf.append("Design: " + this.designMatrix + "\n");
1194 if (this.b.rows() < 5) {
1195 buf.append("Data matrix: " + this.b + "\n");
1196 } else {
1197 buf.append("Data (first few rows): " + this.b.viewSelection(new int[]{0, 1, 2, 3, 4}, null) + "\n");
1198
1199 }
1200 buf.append("Current QR:" + qrd + "\n");
1201 return buf.toString();
1202 }
1203
1204
1205
1206
1207 private void fit() {
1208 if (this.weights == null) {
1209 lsf();
1210 return;
1211 }
1212 wlsf();
1213 }
1214
1215
1216
1217
1218
1219 private QRDecomposition getQR(BitVector valuesPresent) {
1220 assert this.weights == null;
1221 return qrs.get(valuesPresent);
1222 }
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234 private QRDecomposition getQR(Integer row) {
1235 if (!this.hasMissing && this.weights == null) {
1236 return this.qr;
1237 }
1238
1239 if (this.weights != null)
1240 return qrsForWeighted.get(row);
1241
1242 assert this.hasMissing;
1243 BitVector key = valuesPresentMap.get(row);
1244 if (key == null) return null;
1245 return qrs.get(key);
1246
1247 }
1248
1249
1250
1251
1252 private void lsf() {
1253
1254 assert this.weights == null;
1255
1256 checkForMissingValues();
1257 Algebra solver = new Algebra();
1258
1259 if (this.hasMissing) {
1260 double[][] rawResult = new double[b.rows()][];
1261 for (int i = 0; i < b.rows(); i++) {
1262
1263 DoubleMatrix1D row = b.viewRow(i);
1264 if (row.size() < 3) {
1265 rawResult[i] = new double[A.columns()];
1266 continue;
1267 }
1268 DoubleMatrix1D withoutMissing = lsfWmissing(i, row, A);
1269 if (withoutMissing == null) {
1270 rawResult[i] = new double[A.columns()];
1271 } else {
1272 rawResult[i] = withoutMissing.toArray();
1273 }
1274 }
1275 this.coefficients = new DenseDoubleMatrix2D(rawResult).viewDice();
1276
1277 } else {
1278
1279 this.qr = new QRDecomposition(A);
1280 this.coefficients = qr.solve(solver.transpose(b));
1281 this.residualDof = b.columns() - qr.getRank();
1282 if (residualDof <= 0) {
1283 throw new IllegalArgumentException(
1284 "No residual degrees of freedom to fit the model" + diagnosis(qr));
1285 }
1286
1287 }
1288 assert this.assign.isEmpty() || this.assign.size() == this.coefficients.rows() : assign.size()
1289 + " != # coefficients " + this.coefficients.rows();
1290
1291 assert this.coefficients.rows() == A.columns();
1292
1293
1294 this.fitted = solver.transpose(MatrixUtil.multWithMissing(A, coefficients));
1295
1296 if (this.hasMissing) {
1297 MatrixUtil.maskMissing(b, fitted);
1298 }
1299
1300 this.residuals = b.copy().assign(fitted, Functions.minus);
1301 }
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314 private DoubleMatrix1D lsfWmissing(Integer row, DoubleMatrix1D y, DoubleMatrix2D des) {
1315 Algebra solver = new Algebra();
1316
1317
1318 List<Double> ywithoutMissingList = new ArrayList<>(y.size());
1319 int size = y.size();
1320 boolean hasAssign = !this.assign.isEmpty();
1321 int countNonMissing = 0;
1322 for (int i = 0; i < size; i++) {
1323 double v = y.getQuick(i);
1324 if (!Double.isNaN(v) && !Double.isInfinite(v)) {
1325 countNonMissing++;
1326 }
1327 }
1328
1329 if (countNonMissing < 3) {
1330
1331
1332
1333 DoubleMatrix1D re = new DenseDoubleMatrix1D(des.columns());
1334 re.assign(Double.NaN);
1335 log.debug("Not enough non-missing values");
1336 this.addQR(row, null, null);
1337 this.residualDofs.add(countNonMissing - des.columns());
1338 if (hasAssign) this.assigns.add(new ArrayList<Integer>());
1339 return re;
1340 }
1341
1342 double[][] rawDesignWithoutMissing = new double[countNonMissing][];
1343 int index = 0;
1344 boolean missing = false;
1345
1346 BitVector bv = new BitVector(size);
1347 for (int i = 0; i < size; i++) {
1348 double yi = y.getQuick(i);
1349 if (Double.isNaN(yi) || Double.isInfinite(yi)) {
1350 missing = true;
1351 continue;
1352 }
1353 ywithoutMissingList.add(yi);
1354 bv.set(i);
1355 rawDesignWithoutMissing[index++] = des.viewRow(i).toArray();
1356 }
1357 double[] yWithoutMissing = ArrayUtils.toPrimitive(ywithoutMissingList.toArray(new Double[]{}));
1358 DenseDoubleMatrix2D yWithoutMissingAsMatrix = new DenseDoubleMatrix2D(new double[][]{yWithoutMissing});
1359
1360 DoubleMatrix2D designWithoutMissing = new DenseDoubleMatrix2D(rawDesignWithoutMissing);
1361
1362 boolean fail = false;
1363 List<Integer> droppedColumns = new ArrayList<>();
1364 designWithoutMissing = this.cleanDesign(designWithoutMissing, yWithoutMissingAsMatrix.size(), droppedColumns);
1365
1366 if (designWithoutMissing.columns() == 0 || designWithoutMissing.columns() > designWithoutMissing.rows()) {
1367 fail = true;
1368 }
1369
1370 if (fail) {
1371 DoubleMatrix1D re = new DenseDoubleMatrix1D(des.columns());
1372 re.assign(Double.NaN);
1373 this.addQR(row, null, null);
1374 this.residualDofs.add(countNonMissing - des.columns());
1375 if (hasAssign) this.assigns.add(new ArrayList<Integer>());
1376 return re;
1377 }
1378
1379 QRDecomposition rqr = null;
1380 if (this.weights != null) {
1381 rqr = new QRDecomposition(designWithoutMissing);
1382 addQR(row, null, rqr);
1383 } else if (missing) {
1384 rqr = this.getQR(bv);
1385 if (rqr == null) {
1386 rqr = new QRDecomposition(designWithoutMissing);
1387 addQR(row, bv, rqr);
1388 }
1389 } else {
1390
1391
1392 if (this.qr == null) {
1393 rqr = new QRDecomposition(des);
1394 } else {
1395
1396 rqr = this.qr;
1397 }
1398 }
1399
1400 this.addQR(row, bv, rqr);
1401
1402 int pivots = rqr.getRank();
1403
1404 int rdof = yWithoutMissingAsMatrix.size() - pivots;
1405 this.residualDofs.add(rdof);
1406
1407 DoubleMatrix2D coefs = rqr.solve(solver.transpose(yWithoutMissingAsMatrix));
1408
1409
1410
1411
1412 if (designWithoutMissing.columns() < des.columns()) {
1413 DoubleMatrix1D col = coefs.viewColumn(0);
1414 DoubleMatrix1D result = new DenseDoubleMatrix1D(des.columns());
1415 result.assign(Double.NaN);
1416 int k = 0;
1417 List<Integer> assignForRow = new ArrayList<>();
1418 for (int i = 0; i < des.columns(); i++) {
1419 if (droppedColumns.contains(i)) {
1420
1421 continue;
1422 }
1423
1424 if (hasAssign) assignForRow.add(this.assign.get(i));
1425 assert k < col.size();
1426 result.set(i, col.get(k));
1427 k++;
1428 }
1429 if (hasAssign) assigns.add(assignForRow);
1430 return result;
1431 }
1432 if (hasAssign) assigns.add(this.assign);
1433 return coefs.viewColumn(0);
1434
1435 }
1436
1437
1438
1439
1440
1441
1442
1443
1444 private List<GenericAnovaResult> summarizeAnova(DoubleMatrix2D ssq, DoubleMatrix2D dof, DoubleMatrix2D fStats,
1445 DoubleMatrix2D pvalues) {
1446
1447 assert ssq != null;
1448 assert dof != null;
1449 assert fStats != null;
1450 assert pvalues != null;
1451
1452 List<GenericAnovaResult> results = new ArrayList<>();
1453 for (int i = 0; i < fStats.rows(); i++) {
1454 Collection<AnovaEffect> efs = new ArrayList<>();
1455
1456
1457
1458
1459 for (int j = 0; j < fStats.columns() - 1; j++) {
1460 String effectName = terms.get(j);
1461 assert effectName != null;
1462 AnovaEffectls/AnovaEffect.html#AnovaEffect">AnovaEffect ae = new AnovaEffect(effectName, pvalues.get(i, j), fStats.get(i, j), dof.get(
1463 i, j ), ssq.get( i, j ), effectName.contains( ":" ), false );
1464 efs.add(ae);
1465 }
1466
1467
1468
1469
1470 int residCol = fStats.columns() - 1;
1471 AnovaEffectls/AnovaEffect.html#AnovaEffect">AnovaEffect ae = new AnovaEffect( "Residual", Double.NaN, Double.NaN, dof.get( i, residCol ) + this.dfPrior, ssq.get( i,
1472 residCol ), false, true );
1473 efs.add(ae);
1474
1475 GenericAnovaResultImplnovaResultImpl.html#GenericAnovaResultImpl">GenericAnovaResultImpl ao = new GenericAnovaResultImpl( this.rowNames != null ? this.rowNames.get( i ) : String.valueOf( i ), efs );
1476 results.add(ao);
1477 }
1478 return results;
1479 }
1480
1481
1482
1483
1484 private void wlsf() {
1485
1486 assert this.weights != null;
1487
1488 checkForMissingValues();
1489 Algebra solver = new Algebra();
1490
1491
1492
1493
1494 List<DoubleMatrix2D> AwList = new ArrayList<>(b.rows());
1495 List<DoubleMatrix1D> bList = new ArrayList<>(b.rows());
1496
1497
1498
1499
1500
1501
1502
1503
1504 for (int i = 0; i < b.rows(); i++) {
1505 DoubleMatrix1D wts = this.weights.viewRow(i).copy().assign(Functions.sqrt);
1506 DoubleMatrix1D bw = b.viewRow(i).copy().assign(wts, Functions.mult);
1507 DoubleMatrix2D Aw = A.copy();
1508 for (int j = 0; j < Aw.columns(); j++) {
1509 Aw.viewColumn(j).assign(wts, Functions.mult);
1510 }
1511 AwList.add(Aw);
1512 bList.add(bw);
1513 }
1514
1515 double[][] rawResult = new double[b.rows()][];
1516
1517 if (this.hasMissing) {
1518
1519
1520
1521 for (int i = 0; i < b.rows(); i++) {
1522 DoubleMatrix1D bw = bList.get(i);
1523 DoubleMatrix2D Aw = AwList.get(i);
1524 DoubleMatrix1D withoutMissing = lsfWmissing(i, bw, Aw);
1525 if (withoutMissing == null) {
1526 rawResult[i] = new double[A.columns()];
1527 } else {
1528 rawResult[i] = withoutMissing.toArray();
1529 }
1530 }
1531
1532 } else {
1533
1534
1535
1536 for (int i = 0; i < b.rows(); i++) {
1537 DoubleMatrix1D bw = bList.get(i);
1538 DoubleMatrix2D Aw = AwList.get(i);
1539 DoubleMatrix2D bw2D = new DenseDoubleMatrix2D(1, bw.size());
1540 bw2D.viewRow(0).assign(bw);
1541 QRDecompositionosition.html#QRDecomposition">QRDecomposition wqr = new QRDecomposition(Aw);
1542
1543
1544 this.addQR(i, null, wqr);
1545
1546 rawResult[i] = wqr.solve(solver.transpose(bw2D)).viewColumn(0).toArray();
1547 this.residualDof = bw.size() - wqr.getRank();
1548 assert this.residualDof >= 0;
1549 if (residualDof == 0) {
1550 throw new IllegalArgumentException("No residual degrees of freedom to fit the model"
1551 + diagnosis(wqr));
1552 }
1553 }
1554 }
1555
1556 this.coefficients = solver.transpose(new DenseDoubleMatrix2D(rawResult));
1557
1558 assert this.assign.isEmpty() || this.assign.size() == this.coefficients.rows() : assign.size()
1559 + " != # coefficients " + this.coefficients.rows();
1560 assert this.coefficients.rows() == A.columns();
1561
1562 this.fitted = solver.transpose(MatrixUtil.multWithMissing(A, coefficients));
1563
1564 if (this.hasMissing) {
1565 MatrixUtil.maskMissing(b, fitted);
1566 }
1567
1568 this.residuals = b.copy().assign(fitted, Functions.minus);
1569 }
1570
1571 }