View Javadoc
1   /*
2    * The baseCode project
3    *
4    * Copyright (c) 2011 University of British Columbia
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
7    * the License. You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
12   * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
13   * specific language governing permissions and limitations under the License.
14   */
15  package ubic.basecode.math.linearmodels;
16  
17  import cern.colt.bitvector.BitVector;
18  import cern.colt.list.DoubleArrayList;
19  import cern.colt.matrix.DoubleMatrix1D;
20  import cern.colt.matrix.DoubleMatrix2D;
21  import cern.colt.matrix.impl.DenseDoubleMatrix1D;
22  import cern.colt.matrix.impl.DenseDoubleMatrix2D;
23  import cern.colt.matrix.linalg.Algebra;
24  import cern.jet.math.Functions;
25  import cern.jet.stat.Descriptive;
26  import org.apache.commons.lang3.ArrayUtils;
27  import org.apache.commons.lang3.StringUtils;
28  import org.apache.commons.lang3.time.StopWatch;
29  import org.apache.commons.math3.distribution.FDistribution;
30  import org.apache.commons.math3.distribution.TDistribution;
31  import org.apache.commons.math3.exception.NotStrictlyPositiveException;
32  import org.slf4j.Logger;
33  import org.slf4j.LoggerFactory;
34  import ubic.basecode.dataStructure.matrix.DoubleMatrix;
35  import ubic.basecode.dataStructure.matrix.DoubleMatrixFactory;
36  import ubic.basecode.dataStructure.matrix.MatrixUtil;
37  import ubic.basecode.dataStructure.matrix.ObjectMatrix;
38  import ubic.basecode.math.Constants;
39  import ubic.basecode.math.linalg.QRDecomposition;
40  
41  import java.util.*;
42  
43  /**
44   * For performing "bulk" linear model fits, but also offers simple methods for simple univariate and multivariate
45   * regression for a single vector of dependent variables (data). Has support for ebayes-like shrinkage of variance.
46   * <p>
47   * Data with missing values is handled but is less memory efficient and somewhat slower. The main cost is that when
48   * there are no missing values, a single QR decomposition can be performed.
49   *
50   * @author paul
51   */
52  public class LeastSquaresFit {
53  
    private static Logger log = LoggerFactory.getLogger(LeastSquaresFit.class);

    /**
     * True once ebayesUpdate has been applied (limma-style shrinkage of variance estimates); affects summarize/anova.
     */
    boolean hasBeenShrunken = false;

    /**
     * The (raw) design matrix (the A in Ax = b); rows are observations.
     */
    private DoubleMatrix2D A;

    /**
     * Lists which factors (terms) are associated with which columns of the design matrix; 0 indicates the intercept.
     * Used for ANOVA
     */
    private List<Integer> assign = new ArrayList<>();

    /**
     * Row-specific assign values, for use when there are missing values; Used for ANOVA
     */
    private List<List<Integer>> assigns = new ArrayList<>();

    /**
     * The data (dependent variables): one row per model to fit (the b in Ax = b).
     * NOTE: the original comment said "independent variables", but this holds the response data.
     */
    private DoubleMatrix2D b;

    /**
     * Model fit coefficients (the x in Ax=b); one column per fitted model, one row per parameter.
     */
    private DoubleMatrix2D coefficients = null;

    /**
     * Original design matrix, if provided or generated from constructor arguments.
     */
    private DesignMatrix designMatrix;

    /**
     * Prior degrees of freedom, for ebayes. Default value is zero (no shrinkage applied).
     */
    private double dfPrior = 0;

    /**
     * Fitted values, same dimensions as the data.
     */
    private DoubleMatrix2D fitted;

    /**
     * True if model includes intercept
     */
    private boolean hasIntercept = true;

    /**
     * True if data has missing values.
     */
    private boolean hasMissing = false;

    /**
     * QR decomposition of the design matrix; will only be non-null if the QR is the same for all data.
     */
    private QRDecomposition qr = null;

    /**
     * Used if we have missing values so QR might be different for each row. The key
     * is the bitvector representing the values present. DO NOT
     * ACCESS DIRECTLY, use the getQR methods.
     */
    private Map<BitVector, QRDecomposition> qrs = new HashMap<>();

    /**
     * Used if we are using weighted regression, so QR is different for each row. DO NOT
     * ACCESS DIRECTLY, use the getQR methods.
     */
    private Map<Integer, QRDecomposition> qrsForWeighted = new HashMap<>();

    // Global residual degrees of freedom, shared by all rows; -1 until the fit has been performed.
    private int residualDof = -1;

    /**
     * Used if we have missing values so RDOF might be different for each row (we can actually get away without this)
     */
    private List<Integer> residualDofs = new ArrayList<>();

    /**
     * Residuals of the fit, same dimensions as the data.
     */
    private DoubleMatrix2D residuals = null;

    /**
     * Row names of the data; optional, but useful (used as keys for summaries).
     */
    private List<String> rowNames;

    /**
     * Map of data rows to sdUnscaled (a la limma) Use of treemap: probably not necessary.
     */
    private Map<Integer, DoubleMatrix1D> stdevUnscaled = new TreeMap<>();

    /**
     * Names of the factors (terms)
     */
    private List<String> terms;

    /**
     * Map of row indices to values-present key (see qrs).
     */
    private Map<Integer, BitVector> valuesPresentMap = new HashMap<>();

    /**
     * ebayes per-item variance estimates (if computed, null otherwise)
     */
    private DoubleMatrix1D varPost = null;

    /**
     * prior variance estimate (if computed, null otherwise)
     */
    private Double varPrior = null;

    /**
     * Observation weights for weighted regression; null for ordinary least squares.
     */
    private DoubleMatrix2D weights = null;
176 
177     /**
178      * Preferred interface if you want control over how the design is set up.
179      *
180      * @param designMatrix
181      * @param data
182      */
183     public LeastSquaresFit(DesignMatrix designMatrix, DoubleMatrix<String, String> data) {
184         this.designMatrix = designMatrix;
185         this.A = designMatrix.getDoubleMatrix();
186         this.assign = designMatrix.getAssign();
187         this.terms = designMatrix.getTerms();
188 
189         this.rowNames = data.getRowNames();
190         this.b = new DenseDoubleMatrix2D(data.asArray());
191         boolean hasInterceptTerm = this.terms.contains( LinearModelSummary.INTERCEPT_COEFFICIENT_NAME);
192         this.hasIntercept = designMatrix.hasIntercept();
193         assert hasInterceptTerm == this.hasIntercept : diagnosis(null);
194         fit();
195     }
196 
197     /**
198      * Weighted least squares fit between two matrices
199      *
200      * @param designMatrix
201      * @param data
202      * @param weights      to be used in modifying the influence of the observations in data.
203      */
204     public LeastSquaresFit(DesignMatrix designMatrix, DoubleMatrix<String, String> data,
205                            final DoubleMatrix2D weights) {
206         this.designMatrix = designMatrix;
207         DoubleMatrix2D X = designMatrix.getDoubleMatrix();
208         this.assign = designMatrix.getAssign();
209         this.terms = designMatrix.getTerms();
210         this.A = X;
211         this.rowNames = data.getRowNames();
212         this.b = new DenseDoubleMatrix2D(data.asArray());
213         boolean hasInterceptTerm = this.terms.contains( LinearModelSummary.INTERCEPT_COEFFICIENT_NAME);
214         this.hasIntercept = designMatrix.hasIntercept();
215         assert hasInterceptTerm == this.hasIntercept : diagnosis(null);
216         this.weights = weights;
217         fit();
218     }
219 
220     /**
221      * Preferred interface for weighted least squares fit between two matrices
222      *
223      * @param designMatrix
224      * @param b            the data
225      * @param weights      to be used in modifying the influence of the observations in vectorB.
226      */
227     public LeastSquaresFit(DesignMatrix designMatrix, DoubleMatrix2D b, final DoubleMatrix2D weights) {
228 
229         this.designMatrix = designMatrix;
230         DoubleMatrix2D X = designMatrix.getDoubleMatrix();
231         this.assign = designMatrix.getAssign();
232         this.terms = designMatrix.getTerms();
233         this.A = X;
234         this.b = b;
235         boolean hasInterceptTerm = this.terms.contains( LinearModelSummary.INTERCEPT_COEFFICIENT_NAME);
236         this.hasIntercept = designMatrix.hasIntercept();
237         assert hasInterceptTerm == this.hasIntercept : diagnosis(null);
238 
239         this.weights = weights;
240 
241         fit();
242 
243     }
244 
245     /**
246      * Least squares fit between two vectors. Always adds an intercept!
247      *
248      * @param vectorA Design
249      * @param vectorB Data
250      */
251     public LeastSquaresFit(DoubleMatrix1D vectorA, DoubleMatrix1D vectorB) {
252         assert vectorA.size() == vectorB.size();
253 
254         this.A = new DenseDoubleMatrix2D(vectorA.size(), 2);
255         this.b = new DenseDoubleMatrix2D(1, vectorB.size());
256 
257         for (int i = 0; i < vectorA.size(); i++) {
258             A.set(i, 0, 1);
259             A.set(i, 1, vectorA.get(i));
260             b.set(0, i, vectorB.get(i));
261         }
262 
263         fit();
264     }
265 
266     /**
267      * Stripped-down interface for simple use. Least squares fit between two vectors. Always adds an intercept!
268      *
269      * @param vectorA Design
270      * @param vectorB Data
271      * @param weights to be used in modifying the influence of the observations in vectorB.
272      */
273     public LeastSquaresFit(DoubleMatrix1D vectorA, DoubleMatrix1D vectorB, final DoubleMatrix1D weights) {
274 
275         assert vectorA.size() == vectorB.size();
276         assert vectorA.size() == weights.size();
277 
278         this.A = new DenseDoubleMatrix2D(vectorA.size(), 2);
279         this.b = new DenseDoubleMatrix2D(1, vectorB.size());
280         this.weights = new DenseDoubleMatrix2D(1, weights.size());
281 
282         for (int i = 0; i < vectorA.size(); i++) {
283             //   double ws = Math.sqrt( weights.get( i ) );
284             A.set(i, 0, 1);
285             A.set(i, 1, vectorA.get(i));
286             b.set(0, i, vectorB.get(i));
287             this.weights.set(0, i, weights.get(i));
288         }
289 
290         fit();
291     }
292 
    /**
     * ANOVA not possible (use the other constructors, which carry term/assign information).
     *
     * @param A Design matrix, which will be used directly in least squares regression
     * @param b Data matrix, containing data in rows.
     */
    public LeastSquaresFit(DoubleMatrix2D A, DoubleMatrix2D b) {
        this.A = A;
        this.b = b;
        fit(); // solve immediately; results are available via the getters
    }
304 
305     /**
306      * Weighted least squares fit between two matrices
307      *
308      * @param A       Design
309      * @param b       Data
310      * @param weights to be used in modifying the influence of the observations in b. If null, will be ignored.
311      */
312     public LeastSquaresFit(DoubleMatrix2D A, DoubleMatrix2D b, final DoubleMatrix2D weights) {
313         assert A != null;
314         assert b != null;
315         assert A.rows() == b.columns();
316         assert weights == null || b.columns() == weights.columns();
317         assert weights == null || b.rows() == weights.rows();
318 
319         this.A = A;
320         this.b = b;
321         this.weights = weights;
322 
323         fit();
324 
325     }
326 
327     /**
328      * @param sampleInfo information that will be converted to a design matrix; intercept term is added.
329      * @param data       Data matrix
330      */
331     public LeastSquaresFit(ObjectMatrix<String, String, Object> sampleInfo, DenseDoubleMatrix2D data) {
332 
333         this.designMatrix = new DesignMatrix(sampleInfo, true);
334 
335         this.hasIntercept = true;
336         this.A = designMatrix.getDoubleMatrix();
337         this.assign = designMatrix.getAssign();
338         this.terms = designMatrix.getTerms();
339 
340         this.b = data;
341         fit();
342     }
343 
344     /**
345      * @param sampleInfo
346      * @param data
347      * @param interactions add interaction term (two-way only is supported)
348      */
349     public LeastSquaresFit(ObjectMatrix<String, String, Object> sampleInfo, DenseDoubleMatrix2D data,
350                            boolean interactions) {
351         this.designMatrix = new DesignMatrix(sampleInfo, true);
352 
353         if (interactions) {
354             addInteraction();
355         }
356 
357         this.A = designMatrix.getDoubleMatrix();
358         this.assign = designMatrix.getAssign();
359         this.terms = designMatrix.getTerms();
360 
361         this.b = data;
362         fit();
363     }
364 
365     /**
366      * NamedMatrix allows easier handling of the results.
367      *
368      * @param design information that will be converted to a design matrix; intercept term is added.
369      * @param b      Data matrix
370      */
371     public LeastSquaresFit(ObjectMatrix<String, String, Object> design, DoubleMatrix<String, String> b) {
372         this.designMatrix = new DesignMatrix(design, true);
373 
374         this.A = designMatrix.getDoubleMatrix();
375         this.assign = designMatrix.getAssign();
376         this.terms = designMatrix.getTerms();
377 
378         this.b = new DenseDoubleMatrix2D(b.asArray());
379         this.rowNames = b.getRowNames();
380         fit();
381     }
382 
383     /**
384      * NamedMatrix allows easier handling of the results.
385      *
386      * @param design information that will be converted to a design matrix; intercept term is added.
387      * @param data   Data matrix
388      */
389     public LeastSquaresFit(ObjectMatrix<String, String, Object> design, DoubleMatrix<String, String> data,
390                            boolean interactions) {
391         this.designMatrix = new DesignMatrix(design, true);
392 
393         if (interactions) {
394             addInteraction();
395         }
396 
397         DoubleMatrix2D X = designMatrix.getDoubleMatrix();
398         this.assign = designMatrix.getAssign();
399         this.terms = designMatrix.getTerms();
400         this.A = X;
401         this.b = new DenseDoubleMatrix2D(data.asArray());
402         fit();
403     }
404 
    /**
     * The matrix of coefficients x for Ax = b (parameter estimates). Each column represents one fitted model (e.g., one
     * gene); there is a row for each parameter.
     *
     * @return the coefficient matrix; null before the fit has been performed
     */
    public DoubleMatrix2D getCoefficients() {
        return coefficients;
    }

    /**
     * @return the eBayes prior degrees of freedom; 0 unless ebayesUpdate has been run
     */
    public double getDfPrior() {
        return dfPrior;
    }

    /**
     * @return the fitted values, same dimensions as the data
     */
    public DoubleMatrix2D getFitted() {
        return fitted;
    }

    /**
     * @return the global residual degrees of freedom; -1 before fitting. When the data have missing values,
     *         use getResidualDofs for the row-specific values instead.
     */
    public int getResidualDof() {
        return residualDof;
    }

    /**
     * @return row-specific residual degrees of freedom; empty unless the data had missing values
     */
    public List<Integer> getResidualDofs() {
        return residualDofs;
    }

    /**
     * @return the residuals of the fit, same dimensions as the data
     */
    public DoubleMatrix2D getResiduals() {
        return residuals;
    }
434 
    /**
     * Compute externally studentized residuals: each residual is scaled by a variance estimate that excludes
     * the observation itself (leave-one-out), divided by sqrt(1 - leverage).
     * <p>
     * Assumes we have only one QR (no missing values).
     *
     * @return matrix of studentized residuals, same dimensions as the residuals
     * @throws UnsupportedOperationException if the data had missing values
     */
    public DoubleMatrix2D getStudentizedResiduals() {
        int dof = this.residualDof - 1; // MINUS 1 for external studentizing: one observation is left out.

        assert dof > 0;

        if (this.hasMissing) {
            throw new UnsupportedOperationException("Studentizing not supported with missing values");
        }

        DoubleMatrix2D result = this.residuals.like();

        /*
         * Diagonal of the hat matrix at j (hj) is the squared norm of the jth row of Q.
         */
        DoubleMatrix2D q = this.getQR(0).getQ();

        DoubleMatrix1D hatdiag = new DenseDoubleMatrix1D(residuals.columns());
        for (int j = 0; j < residuals.columns(); j++) {
            double hj = q.viewRow(j).aggregate(Functions.plus, Functions.square);
            // clamp leverage numerically at 1 so that 1/(1-hj) does not blow up below
            if (1.0 - hj < Constants.TINY) {
                hj = 1.0;
            }
            hatdiag.set(j, hj);
        }

        /*
         * For each fitted model: sum of squared residuals, then studentize each residual.
         */
        for (int i = 0; i < residuals.rows(); i++) {

            // these are 'internally studentized'
            // double sdhat = Math.sqrt( residuals.viewRow( i ).aggregate( Functions.plus, Functions.square ) / dof );

            DoubleMatrix1D residualRow = residuals.viewRow(i);

            if (this.weights != null) {
                // use weighted residuals (multiply by sqrt of the weights).
                DoubleMatrix1D w = weights.viewRow(i).copy().assign(Functions.sqrt);
                residualRow = residualRow.copy().assign(w, Functions.mult);
            }

            double sum = residualRow.aggregate(Functions.plus, Functions.square);

            for (int j = 0; j < residualRow.size(); j++) {

                double hj = hatdiag.get(j);

                // this is how we externalize: remove observation j's contribution from the variance estimate
                double sigma;

                if (hj < 1.0) {
                    sigma = Math.sqrt((sum - Math.pow(residualRow.get(j), 2) / (1.0 - hj)) / dof);
                } else {
                    // leverage of 1: fall back to the full residual sum of squares
                    sigma = Math.sqrt(sum / dof);
                }

                double res = residualRow.getQuick(j);
                double studres = res / (sigma * Math.sqrt(1.0 - hj));

                if (log.isDebugEnabled()) log.debug("sigma=" + sigma + " hj=" + hj + " stres=" + studres);

                result.set(i, j, studres);
            }
        }
        return result;
    }
504 
    /**
     * @return eBayes posterior (moderated) per-item variance estimates; null unless ebayesUpdate has been run
     */
    public DoubleMatrix1D getVarPost() {
        return varPost;
    }

    /**
     * @return the eBayes prior variance estimate.
     *         NOTE(review): the backing field is a nullable Double that is only set by ebayesUpdate; calling this
     *         beforehand will throw a NullPointerException on unboxing — confirm callers guard with isHasBeenShrunken.
     */
    public double getVarPrior() {
        return varPrior;
    }

    /**
     * @return the observation weights; null for ordinary (unweighted) least squares
     */
    public DoubleMatrix2D getWeights() {
        return weights;
    }

    /**
     * @return true if eBayes shrinkage (ebayesUpdate) has been applied
     */
    public boolean isHasBeenShrunken() {
        return hasBeenShrunken;
    }

    /**
     * @return true if the data had missing values
     */
    public boolean isHasMissing() {
        return hasMissing;
    }
524 
    /**
     * Summarize all fits without ANOVA. If ebayesUpdate has been run, variance and degrees of freedom
     * estimated using the limma eBayes algorithm will be used.
     *
     * @return one summary per fitted model
     */
    public List<LinearModelSummary> summarize() {
        return this.summarize(false); // false: skip the ANOVA computation
    }
532 
533     /**
534      * @param anova if true, ANOVA will be computed
535      * @return
536      */
537     public List<LinearModelSummary> summarize(boolean anova) {
538         List<LinearModelSummary> lmsresults = new ArrayList<>();
539 
540         List<GenericAnovaResult> anovas = null;
541         if (anova) {
542             anovas = this.anova();
543         }
544 
545         StopWatch timer = new StopWatch();
546         timer.start();
547         log.info("Summarizing");
548         for (int i = 0; i < this.coefficients.columns(); i++) {
549             LinearModelSummaryImpl lms = summarize(i);
550             lms.setAnova(anovas != null ? anovas.get(i) : null);
551             lmsresults.add(lms);
552             if (timer.getTime() > 10000 && i > 0 && i % 10000 == 0) {
553                 log.info("Summarized " + i);
554             }
555         }
556         log.info("Summzarized " + this.coefficients.columns() + " results");
557 
558         return lmsresults;
559     }
560 
561     /**
562      * @param anova perform ANOVA, otherwise only basic summarization will be done. If ebayesUpdate has been run,
563      *              variance and degrees of freedom
564      *              estimated using the limma eBayes algorithm will be used.
565      * @return
566      */
567     public Map<String, LinearModelSummary> summarizeByKeys(boolean anova) {
568         List<LinearModelSummary> summaries = this.summarize(anova);
569         Map<String, LinearModelSummary> result = new LinkedHashMap<>();
570         for (LinearModelSummary lms : summaries) {
571             if (StringUtils.isBlank(lms.getKey())) {
572                 /*
573                  * Perhaps we should just use an integer.
574                  */
575                 throw new IllegalStateException("Key must not be blank");
576             }
577 
578             if (result.containsKey(lms.getKey())) {
579                 throw new IllegalStateException("Duplicate key " + lms.getKey());
580             }
581             result.put(lms.getKey(), lms);
582         }
583         return result;
584     }
585 
    /**
     * Compute ANOVA based on the model fit (Type I SSQ, sequential).
     * <p>
     * The idea is to add up the sums of squares (and dof) for all parameters associated with a particular factor.
     * <p>
     * This code is more or less ported from R summary.aov.
     *
     * @return one ANOVA result per fitted model
     */
    protected List<GenericAnovaResult> anova() {

        // vector of ones used to sum each row via matrix multiplication
        DoubleMatrix1D ones = new DenseDoubleMatrix1D(residuals.columns());
        ones.assign(1.0);

        /*
         * For ebayes, instead of this value (divided by rdof), we'll use the moderated sigma^2
         */
        DoubleMatrix1D residualSumsOfSquares;

        if (this.weights == null) {
            residualSumsOfSquares = MatrixUtil.multWithMissing(residuals.copy().assign(Functions.square),
                    ones);
        } else {
            // weighted case: residuals are multiplied by sqrt(weight) before squaring
            residualSumsOfSquares = MatrixUtil.multWithMissing(
                    residuals.copy().assign(this.weights.copy().assign(Functions.sqrt), Functions.mult).assign(Functions.square),
                    ones);
        }

        DoubleMatrix2D effects = null;
        if (this.hasMissing || this.weights != null) {
            // per-row QR: compute the effects row by row
            effects = new DenseDoubleMatrix2D(this.b.rows(), this.A.columns());
            effects.assign(Double.NaN);
            for (int i = 0; i < this.b.rows(); i++) {
                QRDecomposition qrd = this.getQR(i);
                if (qrd == null) {
                    // means we did not get a fit
                    for (int j = 0; j < effects.columns(); j++) {
                        effects.set(i, j, Double.NaN);
                    }
                    continue;
                }

                /*
                 * Compute Qty for the specific y, dealing with missing values.
                 */
                DoubleMatrix1D brow = b.viewRow(i);
                DoubleMatrix1D browWithoutMissing = MatrixUtil.removeMissingOrInfinite(brow);

                DoubleMatrix1D tqty;
                if (weights != null) {
                    // weight the data (by sqrt of the weights) before projecting
                    DoubleMatrix1D w = MatrixUtil.removeMissingOrInfinite(brow, this.weights.viewRow(i).copy().assign(Functions.sqrt));
                    assert w.size() == browWithoutMissing.size();
                    DoubleMatrix1D bw = browWithoutMissing.copy().assign(w, Functions.mult);
                    tqty = qrd.effects(bw);
                } else {
                    tqty = qrd.effects(browWithoutMissing);
                }

                // view just part we need; put values back so missingness is restored.
                for (int j = 0; j < qrd.getRank(); j++) {
                    effects.set(i, j, tqty.get(j));
                }
            }

        } else {
            // single shared QR: compute all effects in one shot
            assert this.qr != null;
            effects = qr.effects(this.b.viewDice().copy()).viewDice();
        }

        /* this is t(Qfty), the effects associated with the parameters only! We already have the residuals. */
        effects.assign(Functions.square); // because we're going to compute sums of squares.

        /*
         * Add up the ssr for the columns within each factor.
         */
        Set<Integer> facs = new TreeSet<>();
        facs.addAll(assign);

        // one column per factor plus a final column for the residual SSQ/DOF
        DoubleMatrix2D ssq = new DenseDoubleMatrix2D(effects.rows(), facs.size() + 1);
        DoubleMatrix2D dof = new DenseDoubleMatrix2D(effects.rows(), facs.size() + 1);
        dof.assign(0.0);
        ssq.assign(0.0);
        List<Integer> assignToUse = assign; // if has missing values, this will get swapped.

        for (int i = 0; i < ssq.rows(); i++) {

            ssq.set(i, facs.size(), residualSumsOfSquares.get(i));
            int rdof;
            if (this.residualDofs.isEmpty()) {
                rdof = this.residualDof;
            } else {
                rdof = this.residualDofs.get(i);
            }
            /*
             * Store residual DOF in the last column.
             */
            dof.set(i, facs.size(), rdof);

            if (!assigns.isEmpty()) {
                // when missing values are present we end up here.
                assignToUse = assigns.get(i);
            }

            // these have been squared.
            DoubleMatrix1D effectsForRow = effects.viewRow(i);

            if (assignToUse.size() != effectsForRow.size()) {
                /*
                 * Effects will have NaNs, just so you know.
                 */
                log.debug("Check me: effects has missing values");
            }

            for (int j = 0; j < assignToUse.size(); j++) {

                double valueToAdd = effectsForRow.get(j);
                int col = assignToUse.get(j);
                // without an intercept the factor indices shift down by one
                if (col > 0 && !this.hasIntercept) {
                    col = col - 1;
                }

                /*
                 * Accumulate the sums for the different parameters associated with the same factor. When the data is
                 * "constant" you can end up with a tiny but non-zero coefficient,
                 * but it's bogus. See bug 3177. Ignore missing values.
                 */
                if (!Double.isNaN(valueToAdd) && valueToAdd > Constants.SMALL) {
                    ssq.set(i, col, ssq.get(i, col) + valueToAdd);
                    dof.set(i, col, dof.get(i, col) + 1);
                }
            }
        }

        // Denominator of the F statistic: moderated variance if shrunken, otherwise RSS/rdof.
        DoubleMatrix1D denominator;
        if (this.hasBeenShrunken) {
            denominator = this.varPost.copy();
        } else {
            if (this.residualDofs.isEmpty()) {
                // when there's just one value...
                denominator = residualSumsOfSquares.copy().assign(Functions.div(residualDof));
            } else {
                denominator = new DenseDoubleMatrix1D(residualSumsOfSquares.size());
                for (int i = 0; i < residualSumsOfSquares.size(); i++) {
                    denominator.set(i, residualSumsOfSquares.get(i) / residualDofs.get(i));
                }
            }
        }

        // Fstats and pvalues will go here. Just initializing.
        DoubleMatrix2D fStats = ssq.copy().assign(dof, Functions.div); // this is the numerator.
        DoubleMatrix2D pvalues = fStats.like();
        computeStats(dof, fStats, denominator, pvalues);

        return summarizeAnova(ssq, dof, fStats, pvalues);
    }
741 
    /**
     * Provide results of limma eBayes algorithm. These will be used next time summarize is called on this.
     *
     * @param d  dfPrior: prior degrees of freedom
     * @param v  varPrior: prior variance estimate
     * @param vp varPost: posterior (moderated) per-item variance estimates
     */
    protected void ebayesUpdate(double d, double v, DoubleMatrix1D vp) {
        this.dfPrior = d;
        this.varPrior = v; // somewhat confusingly, this is sd.prior in limma; var.prior gets used for B stat.
        this.varPost = vp; // also called s2.post; without ebayes this is the same as sigma^2 = rssq/rdof
        this.hasBeenShrunken = true;
    }
755 
756     /**
757      * Compute and organize the various summary statistics for a fit.
758      * <p>
759      * If ebayes has been run, variance and degrees of freedom
760      * estimated using the limma eBayes algorithm will be used.
761      * <p>
762      * Does not populate the ANOVA.
763      *
764      * @param i index of the fit to summarize
765      * @return
766      */
767     LinearModelSummaryImpl summarize(int i) {
768 
769         String key = null;
770         if (this.rowNames != null) {
771             key = this.rowNames.get(i);
772             if (key == null) log.warn("Key null at " + i);
773         }
774 
775         QRDecomposition qrd = null;
776         qrd = this.getQR(i);
777 
778         if (qrd == null) {
779             log.debug("QR was null for item " + i);
780             return new LinearModelSummaryImpl(key);
781         }
782 
783         int rdf;
784         if (this.residualDofs.isEmpty()) {
785             rdf = this.residualDof; // no missing values, so it's global
786         } else {
787             rdf = this.residualDofs.get(i); // row-specific
788         }
789         assert !Double.isNaN(rdf);
790 
791         if (rdf == 0) {
792             return new LinearModelSummaryImpl(key);
793         }
794 
795         DoubleMatrix1D resid = MatrixUtil.removeMissingOrInfinite(this.residuals.viewRow(i));
796         DoubleMatrix1D f = MatrixUtil.removeMissingOrInfinite(fitted.viewRow(i));
797 
798         DoubleMatrix1D rweights = null;
799         DoubleMatrix1D sqrtweights = null;
800         if (this.weights != null) {
801             rweights = MatrixUtil.removeMissingOrInfinite(fitted.viewRow(i), this.weights.viewRow(i).copy());
802             sqrtweights = rweights.copy().assign(Functions.sqrt);
803         } else {
804             rweights = new DenseDoubleMatrix1D(f.size()).assign(1.0);
805             sqrtweights = rweights.copy();
806         }
807 
808         DoubleMatrix1D allCoef = coefficients.viewColumn(i); // has NA for unestimated parameters.
809         DoubleMatrix1D estCoef = MatrixUtil.removeMissingOrInfinite(allCoef); // estimated parameters.
810 
811         if (estCoef.size() == 0) {
812             log.warn("No coefficients estimated for row " + i + this.diagnosis(qrd));
813             log.info("Data for this row:\n" + this.b.viewRow(i));
814             return new LinearModelSummaryImpl(key);
815         }
816 
817         int rank = qrd.getRank();
818         int n = qrd.getQ().rows();
819         assert rdf == n - rank : "Rank was not correct, expected " + rdf + " but got Q rows=" + n + ", #Coef=" + rank
820                 + diagnosis(qrd);
821 
822         //        if (is.null(w)) {
823         //            mss <- if (attr(z$terms, "intercept"))
824         //                sum((f - mean(f))^2) else sum(f^2)
825         //            rss <- sum(r^2)
826         //        } else {
827         //            mss <- if (attr(z$terms, "intercept")) {
828         //                m <- sum(w * f /sum(w))
829         //                sum(w * (f - m)^2)
830         //            } else sum(w * f^2)
831         //            rss <- sum(w * r^2)
832         //            r <- sqrt(w) * r
833         //        }
834         double mss;
835 
836         if (weights != null) {
837 
838             if (hasIntercept) {
839                 //  m <- sum(w * f /sum(w))
840                 double m = f.copy().assign(Functions.div(rweights.zSum())).assign(rweights, Functions.mult).zSum();
841 
842                 mss = f.copy().assign(Functions.minus(m)).assign(Functions.square).assign(rweights, Functions.mult).zSum();
843             } else {
844                 mss = f.copy().assign(Functions.square).assign(rweights, Functions.mult).zSum();
845             }
846 
847             assert resid.size() == rweights.size();
848         } else {
849             if (hasIntercept) {
850                 mss = f.copy().assign(Functions.minus(Descriptive.mean(new DoubleArrayList(f.toArray()))))
851                         .assign(Functions.square).zSum();
852             } else {
853                 mss = f.copy().assign(Functions.square).zSum();
854             }
855         }
856 
857         double rss = resid.copy().assign(Functions.square).assign(rweights, Functions.mult).zSum();
858         if (weights != null) resid = resid.copy().assign(sqrtweights, Functions.mult);
859 
860         double resvar = rss / rdf; // sqrt of this is sigma.
861 
862         // matrix to hold the summary information.
863         DoubleMatrix<String, String> summaryTable = DoubleMatrixFactory.dense(allCoef.size(), 4);
864         summaryTable.assign(Double.NaN);
865         summaryTable
866                 .setColumnNames(Arrays.asList(new String[]{"Estimate", "Std. Error", "t value", "Pr(>|t|)"}));
867 
868         // XtXi is (X'X)^-1; in R limma this is fit$cov.coefficients: "unscaled covariance matrix of the estimable coefficients"
869 
870         DoubleMatrix2D XtXi = qrd.chol2inv();
871 
872         // the diagonal has the (unscaled) variances s; NEGATIVE VALUES can occur when not of full rank...
873         DoubleMatrix1D sdUnscaled = MatrixUtil.diagonal(XtXi).assign(Functions.sqrt);
874 
875         // in contrast the stdev.unscaled uses the gene-specific QR
876         // //  stdev.unscaled[i,est] <- sqrt(diag(chol2inv(out$qr$qr,size=out$rank)))
877 
878         this.stdevUnscaled.put(i, sdUnscaled);
879 
880         DoubleMatrix1D sdScaled = MatrixUtil
881                 .removeMissingOrInfinite(MatrixUtil.diagonal(XtXi).assign(Functions.mult(resvar))
882                         .assign(Functions.sqrt)); // wasteful...
883 
884         // AKA Qty
885 
886         DoubleMatrix1D effects = qrd.effects(MatrixUtil.removeMissingOrInfinite(this.b.viewRow(i).copy()).assign(sqrtweights, Functions.mult));
887 
888         // sigma is the estimated sd of the parameters. In limma, fit$sigma <- sqrt(mean(fit$effects[-(1:fit$rank)]^2)
889         // in lm.series, it's same: sigma[i] <- sqrt(mean(out$effects[-(1:out$rank)]^2))
890         // first p elements are associated with the coefficients; same as residuals (QQty) / resid dof.
891         //        double sigma = Math
892         //                .sqrt( resid.copy().assign( Functions.square ).aggregate( Functions.plus, Functions.identity )
893         //                        / ( resid.size() - rank ) );
894 
895         // Based on effects
896         double sigma = Math.sqrt(
897                 effects.copy().viewPart(rank, effects.size() - rank).aggregate(Functions.plus, Functions.square) / (effects.size() - rank));
898 
899         /*
900          * Finally ready to compute t-stats and finish up.
901          */
902 
903         DoubleMatrix1D tstats;
904         TDistribution tdist;
905         if (this.hasBeenShrunken) {
906             /*
907              * moderated t-statistic
908              * out$t <- coefficients / stdev.unscaled / sqrt(out$s2.post)
909              */
910             tstats = estCoef.copy().assign(sdUnscaled, Functions.div).assign(
911                     Functions.div(Math.sqrt(this.varPost.get(i))));
912 
913             /*
914              * df.total <- df.residual + out$df.prior
915              * df.pooled <- sum(df.residual,na.rm=TRUE)
916              * df.total <- pmin(df.total,df.pooled)
917              * out$df.total <- df.total
918              * out$p.value <- 2*pt(-abs(out$t),df=df.total
919              */
920 
921             double dfTotal = rdf + this.dfPrior;
922 
923             assert !Double.isNaN(dfTotal);
924             tdist = new TDistribution(dfTotal);
925         } else {
926             /*
927              * Or we could get these from
928              * tstat.ord <- coefficients/ stdev.unscaled/ sigma
929              * And not have to store the sdScaled.
930              */
931             tstats = estCoef.copy().assign(sdScaled, Functions.div);
932             tdist = new TDistribution(rdf);
933         }
934 
935         int j = 0;
936         for (int ti = 0; ti < allCoef.size(); ti++) {
937             double c = allCoef.get(ti);
938             assert this.designMatrix != null;
939             List<String> colNames = this.designMatrix.getMatrix().getColNames();
940 
941             String dmcolname;
942             if (colNames == null) {
943                 dmcolname = "Column_" + ti;
944             } else {
945                 dmcolname = colNames.get(ti);
946             }
947 
948             summaryTable.addRowName(dmcolname);
949             if (Double.isNaN(c)) {
950                 continue;
951             }
952 
953             summaryTable.set(ti, 0, estCoef.get(j));
954             summaryTable.set(ti, 1, sdUnscaled.get(j));
955             summaryTable.set(ti, 2, tstats.get(j));
956 
957             double pval = 2.0 * (1.0 - tdist.cumulativeProbability(Math.abs(tstats.get(j))));
958             summaryTable.set(ti, 3, pval);
959 
960             j++;
961 
962         }
963 
964         double rsquared = 0.0;
965         double adjRsquared = 0.0;
966         double fstatistic = 0.0;
967         int numdf = 0;
968         int dendf = 0;
969 
970         if (terms.size() > 1 || !hasIntercept) {
971             int dfint = hasIntercept ? 1 : 0;
972             rsquared = mss / (mss + rss);
973             adjRsquared = 1 - (1 - rsquared) * ((n - dfint) / (double) rdf);
974 
975             fstatistic = mss / (rank - dfint) / resvar;
976 
977             // This doesn't get set otherwise??
978             numdf = rank - dfint;
979             dendf = rdf;
980 
981         } else {
982             // intercept only, apparently.
983             rsquared = 0.0;
984             adjRsquared = 0.0;
985         }
986 
987         // NOTE that not all the information stored in the summary is likely to be important/used,
988         // while other information is probably still needed.
989         LinearModelSummaryImpl lms = new LinearModelSummaryImpl( key, allCoef.toArray(), resid.toArray(), terms,
990             summaryTable, effects.toArray(), sdUnscaled.toArray(), rsquared, adjRsquared, fstatistic, numdf, dendf,
991             null, sigma, this.hasBeenShrunken, this.dfPrior );
992 
993         return lms;
994     }
995 
996     /**
997      *
998      */
999     private void addInteraction() {
1000         if (designMatrix.getTerms().size() == 1) {
1001             throw new IllegalArgumentException("Need at least two factors for interactions");
1002         }
1003         if (designMatrix.getTerms().size() != 2) {
1004             throw new UnsupportedOperationException("Interactions not supported for more than two factors");
1005         }
1006         this.designMatrix.addInteraction(designMatrix.getTerms().get(0), designMatrix.getTerms().get(1));
1007     }
1008 
1009     /**
1010      * Cache a QR. Only important if missing values are present or if using weights, otherwise we use the "global" QR.
1011      *
1012      * @param row           cannot be null; indicates the index into the datamatrix rows.
1013      * @param valuesPresent if null, this is taken to mean the row wasn't usable.
1014      * @param newQR         can be null, if valuePresent is null
1015      */
1016     private void addQR(Integer row, BitVector valuesPresent, QRDecomposition newQR) {
1017 
1018         /*
1019          * Use of weights takes precedence over missing values, in terms of how we store QRs. If we only have missing
1020          * values, often we can get away with a small number of distinct QRs. With weights, we assume they are different
1021          * for each data row.
1022          */
1023         if (this.weights != null) {
1024             this.qrsForWeighted.put(row, newQR);
1025             return;
1026         }
1027 
1028         assert row != null;
1029 
1030         if (valuesPresent == null) {
1031             valuesPresentMap.put(row, null);
1032         }
1033 
1034         QRDecomposition cachedQr = qrs.get(valuesPresent);
1035         if (cachedQr == null) {
1036             qrs.put(valuesPresent, newQR);
1037         }
1038         valuesPresentMap.put(row, valuesPresent);
1039     }
1040 
1041     /**
1042      *
1043      */
1044     private void checkForMissingValues() {
1045         for (int i = 0; i < b.rows(); i++) {
1046             for (int j = 0; j < b.columns(); j++) {
1047                 double v = b.get(i, j);
1048                 if (Double.isNaN(v) || Double.isInfinite(v)) {
1049                     this.hasMissing = true;
1050                     log.info("Data has missing values (at row=" + (i + 1) + " column=" + (j + 1));
1051                     break;
1052                 }
1053             }
1054             if (this.hasMissing) break;
1055         }
1056     }
1057 
1058     /**
1059      * Drop, and track, redundant or constant columns (not counting the intercept, if present). This is only used if we
1060      * have missing values which would require changing the design depending on what is missing. Otherwise the model is
1061      * assumed to be clean. Note that this does not check the model for singularity, but does help avoid some obvious
1062      * causes of singularity.
1063      * <p>
1064      * NOTE Probably slow if we have to run this often; should cache re-used values.
1065      *
1066      * @param design
1067      * @param ypsize
1068      * @param droppedColumns populated by this call
1069      * @return
1070      */
1071     private DoubleMatrix2D cleanDesign(final DoubleMatrix2D design, int ypsize, List<Integer> droppedColumns) {
1072 
1073         /*
1074          * Drop constant columns or columns which are the same as another column.
1075          */
1076         for (int j = 0; j < design.columns(); j++) {
1077             if (j == 0 && this.hasIntercept) continue;
1078             double lastValue = Double.NaN;
1079             boolean constant = true;
1080             for (int i = 0; i < design.rows(); i++) {
1081                 double thisvalue = design.get(i, j);
1082                 if (i > 0 && thisvalue != lastValue) {
1083                     constant = false;
1084                     break;
1085                 }
1086                 lastValue = thisvalue;
1087             }
1088             if (constant) {
1089                 log.debug("Dropping constant column " + j);
1090                 droppedColumns.add(j);
1091                 continue;
1092             }
1093 
1094             DoubleMatrix1D col = design.viewColumn(j);
1095 
1096             for (int p = 0; p < j; p++) {
1097                 boolean redundant = true;
1098                 DoubleMatrix1D otherCol = design.viewColumn(p);
1099                 for (int v = 0; v < col.size(); v++) {
1100                     if (col.get(v) != otherCol.get(v)) {
1101                         redundant = false;
1102                         break;
1103                     }
1104                 }
1105                 if (redundant) {
1106                     log.debug("Dropping redundant column " + j);
1107                     droppedColumns.add(j);
1108                     break;
1109                 }
1110             }
1111 
1112         }
1113 
1114         DoubleMatrix2D returnValue = MatrixUtil.dropColumns(design, droppedColumns);
1115 
1116         return returnValue;
1117     }
1118 
1119     /**
1120      * ANOVA f statistics etc.
1121      *
1122      * @param dof         raw degrees of freedom
1123      * @param fStats      results will be stored here
1124      * @param denominator residual sums of squares / rdof
1125      * @param pvalues     results will be stored here
1126      */
1127     private void computeStats(DoubleMatrix2D dof, DoubleMatrix2D fStats, DoubleMatrix1D denominator,
1128                               DoubleMatrix2D pvalues) {
1129         pvalues.assign(Double.NaN);
1130         int timesWarned = 0;
1131         for (int i = 0; i < fStats.rows(); i++) {
1132 
1133             int rdof;
1134             if (this.residualDofs.isEmpty()) {
1135                 rdof = residualDof;
1136             } else {
1137                 rdof = this.residualDofs.get(i);
1138             }
1139 
1140             for (int j = 0; j < fStats.columns(); j++) {
1141 
1142                 double ndof = dof.get(i, j);
1143 
1144                 if (ndof <= 0 || rdof <= 0) {
1145                     pvalues.set(i, j, Double.NaN);
1146                     fStats.set(i, j, Double.NaN);
1147                     continue;
1148                 }
1149 
1150                 if (j == fStats.columns() - 1) {
1151                     // don't fill in f & p values for the residual...
1152                     pvalues.set(i, j, Double.NaN);
1153                     fStats.set(i, j, Double.NaN);
1154                     continue;
1155                 }
1156 
1157                 /*
1158                  * Taking ratios of two very small values is not meaningful; happens if the data are ~constant.
1159                  */
1160                 if (fStats.get(i, j) < Constants.SMALLISH && denominator.get(i) < Constants.SMALLISH) {
1161                     pvalues.set(i, j, Double.NaN);
1162                     fStats.set(i, j, Double.NaN);
1163                     continue;
1164                 }
1165 
1166                 fStats.set(i, j, fStats.get(i, j) / denominator.get(i));
1167                 try {
1168                     FDistribution pf = new FDistribution(ndof, rdof + this.dfPrior);
1169                     pvalues.set(i, j, 1.0 - pf.cumulativeProbability(fStats.get(i, j)));
1170                 } catch (NotStrictlyPositiveException e) {
1171                     if (timesWarned < 10) {
1172                         log.warn("Pvalue could not be computed for F=" + fStats.get(i, j) + "; denominator was="
1173                                 + denominator.get(i) + "; Error: " + e.getMessage()
1174                                 + " (limited warnings of this type will be given)");
1175                         timesWarned++;
1176                     }
1177                     pvalues.set(i, j, Double.NaN);
1178                 }
1179 
1180             }
1181         }
1182     }
1183 
1184     /**
1185      * @param qrd
1186      * @return
1187      */
1188     private String diagnosis(QRDecomposition qrd) {
1189         StringBuilder buf = new StringBuilder();
1190         buf.append("\n--------\nLM State\n--------\n");
1191         buf.append("hasMissing=" + this.hasMissing + "\n");
1192         buf.append("hasIntercept=" + this.hasIntercept + "\n");
1193         buf.append("Design: " + this.designMatrix + "\n");
1194         if (this.b.rows() < 5) {
1195             buf.append("Data matrix: " + this.b + "\n");
1196         } else {
1197             buf.append("Data (first few rows): " + this.b.viewSelection(new int[]{0, 1, 2, 3, 4}, null) + "\n");
1198 
1199         }
1200         buf.append("Current QR:" + qrd + "\n");
1201         return buf.toString();
1202     }
1203 
1204     /**
1205      *
1206      */
1207     private void fit() {
1208         if (this.weights == null) {
1209             lsf();
1210             return;
1211         }
1212         wlsf();
1213     }
1214 
1215     /**
1216      * @param valuesPresent - only if we have unweighted regression
1217      * @return appropriate cached QR, or null
1218      */
1219     private QRDecomposition getQR(BitVector valuesPresent) {
1220         assert this.weights == null;
1221         return qrs.get(valuesPresent);
1222     }
1223 
1224     /**
1225      * Get the QR decomposition to use for data row given. If it has not yet been computed/cached return null.
1226      *
1227      * @param row
1228      * @return QR or null if the row wasn't usable. If there are no missing values and weights aren't used, this
1229      * returns
1230      * the global qr. If there are only missing values, this returns the QR that matches the pattern of
1231      * missing
1232      * values. If there are weights, a row-specific QR is returned.
1233      */
1234     private QRDecomposition getQR(Integer row) {
1235         if (!this.hasMissing && this.weights == null) {
1236             return this.qr;
1237         }
1238 
1239         if (this.weights != null)
1240             return qrsForWeighted.get(row);
1241 
1242         assert this.hasMissing;
1243         BitVector key = valuesPresentMap.get(row);
1244         if (key == null) return null;
1245         return qrs.get(key);
1246 
1247     }
1248 
1249     /**
1250      * Internal function that does the hard work in unweighted case.
1251      */
1252     private void lsf() {
1253 
1254         assert this.weights == null;
1255 
1256         checkForMissingValues();
1257         Algebra solver = new Algebra();
1258 
1259         if (this.hasMissing) {
1260             double[][] rawResult = new double[b.rows()][];
1261             for (int i = 0; i < b.rows(); i++) {
1262 
1263                 DoubleMatrix1D row = b.viewRow(i);
1264                 if (row.size() < 3) { // don't bother.
1265                     rawResult[i] = new double[A.columns()];
1266                     continue;
1267                 }
1268                 DoubleMatrix1D withoutMissing = lsfWmissing(i, row, A);
1269                 if (withoutMissing == null) {
1270                     rawResult[i] = new double[A.columns()];
1271                 } else {
1272                     rawResult[i] = withoutMissing.toArray();
1273                 }
1274             }
1275             this.coefficients = new DenseDoubleMatrix2D(rawResult).viewDice();
1276 
1277         } else {
1278 
1279             this.qr = new QRDecomposition(A);
1280             this.coefficients = qr.solve(solver.transpose(b));
1281             this.residualDof = b.columns() - qr.getRank();
1282             if (residualDof <= 0) {
1283                 throw new IllegalArgumentException(
1284                         "No residual degrees of freedom to fit the model" + diagnosis(qr));
1285             }
1286 
1287         }
1288         assert this.assign.isEmpty() || this.assign.size() == this.coefficients.rows() : assign.size()
1289                 + " != # coefficients " + this.coefficients.rows();
1290 
1291         assert this.coefficients.rows() == A.columns();
1292 
1293         // It is somewhat wasteful to hold on to this.
1294         this.fitted = solver.transpose(MatrixUtil.multWithMissing(A, coefficients));
1295 
1296         if (this.hasMissing) {
1297             MatrixUtil.maskMissing(b, fitted);
1298         }
1299 
1300         this.residuals = b.copy().assign(fitted, Functions.minus);
1301     }
1302 
    /**
     * Perform OLS when there might be missing values, for a single vector of data y. If y doesn't have any missing
     * values this works normally.
     * <p>
     * Has side effect of filling in this.qrs and this.residualDofs, so run this "in order".
     *
     * @param row the index of the data row being fit; used as the key for caching QRs
     * @param y   the data to fit. For weighted ls, you must supply y*w
     * @param des the design matrix. For weighted ls, you must supply des*w.
     * @return the coefficients (a.k.a. x); entries for columns dropped from the design are NaN, and the whole
     *         vector is NaN if the row could not be fit
     */
    private DoubleMatrix1D lsfWmissing(Integer row, DoubleMatrix1D y, DoubleMatrix2D des) {
        Algebra solver = new Algebra();
        // This can potentially be improved by getting the indices of non-missing values and using that to make slices.

        List<Double> ywithoutMissingList = new ArrayList<>(y.size());
        int size = y.size();
        boolean hasAssign = !this.assign.isEmpty();

        // First pass: count usable (finite) observations.
        int countNonMissing = 0;
        for (int i = 0; i < size; i++) {
            double v = y.getQuick(i);
            if (!Double.isNaN(v) && !Double.isInfinite(v)) {
                countNonMissing++;
            }
        }

        if (countNonMissing < 3) {
            /*
             * Too few observations: return all-NaN coefficients, but still record the QR slot,
             * residual dof and assign entry so per-row bookkeeping stays aligned.
             */
            DoubleMatrix1D re = new DenseDoubleMatrix1D(des.columns());
            re.assign(Double.NaN);
            log.debug("Not enough non-missing values");
            this.addQR(row, null, null);
            this.residualDofs.add(countNonMissing - des.columns());
            if (hasAssign) this.assigns.add(new ArrayList<Integer>());
            return re;
        }

        // Second pass: build y and the design restricted to the non-missing observations,
        // recording the missing-value pattern in a BitVector (set bit = value present).
        double[][] rawDesignWithoutMissing = new double[countNonMissing][];
        int index = 0;
        boolean missing = false;

        BitVector bv = new BitVector(size);
        for (int i = 0; i < size; i++) {
            double yi = y.getQuick(i);
            if (Double.isNaN(yi) || Double.isInfinite(yi)) {
                missing = true;
                continue;
            }
            ywithoutMissingList.add(yi);
            bv.set(i);
            rawDesignWithoutMissing[index++] = des.viewRow(i).toArray();
        }
        double[] yWithoutMissing = ArrayUtils.toPrimitive(ywithoutMissingList.toArray(new Double[]{}));
        DenseDoubleMatrix2D yWithoutMissingAsMatrix = new DenseDoubleMatrix2D(new double[][]{yWithoutMissing});

        DoubleMatrix2D designWithoutMissing = new DenseDoubleMatrix2D(rawDesignWithoutMissing);

        // Dropping observations can leave constant or duplicate design columns; remove them.
        boolean fail = false;
        List<Integer> droppedColumns = new ArrayList<>();
        designWithoutMissing = this.cleanDesign(designWithoutMissing, yWithoutMissingAsMatrix.size(), droppedColumns);

        // Unfittable if no columns survive or there are more parameters than observations.
        if (designWithoutMissing.columns() == 0 || designWithoutMissing.columns() > designWithoutMissing.rows()) {
            fail = true;
        }

        if (fail) {
            // Same bookkeeping as the too-few-observations case above.
            DoubleMatrix1D re = new DenseDoubleMatrix1D(des.columns());
            re.assign(Double.NaN);
            this.addQR(row, null, null);
            this.residualDofs.add(countNonMissing - des.columns());
            if (hasAssign) this.assigns.add(new ArrayList<Integer>());
            return re;
        }

        // Obtain the QR: per-row for weighted fits, shared by missing-value pattern otherwise.
        QRDecomposition rqr = null;
        if (this.weights != null) {
            rqr = new QRDecomposition(designWithoutMissing);
            addQR(row, null, rqr);
        } else if (missing) {
            rqr = this.getQR(bv);
            if (rqr == null) {
                rqr = new QRDecomposition(designWithoutMissing);
                addQR(row, bv, rqr);
            }
        } else {
            // in the case of weighted least squares, the Design matrix has different weights
            // for every row observation, so recompute qr everytime.
            if (this.qr == null) {
                rqr = new QRDecomposition(des);
            } else {
                // presumably not weighted. Why would this be set already, though? Is this ever reached?
                rqr = this.qr;
            }
        }

        // NOTE(review): for the weighted and cache-miss branches above this re-adds a QR that was
        // already stored; addQR is idempotent so this appears harmless — confirm before removing.
        this.addQR(row, bv, rqr);

        int pivots = rqr.getRank();

        // Residual degrees of freedom for this row: observations minus rank.
        int rdof = yWithoutMissingAsMatrix.size() - pivots;
        this.residualDofs.add(rdof);

        DoubleMatrix2D coefs = rqr.solve(solver.transpose(yWithoutMissingAsMatrix));

        /*
         * Put NaNs in for missing coefficients that were dropped from our estimation.
         */
        if (designWithoutMissing.columns() < des.columns()) {
            DoubleMatrix1D col = coefs.viewColumn(0);
            DoubleMatrix1D result = new DenseDoubleMatrix1D(des.columns());
            result.assign(Double.NaN);
            int k = 0;
            List<Integer> assignForRow = new ArrayList<>();
            for (int i = 0; i < des.columns(); i++) {
                if (droppedColumns.contains(i)) {
                    // leave it as NaN.
                    continue;
                }

                if (hasAssign) assignForRow.add(this.assign.get(i));
                assert k < col.size();
                result.set(i, col.get(k));
                k++;
            }
            if (hasAssign) assigns.add(assignForRow);
            return result;
        }
        if (hasAssign) assigns.add(this.assign);
        return coefs.viewColumn(0);

    }
1436 
1437     /**
1438      * @param ssq
1439      * @param dof
1440      * @param fStats
1441      * @param pvalues
1442      * @return
1443      */
1444     private List<GenericAnovaResult> summarizeAnova(DoubleMatrix2D ssq, DoubleMatrix2D dof, DoubleMatrix2D fStats,
1445                                                     DoubleMatrix2D pvalues) {
1446 
1447         assert ssq != null;
1448         assert dof != null;
1449         assert fStats != null;
1450         assert pvalues != null;
1451 
1452         List<GenericAnovaResult> results = new ArrayList<>();
1453         for (int i = 0; i < fStats.rows(); i++) {
1454             Collection<AnovaEffect> efs = new ArrayList<>();
1455 
1456             /*
1457              * Don't put in ftest results for the residual thus the -1.
1458              */
1459             for (int j = 0; j < fStats.columns() - 1; j++) {
1460                 String effectName = terms.get(j);
1461                 assert effectName != null;
1462                 AnovaEffect ae = new AnovaEffect(effectName, pvalues.get(i, j), fStats.get(i, j), dof.get(
1463                     i, j ), ssq.get( i, j ), effectName.contains( ":" ), false );
1464                 efs.add(ae);
1465             }
1466 
1467             /*
1468              * Add residual
1469              */
1470             int residCol = fStats.columns() - 1;
1471             AnovaEffect ae = new AnovaEffect( "Residual", Double.NaN, Double.NaN, dof.get( i, residCol ) + this.dfPrior, ssq.get( i,
1472                 residCol ), false, true );
1473             efs.add(ae);
1474 
1475             GenericAnovaResultImpl ao = new GenericAnovaResultImpl( this.rowNames != null ? this.rowNames.get( i ) : String.valueOf( i ), efs );
1476             results.add(ao);
1477         }
1478         return results;
1479     }
1480 
1481     /**
1482      * The weighted version which works like 'lm.wfit()' in R.
1483      */
1484     private void wlsf() {
1485 
1486         assert this.weights != null;
1487 
1488         checkForMissingValues();
1489         Algebra solver = new Algebra();
1490 
1491         /*
1492          * weight A and b: wts <- sqrt(w) A * wts, row * wts
1493          */
1494         List<DoubleMatrix2D> AwList = new ArrayList<>(b.rows());
1495         List<DoubleMatrix1D> bList = new ArrayList<>(b.rows());
1496 
1497         /*
1498          * Implemented like R::stats::lm.wfit : z <- .Call(C_Cdqrls, x * wts, y * wts, tol, FALSE), but we're doing each
1499          * y (gene) separately rather than in bulk since weights are different for each y.
1500          *
1501          *
1502          * Limma uses this approach in lm.series.
1503          */
1504         for (int i = 0; i < b.rows(); i++) {
1505             DoubleMatrix1D wts = this.weights.viewRow(i).copy().assign(Functions.sqrt);
1506             DoubleMatrix1D bw = b.viewRow(i).copy().assign(wts, Functions.mult);
1507             DoubleMatrix2D Aw = A.copy();
1508             for (int j = 0; j < Aw.columns(); j++) {
1509                 Aw.viewColumn(j).assign(wts, Functions.mult);
1510             }
1511             AwList.add(Aw);
1512             bList.add(bw);
1513         }
1514 
1515         double[][] rawResult = new double[b.rows()][];
1516 
1517         if (this.hasMissing) {
1518             /*
1519              * Have to drop missing values from the design matrix, so invoke special code.
1520              */
1521             for (int i = 0; i < b.rows(); i++) {
1522                 DoubleMatrix1D bw = bList.get(i);
1523                 DoubleMatrix2D Aw = AwList.get(i);
1524                 DoubleMatrix1D withoutMissing = lsfWmissing(i, bw, Aw);
1525                 if (withoutMissing == null) {
1526                     rawResult[i] = new double[A.columns()];
1527                 } else {
1528                     rawResult[i] = withoutMissing.toArray();
1529                 }
1530             }
1531 
1532         } else {
1533 
1534             // do QR for each row because A is scaled by different row weights
1535             // see lm.series() in limma; calls lm.wfit.
1536             for (int i = 0; i < b.rows(); i++) {
1537                 DoubleMatrix1D bw = bList.get(i);
1538                 DoubleMatrix2D Aw = AwList.get(i);
1539                 DoubleMatrix2D bw2D = new DenseDoubleMatrix2D(1, bw.size());
1540                 bw2D.viewRow(0).assign(bw);
1541                 QRDecomposition wqr = new QRDecomposition(Aw);
1542 
1543                 // We keep all the QRs for later use.
1544                 this.addQR(i, null, wqr);
1545 
1546                 rawResult[i] = wqr.solve(solver.transpose(bw2D)).viewColumn(0).toArray();
1547                 this.residualDof = bw.size() - wqr.getRank();
1548                 assert this.residualDof >= 0;
1549                 if (residualDof == 0) {
1550                     throw new IllegalArgumentException("No residual degrees of freedom to fit the model"
1551                             + diagnosis(wqr));
1552                 }
1553             }
1554         }
1555 
1556         this.coefficients = solver.transpose(new DenseDoubleMatrix2D(rawResult));
1557 
1558         assert this.assign.isEmpty() || this.assign.size() == this.coefficients.rows() : assign.size()
1559                 + " != # coefficients " + this.coefficients.rows();
1560         assert this.coefficients.rows() == A.columns();
1561 
1562         this.fitted = solver.transpose(MatrixUtil.multWithMissing(A, coefficients));
1563 
1564         if (this.hasMissing) {
1565             MatrixUtil.maskMissing(b, fitted);
1566         }
1567 
1568         this.residuals = b.copy().assign(fitted, Functions.minus);
1569     }
1570 
1571 }