View Javadoc
1   /*
2    * The baseCode project
3    *
4    * Copyright (c) 2010 University of British Columbia
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.util.r;
20  
21  import org.apache.commons.lang3.StringUtils;
22  import org.apache.commons.math3.distribution.FDistribution;
23  import org.rosuda.REngine.REXP;
24  import org.rosuda.REngine.REXPDouble;
25  import org.rosuda.REngine.REXPMismatchException;
26  import org.rosuda.REngine.RList;
27  import ubic.basecode.dataStructure.matrix.DoubleMatrix;
28  import ubic.basecode.dataStructure.matrix.DoubleMatrixFactory;
29  import ubic.basecode.math.linearmodels.GenericAnovaResult;
30  import ubic.basecode.math.linearmodels.LinearModelSummary;
31  import ubic.basecode.math.linearmodels.LinearModelSummaryUtils;
32  
33  import javax.annotation.Nullable;
34  import java.util.*;
35  
36  /**
37   * Represents the results of a linear model analysis from R. Both the summary.lm and anova objects are represented.
38   *
39   * @author paul
40   */
41  class LinearModelSummaryImpl implements LinearModelSummary {
42  
43      private static final long serialVersionUID = 1L;
44  
45      /**
46       * Name for this summary e.g. probe identifier
47       */
48      private final String key;
49      private final double adjRSquared;
50      @Nullable
51      private final GenericAnovaResultImpl anovaResult;
52      private final DoubleMatrix<String, String> contrastCoefficients;
53      private final double denominatorDof;
54      private final List<String> factorNames;
55      private final double fStat;
56      private final double numeratorDof;
57      private final double[] residuals;
58      private final double rSquared;
59      private final Map<String, Collection<String>> term2CoefficientNames;
60      // computed on-demand from final fields
61      @Nullable
62      private Double overallPValue = null;
63  
64      /**
65       * Construct an empty summary. Use for model fits that fail due to 0 degrees of freedom, etc.
66       *
67       * @param key identifier
68       */
69      public LinearModelSummaryImpl( String key ) {
70          this.key = key;
71          this.factorNames = null;
72          this.contrastCoefficients = null;
73          this.anovaResult = null;
74          this.adjRSquared = Double.NaN;
75          this.fStat = Double.NaN;
76          this.numeratorDof = Double.NaN;
77          this.denominatorDof = Double.NaN;
78          this.residuals = null;
79          this.rSquared = Double.NaN;
80          this.term2CoefficientNames = null;
81      }
82  
83      /**
84       * Construct from the result of evaluation of an R call. should be deprecated because we're not using R integration
85       *
86       * @param summaryLm
87       * @param anova
88       * @param factorNames as referred to in the model. Used as keys to keep track of coefficients etc.
89       *
90       */
91      public LinearModelSummaryImpl( String key, REXP summaryLm, @Nullable REXP anova, String[] factorNames ) throws REXPMismatchException {
92          this.key = key;
93  
94          RList li = summaryLm.asList();
95  
96          REXPDouble coraw = ( REXPDouble ) li.get( "coefficients" );
97  
98          RList dimnames = coraw.getAttribute( "dimnames" ).asList();
99          String[] itemnames = ( ( REXP ) dimnames.get( 0 ) ).asStrings();
100         String[] colNames = ( ( REXP ) dimnames.get( 1 ) ).asStrings();
101 
102         double[][] coef = coraw.asDoubleMatrix();
103         contrastCoefficients = DoubleMatrixFactory.dense( coef );
104         contrastCoefficients.setRowNames( Arrays.asList( itemnames ) );
105         contrastCoefficients.setColumnNames( Arrays.asList( colNames ) );
106 
107         this.residuals = ( ( REXP ) li.get( "residuals" ) ).asDoubles();
108 
109         this.rSquared = ( ( REXP ) li.get( "r.squared" ) ).asDouble();
110 
111         this.adjRSquared = ( ( REXP ) li.get( "adj.r.squared" ) ).asDouble();
112 
113         REXP fstats = ( REXP ) li.get( "fstatistic" ); // for overall model fit.
114         if ( fstats != null ) {
115             double[] ff = ( fstats.asDoubles() );
116             this.fStat = ff[0];
117             this.numeratorDof = ff[1];
118             this.denominatorDof = ff[2];
119         } else {
120             this.fStat = Double.NaN;
121             this.numeratorDof = Double.NaN;
122             this.denominatorDof = Double.NaN;
123         }
124         // this.residualDof = ArrayUtils.toObject( ( ( REXP ) li.get( "df" ) ).asIntegers() )[1];
125 
126         // li.get( "cov.unscaled" );
127         // this.doF = ArrayUtils.toObject( ( ( REXP ) li.get( "df" ) ).asIntegers() );
128 
129         this.factorNames = Arrays.asList( factorNames );
130 
131         if ( anova != null ) {
132             this.anovaResult = new GenericAnovaResultImpl( key, anova );
133         } else {
134             this.anovaResult = null;
135         }
136 
137         this.term2CoefficientNames = LinearModelSummaryUtils.createTerm2CoefficientNames( this.factorNames, contrastCoefficients );
138     }
139 
140     /**
141      * @return the adjRSquared
142      */
143     @Override
144     public double getAdjRSquared() {
145         return adjRSquared;
146     }
147 
148     /**
149      * @return may be null if ANOVA was not run.
150      */
151     @Override
152     public GenericAnovaResult getAnova() {
153         return this.anovaResult;
154     }
155 
156     @Override
157     public double[] getCoefficients() {
158         throw new UnsupportedOperationException();
159     }
160 
161     @Override
162     public DoubleMatrix<String, String> getContrastCoefficients() {
163         return contrastCoefficients;
164     }
165 
166     @Override
167     public Map<String, Double> getContrastCoefficients( String factorName ) {
168         return getContrastAttribute( factorName, "Estimate" );
169     }
170 
171     @Override
172     public Map<String, Double> getContrastCoefficientStderr( String factorName ) {
173         return getContrastAttribute( factorName, "Std. Error" );
174     }
175 
176     @Override
177     public Map<String, Double> getContrastPValues( String factorName ) {
178         return getContrastAttribute( factorName, "Pr(>|t|)" );
179     }
180 
181     @Override
182     public Map<String, Double> getContrastTStats( String factorName ) {
183         return getContrastAttribute( factorName, "t value" );
184     }
185 
186     private Map<String, Double> getContrastAttribute( String factorName, String attributeName ) {
187         Collection<String> terms = term2CoefficientNames.get( factorName );
188         if ( terms == null ) return null;
189         Map<String, Double> results = new HashMap<>();
190         for ( String term : terms ) {
191             results.put( term, contrastCoefficients.getByKeys( term, attributeName ) );
192         }
193         return results;
194     }
195 
196     @Override
197     public double[] getEffects() {
198         throw new UnsupportedOperationException();
199     }
200 
201     @Override
202     public double getFStat() {
203         return this.fStat;
204     }
205 
206     @Override
207     public List<String> getFactorNames() {
208         return factorNames;
209     }
210 
211     @Override
212     public double getInterceptCoefficient() {
213         if ( contrastCoefficients != null ) {
214             if ( contrastCoefficients.hasRow( INTERCEPT_COEFFICIENT_NAME ) ) {
215                 return contrastCoefficients.getByKeys( INTERCEPT_COEFFICIENT_NAME, "Estimate" );
216             } else if ( contrastCoefficients.rows() == 1 ) {
217                 /*
218                  * This is a bit of a kludge. When we use lm.fit instead of lm, we end up with a somewhat screwy
219                  * coefficent matrix in the case of one-sample ttest, and R put in x1 (I think it starts as 1 and it
220                  * prepends the x).
221                  */
222                 assert "x1".equals( contrastCoefficients.getRowName( 0 ) );
223                 return contrastCoefficients.getByKeys( contrastCoefficients.getRowName( 0 ), "Estimate" );
224             }
225         }
226 
227         return Double.NaN;
228     }
229 
230     @Override
231     public double getInterceptPValue() {
232         if ( contrastCoefficients != null ) {
233             if ( contrastCoefficients.hasRow( INTERCEPT_COEFFICIENT_NAME ) ) {
234                 return contrastCoefficients.getByKeys( INTERCEPT_COEFFICIENT_NAME, "Pr(>|t|)" );
235             } else if ( contrastCoefficients.rows() == 1 ) {
236                 /*
237                  * This is a bit of a kludge. When we use lm.fit instead of lm, we end up with a somewhat screwy
238                  * coefficent matrix in the case of one-sample ttest, and R put in x1 (I think it starts as 1 and it
239                  * prepends the x).
240                  */
241                 assert "x1".equals( contrastCoefficients.getRowName( 0 ) );
242                 return contrastCoefficients.getByKeys( contrastCoefficients.getRowName( 0 ), "Pr(>|t|)" );
243             }
244         }
245         return Double.NaN;
246     }
247 
248     @Override
249     public double getInterceptTStat() {
250         if ( contrastCoefficients != null ) {
251             if ( contrastCoefficients.hasRow( INTERCEPT_COEFFICIENT_NAME ) ) {
252                 return contrastCoefficients.getByKeys( INTERCEPT_COEFFICIENT_NAME, "t value" );
253             } else if ( contrastCoefficients.rows() == 1 ) {
254                 /*
255                  * This is a bit of a kludge. When we use lm.fit instead of lm, we end up with a somewhat screwy
256                  * coefficent matrix in the case of one-sample ttest, and R put in x1 (I think it starts as 1 and it
257                  * prepends the x).
258                  */
259                 assert "x1".equals( contrastCoefficients.getRowName( 0 ) );
260                 return contrastCoefficients.getByKeys( contrastCoefficients.getRowName( 0 ), "t value" );
261             }
262         }
263 
264         return Double.NaN;
265     }
266 
267     @Override
268     public String getKey() {
269         return key;
270     }
271 
272     public double getMainEffectPValue( String factorName ) {
273         if ( anovaResult == null ) return Double.NaN;
274         return anovaResult.getMainEffectPValue( factorName );
275     }
276 
277     @Override
278     public double getNumeratorDof() {
279         return this.numeratorDof;
280     }
281 
282     @Override
283     public double getOverallPValue() {
284         if ( overallPValue == null ) {
285             if ( Double.isNaN( numeratorDof ) || Double.isNaN( denominatorDof ) || numeratorDof == 0 || denominatorDof == 0 ) {
286                 overallPValue = Double.NaN;
287             } else {
288                 FDistribution f = new FDistribution( numeratorDof, denominatorDof );
289                 overallPValue = 1.0 - f.cumulativeProbability( this.getFStat() );
290             }
291         }
292         return overallPValue;
293     }
294 
295     @Override
296     public double getPriorDof() {
297         throw new UnsupportedOperationException();
298     }
299 
300     @Override
301     public double getResidualsDof() {
302         return this.denominatorDof;
303     }
304 
305     @Override
306     public double[] getResiduals() {
307         return residuals;
308     }
309 
310     @Override
311     public double getRSquared() {
312         return rSquared;
313     }
314 
315     @Override
316     public double getSigma() {
317         return Double.NaN;
318     }
319 
320     @Override
321     public double[] getStdevUnscaled() {
322         throw new UnsupportedOperationException();
323     }
324 
325     @Override
326     public boolean isBaseline( String factorValueName ) {
327         return !contrastCoefficients.hasRow( factorValueName );
328     }
329 
330     @Override
331     public boolean isShrunken() {
332         return false;
333     }
334 
335     @Override
336     public String toString() {
337         StringBuilder buf = new StringBuilder();
338         if ( StringUtils.isNotBlank( this.key ) ) {
339             buf.append( this.key ).append( "\n" );
340         }
341         buf.append( "F=" ).append( String.format( "%.2f", this.fStat ) ).append( " Rsquare=" ).append( String.format( "%.2f", this.rSquared ) ).append( "\n" );
342 
343         buf.append( "Residuals:\n" );
344         if ( residuals != null ) {
345             for ( double d : residuals ) {
346                 buf.append( String.format( "%.2f ", d ) );
347             }
348         } else {
349             buf.append( "Residuals are null" );
350         }
351 
352         buf.append( "\n\nCoefficients:\n" ).append( contrastCoefficients ).append( "\n" );
353         if ( this.anovaResult != null ) {
354             buf.append( this.anovaResult ).append( "\n" );
355         }
356 
357         return buf.toString();
358     }
359 }