View Javadoc
1   /*
2    * The baseCode project
3    * 
4    * Copyright (c) 2006 University of British Columbia
5    * 
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.math.metaanalysis;
20  
21  import ubic.basecode.math.Constants;
22  import cern.colt.list.DoubleArrayList;
23  import cern.jet.stat.Descriptive;
24  import cern.jet.stat.Probability;
25  
26  /**
27   * Statistics for meta-analysis. Methods from Cooper and Hedges (CH); Hunter and Schmidt (HS).
28   * <p>
29   * In this class "conditional variance" means the variance for one data set. Unconditional means "between data set", or
30   * 
31   * @author Paul Pavlidis
32   * 
33   */
34  public abstract class MetaAnalysis {
35  
36      /**
37       * Fisher's method for combining p values. (Cooper and Hedges 15-8)
38       * 
39       * @param pvals DoubleArrayList
40       * @return double upper tail
41       */
42      public static double fisherCombinePvalues( DoubleArrayList pvals ) {
43          double r = 0.0;
44          for ( int i = 0, n = pvals.size(); i < n; i++ ) {
45              r += Math.log( pvals.getQuick( i ) );
46          }
47          r *= -2.0;
48          // NOTE: dof is first argument.
49          return Probability.chiSquareComplemented( 2.0 * pvals.size(), r );
50      }
51  
52      /**
53       * Test for statistical significance of Q.
54       * 
55       * @param Q - computed using qStatistic
56       * @param N - number of studies.
57       * @see qStatistic
58       * @return The upper tail chi-square probability for Q with N - degrees of freedom.
59       */
60      public double qTest( double Q, double N ) {
61          // NOTE: dof is first argument.
62          return Probability.chiSquareComplemented( N - 1, Q );
63      }
64  
65      /**
66       * Fisher's method for combining p values (Cooper and Hedges 15-8)
67       * <p>
68       * Use for p values that have already been log transformed.
69       * 
70       * @param pvals DoubleArrayList
71       * @return double upper tail
72       */
73      protected double fisherCombineLogPvalues( DoubleArrayList pvals ) {
74          double r = 0.0;
75          for ( int i = 0, n = pvals.size(); i < n; i++ ) {
76              r += pvals.getQuick( i );
77          }
78          r *= -2.0;
79          // NOTE: dof is first argument.
80          return Probability.chiSquareComplemented( 2.0 * pvals.size(), r );
81      }
82  
83      /**
84       * Weights under a fixed effects model. Simply w_i = 1/v_i. CH eqn 18-2.
85       * 
86       * @param variances
87       * @return
88       */
89      protected DoubleArrayList metaFEWeights( DoubleArrayList variances ) {
90          DoubleArrayList w = new DoubleArrayList( variances.size() );
91  
92          for ( int i = 0; i < variances.size(); i++ ) {
93              double v = variances.getQuick( i );
94              if ( v <= Constants.SMALL ) {
95                  v = Constants.SMALL;
96                  System.err.println( "Tiny variance    " + v );
97              }
98              w.add( 1 / v );
99          }
100 
101         return w;
102     }
103 
104     /**
105      * CH sample variance under random effects model, equation 18-20
106      * 
107      * @param
108      * @return
109      */
110     protected double metaRESampleVariance( DoubleArrayList effectSizes ) {
111         return Descriptive.sampleVariance( effectSizes, Descriptive.mean( effectSizes ) );
112     }
113 
114     /**
115      * CH estimate of between-studies variance, equation 18-22, for random effects model.
116      * 
117      * @param effectSizes
118      * @param variances
119      * @return
120      */
121     // protected double metaREVariance( DoubleArrayList effectSizes,
122     // DoubleArrayList variances ) {
123     // return Math.max( metaRESampleVariance( effectSizes )
124     // - Descriptive.mean( variances ), 0.0 );
125     // }
126     /**
127      * CH equation 18-23. Another estimator of the between-studies variance s<sup>2 </sup> for random effects model.
128      * This is non-zero only if Q is larger than expected under the null hypothesis that the variance is zero.
129      * 
130      * <pre>
131      *      s&circ;2 = [Q - ( k - 1 ) ] / c
132      * </pre>
133      * 
134      * where
135      * 
136      * <pre>
137      *      c = Max(sum_i=1&circ;k w_i - [ sum_i&circ;k w_i&circ;2 / sum_i&circ;k w_i ], 0)
138      * </pre>
139      * 
140      * @param effectSizes
141      * @param variances
142      * @param weights
143      * @return
144      */
145     protected double metaREVariance( DoubleArrayList effectSizes, DoubleArrayList variances, DoubleArrayList weights ) {
146 
147         if ( !( effectSizes.size() == weights.size() && weights.size() == variances.size() ) )
148             throw new IllegalArgumentException( "Unequal sizes" );
149 
150         // the weighted unconditional variance.
151         double q = qStatistic( effectSizes, variances, weightedMean( effectSizes, weights ) );
152 
153         double c = Descriptive.sum( weights ) - Descriptive.sumOfSquares( weights ) / Descriptive.sum( weights );
154         return Math.max( ( q - ( effectSizes.size() - 1 ) ) / c, 0.0 );
155     }
156 
157     /**
158      * Under a random effects model, CH eqn. 18-24, we replace the conditional variance with the sum of the
159      * between-sample variance and the conditional variance.
160      * 
161      * <pre>
162      *      v_i&circ;* = sigma-hat_theta&circ;2 + v_i.
163      * </pre>
164      * 
165      * @param variances Conditional variances
166      * @param sampleVariance estimated...somehow.
167      * @return
168      */
169     protected DoubleArrayList metaREWeights( DoubleArrayList variances, double sampleVariance ) {
170         DoubleArrayList w = new DoubleArrayList( variances.size() );
171 
172         for ( int i = 0; i < variances.size(); i++ ) {
173             if ( variances.getQuick( i ) <= 0 ) {
174                 throw new IllegalStateException( "Negative or zero variance" );
175             }
176             w.add( 1 / ( variances.getQuick( i ) + sampleVariance ) );
177         }
178 
179         return w;
180     }
181 
182     /**
183      * CH 18-3. Can be used for fixed or random effects model, the variances just have to computed differently.
184      * 
185      * <pre>
186      * 
187      *            v_dot = 1/sum_i=1&circ;k ( 1/v_i)
188      * </pre>
189      * 
190      * @param variances
191      * @return
192      */
193     protected double metaVariance( DoubleArrayList variances ) {
194         double var = 0.0;
195         for ( int i = 0; i < variances.size(); i++ ) {
196             var += 1.0 / variances.getQuick( i );
197         }
198         if ( var == 0.0 ) {
199             var = Double.MIN_VALUE;
200             // throw new IllegalStateException( "Variance of zero" );
201         }
202         return 1.0 / var;
203     }
204 
205     /**
206      * CH 18-3 version 2 for quality weighted. ( page 266 ) in Fixed effects model.
207      * 
208      * <pre>
209      *            v_dot = [ sum_i=1&circ;k ( q_i &circ; 2 * w_i) ]/[ sum_i=1&circ;k  q_i * w_i ]&circ;2
210      * </pre>
211      * 
212      * @param variances
213      * @return
214      */
215     protected double metaVariance( DoubleArrayList weights, DoubleArrayList qualityIndices ) {
216         double num = 0.0;
217         double denom = 0.0;
218         for ( int i = 0; i < weights.size(); i++ ) {
219             num += Math.pow( weights.getQuick( i ), 2 ) * qualityIndices.getQuick( i );
220             denom += Math.pow( weights.getQuick( i ) * qualityIndices.getQuick( i ), 2 );
221         }
222         if ( denom == 0.0 ) {
223             throw new IllegalStateException( "Attempt to divide by zero." );
224         }
225         return num / denom;
226     }
227 
228     /**
229      * Test statistic for H0: effectSize == 0. CH 18-5. For fixed effects model.
230      * 
231      * @param metaEffectSize
232      * @param metaVariance
233      * @return
234      */
235     protected double metaZscore( double metaEffectSize, double metaVariance ) {
236         return Math.abs( metaEffectSize ) / Math.sqrt( metaVariance );
237     }
238 
239     /**
240      * The "Q" statistic used to test homogeneity of effect sizes. (Cooper and Hedges 18-6)
241      * 
242      * @param effectSizes DoubleArrayList
243      * @param variances DoubleArrayList
244      * @param globalMean double
245      * @return double
246      */
247     protected double qStatistic( DoubleArrayList effectSizes, DoubleArrayList variances, double globalMean ) {
248 
249         if ( !( effectSizes.size() == variances.size() ) ) throw new IllegalArgumentException( "Unequal sizes" );
250 
251         double r = 0.0;
252         for ( int i = 0, n = effectSizes.size(); i < n; i++ ) {
253             r += Math.pow( effectSizes.getQuick( i ) - globalMean, 2.0 ) / variances.getQuick( i );
254         }
255         return r;
256     }
257 
258     /**
259      * General formula for weighted mean of effect sizes. Cooper and Hedges 18-1, or HS pg. 100.
260      * <p>
261      * In HS, the weights are simply the sample sizes. For CH, the weights are 1/v for a fixed effect model. Under a
262      * random effects model, we would use 1/(v + v_bs) where v_bs is the between-studies variance.
263      * 
264      * @param effectSizes
265      * @param sampleSizes
266      * @return
267      */
268     protected double weightedMean( DoubleArrayList effectSizes, DoubleArrayList weights ) {
269 
270         if ( !( effectSizes.size() == weights.size() ) ) throw new IllegalArgumentException( "Unequal sizes" );
271 
272         double wm = 0.0;
273         for ( int i = 0; i < effectSizes.size(); i++ ) {
274             wm += weights.getQuick( i ) * effectSizes.getQuick( i );
275         }
276 
277         double s = Descriptive.sum( weights );
278 
279         if ( s == 0.0 ) return 0.0;
280         return wm /= s;
281     }
282 
283     /**
284      * General formula for weighted mean of effect sizes including quality index scores for each value. Cooper and
285      * Hedges 18-1, or HS pg. 100.
286      * 
287      * @param effectSizes
288      * @param sampleSizes
289      * @param qualityIndices
290      * @return
291      */
292     protected double weightedMean( DoubleArrayList effectSizes, DoubleArrayList weights, DoubleArrayList qualityIndices ) {
293 
294         if ( !( effectSizes.size() == weights.size() && weights.size() == qualityIndices.size() ) )
295             throw new IllegalArgumentException( "Unequal sizes" );
296 
297         double wm = 0.0;
298         for ( int i = 0; i < effectSizes.size(); i++ ) {
299             wm += weights.getQuick( i ) * effectSizes.getQuick( i ) * qualityIndices.getQuick( i );
300         }
301         return wm /= Descriptive.sum( weights ) * Descriptive.sum( qualityIndices );
302     }
303 }