1 /*
2 * The baseCode project
3 *
4 * Copyright (c) 2006 University of British Columbia
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 */
19 package ubic.basecode.math.metaanalysis;
20
21 import ubic.basecode.math.Constants;
22 import cern.colt.list.DoubleArrayList;
23 import cern.jet.stat.Descriptive;
24 import cern.jet.stat.Probability;
25
26 /**
27 * Statistics for meta-analysis. Methods from Cooper and Hedges (CH); Hunter and Schmidt (HS).
28 * <p>
29 * In this class "conditional variance" means the variance for one data set. Unconditional means "between data set", or
30 *
31 * @author Paul Pavlidis
32 *
33 */
34 public abstract class MetaAnalysis {
35
36 /**
37 * Fisher's method for combining p values. (Cooper and Hedges 15-8)
38 *
39 * @param pvals DoubleArrayList
40 * @return double upper tail
41 */
42 public static double fisherCombinePvalues( DoubleArrayList pvals ) {
43 double r = 0.0;
44 for ( int i = 0, n = pvals.size(); i < n; i++ ) {
45 r += Math.log( pvals.getQuick( i ) );
46 }
47 r *= -2.0;
48 // NOTE: dof is first argument.
49 return Probability.chiSquareComplemented( 2.0 * pvals.size(), r );
50 }
51
52 /**
53 * Test for statistical significance of Q.
54 *
55 * @param Q - computed using qStatistic
56 * @param N - number of studies.
57 * @see qStatistic
58 * @return The upper tail chi-square probability for Q with N - degrees of freedom.
59 */
60 public double qTest( double Q, double N ) {
61 // NOTE: dof is first argument.
62 return Probability.chiSquareComplemented( N - 1, Q );
63 }
64
65 /**
66 * Fisher's method for combining p values (Cooper and Hedges 15-8)
67 * <p>
68 * Use for p values that have already been log transformed.
69 *
70 * @param pvals DoubleArrayList
71 * @return double upper tail
72 */
73 protected double fisherCombineLogPvalues( DoubleArrayList pvals ) {
74 double r = 0.0;
75 for ( int i = 0, n = pvals.size(); i < n; i++ ) {
76 r += pvals.getQuick( i );
77 }
78 r *= -2.0;
79 // NOTE: dof is first argument.
80 return Probability.chiSquareComplemented( 2.0 * pvals.size(), r );
81 }
82
83 /**
84 * Weights under a fixed effects model. Simply w_i = 1/v_i. CH eqn 18-2.
85 *
86 * @param variances
87 * @return
88 */
89 protected DoubleArrayList metaFEWeights( DoubleArrayList variances ) {
90 DoubleArrayList w = new DoubleArrayList( variances.size() );
91
92 for ( int i = 0; i < variances.size(); i++ ) {
93 double v = variances.getQuick( i );
94 if ( v <= Constants.SMALL ) {
95 v = Constants.SMALL;
96 System.err.println( "Tiny variance " + v );
97 }
98 w.add( 1 / v );
99 }
100
101 return w;
102 }
103
104 /**
105 * CH sample variance under random effects model, equation 18-20
106 *
107 * @param
108 * @return
109 */
110 protected double metaRESampleVariance( DoubleArrayList effectSizes ) {
111 return Descriptive.sampleVariance( effectSizes, Descriptive.mean( effectSizes ) );
112 }
113
114 /**
115 * CH estimate of between-studies variance, equation 18-22, for random effects model.
116 *
117 * @param effectSizes
118 * @param variances
119 * @return
120 */
121 // protected double metaREVariance( DoubleArrayList effectSizes,
122 // DoubleArrayList variances ) {
123 // return Math.max( metaRESampleVariance( effectSizes )
124 // - Descriptive.mean( variances ), 0.0 );
125 // }
126 /**
127 * CH equation 18-23. Another estimator of the between-studies variance s<sup>2 </sup> for random effects model.
128 * This is non-zero only if Q is larger than expected under the null hypothesis that the variance is zero.
129 *
130 * <pre>
131 * sˆ2 = [Q - ( k - 1 ) ] / c
132 * </pre>
133 *
134 * where
135 *
136 * <pre>
137 * c = Max(sum_i=1ˆk w_i - [ sum_iˆk w_iˆ2 / sum_iˆk w_i ], 0)
138 * </pre>
139 *
140 * @param effectSizes
141 * @param variances
142 * @param weights
143 * @return
144 */
145 protected double metaREVariance( DoubleArrayList effectSizes, DoubleArrayList variances, DoubleArrayList weights ) {
146
147 if ( !( effectSizes.size() == weights.size() && weights.size() == variances.size() ) )
148 throw new IllegalArgumentException( "Unequal sizes" );
149
150 // the weighted unconditional variance.
151 double q = qStatistic( effectSizes, variances, weightedMean( effectSizes, weights ) );
152
153 double c = Descriptive.sum( weights ) - Descriptive.sumOfSquares( weights ) / Descriptive.sum( weights );
154 return Math.max( ( q - ( effectSizes.size() - 1 ) ) / c, 0.0 );
155 }
156
157 /**
158 * Under a random effects model, CH eqn. 18-24, we replace the conditional variance with the sum of the
159 * between-sample variance and the conditional variance.
160 *
161 * <pre>
162 * v_iˆ* = sigma-hat_thetaˆ2 + v_i.
163 * </pre>
164 *
165 * @param variances Conditional variances
166 * @param sampleVariance estimated...somehow.
167 * @return
168 */
169 protected DoubleArrayList metaREWeights( DoubleArrayList variances, double sampleVariance ) {
170 DoubleArrayList w = new DoubleArrayList( variances.size() );
171
172 for ( int i = 0; i < variances.size(); i++ ) {
173 if ( variances.getQuick( i ) <= 0 ) {
174 throw new IllegalStateException( "Negative or zero variance" );
175 }
176 w.add( 1 / ( variances.getQuick( i ) + sampleVariance ) );
177 }
178
179 return w;
180 }
181
182 /**
183 * CH 18-3. Can be used for fixed or random effects model, the variances just have to computed differently.
184 *
185 * <pre>
186 *
187 * v_dot = 1/sum_i=1ˆk ( 1/v_i)
188 * </pre>
189 *
190 * @param variances
191 * @return
192 */
193 protected double metaVariance( DoubleArrayList variances ) {
194 double var = 0.0;
195 for ( int i = 0; i < variances.size(); i++ ) {
196 var += 1.0 / variances.getQuick( i );
197 }
198 if ( var == 0.0 ) {
199 var = Double.MIN_VALUE;
200 // throw new IllegalStateException( "Variance of zero" );
201 }
202 return 1.0 / var;
203 }
204
205 /**
206 * CH 18-3 version 2 for quality weighted. ( page 266 ) in Fixed effects model.
207 *
208 * <pre>
209 * v_dot = [ sum_i=1ˆk ( q_i ˆ 2 * w_i) ]/[ sum_i=1ˆk q_i * w_i ]ˆ2
210 * </pre>
211 *
212 * @param variances
213 * @return
214 */
215 protected double metaVariance( DoubleArrayList weights, DoubleArrayList qualityIndices ) {
216 double num = 0.0;
217 double denom = 0.0;
218 for ( int i = 0; i < weights.size(); i++ ) {
219 num += Math.pow( weights.getQuick( i ), 2 ) * qualityIndices.getQuick( i );
220 denom += Math.pow( weights.getQuick( i ) * qualityIndices.getQuick( i ), 2 );
221 }
222 if ( denom == 0.0 ) {
223 throw new IllegalStateException( "Attempt to divide by zero." );
224 }
225 return num / denom;
226 }
227
228 /**
229 * Test statistic for H0: effectSize == 0. CH 18-5. For fixed effects model.
230 *
231 * @param metaEffectSize
232 * @param metaVariance
233 * @return
234 */
235 protected double metaZscore( double metaEffectSize, double metaVariance ) {
236 return Math.abs( metaEffectSize ) / Math.sqrt( metaVariance );
237 }
238
239 /**
240 * The "Q" statistic used to test homogeneity of effect sizes. (Cooper and Hedges 18-6)
241 *
242 * @param effectSizes DoubleArrayList
243 * @param variances DoubleArrayList
244 * @param globalMean double
245 * @return double
246 */
247 protected double qStatistic( DoubleArrayList effectSizes, DoubleArrayList variances, double globalMean ) {
248
249 if ( !( effectSizes.size() == variances.size() ) ) throw new IllegalArgumentException( "Unequal sizes" );
250
251 double r = 0.0;
252 for ( int i = 0, n = effectSizes.size(); i < n; i++ ) {
253 r += Math.pow( effectSizes.getQuick( i ) - globalMean, 2.0 ) / variances.getQuick( i );
254 }
255 return r;
256 }
257
258 /**
259 * General formula for weighted mean of effect sizes. Cooper and Hedges 18-1, or HS pg. 100.
260 * <p>
261 * In HS, the weights are simply the sample sizes. For CH, the weights are 1/v for a fixed effect model. Under a
262 * random effects model, we would use 1/(v + v_bs) where v_bs is the between-studies variance.
263 *
264 * @param effectSizes
265 * @param sampleSizes
266 * @return
267 */
268 protected double weightedMean( DoubleArrayList effectSizes, DoubleArrayList weights ) {
269
270 if ( !( effectSizes.size() == weights.size() ) ) throw new IllegalArgumentException( "Unequal sizes" );
271
272 double wm = 0.0;
273 for ( int i = 0; i < effectSizes.size(); i++ ) {
274 wm += weights.getQuick( i ) * effectSizes.getQuick( i );
275 }
276
277 double s = Descriptive.sum( weights );
278
279 if ( s == 0.0 ) return 0.0;
280 return wm /= s;
281 }
282
283 /**
284 * General formula for weighted mean of effect sizes including quality index scores for each value. Cooper and
285 * Hedges 18-1, or HS pg. 100.
286 *
287 * @param effectSizes
288 * @param sampleSizes
289 * @param qualityIndices
290 * @return
291 */
292 protected double weightedMean( DoubleArrayList effectSizes, DoubleArrayList weights, DoubleArrayList qualityIndices ) {
293
294 if ( !( effectSizes.size() == weights.size() && weights.size() == qualityIndices.size() ) )
295 throw new IllegalArgumentException( "Unequal sizes" );
296
297 double wm = 0.0;
298 for ( int i = 0; i < effectSizes.size(); i++ ) {
299 wm += weights.getQuick( i ) * effectSizes.getQuick( i ) * qualityIndices.getQuick( i );
300 }
301 return wm /= Descriptive.sum( weights ) * Descriptive.sum( qualityIndices );
302 }
303 }