1 /*
2 * The baseCode project
3 *
4 * Copyright (c) 2006 University of British Columbia
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 */
19 package ubic.basecode.math;
20
21 import java.util.ArrayList;
22
23 import cern.colt.list.DoubleArrayList;
24 import cern.colt.list.IntArrayList;
25 import cern.colt.matrix.DoubleMatrix1D;
26 import cern.colt.matrix.impl.DenseDoubleMatrix1D;
27
28 /**
29 * Methods for p-value correction of sets of hypothesis tests.
30 * <p>
31 * FIXME make this API more consistent.
32 *
33 * @author Paul Pavlidis
34 *
35 */
36 public class MultipleTestCorrection {
37
38 /**
39 * @param pvalues
40 * @return false discovery rates (FDR) computed using the method of Benjamini and Hochberg. The order is the same as
41 * the original pvalues (that is, each value fdr[i] corresponds to the pvaluesp[i].
42 */
43 public static DoubleArrayList benjaminiHochberg( DoubleArrayList pvalues ) {
44 int nump = pvalues.size();
45 int n = nump;
46
47 IntArrayList order = Rank.order( pvalues );
48
49 DoubleMatrix1D tmp = new DenseDoubleMatrix1D( nump );
50
51 DoubleArrayList sorted = pvalues.copy();
52 sorted.sort();
53
54 double previous = 1.0;
55 for ( int i = sorted.size() - 1; i >= 0; i-- ) {
56 double pval = sorted.get( i );
57 // never let the qvalue increase.
58 double qval = Math.min( pval * nump / n, previous );
59 tmp.set( i, qval );
60 previous = qval;
61 n--;
62 }
63 DoubleArrayList results = new DoubleArrayList( nump );
64 for ( int i = 0; i < nump; i++ ) {
65 results.add( 0.0 );
66 }
67 for ( int i = 0; i < nump; i++ ) {
68 results.set( order.get( i ), tmp.get( i ) );
69 }
70
71 return results;
72 }
73
74 /**
75 * @param pvalues; can contain missing values or invalid pvalues (outside range [0-1]), which are ignored.
76 * @return false discovery rates (FDRs) computed using the method of Benjamini and Hochberg; or null if they could
77 * not be computed. The order is the same as the original pvalues (that is, each value fdr[i] corresponds to
78 * the pvaluesp[i].
79 */
80 public static DoubleMatrix1D benjaminiHochberg( DoubleMatrix1D pvalues ) {
81 double[] qvalues = new double[pvalues.size()];
82
83 /* Create a list with only the p-values that are not Double.NaN */
84 ArrayList<Double> pvaluesList = new ArrayList<Double>();
85 for ( int i = 0; i < pvalues.size(); i++ ) {
86 qvalues[i] = Double.NaN; // initialize.
87
88 Double pvalue = pvalues.getQuick( i );
89 if ( pvalue < 0.0 || pvalue > 1.0 || Double.isNaN( pvalue ) ) continue;
90 pvaluesList.add( pvalue );
91 }
92
93 if ( pvaluesList.isEmpty() ) {
94 return null;
95 }
96
97 /* convert to primitive array */
98 double[] pvaluesToUse = new double[pvaluesList.size()];
99 int j = 0;
100 for ( Double d : pvaluesList ) {
101 pvaluesToUse[j] = d;
102 j++;
103 }
104
105 DoubleArrayList r = benjaminiHochberg( new DoubleArrayList( pvaluesToUse ) );
106
107 /* Add the Double.NaN back in */
108 int k = 0;
109 for ( int i = 0; i < qvalues.length; i++ ) {
110 Double pvalue = pvalues.get( i );
111 if ( pvalue < 0.0 || pvalue > 1.0 || Double.isNaN( pvalue ) ) {
112 qvalues[i] = Double.NaN;
113 } else {
114 qvalues[i] = r.get( k );
115 k++;
116 }
117 }
118
119 return new DenseDoubleMatrix1D( qvalues );
120 }
121
122 /**
123 * Benjamini-Hochberg method. Determines the maximum p value to maintain the false discovery rate. (Assuming pvalues
124 * are independent);
125 *
126 * @param pvalues list of pvalues. Need not be sorted.
127 * @param fdr false discovery rate (value q in B-H).
128 * @return The maximum pvalue that maintains the false discovery rate
129 * @throws IllegalArgumentException if invalid pvalues are encountered.
130 */
131 public static double benjaminiHochbergCut( DoubleArrayList pvalues, double fdr ) {
132 int numpvals = pvalues.size();
133 DoubleArrayList pvalcop = pvalues.copy();
134 pvalcop.sort();
135
136 for ( int i = numpvals - 1; i >= 0; i-- ) {
137
138 double p = pvalcop.get( i );
139 if ( p < 0.0 || p > 1.0 ) throw new IllegalArgumentException( "p-value must be in range [0,1]" );
140
141 double thresh = fdr * i / numpvals;
142 if ( p < thresh ) {
143 return p;
144 }
145 }
146 return 0.0;
147 }
148
149 /**
150 * Benjamini-Yekuteli method. Determines the maximum p value to maintain the false discovery rate. Valid under
151 * dependence of the pvalues. This method is more conservative than Benjamini-Hochberg.
152 *
153 * @param pvalues list of pvalues. Need not be sorted.
154 * @param fdr false discovery rate (value q in B-H).
155 * @return The maximum pvalue that maintains the false discovery rate
156 * @throws IllegalArgumentException if invalid pvalues are encountered.
157 */
158 public static double BenjaminiYekuteliCut( DoubleArrayList pvalues, double fdr ) {
159
160 int numpvals = pvalues.size();
161 DoubleArrayList pvalcop = pvalues.copy();
162 pvalcop.sort();
163
164 // as per Theorem 1.3 in paper.
165 // qmod replaces q (fdr) in the method
166 double qmod = 0.0;
167 for ( int i = 1; i <= numpvals; i++ ) {
168 qmod += 1.0 / i;
169 }
170 qmod = fdr / qmod;
171
172 for ( int i = numpvals - 1; i >= 0; i-- ) {
173
174 double p = pvalcop.get( i );
175 if ( p < 0.0 || p > 1.0 ) {
176 throw new IllegalArgumentException( "p-value must be in range [0,1], got: " + p );
177 }
178
179 double thresh = qmod * i / numpvals;
180 if ( p < thresh ) {
181 return p;
182 }
183 }
184 return 0.0;
185 }
186
187 /**
188 * Determine the Bonferroni pvalue threshold to maintain the family wise error rate (assuming pvalues are
189 * independent).
190 *
191 * @param pvalues The pvalues
192 * @param fwe The family wise error rate
193 * @return The minimum pvalue that maintains the FWE
194 */
195 public static double BonferroniCut( DoubleArrayList pvalues, double fwe ) {
196 int numpvals = pvalues.size();
197 return fwe / numpvals;
198 }
199
200 }