View Javadoc
1   /*
2    * The baseCode project
3    * 
4    * Copyright (c) 2006 University of British Columbia
5    * 
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.math;
20  
21  import java.util.ArrayList;
22  
23  import cern.colt.list.DoubleArrayList;
24  import cern.colt.list.IntArrayList;
25  import cern.colt.matrix.DoubleMatrix1D;
26  import cern.colt.matrix.impl.DenseDoubleMatrix1D;
27  
28  /**
29   * Methods for p-value correction of sets of hypothesis tests.
30   * <p>
31   * FIXME make this API more consistent.
32   * 
33   * @author Paul Pavlidis
34   * 
35   */
36  public class MultipleTestCorrection {
37  
38      /**
39       * @param pvalues
40       * @return false discovery rates (FDR) computed using the method of Benjamini and Hochberg. The order is the same as
41       *         the original pvalues (that is, each value fdr[i] corresponds to the pvaluesp[i].
42       */
43      public static DoubleArrayList benjaminiHochberg( DoubleArrayList pvalues ) {
44          int nump = pvalues.size();
45          int n = nump;
46  
47          IntArrayList order = Rank.order( pvalues );
48  
49          DoubleMatrix1D tmp = new DenseDoubleMatrix1D( nump );
50  
51          DoubleArrayList sorted = pvalues.copy();
52          sorted.sort();
53  
54          double previous = 1.0;
55          for ( int i = sorted.size() - 1; i >= 0; i-- ) {
56              double pval = sorted.get( i );
57              // never let the qvalue increase.
58              double qval = Math.min( pval * nump / n, previous );
59              tmp.set( i, qval );
60              previous = qval;
61              n--;
62          }
63          DoubleArrayList results = new DoubleArrayList( nump );
64          for ( int i = 0; i < nump; i++ ) {
65              results.add( 0.0 );
66          }
67          for ( int i = 0; i < nump; i++ ) {
68              results.set( order.get( i ), tmp.get( i ) );
69          }
70  
71          return results;
72      }
73  
74      /**
75       * @param pvalues; can contain missing values or invalid pvalues (outside range [0-1]), which are ignored.
76       * @return false discovery rates (FDRs) computed using the method of Benjamini and Hochberg; or null if they could
77       *         not be computed. The order is the same as the original pvalues (that is, each value fdr[i] corresponds to
78       *         the pvaluesp[i].
79       */
80      public static DoubleMatrix1D benjaminiHochberg( DoubleMatrix1D pvalues ) {
81          double[] qvalues = new double[pvalues.size()];
82  
83          /* Create a list with only the p-values that are not Double.NaN */
84          ArrayList<Double> pvaluesList = new ArrayList<Double>();
85          for ( int i = 0; i < pvalues.size(); i++ ) {
86              qvalues[i] = Double.NaN; // initialize.
87  
88              Double pvalue = pvalues.getQuick( i );
89              if ( pvalue < 0.0 || pvalue > 1.0 || Double.isNaN( pvalue ) ) continue;
90              pvaluesList.add( pvalue );
91          }
92  
93          if ( pvaluesList.isEmpty() ) {
94              return null;
95          }
96  
97          /* convert to primitive array */
98          double[] pvaluesToUse = new double[pvaluesList.size()];
99          int j = 0;
100         for ( Double d : pvaluesList ) {
101             pvaluesToUse[j] = d;
102             j++;
103         }
104 
105         DoubleArrayList r = benjaminiHochberg( new DoubleArrayList( pvaluesToUse ) );
106 
107         /* Add the Double.NaN back in */
108         int k = 0;
109         for ( int i = 0; i < qvalues.length; i++ ) {
110             Double pvalue = pvalues.get( i );
111             if ( pvalue < 0.0 || pvalue > 1.0 || Double.isNaN( pvalue ) ) {
112                 qvalues[i] = Double.NaN;
113             } else {
114                 qvalues[i] = r.get( k );
115                 k++;
116             }
117         }
118 
119         return new DenseDoubleMatrix1D( qvalues );
120     }
121 
122     /**
123      * Benjamini-Hochberg method. Determines the maximum p value to maintain the false discovery rate. (Assuming pvalues
124      * are independent);
125      * 
126      * @param pvalues list of pvalues. Need not be sorted.
127      * @param fdr false discovery rate (value q in B-H).
128      * @return The maximum pvalue that maintains the false discovery rate
129      * @throws IllegalArgumentException if invalid pvalues are encountered.
130      */
131     public static double benjaminiHochbergCut( DoubleArrayList pvalues, double fdr ) {
132         int numpvals = pvalues.size();
133         DoubleArrayList pvalcop = pvalues.copy();
134         pvalcop.sort();
135 
136         for ( int i = numpvals - 1; i >= 0; i-- ) {
137 
138             double p = pvalcop.get( i );
139             if ( p < 0.0 || p > 1.0 ) throw new IllegalArgumentException( "p-value must be in range [0,1]" );
140 
141             double thresh = fdr * i / numpvals;
142             if ( p < thresh ) {
143                 return p;
144             }
145         }
146         return 0.0;
147     }
148 
149     /**
150      * Benjamini-Yekuteli method. Determines the maximum p value to maintain the false discovery rate. Valid under
151      * dependence of the pvalues. This method is more conservative than Benjamini-Hochberg.
152      * 
153      * @param pvalues list of pvalues. Need not be sorted.
154      * @param fdr false discovery rate (value q in B-H).
155      * @return The maximum pvalue that maintains the false discovery rate
156      * @throws IllegalArgumentException if invalid pvalues are encountered.
157      */
158     public static double BenjaminiYekuteliCut( DoubleArrayList pvalues, double fdr ) {
159 
160         int numpvals = pvalues.size();
161         DoubleArrayList pvalcop = pvalues.copy();
162         pvalcop.sort();
163 
164         // as per Theorem 1.3 in paper.
165         // qmod replaces q (fdr) in the method
166         double qmod = 0.0;
167         for ( int i = 1; i <= numpvals; i++ ) {
168             qmod += 1.0 / i;
169         }
170         qmod = fdr / qmod;
171 
172         for ( int i = numpvals - 1; i >= 0; i-- ) {
173 
174             double p = pvalcop.get( i );
175             if ( p < 0.0 || p > 1.0 ) {
176                 throw new IllegalArgumentException( "p-value must be in range [0,1], got: " + p );
177             }
178 
179             double thresh = qmod * i / numpvals;
180             if ( p < thresh ) {
181                 return p;
182             }
183         }
184         return 0.0;
185     }
186 
187     /**
188      * Determine the Bonferroni pvalue threshold to maintain the family wise error rate (assuming pvalues are
189      * independent).
190      * 
191      * @param pvalues The pvalues
192      * @param fwe The family wise error rate
193      * @return The minimum pvalue that maintains the FWE
194      */
195     public static double BonferroniCut( DoubleArrayList pvalues, double fwe ) {
196         int numpvals = pvalues.size();
197         return fwe / numpvals;
198     }
199 
200 }