1 /*
2 * The baseCode project
3 *
4 * Copyright (c) 2006 University of British Columbia
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 */
19 package ubic.basecode.math;
20
21 import java.util.List;
22
23 /**
24 * Functions for calculating Receiver operator characteristics.
25 *
26 * @author Paul Pavlidis
27 *
28 */
29 public class ROC {
30
31 /**
32 * Calculate area under ROC, up to a given number of False positives. The input is the total number of items in the
33 * data, and the ranks of the positives in the current ranking. LOW ranks are considered better. (e.g., rank 1 is
34 * the 'best')
35 *
36 * @param totalSize
37 * @param ranks LOW ranks are considered better. (e.g., rank 1 is the 'best')
38 * @return AROC
39 */
40 public static double aroc( int totalSize, List<Double> ranks ) {
41
42 double sumOfRanks = 0.0;
43 for ( Double r : ranks ) {
44 if ( r == 0.0 ) throw new IllegalArgumentException( "Ranks must be one-based" );
45 sumOfRanks += r; // ranks are 1-based.
46 }
47
48 Long inGroup = new Long( ranks.size() );
49 Long outGroup = totalSize - inGroup;
50
51 Double t1 = inGroup * ( inGroup + 1.0 ) / 2.0;
52
53 Long t2 = inGroup * outGroup;
54
55 assert t2 > 0;
56
57 Double t3 = sumOfRanks - t1;
58
59 Double auc = Math.max( 0.0, 1.0 - t3 / t2 );
60
61 assert auc >= 0.0 && auc <= 1.0 : "AUC was " + auc + " t1=" + t1 + "t2=" + t2 + " t3=" + t3;
62
63 return auc;
64
65 }
66
67 /**
68 * For an AROC value, calculates a p value. Uses fact that ROC is equivalent to the Wilcoxon rank sum test.
69 *
70 * @param numpos How many positives are in the data.
71 * @param Ranks of objects in the class, where low ranks are considered better. (one-based)
72 * @return The p value.
73 */
74 public static double rocpval( int totalSize, List<Double> ranks ) {
75 if ( totalSize == 0 && ( ranks == null || ranks.size() == 0 ) ) return 1.0;
76 return Wilcoxon.wilcoxonP( totalSize, ranks );
77 }
78
79 }