View Javadoc
1   /*
2    * The baseCode project
3    * 
4    * Copyright (c) 2006 University of British Columbia
5    * 
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.math;
20  
21  import java.util.ArrayList;
22  import java.util.Collections;
23  import java.util.HashMap;
24  import java.util.Iterator;
25  import java.util.List;
26  import java.util.Map;
27  
28  import cern.colt.list.DoubleArrayList;
29  import cern.colt.list.IntArrayList;
30  import cern.colt.list.ObjectArrayList;
31  
32  /**
33   * Calculate rank statistics for arrays.
34   * 
35   * @author Paul Pavlidis
36   * 
37   */
38  public class Rank {
39  
40      /**
41       * Return a permutation which puts the array in sorted order. In other words, the values returned indicate the
42       * positions of the sorted values in the <em>original</em> array (the lowest value has the lowest rank, but it could
43       * be located anywhere in the array). Indexes start from 0. Tied values are put in an arbitrary ordering.
44       * 
45       * @param array
46       * @return
47       */
48      public static IntArrayList order( DoubleArrayList array ) {
49          int size = array.size();
50          IntArrayList result = new IntArrayList( new int[size] );
51  
52          ObjectArrayList ranks = new ObjectArrayList( size );
53          for ( int i = 0; i < size; i++ ) {
54              RankData<Double> rd = new RankData<>( i, array.get( i ) );
55              ranks.add( rd );
56          }
57          ranks.sort();
58  
59          for ( int i = 0; i < size; i++ ) {
60              @SuppressWarnings("unchecked")
61              RankData<Double> rd = ( RankData<Double> ) ranks.getQuick( i );
62              result.setQuick( i, rd.getIndex() );
63          }
64          return result;
65      }
66  
67      /**
68       * @param ranks
69       * @return
70       */
71      public static long rankSum( List<Double> ranks ) {
72          double sum = 0.0;
73          for ( Double rank : ranks ) {
74              sum += rank;
75          }
76          return ( long ) sum;
77      }
78  
79      /**
80       * Rank transform an array. The ranks are constructed based on the sort order of the elements. That is, low values
81       * get low numbered ranks starting from 1.
82       * <p>
83       * Ties are resolved by assigning the average rank for tied values. For example, instead of arbitrarily assigning
84       * ties ranks 3,4,5, all three values would get a rank of 4 and no value would get a rank of 3 or 5.
85       * <p>
86       * Missing values are sorted in their natural order, which means they end up all at one end (at the high ('bad')
87       * end)
88       * 
89       * @param array DoubleArrayList
90       * @return cern.colt.list.DoubleArrayList
91       * @param array, or null if the ranks could not be computed.
92       * @return
93       */
94      public static DoubleArrayList rankTransform( DoubleArrayList array ) {
95          return rankTransform( array, false );
96      }
97  
98      /**
99       * Rank transform an array. The ranks are constructed based on the sort order of the elements. That is, low values
100      * get low numbered ranks starting from 1, unless you set descending = true.
101      * <p>
102      * Ties are resolved by assigning the average rank for tied values. For example, instead of arbitrarily assigning
103      * ties ranks 3,4,5, all three values would get a rank of 4 and no value would get a rank of 3 or 5.
104      * <p>
105      * Missing values are sorted in their natural order, which means they end up all at one end (at the high ('bad')
106      * end)
107      * 
108      * @param array DoubleArrayList
109      * @param descending - reverse the usual ordering so larger values are the the front.
110      * @return cern.colt.list.DoubleArrayList, or null if the input is empty or null.
111      */
112     public static DoubleArrayList rankTransform( DoubleArrayList array, boolean descending ) {
113         if ( array == null ) {
114             return null;
115         }
116 
117         int size = array.size();
118         if ( size == 0 ) {
119             return null;
120         }
121 
122         List<RankData<Double>> ranks = new ArrayList<RankData<Double>>( size );
123         DoubleArrayList result = new DoubleArrayList( new double[size] );
124 
125         // store the values with their indices - not sorted yet.
126         for ( int i = 0; i < size; i++ ) {
127             double v = array.get( i );
128 
129             if ( descending ) {
130                 v = -v;
131             }
132             RankData<Double> rd = new RankData<Double>( i, v );
133             // if ( Double.isNaN( v ) ) throw new IllegalArgumentException( "Missing values are not tolerated." );
134             ranks.add( rd );
135         }
136 
137         Collections.sort( ranks );
138 
139         // fill in the results. We iterate over the ranks by order.
140         Double prevVal = null;
141         int rank = 1;
142         int nominalRank = 1; // rank we'd have if no ties.
143         for ( int i = 0; i < size; i++ ) {
144             RankData<Double> rankData = ranks.get( i );
145             int index = rankData.getIndex();
146             Double val = rankData.getValue();
147 
148             result.set( index, nominalRank ); // might not keep.
149 
150             // only bump up ranks if we're not tied with the last one.
151             if ( prevVal != null && !val.equals( prevVal ) ) {
152                 rank = nominalRank;
153             } else {
154                 // tied. Do not advance the rank.
155                 result.set( index, rank );
156             }
157 
158             prevVal = val;
159             nominalRank++;
160         }
161 
162         // At this point we may have repeated ranks.
163 
164         fixTies( result, ranks );
165 
166         return result;
167     }
168 
169     /**
170      * Rank transform a map, where the values are Comparable values we wish to rank. Ties are broken as for the other
171      * methods. Ranks are zero-based
172      * <p>
173      * Missing values are sorted in their natural order, which means they end up all at one end (at the high ('bad')
174      * end)
175      * 
176      * @param m java.util.Map with keys Objects, values Doubles.
177      * @return A java.util.Map keys=old keys, values=java.lang.Double rank of the key. Non-integer values mean tie
178      *         splits.
179      */
180     public static <K> Map<K, Double> rankTransform( Map<K, ? extends Comparable<?>> m ) {
181         return rankTransform( m, false );
182     }
183 
184     /**
185      * Ties are broken as for the other methods. CAUTION - ranks start at 0.
186      * <p>
187      * Missing values are sorted in their natural order, which means they end up all at one end (at the high ('bad')
188      * end)
189      * 
190      * @param <K>
191      * @param m
192      * @param desc if true, the lowest (first) rank will be for the highest value.
193      * @return A java.util.Map keys=old keys, values=java.lang.Double rank of the key. Non-integer values mean tie
194      *         splits.
195      */
196     public static <K> Map<K, Double> rankTransform( Map<K, ? extends Comparable<?>> m, boolean desc ) {
197 
198         List<KeyAndValueData<K>> values = new ArrayList<KeyAndValueData<K>>();
199 
200         for ( Iterator<K> itr = m.keySet().iterator(); itr.hasNext(); ) {
201 
202             K key = itr.next();
203             Comparable<?> val = m.get( key );
204 
205             values.add( new KeyAndValueData<K>( 0, key, val ) );
206         }
207 
208         return rankTransform( m, desc, values );
209     }
210 
211     /**
212      * @param ranksWithTies
213      */
214     private static void fixTies( DoubleArrayList ranksWithTies, List<? extends RankData<?>> ranks ) {
215 
216         int numties = 1;
217         Double prev = null;
218         int i = 0;
219         // iterate over in order of the values.
220         for ( int j = ranksWithTies.size(); i < j; i++ ) {
221             RankData<?> rankData = ranks.get( i );
222             int index = rankData.getIndex();
223             double rank = ranksWithTies.getQuick( index );
224 
225             if ( prev != null ) {
226                 if ( rank == prev.doubleValue() ) {
227                     // record how many ties we have seen
228                     numties++;
229                     // System.err.println( "Tied rank: " + rank );
230                 } else if ( numties > 1 ) {
231                     // we're past the batch of ties and need to adjust them
232                     for ( int k = 1; k <= numties; k++ ) {
233                         int indexOfTied = ranks.get( i - k ).getIndex(); // original rank?
234                         double rawRankInTie = ranksWithTies.getQuick( indexOfTied );
235                         double meanRank = meanRank( rawRankInTie, numties );
236                         ranksWithTies.setQuick( indexOfTied, meanRank );
237                     }
238                     numties = 1; // reset
239                 }
240                 // no tie, nothing needs to be changed.
241             }
242             prev = rank;
243         }
244 
245         // cleanup the end of the array if there were ties there.
246         if ( numties > 1 ) {
247             for ( int k = 1; k <= numties; k++ ) {
248                 int indexOfTied = ( ( RankData<?> ) ranks.get( i - k ) ).getIndex();
249                 double rawRankInTie = ranksWithTies.getQuick( indexOfTied );
250                 double meanRank = meanRank( rawRankInTie, numties );
251                 ranksWithTies.setQuick( indexOfTied, meanRank );
252             }
253         }
254 
255     }
256 
257     /**
258      * @param <K>
259      * @param ranksWithTies
260      * @param ranks
261      */
262     private static <K> void fixTies( Map<K, Double> ranksWithTies, List<KeyAndValueData<K>> ranks ) {
263         int numties = 1;
264         Double prev = null;
265         int i = 0;
266         // iterate over in order of the values.
267         for ( int j = ranksWithTies.size(); i < j; i++ ) {
268             KeyAndValueData<K> rankData = ranks.get( i );
269             double rank = ranksWithTies.get( rankData.getKey() );
270             if ( prev != null ) {
271                 if ( rank == prev.doubleValue() ) {
272                     // record how many ties we have seen
273                     numties++;
274                     // System.err.println( "Tied rank: " + rank );
275                 } else if ( numties > 1 ) {
276                     // we're past the batch of ties and need to adjust them
277                     for ( int k = 1; k <= numties; k++ ) {
278                         K indexOfTied = ranks.get( i - k ).getKey(); // original key
279                         double rawRankInTie = ranksWithTies.get( indexOfTied );
280                         double meanRank = meanRank( rawRankInTie, numties );
281                         ranksWithTies.put( indexOfTied, meanRank );
282                     }
283                     numties = 1; // reset
284                 }
285                 // no tie, nothing needs to be changed.
286             }
287             prev = rank;
288         }
289 
290         // cleanup the end of the array if there were ties there.
291         if ( numties > 1 ) {
292             for ( int k = 1; k <= numties; k++ ) {
293                 K indexOfTied = ranks.get( i - k ).getKey();
294                 double rawRankInTie = ranksWithTies.get( indexOfTied );
295                 double meanRank = meanRank( rawRankInTie, numties );
296                 ranksWithTies.put( indexOfTied, meanRank );
297             }
298         }
299     }
300 
301     /**
302      * @param rawRank
303      * @param numties
304      * @return
305      */
306     private static double meanRank( double rawRank, int numties ) {
307         double total = 0;
308         for ( int i = 0; i < numties; i++ ) {
309             total = total + rawRank + i;
310         }
311         return total / numties;
312     }
313 
314     /**
315      * @param m
316      * @param desc
317      * @param values this gets sorted.
318      * @return A java.util.Map keys=old keys, values=java.lang.Double rank of the key. Non-integer values mean tie
319      *         splits.
320      */
321     @SuppressWarnings("unchecked")
322     private static <K> Map<K, Double> rankTransform( Map<K, ? extends Comparable<?>> m, boolean desc,
323             List<KeyAndValueData<K>> values ) {
324 
325         Collections.sort( values );
326         Map<K, Double> result = new HashMap<K, Double>();
327 
328         Double rank = 0.0;
329         Comparable<K> prevVal = null;
330         Double nominalRank = 0.0; // rank we'd have if no ties.
331         for ( int i = 0; i < m.size(); i++ ) {
332             result.put( values.get( i ).getKey(), ( double ) nominalRank ); // might not keep.
333 
334             Comparable<K> val = values.get( i ).getValue();
335             // only bump up ranks if we're not tied with the last one.
336             if ( prevVal != null && !val.equals( prevVal ) ) {
337                 rank = nominalRank;
338             } else {
339                 // tied. Do not advance the rank.
340                 result.put( values.get( i ).getKey(), rank );
341             }
342 
343             prevVal = val;
344             nominalRank++;
345 
346         }
347 
348         fixTies( result, values );
349 
350         if ( desc ) {
351             /*
352              * Reverse all the values.
353              */
354             Map<K, Double> finalResult = new HashMap<K, Double>();
355             double mr = result.size();
356             for ( K k : result.keySet() ) {
357                 double d = result.get( k );
358                 finalResult.put( k, mr - d - 1.0 );
359             }
360             return finalResult;
361         }
362 
363         return result;
364     }
365 }
366 
367 /*
368  * Helper class for rankTransform map.
369  */
370 @SuppressWarnings({ "unchecked", "rawtypes" })
371 class KeyAndValueData<K> extends RankData {
372     private K key;
373 
374     public KeyAndValueData( int index, K id, Comparable<?> v ) {
375         super( index, v );
376         this.key = id;
377     }
378 
379     public int compareTo( KeyAndValueData<K> other ) {
380         return this.value.compareTo( other.getValue() );
381     }
382 
383     public K getKey() {
384         return key;
385     }
386 }
387 
388 /*
389  * Helper class for rankTransform .
390  */
391 class RankData<C extends Comparable<C>> implements Comparable<RankData<C>> {
392 
393     int index = 0;
394 
395     C value = null;
396 
397     public RankData( int tindex, C tvalue ) {
398         index = tindex;
399         value = tvalue;
400     }
401 
402     @Override
403     public int compareTo( RankData<C> other ) {
404         return this.value.compareTo( other.getValue() );
405     }
406 
407     @SuppressWarnings("unchecked")
408     @Override
409     public boolean equals( Object obj ) {
410         if ( obj == null ) return false;
411         if ( !( obj instanceof RankData ) ) return false;
412         return this.value.equals( ( ( RankData<C> ) obj ).getValue() );
413     }
414 
415     public int getIndex() {
416         return index;
417     }
418 
419     public C getValue() {
420         return value;
421     }
422 
423     @Override
424     public int hashCode() {
425         if ( value == null ) return 1;
426         return value.hashCode();
427     }
428 
429     @Override
430     public String toString() {
431         return "Index=" + index + " value=" + value;
432     }
433 }