View Javadoc
1   /*
2    * The baseCode project
3    *
4    * Copyright (c) 2006 University of British Columbia
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.math;
20  
21  import cern.colt.list.DoubleArrayList;
22  import cern.jet.stat.Descriptive;
23  
24  /**
25   * Mathematical functions for statistics that allow missing values without scotching the calculations.
26   * <p>
27   * Some functions that come with DoubleArrayLists will not work in an entirely compatible way with missing values. For
28   * examples, size() reports the total number of elements, including missing values. To get a count of non-missing
29   * values, use this.sizeWithoutMissingValues(). The right one to use may vary.
30   * </p>
31   * <p>
32   * Not all methods need to be overridden. However, all methods that take a "size" parameter should be passed the results
33   * of sizeWithoutMissingValues(data), instead of data.size().
34   * <p>
35   * Based in part on code from the colt package: Copyright &copy; 1999 CERN - European Organization for Nuclear Research.
36   *
37   * @author Paul Pavlidis
38   * @see <a
39   * href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/stat/Descriptive.html">cern.jet.stat.Descriptive
40   * </a>
41   */
42  public class DescriptiveWithMissing {
43  
44      /**
45       * Highly optimized version of the correlation computation, where as much information is precomputed as possible.
46       * Use of this method only makes sense if many comparisons with the inputs x and y are being performed.
47       * <p>
48       * Implementation note: In correlation(DoubleArrayList x, DoubleArrayList y), profiling shows that calls to
49       * Double.NaN consume half the CPU time. The precomputation of the element-by-element squared values is another
50       * obvious optimization. There is also no checking for matching lengths of the arrays.
51       *
52       * @param x            double array containing values of x_i for each X.
53       * @param y            double array containing values of y_i for each Y.
54       * @param selfSquaredX double array containing values of x_i^2 for each X.
55       * @param selfSquaredY double array containing values of y_i^2 for each Y
56       * @param nanStatusX   boolean array containing value of {@link Double#isNaN(double)} for each X.
57       * @param nanStatusY   boolean array containing value of {@link Double#isNaN(double)} for each Y.
58       * @return
59       */
60      public static double correlation( double[] x, double[] y, double[] selfSquaredX, double[] selfSquaredY, boolean[] nanStatusX, boolean[] nanStatusY ) {
61          double syy, sxy, sxx, sx, sy, xj, yj, ay, ax;
62          int numused = 0;
63          syy = 0.0;
64          sxy = 0.0;
65          sxx = 0.0;
66          sx = 0.0;
67          sy = 0.0;
68  
69          int length = x.length;
70  
71          if ( y.length != length || selfSquaredX.length != length || selfSquaredY.length != length || nanStatusX.length != length || nanStatusY.length != length ) {
72              throw new ArithmeticException();
73          }
74  
75          for ( int j = 0; j < length; j++ ) {
76              xj = x[j];
77              yj = y[j];
78  
79              if ( nanStatusX[j] || nanStatusY[j] ) {
80                  continue;
81              }
82              sx += xj;
83              sy += yj;
84              sxy += xj * yj;
85              sxx += selfSquaredX[j];
86              syy += selfSquaredY[j];
87              numused++;
88          }
89  
90          if ( numused > 0 ) {
91              ay = sy / numused;
92              ax = sx / numused;
93              return ( sxy - sx * ay ) / Math.sqrt( ( sxx - sx * ax ) * ( syy - sy * ay ) );
94          }
95          return Double.NaN; // signifies that it could not be calculated.
96      }
97  
98      /**
99       * Calculate the pearson correlation of two arrays. Missing values (NaNs) are ignored.
100      *
101      * @param x DoubleArrayList
102      * @param y DoubleArrayList
103      * @return double
104      */
105     public static double correlation( DoubleArrayList x, DoubleArrayList y ) {
106         int j;
107         double syy, sxy, sxx, sx, sy, xj, yj, ay, ax;
108         int numused;
109         syy = 0.0;
110         sxy = 0.0;
111         sxx = 0.0;
112         sx = 0.0;
113         sy = 0.0;
114         numused = 0;
115         if ( x.size() != y.size() ) {
116             throw new ArithmeticException( "Unequal vector sizes: " + x.size() + " != " + y.size() );
117         }
118 
119         double[] xel = x.elements();
120         double[] yel = y.elements();
121 
122         int length = x.size();
123         for ( j = 0; j < length; j++ ) {
124             xj = xel[j];
125             yj = yel[j];
126 
127             if ( !Double.isNaN( xj ) && !Double.isNaN( yj ) ) {
128                 sx += xj;
129                 sy += yj;
130                 sxy += xj * yj;
131                 sxx += xj * xj;
132                 syy += yj * yj;
133                 numused++;
134             }
135         }
136 
137         if ( numused > 0 ) {
138             ay = sy / numused;
139             ax = sx / numused;
140             return ( sxy - sx * ay ) / Math.sqrt( ( sxx - sx * ax ) * ( syy - sy * ay ) );
141         }
142 
143         return Double.NaN; // signifies that it could not be calculated.
144     }
145 
146     /**
147      * Returns the SAMPLE covariance of two data sequences. Pairs of values are only considered if both are not NaN. If
148      * there are no non-missing pairs, the covariance is zero.
149      *
150      * @param data1 the first vector
151      * @param data2 the second vector
152      * @return double
153      */
154     public static double covariance( DoubleArrayList data1, DoubleArrayList data2 ) {
155         int size = data1.size();
156         if ( size != data2.size() || size == 0 ) {
157             throw new IllegalArgumentException();
158         }
159         double[] elements1 = data1.elements();
160         double[] elements2 = data2.elements();
161 
162         /* initialize sumx and sumy to the first non-NaN pair of values */
163 
164         int i = 0;
165         double sumx = 0.0, sumy = 0.0, Sxy = 0.0;
166         while ( i < size ) {
167             sumx = elements1[i];
168             sumy = elements2[i];
169             if ( !Double.isNaN( elements1[i] ) && !Double.isNaN( elements2[i] ) ) {
170                 break;
171             }
172             i++;
173         }
174         i++;
175         int usedPairs = 1;
176         for ( ; i < size; ++i ) {
177             double x = elements1[i];
178             double y = elements2[i];
179             if ( Double.isNaN( x ) || Double.isNaN( y ) ) {
180                 continue;
181             }
182 
183             sumx += x;
184             Sxy += ( x - sumx / ( usedPairs + 1 ) ) * ( y - sumy / usedPairs );
185             sumy += y;
186             usedPairs++;
187         }
188         return Sxy / ( usedPairs - 1 );
189     }
190 
191     /**
192      * Durbin-Watson computation. This measures the serial correlation in a data series.
193      *
194      * @param data DoubleArrayList
195      * @return double
196      * @todo this will still break in some situations where there are missing values
197      */
198     public static double durbinWatson( DoubleArrayList data ) {
199         int size = data.size();
200         if ( size < 2 ) {
201             throw new IllegalArgumentException( "data sequence must contain at least two values." );
202         }
203 
204         double[] elements = data.elements();
205         double run = 0;
206         double run_sq = 0;
207         int firstNotNaN = 0;
208         while ( firstNotNaN < size ) {
209             run_sq = elements[firstNotNaN] * elements[firstNotNaN];
210 
211             if ( !Double.isNaN( elements[firstNotNaN] ) ) {
212                 break;
213             }
214 
215             firstNotNaN++;
216         }
217 
218         if ( firstNotNaN > 0 && size - firstNotNaN < 2 ) {
219             throw new IllegalArgumentException( "data sequence must contain at least two non-missing values." );
220 
221         }
222 
223         for ( int i = firstNotNaN + 1; i < size; i++ ) {
224             int gap = 1;
225             while ( i < size && Double.isNaN( elements[i] ) ) {
226                 gap++;
227                 i++;
228                 continue;
229             }
230             if ( i >= size ) continue; // missing value at end will cause this.
231             double x = elements[i] - elements[i - gap];
232             /**  */
233             run += x * x;
234             run_sq += elements[i] * elements[i];
235         }
236 
237         return run / run_sq;
238     }
239 
240     /**
241      * Returns the geometric mean of a data sequence. Missing values are ignored. Note that for a geometric mean to be
242      * meaningful, the minimum of the data sequence must not be less or equal to zero. <br>
243      * The geometric mean is given by <tt>pow( Product( data[i] ),
244      * 1/data.size())</tt>. This method tries to avoid overflows at the expense of an equivalent but somewhat slow
245      * definition: <tt>geo = Math.exp( Sum(
246      * Log(data[i]) ) / data.size())</tt>.
247      *
248      * @param data DoubleArrayList
249      * @return double
250      */
251     public static double geometricMean( DoubleArrayList data ) {
252         return Descriptive.geometricMean( sizeWithoutMissingValues( data ), sumOfLogarithms( data, 0, data.size() - 1 ) );
253     }
254 
255     /**
256      * Returns the kurtosis (aka excess) of a data sequence, which is <tt>-3 +
257      * moment(data,4,mean) / standardDeviation<sup>4</sup></tt>.
258      *
259      * @param data              DoubleArrayList
260      * @param mean              double
261      * @param standardDeviation double
262      * @return double
263      */
264     public static double kurtosis( DoubleArrayList data, double mean, double standardDeviation ) {
265         return Descriptive.kurtosis( moment( data, 4, mean ), standardDeviation );
266     }
267 
268     /**
269      * Returns the median absolute deviation from the median.
270      *
271      * @param data the data, does not have to be sorted
272      */
273     public static double mad( DoubleArrayList data ) {
274         data = data.copy();
275         sortAndRemoveMissing( data );
276         double median = Descriptive.median( data );
277         DoubleArrayList devsum = new DoubleArrayList( data.size() );
278         for ( int i = 0; i < data.size(); i++ ) {
279             double v = data.getQuick( i );
280             devsum.add( Math.abs( v - median ) );
281         }
282         devsum.sort();
283         return Descriptive.median( devsum );
284     }
285 
286     public static double max( DoubleArrayList input ) {
287         int size = input.size();
288         if ( size == 0 ) throw new IllegalArgumentException();
289 
290         double[] elements = input.elements();
291         double max = Double.MIN_VALUE;
292         for ( int i = 0; i < size; i++ ) {
293             if ( Double.isNaN( elements[i] ) ) continue;
294             if ( elements[i] > max ) max = elements[i];
295         }
296 
297         return max;
298     }
299 
300     /**
301      * @param data Values to be analyzed.
302      * @return Mean of the values in x. Missing values are ignored in the analysis.
303      */
304     public static double mean( DoubleArrayList data ) {
305         return sum( data ) / sizeWithoutMissingValues( data );
306     }
307 
308     /**
309      * Special mean calculation where we use the effective size as an input.
310      *
311      * @param x             The data
312      * @param effectiveSize The effective size used for the mean calculation.
313      * @return double
314      */
315     public static double mean( DoubleArrayList x, int effectiveSize ) {
316 
317         int length = x.size();
318 
319         if ( 0 == effectiveSize ) {
320             return Double.NaN;
321         }
322 
323         double sum = 0.0;
324         int i, count;
325         count = 0;
326         double value;
327         double[] elements = x.elements();
328         for ( i = 0; i < length; i++ ) {
329             value = elements[i];
330             if ( Double.isNaN( value ) ) {
331                 continue;
332             }
333             sum += value;
334             count++;
335         }
336         if ( 0.0 == count ) {
337             return Double.NaN;
338         }
339         return sum / effectiveSize;
340 
341     }
342 
343     /**
344      * Calculate the mean of the values above to a particular quantile of an array.
345      *
346      * @param data     Array for which we want to get the quantile.
347      * @param quantile A value from 0 to 1
348      * @return double
349      */
350     public static double meanAboveQuantile( DoubleArrayList data, double quantile ) {
351         if ( quantile < 0.0 || quantile > 1.0 ) {
352             throw new IllegalArgumentException( "Quantile must be between 0 and 1" );
353         }
354 
355         data = data.copy();
356         sortAndRemoveMissing( data );
357         double quantileValue = Descriptive.quantile( data, quantile );
358 
359         double returnvalue = 0.0;
360         int k = 0;
361 
362         for ( int i = 0; i < data.size(); i++ ) {
363             if ( data.get( i ) > quantileValue ) {
364                 returnvalue += data.get( i );
365                 k++;
366             }
367         }
368 
369         if ( k == 0 ) {
370             throw new ArithmeticException( "No values found above quantile" );
371         }
372 
373         return returnvalue / k;
374     }
375 
376     /**
377      * Returns the median. Missing values are ignored entirely.
378      *
379      * @param data the data sequence, does not have to be sorted.
380      * @return double
381      */
382     public static double median( DoubleArrayList data ) {
383         data = data.copy();
384         sortAndRemoveMissing( data );
385         return Descriptive.median( data );
386     }
387 
388     public static double min( DoubleArrayList input ) {
389         int size = input.size();
390         if ( size == 0 ) throw new IllegalArgumentException();
391 
392         double[] elements = input.elements();
393         double min = Double.MAX_VALUE;
394         for ( int i = 0; i < size; i++ ) {
395             if ( Double.isNaN( elements[i] ) ) continue;
396             if ( elements[i] < min ) min = elements[i];
397         }
398 
399         return min;
400     }
401 
402     /**
403      * Returns the moment of <tt>k</tt> -th order with constant <tt>c</tt> of a data sequence, which is
404      * <tt>Sum( (data[i]-c)<sup>k</sup> ) /
405      * data.size()</tt>.
406      *
407      * @param data DoubleArrayList
408      * @param k    int
409      * @param c    double
410      * @return double
411      */
412     public static double moment( DoubleArrayList data, int k, double c ) {
413         return sumOfPowerDeviations( data, k, c ) / sizeWithoutMissingValues( data );
414     }
415 
416     /**
417      * Returns the product of a data sequence, which is <tt>Prod( data[i] )</tt>. Missing values are ignored. In other
418      * words: <tt>data[0]*data[1]*...*data[data.size()-1]</tt>. Note that you may easily get numeric overflows.
419      *
420      * @param data DoubleArrayList
421      * @return double
422      */
423     public static double product( DoubleArrayList data ) {
424         int size = data.size();
425         double[] elements = data.elements();
426 
427         double product = 1;
428         for ( int i = size; --i >= 0; ) {
429             if ( Double.isNaN( elements[i] ) ) {
430                 continue;
431             }
432             product *= elements[i];
433 
434         }
435         return product;
436     }
437 
438     /**
439      * Returns the <tt>phi-</tt> quantile; that is, an element <tt>elem</tt> for which holds that <tt>phi</tt> percent
440      * of data elements are less than <tt>elem</tt>. Missing values are ignored. The quantile need not necessarily be
441      * contained in the data sequence, it can be a linear interpolation.
442      *
443      * @param data the data sequence, does not have to be sorted.
444      * @param phi  the percentage; must satisfy <tt>0 &lt;= phi &lt;= 1</tt>.
445      * @return double
446      * @todo possibly implement so a copy is not made.
447      */
448     public static double quantile( DoubleArrayList data, double phi ) {
449         data = data.copy();
450         sortAndRemoveMissing( data );
451         return Descriptive.quantile( data, phi );
452     }
453 
454     /**
455      * Returns how many percent of the elements contained in the receiver are <tt>&lt;= element</tt>. Does linear
456      * interpolation if the element is not contained but lies in between two contained elements. Missing values are
457      * ignored.
458      *
459      * @param data    the list to be searched
460      * @param element the element to search for.
461      * @return the percentage <tt>phi</tt> of elements <tt>&lt;= element</tt>(<tt>0.0 &lt;= phi &lt;= 1.0)</tt>.
462      */
463     public static double quantileInverse( DoubleArrayList data, double element ) {
464         data = data.copy();
465         sortAndRemoveMissing( data );
466         return rankInterpolated( data, element ) / data.size();
467     }
468 
469     /**
470      * Returns the quantiles of the specified percentages. The quantiles need not necessarily be contained in the data
471      * sequence, it can be a linear interpolation.
472      *
473      * @param data        the data sequence; does not have to be sorted
474      * @param percentages the percentages for which quantiles are to be computed. Each percentage must be in the
475      *                    interval <tt>[0.0,1.0]</tt>.
476      * @return the quantiles.
477      */
478     public static DoubleArrayList quantiles( DoubleArrayList data, DoubleArrayList percentages ) {
479         data = data.copy();
480         sortAndRemoveMissing( data );
481         int s = percentages.size();
482         DoubleArrayList quantiles = new DoubleArrayList( s );
483         for ( int i = 0; i < s; i++ ) {
484             quantiles.add( Descriptive.quantile( data, percentages.get( i ) ) );
485         }
486         return quantiles;
487     }
488 
489     /**
490      * Returns the linearly interpolated number of elements in a list less or equal to a given element. Missing values
491      * are ignored. The rank is the number of elements <= element. Ranks are of the form
492      * <tt>{0, 1, 2,..., sortedList.size()}</tt>. If no element is <= element, then the rank is zero. If the element
493      * lies in between two contained elements, then linear interpolation is used and a non integer value is returned.
494      *
495      * @param data    the list to be searched, does not have to be sorted
496      * @param element the element to search for.
497      * @return the rank of the element.
498      * @todo possibly implement so a copy is not made.
499      */
500     public static double rankInterpolated( DoubleArrayList data, double element ) {
501         data = data.copy();
502         sortAndRemoveMissing( data );
503         return Descriptive.rankInterpolated( data, element );
504     }
505 
506     /**
507      * Returns the sample kurtosis (aka excess) of a data sequence.
508      *
509      * @param data           DoubleArrayList
510      * @param mean           double
511      * @param sampleVariance double
512      * @return double
513      */
514     public static double sampleKurtosis( DoubleArrayList data, double mean, double sampleVariance ) {
515         return Descriptive.sampleKurtosis( sizeWithoutMissingValues( data ), moment( data, 4, mean ), sampleVariance );
516     }
517 
518     /**
519      * Returns the sample skew of a data sequence.
520      *
521      * @param data           DoubleArrayList
522      * @param mean           double
523      * @param sampleVariance double
524      * @return double
525      */
526     public static double sampleSkew( DoubleArrayList data, double mean, double sampleVariance ) {
527         return Descriptive.sampleSkew( sizeWithoutMissingValues( data ), moment( data, 3, mean ), sampleVariance );
528     }
529 
530     /**
531      * Returns the sample variance of a data sequence. That is <tt>Sum (
532      * (data[i]-mean)^2 ) / (data.size()-1)</tt>.
533      *
534      * @param data DoubleArrayList
535      * @param mean double
536      * @return double
537      */
538     public static double sampleVariance( DoubleArrayList data, double mean ) {
539         double[] elements = data.elements();
540         int size = data.size();
541         double sum = 0;
542         int effsize = 0;
543         // find the sum of the squares
544         for ( int i = size; --i >= 0; ) {
545             if ( Double.isNaN( elements[i] ) ) {
546                 continue;
547             }
548             double delta = elements[i] - mean;
549             sum += delta * delta;
550             effsize++;
551         }
552 
553         return sum / ( effsize - 1 );
554     }
555 
556     /**
557      * Returns the skew of a data sequence, which is <tt>moment(data,3,mean) /
558      * standardDeviation<sup>3</sup></tt>.
559      *
560      * @param data              DoubleArrayList
561      * @param mean              double
562      * @param standardDeviation double
563      * @return double
564      */
565     public static double skew( DoubleArrayList data, double mean, double standardDeviation ) {
566         return Descriptive.skew( moment( data, 3, mean ), standardDeviation );
567     }
568 
569     /**
570      * Standardize. Note that this does something slightly different than standardize in the superclass, because our
571      * sampleStandardDeviation does not use the correction of the superclass (which isn't really standard).
572      *
573      * @param data DoubleArrayList
574      */
575     public static void standardize( DoubleArrayList data ) {
576         double mean = mean( data );
577         double stdev = Math.sqrt( sampleVariance( data, mean ) );
578         standardize( data, mean, stdev );
579     }
580 
581     /**
582      * Modifies a data sequence to be standardized. Missing values are ignored. Changes each element <tt>data[i]</tt> as
583      * follows: <tt>data[i] = (data[i]-mean)/standardDeviation</tt> unless the standard deviation is 0.00 or very close to zero (indicating the data are constant),
584      * in which case we return a vector of zeros (in effect just doing mean subtraction)
585      *
586      * @param data              DoubleArrayList
587      * @param mean              mean of data
588      * @param standardDeviation stdev of data. |stdev| < Constants.TINY is treated as zero.
589      */
590     public static void standardize( DoubleArrayList data, double mean, double standardDeviation ) {
591         double[] elements = data.elements();
592         int size = data.size();
593 
594         if ( Math.abs( 0.0 - standardDeviation ) < Constants.TINY ) {
595             for ( int i = 0; i < size; i++ ) {
596                 if ( !Double.isNaN( elements[i] ) ) {
597                     elements[i] = 0.0;
598                 }
599             }
600             return;
601         }
602 
603         for ( int i = 0; i < size; i++ ) {
604             if ( Double.isNaN( elements[i] ) ) {
605                 continue;
606             }
607             elements[i] = ( elements[i] - mean ) / standardDeviation;
608         }
609     }
610 
611     /**
612      * Returns the sum of a data sequence. That is <tt>Sum( data[i] )</tt>.
613      *
614      * @param data DoubleArrayList
615      * @return double
616      */
617     public static double sum( DoubleArrayList data ) {
618         return sumOfPowerDeviations( data, 1, 0.0 );
619     }
620 
621     /**
622      * Returns the sum of inversions of a data sequence, which is <tt>Sum( 1.0 /
623      * data[i])</tt>.
624      *
625      * @param data the data sequence.
626      * @param from the index of the first data element (inclusive).
627      * @param to   the index of the last data element (inclusive).
628      * @return double
629      */
630     public static double sumOfInversions( DoubleArrayList data, int from, int to ) {
631         return sumOfPowerDeviations( data, -1, 0.0, from, to );
632     }
633 
634     /**
635      * Returns the sum of logarithms of a data sequence, which is <tt>Sum(
636      * Log(data[i])</tt>. Missing values are ignored.
637      *
638      * @param data the data sequence.
639      * @param from the index of the first data element (inclusive).
640      * @param to   the index of the last data element (inclusive).
641      * @return double
642      */
643     public static double sumOfLogarithms( DoubleArrayList data, int from, int to ) {
644         double[] elements = data.elements();
645         double logsum = 0;
646         for ( int i = from - 1; ++i <= to; ) {
647             if ( Double.isNaN( elements[i] ) ) {
648                 continue;
649             }
650             logsum += Math.log( elements[i] );
651         }
652         return logsum;
653     }
654 
655     /**
656      * Returns <tt>Sum( (data[i]-c)<sup>k</sup> )</tt>; optimized for common parameters like <tt>c == 0.0</tt> and/or
657      * <tt>k == -2 .. 4</tt>.
658      *
659      * @param data DoubleArrayList
660      * @param k    int
661      * @param c    double
662      * @return double
663      */
664     public static double sumOfPowerDeviations( DoubleArrayList data, int k, double c ) {
665         return sumOfPowerDeviations( data, k, c, 0, data.size() - 1 );
666     }
667 
668     /**
669      * Returns <tt>Sum( (data[i]-c)<sup>k</sup> )</tt> for all <tt>i = from ..
670      * to</tt>; optimized for common parameters like <tt>c == 0.0</tt> and/or <tt>k == -2 .. 5</tt>. Missing values are
671      * ignored.
672      *
673      * @param data DoubleArrayList
674      * @param k    int
675      * @param c    double
676      * @param from int
677      * @param to   int
678      * @return double
679      */
680     public static double sumOfPowerDeviations( final DoubleArrayList data, final int k, final double c, final int from, final int to ) {
681         final double[] elements = data.elements();
682         double sum = 0;
683         double v;
684         int i;
685         switch ( k ) { // optimized for speed
686             case -2:
687                 if ( c == 0.0 ) {
688                     for ( i = from - 1; ++i <= to; ) {
689                         if ( Double.isNaN( elements[i] ) ) {
690                             continue;
691                         }
692                         v = elements[i];
693                         sum += 1 / ( v * v );
694                     }
695                 } else {
696                     for ( i = from - 1; ++i <= to; ) {
697                         if ( Double.isNaN( elements[i] ) ) {
698                             continue;
699                         }
700                         v = elements[i] - c;
701                         sum += 1 / ( v * v );
702                     }
703                 }
704                 break;
705             case -1:
706                 if ( c == 0.0 ) {
707                     for ( i = from - 1; ++i <= to; ) {
708                         if ( Double.isNaN( elements[i] ) ) {
709                             continue;
710                         }
711                         sum += 1 / elements[i];
712                     }
713                 } else {
714                     for ( i = from - 1; ++i <= to; ) {
715                         if ( Double.isNaN( elements[i] ) ) {
716                             continue;
717                         }
718                         sum += 1 / ( elements[i] - c );
719                     }
720                 }
721                 break;
722             case 0:
723                 for ( i = from - 1; ++i <= to; ) {
724                     if ( Double.isNaN( elements[i] ) ) {
725                         continue;
726                     }
727                     sum += 1;
728                 }
729                 break;
730             case 1:
731                 if ( c == 0.0 ) {
732                     for ( i = from - 1; ++i <= to; ) {
733                         if ( Double.isNaN( elements[i] ) ) {
734                             continue;
735                         }
736                         sum += elements[i];
737                     }
738                 } else {
739                     for ( i = from - 1; ++i <= to; ) {
740                         if ( Double.isNaN( elements[i] ) ) {
741                             continue;
742                         }
743                         sum += elements[i] - c;
744                     }
745                 }
746                 break;
747             case 2:
748                 if ( c == 0.0 ) {
749                     for ( i = from - 1; ++i <= to; ) {
750                         if ( Double.isNaN( elements[i] ) ) {
751                             continue;
752                         }
753                         v = elements[i];
754                         sum += v * v;
755                     }
756                 } else {
757                     for ( i = from - 1; ++i <= to; ) {
758                         if ( Double.isNaN( elements[i] ) ) {
759                             continue;
760                         }
761                         v = elements[i] - c;
762                         sum += v * v;
763                     }
764                 }
765                 break;
766             case 3:
767                 if ( c == 0.0 ) {
768                     for ( i = from - 1; ++i <= to; ) {
769                         if ( Double.isNaN( elements[i] ) ) {
770                             continue;
771                         }
772                         v = elements[i];
773                         sum += v * v * v;
774                     }
775                 } else {
776                     for ( i = from - 1; ++i <= to; ) {
777                         if ( Double.isNaN( elements[i] ) ) {
778                             continue;
779                         }
780                         v = elements[i] - c;
781                         sum += v * v * v;
782                     }
783                 }
784                 break;
785             case 4:
786                 if ( c == 0.0 ) {
787                     for ( i = from - 1; ++i <= to; ) {
788                         if ( Double.isNaN( elements[i] ) ) {
789                             continue;
790                         }
791                         v = elements[i];
792                         sum += v * v * v * v;
793                     }
794                 } else {
795                     for ( i = from - 1; ++i <= to; ) {
796                         if ( Double.isNaN( elements[i] ) ) {
797                             continue;
798                         }
799                         v = elements[i] - c;
800                         sum += v * v * v * v;
801                     }
802                 }
803                 break;
804             case 5:
805                 if ( c == 0.0 ) {
806                     for ( i = from - 1; ++i <= to; ) {
807                         if ( Double.isNaN( elements[i] ) ) {
808                             continue;
809                         }
810                         v = elements[i];
811                         sum += v * v * v * v * v;
812                     }
813                 } else {
814                     for ( i = from - 1; ++i <= to; ) {
815                         if ( Double.isNaN( elements[i] ) ) {
816                             continue;
817                         }
818                         v = elements[i] - c;
819                         sum += v * v * v * v * v;
820                     }
821                 }
822                 break;
823             default:
824                 for ( i = from - 1; ++i <= to; ) {
825                     if ( Double.isNaN( elements[i] ) ) {
826                         continue;
827                     }
828                     sum += Math.pow( elements[i] - c, k );
829                 }
830                 break;
831         }
832         return sum;
833     }
834 
835     /**
836      * Returns the sum of powers of a data sequence, which is <tt>Sum (
837      * data[i]<sup>k</sup> )</tt>.
838      *
839      * @param data DoubleArrayList
840      * @param k    int
841      * @return double
842      */
843     public static double sumOfPowers( DoubleArrayList data, int k ) {
844         return sumOfPowerDeviations( data, k, 0 );
845     }
846 
847     /**
848      * Compute the sum of the squared deviations from the mean of a data sequence. Missing values are ignored.
849      *
850      * @param data DoubleArrayList
851      * @return double
852      */
853     public static double sumOfSquaredDeviations( DoubleArrayList data ) {
854         int size = sizeWithoutMissingValues( data );
855         return Descriptive.sumOfSquaredDeviations( size, Descriptive.variance( size, sum( data ), sumOfSquares( data ) ) );
856     }
857 
858     /**
859      * Returns the sum of squares of a data sequence. Skips missing values.
860      *
861      * @param data DoubleArrayList
862      * @return double
863      */
864     public static double sumOfSquares( DoubleArrayList data ) {
865         return sumOfPowerDeviations( data, 2, 0.0 );
866     }
867 
868     /**
869      * Returns the trimmed mean of a data sequence. Missing values are completely ignored.
870      *
871      * @param data  the data sequence
872      * @param left  int the number of leading elements to trim.
873      * @param right int number of trailing elements to trim.
874      * @return double
875      */
876     public static double trimmedMean( DoubleArrayList data, int left, int right ) {
877         data = data.copy();
878         sortAndRemoveMissing( data );
879         return Descriptive.trimmedMean( data, Descriptive.mean( data ), left, right );
880     }
881 
882     /**
883      * Provided for convenience!
884      *
885      * @param data DoubleArrayList
886      * @return double
887      */
888     public static double variance( DoubleArrayList data ) {
889         return Descriptive.variance( sizeWithoutMissingValues( data ), sum( data ), sumOfSquares( data ) );
890     }
891 
892     /**
893      * Returns the weighted mean of a data sequence. That is <tt> Sum (data[i] *
894      * weights[i]) / Sum ( weights[i] )</tt>.
895      *
896      * @param data    DoubleArrayList
897      * @param weights DoubleArrayList
898      * @return double
899      */
900     public static double weightedMean( DoubleArrayList data, DoubleArrayList weights ) {
901         int size = data.size();
902         if ( size != weights.size() || size == 0 ) {
903             throw new IllegalArgumentException();
904         }
905 
906         double[] elements = data.elements();
907         double[] theWeights = weights.elements();
908         double sum = 0.0;
909         double weightsSum = 0.0;
910         for ( int i = size; --i >= 0; ) {
911             double w = theWeights[i];
912             if ( Double.isNaN( elements[i] ) || Double.isNaN( w ) ) {
913                 continue;
914             }
915             sum += elements[i] * w;
916             weightsSum += w;
917         }
918 
919         return sum / weightsSum;
920     }
921 
922     /**
923      * Makes a copy of the list that doesn't have the missing values and sort the values.
924      * <p>
925      * This takes advantage that {@link Double#NaN} are always sorted last, so we can simply set the size of the
926      * resulting array to the first NaN value.
927      */
928     private static void sortAndRemoveMissing( DoubleArrayList data ) {
929         data.sort();
930         double[] elements = data.elements();
931         int size = data.size();
932         for ( int i = 0; i < size; i++ ) {
933             if ( Double.isNaN( elements[i] ) ) {
934                 data.setSize( i );
935                 break;
936             }
937         }
938     }
939 
940     /**
941      * Return the size of the list, ignoring missing values.
942      *
943      * @param list DoubleArrayList
944      * @return int
945      */
946     private static int sizeWithoutMissingValues( DoubleArrayList list ) {
947         int size = 0;
948         for ( int i = 0; i < list.size(); i++ ) {
949             if ( !Double.isNaN( list.get( i ) ) ) {
950                 size++;
951             }
952         }
953         return size;
954     }
955 
956     /* private methods */
957 
958     private DescriptiveWithMissing() {
959     }
960 
961 } // end of class