1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package ubic.basecode.math;
20
21 import cern.colt.list.DoubleArrayList;
22 import cern.jet.stat.Descriptive;
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42 public class DescriptiveWithMissing {
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60 public static double correlation( double[] x, double[] y, double[] selfSquaredX, double[] selfSquaredY, boolean[] nanStatusX, boolean[] nanStatusY ) {
61 double syy, sxy, sxx, sx, sy, xj, yj, ay, ax;
62 int numused = 0;
63 syy = 0.0;
64 sxy = 0.0;
65 sxx = 0.0;
66 sx = 0.0;
67 sy = 0.0;
68
69 int length = x.length;
70
71 if ( y.length != length || selfSquaredX.length != length || selfSquaredY.length != length || nanStatusX.length != length || nanStatusY.length != length ) {
72 throw new ArithmeticException();
73 }
74
75 for ( int j = 0; j < length; j++ ) {
76 xj = x[j];
77 yj = y[j];
78
79 if ( nanStatusX[j] || nanStatusY[j] ) {
80 continue;
81 }
82 sx += xj;
83 sy += yj;
84 sxy += xj * yj;
85 sxx += selfSquaredX[j];
86 syy += selfSquaredY[j];
87 numused++;
88 }
89
90 if ( numused > 0 ) {
91 ay = sy / numused;
92 ax = sx / numused;
93 return ( sxy - sx * ay ) / Math.sqrt( ( sxx - sx * ax ) * ( syy - sy * ay ) );
94 }
95 return Double.NaN;
96 }
97
98
99
100
101
102
103
104
105 public static double correlation( DoubleArrayList x, DoubleArrayList y ) {
106 int j;
107 double syy, sxy, sxx, sx, sy, xj, yj, ay, ax;
108 int numused;
109 syy = 0.0;
110 sxy = 0.0;
111 sxx = 0.0;
112 sx = 0.0;
113 sy = 0.0;
114 numused = 0;
115 if ( x.size() != y.size() ) {
116 throw new ArithmeticException( "Unequal vector sizes: " + x.size() + " != " + y.size() );
117 }
118
119 double[] xel = x.elements();
120 double[] yel = y.elements();
121
122 int length = x.size();
123 for ( j = 0; j < length; j++ ) {
124 xj = xel[j];
125 yj = yel[j];
126
127 if ( !Double.isNaN( xj ) && !Double.isNaN( yj ) ) {
128 sx += xj;
129 sy += yj;
130 sxy += xj * yj;
131 sxx += xj * xj;
132 syy += yj * yj;
133 numused++;
134 }
135 }
136
137 if ( numused > 0 ) {
138 ay = sy / numused;
139 ax = sx / numused;
140 return ( sxy - sx * ay ) / Math.sqrt( ( sxx - sx * ax ) * ( syy - sy * ay ) );
141 }
142
143 return Double.NaN;
144 }
145
146
147
148
149
150
151
152
153
154 public static double covariance( DoubleArrayList data1, DoubleArrayList data2 ) {
155 int size = data1.size();
156 if ( size != data2.size() || size == 0 ) {
157 throw new IllegalArgumentException();
158 }
159 double[] elements1 = data1.elements();
160 double[] elements2 = data2.elements();
161
162
163
164 int i = 0;
165 double sumx = 0.0, sumy = 0.0, Sxy = 0.0;
166 while ( i < size ) {
167 sumx = elements1[i];
168 sumy = elements2[i];
169 if ( !Double.isNaN( elements1[i] ) && !Double.isNaN( elements2[i] ) ) {
170 break;
171 }
172 i++;
173 }
174 i++;
175 int usedPairs = 1;
176 for ( ; i < size; ++i ) {
177 double x = elements1[i];
178 double y = elements2[i];
179 if ( Double.isNaN( x ) || Double.isNaN( y ) ) {
180 continue;
181 }
182
183 sumx += x;
184 Sxy += ( x - sumx / ( usedPairs + 1 ) ) * ( y - sumy / usedPairs );
185 sumy += y;
186 usedPairs++;
187 }
188 return Sxy / ( usedPairs - 1 );
189 }
190
191
192
193
194
195
196
197
198 public static double durbinWatson( DoubleArrayList data ) {
199 int size = data.size();
200 if ( size < 2 ) {
201 throw new IllegalArgumentException( "data sequence must contain at least two values." );
202 }
203
204 double[] elements = data.elements();
205 double run = 0;
206 double run_sq = 0;
207 int firstNotNaN = 0;
208 while ( firstNotNaN < size ) {
209 run_sq = elements[firstNotNaN] * elements[firstNotNaN];
210
211 if ( !Double.isNaN( elements[firstNotNaN] ) ) {
212 break;
213 }
214
215 firstNotNaN++;
216 }
217
218 if ( firstNotNaN > 0 && size - firstNotNaN < 2 ) {
219 throw new IllegalArgumentException( "data sequence must contain at least two non-missing values." );
220
221 }
222
223 for ( int i = firstNotNaN + 1; i < size; i++ ) {
224 int gap = 1;
225 while ( i < size && Double.isNaN( elements[i] ) ) {
226 gap++;
227 i++;
228 continue;
229 }
230 if ( i >= size ) continue;
231 double x = elements[i] - elements[i - gap];
232
233 run += x * x;
234 run_sq += elements[i] * elements[i];
235 }
236
237 return run / run_sq;
238 }
239
240
241
242
243
244
245
246
247
248
249
250
251 public static double geometricMean( DoubleArrayList data ) {
252 return Descriptive.geometricMean( sizeWithoutMissingValues( data ), sumOfLogarithms( data, 0, data.size() - 1 ) );
253 }
254
255
256
257
258
259
260
261
262
263
264 public static double kurtosis( DoubleArrayList data, double mean, double standardDeviation ) {
265 return Descriptive.kurtosis( moment( data, 4, mean ), standardDeviation );
266 }
267
268
269
270
271
272
273 public static double mad( DoubleArrayList data ) {
274 data = data.copy();
275 sortAndRemoveMissing( data );
276 double median = Descriptive.median( data );
277 DoubleArrayList devsum = new DoubleArrayList( data.size() );
278 for ( int i = 0; i < data.size(); i++ ) {
279 double v = data.getQuick( i );
280 devsum.add( Math.abs( v - median ) );
281 }
282 devsum.sort();
283 return Descriptive.median( devsum );
284 }
285
286 public static double max( DoubleArrayList input ) {
287 int size = input.size();
288 if ( size == 0 ) throw new IllegalArgumentException();
289
290 double[] elements = input.elements();
291 double max = Double.MIN_VALUE;
292 for ( int i = 0; i < size; i++ ) {
293 if ( Double.isNaN( elements[i] ) ) continue;
294 if ( elements[i] > max ) max = elements[i];
295 }
296
297 return max;
298 }
299
300
301
302
303
304 public static double mean( DoubleArrayList data ) {
305 return sum( data ) / sizeWithoutMissingValues( data );
306 }
307
308
309
310
311
312
313
314
315 public static double mean( DoubleArrayList x, int effectiveSize ) {
316
317 int length = x.size();
318
319 if ( 0 == effectiveSize ) {
320 return Double.NaN;
321 }
322
323 double sum = 0.0;
324 int i, count;
325 count = 0;
326 double value;
327 double[] elements = x.elements();
328 for ( i = 0; i < length; i++ ) {
329 value = elements[i];
330 if ( Double.isNaN( value ) ) {
331 continue;
332 }
333 sum += value;
334 count++;
335 }
336 if ( 0.0 == count ) {
337 return Double.NaN;
338 }
339 return sum / effectiveSize;
340
341 }
342
343
344
345
346
347
348
349
350 public static double meanAboveQuantile( DoubleArrayList data, double quantile ) {
351 if ( quantile < 0.0 || quantile > 1.0 ) {
352 throw new IllegalArgumentException( "Quantile must be between 0 and 1" );
353 }
354
355 data = data.copy();
356 sortAndRemoveMissing( data );
357 double quantileValue = Descriptive.quantile( data, quantile );
358
359 double returnvalue = 0.0;
360 int k = 0;
361
362 for ( int i = 0; i < data.size(); i++ ) {
363 if ( data.get( i ) > quantileValue ) {
364 returnvalue += data.get( i );
365 k++;
366 }
367 }
368
369 if ( k == 0 ) {
370 throw new ArithmeticException( "No values found above quantile" );
371 }
372
373 return returnvalue / k;
374 }
375
376
377
378
379
380
381
382 public static double median( DoubleArrayList data ) {
383 data = data.copy();
384 sortAndRemoveMissing( data );
385 return Descriptive.median( data );
386 }
387
388 public static double min( DoubleArrayList input ) {
389 int size = input.size();
390 if ( size == 0 ) throw new IllegalArgumentException();
391
392 double[] elements = input.elements();
393 double min = Double.MAX_VALUE;
394 for ( int i = 0; i < size; i++ ) {
395 if ( Double.isNaN( elements[i] ) ) continue;
396 if ( elements[i] < min ) min = elements[i];
397 }
398
399 return min;
400 }
401
402
403
404
405
406
407
408
409
410
411
412 public static double moment( DoubleArrayList data, int k, double c ) {
413 return sumOfPowerDeviations( data, k, c ) / sizeWithoutMissingValues( data );
414 }
415
416
417
418
419
420
421
422
423 public static double product( DoubleArrayList data ) {
424 int size = data.size();
425 double[] elements = data.elements();
426
427 double product = 1;
428 for ( int i = size; --i >= 0; ) {
429 if ( Double.isNaN( elements[i] ) ) {
430 continue;
431 }
432 product *= elements[i];
433
434 }
435 return product;
436 }
437
438
439
440
441
442
443
444
445
446
447
448 public static double quantile( DoubleArrayList data, double phi ) {
449 data = data.copy();
450 sortAndRemoveMissing( data );
451 return Descriptive.quantile( data, phi );
452 }
453
454
455
456
457
458
459
460
461
462
463 public static double quantileInverse( DoubleArrayList data, double element ) {
464 data = data.copy();
465 sortAndRemoveMissing( data );
466 return rankInterpolated( data, element ) / data.size();
467 }
468
469
470
471
472
473
474
475
476
477
478 public static DoubleArrayList quantiles( DoubleArrayList data, DoubleArrayList percentages ) {
479 data = data.copy();
480 sortAndRemoveMissing( data );
481 int s = percentages.size();
482 DoubleArrayList quantiles = new DoubleArrayList( s );
483 for ( int i = 0; i < s; i++ ) {
484 quantiles.add( Descriptive.quantile( data, percentages.get( i ) ) );
485 }
486 return quantiles;
487 }
488
489
490
491
492
493
494
495
496
497
498
499
500 public static double rankInterpolated( DoubleArrayList data, double element ) {
501 data = data.copy();
502 sortAndRemoveMissing( data );
503 return Descriptive.rankInterpolated( data, element );
504 }
505
506
507
508
509
510
511
512
513
514 public static double sampleKurtosis( DoubleArrayList data, double mean, double sampleVariance ) {
515 return Descriptive.sampleKurtosis( sizeWithoutMissingValues( data ), moment( data, 4, mean ), sampleVariance );
516 }
517
518
519
520
521
522
523
524
525
526 public static double sampleSkew( DoubleArrayList data, double mean, double sampleVariance ) {
527 return Descriptive.sampleSkew( sizeWithoutMissingValues( data ), moment( data, 3, mean ), sampleVariance );
528 }
529
530
531
532
533
534
535
536
537
538 public static double sampleVariance( DoubleArrayList data, double mean ) {
539 double[] elements = data.elements();
540 int size = data.size();
541 double sum = 0;
542 int effsize = 0;
543
544 for ( int i = size; --i >= 0; ) {
545 if ( Double.isNaN( elements[i] ) ) {
546 continue;
547 }
548 double delta = elements[i] - mean;
549 sum += delta * delta;
550 effsize++;
551 }
552
553 return sum / ( effsize - 1 );
554 }
555
556
557
558
559
560
561
562
563
564
565 public static double skew( DoubleArrayList data, double mean, double standardDeviation ) {
566 return Descriptive.skew( moment( data, 3, mean ), standardDeviation );
567 }
568
569
570
571
572
573
574
575 public static void standardize( DoubleArrayList data ) {
576 double mean = mean( data );
577 double stdev = Math.sqrt( sampleVariance( data, mean ) );
578 standardize( data, mean, stdev );
579 }
580
581
582
583
584
585
586
587
588
589
590 public static void standardize( DoubleArrayList data, double mean, double standardDeviation ) {
591 double[] elements = data.elements();
592 int size = data.size();
593
594 if ( Math.abs( 0.0 - standardDeviation ) < Constants.TINY ) {
595 for ( int i = 0; i < size; i++ ) {
596 if ( !Double.isNaN( elements[i] ) ) {
597 elements[i] = 0.0;
598 }
599 }
600 return;
601 }
602
603 for ( int i = 0; i < size; i++ ) {
604 if ( Double.isNaN( elements[i] ) ) {
605 continue;
606 }
607 elements[i] = ( elements[i] - mean ) / standardDeviation;
608 }
609 }
610
611
612
613
614
615
616
617 public static double sum( DoubleArrayList data ) {
618 return sumOfPowerDeviations( data, 1, 0.0 );
619 }
620
621
622
623
624
625
626
627
628
629
630 public static double sumOfInversions( DoubleArrayList data, int from, int to ) {
631 return sumOfPowerDeviations( data, -1, 0.0, from, to );
632 }
633
634
635
636
637
638
639
640
641
642
643 public static double sumOfLogarithms( DoubleArrayList data, int from, int to ) {
644 double[] elements = data.elements();
645 double logsum = 0;
646 for ( int i = from - 1; ++i <= to; ) {
647 if ( Double.isNaN( elements[i] ) ) {
648 continue;
649 }
650 logsum += Math.log( elements[i] );
651 }
652 return logsum;
653 }
654
655
656
657
658
659
660
661
662
663
664 public static double sumOfPowerDeviations( DoubleArrayList data, int k, double c ) {
665 return sumOfPowerDeviations( data, k, c, 0, data.size() - 1 );
666 }
667
668
669
670
671
672
673
674
675
676
677
678
679
680 public static double sumOfPowerDeviations( final DoubleArrayList data, final int k, final double c, final int from, final int to ) {
681 final double[] elements = data.elements();
682 double sum = 0;
683 double v;
684 int i;
685 switch ( k ) {
686 case -2:
687 if ( c == 0.0 ) {
688 for ( i = from - 1; ++i <= to; ) {
689 if ( Double.isNaN( elements[i] ) ) {
690 continue;
691 }
692 v = elements[i];
693 sum += 1 / ( v * v );
694 }
695 } else {
696 for ( i = from - 1; ++i <= to; ) {
697 if ( Double.isNaN( elements[i] ) ) {
698 continue;
699 }
700 v = elements[i] - c;
701 sum += 1 / ( v * v );
702 }
703 }
704 break;
705 case -1:
706 if ( c == 0.0 ) {
707 for ( i = from - 1; ++i <= to; ) {
708 if ( Double.isNaN( elements[i] ) ) {
709 continue;
710 }
711 sum += 1 / elements[i];
712 }
713 } else {
714 for ( i = from - 1; ++i <= to; ) {
715 if ( Double.isNaN( elements[i] ) ) {
716 continue;
717 }
718 sum += 1 / ( elements[i] - c );
719 }
720 }
721 break;
722 case 0:
723 for ( i = from - 1; ++i <= to; ) {
724 if ( Double.isNaN( elements[i] ) ) {
725 continue;
726 }
727 sum += 1;
728 }
729 break;
730 case 1:
731 if ( c == 0.0 ) {
732 for ( i = from - 1; ++i <= to; ) {
733 if ( Double.isNaN( elements[i] ) ) {
734 continue;
735 }
736 sum += elements[i];
737 }
738 } else {
739 for ( i = from - 1; ++i <= to; ) {
740 if ( Double.isNaN( elements[i] ) ) {
741 continue;
742 }
743 sum += elements[i] - c;
744 }
745 }
746 break;
747 case 2:
748 if ( c == 0.0 ) {
749 for ( i = from - 1; ++i <= to; ) {
750 if ( Double.isNaN( elements[i] ) ) {
751 continue;
752 }
753 v = elements[i];
754 sum += v * v;
755 }
756 } else {
757 for ( i = from - 1; ++i <= to; ) {
758 if ( Double.isNaN( elements[i] ) ) {
759 continue;
760 }
761 v = elements[i] - c;
762 sum += v * v;
763 }
764 }
765 break;
766 case 3:
767 if ( c == 0.0 ) {
768 for ( i = from - 1; ++i <= to; ) {
769 if ( Double.isNaN( elements[i] ) ) {
770 continue;
771 }
772 v = elements[i];
773 sum += v * v * v;
774 }
775 } else {
776 for ( i = from - 1; ++i <= to; ) {
777 if ( Double.isNaN( elements[i] ) ) {
778 continue;
779 }
780 v = elements[i] - c;
781 sum += v * v * v;
782 }
783 }
784 break;
785 case 4:
786 if ( c == 0.0 ) {
787 for ( i = from - 1; ++i <= to; ) {
788 if ( Double.isNaN( elements[i] ) ) {
789 continue;
790 }
791 v = elements[i];
792 sum += v * v * v * v;
793 }
794 } else {
795 for ( i = from - 1; ++i <= to; ) {
796 if ( Double.isNaN( elements[i] ) ) {
797 continue;
798 }
799 v = elements[i] - c;
800 sum += v * v * v * v;
801 }
802 }
803 break;
804 case 5:
805 if ( c == 0.0 ) {
806 for ( i = from - 1; ++i <= to; ) {
807 if ( Double.isNaN( elements[i] ) ) {
808 continue;
809 }
810 v = elements[i];
811 sum += v * v * v * v * v;
812 }
813 } else {
814 for ( i = from - 1; ++i <= to; ) {
815 if ( Double.isNaN( elements[i] ) ) {
816 continue;
817 }
818 v = elements[i] - c;
819 sum += v * v * v * v * v;
820 }
821 }
822 break;
823 default:
824 for ( i = from - 1; ++i <= to; ) {
825 if ( Double.isNaN( elements[i] ) ) {
826 continue;
827 }
828 sum += Math.pow( elements[i] - c, k );
829 }
830 break;
831 }
832 return sum;
833 }
834
835
836
837
838
839
840
841
842
843 public static double sumOfPowers( DoubleArrayList data, int k ) {
844 return sumOfPowerDeviations( data, k, 0 );
845 }
846
847
848
849
850
851
852
853 public static double sumOfSquaredDeviations( DoubleArrayList data ) {
854 int size = sizeWithoutMissingValues( data );
855 return Descriptive.sumOfSquaredDeviations( size, Descriptive.variance( size, sum( data ), sumOfSquares( data ) ) );
856 }
857
858
859
860
861
862
863
864 public static double sumOfSquares( DoubleArrayList data ) {
865 return sumOfPowerDeviations( data, 2, 0.0 );
866 }
867
868
869
870
871
872
873
874
875
876 public static double trimmedMean( DoubleArrayList data, int left, int right ) {
877 data = data.copy();
878 sortAndRemoveMissing( data );
879 return Descriptive.trimmedMean( data, Descriptive.mean( data ), left, right );
880 }
881
882
883
884
885
886
887
888 public static double variance( DoubleArrayList data ) {
889 return Descriptive.variance( sizeWithoutMissingValues( data ), sum( data ), sumOfSquares( data ) );
890 }
891
892
893
894
895
896
897
898
899
900 public static double weightedMean( DoubleArrayList data, DoubleArrayList weights ) {
901 int size = data.size();
902 if ( size != weights.size() || size == 0 ) {
903 throw new IllegalArgumentException();
904 }
905
906 double[] elements = data.elements();
907 double[] theWeights = weights.elements();
908 double sum = 0.0;
909 double weightsSum = 0.0;
910 for ( int i = size; --i >= 0; ) {
911 double w = theWeights[i];
912 if ( Double.isNaN( elements[i] ) || Double.isNaN( w ) ) {
913 continue;
914 }
915 sum += elements[i] * w;
916 weightsSum += w;
917 }
918
919 return sum / weightsSum;
920 }
921
922
923
924
925
926
927
928 private static void sortAndRemoveMissing( DoubleArrayList data ) {
929 data.sort();
930 double[] elements = data.elements();
931 int size = data.size();
932 for ( int i = 0; i < size; i++ ) {
933 if ( Double.isNaN( elements[i] ) ) {
934 data.setSize( i );
935 break;
936 }
937 }
938 }
939
940
941
942
943
944
945
946 private static int sizeWithoutMissingValues( DoubleArrayList list ) {
947 int size = 0;
948 for ( int i = 0; i < list.size(); i++ ) {
949 if ( !Double.isNaN( list.get( i ) ) ) {
950 size++;
951 }
952 }
953 return size;
954 }
955
956
957
958 private DescriptiveWithMissing() {
959 }
960
961 }