1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package ubic.basecode.math;
20
21 import cern.colt.list.DoubleArrayList;
22 import cern.jet.stat.Descriptive;
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47 public class DescriptiveWithMissing extends cern.jet.stat.Descriptive {
48
49
50
51
52
53
54
55
56
57
58 public static double autoCorrelation( DoubleArrayList data, int lag, double mean, double variance ) {
59 throw new UnsupportedOperationException( "autoCorrelation not supported with missing values" );
60 }
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78 public static double correlation( double[] x, double[] y, double[] selfSquaredX, double[] selfSquaredY,
79 boolean[] nanStatusX, boolean[] nanStatusY ) {
80
81 double syy, sxy, sxx, sx, sy, xj, yj, ay, ax;
82 int numused = 0;
83 syy = 0.0;
84 sxy = 0.0;
85 sxx = 0.0;
86 sx = 0.0;
87 sy = 0.0;
88
89 int length = x.length;
90 for ( int j = 0; j < length; j++ ) {
91 xj = x[j];
92 yj = y[j];
93
94 if ( nanStatusX[j] || nanStatusY[j] ) {
95 continue;
96 }
97 sx += xj;
98 sy += yj;
99 sxy += xj * yj;
100 sxx += selfSquaredX[j];
101 syy += selfSquaredY[j];
102 numused++;
103 }
104
105 if ( numused > 0 ) {
106 ay = sy / numused;
107 ax = sx / numused;
108 return ( sxy - sx * ay ) / Math.sqrt( ( sxx - sx * ax ) * ( syy - sy * ay ) );
109 }
110 return Double.NaN;
111 }
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126 public static double correlation( DoubleArrayList data1, double standardDev1, DoubleArrayList data2,
127 double standardDev2 ) {
128 return correlation( data1, data2 );
129 }
130
131
132
133
134
135
136
137
138 public static double correlation( DoubleArrayList x, DoubleArrayList y ) {
139 int j;
140 double syy, sxy, sxx, sx, sy, xj, yj, ay, ax;
141 int numused;
142 syy = 0.0;
143 sxy = 0.0;
144 sxx = 0.0;
145 sx = 0.0;
146 sy = 0.0;
147 numused = 0;
148 if ( x.size() != y.size() ) {
149 throw new ArithmeticException( "Unequal vector sizes: " + x.size() + " != " + y.size() );
150 }
151
152 double[] xel = x.elements();
153 double[] yel = y.elements();
154
155 int length = x.size();
156 for ( j = 0; j < length; j++ ) {
157 xj = xel[j];
158 yj = yel[j];
159
160 if ( !Double.isNaN( xj ) && !Double.isNaN( yj ) ) {
161 sx += xj;
162 sy += yj;
163 sxy += xj * yj;
164 sxx += xj * xj;
165 syy += yj * yj;
166 numused++;
167 }
168 }
169
170 if ( numused > 0 ) {
171 ay = sy / numused;
172 ax = sx / numused;
173 return ( sxy - sx * ay ) / Math.sqrt( ( sxx - sx * ax ) * ( syy - sy * ay ) );
174 }
175 return Double.NaN;
176
177 }
178
179
180
181
182
183
184
185
186
187 public static double covariance( DoubleArrayList data1, DoubleArrayList data2 ) {
188 int size = data1.size();
189 if ( size != data2.size() || size == 0 ) {
190 throw new IllegalArgumentException();
191 }
192 double[] elements1 = data1.elements();
193 double[] elements2 = data2.elements();
194
195
196
197 int i = 0;
198 double sumx = 0.0, sumy = 0.0, Sxy = 0.0;
199 while ( i < size ) {
200 sumx = elements1[i];
201 sumy = elements2[i];
202 if ( !Double.isNaN( elements1[i] ) && !Double.isNaN( elements2[i] ) ) {
203 break;
204 }
205 i++;
206 }
207 i++;
208 int usedPairs = 1;
209 for ( ; i < size; ++i ) {
210 double x = elements1[i];
211 double y = elements2[i];
212 if ( Double.isNaN( x ) || Double.isNaN( y ) ) {
213 continue;
214 }
215
216 sumx += x;
217 Sxy += ( x - sumx / ( usedPairs + 1 ) ) * ( y - sumy / usedPairs );
218 sumy += y;
219 usedPairs++;
220 }
221 return Sxy / ( usedPairs - 1 );
222 }
223
224
225
226
227
228
229
230
231 public static double durbinWatson( DoubleArrayList data ) {
232 int size = data.size();
233 if ( size < 2 ) {
234 throw new IllegalArgumentException( "data sequence must contain at least two values." );
235 }
236
237 double[] elements = data.elements();
238 double run = 0;
239 double run_sq = 0;
240 int firstNotNaN = 0;
241 while ( firstNotNaN < size ) {
242 run_sq = elements[firstNotNaN] * elements[firstNotNaN];
243
244 if ( !Double.isNaN( elements[firstNotNaN] ) ) {
245 break;
246 }
247
248 firstNotNaN++;
249 }
250
251 if ( firstNotNaN > 0 && size - firstNotNaN < 2 ) {
252 throw new IllegalArgumentException( "data sequence must contain at least two non-missing values." );
253
254 }
255
256 for ( int i = firstNotNaN + 1; i < size; i++ ) {
257 int gap = 1;
258 while ( i < size && Double.isNaN( elements[i] ) ) {
259 gap++;
260 i++;
261 continue;
262 }
263 if ( i >= size ) continue;
264 double x = elements[i] - elements[i - gap];
265
266 run += x * x;
267 run_sq += elements[i] * elements[i];
268 }
269
270 return run / run_sq;
271 }
272
273
274
275
276
277
278
279
280
281
282
283
284 public static double geometricMean( DoubleArrayList data ) {
285 return geometricMean( sizeWithoutMissingValues( data ), sumOfLogarithms( data, 0, data.size() - 1 ) );
286 }
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313 public static void incrementalUpdate( DoubleArrayList data, int from, int to, double[] inOut ) {
314 throw new UnsupportedOperationException( "incrementalUpdate not supported with missing values" );
315 }
316
317
318
319
320
321
322
323
324
325
326
327 public static void incrementalUpdateSumsOfPowers( DoubleArrayList data, int from, int to, int fromSumIndex,
328 int toSumIndex, double[] sumOfPowers ) {
329 throw new UnsupportedOperationException( "incrementalUpdateSumsOfPowers not supported with missing values" );
330 }
331
332
333
334
335
336
337
338
339
340
341 public static void incrementalWeightedUpdate( DoubleArrayList data, DoubleArrayList weights, int from, int to,
342 double[] inOut ) {
343 throw new UnsupportedOperationException( "incrementalWeightedUpdate not supported with missing values" );
344 }
345
346
347
348
349
350
351
352
353 public static double kurtosis( double moment4, double standardDeviation ) {
354 return -3 + moment4 / ( standardDeviation * standardDeviation * standardDeviation * standardDeviation );
355 }
356
357
358
359
360
361
362
363
364
365
366 public static double kurtosis( DoubleArrayList data, double mean, double standardDeviation ) {
367 return kurtosis( moment( data, 4, mean ), standardDeviation );
368 }
369
370
371
372
373
374
375
376
377 public static double lag1( DoubleArrayList data, double mean ) {
378 throw new UnsupportedOperationException( "lag1 not supported with missing values" );
379 }
380
381
382
383
384
385 public static double mad( DoubleArrayList dat ) {
386 DoubleArrayList copy = removeMissing( dat.copy() );
387
388 double median = median( copy );
389
390 DoubleArrayList devsum = new DoubleArrayList( copy.size() );
391 for ( int i = 0; i < copy.size(); i++ ) {
392 double v = copy.getQuick( i );
393 devsum.add( Math.abs( v - median ) );
394 }
395
396 devsum.sort();
397 return Descriptive.median( devsum );
398 }
399
400 public static double max( DoubleArrayList input ) {
401 int size = input.size();
402 if ( size == 0 ) throw new IllegalArgumentException();
403
404 double[] elements = input.elements();
405 double max = Double.MIN_VALUE;
406 for ( int i = 0; i < size; i++ ) {
407 if ( Double.isNaN( elements[i] ) ) continue;
408 if ( elements[i] > max ) max = elements[i];
409 }
410
411 return max;
412 }
413
414
415
416
417
418
419
420
421 public static double mean( double[] elements, int effectiveSize ) {
422
423 int length = elements.length;
424
425 if ( 0 == effectiveSize ) {
426 return Double.NaN;
427 }
428
429 double sum = 0.0;
430 int i, count;
431 count = 0;
432 double value;
433 for ( i = 0; i < length; i++ ) {
434 value = elements[i];
435 if ( Double.isNaN( value ) ) {
436 continue;
437 }
438 sum += value;
439 count++;
440 }
441 if ( 0.0 == count ) {
442 return Double.NaN;
443 }
444 return sum / effectiveSize;
445 }
446
447
448
449
450
451 public static double mean( DoubleArrayList data ) {
452 return sum( data ) / sizeWithoutMissingValues( data );
453 }
454
455
456
457
458
459
460
461
462 public static double mean( DoubleArrayList x, int effectiveSize ) {
463
464 int length = x.size();
465
466 if ( 0 == effectiveSize ) {
467 return Double.NaN;
468 }
469
470 double sum = 0.0;
471 int i, count;
472 count = 0;
473 double value;
474 double[] elements = x.elements();
475 for ( i = 0; i < length; i++ ) {
476 value = elements[i];
477 if ( Double.isNaN( value ) ) {
478 continue;
479 }
480 sum += value;
481 count++;
482 }
483 if ( 0.0 == count ) {
484 return Double.NaN;
485 }
486 return sum / effectiveSize;
487
488 }
489
490
491
492
493
494
495
496
497 public static double meanAboveQuantile( double quantile, DoubleArrayList array ) {
498
499 if ( quantile < 0.0 || quantile > 1.0 ) {
500 throw new IllegalArgumentException( "Quantile must be between 0 and 1" );
501 }
502
503 double returnvalue = 0.0;
504 int k = 0;
505
506 double quantileValue = DescriptiveWithMissing.quantile( array, quantile );
507
508 for ( int i = 0; i < array.size(); i++ ) {
509 if ( array.get( i ) > quantileValue ) {
510 returnvalue += array.get( i );
511 k++;
512 }
513 }
514
515 if ( k == 0 ) {
516 throw new ArithmeticException( "No values found above quantile" );
517 }
518
519 return returnvalue / k;
520 }
521
522
523
524
525
526
527
528 public static double median( DoubleArrayList data ) {
529 return DescriptiveWithMissing.quantile( data, 0.5 );
530 }
531
532 public static double min( DoubleArrayList input ) {
533 int size = input.size();
534 if ( size == 0 ) throw new IllegalArgumentException();
535
536 double[] elements = input.elements();
537 double min = Double.MAX_VALUE;
538 for ( int i = 0; i < size; i++ ) {
539 if ( Double.isNaN( elements[i] ) ) continue;
540 if ( elements[i] < min ) min = elements[i];
541 }
542
543 return min;
544 }
545
546
547
548
549
550
551
552
553
554
555
556 public static double moment( DoubleArrayList data, int k, double c ) {
557 return sumOfPowerDeviations( data, k, c ) / sizeWithoutMissingValues( data );
558 }
559
560
561
562
563
564
565
566
567 public static double product( DoubleArrayList data ) {
568 int size = data.size();
569 double[] elements = data.elements();
570
571 double product = 1;
572 for ( int i = size; --i >= 0; ) {
573 if ( Double.isNaN( elements[i] ) ) {
574 continue;
575 }
576 product *= elements[i];
577
578 }
579 return product;
580 }
581
582
583
584
585
586
587
588
589
590
591
592 public static double quantile( DoubleArrayList data, double phi ) {
593 DoubleArrayList sortedData = removeMissing( data.copy() );
594 sortedData.sort();
595 return Descriptive.quantile( sortedData, phi );
596 }
597
598
599
600
601
602
603
604
605
606
607 public static double quantileInverse( DoubleArrayList data, double element ) {
608 DoubleArrayList sortedData = removeMissing( data.copy() );
609 sortedData.sort();
610 return rankInterpolated( sortedData, element ) / sortedData.size();
611 }
612
613
614
615
616
617
618
619
620
621
622 public static DoubleArrayList quantiles( DoubleArrayList sortedData, DoubleArrayList percentages ) {
623 int s = percentages.size();
624 DoubleArrayList quantiles = new DoubleArrayList( s );
625
626 for ( int i = 0; i < s; i++ ) {
627 quantiles.add( quantile( sortedData, percentages.get( i ) ) );
628 }
629
630 return quantiles;
631 }
632
633
634
635
636
637
638
639
640
641
642
643
644 public static double rankInterpolated( DoubleArrayList sortedList, double element ) {
645 return Descriptive.rankInterpolated( removeMissing( sortedList ), element );
646 }
647
648
649
650
651
652
653
654 public static DoubleArrayList removeMissing( DoubleArrayList data ) {
655 DoubleArrayList r = new DoubleArrayList( sizeWithoutMissingValues( data ) );
656 double[] elements = data.elements();
657 int size = data.size();
658 for ( int i = 0; i < size; i++ ) {
659 if ( Double.isNaN( elements[i] ) ) {
660 continue;
661 }
662 r.add( elements[i] );
663 }
664 return r;
665 }
666
667
668
669
670
671
672
673
674
675 public static double sampleKurtosis( DoubleArrayList data, double mean, double sampleVariance ) {
676 return sampleKurtosis( sizeWithoutMissingValues( data ), moment( data, 4, mean ), sampleVariance );
677 }
678
679
680
681
682
683
684
685
686
687 public static double sampleSkew( DoubleArrayList data, double mean, double sampleVariance ) {
688 return sampleSkew( sizeWithoutMissingValues( data ), moment( data, 3, mean ), sampleVariance );
689 }
690
691
692
693
694
695
696
697
698
699
700 public static double sampleStandardDeviation( int size, double sampleVariance ) {
701 return Math.sqrt( sampleVariance );
702 }
703
704
705
706
707
708
709
710
711
712 public static double sampleVariance( DoubleArrayList data, double mean ) {
713 double[] elements = data.elements();
714 int effsize = sizeWithoutMissingValues( data );
715 int size = data.size();
716 double sum = 0;
717
718 for ( int i = size; --i >= 0; ) {
719 if ( Double.isNaN( elements[i] ) ) {
720 continue;
721 }
722 double delta = elements[i] - mean;
723 sum += delta * delta;
724 }
725
726 return sum / ( effsize - 1 );
727 }
728
729
730
731
732
733
734
735 public static int sizeWithoutMissingValues( DoubleArrayList list ) {
736
737 int size = 0;
738 for ( int i = 0; i < list.size(); i++ ) {
739 if ( !Double.isNaN( list.get( i ) ) ) {
740 size++;
741 }
742 }
743 return size;
744 }
745
746
747
748
749
750
751
752
753
754
755 public static double skew( DoubleArrayList data, double mean, double standardDeviation ) {
756 return skew( moment( data, 3, mean ), standardDeviation );
757 }
758
759
760
761
762
763
764
765 public static void standardize( DoubleArrayList data ) {
766 double mean = mean( data );
767 double stdev = Math.sqrt( sampleVariance( data, mean ) );
768 DescriptiveWithMissing.standardize( data, mean, stdev );
769 }
770
771
772
773
774
775
776
777
778
779 public static void standardize( DoubleArrayList data, double mean, double standardDeviation ) {
780 double[] elements = data.elements();
781 for ( int i = 0; i < elements.length; i++ ) {
782 if ( Double.isNaN( elements[i] ) ) {
783 continue;
784 }
785 elements[i] = ( elements[i] - mean ) / standardDeviation;
786 }
787 }
788
789
790
791
792
793
794
795 public static double sum( DoubleArrayList data ) {
796 return sumOfPowerDeviations( data, 1, 0.0 );
797 }
798
799
800
801
802
803
804
805
806
807
808 public static double sumOfInversions( DoubleArrayList data, int from, int to ) {
809 return sumOfPowerDeviations( data, -1, 0.0, from, to );
810 }
811
812
813
814
815
816
817
818
819
820
821 public static double sumOfLogarithms( DoubleArrayList data, int from, int to ) {
822 double[] elements = data.elements();
823 double logsum = 0;
824 for ( int i = from - 1; ++i <= to; ) {
825 if ( Double.isNaN( elements[i] ) ) {
826 continue;
827 }
828 logsum += Math.log( elements[i] );
829 }
830 return logsum;
831 }
832
833
834
835
836
837
838
839
840
841
842 public static double sumOfPowerDeviations( DoubleArrayList data, int k, double c ) {
843 return sumOfPowerDeviations( data, k, c, 0, data.size() - 1 );
844 }
845
846
847
848
849
850
851
852
853
854
855
856
857
858 public static double sumOfPowerDeviations( final DoubleArrayList data, final int k, final double c, final int from,
859 final int to ) {
860 final double[] elements = data.elements();
861 double sum = 0;
862 double v;
863 int i;
864 switch ( k ) {
865 case -2:
866 if ( c == 0.0 ) {
867 for ( i = from - 1; ++i <= to; ) {
868 if ( Double.isNaN( elements[i] ) ) {
869 continue;
870 }
871 v = elements[i];
872 sum += 1 / ( v * v );
873 }
874 } else {
875 for ( i = from - 1; ++i <= to; ) {
876 if ( Double.isNaN( elements[i] ) ) {
877 continue;
878 }
879 v = elements[i] - c;
880 sum += 1 / ( v * v );
881 }
882 }
883 break;
884 case -1:
885 if ( c == 0.0 ) {
886 for ( i = from - 1; ++i <= to; ) {
887 if ( Double.isNaN( elements[i] ) ) {
888 continue;
889 }
890 sum += 1 / elements[i];
891 }
892 } else {
893 for ( i = from - 1; ++i <= to; ) {
894 if ( Double.isNaN( elements[i] ) ) {
895 continue;
896 }
897 sum += 1 / ( elements[i] - c );
898 }
899 }
900 break;
901 case 0:
902 for ( i = from - 1; ++i <= to; ) {
903 if ( Double.isNaN( elements[i] ) ) {
904 continue;
905 }
906 sum += 1;
907 }
908 break;
909 case 1:
910 if ( c == 0.0 ) {
911 for ( i = from - 1; ++i <= to; ) {
912 if ( Double.isNaN( elements[i] ) ) {
913 continue;
914 }
915 sum += elements[i];
916 }
917 } else {
918 for ( i = from - 1; ++i <= to; ) {
919 if ( Double.isNaN( elements[i] ) ) {
920 continue;
921 }
922 sum += elements[i] - c;
923 }
924 }
925 break;
926 case 2:
927 if ( c == 0.0 ) {
928 for ( i = from - 1; ++i <= to; ) {
929 if ( Double.isNaN( elements[i] ) ) {
930 continue;
931 }
932 v = elements[i];
933 sum += v * v;
934 }
935 } else {
936 for ( i = from - 1; ++i <= to; ) {
937 if ( Double.isNaN( elements[i] ) ) {
938 continue;
939 }
940 v = elements[i] - c;
941 sum += v * v;
942 }
943 }
944 break;
945 case 3:
946 if ( c == 0.0 ) {
947 for ( i = from - 1; ++i <= to; ) {
948 if ( Double.isNaN( elements[i] ) ) {
949 continue;
950 }
951 v = elements[i];
952 sum += v * v * v;
953 }
954 } else {
955 for ( i = from - 1; ++i <= to; ) {
956 if ( Double.isNaN( elements[i] ) ) {
957 continue;
958 }
959 v = elements[i] - c;
960 sum += v * v * v;
961 }
962 }
963 break;
964 case 4:
965 if ( c == 0.0 ) {
966 for ( i = from - 1; ++i <= to; ) {
967 if ( Double.isNaN( elements[i] ) ) {
968 continue;
969 }
970 v = elements[i];
971 sum += v * v * v * v;
972 }
973 } else {
974 for ( i = from - 1; ++i <= to; ) {
975 if ( Double.isNaN( elements[i] ) ) {
976 continue;
977 }
978 v = elements[i] - c;
979 sum += v * v * v * v;
980 }
981 }
982 break;
983 case 5:
984 if ( c == 0.0 ) {
985 for ( i = from - 1; ++i <= to; ) {
986 if ( Double.isNaN( elements[i] ) ) {
987 continue;
988 }
989 v = elements[i];
990 sum += v * v * v * v * v;
991 }
992 } else {
993 for ( i = from - 1; ++i <= to; ) {
994 if ( Double.isNaN( elements[i] ) ) {
995 continue;
996 }
997 v = elements[i] - c;
998 sum += v * v * v * v * v;
999 }
1000 }
1001 break;
1002 default:
1003 for ( i = from - 1; ++i <= to; ) {
1004 if ( Double.isNaN( elements[i] ) ) {
1005 continue;
1006 }
1007 sum += Math.pow( elements[i] - c, k );
1008 }
1009 break;
1010 }
1011 return sum;
1012 }
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022 public static double sumOfPowers( DoubleArrayList data, int k ) {
1023 return sumOfPowerDeviations( data, k, 0 );
1024 }
1025
1026
1027
1028
1029
1030
1031
1032 public static double sumOfSquaredDeviations( DoubleArrayList data ) {
1033 return sumOfSquaredDeviations( sizeWithoutMissingValues( data ),
1034 variance( sizeWithoutMissingValues( data ), sum( data ), sumOfSquares( data ) ) );
1035 }
1036
1037
1038
1039
1040
1041
1042
1043 public static double sumOfSquares( DoubleArrayList data ) {
1044 return sumOfPowerDeviations( data, 2, 0.0 );
1045 }
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056 public static double trimmedMean( DoubleArrayList sortedData, double mean, int left, int right ) {
1057 return Descriptive.trimmedMean( removeMissing( sortedData ), mean, left, right );
1058 }
1059
1060
1061
1062
1063
1064
1065
1066 public static double variance( DoubleArrayList data ) {
1067 return variance( sizeWithoutMissingValues( data ), sum( data ), sumOfSquares( data ) );
1068 }
1069
1070
1071
1072
1073 public static double variance( int sizeWithoutMissing, double sum, double sumOfSquares ) {
1074 return Descriptive.variance( sizeWithoutMissing, sum, sumOfSquares );
1075 }
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085 public static double weightedMean( DoubleArrayList data, DoubleArrayList weights ) {
1086 int size = data.size();
1087 if ( size != weights.size() || size == 0 ) {
1088 throw new IllegalArgumentException();
1089 }
1090
1091 double[] elements = data.elements();
1092 double[] theWeights = weights.elements();
1093 double sum = 0.0;
1094 double weightsSum = 0.0;
1095 for ( int i = size; --i >= 0; ) {
1096 double w = theWeights[i];
1097 if ( Double.isNaN( elements[i] ) || Double.isNaN( w ) ) {
1098 continue;
1099 }
1100 sum += elements[i] * w;
1101 weightsSum += w;
1102 }
1103
1104 return sum / weightsSum;
1105 }
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117 public static double winsorizedMean( DoubleArrayList sortedData, double mean, int left, int right ) {
1118 DoubleArrayList sdm = removeMissing( sortedData );
1119 return Descriptive.winsorizedMean( sdm, mean( sdm ), left, right );
1120 }
1121
1122
1123
1124 private DescriptiveWithMissing() {
1125 }
1126
1127 }