DescriptiveWithMissing

/*
 * The baseCode project
 *
 * Copyright (c) 2006 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.basecode.math;

import cern.colt.list.DoubleArrayList;
import cern.jet.stat.Descriptive;

/**
 * Mathematical functions for statistics that allow missing values without scotching the calculations.
 * <p>
 * Be careful because some methods from cern.jet.stat.Descriptive have not been overridden and will yield a
 * UnsupportedOperationException if used.
 * </p>
 * <p>
 * Some functions that come with DoubleArrayLists will not work in an entirely compatible way with missing values. For
 * examples, size() reports the total number of elements, including missing values. To get a count of non-missing
 * values, use this.sizeWithoutMissingValues(). The right one to use may vary.
 * </p>
 * <p>
 * Not all methods need to be overridden. However, all methods that take a "size" parameter should be passed the results
 * of sizeWithoutMissingValues(data), instead of data.size().
 * <p>
 * Based in part on code from the colt package: Copyright &copy; 1999 CERN - European Organization for Nuclear Research.
 *
 * @see <a
 *      href="http://hoschek.home.cern.ch/hoschek/colt/V1.0.3/doc/cern/jet/stat/Descriptive.html">cern.jet.stat.Descriptive
 *      </a>
 * @author Paul Pavlidis
 *
 */
public class DescriptiveWithMissing extends cern.jet.stat.Descriptive {

    /**
     * <b>Not supported. </b>
     *
     * @param data DoubleArrayList
     * @param lag int
     * @param mean double
     * @param variance double
     * @return double
     */
    public static double autoCorrelation( DoubleArrayList data, int lag, double mean, double variance ) {
        throw new UnsupportedOperationException( "autoCorrelation not supported with missing values" );
    }

    /**
     * Highly optimized version of the correlation computation, where as much information is precomputed as possible.
     * Use of this method only makes sense if many comparisons with the inputs x and y are being performed.
     * <p>
     * Implementation note: In correlation(DoubleArrayList x, DoubleArrayList y), profiling shows that calls to
     * Double.NaN consume half the CPU time. The precomputation of the element-by-element squared values is another
     * obvious optimization. There is also no checking for matching lengths of the arrays.
     *
     * @param x
     * @param y
     * @param selfSquaredX double array containing values of x_i^2 for each x.
     * @param selfSquaredY
     * @param nanStatusX boolean array containing value of Double.isNaN() for each X.
     * @param nanStatusY
     * @return
     */
    public static double correlation( double[] x, double[] y, double[] selfSquaredX, double[] selfSquaredY,
            boolean[] nanStatusX, boolean[] nanStatusY ) {

        double syy, sxy, sxx, sx, sy, xj, yj, ay, ax;
        int numused = 0;
        syy = 0.0;
        sxy = 0.0;
        sxx = 0.0;
        sx = 0.0;
        sy = 0.0;

        int length = x.length;
        for ( int j = 0; j < length; j++ ) {
            xj = x[j];
            yj = y[j];

            if ( nanStatusX[j] || nanStatusY[j] ) {
                continue;
            }
            sx += xj;
            sy += yj;
            sxy += xj * yj;
            sxx += selfSquaredX[j];
            syy += selfSquaredY[j];
            numused++;
        }

        if ( numused > 0 ) {
            ay = sy / numused;
            ax = sx / numused;
            return ( sxy - sx * ay ) / Math.sqrt( ( sxx - sx * ax ) * ( syy - sy * ay ) );
        }
        return Double.NaN; // signifies that it could not be calculated.
    }

    /**
     * Returns the correlation of two data sequences. That is
     * <tt>covariance(data1,data2)/(standardDev1*standardDev2)</tt>. Missing values are ignored. This method is
     * overridden to stop users from using the method in the superclass when missing values are present. The problem is
     * that the standard deviation cannot be computed without knowning which values are not missing in both vectors to
     * be compared. Thus the standardDev parameters are thrown away by this method.
     *
     * @param data1 DoubleArrayList
     * @param standardDev1 double - not used
     * @param data2 DoubleArrayList
     * @param standardDev2 double - not used
     * @return double
     */
    public static double correlation( DoubleArrayList data1, double standardDev1, DoubleArrayList data2,
            double standardDev2 ) {
        return correlation( data1, data2 );
    }

    /**
     * Calculate the pearson correlation of two arrays. Missing values (NaNs) are ignored.
     *
     * @param x DoubleArrayList
     * @param y DoubleArrayList
     * @return double
     */
    public static double correlation( DoubleArrayList x, DoubleArrayList y ) {
        int j;
        double syy, sxy, sxx, sx, sy, xj, yj, ay, ax;
        int numused;
        syy = 0.0;
        sxy = 0.0;
        sxx = 0.0;
        sx = 0.0;
        sy = 0.0;
        numused = 0;
        if ( x.size() != y.size() ) {
            throw new ArithmeticException( "Unequal vector sizes: " + x.size() + " != " + y.size() );
        }

        double[] xel = x.elements();
        double[] yel = y.elements();

        int length = x.size();
        for ( j = 0; j < length; j++ ) {
            xj = xel[j];
            yj = yel[j];

            if ( !Double.isNaN( xj ) && !Double.isNaN( yj ) ) {
                sx += xj;
                sy += yj;
                sxy += xj * yj;
                sxx += xj * xj;
                syy += yj * yj;
                numused++;
            }
        }

        if ( numused > 0 ) {
            ay = sy / numused;
            ax = sx / numused;
            return ( sxy - sx * ay ) / Math.sqrt( ( sxx - sx * ax ) * ( syy - sy * ay ) );
        }
        return Double.NaN; // signifies that it could not be calculated.

    }

    /**
     * Returns the SAMPLE covariance of two data sequences. Pairs of values are only considered if both are not NaN. If
     * there are no non-missing pairs, the covariance is zero.
     *
     * @param data1 the first vector
     * @param data2 the second vector
     * @return double
     */
    public static double covariance( DoubleArrayList data1, DoubleArrayList data2 ) {
        int size = data1.size();
        if ( size != data2.size() || size == 0 ) {
            throw new IllegalArgumentException();
        }
        double[] elements1 = data1.elements();
        double[] elements2 = data2.elements();

        /* initialize sumx and sumy to the first non-NaN pair of values */

        int i = 0;
        double sumx = 0.0, sumy = 0.0, Sxy = 0.0;
        while ( i < size ) {
            sumx = elements1[i];
            sumy = elements2[i];
            if ( !Double.isNaN( elements1[i] ) && !Double.isNaN( elements2[i] ) ) {
                break;
            }
            i++;
        }
        i++;
        int usedPairs = 1;
        for ( ; i < size; ++i ) {
            double x = elements1[i];
            double y = elements2[i];
            if ( Double.isNaN( x ) || Double.isNaN( y ) ) {
                continue;
            }

            sumx += x;
            Sxy += ( x - sumx / ( usedPairs + 1 ) ) * ( y - sumy / usedPairs );
            sumy += y;
            usedPairs++;
        }
        return Sxy / ( usedPairs - 1 );
    }

    /**
     * Durbin-Watson computation. This measures the serial correlation in a data series.
     *
     * @param data DoubleArrayList
     * @return double
     * @todo this will still break in some situations where there are missing values
     */
    public static double durbinWatson( DoubleArrayList data ) {
        int size = data.size();
        if ( size < 2 ) {
            throw new IllegalArgumentException( "data sequence must contain at least two values." );
        }

        double[] elements = data.elements();
        double run = 0;
        double run_sq = 0;
        int firstNotNaN = 0;
        while ( firstNotNaN < size ) {
            run_sq = elements[firstNotNaN] * elements[firstNotNaN];

            if ( !Double.isNaN( elements[firstNotNaN] ) ) {
                break;
            }

            firstNotNaN++;
        }

        if ( firstNotNaN > 0 && size - firstNotNaN < 2 ) {
            throw new IllegalArgumentException( "data sequence must contain at least two non-missing values." );

        }

        for ( int i = firstNotNaN + 1; i < size; i++ ) {
            int gap = 1;
            while ( i < size && Double.isNaN( elements[i] ) ) {
                gap++;
                i++;
                continue;
            }
            if ( i >= size ) continue; // missing value at end will cause this.
            double x = elements[i] - elements[i - gap];
            /**  */
            run += x * x;
            run_sq += elements[i] * elements[i];
        }

        return run / run_sq;
    }

    /**
     * Returns the geometric mean of a data sequence. Missing values are ignored. Note that for a geometric mean to be
     * meaningful, the minimum of the data sequence must not be less or equal to zero. <br>
     * The geometric mean is given by <tt>pow( Product( data[i] ),
     * 1/data.size())</tt>. This method tries to avoid overflows at the expense of an equivalent but somewhat slow
     * definition: <tt>geo = Math.exp( Sum(
     * Log(data[i]) ) / data.size())</tt>.
     *
     * @param data DoubleArrayList
     * @return double
     */
    public static double geometricMean( DoubleArrayList data ) {
        return geometricMean( sizeWithoutMissingValues( data ), sumOfLogarithms( data, 0, data.size() - 1 ) );
    }

    /**
     * Incrementally maintains and updates minimum, maximum, sum and sum of squares of a data sequence.
     * <p>
     * Assume we have already recorded some data sequence elements and know their minimum, maximum, sum and sum of
     * squares. Assume further, we are to record some more elements and to derive updated values of minimum, maximum,
     * sum and sum of squares. This method computes those updated values without needing to know the already recorded
     * elements. This is interesting for interactive online monitoring and/or applications that cannot keep the entire
     * huge data sequence in memory.
     *
     * @param data the additional elements to be incorporated into min, max, etc.
     * @param from the index of the first element within <tt>data</tt> to consider.
     * @param to the index of the last element within <tt>data</tt> to consider. The method incorporates elements
     *        <tt>data[from], ..., data[to]</tt>.
     * @param inOut the old values in the following format:
     *        <ul>
     *        <li><tt>inOut[0]</tt> is the old minimum. <li><tt>inOut[1]</tt> is the old maximum. <li><tt>inOut[2]</tt>
     *        is the old sum. <li><tt>inOut[3]</tt> is the old sum of squares.
     *        </ul>
     *        If no data sequence elements have so far been recorded set the values as follows
     *        <ul>
     *        <li><tt>inOut[0] = Double.POSITIVE_INFINITY</tt> as the old minimum. <li><tt>inOut[1] =
     *        Double.NEGATIVE_INFINITY</tt> as the old maximum. <li><tt>inOut[2] = 0.0</tt> as the old sum. <li><tt>
     *        inOut[3] = 0.0</tt> as the old sum of squares.
     *        </ul>
     */
    public static void incrementalUpdate( DoubleArrayList data, int from, int to, double[] inOut ) {
        throw new UnsupportedOperationException( "incrementalUpdate not supported with missing values" );
    }

    /**
     * <b>Not supported. </b>
     *
     * @param data DoubleArrayList
     * @param from int
     * @param to int
     * @param fromSumIndex int
     * @param toSumIndex int
     * @param sumOfPowers double[]
     */
    public static void incrementalUpdateSumsOfPowers( DoubleArrayList data, int from, int to, int fromSumIndex,
            int toSumIndex, double[] sumOfPowers ) {
        throw new UnsupportedOperationException( "incrementalUpdateSumsOfPowers not supported with missing values" );
    }

    /**
     * <b>Not supported. </b>
     *
     * @param data DoubleArrayList
     * @param weights DoubleArrayList
     * @param from int
     * @param to int
     * @param inOut double[]
     */
    public static void incrementalWeightedUpdate( DoubleArrayList data, DoubleArrayList weights, int from, int to,
            double[] inOut ) {
        throw new UnsupportedOperationException( "incrementalWeightedUpdate not supported with missing values" );
    }

    /**
     * Returns the kurtosis (aka excess) of a data sequence.
     *
     * @param moment4 the fourth central moment, which is <tt>moment(data,4,mean)</tt>.
     * @param standardDeviation the standardDeviation.
     * @return double
     */
    public static double kurtosis( double moment4, double standardDeviation ) {
        return -3 + moment4 / ( standardDeviation * standardDeviation * standardDeviation * standardDeviation );
    }

    /**
     * Returns the kurtosis (aka excess) of a data sequence, which is <tt>-3 +
     * moment(data,4,mean) / standardDeviation<sup>4</sup></tt>.
     *
     * @param data DoubleArrayList
     * @param mean double
     * @param standardDeviation double
     * @return double
     */
    public static double kurtosis( DoubleArrayList data, double mean, double standardDeviation ) {
        return kurtosis( moment( data, 4, mean ), standardDeviation );
    }

    /**
     * <b>Not supported. </b>
     *
     * @param data DoubleArrayList
     * @param mean double
     * @return double
     */
    public static double lag1( DoubleArrayList data, double mean ) {
        throw new UnsupportedOperationException( "lag1 not supported with missing values" );
    }

    /**
     * @param dat
     * @return the median absolute deviation from the median.
     */
    public static double mad( DoubleArrayList dat ) {
        DoubleArrayList copy = removeMissing( dat.copy() );

        double median = median( copy );

        DoubleArrayList devsum = new DoubleArrayList( copy.size() );
        for ( int i = 0; i < copy.size(); i++ ) {
            double v = copy.getQuick( i );
            devsum.add( Math.abs( v - median ) );
        }

        devsum.sort();
        return Descriptive.median( devsum );
    }

    public static double max( DoubleArrayList input ) {
        int size = input.size();
        if ( size == 0 ) throw new IllegalArgumentException();

        double[] elements = input.elements();
        double max = Double.MIN_VALUE;
        for ( int i = 0; i < size; i++ ) {
            if ( Double.isNaN( elements[i] ) ) continue;
            if ( elements[i] > max ) max = elements[i];
        }

        return max;
    }

    /**
     * Special mean calculation where we use the effective size as an input.
     *
     * @param elements The data double array.
     * @param effectiveSize The effective size used for the mean calculation.
     * @return double
     */
    public static double mean( double[] elements, int effectiveSize ) {

        int length = elements.length;

        if ( 0 == effectiveSize ) {
            return Double.NaN;
        }

        double sum = 0.0;
        int i, count;
        count = 0;
        double value;
        for ( i = 0; i < length; i++ ) {
            value = elements[i];
            if ( Double.isNaN( value ) ) {
                continue;
            }
            sum += value;
            count++;
        }
        if ( 0.0 == count ) {
            return Double.NaN;
        }
        return sum / effectiveSize;
    }

    /**
     * @param data Values to be analyzed.
     * @return Mean of the values in x. Missing values are ignored in the analysis.
     */
    public static double mean( DoubleArrayList data ) {
        return sum( data ) / sizeWithoutMissingValues( data );
    }

    /**
     * Special mean calculation where we use the effective size as an input.
     *
     * @param x The data
     * @param effectiveSize The effective size used for the mean calculation.
     * @return double
     */
    public static double mean( DoubleArrayList x, int effectiveSize ) {

        int length = x.size();

        if ( 0 == effectiveSize ) {
            return Double.NaN;
        }

        double sum = 0.0;
        int i, count;
        count = 0;
        double value;
        double[] elements = x.elements();
        for ( i = 0; i < length; i++ ) {
            value = elements[i];
            if ( Double.isNaN( value ) ) {
                continue;
            }
            sum += value;
            count++;
        }
        if ( 0.0 == count ) {
            return Double.NaN;
        }
        return sum / effectiveSize;

    }

    /**
     * Calculate the mean of the values above to a particular quantile of an array.
     *
     * @param quantile A value from 0 to 1
     * @param array Array for which we want to get the quantile.
     * @return double
     */
    public static double meanAboveQuantile( double quantile, DoubleArrayList array ) {

        if ( quantile < 0.0 || quantile > 1.0 ) {
            throw new IllegalArgumentException( "Quantile must be between 0 and 1" );
        }

        double returnvalue = 0.0;
        int k = 0;

        double quantileValue = DescriptiveWithMissing.quantile( array, quantile );

        for ( int i = 0; i < array.size(); i++ ) {
            if ( array.get( i ) > quantileValue ) {
                returnvalue += array.get( i );
                k++;
            }
        }

        if ( k == 0 ) {
            throw new ArithmeticException( "No values found above quantile" );
        }

        return returnvalue / k;
    }

    /**
     * Returns the median. Missing values are ignored entirely.
     *
     * @param data the data sequence, does not have to be sorted.
     * @return double
     */
    public static double median( DoubleArrayList data ) {
        return DescriptiveWithMissing.quantile( data, 0.5 );
    }

    public static double min( DoubleArrayList input ) {
        int size = input.size();
        if ( size == 0 ) throw new IllegalArgumentException();

        double[] elements = input.elements();
        double min = Double.MAX_VALUE;
        for ( int i = 0; i < size; i++ ) {
            if ( Double.isNaN( elements[i] ) ) continue;
            if ( elements[i] < min ) min = elements[i];
        }

        return min;
    }

    /**
     * Returns the moment of <tt>k</tt> -th order with constant <tt>c</tt> of a data sequence, which is
     * <tt>Sum( (data[i]-c)<sup>k</sup> ) /
     * data.size()</tt>.
     *
     * @param data DoubleArrayList
     * @param k int
     * @param c double
     * @return double
     */
    public static double moment( DoubleArrayList data, int k, double c ) {
        return sumOfPowerDeviations( data, k, c ) / sizeWithoutMissingValues( data );
    }

    /**
     * Returns the product of a data sequence, which is <tt>Prod( data[i] )</tt>. Missing values are ignored. In other
     * words: <tt>data[0]*data[1]*...*data[data.size()-1]</tt>. Note that you may easily get numeric overflows.
     *
     * @param data DoubleArrayList
     * @return double
     */
    public static double product( DoubleArrayList data ) {
        int size = data.size();
        double[] elements = data.elements();

        double product = 1;
        for ( int i = size; --i >= 0; ) {
            if ( Double.isNaN( elements[i] ) ) {
                continue;
            }
            product *= elements[i];

        }
        return product;
    }

    /**
     * Returns the <tt>phi-</tt> quantile; that is, an element <tt>elem</tt> for which holds that <tt>phi</tt> percent
     * of data elements are less than <tt>elem</tt>. Missing values are ignored. The quantile need not necessarily be
     * contained in the data sequence, it can be a linear interpolation.
     *
     * @param data the data sequence, does not have to be sorted.
     * @param phi the percentage; must satisfy <tt>0 &lt;= phi &lt;= 1</tt>.
     * @todo possibly implement so a copy is not made.
     * @return double
     */
    public static double quantile( DoubleArrayList data, double phi ) {
        DoubleArrayList sortedData = removeMissing( data.copy() );
        sortedData.sort();
        return Descriptive.quantile( sortedData, phi );
    }

    /**
     * Returns how many percent of the elements contained in the receiver are <tt>&lt;= element</tt>. Does linear
     * interpolation if the element is not contained but lies in between two contained elements. Missing values are
     * ignored.
     *
     * @param data the list to be searched
     * @param element the element to search for.
     * @return the percentage <tt>phi</tt> of elements <tt>&lt;= element</tt>(<tt>0.0 &lt;= phi &lt;= 1.0)</tt>.
     */
    public static double quantileInverse( DoubleArrayList data, double element ) {
        DoubleArrayList sortedData = removeMissing( data.copy() );
        sortedData.sort();
        return rankInterpolated( sortedData, element ) / sortedData.size();
    }

    /**
     * Returns the quantiles of the specified percentages. The quantiles need not necessarily be contained in the data
     * sequence, it can be a linear interpolation.
     *
     * @param sortedData the data sequence; <b>must be sorted ascending </b>.
     * @param percentages the percentages for which quantiles are to be computed. Each percentage must be in the
     *        interval <tt>[0.0,1.0]</tt>.
     * @return the quantiles.
     */
    public static DoubleArrayList quantiles( DoubleArrayList sortedData, DoubleArrayList percentages ) {
        int s = percentages.size();
        DoubleArrayList quantiles = new DoubleArrayList( s );

        for ( int i = 0; i < s; i++ ) {
            quantiles.add( quantile( sortedData, percentages.get( i ) ) );
        }

        return quantiles;
    }

    /**
     * Returns the linearly interpolated number of elements in a list less or equal to a given element. Missing values
     * are ignored. The rank is the number of elements <= element. Ranks are of the form
     * <tt>{0, 1, 2,..., sortedList.size()}</tt>. If no element is <= element, then the rank is zero. If the element
     * lies in between two contained elements, then linear interpolation is used and a non integer value is returned.
     *
     * @param sortedList the list to be searched (must be sorted ascending).
     * @param element the element to search for.
     * @return the rank of the element.
     * @todo possibly implement so a copy is not made.
     */
    public static double rankInterpolated( DoubleArrayList sortedList, double element ) {
        return Descriptive.rankInterpolated( removeMissing( sortedList ), element );
    }

    /**
     * Makes a copy of the list that doesn't have the missing values.
     *
     * @param data DoubleArrayList
     * @return DoubleArrayList
     */
    public static DoubleArrayList removeMissing( DoubleArrayList data ) {
        DoubleArrayList r = new DoubleArrayList( sizeWithoutMissingValues( data ) );
        double[] elements = data.elements();
        int size = data.size();
        for ( int i = 0; i < size; i++ ) {
            if ( Double.isNaN( elements[i] ) ) {
                continue;
            }
            r.add( elements[i] );
        }
        return r;
    }

    /**
     * Returns the sample kurtosis (aka excess) of a data sequence.
     *
     * @param data DoubleArrayList
     * @param mean double
     * @param sampleVariance double
     * @return double
     */
    public static double sampleKurtosis( DoubleArrayList data, double mean, double sampleVariance ) {
        return sampleKurtosis( sizeWithoutMissingValues( data ), moment( data, 4, mean ), sampleVariance );
    }

    /**
     * Returns the sample skew of a data sequence.
     *
     * @param data DoubleArrayList
     * @param mean double
     * @param sampleVariance double
     * @return double
     */
    public static double sampleSkew( DoubleArrayList data, double mean, double sampleVariance ) {
        return sampleSkew( sizeWithoutMissingValues( data ), moment( data, 3, mean ), sampleVariance );
    }

    /**
     * Returns the sample standard deviation.
     * <p>
     * This is included for compatibility with the superclass, but does not implement the correction used there.
     *
     * @see cern.jet.stat.Descriptive#sampleStandardDeviation(int, double)
     * @param size the number of elements of the data sequence.
     * @param sampleVariance the <b>sample variance </b>.
     */
    public static double sampleStandardDeviation( int size, double sampleVariance ) {
        return Math.sqrt( sampleVariance );
    }

    /**
     * Returns the sample variance of a data sequence. That is <tt>Sum (
     * (data[i]-mean)^2 ) / (data.size()-1)</tt>.
     *
     * @param data DoubleArrayList
     * @param mean double
     * @return double
     */
    public static double sampleVariance( DoubleArrayList data, double mean ) {
        double[] elements = data.elements();
        int effsize = sizeWithoutMissingValues( data );
        int size = data.size();
        double sum = 0;
        // find the sum of the squares
        for ( int i = size; --i >= 0; ) {
            if ( Double.isNaN( elements[i] ) ) {
                continue;
            }
            double delta = elements[i] - mean;
            sum += delta * delta;
        }

        return sum / ( effsize - 1 );
    }

    /**
     * Return the size of the list, ignoring missing values.
     *
     * @param list DoubleArrayList
     * @return int
     */
    public static int sizeWithoutMissingValues( DoubleArrayList list ) {

        int size = 0;
        for ( int i = 0; i < list.size(); i++ ) {
            if ( !Double.isNaN( list.get( i ) ) ) {
                size++;
            }
        }
        return size;
    }

    /**
     * Returns the skew of a data sequence, which is <tt>moment(data,3,mean) /
     * standardDeviation<sup>3</sup></tt>.
     *
     * @param data DoubleArrayList
     * @param mean double
     * @param standardDeviation double
     * @return double
     */
    public static double skew( DoubleArrayList data, double mean, double standardDeviation ) {
        return skew( moment( data, 3, mean ), standardDeviation );
    }

    /**
     * Standardize. Note that this does something slightly different than standardize in the superclass, because our
     * sampleStandardDeviation does not use the correction of the superclass (which isn't really standard).
     *
     * @param data DoubleArrayList
     */
    public static void standardize( DoubleArrayList data ) {
        double mean = mean( data );
        double stdev = Math.sqrt( sampleVariance( data, mean ) );
        DescriptiveWithMissing.standardize( data, mean, stdev );
    }

    /**
     * Modifies a data sequence to be standardized. Mising values are ignored. Changes each element <tt>data[i]</tt> as
     * follows: <tt>data[i] = (data[i]-mean)/standardDeviation</tt>.
     *
     * @param data DoubleArrayList
     * @param mean mean of data
     * @param standardDeviation stdev of data
     */
    public static void standardize( DoubleArrayList data, double mean, double standardDeviation ) {
        double[] elements = data.elements();
        for ( int i = 0; i < elements.length; i++ ) {
            if ( Double.isNaN( elements[i] ) ) {
                continue;
            }
            elements[i] = ( elements[i] - mean ) / standardDeviation;
        }
    }

    /**
     * Returns the sum of a data sequence. That is <tt>Sum( data[i] )</tt>.
     *
     * @param data DoubleArrayList
     * @return double
     */
    public static double sum( DoubleArrayList data ) {
        return sumOfPowerDeviations( data, 1, 0.0 );
    }

    /**
     * Returns the sum of inversions of a data sequence, which is <tt>Sum( 1.0 /
     * data[i])</tt>.
     *
     * @param data the data sequence.
     * @param from the index of the first data element (inclusive).
     * @param to the index of the last data element (inclusive).
     * @return double
     */
    public static double sumOfInversions( DoubleArrayList data, int from, int to ) {
        return sumOfPowerDeviations( data, -1, 0.0, from, to );
    }

    /**
     * Returns the sum of logarithms of a data sequence, which is <tt>Sum(
     * Log(data[i])</tt>. Missing values are ignored.
     *
     * @param data the data sequence.
     * @param from the index of the first data element (inclusive).
     * @param to the index of the last data element (inclusive).
     * @return double
     */
    public static double sumOfLogarithms( DoubleArrayList data, int from, int to ) {
        double[] elements = data.elements();
        double logsum = 0;
        for ( int i = from - 1; ++i <= to; ) {
            if ( Double.isNaN( elements[i] ) ) {
                continue;
            }
            logsum += Math.log( elements[i] );
        }
        return logsum;
    }

    /**
     * Returns <tt>Sum( (data[i]-c)<sup>k</sup> )</tt>; optimized for common parameters like <tt>c == 0.0</tt> and/or
     * <tt>k == -2 .. 4</tt>.
     *
     * @param data DoubleArrayList
     * @param k int
     * @param c double
     * @return double
     */
    public static double sumOfPowerDeviations( DoubleArrayList data, int k, double c ) {
        return sumOfPowerDeviations( data, k, c, 0, data.size() - 1 );
    }

    /**
     * Returns <tt>Sum( (data[i]-c)<sup>k</sup> )</tt> for all <tt>i = from ..
     * to</tt>; optimized for common parameters like <tt>c == 0.0</tt> and/or <tt>k == -2 .. 5</tt>. Missing values are
     * ignored.
     *
     * @param data DoubleArrayList
     * @param k int
     * @param c double
     * @param from int
     * @param to int
     * @return double
     */
    public static double sumOfPowerDeviations( final DoubleArrayList data, final int k, final double c, final int from,
            final int to ) {
        final double[] elements = data.elements();
        double sum = 0;
        double v;
        int i;
        switch ( k ) { // optimized for speed
            case -2:
                if ( c == 0.0 ) {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        v = elements[i];
                        sum += 1 / ( v * v );
                    }
                } else {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        v = elements[i] - c;
                        sum += 1 / ( v * v );
                    }
                }
                break;
            case -1:
                if ( c == 0.0 ) {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        sum += 1 / elements[i];
                    }
                } else {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        sum += 1 / ( elements[i] - c );
                    }
                }
                break;
            case 0:
                for ( i = from - 1; ++i <= to; ) {
                    if ( Double.isNaN( elements[i] ) ) {
                        continue;
                    }
                    sum += 1;
                }
                break;
            case 1:
                if ( c == 0.0 ) {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        sum += elements[i];
                    }
                } else {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        sum += elements[i] - c;
                    }
                }
                break;
            case 2:
                if ( c == 0.0 ) {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        v = elements[i];
                        sum += v * v;
                    }
                } else {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        v = elements[i] - c;
                        sum += v * v;
                    }
                }
                break;
            case 3:
                if ( c == 0.0 ) {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        v = elements[i];
                        sum += v * v * v;
                    }
                } else {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        v = elements[i] - c;
                        sum += v * v * v;
                    }
                }
                break;
            case 4:
                if ( c == 0.0 ) {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        v = elements[i];
                        sum += v * v * v * v;
                    }
                } else {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        v = elements[i] - c;
                        sum += v * v * v * v;
                    }
                }
                break;
            case 5:
                if ( c == 0.0 ) {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        v = elements[i];
                        sum += v * v * v * v * v;
                    }
                } else {
                    for ( i = from - 1; ++i <= to; ) {
                        if ( Double.isNaN( elements[i] ) ) {
                            continue;
                        }
                        v = elements[i] - c;
                        sum += v * v * v * v * v;
                    }
                 }
                 break;
             default:
                 for ( i = from - 1; ++i <= to; ) {
                     if ( Double.isNaN( elements[i] ) ) {
                         continue;
                     }
                     sum += Math.pow( elements[i] - c, k );
                 }
                 break;
         }
         return sum;
     }

     /**
      * Returns the sum of powers of a data sequence, which is <tt>Sum (
      * data[i]<sup>k</sup> )</tt>.
      *
      * @param data DoubleArrayList
      * @param k int
      * @return double
      */
     public static double sumOfPowers( DoubleArrayList data, int k ) {
         return sumOfPowerDeviations( data, k, 0 );
     }

     /**
      * Compute the sum of the squared deviations from the mean of a data sequence. Missing values are ignored.
      *
      * @param data DoubleArrayList
      * @return double
      */
     public static double sumOfSquaredDeviations( DoubleArrayList data ) {
         return sumOfSquaredDeviations( sizeWithoutMissingValues( data ),
                 variance( sizeWithoutMissingValues( data ), sum( data ), sumOfSquares( data ) ) );
     }

     /**
      * Returns the sum of squares of a data sequence. Skips missing values.
      *
      * @param data DoubleArrayList
      * @return double
      */
     public static double sumOfSquares( DoubleArrayList data ) {
         return sumOfPowerDeviations( data, 2, 0.0 );
     }

     /**
      * Returns the trimmed mean of a sorted data sequence. Missing values are completely ignored.
      *
      * @param sortedData the data sequence; <b>must be sorted ascending </b>.
      * @param mean the mean of the (full) sorted data sequence.
      * @param left int the number of leading elements to trim.
      * @param right int number of trailing elements to trim.
      * @return double
      */
     public static double trimmedMean( DoubleArrayList sortedData, double mean, int left, int right ) {
         return Descriptive.trimmedMean( removeMissing( sortedData ), mean, left, right );
     }

     /**
      * Provided for convenience!
      *
      * @param data DoubleArrayList
      * @return double
      */
     public static double variance( DoubleArrayList data ) {
         return variance( sizeWithoutMissingValues( data ), sum( data ), sumOfSquares( data ) );
     }

     /**
      *
      */
     public static double variance( int sizeWithoutMissing, double sum, double sumOfSquares ) {
         return Descriptive.variance( sizeWithoutMissing, sum, sumOfSquares );
     }

     /**
      * Returns the weighted mean of a data sequence. That is <tt> Sum (data[i] *
      * weights[i]) / Sum ( weights[i] )</tt>.
      *
      * @param data DoubleArrayList
      * @param weights DoubleArrayList
      * @return double
      */
     public static double weightedMean( DoubleArrayList data, DoubleArrayList weights ) {
         int size = data.size();
         if ( size != weights.size() || size == 0 ) {
             throw new IllegalArgumentException();
         }

         double[] elements = data.elements();
         double[] theWeights = weights.elements();
         double sum = 0.0;
         double weightsSum = 0.0;
         for ( int i = size; --i >= 0; ) {
             double w = theWeights[i];
             if ( Double.isNaN( elements[i] ) || Double.isNaN( w ) ) {
                 continue;
             }
             sum += elements[i] * w;
             weightsSum += w;
         }

         return sum / weightsSum;
     }

     /**
      * Returns the winsorized mean of a sorted data sequence.
      *
      * @param sortedData DoubleArrayList, must already be sorted ascending
      * @param mean the mean of the (full) sorted data sequence.
      * @param left the number of leading elements to trim. Refers to the number of elements to trim
      *        <em>excluding any missing values</em>
      * @param right the number of trailing elements to trim <em>excluding any missing values</em>
      * @return double
      */
     public static double winsorizedMean( DoubleArrayList sortedData, double mean, int left, int right ) {
         DoubleArrayList sdm = removeMissing( sortedData );
         return Descriptive.winsorizedMean( sdm, mean( sdm ), left, right );
     }

     /* private methods */

     private DescriptiveWithMissing() {
     }

 } // end of class