1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package ubic.basecode.math.linearmodels;
21
22 import java.util.List;
23
24 import cern.colt.function.DoubleFunction;
25 import cern.colt.list.BooleanArrayList;
26 import cern.colt.list.DoubleArrayList;
27 import org.apache.commons.math3.special.Gamma;
28
29 import cern.colt.matrix.DoubleMatrix1D;
30 import cern.colt.matrix.impl.DenseDoubleMatrix1D;
31 import cern.jet.math.Functions;
32 import ubic.basecode.dataStructure.matrix.MatrixUtil;
33 import ubic.basecode.math.DescriptiveWithMissing;
34 import ubic.basecode.math.SpecFunc;
35
36
37
38
39
40
41
42
43
44
45
46 public class ModeratedTstat {
47
48 private final static DoubleFunction digamma = new DoubleFunction() {
49 @Override
50 public double apply(double v) {
51 return Gamma.digamma(v);
52 }
53 };
54 private final static DoubleFunction trigammainverse = new DoubleFunction() {
55 @Override
56 public double apply(double v) {
57 return SpecFunc.trigammaInverse(v);
58 }
59 };
60 private final static DoubleFunction trigamma = new DoubleFunction() {
61 @Override
62 public double apply(double v) {
63 return Gamma.trigamma(v);
64 }
65 };
66 public static final double TOOSMALL = Math.pow(10, -15);
67
68
69
70
71
72
73 public static void ebayes(LeastSquaresFit fit) {
74 List<LinearModelSummary> summaries = fit.summarize();
75 DoubleMatrix1D sigmas = new DenseDoubleMatrix1D(new double[summaries.size()]);
76 DoubleMatrix1D dofs = new DenseDoubleMatrix1D(new double[summaries.size()]);
77 int i = 0;
78
79 for ( LinearModelSummary lms : summaries) {
80 if (Double.isNaN(lms.getSigma())) {
81 sigmas.set(i, Double.NaN);
82 dofs.set(i, Double.NaN);
83 } else {
84 sigmas.set(i, lms.getSigma());
85 dofs.set(i, lms.getResidualsDof());
86 }
87 i++;
88
89 }
90 squeezeVar(sigmas.copy().assign(Functions.square), dofs, fit);
91 }
92
93
94
95
96
97
98
99 private static final BooleanArrayList okVars(DoubleMatrix1D x) {
100 assert x.size() > 0;
101 BooleanArrayList answer = new BooleanArrayList(x.size());
102 for (int i = 0; i < x.size(); i++) {
103 double a = x.getQuick(i);
104 answer.add(!(Double.isNaN(a) || Double.isInfinite(a) || a < -1e-15));
105 }
106 return answer;
107 }
108
109 private static final BooleanArrayList okDfs(DoubleMatrix1D x) {
110 assert x.size() > 0;
111 BooleanArrayList answer = new BooleanArrayList(x.size());
112
113 for (int i = 0; i < x.size(); i++) {
114 double a = x.getQuick(i);
115 answer.add(!(Double.isNaN(a) || Double.isInfinite(a) || a < TOOSMALL));
116 }
117 return answer;
118 }
119
120
121
122
123
124
125 protected static double[] fitFDist(final DoubleMatrix1D vars, final DoubleMatrix1D df1s) {
126
127 BooleanArrayList ok = MatrixUtil.conjunction(okVars(vars), okDfs(df1s));
128
129 DoubleMatrix1D x = MatrixUtil.stripNonOK(vars, ok);
130 DoubleMatrix1D df1 = MatrixUtil.stripNonOK(df1s, ok);
131
132 if (x.size() == 0) {
133 throw new IllegalStateException("There were no valid values of variance to perform eBayes parameter estimation");
134 }
135
136
137 x = x.assign(Functions.max(1e-5 * DescriptiveWithMissing.median(new DoubleArrayList(vars.toArray()))));
138
139
140 DoubleMatrix1D z = x.copy().assign(Functions.log);
141
142
143 DoubleMatrix1D e1 = df1.copy().assign(Functions.div(2.0)).assign(digamma);
144 DoubleMatrix1D e2 = df1.copy().assign(Functions.div(2.0)).assign(Functions.log);
145 DoubleMatrix1D e = z.copy().assign(e1, Functions.minus).assign(e2, Functions.plus);
146
147 int n = x.size();
148 if (n < 2) {
149 throw new IllegalStateException("Too few valid variance values to do eBayes parameter estimation (require at least 2)");
150 }
151
152
153 double emean = e.zSum() / n;
154
155 double evar = e.copy().assign(Functions.minus(emean)).aggregate(Functions.plus, Functions.square)
156 / (n - 1);
157
158
159 evar = evar - df1.copy().assign(Functions.div(2.0)).assign(trigamma).zSum() / df1.size();
160 double df2;
161 double s20;
162 if (evar > 0.0) {
163
164 df2 = 2 * SpecFunc.trigammaInverse(evar);
165
166
167 s20 = Math.exp(emean + Gamma.digamma(df2 / 2.0) - Math.log(df2 / 2.0));
168
169 } else {
170 df2 = Double.POSITIVE_INFINITY;
171
172 s20 = x.copy().aggregate(Functions.plus, Functions.identity) / x.size();
173 }
174
175 return new double[]{s20, df2};
176 }
177
178
179
180
181
182 @Deprecated
183 private static double[] fitFDistNoMissing(DoubleMatrix1D x, double df1) {
184
185
186 x = x.copy().assign(Functions.max(1e-5));
187
188
189 DoubleMatrix1D e = x.copy().assign(Functions.log).assign(Functions.minus(Gamma.digamma(df1 / 2.0)))
190 .assign(Functions.plus(Math.log(df1 / 2.0)));
191
192 int nok = x.size();
193
194 double emean = e.copy().zSum() / nok;
195
196 double evar = e.copy().assign(Functions.minus(emean)).aggregate(Functions.plus, Functions.square)
197 / (nok - 1);
198
199 evar = evar - Gamma.trigamma(df1 / 2.0);
200
201 double df2;
202 double s20;
203 if (evar > 0) {
204
205 df2 = 2 * SpecFunc.trigammaInverse(evar);
206
207
208 s20 = Math.exp(emean + Gamma.digamma(df2 / 2.0) - Math.log(df2 / 2.0));
209
210 } else {
211 df2 = Double.POSITIVE_INFINITY;
212
213 s20 = x.copy().zSum() / x.size();
214 }
215
216 return new double[]{s20, df2};
217 }
218
219
220
221
222
223
224
225
226
227 protected static DoubleMatrix1D squeezeVar(DoubleMatrix1D var, DoubleMatrix1D df, LeastSquaresFit fit) {
228
229 double[] ffit = fitFDist(var, df);
230 double dfPrior = ffit[1];
231
232 DoubleMatrix1D varPost = squeezeVariances(var, df, ffit);
233
234 if (fit != null)
235 fit.ebayesUpdate(dfPrior, ffit[0], varPost);
236
237 return varPost;
238 }
239
240
241
242
243
244
245
246 protected static DoubleMatrix1D squeezeVariances(DoubleMatrix1D var, DoubleMatrix1D df, double[] fit) {
247 double varPrior = fit[0];
248 double dfPrior = fit[1];
249
250
251
252 DoubleMatrix1D dfTotal = df.copy().assign(Functions.plus(dfPrior));
253
254 return var.copy().assign(df, Functions.mult)
255 .assign(Functions.plus(dfPrior * varPrior)).assign(dfTotal, Functions.div);
256
257 }
258
259 }