1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package ubic.basecode.math.linearmodels;
21
22 import java.io.File;
23 import java.io.IOException;
24 import java.io.OutputStream;
25 import java.io.PrintStream;
26 import java.util.List;
27
28 import cern.colt.function.DoubleFunction;
29 import cern.colt.list.BooleanArrayList;
30 import cern.colt.list.DoubleArrayList;
31 import org.apache.commons.math3.special.Gamma;
32
33 import cern.colt.matrix.DoubleMatrix1D;
34 import cern.colt.matrix.impl.DenseDoubleMatrix1D;
35 import cern.jet.math.Functions;
36 import ubic.basecode.dataStructure.matrix.MatrixUtil;
37 import ubic.basecode.math.DescriptiveWithMissing;
38 import ubic.basecode.math.SpecFunc;
39
40
41
42
43
44
45
46
47
48
49
50 public class ModeratedTstat {
51
52 private final static DoubleFunction digamma = new DoubleFunction() {
53 @Override
54 public double apply(double v) {
55 return Gamma.digamma(v);
56 }
57 };
58 private final static DoubleFunction trigammainverse = new DoubleFunction() {
59 @Override
60 public double apply(double v) {
61 return SpecFunc.trigammaInverse(v);
62 }
63 };
64 private final static DoubleFunction trigamma = new DoubleFunction() {
65 @Override
66 public double apply(double v) {
67 return Gamma.trigamma(v);
68 }
69 };
70 public static final double TOOSMALL = Math.pow(10, -15);
71
72
73
74
75
76
77 public static void ebayes(LeastSquaresFit fit) {
78 List<LinearModelSummary> summaries = fit.summarize();
79 DoubleMatrix1D sigmas = new DenseDoubleMatrix1D(new double[summaries.size()]);
80 DoubleMatrix1D dofs = new DenseDoubleMatrix1D(new double[summaries.size()]);
81 int i = 0;
82
83 for (LinearModelSummary lms : summaries) {
84 if (lms.getSigma() == null) {
85 sigmas.set(i, Double.NaN);
86 dofs.set(i, Double.NaN);
87 } else {
88 sigmas.set(i, lms.getSigma());
89 dofs.set(i, lms.getResidualDof());
90 }
91 i++;
92
93 }
94 squeezeVar(sigmas.copy().assign(Functions.square), dofs, fit);
95 }
96
97
98
99
100
101
102
103 private static final BooleanArrayList okVars(DoubleMatrix1D x) {
104 assert x.size() > 0;
105 BooleanArrayList answer = new BooleanArrayList(x.size());
106 for (int i = 0; i < x.size(); i++) {
107 double a = x.getQuick(i);
108 answer.add(!(Double.isNaN(a) || Double.isInfinite(a) || a < -1e-15));
109 }
110 return answer;
111 }
112
113 private static final BooleanArrayList okDfs(DoubleMatrix1D x) {
114 assert x.size() > 0;
115 BooleanArrayList answer = new BooleanArrayList(x.size());
116
117 for (int i = 0; i < x.size(); i++) {
118 double a = x.getQuick(i);
119 answer.add(!(Double.isNaN(a) || Double.isInfinite(a) || a < TOOSMALL));
120 }
121 return answer;
122 }
123
124
125
126
127
128
129 protected static double[] fitFDist(final DoubleMatrix1D vars, final DoubleMatrix1D df1s) {
130
131 BooleanArrayList ok = MatrixUtil.conjunction(okVars(vars), okDfs(df1s));
132
133 DoubleMatrix1D x = MatrixUtil.stripNonOK(vars, ok);
134 DoubleMatrix1D df1 = MatrixUtil.stripNonOK(df1s, ok);
135
136 if (x.size() == 0) {
137 throw new IllegalStateException("There were no valid values of variance to perform eBayes parameter estimation");
138 }
139
140
141 x = x.assign(Functions.max(1e-5 * DescriptiveWithMissing.median(new DoubleArrayList(vars.toArray()))));
142
143
144 DoubleMatrix1D z = x.copy().assign(Functions.log);
145
146
147 DoubleMatrix1D e1 = df1.copy().assign(Functions.div(2.0)).assign(digamma);
148 DoubleMatrix1D e2 = df1.copy().assign(Functions.div(2.0)).assign(Functions.log);
149 DoubleMatrix1D e = z.copy().assign(e1, Functions.minus).assign(e2, Functions.plus);
150
151 int n = x.size();
152 if (n < 2) {
153 throw new IllegalStateException("Too few valid variance values to do eBayes parameter estimation (require at least 2)");
154 }
155
156
157 double emean = e.zSum() / n;
158
159 double evar = e.copy().assign(Functions.minus(emean)).aggregate(Functions.plus, Functions.square)
160 / (n - 1);
161
162
163 evar = evar - df1.copy().assign(Functions.div(2.0)).assign(trigamma).zSum() / df1.size();
164 double df2;
165 double s20;
166 if (evar > 0.0) {
167
168 df2 = 2 * SpecFunc.trigammaInverse(evar);
169
170
171 s20 = Math.exp(emean + Gamma.digamma(df2 / 2.0) - Math.log(df2 / 2.0));
172
173 } else {
174 df2 = Double.POSITIVE_INFINITY;
175
176 s20 = x.copy().aggregate(Functions.plus, Functions.identity) / x.size();
177 }
178
179 return new double[]{s20, df2};
180 }
181
182
183
184
185
186 @Deprecated
187 private static double[] fitFDistNoMissing(DoubleMatrix1D x, double df1) {
188
189
190 x = x.copy().assign(Functions.max(1e-5));
191
192
193 DoubleMatrix1D e = x.copy().assign(Functions.log).assign(Functions.minus(Gamma.digamma(df1 / 2.0)))
194 .assign(Functions.plus(Math.log(df1 / 2.0)));
195
196 int nok = x.size();
197
198 double emean = e.copy().zSum() / nok;
199
200 double evar = e.copy().assign(Functions.minus(emean)).aggregate(Functions.plus, Functions.square)
201 / (nok - 1);
202
203 evar = evar - Gamma.trigamma(df1 / 2.0);
204
205 double df2;
206 double s20;
207 if (evar > 0) {
208
209 df2 = 2 * SpecFunc.trigammaInverse(evar);
210
211
212 s20 = Math.exp(emean + Gamma.digamma(df2 / 2.0) - Math.log(df2 / 2.0));
213
214 } else {
215 df2 = Double.POSITIVE_INFINITY;
216
217 s20 = x.copy().zSum() / x.size();
218 }
219
220 return new double[]{s20, df2};
221 }
222
223
224
225
226
227
228
229
230
231 protected static DoubleMatrix1D squeezeVar(DoubleMatrix1D var, DoubleMatrix1D df, LeastSquaresFit fit) {
232
233 double[] ffit = fitFDist(var, df);
234 double dfPrior = ffit[1];
235
236 DoubleMatrix1D varPost = squeezeVariances(var, df, ffit);
237
238 if (fit != null)
239 fit.ebayesUpdate(dfPrior, ffit[0], varPost);
240
241 return varPost;
242 }
243
244
245
246
247
248
249
250 protected static DoubleMatrix1D squeezeVariances(DoubleMatrix1D var, DoubleMatrix1D df, double[] fit) {
251 double varPrior = fit[0];
252 double dfPrior = fit[1];
253
254
255
256 DoubleMatrix1D dfTotal = df.copy().assign(Functions.plus(dfPrior));
257
258 return var.copy().assign(df, Functions.mult)
259 .assign(Functions.plus(dfPrior * varPrior)).assign(dfTotal, Functions.div);
260
261 }
262
263 }