View Javadoc
1   /*
2    * The baseCode project
3    *
4    * Copyright (c) 2017 University of British Columbia
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  
20  package ubic.basecode.math.linearmodels;
21  
22  import static org.junit.Assert.*;
23  
24  import java.util.Map;
25  import java.util.zip.GZIPInputStream;
26  
27  import org.apache.commons.lang3.ArrayUtils;
28  import org.junit.Test;
29  
30  import cern.colt.matrix.DoubleMatrix1D;
31  import cern.colt.matrix.impl.DenseDoubleMatrix1D;
32  import ubic.basecode.dataStructure.matrix.DoubleMatrix;
33  import ubic.basecode.dataStructure.matrix.StringMatrix;
34  import ubic.basecode.io.reader.DoubleMatrixReader;
35  import ubic.basecode.io.reader.StringMatrixReader;
36  import ubic.basecode.util.RegressionTesting;
37  
38  /**
39   * TODO Document Me
40   *
41   * @author paul
42   */
43  public class ModeratedTstatTest {
44  
45      @Test
46      public void testLimma() throws Exception {
47  
48          DoubleMatrixReader f = new DoubleMatrixReader();
49          DoubleMatrix<String, String> testMatrix = f.read(this.getClass().getResourceAsStream(
50                  "/data/limmatest.data.txt"));
51  
52          StringMatrixReader of = new StringMatrixReader();
53          StringMatrix<String, String> sampleInfo = of.read(this.getClass()
54                  .getResourceAsStream("/data/limmatest.design.txt"));
55  
56          DesignMatrix d = new DesignMatrix(sampleInfo, true);
57  
58          LeastSquaresFit fit = new LeastSquaresFit(d, testMatrix);
59          Map<String, LinearModelSummary> sums = fit.summarizeByKeys(true);
60          assertEquals(100, sums.size());
61  
62          for (LinearModelSummary lms : sums.values()) {
63              GenericAnovaResult a = lms.getAnova();
64              assertNotNull(a);
65          }
66  
67          LinearModelSummary s = sums.get("Gene 1");
68          assertNotNull(s);
69          assertEquals(0.2264, s.getContrastCoefficients().get(0, 0), 0.001);
70          //
71          f = new DoubleMatrixReader();
72  
73          DoubleMatrix<String, String> expectedEffects = f.read(this.getClass().getResourceAsStream(
74                  "/data/limmatest.fit.effects.txt"));
75          double[] expEffects1 = expectedEffects.getRowByName("Gene 1");
76  
77          Double[] effects = ArrayUtils.subarray(s.getEffects(), 0, 2);
78          assertArrayEquals(ArrayUtils.subarray(expEffects1, 0, 2), ArrayUtils.toPrimitive(effects), 1e-7);
79  
80          Double[] stdevUnscaled = s.getStdevUnscaled(); //
81          assertEquals(0.5773502692, stdevUnscaled[0], 1e-8);
82          assertEquals(0.8164965809, stdevUnscaled[1], 1e-8);
83  
84          Double sigma = s.getSigma();
85          assertEquals(0.3069360050, sigma, 0.0001);
86  
87          ModeratedTstat.ebayes(fit);
88          DoubleMatrix1D squeezedVars = fit.getVarPost();
89          DoubleMatrix<String, String> expectedvars = f.read(this.getClass().getResourceAsStream(
90                  "/data/limmatest.fit.squeezevar.txt"));
91  
92          assertArrayEquals(expectedvars.viewColumn(0).toArray(), squeezedVars.toArray(), 1e-7);
93  
94          sums = fit.summarizeByKeys(true);
95  
96      }
97  
98  
99      @Test
100     public void testLimmaMissing() throws Exception {
101 
102         DoubleMatrixReader f = new DoubleMatrixReader();
103         DoubleMatrix<String, String> testMatrix = f.read(this.getClass().getResourceAsStream(
104                 "/data/limmatest.data.missing.txt"));
105 
106         StringMatrixReader of = new StringMatrixReader();
107         StringMatrix<String, String> sampleInfo = of.read(this.getClass()
108                 .getResourceAsStream("/data/limmatest.design.txt"));
109 
110         DesignMatrix d = new DesignMatrix(sampleInfo, true);
111 
112         LeastSquaresFit fit = new LeastSquaresFit(d, testMatrix);
113         Map<String, LinearModelSummary> sums = fit.summarizeByKeys(true);
114         assertEquals(100, sums.size());
115 
116         for (LinearModelSummary lms : sums.values()) {
117             GenericAnovaResult a = lms.getAnova();
118             assertNotNull(a);
119         }
120 
121         LinearModelSummary s = sums.get("Gene 4"); // has missing
122         assertNotNull(s);
123         assertEquals(-0.2289641839, s.getContrastCoefficients().get(0, 0), 0.001);
124         //
125         f = new DoubleMatrixReader();
126 
127 
128         Double[] stdevUnscaled = s.getStdevUnscaled(); //
129         assertEquals(0.70710678118654757274, stdevUnscaled[0], 1e-8);
130         assertEquals(1.00, stdevUnscaled[1], 1e-8);
131 
132         Double sigma = s.getSigma();
133         assertEquals(0.054738603786, sigma, 0.0001);
134 
135         ModeratedTstat.ebayes(fit);
136         DoubleMatrix1D squeezedVars = fit.getVarPost();
137         DoubleMatrix<String, String> expectedvars = f.read(this.getClass().getResourceAsStream(
138                 "/data/limmatest.fit.squeezevar.missing.txt"));
139 
140         assertArrayEquals(expectedvars.viewColumn(0).toArray(), squeezedVars.toArray(), 1e-7);
141 
142         sums = fit.summarizeByKeys(true);
143 
144     }
145 
146     @Test
147     public void testFdist() {
148         double[] x = new double[]{0.30232520254346584299,
149                 1.2587139907883573287,
150                 2.0240947459164679856,
151                 1.3173359748527122548,
152                 1.2939746589607614702,
153                 0.94840671150632327446,
154                 0.61447313184876728442,
155                 0.8902037123242685368,
156                 1.6434385371581794466,
157                 1.5338456384344794081,
158                 1.8693396370149581998,
159                 2.4611664510167110542,
160                 0.63397720519047617849,
161                 1.7349622569136011752,
162                 1.0498928059690204595,
163                 0.47002356809001444304,
164                 0.96196714542170125295,
165                 1.5599374206292948575,
166                 0.71751554483321655642,
167                 1.063604497013816097,
168                 1.3053827692251576131,
169                 2.3189475705362436742,
170                 1.3580619393152200125,
171                 0.95174911537147166563,
172                 0.43469878517674137575,
173                 2.2143189808841414745,
174                 2.9474034257347128118,
175                 1.1991277120337802131,
176                 1.1784342356369026383,
177                 1.5785961856334371767,
178                 1.7796216292386706215,
179                 0.74972769329538446748,
180                 1.0536395435846341861,
181                 0.43047612065723450669,
182                 1.6601774958162751616,
183                 1.2771936358386075661,
184                 0.46603761724121628429,
185                 0.79398642097171134857,
186                 1.1773795371506889929,
187                 1.2395685899737831637,
188                 0.46982243224851727437,
189                 1.4858828634002363422,
190                 1.6126603974023236976,
191                 0.4268711755609173597,
192                 2.0034615521916472325,
193                 2.4145616290101932222,
194                 3.0194248816641504618,
195                 3.0159052575760272319,
196                 8.0343270926054497494,
197                 0.13498271661933478049,
198                 1.6427943654309600241,
199                 1.1641007130379361634,
200                 1.4911776412456962948,
201                 0.76670333277130309213,
202                 1.4521154624148258083,
203                 1.6509375299627260247,
204                 0.24270003666993020253,
205                 1.4791928651055363808,
206                 1.1217945174234764671,
207                 1.3626624361552859277,
208                 0.33959672095353571342,
209                 0.34115286592320381853,
210                 1.2846832678451005627,
211                 0.74447709249622817662,
212                 2.7931665832011107753,
213                 0.43258102179980667534,
214                 1.8698561980559311735,
215                 1.3895517560704246929,
216                 0.87525496922730172678,
217                 1.2515572943043009602,
218                 0.89447243726299607847,
219                 1.047010435680864715,
220                 2.4232153061673851191,
221                 1.7072766276669044672,
222                 1.2815027503208382686,
223                 0.88618459460442955411,
224                 0.75360919523644709361,
225                 0.464584291292655438,
226                 0.41517499402172436396,
227                 0.46995735844141434123,
228                 0.49873665629095587093,
229                 0.47448118961970647822,
230                 0.65749687756215469125,
231                 1.1703813632124464572,
232                 0.96751033642495487541,
233                 3.1811230382103570236,
234                 1.7676684213405762236,
235                 2.3023718075763692781,
236                 1.0511880790473360214,
237                 1.1018508289489394869,
238                 0.66448593865758720511,
239                 1.2717064171054943689,
240                 0.28427148223900100543,
241                 0.60061815966622811302,
242                 0.62610631792546678209,
243                 0.58023360243084076693,
244                 0.73838780576776485987,
245                 1.6227135882232157638,
246                 0.59470012513348813332,
247                 0.91864958973763821692};
248 
249         // x was generated with x <- rf(100,df1=8,df2=16).
250         // fitFDist(x, df1 = 8)
251         DenseDoubleMatrix1D dfs = new DenseDoubleMatrix1D(x.length);
252         dfs.assign(8);
253         double[] expected = new double[]{1.1056298866365792399, 14.544682227013733922};
254         //   double[] actualold = ModeratedTstat.fitFDistNoMissing(new DenseDoubleMatrix1D(x), 8);
255         double[] actual = ModeratedTstat.fitFDist(new DenseDoubleMatrix1D(x), dfs);
256         assertTrue(RegressionTesting.closeEnough(actual, expected, 1e-10));
257 
258     }
259 
260     @Test
261     public void testSqueezeVar() {
262 
263         double[] s2 = new double[]{1.7837804639416141583,
264                 0.32411186620655069168,
265                 0.18750362204593082338,
266                 0.7340608406949435949,
267                 0.84846200919003345042,
268                 0.49854708464318148176,
269                 0.87067912044480400002,
270                 2.1014900506235241195,
271                 0.49783053618211009494,
272                 1.9627067036621308471,
273                 1.0949289573268001785,
274                 0.45883145861230423268,
275                 2.2923386473988114354,
276                 2.7849256010365417424,
277                 0.73148557590083596036,
278                 1.4929731181234273674,
279                 1.001362671921577352,
280                 1.6273398718171947497,
281                 0.56578600627572539494,
282                 0.48078128591210089748};
283 
284         // s2 <- rchisq(20,df=5)/5
285 
286         //    squeezeVar(s2, df=5)$var.post
287         double[] expected = new double[]{1.1346316090723296277,
288                 1.0235740856472144156,
289                 1.0131803748422600897,
290                 1.0547646708992308717,
291                 1.0634687770708084464,
292                 1.0368458264983575479,
293                 1.0651591452526298909,
294                 1.1588042465600849606,
295                 1.0367913085772184623,
296                 1.148245045087848748,
297                 1.0822209848724206882,
298                 1.0338241001454639978,
299                 1.1733247839888614195,
300                 1.2108028027853292574,
301                 1.0545687342800276198,
302                 1.1125058034815864527,
303                 1.0751020813423211031,
304                 1.1227289725756237626,
305                 1.0419616371185715931,
306                 1.0354941322769402046};
307 
308         DenseDoubleMatrix1D dfs = new DenseDoubleMatrix1D(s2.length);
309         dfs.assign(5);
310         double[] actual = ModeratedTstat.squeezeVar(new DenseDoubleMatrix1D(s2), dfs, null).toArray();
311         assertTrue(RegressionTesting.closeEnough(actual, expected, 1e-10));
312 
313     }
314 
315 
316     @Test
317     public void testSqueezeVarWMissing() {
318 
319         double[] s2 = new double[]{1.7837804639416141583,
320                 0.32411186620655069168,
321                 0.18750362204593082338,
322                 0.7340608406949435949,
323                 0.84846200919003345042,
324                 0.49854708464318148176,
325                 0.87067912044480400002,
326                 2.1014900506235241195,
327                 0.49783053618211009494,
328                 Double.NaN,
329                 1.0949289573268001785,
330                 0.45883145861230423268,
331                 2.2923386473988114354,
332                 2.7849256010365417424,
333                 0.73148557590083596036,
334                 1.4929731181234273674,
335                 1.001362671921577352,
336                 1.6273398718171947497,
337                 0.56578600627572539494,
338                 0.48078128591210089748};
339 
340         // s2 <- rchisq(20,df=5)/5
341 
342         //    squeezeVar(s2, df=5)$var.post
343         double[] expected = new double[]{1.08541236147134,
344                 0.997928715983356, 0.989741249854792, 1.02249856067821, 1.02935506994584,
345                 1.00838330056098, 1.03068662827887, 1.10445393905446, 1.00834035501044, Double.NaN, 1.04412679768511,
346                 1.00600298782876, 1.11589224156236, 1.1454149034124,
347                 1.02234421499136, 1.06798314035476, 1.03851900441291, 1.07603626519677, 1.01241319199814, 1.00731852678945};
348 
349         DenseDoubleMatrix1D dfs = new DenseDoubleMatrix1D(s2.length);
350         dfs.assign(5);
351         double[] actual = ModeratedTstat.squeezeVar(new DenseDoubleMatrix1D(s2), dfs, null).toArray();
352         assertTrue(RegressionTesting.closeEnough(actual, expected, 1e-10));
353 
354     }
355 
356     /**
357      * Test based on GSE112792; see test-squeezevar.R
358      *
359      * @throws Exception
360      */
361     @Test
362     public void testSqueezeVarB() throws Exception {
363         DoubleMatrixReader f = new DoubleMatrixReader();
364 
365         // variances from limma
366         DoubleMatrix<String, String> testMatrix = f.read(new GZIPInputStream(this.getClass().getResourceAsStream(
367                 "/data/test-vars.gz")));
368         DoubleMatrix1D vars = new DenseDoubleMatrix1D(testMatrix.getColumn(0));
369         DoubleMatrix1D df1s = new DenseDoubleMatrix1D(vars.size());
370 
371         df1s.assign(a -> 6);
372         double[] actual = ModeratedTstat.fitFDist(vars, df1s);
373         double[] expected = new double[]{0.031699809959242764013,1.5383043501108832896};
374         assertTrue(RegressionTesting.closeEnough(expected, actual, 1e-10));
375 
376         DoubleMatrix1D squeezedActual =        ModeratedTstat.squeezeVariances(vars, df1s, actual);
377 
378         DoubleMatrix<String, String> testSqueezed = f.read(new GZIPInputStream(this.getClass().getResourceAsStream(
379                 "/data/test-var.post.gz")));
380 
381         assertTrue(RegressionTesting.closeEnough(testSqueezed.getColumn(0), squeezedActual.toArray(), 1e-10));
382     }
383 
384 }