1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package ubic.basecode.io.reader;
20
21 import java.io.BufferedReader;
22 import java.io.File;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.text.DecimalFormat;
27 import java.text.NumberFormat;
28 import java.text.ParseException;
29 import java.util.*;
30
31 import org.apache.commons.lang3.StringUtils;
32
33 import ubic.basecode.dataStructure.matrix.DoubleMatrix;
34 import ubic.basecode.dataStructure.matrix.DoubleMatrixFactory;
35 import ubic.basecode.util.FileTools;
36 import cern.colt.list.DoubleArrayList;
37
38
39
40
41
42
43
44 public class DoubleMatrixReader extends AbstractMatrixReader<DoubleMatrix<String, String>, Double> {
45
46 private List<String> colNames;
47 private int numHeadings;
48
49
50
51
52
53
54 @Override
55 public DoubleMatrix<String, String> read( InputStream stream ) throws IOException {
56 return read( stream, null, 0 );
57 }
58
59
60
61
62
63
64
65
66 public DoubleMatrix<String, String> read( InputStream stream, Collection<String> wantedRowNames )
67 throws IOException {
68 return read( stream, wantedRowNames, true, 0, -1 );
69 }
70
71
72
73
74
75
76
77
78
79
80 @SuppressWarnings("resource")
81 public DoubleMatrix<String, String> read( InputStream stream, Collection<String> wantedRowNames,
82 boolean createEmptyRows, int skipColumns, int maxRows ) throws IOException {
83
84 NumberFormat nf = NumberFormat.getInstance( Locale.ENGLISH );
85 if ( nf instanceof DecimalFormat ) {
86
87 }
88
89 BufferedReader dis = new BufferedReader( new InputStreamReader( stream ) );
90
91 List<DoubleArrayList> MTemp = new Vector<DoubleArrayList>();
92
93 List<String> rowNames = new Vector<String>();
94 String row;
95
96
97
98
99
100
101 Collection<String> wantedRowsFound = new HashSet<String>();
102
103 colNames = readHeader( dis, skipColumns );
104
105 numHeadings = colNames.size();
106
107 int rowNumber = 0;
108
109 while ( ( row = dis.readLine() ) != null ) {
110
111 if ( StringUtils.isBlank( row ) ) {
112 continue;
113 }
114
115 String rowName = parseRow( row, rowNames, MTemp, wantedRowNames, skipColumns, nf );
116
117 if ( rowName == null ) {
118
119 continue;
120 }
121
122 if ( wantedRowNames != null ) {
123
124
125 if ( wantedRowsFound.size() >= wantedRowNames.size() ) {
126 assert wantedRowsFound.containsAll( wantedRowNames );
127 log.info( "Found all rows needed" );
128 return createMatrix( MTemp, rowNames, colNames );
129 }
130
131 if ( wantedRowNames.contains( rowName ) ) {
132 wantedRowsFound.add( rowName );
133 }
134 }
135
136 if ( maxRows > 0 && ++rowNumber == maxRows ) break;
137
138 }
139 stream.close();
140
141
142
143
144 if ( wantedRowNames != null && wantedRowNames.size() != wantedRowsFound.size() && createEmptyRows ) {
145 Iterator<String> iterator = wantedRowNames.iterator();
146 while ( iterator.hasNext() ) {
147 String s = iterator.next();
148 if ( !wantedRowsFound.contains( s ) ) {
149 if ( log.isDebugEnabled() ) log.debug( s + " was not found, adding empty row" );
150 DoubleArrayList emptyRow = createEmptyRow( numHeadings );
151 rowNames.add( s );
152 MTemp.add( emptyRow );
153 }
154 }
155 }
156 assert rowNames.size() == MTemp.size();
157 return createMatrix( MTemp, rowNames, colNames );
158
159 }
160
161
162
163
164
165
166
167
168 public DoubleMatrix<String, String> read( InputStream stream, Collection<String> wantedRowNames,
169 int numberOfColumnsToSkip ) throws IOException {
170 return read( stream, wantedRowNames, true, numberOfColumnsToSkip, -1 );
171 }
172
173
174
175
176
177
178 @Override
179 public DoubleMatrix<String, String> read( String filename ) throws IOException {
180 return read( filename, null, -1 );
181 }
182
183
184
185
186
187
188
189
190
191 @SuppressWarnings("resource")
192 public DoubleMatrix<String, String> read( String filename, Collection<String> wantedRowNames ) throws IOException {
193 File infile = new File( filename );
194 if ( !infile.exists() || !infile.canRead() ) {
195 throw new IOException( "Could not read from file " + filename );
196 }
197 InputStream stream = FileTools.getInputStreamFromPlainOrCompressedFile( filename );
198 return read( stream, wantedRowNames, -1 );
199 }
200
201
202
203
204
205
206
207
208
209 @SuppressWarnings("resource")
210 public DoubleMatrix<String, String> read( String fileName, Collection<String> wantedRowNames,
211 int numberOfColumnsToSkip ) throws IOException {
212 File infile = new File( fileName );
213 if ( !infile.exists() || !infile.canRead() ) {
214 throw new IOException( "Could not read from file " + fileName );
215 }
216 InputStream stream = FileTools.getInputStreamFromPlainOrCompressedFile( fileName );
217 return read( stream, wantedRowNames, true, numberOfColumnsToSkip, -1 );
218 }
219
220 @Override
221 public DoubleMatrix<String, String> read( String filename, int maxRows ) throws IOException {
222 return read( filename, null, maxRows );
223 }
224
225 protected DoubleArrayList createEmptyRow( int numColumns ) {
226
227 DoubleArrayList row = new DoubleArrayList();
228 for ( int i = 0; i < numColumns; i++ ) {
229 row.add( Double.NaN );
230 }
231 return row;
232 }
233
234
235
236
237
238 protected DoubleMatrix<String, String> createMatrix( List<DoubleArrayList> MTemp, List<String> rowNames,
239 List<String> colNames1 ) {
240
241 if ( MTemp.isEmpty() ) {
242 throw new IllegalArgumentException( "Must provide vectors" );
243 }
244 DoubleMatrix<String, String> matrix = DoubleMatrixFactory.fastrow( MTemp.size(), MTemp.get( 0 ).size() );
245
246 for ( int i = 0; i < matrix.rows(); i++ ) {
247 for ( int j = 0; j < matrix.columns(); j++ ) {
248 if ( MTemp.get( i ).size() < j + 1 ) {
249 matrix.set( i, j, Double.NaN );
250
251
252 } else {
253 matrix.set( i, j, MTemp.get( i ).elements()[j] );
254 }
255 }
256 }
257
258 assert matrix.rows() == MTemp.size();
259 assert matrix.rows() == rowNames.size();
260 assert matrix.columns() == colNames.size() : "Got " + matrix.columns() + " != " + colNames.size();
261
262 matrix.setRowNames( rowNames );
263 matrix.setColumnNames( colNames1 );
264 return matrix;
265
266 }
267
268
269
270
271
272
273
274
275
276
277
278 private String parseRow( String row, Collection<String> rowNames, List<DoubleArrayList> MTemp,
279 Collection<String> wantedRowNames, int skipColumns, NumberFormat nf ) throws IOException {
280
281 if ( row.startsWith( "#" ) || row.startsWith( "!" ) ) {
282 return null;
283 }
284
285 String[] tokens = StringUtils.splitPreserveAllTokens( row, "\t" );
286
287 DoubleArrayList rowTemp = new DoubleArrayList();
288 int columnNumber = 0;
289 String previousToken = "";
290 String currentRowName = null;
291 for ( int i = 0; i < tokens.length; i++ ) {
292 String tok = tokens[i];
293 boolean missing = false;
294
295 if ( tok.compareTo( "\t" ) == 0 ) {
296
297 if ( previousToken.compareTo( "\t" ) == 0 ) {
298 missing = true;
299 } else if ( i == tokens.length - 1 ) {
300 missing = true;
301 } else {
302 previousToken = tok;
303 continue;
304 }
305 } else if ( StringUtils.isBlank( tok ) || tok.compareTo( "NaN" ) == 0 || tok.compareTo( "NA" ) == 0 ) {
306 missing = true;
307 }
308
309 if ( columnNumber > 0 ) {
310
311 if ( skipColumns > 0 && columnNumber <= skipColumns ) {
312
313 } else if ( missing ) {
314 rowTemp.add( Double.NaN );
315 } else {
316 try {
317
318
319
320
321
322
323
324
325
326 rowTemp.add( nf.parse( tok.toUpperCase() ).doubleValue() );
327 } catch ( ParseException e ) {
328 throw new RuntimeException( e );
329 }
330 }
331 } else {
332
333
334 if ( missing ) {
335 throw new IOException( "Missing values not allowed for row labels ("
336 + StringUtils.abbreviate( row, 20 ) + ")" );
337 }
338
339 currentRowName = tok;
340
341
342 if ( wantedRowNames != null && !wantedRowNames.contains( currentRowName ) ) {
343 return currentRowName;
344 }
345
346 rowNames.add( currentRowName );
347 }
348
349 columnNumber++;
350 previousToken = tok;
351 }
352
353
354 if ( rowTemp.size() > numHeadings ) {
355 throw new IOException( "Too many values (" + rowTemp.size() + ") in row (based on headings count of "
356 + numHeadings + ")" );
357 }
358
359 MTemp.add( rowTemp );
360 return currentRowName;
361
362 }
363
364 }