1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package ubic.basecode.io.reader;
20
21 import java.io.BufferedReader;
22 import java.io.File;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.text.DecimalFormat;
27 import java.text.NumberFormat;
28 import java.text.ParseException;
29 import java.util.*;
30
31 import org.apache.commons.lang3.StringUtils;
32
33 import ubic.basecode.dataStructure.matrix.DoubleMatrix;
34 import ubic.basecode.dataStructure.matrix.DoubleMatrixFactory;
35 import ubic.basecode.util.FileTools;
36 import cern.colt.list.DoubleArrayList;
37
38
39
40
41
42
43
44 public class DoubleMatrixReader extends AbstractMatrixReader<DoubleMatrix<String, String>, Double> {
45
46 private static NumberFormat nf = NumberFormat.getInstance( Locale.ENGLISH );
47 static {
48 if ( nf instanceof DecimalFormat ) {
49
50 }
51 }
52 private List<String> colNames;
53 private int numHeadings;
54
55
56
57
58
59
60 @Override
61 public DoubleMatrix<String, String> read( InputStream stream ) throws IOException {
62 return read( stream, null, 0 );
63 }
64
65
66
67
68
69
70
71
72 public DoubleMatrix<String, String> read( InputStream stream, Collection<String> wantedRowNames )
73 throws IOException {
74 return read( stream, wantedRowNames, true, 0, -1 );
75 }
76
77
78
79
80
81
82
83
84
85
86 @SuppressWarnings("resource")
87 public DoubleMatrix<String, String> read( InputStream stream, Collection<String> wantedRowNames,
88 boolean createEmptyRows, int skipColumns, int maxRows ) throws IOException {
89
90 BufferedReader dis = new BufferedReader( new InputStreamReader( stream ) );
91
92 List<DoubleArrayList> MTemp = new Vector<DoubleArrayList>();
93
94 List<String> rowNames = new Vector<String>();
95 String row;
96
97
98
99
100
101
102 Collection<String> wantedRowsFound = new HashSet<String>();
103
104 colNames = readHeader( dis, skipColumns );
105
106 numHeadings = colNames.size();
107
108 int rowNumber = 0;
109
110 while ( ( row = dis.readLine() ) != null ) {
111
112 if ( StringUtils.isBlank( row ) ) {
113 continue;
114 }
115
116 String rowName = parseRow( row, rowNames, MTemp, wantedRowNames, skipColumns );
117
118 if ( rowName == null ) {
119
120 continue;
121 }
122
123 if ( wantedRowNames != null ) {
124
125
126 if ( wantedRowsFound.size() >= wantedRowNames.size() ) {
127 assert wantedRowsFound.containsAll( wantedRowNames );
128 log.info( "Found all rows needed" );
129 return createMatrix( MTemp, rowNames, colNames );
130 }
131
132 if ( wantedRowNames.contains( rowName ) ) {
133 wantedRowsFound.add( rowName );
134 }
135 }
136
137 if ( maxRows > 0 && ++rowNumber == maxRows ) break;
138
139 }
140 stream.close();
141
142
143
144
145 if ( wantedRowNames != null && wantedRowNames.size() != wantedRowsFound.size() && createEmptyRows ) {
146 Iterator<String> iterator = wantedRowNames.iterator();
147 while ( iterator.hasNext() ) {
148 String s = iterator.next();
149 if ( !wantedRowsFound.contains( s ) ) {
150 if ( log.isDebugEnabled() ) log.debug( s + " was not found, adding empty row" );
151 DoubleArrayList emptyRow = createEmptyRow( numHeadings );
152 rowNames.add( s );
153 MTemp.add( emptyRow );
154 }
155 }
156 }
157 assert rowNames.size() == MTemp.size();
158 return createMatrix( MTemp, rowNames, colNames );
159
160 }
161
162
163
164
165
166
167
168
169 public DoubleMatrix<String, String> read( InputStream stream, Collection<String> wantedRowNames,
170 int numberOfColumnsToSkip ) throws IOException {
171 return read( stream, wantedRowNames, true, numberOfColumnsToSkip, -1 );
172 }
173
174
175
176
177
178
179 @Override
180 public DoubleMatrix<String, String> read( String filename ) throws IOException {
181 return read( filename, null, -1 );
182 }
183
184
185
186
187
188
189
190
191
192 @SuppressWarnings("resource")
193 public DoubleMatrix<String, String> read( String filename, Collection<String> wantedRowNames ) throws IOException {
194 File infile = new File( filename );
195 if ( !infile.exists() || !infile.canRead() ) {
196 throw new IOException( "Could not read from file " + filename );
197 }
198 InputStream stream = FileTools.getInputStreamFromPlainOrCompressedFile( filename );
199 return read( stream, wantedRowNames, -1 );
200 }
201
202
203
204
205
206
207
208
209
210 @SuppressWarnings("resource")
211 public DoubleMatrix<String, String> read( String fileName, Collection<String> wantedRowNames,
212 int numberOfColumnsToSkip ) throws IOException {
213 File infile = new File( fileName );
214 if ( !infile.exists() || !infile.canRead() ) {
215 throw new IOException( "Could not read from file " + fileName );
216 }
217 InputStream stream = FileTools.getInputStreamFromPlainOrCompressedFile( fileName );
218 return read( stream, wantedRowNames, true, numberOfColumnsToSkip, -1 );
219 }
220
221 @Override
222 public DoubleMatrix<String, String> read( String filename, int maxRows ) throws IOException {
223 return read( filename, null, maxRows );
224 }
225
226 protected DoubleArrayList createEmptyRow( int numColumns ) {
227
228 DoubleArrayList row = new DoubleArrayList();
229 for ( int i = 0; i < numColumns; i++ ) {
230 row.add( Double.NaN );
231 }
232 return row;
233 }
234
235
236
237
238
239 protected DoubleMatrix<String, String> createMatrix( List<DoubleArrayList> MTemp, List<String> rowNames,
240 List<String> colNames1 ) {
241
242 if ( MTemp.isEmpty() ) {
243 throw new IllegalArgumentException( "Must provide vectors" );
244 }
245 DoubleMatrix<String, String> matrix = DoubleMatrixFactory.fastrow( MTemp.size(), MTemp.get( 0 ).size() );
246
247 for ( int i = 0; i < matrix.rows(); i++ ) {
248 for ( int j = 0; j < matrix.columns(); j++ ) {
249 if ( MTemp.get( i ).size() < j + 1 ) {
250 matrix.set( i, j, Double.NaN );
251
252
253 } else {
254 matrix.set( i, j, MTemp.get( i ).elements()[j] );
255 }
256 }
257 }
258
259 assert matrix.rows() == MTemp.size();
260 assert matrix.rows() == rowNames.size();
261 assert matrix.columns() == colNames.size() : "Got " + matrix.columns() + " != " + colNames.size();
262
263 matrix.setRowNames( rowNames );
264 matrix.setColumnNames( colNames1 );
265 return matrix;
266
267 }
268
269
270
271
272
273
274
275
276
277
278
279 private String parseRow( String row, Collection<String> rowNames, List<DoubleArrayList> MTemp,
280 Collection<String> wantedRowNames, int skipColumns ) throws IOException {
281
282 if ( row.startsWith( "#" ) || row.startsWith( "!" ) ) {
283 return null;
284 }
285
286 String[] tokens = StringUtils.splitPreserveAllTokens( row, "\t" );
287
288 DoubleArrayList rowTemp = new DoubleArrayList();
289 int columnNumber = 0;
290 String previousToken = "";
291 String currentRowName = null;
292 for ( int i = 0; i < tokens.length; i++ ) {
293 String tok = tokens[i];
294 boolean missing = false;
295
296 if ( tok.compareTo( "\t" ) == 0 ) {
297
298 if ( previousToken.compareTo( "\t" ) == 0 ) {
299 missing = true;
300 } else if ( i == tokens.length - 1 ) {
301 missing = true;
302 } else {
303 previousToken = tok;
304 continue;
305 }
306 } else if ( StringUtils.isBlank( tok ) || tok.compareTo( "NaN" ) == 0 || tok.compareTo( "NA" ) == 0 ) {
307 missing = true;
308 }
309
310 if ( columnNumber > 0 ) {
311
312 if ( skipColumns > 0 && columnNumber <= skipColumns ) {
313
314 } else if ( missing ) {
315 rowTemp.add( Double.NaN );
316 } else {
317 try {
318
319
320
321
322
323
324
325
326
327 rowTemp.add( nf.parse( tok.toUpperCase() ).doubleValue() );
328 } catch ( ParseException e ) {
329 throw new RuntimeException( e );
330 }
331 }
332 } else {
333
334
335 if ( missing ) {
336 throw new IOException( "Missing values not allowed for row labels ("
337 + StringUtils.abbreviate( row, 20 ) + ")" );
338 }
339
340 currentRowName = tok;
341
342
343 if ( wantedRowNames != null && !wantedRowNames.contains( currentRowName ) ) {
344 return currentRowName;
345 }
346
347 rowNames.add( currentRowName );
348 }
349
350 columnNumber++;
351 previousToken = tok;
352 }
353
354
355 if ( rowTemp.size() > numHeadings ) {
356 throw new IOException( "Too many values (" + rowTemp.size() + ") in row (based on headings count of "
357 + numHeadings + ")" );
358 }
359
360 MTemp.add( rowTemp );
361 return currentRowName;
362
363 }
364
365 }