1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package ubic.basecode.io.reader;
20
21 import java.io.BufferedReader;
22 import java.io.File;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.util.List;
27 import java.util.StringTokenizer;
28 import java.util.Vector;
29
30 import org.apache.commons.lang3.StringUtils;
31
32 import ubic.basecode.dataStructure.matrix.StringMatrix;
33 import ubic.basecode.util.FileTools;
34
35
36
37
38
39
40
41 public class StringMatrixReader extends AbstractMatrixReader<StringMatrix<String, String>, String> {
42
43 @Override
44 public StringMatrix<String, String> read( InputStream stream ) throws IOException {
45 return this.read( stream, -1, -1 );
46 }
47
48
49
50
51
52
53
54
55
56 @SuppressWarnings("resource")
57 public StringMatrix<String, String> read( InputStream stream, int maxRows, int numColumnsToSkip )
58 throws IOException {
59 StringMatrix<String, String> matrix = null;
60 List<List<String>> MTemp = new Vector<List<String>>();
61 List<String> rowNames = new Vector<String>();
62 List<String> columnNames;
63 BufferedReader dis = new BufferedReader( new InputStreamReader( stream ) );
64
65 int columnNumber = 0;
66 int rowNumber = 0;
67 String row;
68
69 columnNames = readHeader( dis, -1 );
70 int numHeadings = columnNames.size();
71
72 while ( ( row = dis.readLine() ) != null ) {
73 StringTokenizer st = new StringTokenizer( row, "\t", true );
74 List<String> rowTemp = new Vector<String>();
75 columnNumber = 0;
76 String previousToken = "";
77
78 String rowName = st.nextToken();
79 if ( StringUtils.isBlank( rowName ) ) {
80 throw new IOException( "Missing values not allowed for row labels" );
81 }
82 rowNames.add( rowName );
83
84 while ( st.hasMoreTokens() ) {
85 String s = st.nextToken();
86
87 boolean missing = false;
88
89 if ( s.compareTo( "\t" ) == 0 ) {
90
91 if ( previousToken.compareTo( "\t" ) == 0 ) {
92 missing = true;
93 } else if ( !st.hasMoreTokens() ) {
94 missing = true;
95 } else {
96 previousToken = s;
97 continue;
98 }
99 }
100
101 if ( numColumnsToSkip >= 0 && columnNumber <= numColumnsToSkip ) {
102
103 } else if ( missing ) {
104 rowTemp.add( "" );
105 } else {
106 rowTemp.add( s );
107 }
108 columnNumber++;
109
110 previousToken = s;
111 }
112 MTemp.add( rowTemp );
113 if ( rowTemp.size() > numHeadings ) {
114 throw new IOException( "Warning: too many values (" + rowTemp.size() + ") in row " + rowNumber
115 + " (based on headings count of " + numHeadings + ")" );
116 }
117 rowNumber++;
118
119 if ( maxRows > 0 && rowNumber == maxRows ) break;
120 }
121
122 matrix = new StringMatrix<String, String>( rowNumber, numHeadings );
123 matrix.setColumnNames( columnNames );
124 matrix.setRowNames( rowNames );
125
126 for ( int i = 0; i < matrix.rows(); i++ ) {
127 for ( int j = 0; j < matrix.columns(); j++ ) {
128 if ( MTemp.get( i ).size() < j + 1 ) {
129 matrix.set( i, j, "" );
130
131 } else {
132 matrix.set( i, j, MTemp.get( i ).get( j ) );
133 }
134 }
135 }
136 stream.close();
137 return matrix;
138 }
139
140 @Override
141 public StringMatrix<String, String> read( String filename ) throws IOException {
142 return this.read( filename, -1 );
143 }
144
145
146
147
148
149
150 @Override
151 public StringMatrix<String, String> read( String filename, int maxRows ) throws IOException {
152 return read( filename, maxRows, -1 );
153 }
154
155
156
157
158
159
160
161
162 @SuppressWarnings("resource")
163 public StringMatrix<String, String> read( String filename, int maxRows, int numColumnsToSkip ) throws IOException {
164 File infile = new File( filename );
165 if ( !infile.exists() || !infile.canRead() ) {
166 throw new IllegalArgumentException( "Could not read from " + filename );
167 }
168 InputStream stream = FileTools.getInputStreamFromPlainOrCompressedFile( filename );
169 return read( stream, maxRows, numColumnsToSkip );
170 }
171
172 }