View Javadoc
1   /*
2    * The baseCode project
3    * 
4    * Copyright (c) 2006 University of British Columbia
5    * 
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.io.reader;
20  
21  import java.io.BufferedReader;
22  import java.io.File;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.InputStreamReader;
26  import java.util.List;
27  import java.util.StringTokenizer;
28  import java.util.Vector;
29  
30  import org.apache.commons.lang3.StringUtils;
31  
32  import ubic.basecode.dataStructure.matrix.StringMatrix;
33  import ubic.basecode.util.FileTools;
34  
35  /**
36   * Reader for {@link basecode.dataStructure.matrix.StringMatrix}
37   * 
38   * @author Paul Pavlidis
39   * 
40   */
41  public class StringMatrixReader extends AbstractMatrixReader<StringMatrix<String, String>, String> {
42  
43      @Override
44      public StringMatrix<String, String> read( InputStream stream ) throws IOException {
45          return this.read( stream, -1, -1 );
46      }
47  
48      /**
49       * @param stream
50       * @param maxRows
51       * @param numColumnsToSkip How many data columns to skip. 0 or -1 means none; 1 means one column will be skipped,
52       *        etc.
53       * @return
54       * @throws IOException
55       */
56      @SuppressWarnings("resource")
57      public StringMatrix<String, String> read( InputStream stream, int maxRows, int numColumnsToSkip )
58              throws IOException {
59          StringMatrix<String, String> matrix = null;
60          List<List<String>> MTemp = new Vector<List<String>>();
61          List<String> rowNames = new Vector<String>();
62          List<String> columnNames;
63          BufferedReader dis = new BufferedReader( new InputStreamReader( stream ) );
64          // BufferedReader dis = new BufferedReader( new FileReader( filename ) );
65          int columnNumber = 0;
66          int rowNumber = 0;
67          String row;
68  
69          columnNames = readHeader( dis, -1 );
70          int numHeadings = columnNames.size();
71  
72          while ( ( row = dis.readLine() ) != null ) {
73              StringTokenizer st = new StringTokenizer( row, "\t", true );
74              List<String> rowTemp = new Vector<String>();
75              columnNumber = 0;
76              String previousToken = "";
77  
78              String rowName = st.nextToken();
79              if ( StringUtils.isBlank( rowName ) ) {
80                  throw new IOException( "Missing values not allowed for row labels" );
81              }
82              rowNames.add( rowName );
83  
84              while ( st.hasMoreTokens() ) {
85                  String s = st.nextToken();
86  
87                  boolean missing = false;
88  
89                  if ( s.compareTo( "\t" ) == 0 ) {
90                      /* two tabs in a row */
91                      if ( previousToken.compareTo( "\t" ) == 0 ) {
92                          missing = true;
93                      } else if ( !st.hasMoreTokens() ) { // at end of line.
94                          missing = true;
95                      } else {
96                          previousToken = s;
97                          continue;
98                      }
99                  }
100 
101                 if ( numColumnsToSkip >= 0 && columnNumber <= numColumnsToSkip ) {
102                     // do nothing.
103                 } else if ( missing ) {
104                     rowTemp.add( "" );
105                 } else {
106                     rowTemp.add( s );
107                 }
108                 columnNumber++;
109 
110                 previousToken = s;
111             }
112             MTemp.add( rowTemp );
113             if ( rowTemp.size() > numHeadings ) {
114                 throw new IOException( "Warning: too many values (" + rowTemp.size() + ") in row " + rowNumber
115                         + " (based on headings count of " + numHeadings + ")" );
116             }
117             rowNumber++;
118 
119             if ( maxRows > 0 && rowNumber == maxRows ) break;
120         }
121 
122         matrix = new StringMatrix<String, String>( rowNumber, numHeadings );
123         matrix.setColumnNames( columnNames );
124         matrix.setRowNames( rowNames );
125 
126         for ( int i = 0; i < matrix.rows(); i++ ) {
127             for ( int j = 0; j < matrix.columns(); j++ ) {
128                 if ( MTemp.get( i ).size() < j + 1 ) {
129                     matrix.set( i, j, "" );
130                     // this allows the input file to have ragged ends.
131                 } else {
132                     matrix.set( i, j, MTemp.get( i ).get( j ) );
133                 }
134             }
135         }
136         stream.close();
137         return matrix;
138     }
139 
140     @Override
141     public StringMatrix<String, String> read( String filename ) throws IOException {
142         return this.read( filename, -1 );
143     }
144 
145     /*
146      * (non-Javadoc)
147      * 
148      * @see ubic.basecode.io.reader.AbstractMatrixReader#read(java.lang.String, int)
149      */
150     @Override
151     public StringMatrix<String, String> read( String filename, int maxRows ) throws IOException {
152         return read( filename, maxRows, -1 );
153     }
154 
155     /**
156      * @param filename
157      * @param maxRows
158      * @param numColumnsToSkip
159      * @return
160      * @throws IOException
161      */
162     @SuppressWarnings("resource")
163     public StringMatrix<String, String> read( String filename, int maxRows, int numColumnsToSkip ) throws IOException {
164         File infile = new File( filename );
165         if ( !infile.exists() || !infile.canRead() ) {
166             throw new IllegalArgumentException( "Could not read from " + filename );
167         }
168         InputStream stream = FileTools.getInputStreamFromPlainOrCompressedFile( filename );
169         return read( stream, maxRows, numColumnsToSkip );
170     }
171 
172 }