View Javadoc
1   /*
2    * The baseCode project
3    * 
4    * Copyright (c) 2006 University of British Columbia
5    * 
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.io.reader;
20  
21  import java.io.BufferedReader;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.util.List;
25  import java.util.Vector;
26  
27  import org.apache.commons.lang3.StringUtils;
28  import org.slf4j.Logger;
29  import org.slf4j.LoggerFactory;
30  
31  import ubic.basecode.dataStructure.matrix.Matrix2D;
32  
33  /**
34   * Abstract class representing an object that can read in a {@link Matrix2D}from a file.
35   * 
36   * @author Paul Pavlidis
37   * 
38   */
39  public abstract class AbstractMatrixReader<M extends Matrix2D<String, String, V>, V> {
40  
41      protected static Logger log = LoggerFactory.getLogger( AbstractMatrixReader.class );
42  
43      public abstract M read( InputStream stream ) throws IOException;
44  
45      public abstract M read( String filename ) throws IOException;
46  
47      public abstract M read( String filename, int maxRows ) throws IOException;
48  
49      /**
50       * @param dis
51       * @param skipColumns how many data columns shoul be ignored
52       * @return
53       * @throws IOException
54       */
55      protected List<String> readHeader( BufferedReader dis, int skipColumns ) throws IOException {
56          List<String> headerVec = new Vector<String>();
57          String header = null;
58  
59          /*
60           * Read past comments.
61           */
62          while ( ( header = dis.readLine() ) != null ) {
63              if ( header.startsWith( "#" ) || header.startsWith( "!" ) || StringUtils.isBlank( header ) ) {
64                  continue;
65              }
66              break;
67          }
68  
69          if ( header == null ) return headerVec;
70  
71          if ( header.startsWith( "\t" ) ) header = "c" + header;
72  
73          String[] tokens = StringUtils.splitPreserveAllTokens( header, "\t" );
74          // delims.
75  
76          String previousToken = "";
77          int columnNumber = 0;
78  
79          for ( int i = 0; i < tokens.length; i++ ) {
80              String s = StringUtils.strip( tokens[i], " " );
81              boolean missing = false;
82  
83              if ( s.compareTo( "\t" ) == 0 ) {
84  
85                  if ( previousToken.compareTo( "\t" ) == 0 ) { /* two tabs in a row */
86                      missing = true;
87                  } else if ( i == tokens.length - 1 ) { // at end of line.
88                      missing = true;
89                  } else {
90                      previousToken = s;
91                      continue;
92                  }
93              } else if ( StringUtils.isBlank( s ) ) {
94                  missing = true;
95              }
96  
97              if ( missing ) {
98                  throw new IOException( "Missing values are not allowed in the header (column " + columnNumber + " at '"
99                          + header + "')" );
100             }
101             if ( columnNumber > 0 ) {
102 
103                 if ( skipColumns > 0 && columnNumber <= skipColumns ) {
104 
105                     // ignore, but count it.
106                 } else {
107                     headerVec.add( s );
108                 }
109             } else {
110                 // corner string.
111             }
112             columnNumber++;
113 
114             previousToken = s;
115         }
116 
117         return headerVec;
118 
119     }
120 }