View Javadoc
1   /*
2    * The baseCode project
3    * 
4    * Copyright (c) 2006 University of British Columbia
5    * 
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.datafilter;
20  
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import ubic.basecode.dataStructure.matrix.Matrix2D;
import ubic.basecode.dataStructure.matrix.MatrixUtil;
import ubic.basecode.dataStructure.matrix.StringMatrix;
27  
28  /**
29   * Filter a data matrix according to flags given in a separate matrix.
30   * <p>
31   * The flags can be 'A', 'P' or 'M', for absent, present and marginal, following the Affymetrix convention. By default,
32   * Marginal flags are counted as "absent", but this can be changed by the user.
33   * 
34   * @author Paul Pavlidis
35   * 
36   */
37  public class RowAbsentFilter<M extends Matrix2D<R, C, V>, R, C, V> extends AbstractFilter<M, R, C, V> {
38  
39      private boolean countIsSet = false;
40  
41      private StringMatrix<R, C> flags = null;
42      private boolean flagsSet = false;
43      private boolean fractionIsSet = false;
44      private boolean keepMarginal = false;
45      private int minPresentCount = 0;
46      private double minPresentFraction = 0.0;
47  
48      /**
49       * The data is going to be filtered in accordance to strings in 'flags'. These are either 'A', 'P' or 'M' for
50       * absent, present and marginal.
51       * 
52       * @param data The input matrix
53       * @return Matrix after filtering.
54       */
55      @Override
56      public M filter( M data ) {
57  
58          int numRows = data.rows();
59          int numCols = data.columns();
60  
61          if ( minPresentCount > numCols ) {
62              throw new IllegalStateException( "Minimum present count is set to " + minPresentCount
63                      + " but there are only " + numCols + " columns in the matrix." );
64          }
65  
66          if ( flags == null ) {
67              throw new IllegalStateException( "Flag matrix is null" );
68          }
69  
70          // no filtering requested.
71          if ( !fractionIsSet && !countIsSet ) {
72              log.info( "No filtering was requested" );
73              return data;
74          }
75  
76          if ( !flagsSet ) {
77              log.info( "No flag matrix was provided." );
78              return data;
79          }
80  
81          validateFlags( data );
82  
83          // nothing will happen.
84          if ( minPresentFraction == 0.0 && minPresentCount == 0 ) {
85              log.info( "Criteria are set too low to result in any changes to the input." );
86              return data;
87          }
88  
89          List<V[]> MTemp = new Vector<V[]>();
90          List<R> rowNames = new Vector<R>();
91  
92          int kept = 0;
93          for ( int i = 0; i < numRows; i++ ) {
94              R rowName = data.getRowName( i );
95  
96              if ( !flags.containsRowName( rowName ) ) {
97                  log.debug( "Row " + rowName + " not found in flags, skipping." );
98                  continue;
99              }
100 
101             int numPresent = 0;
102             for ( int j = 0; j < numCols; j++ ) {
103                 C colName = data.getColName( j );
104 
105                 if ( !flags.containsColumnName( colName ) ) {
106                     log.debug( "Column " + colName + " not found in flags, skipping." );
107                     continue;
108                 }
109 
110                 // count missing values in the data as "absent", whatever the
111                 // flag really is.
112                 if ( data.isMissing( i, j ) ) {
113                     // log.debug( "Found missing data, counting as absent." );
114                     continue;
115                 }
116 
117                 String flag = flags.get( flags.getRowIndexByName( rowName ), flags.getColIndexByName( colName ) );
118 
119                 if ( flags.isMissing( flags.getRowIndexByName( rowName ), flags.getColIndexByName( colName ) ) ) {
120                     log.warn( "Flags had no value for an item, counting as present." );
121                 } else if ( flag.equals( "A" ) ) {
122                     continue;
123                 } else if ( flag.equals( "M" ) && !keepMarginal ) {
124                     continue;
125                 } else if ( !flag.equals( "P" ) && !flag.equals( "M" ) ) {
126                     log.warn( "Found a flag I don't know about, ignoring " + flag + " and counting as present." );
127                 }
128 
129                 numPresent++;
130             }
131 
132             /* decide whether this row is a keeper */
133             if ( countIsSet && numPresent >= minPresentCount || fractionIsSet
134                     && ( double ) numPresent / numCols >= minPresentFraction ) {
135                 MTemp.add( MatrixUtil.getRow( data, i ) );
136                 rowNames.add( rowName );
137                 kept++;
138             }
139         }
140 
141         M returnval = getOutputMatrix( data, MTemp.size(), numCols );
142         for ( int i = 0; i < MTemp.size(); i++ ) {
143             for ( int j = 0; j < numCols; j++ ) {
144                 returnval.set( i, j, MTemp.get( i )[j] );
145             }
146         }
147         returnval.setColumnNames( data.getColNames() );
148         returnval.setRowNames( rowNames );
149 
150         log.info( "There are " + kept + " rows left after filtering." );
151 
152         return returnval;
153     }
154 
155     /**
156      * @param f the matrix containing the flags.
157      */
158     public void setFlagMatrix( StringMatrix<R, C> f ) {
159         if ( f == null ) {
160             throw new IllegalArgumentException( "Flag matrix is null" );
161         }
162         flags = f;
163         flagsSet = true;
164     }
165 
166     /**
167      * @param k whether to count 'marginal' as 'present'. Default is false.
168      */
169     public void setKeepMarginal( boolean k ) {
170         keepMarginal = k;
171     }
172 
173     /**
174      * @param k the minimum number of present values there must be in order to keep the row.
175      */
176     public void setMinPresentCount( int k ) {
177         if ( k < 0 ) {
178             throw new IllegalArgumentException( "Minimum present count must be > 0." );
179         }
180         minPresentCount = k;
181         countIsSet = true;
182     }
183 
184     /**
185      * @param k the minimum fraction of present values that there must be, in order to keep the row.
186      */
187     public void setMinPresentFraction( double k ) {
188         if ( k < 0.0 || k > 1.0 )
189             throw new IllegalArgumentException( "Min present fraction must be between 0 and 1, got " + k );
190         minPresentFraction = k;
191         fractionIsSet = true;
192     }
193 
194     /**
195      * @param data NamedMatrix
196      * @todo this should check more carefully - actually test that the rows are all the same.
197      */
198     private void validateFlags( Matrix2D<?, ?, ?> data ) {
199         if ( flags == null || flags.rows() < data.rows() || flags.columns() < data.columns() ) {
200             throw new IllegalStateException( "Flags do not match data." );
201         }
202     }
203 
204 }