1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  
17  
18  
19  package ubic.basecode.datafilter;
20  
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import ubic.basecode.dataStructure.matrix.Matrix2D;
import ubic.basecode.dataStructure.matrix.MatrixUtil;
import ubic.basecode.dataStructure.matrix.StringMatrix;
27  
28  
29  
30  
31  
32  
33  
34  
35  
36  
37  public class RowAbsentFilter<M extends Matrix2D<R, C, V>, R, C, V> extends AbstractFilter<M, R, C, V> {
38  
39      private boolean countIsSet = false;
40  
41      private StringMatrix<R, C> flags = null;
42      private boolean flagsSet = false;
43      private boolean fractionIsSet = false;
44      private boolean keepMarginal = false;
45      private int minPresentCount = 0;
46      private double minPresentFraction = 0.0;
47  
48      
49  
50  
51  
52  
53  
54  
55      @Override
56      public M filter( M data ) {
57  
58          int numRows = data.rows();
59          int numCols = data.columns();
60  
61          if ( minPresentCount > numCols ) {
62              throw new IllegalStateException( "Minimum present count is set to " + minPresentCount
63                      + " but there are only " + numCols + " columns in the matrix." );
64          }
65  
66          if ( flags == null ) {
67              throw new IllegalStateException( "Flag matrix is null" );
68          }
69  
70          
71          if ( !fractionIsSet && !countIsSet ) {
72              log.info( "No filtering was requested" );
73              return data;
74          }
75  
76          if ( !flagsSet ) {
77              log.info( "No flag matrix was provided." );
78              return data;
79          }
80  
81          validateFlags( data );
82  
83          
84          if ( minPresentFraction == 0.0 && minPresentCount == 0 ) {
85              log.info( "Criteria are set too low to result in any changes to the input." );
86              return data;
87          }
88  
89          List<V[]> MTemp = new Vector<V[]>();
90          List<R> rowNames = new Vector<R>();
91  
92          int kept = 0;
93          for ( int i = 0; i < numRows; i++ ) {
94              R rowName = data.getRowName( i );
95  
96              if ( !flags.containsRowName( rowName ) ) {
97                  log.debug( "Row " + rowName + " not found in flags, skipping." );
98                  continue;
99              }
100 
101             int numPresent = 0;
102             for ( int j = 0; j < numCols; j++ ) {
103                 C colName = data.getColName( j );
104 
105                 if ( !flags.containsColumnName( colName ) ) {
106                     log.debug( "Column " + colName + " not found in flags, skipping." );
107                     continue;
108                 }
109 
110                 
111                 
112                 if ( data.isMissing( i, j ) ) {
113                     
114                     continue;
115                 }
116 
117                 String flag = flags.get( flags.getRowIndexByName( rowName ), flags.getColIndexByName( colName ) );
118 
119                 if ( flags.isMissing( flags.getRowIndexByName( rowName ), flags.getColIndexByName( colName ) ) ) {
120                     log.warn( "Flags had no value for an item, counting as present." );
121                 } else if ( flag.equals( "A" ) ) {
122                     continue;
123                 } else if ( flag.equals( "M" ) && !keepMarginal ) {
124                     continue;
125                 } else if ( !flag.equals( "P" ) && !flag.equals( "M" ) ) {
126                     log.warn( "Found a flag I don't know about, ignoring " + flag + " and counting as present." );
127                 }
128 
129                 numPresent++;
130             }
131 
132             
133             if ( countIsSet && numPresent >= minPresentCount || fractionIsSet
134                     && ( double ) numPresent / numCols >= minPresentFraction ) {
135                 MTemp.add( MatrixUtil.getRow( data, i ) );
136                 rowNames.add( rowName );
137                 kept++;
138             }
139         }
140 
141         M returnval = getOutputMatrix( data, MTemp.size(), numCols );
142         for ( int i = 0; i < MTemp.size(); i++ ) {
143             for ( int j = 0; j < numCols; j++ ) {
144                 returnval.set( i, j, MTemp.get( i )[j] );
145             }
146         }
147         returnval.setColumnNames( data.getColNames() );
148         returnval.setRowNames( rowNames );
149 
150         log.info( "There are " + kept + " rows left after filtering." );
151 
152         return returnval;
153     }
154 
155     
156 
157 
158     public void setFlagMatrix( StringMatrix<R, C> f ) {
159         if ( f == null ) {
160             throw new IllegalArgumentException( "Flag matrix is null" );
161         }
162         flags = f;
163         flagsSet = true;
164     }
165 
166     
167 
168 
169     public void setKeepMarginal( boolean k ) {
170         keepMarginal = k;
171     }
172 
173     
174 
175 
176     public void setMinPresentCount( int k ) {
177         if ( k < 0 ) {
178             throw new IllegalArgumentException( "Minimum present count must be > 0." );
179         }
180         minPresentCount = k;
181         countIsSet = true;
182     }
183 
184     
185 
186 
187     public void setMinPresentFraction( double k ) {
188         if ( k < 0.0 || k > 1.0 )
189             throw new IllegalArgumentException( "Min present fraction must be between 0 and 1, got " + k );
190         minPresentFraction = k;
191         fractionIsSet = true;
192     }
193 
194     
195 
196 
197 
198     private void validateFlags( Matrix2D<?, ?, ?> data ) {
199         if ( flags == null || flags.rows() < data.rows() || flags.columns() < data.columns() ) {
200             throw new IllegalStateException( "Flags do not match data." );
201         }
202     }
203 
204 }