View Javadoc
1   /*
2    * The baseCode project
3    * 
4    * Copyright (c) 2007-2019 University of British Columbia
5    * 
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.dataStructure.matrix;
20  
21  import java.io.File;
22  import java.io.FileWriter;
23  import java.io.IOException;
24  import java.util.List;
25  
26  import no.uib.cipr.matrix.sparse.FlexCompRowMatrix;
27  import no.uib.cipr.matrix.sparse.SparseVector;
28  
29  /**
30   * Named compressed sparse bit matrix.
31   * 
32   * @author xwan
33   * 
34   */
35  public class CompressedBitMatrix<R, C> extends AbstractMatrix<R, C, double[]> implements ObjectMatrix<R, C, double[]> {
36  
37      public static long BIT1 = 0x1L;
38  
39      public static int BITS_PER_ELEMENT = Double.SIZE - 1;
40  
41      private static final long serialVersionUID = 1775002416710933373L;
42  
43      private FlexCompRowMatrix[] matrix;
44  
45      private int rows = 0, cols = 0;
46  
47      private int totalBitsPerItem;
48  
49      /**
50       * Constructs a matrix with specified rows, columns, and total bits per cell
51       * <p>
52       * Implementation note: this is created by maintaining one or more Double matrices; the Doubles are used as bit
53       * fields.
54       * 
55       * @param rows - number of rows in the matrix
56       * @param cols - number of columns in the matrix
57       * @param totalBitsPerItem - the number of bits for each cell
58       */
59      public CompressedBitMatrix( int rows, int cols, int totalBitsPerItem ) {
60          super();
61          // calculate number of matrices required
62          int num = ( int ) Math.floor( ( double ) totalBitsPerItem / BITS_PER_ELEMENT ) + 1;
63          matrix = new FlexCompRowMatrix[num];
64          for ( int i = 0; i < num; i++ ) {
65              matrix[i] = new FlexCompRowMatrix( rows, cols );
66          }
67          this.totalBitsPerItem = totalBitsPerItem;
68          this.rows = rows;
69          this.cols = cols;
70      }
71  
72      /**
73       * Count the number of one-bits at the specified cell position
74       * 
75       * @param r
76       * @param c
77       * @return
78       */
79      public int bitCount( int r, int c ) {
80          int bits = 0;
81          if ( r > this.rows || c > this.cols ) return bits;
82          for ( FlexCompRowMatrix cell : this.matrix ) {
83              double val = cell.get( r, c );
84              if ( val != 0 ) bits = bits + countBits( val );
85          }
86          return bits;
87      }
88  
89      @Override
90      public int columns() {
91          return this.cols;
92      }
93  
94      /*
95       * (non-Javadoc)
96       * 
97       * @see ubic.basecode.dataStructure.matrix.ObjectMatrix#get(int, int)
98       */
99      @Override
100     public double[] get( int row, int col ) {
101         double[] a = new double[matrix.length];
102         for ( int i = 0; i < a.length; i++ ) {
103             a[i] = matrix[i].get( row, col );
104         }
105         return a;
106     }
107 
108     /**
109      * Checks the bit of the specified cell at the specified index.
110      * 
111      * @param row - matrix row
112      * @param col - matrix column
113      * @param index - bit vector index
114      * @return true if bit is 1, false if 0.
115      */
116     public boolean get( int row, int col, int index ) {
117         if ( index >= this.totalBitsPerItem || row > this.rows || col > this.cols ) {
118             throw new ArrayIndexOutOfBoundsException( "Attempt to access row=" + row + " col=" + col + " index="
119                     + index );
120         }
121         int num = ( int ) Math.floor( ( double ) index / CompressedBitMatrix.BITS_PER_ELEMENT );
122         int bit_index = index % CompressedBitMatrix.BITS_PER_ELEMENT;
123         long binVal = Double.doubleToRawLongBits( matrix[num].get( row, col ) );
124         long res = binVal & CompressedBitMatrix.BIT1 << bit_index;
125         return res != 0;
126     }
127 
128     /**
129      * Returns all of the bits for a cell
130      * 
131      * @param row - the cell row
132      * @param col - the cell column
133      * @return all the bits encoded as an array of <code>longs</code>
134      */
135     public long[] getAllBits( int row, int col ) {
136         long[] allBits = new long[this.matrix.length];
137         for ( int i = 0; i < this.matrix.length; i++ )
138             allBits[i] = Double.doubleToRawLongBits( this.matrix[i].get( row, col ) );
139         return allBits;
140     }
141 
142     /**
143      * Returns the total number of bits in a matrix cell
144      * 
145      * @return the number of bits per cell
146      */
147     public int getBitNum() {
148         return this.totalBitsPerItem;
149     }
150 
151     /*
152      * (non-Javadoc)
153      * 
154      * @see ubic.basecode.dataStructure.matrix.Matrix2D#getByKeys(java.lang.Object, java.lang.Object)
155      */
156     @Override
157     public double[] getByKeys( R r, C c ) {
158         return this.get( getRowIndexByName( r ), getColIndexByName( c ) );
159     }
160 
161     /*
162      * (non-Javadoc)
163      * 
164      * @see ubic.basecode.dataStructure.matrix.AbstractNamedMatrix#getColObj(int)
165      */
166     @Override
167     public double[][] getColumn( int i ) {
168         throw new UnsupportedOperationException();
169     }
170 
171     /*
172      * (non-Javadoc)
173      * 
174      * @see ubic.basecode.dataStructure.matrix.Matrix2D#getEntry(int, int)
175      */
176     @Override
177     public double[] getEntry( int row, int column ) {
178         return get( row, column );
179     }
180 
181     /*
182      * (non-Javadoc)
183      * 
184      * @see ubic.basecode.dataStructure.matrix.AbstractNamedMatrix#getRowObj(int)
185      */
186     @Override
187     public double[][] getRow( int i ) {
188         throw new UnsupportedOperationException();
189     }
190 
191     /**
192      * @param row
193      * @return - array of counts of one-bits for each cell in the row.
194      */
195     public int[] getRowBitCount( int row ) {
196         int[] bits = new int[columns()];
197         for ( FlexCompRowMatrix cell : this.matrix ) {
198             SparseVector vector = cell.getRow( row );
199 
200             /*
201              * Sparse vector: has indices (>0 except for first position) saying where values are; data are the values.
202              */
203             double[] data = vector.getData();
204             int[] indices = vector.getIndex();
205             for ( int j = 0; j < data.length; j++ ) {
206                 if ( indices[j] == 0 && j > 0 ) break;
207                 if ( data[j] != 0.0 ) bits[indices[j]] += countBits( data[j] );
208             }
209         }
210         return bits;
211     }
212 
213     /*
214      * (non-Javadoc)
215      * 
216      * @see ubic.basecode.dataStructure.matrix.AbstractNamedMatrix#isMissing(int, int)
217      */
218     @Override
219     public boolean isMissing( int i, int j ) {
220         throw new UnsupportedOperationException();
221     }
222 
223     /**
224      * Counts the number of one-bits that are in common between the two specified cells; i.e. performs an AND operation
225      * on the two bit vectors and counts the remaining 1 bits.
226      * 
227      * @param row1 - cell 1 row
228      * @param col1 - cell 1 column
229      * @param row2 - cell 2 row
230      * @param col2 - cell 2 column
231      * @return number of bits in common
232      */
233     public int overlap( int row1, int col1, int row2, int col2 ) {
234         int bits = 0;
235 
236         double[] val1 = this.get( row1, col1 );
237         double[] val2 = this.get( row2, col2 );
238 
239         assert val1.length == matrix.length;
240 
241         for ( int i = 0; i < val1.length; i++ ) {
242             long binVal1 = Double.doubleToRawLongBits( val1[i] );
243             long binVal2 = Double.doubleToRawLongBits( val2[i] );
244             bits += Long.bitCount( binVal1 & binVal2 );
245         }
246 
247         return bits;
248     }
249 
250     public void reset( int r, int c ) {
251         for ( FlexCompRowMatrix cell : this.matrix ) {
252             cell.set( r, c, 0 );
253         }
254     }
255 
256     /*
257      * (non-Javadoc)
258      * 
259      * @see ubic.basecode.dataStructure.matrix.AbstractNamedMatrix#rows()
260      */
261     @Override
262     public int rows() {
263         return this.rows;
264     }
265 
266     /**
267      * Set the matrix cell to the specified bit vector
268      * 
269      * @param row
270      * @param col
271      * @param val
272      * @return true if set successfully
273      */
274     @Override
275     public void set( int row, int col, double[] val ) {
276         if ( val.length != this.matrix.length || row >= this.rows || col >= this.cols )
277             throw new IllegalArgumentException( "Value out of range" );
278         for ( int mi = 0; mi < val.length; mi++ ) {
279             this.matrix[mi].set( row, col, val[mi] );
280         }
281     }
282 
283     /**
284      * Sets the bit of the specified cell at the specified index to 1.
285      * 
286      * @param row - matrix row
287      * @param col - matrix column
288      * @param index - bit vector index
289      */
290     public void set( int row, int col, int index ) {
291         if ( index >= this.totalBitsPerItem || row > this.rows || col > this.cols ) {
292             throw new ArrayIndexOutOfBoundsException( "Attempt to access row=" + row + " col=" + col + " index="
293                     + index );
294         }
295 
296         /*
297          * We can only fit BITS_PER_ELEMENT values in each matrix cell. If num > 0, we have multiple 'stacked' matrices.
298          */
299         int num = ( int ) Math.floor( ( double ) index / BITS_PER_ELEMENT );
300         assert num >= 0 && num < matrix.length;
301         int whichBitToSet = index % BITS_PER_ELEMENT;
302         long currentValue = Double.doubleToRawLongBits( matrix[num].get( row, col ) );
303         double res = Double.longBitsToDouble( currentValue | CompressedBitMatrix.BIT1 << whichBitToSet );
304         matrix[num].set( row, col, res );
305     }
306 
307     /*
308      * (non-Javadoc)
309      * 
310      * @see ubic.basecode.dataStructure.matrix.NamedMatrix#set(java.lang.Object, java.lang.Object, java.lang.Object)
311      */
312     @Override
313     public void setByKeys( R r, C c, double[] v ) {
314         this.set( getRowIndexByName( r ), getColIndexByName( c ), v );
315     }
316 
317     @Override
318     public int size() {
319         return this.rows() * this.columns();
320     }
321 
322     @Override
323     public ObjectMatrix<R, C, double[]> subset( int startRow, int startCol, int numRow, int numCol ) {
324         int endRow = startRow + numRow - 1;
325         super.checkRowRange( startRow, endRow );
326         int endCol = startCol + numCol - 1;
327         super.checkColRange( startCol, endCol );
328         ObjectMatrix<R, C, double[]> result = new CompressedBitMatrix<R, C>( numRow, numCol, this.totalBitsPerItem );
329         int r = 0;
330         for ( int i = startRow; i < endRow; i++ ) {
331             int c = 0;
332             for ( int j = startCol; j < endCol; j++ ) {
333                 result.set( r, c++, this.get( i, j ) );
334             }
335             r++;
336         }
337         /*
338          * FIXME set up the row/column names.
339          */
340         return result;
341 
342     }
343 
344     /**
345      * @param columns
346      * @return
347      */
348     @Override
349     public ObjectMatrix<R, C, double[]> subsetColumns( List<C> columns ) {
350         CompressedBitMatrix<R, C> returnval = new CompressedBitMatrix<R, C>( this.rows(), columns.size(),
351                 this.totalBitsPerItem );
352         returnval.setRowNames( this.getRowNames() );
353         for ( int i = 0; i < this.rows(); i++ ) {
354             int currentColumn = 0;
355             for ( C c : columns ) {
356                 int j = this.getColIndexByName( c );
357 
358                 returnval.set( i, currentColumn, this.get( i, j ) );
359 
360                 if ( i == 0 ) {
361                     returnval.setColumnName( c, currentColumn );
362                 }
363                 currentColumn++;
364 
365             }
366         }
367         return returnval;
368     }
369 
370     /**
371      * Save the matrix to the specified file
372      * 
373      * @param fileName - save file
374      */
375     public void toFile( String fileName ) throws IOException {
376         FileWriter out = new FileWriter( new File( fileName ) );
377         out.write( this.rows + "\t" + this.cols + "\t" + this.totalBitsPerItem + "\n" );
378         Object[] rowNames = this.getRowNames().toArray();
379         for ( int i = 0; i < rowNames.length; i++ ) {
380             out.write( rowNames[i].toString() );
381             if ( i != rowNames.length - 1 ) out.write( "\t" );
382         }
383         out.write( "\n" );
384         Object[] colNames = this.getColNames().toArray();
385         for ( int i = 0; i < colNames.length; i++ ) {
386             out.write( colNames[i].toString() );
387             if ( i != colNames.length - 1 ) out.write( "\t" );
388         }
389         out.write( "\n" );
390         for ( int i = 0; i < this.rows; i++ )
391             for ( int j = 0; j < this.cols; j++ ) {
392                 if ( this.bitCount( i, j ) != 0 ) {
393                     out.write( i + "\t" + j );
394                     for ( FlexCompRowMatrix cell : this.matrix ) {
395                         long binVal = Double.doubleToRawLongBits( cell.get( i, j ) );
396                         /* Long.parseLong( hexString, 16) to get it back; */
397                         String hexString = Long.toHexString( binVal );
398                         out.write( "\t" + hexString );
399                     }
400                     out.write( "\n" );
401                 }
402             }
403         out.close();
404     }
405 
406     /**
407      * Number of ones in the entire matrix.
408      * 
409      * @return
410      */
411     public long totalBitCount() {
412         long result = 0L;
413         for ( FlexCompRowMatrix cell : this.matrix ) {
414             for ( int j = 0; j < cols; j++ ) {
415 
416                 SparseVector vector = cell.getRow( j );
417 
418                 /*
419                  * Sparse vector: has indices (>0 except for first position) saying where values are; data are the
420                  * values.
421                  */
422                 double[] data = vector.getData();
423                 for ( double cell2 : data ) {
424                     result += countBits( cell2 );
425                 }
426             }
427         }
428         return result;
429     }
430 
431     public void unset( int row, int col, int index ) {
432         if ( index >= this.totalBitsPerItem || row > this.rows || col > this.cols ) {
433             throw new ArrayIndexOutOfBoundsException( "Attempt to access row=" + row + " col=" + col + " index="
434                     + index );
435         }
436 
437         /*
438          * We can only fit BITS_PER_ELEMENT values in each matrix cell. If num > 0, we have multiple 'stacked' matrices.
439          */
440         int num = ( int ) Math.floor( ( double ) index / BITS_PER_ELEMENT );
441         assert num >= 0 && num < matrix.length;
442         int whichBitToSet = index % BITS_PER_ELEMENT;
443         long currentValue = Double.doubleToRawLongBits( matrix[num].get( row, col ) );
444 
445         if ( ( currentValue & CompressedBitMatrix.BIT1 << whichBitToSet ) == 0 ) {
446             return;
447         }
448         double res = Double.longBitsToDouble( currentValue ^ CompressedBitMatrix.BIT1 << whichBitToSet );
449         matrix[num].set( row, col, res );
450     }
451 
452     /**
453      * Count the number of one-bits of the passed-in <code>double</code> val.
454      * 
455      * @param val
456      * @return number of one-bits in val
457      */
458     private int countBits( double val ) {
459         if ( val == 0.0 ) return 0;
460         long binVal = Double.doubleToRawLongBits( val );
461         return Long.bitCount( binVal );
462     }
463 
464 }