View Javadoc
1   /*
2    * The baseCode project
3    *
4    * Copyright (c) 2006 University of British Columbia
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.io;
20  
21  import org.apache.commons.lang3.ArrayUtils;
22  import org.apache.commons.lang3.StringUtils;
23  
24  import java.lang.reflect.Array;
25  import java.nio.*;
26  import java.nio.charset.Charset;
27  import java.util.ArrayList;
28  import java.util.List;
29  
30  /**
31   * Class to convert byte arrays (e.g., Blobs) to and from other types of arrays.
32   *
33   * @author Kiran Keshav
34   * @author Paul Pavlidis
35   */
36  public final class ByteArrayConverter {
37  
38      // sizes are in bytes.
39      // TODO: these could be static methods.
40  
41      private static final int DOUBLE_SIZE = 8;
42  
43      public byte[] booleanArrayToBytes( boolean[] boolarray ) {
44          if ( boolarray == null ) return null;
45          ByteBuffer buffer = ByteBuffer.allocate( boolarray.length );
46          for ( boolean b : boolarray ) {
47              buffer.put( b ? ( byte ) 1 : ( byte ) 0 );
48          }
49          return buffer.array();
50      }
51  
52      public boolean[] byteArrayToBooleans( byte[] barray ) {
53          if ( barray == null ) return null;
54          boolean[] iarray = new boolean[barray.length];
55          for ( int i = 0; i < barray.length; i++ ) {
56              iarray[i] = barray[i] != 0;
57          }
58          return iarray;
59      }
60  
61      public byte[] doubleArrayToBytes( double[] darray ) {
62          if ( darray == null ) {
63              return null;
64          }
65          ByteBuffer buffer = ByteBuffer.allocate( 8 * darray.length );
66          for ( double d : darray ) {
67              buffer.putDouble( d );
68          }
69          return buffer.array();
70      }
71  
72      public double[] byteArrayToDoubles( byte[] barray ) {
73          if ( barray == null ) return null;
74          DoubleBuffer buf = ByteBuffer.wrap( barray ).asDoubleBuffer();
75          double[] array = new double[buf.remaining()];
76          buf.get( array );
77          return array;
78      }
79  
80      public byte[] doubleMatrixToBytes( double[][] testm ) {
81          if ( testm == null ) {
82              return null;
83          }
84          if ( testm.length == 0 ) {
85              return new byte[0];
86          }
87          int rowSize = testm[0].length;
88          double[] a = new double[testm.length * rowSize];
89          for ( int i = 0; i < testm.length; i++ ) {
90              if ( testm[i].length != rowSize ) {
91                  throw new IllegalArgumentException( "Cannot serialize ragged matrix" );
92              }
93              System.arraycopy( testm[i], 0, a, rowSize * i, rowSize );
94          }
95          return doubleArrayToBytes( a );
96      }
97  
98      /**
99       * Convert a byte array to a double matrix, assuming it is square.
100      */
101     public double[][] byteArrayToDoubleMatrix( byte[] barray ) {
102         if ( barray == null ) {
103             return null;
104         }
105         int numDoubles = barray.length / DOUBLE_SIZE;
106         int columns = ( int ) Math.sqrt( numDoubles );
107         if ( columns * columns != barray.length / DOUBLE_SIZE ) {
108             throw new IllegalArgumentException( "The byte array is not square." );
109         }
110         return byteArrayToDoubleMatrix( barray, columns );
111     }
112 
113     /**
114      * Convert a byte array to a double matrix.
115      *
116      * @param columns the number of columns in the matrix
117      */
118     public double[][] byteArrayToDoubleMatrix( byte[] barray, int columns ) throws IllegalArgumentException {
119         if ( barray == null ) {
120             return null;
121         }
122         if ( barray.length == 0 ) {
123             return new double[0][columns];
124         }
125         int numDoubles = barray.length / DOUBLE_SIZE;
126         if ( numDoubles % columns != 0 ) {
127             throw new IllegalArgumentException( "The number of doubles in the byte array (" + numDoubles
128                 + ") does not divide evenly into the number of items expected per row (" + columns + ")." );
129         }
130         int numRows = numDoubles / columns;
131         double[][] answer = new double[numRows][];
132         byte[] row = new byte[columns * DOUBLE_SIZE];
133         int bytesPerRow = columns * DOUBLE_SIZE;
134         for ( int rownum = 0; rownum < numRows; rownum++ ) {
135             System.arraycopy( barray, rownum * bytesPerRow, row, 0, bytesPerRow );
136             answer[rownum] = byteArrayToDoubles( row );
137         }
138         return answer;
139     }
140 
141     /**
142      * Note that this method cannot differentiate between empty strings and null strings. A string that is empty will be
143      * returned as an empty string, not null, while a null string will be stored as an empty string.
144      *
145      * @param charset charset to use when converting strings to bytes
146      */
147     public byte[] stringArrayToBytes( String[] stringArray, Charset charset ) {
148         if ( stringArray == null ) return null;
149         int size = 0;
150         for ( String element : stringArray ) {
151             size += element != null ? element.getBytes( charset ).length : 0;
152             size += 1;
153         }
154         ByteBuffer buffer = ByteBuffer.allocate( size );
155         for ( String element : stringArray ) {
156             if ( element != null ) {
157                 buffer.put( element.getBytes( charset ) );
158             }
159             buffer.put( ( byte ) 0 );
160         }
161         return buffer.array();
162     }
163 
164     /**
165      * Convert a byte array into a array of Strings.
166      * <p>
167      * It is assumed that separate strings are delimited by a '\u0000' (NUL) character. Note that this method cannot
168      * differentiate between empty strings and null strings. A string that is empty will be returned as an empty string,
169      * not null.
170      *
171      * @param charset charset to use when decoding bytes into strings
172      */
173     public String[] byteArrayToStrings( byte[] bytes, Charset charset ) {
174         if ( bytes == null ) {
175             return null;
176         }
177         List<String> strings = new ArrayList<>();
178         int len = 0;
179         for ( int i = 0; i < bytes.length; i++ ) {
180             byte element = bytes[i];
181             if ( element == '\u0000' ) {
182                 strings.add( new String( bytes, i - len, len, charset ) );
183                 len = 0;
184             } else {
185                 len++;
186             }
187         }
188         return strings.toArray( new String[0] );
189     }
190 
191     /**
192      * Convert an array of strings to a byte array where the delimiter is a tab character.
193      * <p>
194      * If the string contains actual {@code \t} characters, they are escaped as {@code \\t}. Note that those will be
195      * decoded as escaped tabs by {@link #byteArrayToTabbedStrings(byte[], Charset)}.
196      * <p>
197      * This method does not distinguish between null and empty strings. Those will be decoded as empty strings by
198      * {@link #byteArrayToTabbedStrings(byte[], Charset)}.
199      */
200     public byte[] stringArrayToTabbedBytes( String[] strings, Charset charset ) {
201         if ( strings == null ) return null;
202         String[] escapedStrings = new String[strings.length];
203         for ( int i = 0; i < strings.length; i++ ) {
204             escapedStrings[i] = formatAsTabbedString( strings[i] );
205         }
206         return StringUtils.join( escapedStrings, '\t' ).getBytes( charset );
207     }
208 
209     public String[] byteArrayToTabbedStrings( byte[] bytes, Charset charset ) {
210         if ( bytes == null ) return null;
211         return StringUtils.splitPreserveAllTokens( new String( bytes, charset ), '\t' );
212     }
213 
214     /**
215      * Convert a byte array to a tab-delimited string.
216      *
217      * @param type    The Class of primitives the bytes are to be interpreted as. If this is String, then the bytes are
218      *                directly interpreted as tab-delimited string (e.g., no extra tabs are added).
219      * @param charset charset to use when decoding bytes into strings
220      * @throws UnsupportedOperationException if Class is a type that can't be converted by this.
221      */
222     public String byteArrayToTabbedString( byte[] bytes, Class<?> type, Charset charset ) {
223         if ( bytes == null ) return null;
224         if ( type.equals( Float.class ) ) {
225             Float[] array = ArrayUtils.toObject( byteArrayToFloats( bytes ) );
226             return formatAsTabbedString( array );
227         } else if ( type.equals( Double.class ) ) {
228             Double[] array = ArrayUtils.toObject( byteArrayToDoubles( bytes ) );
229             return formatAsTabbedString( array );
230         } else if ( type.equals( Integer.class ) ) {
231             Integer[] array = ArrayUtils.toObject( byteArrayToInts( bytes ) );
232             return formatAsTabbedString( array );
233         } else if ( type.equals( Long.class ) ) {
234             Long[] array = ArrayUtils.toObject( byteArrayToLongs( bytes ) );
235             return formatAsTabbedString( array );
236         } else if ( type.equals( String.class ) ) {
237             return new String( bytes, charset );
238         } else if ( type.equals( Boolean.class ) ) {
239             Boolean[] array = ArrayUtils.toObject( byteArrayToBooleans( bytes ) );
240             return formatAsTabbedString( array );
241         } else if ( type.equals( Character.class ) ) {
242             Character[] array = ArrayUtils.toObject( byteArrayToChars( bytes ) );
243             return formatAsTabbedString( array );
244         } else {
245             throw new UnsupportedOperationException( "Can't convert " + type.getName() );
246         }
247     }
248 
249     private String formatAsTabbedString( Object[] array ) {
250         StringBuilder buf = new StringBuilder();
251         for ( int i = 0; i < array.length; i++ ) {
252             buf.append( formatAsTabbedString( array[i] ) );
253             if ( i != array.length - 1 ) buf.append( "\t" ); // so we don't have a trailing tab.
254         }
255         return buf.toString();
256     }
257 
258     private String formatAsTabbedString( Object object ) {
259         if ( object == null ) {
260             return "";
261         }
262         return String.valueOf( object )
263             .replace( "\t", "\\t" );
264     }
265 
266     public byte[] charArrayToBytes( char[] carray ) {
267         if ( carray == null ) return null;
268         ByteBuffer buffer = ByteBuffer.allocate( 2 * carray.length );
269         for ( char c : carray ) {
270             buffer.putChar( c );
271         }
272         return buffer.array();
273     }
274 
275     public char[] byteArrayToChars( byte[] barray ) {
276         if ( barray == null ) return null;
277         CharBuffer buf = ByteBuffer.wrap( barray ).asCharBuffer();
278         char[] array = new char[buf.remaining()];
279         buf.get( array );
280         return array;
281     }
282 
283     public byte[] floatArrayToBytes( float[] darray ) {
284         if ( darray == null ) {
285             return null;
286         }
287         ByteBuffer buffer = ByteBuffer.allocate( 4 * darray.length );
288         for ( float d : darray ) {
289             buffer.putFloat( d );
290         }
291         return buffer.array();
292     }
293 
294     public float[] byteArrayToFloats( byte[] barray ) {
295         if ( barray == null ) return null;
296         FloatBuffer buf = ByteBuffer.wrap( barray ).asFloatBuffer();
297         float[] array = new float[buf.remaining()];
298         buf.get( array );
299         return array;
300     }
301 
302     public byte[] intArrayToBytes( int[] iarray ) {
303         if ( iarray == null ) {
304             return null;
305         }
306         ByteBuffer buffer = ByteBuffer.allocate( 4 * iarray.length );
307         for ( int i : iarray ) {
308             buffer.putInt( i );
309         }
310         return buffer.array();
311     }
312 
313     public int[] byteArrayToInts( byte[] barray ) {
314         if ( barray == null ) return null;
315         IntBuffer intBuf = ByteBuffer.wrap( barray ).asIntBuffer();
316         int[] array = new int[intBuf.remaining()];
317         intBuf.get( array );
318         return array;
319     }
320 
321     public byte[] longArrayToBytes( long[] larray ) {
322         if ( larray == null ) {
323             return null;
324         }
325         ByteBuffer buffer = ByteBuffer.allocate( 8 * larray.length );
326         for ( long i : larray ) {
327             buffer.putLong( i );
328         }
329         return buffer.array();
330     }
331 
332     /**
333      * @return long[] resulting from parse of the bytes.
334      */
335     public long[] byteArrayToLongs( byte[] barray ) {
336         if ( barray == null ) return null;
337         LongBuffer buf = ByteBuffer.wrap( barray ).asLongBuffer();
338         long[] array = new long[buf.remaining()];
339         buf.get( array );
340         return array;
341     }
342 
343     /**
344      * Convert an array of Objects into an array of bytes.
345      *
346      * @param array   of objects to be converted to bytes.
347      * @param charset charset to use when converting strings to bytes
348      * @throws UnsupportedOperationException if Objects are a type that can't be converted by this.
349      */
350     public <T> byte[] objectArrayToBytes( T[] array, Charset charset ) {
351         if ( array == null ) return null;
352         if ( array instanceof Boolean[] ) {
353             return booleanArrayToBytes( ArrayUtils.toPrimitive( ( Boolean[] ) array ) );
354         } else if ( array instanceof Float[] ) {
355             return floatArrayToBytes( ArrayUtils.toPrimitive( ( Float[] ) array ) );
356         } else if ( array instanceof Double[] ) {
357             return doubleArrayToBytes( ArrayUtils.toPrimitive( ( Double[] ) array ) );
358         } else if ( array instanceof Character[] ) {
359             return charArrayToBytes( ArrayUtils.toPrimitive( ( Character[] ) array ) );
360         } else if ( array instanceof String[] ) {
361             return stringArrayToBytes( ( String[] ) array, charset );
362         } else if ( array instanceof Integer[] ) {
363             return intArrayToBytes( ArrayUtils.toPrimitive( ( Integer[] ) array ) );
364         } else if ( array instanceof Long[] ) {
365             return longArrayToBytes( ArrayUtils.toPrimitive( ( Long[] ) array ) );
366         } else if ( array.getClass().equals( Object[].class ) ) {
367             // require a copy...
368             if ( array.length != 0 ) {
369                 //noinspection unchecked
370                 T[] typedArray = ( T[] ) Array.newInstance( array[0].getClass(), array.length );
371                 System.arraycopy( array, 0, typedArray, 0, array.length );
372                 return objectArrayToBytes( typedArray, charset );
373             } else {
374                 return new byte[0];
375             }
376         } else {
377             throw new UnsupportedOperationException( "Can't convert " + array[0].getClass() + " to bytes" );
378         }
379     }
380 
381     /**
382      * @param charset charset to use when decoding strings from bytes
383      * @throws UnsupportedOperationException if type is a type that can't be converted by this.
384      */
385     @SuppressWarnings("unchecked")
386     public <T> T[] byteArrayToObjects( byte[] barray, Class<T> type, Charset charset ) {
387         if ( Boolean.class.isAssignableFrom( type ) ) {
388             return ( T[] ) ArrayUtils.toObject( byteArrayToBooleans( barray ) );
389         } else if ( Character.class.isAssignableFrom( type ) ) {
390             return ( T[] ) ArrayUtils.toObject( byteArrayToChars( barray ) );
391         } else if ( Float.class.isAssignableFrom( type ) ) {
392             return ( T[] ) ArrayUtils.toObject( byteArrayToFloats( barray ) );
393         } else if ( Double.class.isAssignableFrom( type ) ) {
394             return ( T[] ) ArrayUtils.toObject( byteArrayToDoubles( barray ) );
395         } else if ( Integer.class.isAssignableFrom( type ) ) {
396             return ( T[] ) ArrayUtils.toObject( byteArrayToInts( barray ) );
397         } else if ( Long.class.isAssignableFrom( type ) ) {
398             return ( T[] ) ArrayUtils.toObject( byteArrayToLongs( barray ) );
399         } else if ( String.class.isAssignableFrom( type ) ) {
400             return ( T[] ) byteArrayToStrings( barray, charset );
401         } else {
402             throw new UnsupportedOperationException( "Can't convert " + type + " from bytes" );
403         }
404     }
405 }