View Javadoc
1   /*
2    * The baseCode project
3    * 
4    * Copyright (c) 2006 University of British Columbia
5    * 
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.util;
20  
21  import java.io.IOException;
22  import java.io.StringReader;
23  import java.util.Collection;
24  
25  import com.opencsv.CSVReader;
26  import com.opencsv.exceptions.CsvValidationException;
27  import org.apache.commons.lang3.StringUtils;
28  import org.slf4j.Logger;
29  import org.slf4j.LoggerFactory;
30  
31  /**
32   * @author pavlidis
33   */
34  public class StringUtil {
35  
36      private static final Logger log = LoggerFactory.getLogger( StringUtil.class );
37  
38      /**
39       * @param appendee The string to be added to
40       * @param appendant The string to add to the end of the appendee
41       * @param separator The string to put between the joined strings, if necessary.
42       * @return appendee + separator + separator unless appendee is empty, in which case the appendant is returned.
43       */
44      public static String append( String appendee, String appendant, String separator ) {
45          if ( StringUtils.isBlank( appendee ) ) {
46              return appendant;
47          }
48          return appendee + separator + appendant;
49  
50      }
51  
52      /**
53       * Given a set of strings, identify any prefix they have in common.
54       * 
55       * @param strings
56       * @return the common prefix, null if there isn't one.
57       */
58      public static String commonPrefix( Collection<String> strings ) {
59          // find the shortest string; this is the maximum length of the prefix. It is itself the prefix to look for.
60          String shortest = shortestString( strings );
61  
62          if ( shortest == null || shortest.length() == 0 ) return null;
63  
64          String test = shortest;
65          while ( test.length() > 0 ) {
66              boolean found = true;
67              for ( String string : strings ) {
68                  if ( !string.startsWith( test ) ) {
69                      found = false;
70                      break;
71                  }
72              }
73              if ( found ) return test;
74              test = test.substring( 0, test.length() - 1 );
75          }
76          return null;
77      }
78  
79      /**
80       * Given a set of strings, identify any suffix they have in common.
81       * 
82       * @param strings
83       * @return the commons suffix, null if there isn't one.
84       */
85      public static String commonSuffix( Collection<String> strings ) {
86          String shortest = shortestString( strings );
87  
88          if ( shortest == null || shortest.length() == 0 ) return null;
89  
90          String test = shortest;
91          while ( test.length() > 0 ) {
92              boolean found = true;
93              for ( String string : strings ) {
94                  if ( !string.endsWith( test ) ) {
95                      found = false;
96                      break;
97                  }
98              }
99              if ( found ) return test;
100             test = test.substring( 1 );
101         }
102         return null;
103     }
104 
105     /**
106      * Checks a string to find "strange" character, used by phenocarta to check evidence description
107      * 
108      * @param the string to check
109      * @return return false if something strange was found 
110      * @author Nicolas?
111      */
112     public static boolean containsValidCharacter( String s ) {
113 
114         if ( s != null ) {
115 
116             for ( int i = 0; i < s.length(); i++ ) {
117 
118                 Character cha = s.charAt( i );
119 
120                 if ( !( isLatinLetter( cha ) || Character.isDigit( cha ) || cha == '=' || cha == ',' || cha == '('
121                         || cha == ')' || cha == '\'' || Character.isWhitespace( cha ) || cha == '/' || cha == '?'
122                         || cha == '+' || cha == ':' || cha == '-' || cha == '<' || cha == '>' || cha == '"'
123                         || cha == '%' || cha == '.' || cha == '*' || cha == '[' || cha == ']' || cha == ';'
124                         || cha == '_' || cha == '\\' || cha == '|' || cha == '&' || cha == '^' || cha == '#'
125                         || cha == '{' || cha == '}' || cha == '!' || cha == '~' || cha == '@' || cha == '—'
126                         || cha == '×' || cha == '–' || cha == ' ' ) ) {
127 
128                     // new cha to be added, special Öö≤≥âμ etc... TODO and check later if found
129 
130                     log.warn( "Illegal character found: " + cha + " found on description: " + s );
131 
132                     return false;
133                 }
134             }
135         }
136         return true;
137     }
138 
139     /**
140      * @param numFields
141      * @param line
142      * @return
143      */
144     public static String[] csvSplit( String line ) {
145 
146         @SuppressWarnings("resource")
147         CSVReader reader = new CSVReader( new StringReader( line ) );
148 
149         try {
150             return reader.readNext();
151         } catch ( IOException | CsvValidationException e ) {
152             throw new RuntimeException( e );
153         }
154     }
155 
156     /**
157      * Made by Nicolas
158      * 
159      * @param a line in a file cvs format
160      * @return the same line but in tsv format
161      */
162     public static String cvs2tsv( String line ) {
163 
164         StringBuffer newLine = new StringBuffer( line );
165 
166         boolean change = true;
167 
168         for ( int position = 0; position < newLine.length(); position++ ) {
169 
170             if ( newLine.charAt( position ) == ',' && change ) {
171                 newLine.setCharAt( position, '\t' );
172             } else if ( newLine.charAt( position ) == '"' ) {
173 
174                 if ( change ) {
175                     change = false;
176                 } else {
177                     change = true;
178                 }
179             }
180         }
181         return newLine.toString().replaceAll( "\"", "" );
182     }
183 
184     public static boolean isLatinLetter( char c ) {
185         return ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' );
186     }
187 
188     /**
189      * Mimics the make.names method in R (character.c) to make valid variables names; we use this for column headers in
190      * some output files. This doesn't give the exact sames results as R; we avoid repeated '.'.
191      * 
192      * @param s
193      * @return modified string
194      * @author paul
195      */
196     public static String makeValidForR( String s ) {
197 
198         // If string starts with a digit or "." and then a digit, prepend an X.
199         if ( s.matches( "^\\.?[0-9].+" ) ) {
200             s = "X" + s;
201         }
202 
203         // TODO: check for reserved words. https://stat.ethz.ch/R-manual/R-devel/library/base/html/Reserved.html
204 
205         // no dashes or white space or other punctuation. '.' is okay and so is "_", now.
206         return s.replaceAll( "[\\W]+", "." );
207     }
208 
209     /**
210      * @param stringi
211      * @param stringj
212      * @return
213      */
214     public static Long twoStringHashKey( String stringi, String stringj ) {
215         // use arbitrary but consistent method for ordering.
216         if ( stringi.hashCode() < stringj.hashCode() ) {
217             return new Long( stringi.hashCode() | ( long ) stringj.hashCode() << 32 );
218         }
219         return new Long( stringj.hashCode() | ( long ) stringi.hashCode() << 32 );
220     }
221 
222     private static String shortestString( Collection<String> strings ) {
223         String shortest = null;
224         for ( String string : strings ) {
225             if ( shortest == null || string.length() < shortest.length() ) shortest = string;
226         }
227         return shortest;
228     }
229 
230 }