View Javadoc
1   /*
2    * The baseCode project
3    *
4    * Copyright (c) 2006 University of British Columbia
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.util;
20  
21  import org.apache.commons.csv.CSVFormat;
22  import org.apache.commons.csv.CSVParser;
23  import org.apache.commons.csv.CSVRecord;
24  import org.apache.commons.lang3.StringUtils;
25  import org.slf4j.Logger;
26  import org.slf4j.LoggerFactory;
27  
28  import java.io.IOException;
29  import java.io.StringReader;
30  import java.util.Collection;
31  
32  /**
33   * @author pavlidis
34   */
35  public class StringUtil {
36  
37      private static final Logger log = LoggerFactory.getLogger( StringUtil.class );
38  
39      /**
40       * @param appendee  The string to be added to
41       * @param appendant The string to add to the end of the appendee
42       * @param separator The string to put between the joined strings, if necessary.
43       * @return appendee + separator + separator unless appendee is empty, in which case the appendant is returned.
44       */
45      public static String append( String appendee, String appendant, String separator ) {
46          if ( StringUtils.isBlank( appendee ) ) {
47              return appendant;
48          }
49          return appendee + separator + appendant;
50  
51      }
52  
53      /**
54       * Given a set of strings, identify any prefix they have in common.
55       *
56       * @param strings
57       * @return the common prefix, null if there isn't one.
58       */
59      public static String commonPrefix( Collection<String> strings ) {
60          // find the shortest string; this is the maximum length of the prefix. It is itself the prefix to look for.
61          String shortest = shortestString( strings );
62  
63          if ( shortest == null || shortest.length() == 0 ) return null;
64  
65          String test = shortest;
66          while ( test.length() > 0 ) {
67              boolean found = true;
68              for ( String string : strings ) {
69                  if ( !string.startsWith( test ) ) {
70                      found = false;
71                      break;
72                  }
73              }
74              if ( found ) return test;
75              test = test.substring( 0, test.length() - 1 );
76          }
77          return null;
78      }
79  
80      /**
81       * Given a set of strings, identify any suffix they have in common.
82       *
83       * @param strings
84       * @return the commons suffix, null if there isn't one.
85       */
86      public static String commonSuffix( Collection<String> strings ) {
87          String shortest = shortestString( strings );
88  
89          if ( shortest == null || shortest.length() == 0 ) return null;
90  
91          String test = shortest;
92          while ( test.length() > 0 ) {
93              boolean found = true;
94              for ( String string : strings ) {
95                  if ( !string.endsWith( test ) ) {
96                      found = false;
97                      break;
98                  }
99              }
100             if ( found ) return test;
101             test = test.substring( 1 );
102         }
103         return null;
104     }
105 
106     /**
107      * Checks a string to find "strange" character, used by phenocarta to check evidence description
108      *
109      * @param the string to check
110      * @return return false if something strange was found
111      * @author Nicolas?
112      */
113     public static boolean containsValidCharacter( String s ) {
114 
115         if ( s != null ) {
116 
117             for ( int i = 0; i < s.length(); i++ ) {
118 
119                 Character cha = s.charAt( i );
120 
121                 if ( !( isLatinLetter( cha ) || Character.isDigit( cha ) || cha == '=' || cha == ',' || cha == '('
122                     || cha == ')' || cha == '\'' || Character.isWhitespace( cha ) || cha == '/' || cha == '?'
123                     || cha == '+' || cha == ':' || cha == '-' || cha == '<' || cha == '>' || cha == '"'
124                     || cha == '%' || cha == '.' || cha == '*' || cha == '[' || cha == ']' || cha == ';'
125                     || cha == '_' || cha == '\\' || cha == '|' || cha == '&' || cha == '^' || cha == '#'
126                     || cha == '{' || cha == '}' || cha == '!' || cha == '~' || cha == '@' || cha == '—'
127                     || cha == '×' || cha == '–' || cha == ' ' ) ) {
128 
129                     // new cha to be added, special Öö≤≥âμ etc... TODO and check later if found
130 
131                     log.warn( "Illegal character found: " + cha + " found on description: " + s );
132 
133                     return false;
134                 }
135             }
136         }
137         return true;
138     }
139 
140     /**
141      * @param line
142      * @return
143      */
144     public static String[] csvSplit( String line ) {
145         try ( CSVParser parser = CSVParser.parse( new StringReader( line ), CSVFormat.DEFAULT ) ) {
146             for ( CSVRecord record : parser ) {
147                 return record.values();
148             }
149             throw new IllegalArgumentException( "No CSV records found in line." );
150         } catch ( IOException e ) {
151             throw new RuntimeException( e );
152         }
153     }
154 
155     /**
156      * Made by Nicolas
157      *
158      * @param a line in a file cvs format
159      * @return the same line but in tsv format
160      */
161     public static String cvs2tsv( String line ) {
162 
163         StringBuffer newLine = new StringBuffer( line );
164 
165         boolean change = true;
166 
167         for ( int position = 0; position < newLine.length(); position++ ) {
168 
169             if ( newLine.charAt( position ) == ',' && change ) {
170                 newLine.setCharAt( position, '\t' );
171             } else if ( newLine.charAt( position ) == '"' ) {
172 
173                 if ( change ) {
174                     change = false;
175                 } else {
176                     change = true;
177                 }
178             }
179         }
180         return newLine.toString().replaceAll( "\"", "" );
181     }
182 
183     public static boolean isLatinLetter( char c ) {
184         return ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' );
185     }
186 
187     /**
188      * Mimics the make.names method in R (character.c) to make valid variables names; we use this for column headers in
189      * some output files. This doesn't give the exact sames results as R; we avoid repeated '.'.
190      *
191      * @param s
192      * @return modified string
193      * @author paul
194      */
195     public static String makeValidForR( String s ) {
196 
197         // If string starts with a digit or "." and then a digit, prepend an X.
198         if ( s.matches( "^\\.?[0-9].+" ) ) {
199             s = "X" + s;
200         }
201 
202         // TODO: check for reserved words. https://stat.ethz.ch/R-manual/R-devel/library/base/html/Reserved.html
203 
204         // no dashes or white space or other punctuation. '.' is okay and so is "_", now.
205         return s.replaceAll( "[\\W]+", "." );
206     }
207 
208     /**
209      * @param stringi
210      * @param stringj
211      * @return
212      */
213     public static Long twoStringHashKey( String stringi, String stringj ) {
214         // use arbitrary but consistent method for ordering.
215         if ( stringi.hashCode() < stringj.hashCode() ) {
216             return new Long( stringi.hashCode() | ( long ) stringj.hashCode() << 32 );
217         }
218         return new Long( stringj.hashCode() | ( long ) stringi.hashCode() << 32 );
219     }
220 
221     private static String shortestString( Collection<String> strings ) {
222         String shortest = null;
223         for ( String string : strings ) {
224             if ( shortest == null || string.length() < shortest.length() ) shortest = string;
225         }
226         return shortest;
227     }
228 
229 }