1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package ubic.basecode.util;
20
21 import org.apache.commons.csv.CSVFormat;
22 import org.apache.commons.csv.CSVParser;
23 import org.apache.commons.csv.CSVRecord;
24 import org.apache.commons.lang3.StringUtils;
25 import org.slf4j.Logger;
26 import org.slf4j.LoggerFactory;
27
28 import java.io.IOException;
29 import java.io.StringReader;
30 import java.util.Collection;
31 import java.util.HashMap;
32 import java.util.Map;
33
34
35
36
37 public class StringUtil {
38
/** SLF4J logger shared by the static helpers of this class. */
private static final Logger log = LoggerFactory.getLogger( StringUtil.class );
40
41
42
43
44
45
46
47 public static String append( String appendee, String appendant, String separator ) {
48 if ( StringUtils.isBlank( appendee ) ) {
49 return appendant;
50 }
51 return appendee + separator + appendant;
52
53 }
54
55
56
57
58
59
60
61 public static String commonPrefix( Collection<String> strings ) {
62
63 String shortest = shortestString( strings );
64
65 if ( shortest == null || shortest.length() == 0 ) return null;
66
67 String test = shortest;
68 while ( test.length() > 0 ) {
69 boolean found = true;
70 for ( String string : strings ) {
71 if ( !string.startsWith( test ) ) {
72 found = false;
73 break;
74 }
75 }
76 if ( found ) return test;
77 test = test.substring( 0, test.length() - 1 );
78 }
79 return null;
80 }
81
82
83
84
85
86
87
88 public static String commonSuffix( Collection<String> strings ) {
89 String shortest = shortestString( strings );
90
91 if ( shortest == null || shortest.length() == 0 ) return null;
92
93 String test = shortest;
94 while ( test.length() > 0 ) {
95 boolean found = true;
96 for ( String string : strings ) {
97 if ( !string.endsWith( test ) ) {
98 found = false;
99 break;
100 }
101 }
102 if ( found ) return test;
103 test = test.substring( 1 );
104 }
105 return null;
106 }
107
108
109
110
111
112
113
114
/**
 * Check that a string is made up exclusively of accepted characters.
 * <p>
 * Despite the name, this returns {@code true} only when <em>every</em> character is accepted: the
 * scan stops and returns {@code false} at the first character that is not. A character is accepted
 * when it is an unaccented Latin letter ({@link #isLatinLetter(char)}), a digit
 * ({@link Character#isDigit(char)}), whitespace ({@link Character#isWhitespace(char)}) or one of
 * the punctuation/symbol literals listed in the condition below.
 * <p>
 * A warning naming the offending character and the full string is logged on rejection.
 *
 * @param s the string to check; {@code null} is treated as valid
 * @return {@code true} if {@code s} is {@code null}, empty, or contains only accepted characters
 */
public static boolean containsValidCharacter( String s ) {

    if ( s != null ) {

        for ( int i = 0; i < s.length(); i++ ) {

            Character cha = s.charAt( i );

            // Whitelist of accepted characters. Several literals are non-ASCII look-alikes
            // (em dash, multiplication sign, en dash), so edit this list with care.
            // NOTE(review): the final literal renders as a space; a plain space is already
            // accepted by Character.isWhitespace, so it may be a non-breaking or other special
            // space character - confirm against the original file encoding.
            if ( !( isLatinLetter( cha ) || Character.isDigit( cha ) || cha == '=' || cha == ',' || cha == '('
                    || cha == ')' || cha == '\'' || Character.isWhitespace( cha ) || cha == '/' || cha == '?'
                    || cha == '+' || cha == ':' || cha == '-' || cha == '<' || cha == '>' || cha == '"'
                    || cha == '%' || cha == '.' || cha == '*' || cha == '[' || cha == ']' || cha == ';'
                    || cha == '_' || cha == '\\' || cha == '|' || cha == '&' || cha == '^' || cha == '#'
                    || cha == '{' || cha == '}' || cha == '!' || cha == '~' || cha == '@' || cha == '—'
                    || cha == '×' || cha == '–' || cha == ' ' ) ) {

                // Report the first rejected character together with the string it came from.
                log.warn( "Illegal character found: " + cha + " found on description: " + s );

                return false;
            }
        }
    }
    return true;
}
141
142
143
144
145
146 public static String[] csvSplit( String line ) {
147 try ( CSVParser parser = CSVParser.parse( new StringReader( line ), CSVFormat.DEFAULT ) ) {
148 for ( CSVRecord record : parser ) {
149 return record.values();
150 }
151 throw new IllegalArgumentException( "No CSV records found in line." );
152 } catch ( IOException e ) {
153 throw new RuntimeException( e );
154 }
155 }
156
157
158
159
160
161
162
163 public static String cvs2tsv( String line ) {
164
165 StringBuffer newLine = new StringBuffer( line );
166
167 boolean change = true;
168
169 for ( int position = 0; position < newLine.length(); position++ ) {
170
171 if ( newLine.charAt( position ) == ',' && change ) {
172 newLine.setCharAt( position, '\t' );
173 } else if ( newLine.charAt( position ) == '"' ) {
174
175 if ( change ) {
176 change = false;
177 } else {
178 change = true;
179 }
180 }
181 }
182 return newLine.toString().replaceAll( "\"", "" );
183 }
184
185 public static boolean isLatinLetter( char c ) {
186 return ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' );
187 }
188
189
190
191
192
193
194
195
196
197
198
199
/**
 * Convert a string into a name usable in R. This is a plain alias for
 * {@link #makeNames(String)}.
 *
 * @param s the string to convert
 * @return the result of {@link #makeNames(String)} for {@code s}
 */
public static String makeValidForR( String s ) {
    return makeNames( s );
}
203
204
205
206
207
208
/**
 * Convert an array of strings into unique names usable in R.
 *
 * @param strings the strings to convert
 * @return the result of {@link #makeNames(String[], boolean)} with {@code unique} set to
 *         {@code true}
 * @deprecated call {@link #makeNames(String[], boolean)} directly
 */
@Deprecated
public static String[] makeValidForR( String[] strings ) {
    return makeNames( strings, true );
}
213
214
215
216
217
218
219
220
221 public static String[] makeNames( String[] strings, boolean unique ) {
222 String[] result = new String[strings.length];
223 if ( unique ) {
224 Map<String, Integer> counts = new HashMap<>();
225 for ( int i = 0; i < strings.length; i++ ) {
226 String s = strings[i];
227 String rs = makeNames( s );
228 if ( counts.containsKey( rs ) ) {
229 int count = counts.get( rs );
230 result[i] = rs + "." + count;
231 counts.put( rs, count + 1 );
232 } else {
233 result[i] = rs;
234 counts.put( rs, 1 );
235 }
236 }
237 } else {
238 for ( int i = 0; i < strings.length; i++ ) {
239 result[i] = makeNames( strings[i] );
240 }
241 }
242 return result;
243 }
244
245 private static final String[] R_RESERVED_WORDS = {
246 "if", "else", "repeat", "while", "function", "for", "in", "next", "break",
247 "TRUE", "FALSE", "NULL", "Inf", "NaN", "NA", "NA_integer_", "NA_real_", "NA_character_", "NA_complex_",
248 };
249
250
251
252
253
254 public static String makeNames( String s ) {
255 if ( s == null ) {
256 return "NA";
257 }
258 if ( s.isEmpty()
259
260 || ( !Character.isAlphabetic( s.charAt( 0 ) ) && s.charAt( 0 ) != '.' )
261
262 || ( s.charAt( 0 ) == '.' && s.length() > 1 && Character.isDigit( s.charAt( 1 ) ) ) ) {
263 return "X" + s.replaceAll( "[^A-Za-z0-9._]", "." );
264 }
265 if ( StringUtils.equalsAny( s, R_RESERVED_WORDS ) ) {
266 return s + ".";
267 }
268 return s.replaceAll( "[^A-Za-z0-9._]", "." );
269 }
270
271
272
273
274
275
276
277 public static String[] makeUnique( String[] strings ) {
278 Map<String, Integer> counts = new HashMap<>();
279 String[] result = new String[strings.length];
280 for ( int i = 0; i < strings.length; i++ ) {
281 String cn = strings[i];
282 if ( counts.containsKey( cn ) ) {
283 int count = counts.get( cn );
284 result[i] = cn + "." + count;
285 counts.put( cn, count + 1 );
286 } else {
287 result[i] = cn;
288 counts.put( cn, 1 );
289 }
290 }
291 return result;
292
293 }
294
295
296
297
298
299
300 public static Long twoStringHashKey( String stringi, String stringj ) {
301
302 if ( stringi.hashCode() < stringj.hashCode() ) {
303 return new Long( stringi.hashCode() | ( long ) stringj.hashCode() << 32 );
304 }
305 return new Long( stringj.hashCode() | ( long ) stringi.hashCode() << 32 );
306 }
307
308 private static String shortestString( Collection<String> strings ) {
309 String shortest = null;
310 for ( String string : strings ) {
311 if ( shortest == null || string.length() < shortest.length() ) shortest = string;
312 }
313 return shortest;
314 }
315
316 }