1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package ubic.basecode.util;
20
21 import org.apache.commons.csv.CSVFormat;
22 import org.apache.commons.csv.CSVParser;
23 import org.apache.commons.csv.CSVRecord;
24 import org.apache.commons.lang3.StringUtils;
25 import org.slf4j.Logger;
26 import org.slf4j.LoggerFactory;
27
28 import java.io.IOException;
29 import java.io.StringReader;
30 import java.util.Collection;
31
32
33
34
35 public class StringUtil {
36
37 private static final Logger log = LoggerFactory.getLogger( StringUtil.class );
38
39
40
41
42
43
44
45 public static String append( String appendee, String appendant, String separator ) {
46 if ( StringUtils.isBlank( appendee ) ) {
47 return appendant;
48 }
49 return appendee + separator + appendant;
50
51 }
52
53
54
55
56
57
58
59 public static String commonPrefix( Collection<String> strings ) {
60
61 String shortest = shortestString( strings );
62
63 if ( shortest == null || shortest.length() == 0 ) return null;
64
65 String test = shortest;
66 while ( test.length() > 0 ) {
67 boolean found = true;
68 for ( String string : strings ) {
69 if ( !string.startsWith( test ) ) {
70 found = false;
71 break;
72 }
73 }
74 if ( found ) return test;
75 test = test.substring( 0, test.length() - 1 );
76 }
77 return null;
78 }
79
80
81
82
83
84
85
86 public static String commonSuffix( Collection<String> strings ) {
87 String shortest = shortestString( strings );
88
89 if ( shortest == null || shortest.length() == 0 ) return null;
90
91 String test = shortest;
92 while ( test.length() > 0 ) {
93 boolean found = true;
94 for ( String string : strings ) {
95 if ( !string.endsWith( test ) ) {
96 found = false;
97 break;
98 }
99 }
100 if ( found ) return test;
101 test = test.substring( 1 );
102 }
103 return null;
104 }
105
106
107
108
109
110
111
112
113 public static boolean containsValidCharacter( String s ) {
114
115 if ( s != null ) {
116
117 for ( int i = 0; i < s.length(); i++ ) {
118
119 Character cha = s.charAt( i );
120
121 if ( !( isLatinLetter( cha ) || Character.isDigit( cha ) || cha == '=' || cha == ',' || cha == '('
122 || cha == ')' || cha == '\'' || Character.isWhitespace( cha ) || cha == '/' || cha == '?'
123 || cha == '+' || cha == ':' || cha == '-' || cha == '<' || cha == '>' || cha == '"'
124 || cha == '%' || cha == '.' || cha == '*' || cha == '[' || cha == ']' || cha == ';'
125 || cha == '_' || cha == '\\' || cha == '|' || cha == '&' || cha == '^' || cha == '#'
126 || cha == '{' || cha == '}' || cha == '!' || cha == '~' || cha == '@' || cha == '—'
127 || cha == '×' || cha == '–' || cha == ' ' ) ) {
128
129
130
131 log.warn( "Illegal character found: " + cha + " found on description: " + s );
132
133 return false;
134 }
135 }
136 }
137 return true;
138 }
139
140
141
142
143
144 public static String[] csvSplit( String line ) {
145 try ( CSVParser parser = CSVParser.parse( new StringReader( line ), CSVFormat.DEFAULT ) ) {
146 for ( CSVRecord record : parser ) {
147 return record.values();
148 }
149 throw new IllegalArgumentException( "No CSV records found in line." );
150 } catch ( IOException e ) {
151 throw new RuntimeException( e );
152 }
153 }
154
155
156
157
158
159
160
161 public static String cvs2tsv( String line ) {
162
163 StringBuffer newLine = new StringBuffer( line );
164
165 boolean change = true;
166
167 for ( int position = 0; position < newLine.length(); position++ ) {
168
169 if ( newLine.charAt( position ) == ',' && change ) {
170 newLine.setCharAt( position, '\t' );
171 } else if ( newLine.charAt( position ) == '"' ) {
172
173 if ( change ) {
174 change = false;
175 } else {
176 change = true;
177 }
178 }
179 }
180 return newLine.toString().replaceAll( "\"", "" );
181 }
182
183 public static boolean isLatinLetter( char c ) {
184 return ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' );
185 }
186
187
188
189
190
191
192
193
194
195 public static String makeValidForR( String s ) {
196
197
198 if ( s.matches( "^\\.?[0-9].+" ) ) {
199 s = "X" + s;
200 }
201
202
203
204
205 return s.replaceAll( "[\\W]+", "." );
206 }
207
208
209
210
211
212
213 public static Long twoStringHashKey( String stringi, String stringj ) {
214
215 if ( stringi.hashCode() < stringj.hashCode() ) {
216 return new Long( stringi.hashCode() | ( long ) stringj.hashCode() << 32 );
217 }
218 return new Long( stringj.hashCode() | ( long ) stringi.hashCode() << 32 );
219 }
220
221 private static String shortestString( Collection<String> strings ) {
222 String shortest = null;
223 for ( String string : strings ) {
224 if ( shortest == null || string.length() < shortest.length() ) shortest = string;
225 }
226 return shortest;
227 }
228
229 }