1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package ubic.basecode.ontology;
20
21 import java.io.BufferedReader;
22 import java.io.File;
23 import java.io.FileReader;
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.io.InputStreamReader;
27 import java.io.Reader;
28 import java.net.HttpURLConnection;
29 import java.net.URL;
30 import java.net.URLConnection;
31 import java.nio.file.Files;
32 import java.nio.file.StandardCopyOption;
33 import java.util.Collection;
34 import java.util.HashSet;
35
36 import org.apache.commons.io.FileUtils;
37 import org.apache.commons.lang3.StringUtils;
38 import org.apache.commons.lang3.time.StopWatch;
39 import org.slf4j.Logger;
40 import org.slf4j.LoggerFactory;
41
42 import com.hp.hpl.jena.ontology.Individual;
43 import com.hp.hpl.jena.ontology.OntClass;
44 import com.hp.hpl.jena.ontology.OntModel;
45 import com.hp.hpl.jena.ontology.OntModelSpec;
46 import com.hp.hpl.jena.rdf.model.Model;
47 import com.hp.hpl.jena.rdf.model.ModelFactory;
48 import com.hp.hpl.jena.rdf.model.ModelMaker;
49 import com.hp.hpl.jena.util.iterator.ExtendedIterator;
50
51 import ubic.basecode.ontology.model.OntologyIndividual;
52 import ubic.basecode.ontology.model.OntologyIndividualImpl;
53 import ubic.basecode.ontology.model.OntologyProperty;
54 import ubic.basecode.ontology.model.OntologyResource;
55 import ubic.basecode.ontology.model.OntologyTerm;
56 import ubic.basecode.ontology.model.OntologyTermImpl;
57 import ubic.basecode.ontology.model.PropertyFactory;
58 import ubic.basecode.util.Configuration;
59
60
61
62
63
64
65 public class OntologyLoader {
66
67 private static Logger log = LoggerFactory.getLogger( OntologyLoader.class );
68 private static final int MAX_CONNECTION_TRIES = 3;
69 private static final String OLD_CACHE_SUFFIX = ".old";
70 private static final String TMP_CACHE_SUFFIX = ".tmp";
71
72
73
74
75
76
77 public static Collection<OntologyResource> initialize( String url, OntModel model ) {
78
79 Collection<OntologyResource> result = new HashSet<>();
80
81 ExtendedIterator<OntClass> classIt = model.listClasses();
82 int count = 0;
83 log.debug( "Reading classes for ontology: " + url );
84 while ( classIt.hasNext() ) {
85 OntClass element = classIt.next();
86 if ( element.isAnon() ) continue;
87 OntologyTerm ontologyTerm = new OntologyTermImpl( element );
88 result.add( ontologyTerm );
89 if ( ++count % 1000 == 0 ) {
90 log.debug( "Loaded " + count + " terms, last was " + ontologyTerm );
91 }
92 }
93
94 log.debug( "Loaded " + count + " terms" );
95
96 ExtendedIterator<com.hp.hpl.jena.ontology.ObjectProperty> propIt = model.listObjectProperties();
97 count = 0;
98 log.debug( "Reading object properties..." );
99 while ( propIt.hasNext() ) {
100 com.hp.hpl.jena.ontology.ObjectProperty element = propIt.next();
101 OntologyProperty ontologyTerm = PropertyFactory.asProperty( element );
102 if ( ontologyTerm == null ) continue;
103 result.add( ontologyTerm );
104 if ( ++count % 1000 == 0 ) {
105 log.debug( "Loaded " + count + " object properties, last was " + ontologyTerm );
106 }
107 }
108
109 ExtendedIterator<com.hp.hpl.jena.ontology.DatatypeProperty> dtPropIt = model.listDatatypeProperties();
110 log.debug( "Reading datatype properties..." );
111 while ( dtPropIt.hasNext() ) {
112 com.hp.hpl.jena.ontology.DatatypeProperty element = dtPropIt.next();
113 OntologyProperty ontologyTerm = PropertyFactory.asProperty( element );
114 if ( ontologyTerm == null ) continue;
115 result.add( ontologyTerm );
116 if ( ++count % 1000 == 0 ) {
117 log.debug( "Loaded " + count + " datatype properties, last was " + ontologyTerm );
118 }
119 }
120
121 log.debug( "Loaded " + count + " properties" );
122
123 ExtendedIterator<Individual> indiIt = model.listIndividuals();
124 count = 0;
125 log.debug( "Reading individuals..." );
126 while ( indiIt.hasNext() ) {
127 Individual element = indiIt.next();
128 if ( element.isAnon() ) continue;
129 OntologyIndividual ontologyTerm = new OntologyIndividualImpl( element );
130 result.add( ontologyTerm );
131 if ( ++count % 1000 == 0 ) {
132 log.debug( "Loaded " + count + " individuals, last was " + ontologyTerm );
133 }
134 }
135 log.debug( "Loaded " + count + " individuals" );
136 return result;
137 }
138
139
140
141
142
143
144
145
146
147 public static OntModel loadMemoryModel( InputStream is, String url, OntModelSpec spec ) {
148 OntModel model = getMemoryModel( url, spec );
149 model.read( is, null );
150 return model;
151 }
152
153
154
155
156
157
158
159
160 public static OntModel loadMemoryModel( String url ) {
161 return loadMemoryModel( url, OntModelSpec.OWL_MEM_TRANS_INF );
162 }
163
164
165
166
167
168
169
170
171
172 public static OntModel loadMemoryModel( String url, String cacheName ) {
173 return loadMemoryModel( url, OntModelSpec.OWL_MEM_TRANS_INF, cacheName );
174 }
175
176
177
178
179
180
181
182 public static OntModel loadMemoryModel( String url, OntModelSpec spec ) {
183 return loadMemoryModel( url, spec, null );
184 }
185
186
187
188
189
190
191
192
193
194
195 public static OntModel loadMemoryModel( String url, OntModelSpec spec, String cacheName ) {
196 StopWatch timer = new StopWatch();
197 timer.start();
198 OntModel model = getMemoryModel( url, spec );
199
200 URLConnection urlc = null;
201 int tries = 0;
202 while ( tries < MAX_CONNECTION_TRIES ) {
203 try {
204 urlc = new URL( url ).openConnection();
205
206 urlc.setRequestProperty( "Accept", "application/rdf+xml" );
207
208 try {
209 HttpURLConnection c = ( HttpURLConnection ) urlc;
210 c.setInstanceFollowRedirects( true );
211 } catch ( ClassCastException e ) {
212
213 }
214
215 if ( tries > 0 ) {
216 log.info( "Retrying connecting to " + url + " [" + tries + "/" + MAX_CONNECTION_TRIES
217 + " of max tries" );
218 } else {
219 log.info( "Connecting to " + url );
220 }
221
222 urlc.connect();
223
224 if ( urlc instanceof HttpURLConnection ) {
225 String newUrl = urlc.getHeaderField( "Location" );
226
227 if ( StringUtils.isNotBlank( newUrl ) ) {
228 log.info( "Redirect to " + newUrl );
229 urlc = new URL( newUrl ).openConnection();
230
231 urlc.setRequestProperty( "Accept", "application/rdf+xml" );
232 urlc.connect();
233 }
234 }
235
236 break;
237 } catch ( IOException e ) {
238
239 log.error( e + " retrying?" );
240 tries++;
241 }
242 }
243
244 if ( urlc != null ) {
245 try (InputStream in = urlc.getInputStream();) {
246 Reader reader;
247 if ( cacheName != null ) {
248
249 File tempFile = getTmpDiskCachePath( cacheName );
250 if ( tempFile == null ) {
251 reader = new InputStreamReader( in );
252 } else {
253 tempFile.getParentFile().mkdirs();
254 Files.copy( in, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
255 reader = new FileReader( tempFile );
256 }
257
258 } else {
259
260 reader = new InputStreamReader( in );
261 }
262
263 assert reader != null;
264 try (BufferedReader buf = new BufferedReader( reader );) {
265 model.read( buf, url );
266 }
267
268 log.info( "Load model: " + timer.getTime() + "ms" );
269 } catch ( IOException e ) {
270 log.error( e.getMessage(), e );
271 }
272 }
273
274 if ( cacheName != null ) {
275
276 File f = getDiskCachePath( cacheName );
277 File tempFile = getTmpDiskCachePath( cacheName );
278 File oldFile = getOldDiskCachePath( cacheName );
279
280 if ( model.isEmpty() ) {
281
282
283 if ( f == null ) {
284 throw new RuntimeException(
285 "Ontology cache directory required to load from disk: ontology.cache.dir" );
286 }
287
288 if ( f.exists() && !f.isDirectory() ) {
289 try (BufferedReader buf = new BufferedReader( new FileReader( f ) );) {
290 model.read( buf, url );
291
292
293
294 Files.copy( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
295 log.info( "Load model from disk: " + timer.getTime() + "ms" );
296 } catch ( IOException e ) {
297 log.error( e.getMessage(), e );
298 throw new RuntimeException(
299 "Ontology failed load from URL (" + url + ") and disk cache: " + cacheName );
300 }
301 } else {
302 throw new RuntimeException(
303 "Ontology failed load from URL (" + url + ") and disk cache does not exist: " + cacheName );
304 }
305
306 } else {
307
308
309 log.info( "Caching ontology to disk: " + cacheName );
310 if ( f != null ) {
311 try {
312
313 f.createNewFile();
314 Files.move( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
315 Files.move( tempFile.toPath(), f.toPath(), StandardCopyOption.REPLACE_EXISTING );
316 } catch ( IOException e ) {
317 log.error( e.getMessage(), e );
318 }
319 } else {
320 log.warn( "Ontology cache directory required to save to disk: ontology.cache.dir" );
321 }
322 }
323
324 }
325
326 assert !model.isEmpty();
327
328 return model;
329 }
330
331 public static boolean hasChanged( String cacheName ) {
332 boolean changed = false;
333 if ( StringUtils.isBlank( cacheName ) ) {
334 return changed;
335 }
336
337 File newFile = getDiskCachePath( cacheName );
338 File oldFile = getOldDiskCachePath( cacheName );
339
340 try {
341
342
343
344
345 if ( newFile != null && oldFile != null )
346 changed = !FileUtils.contentEquals( newFile, oldFile );
347 } catch ( IOException e ) {
348 log.error( e.getMessage() );
349 }
350
351 return changed;
352
353 }
354
355 public static boolean deleteOldCache( String cacheName ) {
356 File f = getOldDiskCachePath( cacheName );
357 if ( f != null )
358 return f.delete();
359 return false;
360 }
361
362
363
364
365
366
367
368 static OntModel getMemoryModel( String url ) {
369 return getMemoryModel( url, OntModelSpec.OWL_MEM_RDFS_INF );
370 }
371
372
373
374
375
376
377
378
379 static OntModel getMemoryModel( String url, OntModelSpec specification ) {
380 OntModelSpec spec = new OntModelSpec( specification );
381 ModelMaker maker = ModelFactory.createMemModelMaker();
382 Model base = maker.createModel( url, false );
383 spec.setImportModelMaker( maker );
384 spec.getDocumentManager().setProcessImports( false );
385
386 OntModel model = ModelFactory.createOntologyModel( spec, base );
387 model.setStrictMode( false );
388 return model;
389 }
390
391
392
393
394
395 public static File getDiskCachePath( String name ) {
396 String ontologyDir = Configuration.getString( "ontology.cache.dir" );
397 if ( StringUtils.isBlank( ontologyDir ) || StringUtils.isBlank( name ) ) {
398 return null;
399 }
400
401 if ( !new File( ontologyDir ).exists() ) {
402 new File( ontologyDir ).mkdirs();
403 }
404
405 assert ontologyDir != null;
406
407 String path = ontologyDir + File.separator + "ontology" + File.separator + name;
408
409 File indexFile = new File( path );
410
411 return indexFile;
412 }
413
414 static File getOldDiskCachePath( String name ) {
415 File indexFile = getDiskCachePath( name );
416 if ( indexFile == null ) {
417 return null;
418 }
419 return new File( indexFile.getAbsolutePath() + OLD_CACHE_SUFFIX );
420
421 }
422
423 static File getTmpDiskCachePath( String name ) {
424 File indexFile = getDiskCachePath( name );
425 if ( indexFile == null ) {
426 return null;
427 }
428 return new File( indexFile.getAbsolutePath() + TMP_CACHE_SUFFIX );
429
430 }
431
432 }