View Javadoc
1   /*
2    * The baseCode project
3    * 
4    * Copyright (c) 2010 University of British Columbia
5    * 
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.ontology;
20  
21  import java.io.BufferedReader;
22  import java.io.File;
23  import java.io.FileReader;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.io.InputStreamReader;
27  import java.io.Reader;
28  import java.net.HttpURLConnection;
29  import java.net.URL;
30  import java.net.URLConnection;
31  import java.nio.file.Files;
32  import java.nio.file.StandardCopyOption;
33  import java.util.Collection;
34  import java.util.HashSet;
35  
36  import org.apache.commons.io.FileUtils;
37  import org.apache.commons.lang3.StringUtils;
38  import org.apache.commons.lang3.time.StopWatch;
39  import org.slf4j.Logger;
40  import org.slf4j.LoggerFactory;
41  
42  import com.hp.hpl.jena.ontology.Individual;
43  import com.hp.hpl.jena.ontology.OntClass;
44  import com.hp.hpl.jena.ontology.OntModel;
45  import com.hp.hpl.jena.ontology.OntModelSpec;
46  import com.hp.hpl.jena.rdf.model.Model;
47  import com.hp.hpl.jena.rdf.model.ModelFactory;
48  import com.hp.hpl.jena.rdf.model.ModelMaker;
49  import com.hp.hpl.jena.util.iterator.ExtendedIterator;
50  
51  import ubic.basecode.ontology.model.OntologyIndividual;
52  import ubic.basecode.ontology.model.OntologyIndividualImpl;
53  import ubic.basecode.ontology.model.OntologyProperty;
54  import ubic.basecode.ontology.model.OntologyResource;
55  import ubic.basecode.ontology.model.OntologyTerm;
56  import ubic.basecode.ontology.model.OntologyTermImpl;
57  import ubic.basecode.ontology.model.PropertyFactory;
58  import ubic.basecode.util.Configuration;
59  
60  /**
61   * Reads ontologies from OWL resources
62   * 
63   * @author  paul
64   */
65  public class OntologyLoader {
66  
67      private static Logger log = LoggerFactory.getLogger( OntologyLoader.class );
68      private static final int MAX_CONNECTION_TRIES = 3;
69      private static final String OLD_CACHE_SUFFIX = ".old";
70      private static final String TMP_CACHE_SUFFIX = ".tmp";
71  
72      /**
73       * @param  url
74       * @param  model
75       * @return
76       */
77      public static Collection<OntologyResource> initialize( String url, OntModel model ) {
78  
79          Collection<OntologyResource> result = new HashSet<>();
80  
81          ExtendedIterator<OntClass> classIt = model.listClasses();
82          int count = 0;
83          log.debug( "Reading classes for ontology: " + url );
84          while ( classIt.hasNext() ) {
85              OntClass element = classIt.next();
86              if ( element.isAnon() ) continue;
87              OntologyTerm ontologyTerm = new OntologyTermImpl( element );
88              result.add( ontologyTerm );
89              if ( ++count % 1000 == 0 ) {
90                  log.debug( "Loaded " + count + " terms, last was " + ontologyTerm );
91              }
92          }
93  
94          log.debug( "Loaded " + count + " terms" );
95  
96          ExtendedIterator<com.hp.hpl.jena.ontology.ObjectProperty> propIt = model.listObjectProperties();
97          count = 0;
98          log.debug( "Reading object properties..." );
99          while ( propIt.hasNext() ) {
100             com.hp.hpl.jena.ontology.ObjectProperty element = propIt.next();
101             OntologyProperty ontologyTerm = PropertyFactory.asProperty( element );
102             if ( ontologyTerm == null ) continue; // couldn't be converted for some reason.
103             result.add( ontologyTerm );
104             if ( ++count % 1000 == 0 ) {
105                 log.debug( "Loaded " + count + " object properties, last was " + ontologyTerm );
106             }
107         }
108 
109         ExtendedIterator<com.hp.hpl.jena.ontology.DatatypeProperty> dtPropIt = model.listDatatypeProperties();
110         log.debug( "Reading datatype properties..." );
111         while ( dtPropIt.hasNext() ) {
112             com.hp.hpl.jena.ontology.DatatypeProperty element = dtPropIt.next();
113             OntologyProperty ontologyTerm = PropertyFactory.asProperty( element );
114             if ( ontologyTerm == null ) continue; // couldn't be converted for some reason.
115             result.add( ontologyTerm );
116             if ( ++count % 1000 == 0 ) {
117                 log.debug( "Loaded " + count + " datatype properties, last was " + ontologyTerm );
118             }
119         }
120 
121         log.debug( "Loaded " + count + " properties" );
122 
123         ExtendedIterator<Individual> indiIt = model.listIndividuals();
124         count = 0;
125         log.debug( "Reading individuals..." );
126         while ( indiIt.hasNext() ) {
127             Individual element = indiIt.next();
128             if ( element.isAnon() ) continue;
129             OntologyIndividual ontologyTerm = new OntologyIndividualImpl( element );
130             result.add( ontologyTerm );
131             if ( ++count % 1000 == 0 ) {
132                 log.debug( "Loaded " + count + " individuals, last was " + ontologyTerm );
133             }
134         }
135         log.debug( "Loaded " + count + " individuals" );
136         return result;
137     }
138 
139     /**
140      * Load an ontology into memory. Use this type of model when fast access is critical and memory is available.
141      * 
142      * @param  is
143      * @param       url, used as a key
144      * @param  spec
145      * @return
146      */
147     public static OntModel loadMemoryModel( InputStream is, String url, OntModelSpec spec ) {
148         OntModel model = getMemoryModel( url, spec );
149         model.read( is, null );
150         return model;
151     }
152 
153     /**
154      * Load an ontology into memory. Use this type of model when fast access is critical and memory is available. Uses
155      * OWL_MEM_TRANS_INF
156      * 
157      * @param  url
158      * @return
159      */
160     public static OntModel loadMemoryModel( String url ) {
161         return loadMemoryModel( url, OntModelSpec.OWL_MEM_TRANS_INF );
162     }
163 
164     /**
165      * Load an ontology into memory. Use this type of model when fast access is critical and memory is available. Uses
166      * OWL_MEM_TRANS_INF
167      * If load from URL fails, attempt to load from disk cache under @cacheName.
168      * 
169      * @param  url
170      * @return
171      */
172     public static OntModel loadMemoryModel( String url, String cacheName ) {
173         return loadMemoryModel( url, OntModelSpec.OWL_MEM_TRANS_INF, cacheName );
174     }
175 
176     /**
177      * Load an ontology into memory. Use this type of model when fast access is critical and memory is available.
178      * 
179      * @param  url
180      * @return
181      */
182     public static OntModel loadMemoryModel( String url, OntModelSpec spec ) {
183         return loadMemoryModel( url, spec, null );
184     }
185 
186     /**
187      * Load an ontology into memory. Use this type of model when fast access is critical and memory is available.
188      * If load from URL fails, attempt to load from disk cache under @cacheName.
189      * 
190      * @param  url
191      * @param  spec      e.g. OWL_MEM_TRANS_INF
192      * @param  cacheName unique name of this ontology, will be used to load from disk in case of failed url connection
193      * @return
194      */
195     public static OntModel loadMemoryModel( String url, OntModelSpec spec, String cacheName ) {
196         StopWatch timer = new StopWatch();
197         timer.start();
198         OntModel model = getMemoryModel( url, spec );
199 
200         URLConnection urlc = null;
201         int tries = 0;
202         while ( tries < MAX_CONNECTION_TRIES ) {
203             try {
204                 urlc = new URL( url ).openConnection();
205                 // help ensure mis-configured web servers aren't causing trouble.
206                 urlc.setRequestProperty( "Accept", "application/rdf+xml" );
207 
208                 try {
209                     HttpURLConnection c = ( HttpURLConnection ) urlc;
210                     c.setInstanceFollowRedirects( true );
211                 } catch ( ClassCastException e ) {
212                     // not via http, using a FileURLConnection.
213                 }
214 
215                 if ( tries > 0 ) {
216                     log.info( "Retrying connecting to " + url + " [" + tries + "/" + MAX_CONNECTION_TRIES
217                             + " of max tries" );
218                 } else {
219                     log.info( "Connecting to " + url );
220                 }
221 
222                 urlc.connect(); // Will error here on bad URL
223 
224                 if ( urlc instanceof HttpURLConnection ) {
225                     String newUrl = urlc.getHeaderField( "Location" );
226 
227                     if ( StringUtils.isNotBlank( newUrl ) ) {
228                         log.info( "Redirect to " + newUrl );
229                         urlc = new URL( newUrl ).openConnection();
230                         // help ensure mis-configured web servers aren't causing trouble.
231                         urlc.setRequestProperty( "Accept", "application/rdf+xml" );
232                         urlc.connect();
233                     }
234                 }
235 
236                 break;
237             } catch ( IOException e ) {
238                 // try to recover.
239                 log.error( e + " retrying?" );
240                 tries++;
241             }
242         }
243 
244         if ( urlc != null ) {
245             try (InputStream in = urlc.getInputStream();) {
246                 Reader reader;
247                 if ( cacheName != null ) {
248                     // write tmp to disk
249                     File tempFile = getTmpDiskCachePath( cacheName );
250                     if ( tempFile == null ) {
251                         reader = new InputStreamReader( in );
252                     } else {
253                         tempFile.getParentFile().mkdirs();
254                         Files.copy( in, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
255                         reader = new FileReader( tempFile );
256                     }
257 
258                 } else {
259                     // Skip the cache
260                     reader = new InputStreamReader( in );
261                 }
262 
263                 assert reader != null;
264                 try (BufferedReader buf = new BufferedReader( reader );) {
265                     model.read( buf, url );
266                 }
267 
268                 log.info( "Load model: " + timer.getTime() + "ms" );
269             } catch ( IOException e ) {
270                 log.error( e.getMessage(), e );
271             }
272         }
273 
274         if ( cacheName != null ) {
275 
276             File f = getDiskCachePath( cacheName );
277             File tempFile = getTmpDiskCachePath( cacheName );
278             File oldFile = getOldDiskCachePath( cacheName );
279 
280             if ( model.isEmpty() ) {
281                 // Attempt to load from disk cache
282 
283                 if ( f == null ) {
284                     throw new RuntimeException(
285                             "Ontology cache directory required to load from disk: ontology.cache.dir" );
286                 }
287 
288                 if ( f.exists() && !f.isDirectory() ) {
289                     try (BufferedReader buf = new BufferedReader( new FileReader( f ) );) {
290                         model.read( buf, url );
291                         // We successfully loaded the cached ontology. Copy the loaded ontology to oldFile
292                         // so that we don't recreate indices during initialization based on a false change in
293                         // the ontology.
294                         Files.copy( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
295                         log.info( "Load model from disk: " + timer.getTime() + "ms" );
296                     } catch ( IOException e ) {
297                         log.error( e.getMessage(), e );
298                         throw new RuntimeException(
299                                 "Ontology failed load from URL (" + url + ") and disk cache: " + cacheName );
300                     }
301                 } else {
302                     throw new RuntimeException(
303                             "Ontology failed load from URL (" + url + ") and disk cache does not exist: " + cacheName );
304                 }
305 
306             } else {
307                 // Model was successfully loaded into memory from URL with given cacheName
308                 // Save cache to disk (rename temp file)
309                 log.info( "Caching ontology to disk: " + cacheName );
310                 if ( f != null ) {
311                     try {
312                         // Need to compare previous to current so instead of overwriting we'll move the old file
313                         f.createNewFile();
314                         Files.move( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
315                         Files.move( tempFile.toPath(), f.toPath(), StandardCopyOption.REPLACE_EXISTING );
316                     } catch ( IOException e ) {
317                         log.error( e.getMessage(), e );
318                     }
319                 } else {
320                     log.warn( "Ontology cache directory required to save to disk: ontology.cache.dir" );
321                 }
322             }
323 
324         }
325 
326         assert !model.isEmpty();
327 
328         return model;
329     }
330 
331     public static boolean hasChanged( String cacheName ) {
332         boolean changed = false; // default
333         if ( StringUtils.isBlank( cacheName ) ) {
334             return changed;
335         }
336 
337         File newFile = getDiskCachePath( cacheName );
338         File oldFile = getOldDiskCachePath( cacheName );
339 
340         try {
341             // This might be slow considering it calls IOUtils.contentsEquals which compares byte-by-byte
342             // in the worst case scenario.
343             // In this case consider using NIO for higher-performance IO using Channels and Buffers.
344             // Ex. Use a 4MB Memory-Mapped IO operation.
345             if ( newFile != null && oldFile != null )
346                 changed = !FileUtils.contentEquals( newFile, oldFile );
347         } catch ( IOException e ) {
348             log.error( e.getMessage() );
349         }
350 
351         return changed;
352 
353     }
354 
355     public static boolean deleteOldCache( String cacheName ) {
356         File f = getOldDiskCachePath( cacheName );
357         if ( f != null )
358             return f.delete();
359         return false;
360     }
361 
362     /**
363      * Get model that is entirely in memory with default OntModelSpec.OWL_MEM_RDFS_INF.
364      * 
365      * @param  url
366      * @return
367      */
368     static OntModel getMemoryModel( String url ) {
369         return getMemoryModel( url, OntModelSpec.OWL_MEM_RDFS_INF );
370     }
371 
372     /**
373      * Get model that is entirely in memory.
374      * 
375      * @param  url
376      * @param  specification
377      * @return
378      */
379     static OntModel getMemoryModel( String url, OntModelSpec specification ) {
380         OntModelSpec spec = new OntModelSpec( specification );
381         ModelMaker maker = ModelFactory.createMemModelMaker();
382         Model base = maker.createModel( url, false );
383         spec.setImportModelMaker( maker );
384         spec.getDocumentManager().setProcessImports( false );
385 
386         OntModel model = ModelFactory.createOntologyModel( spec, base );
387         model.setStrictMode( false ); // fix for owl2 files
388         return model;
389     }
390 
391     /**
392      * @param  name
393      * @return
394      */
395     public static File getDiskCachePath( String name ) {
396         String ontologyDir = Configuration.getString( "ontology.cache.dir" ); // e.g., /something/gemmaData/ontologyCache
397         if ( StringUtils.isBlank( ontologyDir ) || StringUtils.isBlank( name ) ) {
398             return null;
399         }
400 
401         if ( !new File( ontologyDir ).exists() ) {
402             new File( ontologyDir ).mkdirs();
403         }
404 
405         assert ontologyDir != null;
406 
407         String path = ontologyDir + File.separator + "ontology" + File.separator + name;
408 
409         File indexFile = new File( path );
410 
411         return indexFile;
412     }
413 
414     static File getOldDiskCachePath( String name ) {
415         File indexFile = getDiskCachePath( name );
416         if ( indexFile == null ) {
417             return null;
418         }
419         return new File( indexFile.getAbsolutePath() + OLD_CACHE_SUFFIX );
420 
421     }
422 
423     static File getTmpDiskCachePath( String name ) {
424         File indexFile = getDiskCachePath( name );
425         if ( indexFile == null ) {
426             return null;
427         }
428         return new File( indexFile.getAbsolutePath() + TMP_CACHE_SUFFIX );
429 
430     }
431 
432 }