View Javadoc
1   /*
2    * The baseCode project
3    *
4    * Copyright (c) 2010 University of British Columbia
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.ontology.jena;
20  
21  import com.hp.hpl.jena.ontology.OntDocumentManager;
22  import com.hp.hpl.jena.ontology.OntModel;
23  import com.hp.hpl.jena.ontology.OntModelSpec;
24  import com.hp.hpl.jena.query.Dataset;
25  import com.hp.hpl.jena.rdf.model.*;
26  import com.hp.hpl.jena.shared.CannotCreateException;
27  import com.hp.hpl.jena.shared.JenaException;
28  import com.hp.hpl.jena.tdb.TDBFactory;
29  import org.apache.commons.io.FileUtils;
30  import org.apache.commons.lang3.StringUtils;
31  import org.apache.commons.lang3.time.StopWatch;
32  import org.slf4j.Logger;
33  import org.slf4j.LoggerFactory;
34  import ubic.basecode.util.Configuration;
35  
36  import javax.annotation.Nullable;
37  import java.io.*;
38  import java.net.HttpURLConnection;
39  import java.net.URL;
40  import java.net.URLConnection;
41  import java.nio.channels.ClosedByInterruptException;
42  import java.nio.file.Files;
43  import java.nio.file.Paths;
44  import java.nio.file.StandardCopyOption;
45  
46  /**
47   * Reads ontologies from OWL resources
48   *
49   * @author paul
50   */
51  class OntologyLoader {
52  
53      private static final Logger log = LoggerFactory.getLogger( OntologyLoader.class );
54  
55      private static final String OLD_CACHE_SUFFIX = ".old";
56      private static final String TMP_CACHE_SUFFIX = ".tmp";
57  
58      /**
59       * Load an ontology model into memory from a stream.
60       * <p>
61       * Uses {@link OntModelSpec#OWL_MEM_TRANS_INF}.
62       */
63      static OntModel createMemoryModel( InputStream is, String name, boolean processImports, OntModelSpec spec ) throws JenaException {
64          OntModel model = getModel( name, processImports, spec );
65          model.read( is, name );
66          return model;
67      }
68  
69      /**
70       * Load an ontology from a URL and store it in memory.
71       * <p>
72       * Use this type of model when fast access is critical and memory is available. If load from URL fails, attempt to
73       * load from disk cache under @cacheName.
74       *
75       * @param url            a URL where the OWL file is stored
76       * @param cacheName      unique name of this ontology, will be used to load from disk in case of failed url connection
77       * @param processImports process imports
78       * @param spec           spec to use as a basis
79       */
80      static OntModel createMemoryModel( String url, String name, @Nullable String cacheName, boolean processImports, OntModelSpec spec ) throws JenaException, IOException {
81          StopWatch timer = StopWatch.createStarted();
82          OntModel model = getModel( name, processImports, spec );
83          readModelFromUrl( model, url, cacheName );
84          log.debug( "Loading ontology model for {} took {} ms", url, timer.getTime() );
85          return model;
86      }
87  
88      private static void readModelFromUrl( OntModel model, String url, @Nullable String cacheName ) throws IOException {
89          boolean attemptToLoadFromDisk = false;
90          URLConnection urlc = null;
91          try {
92              urlc = openConnection( url );
93              try ( InputStream in = urlc.getInputStream() ) {
94                  if ( cacheName != null ) {
95                      // write tmp to disk
96                      File tempFile = getTmpDiskCachePath( cacheName );
97                      FileUtils.createParentDirectories( tempFile );
98                      Files.copy( in, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
99                      // read from disk
100                     try ( InputStream is = Files.newInputStream( tempFile.toPath() ) ) {
101                         model.read( is, url );
102                     }
103                 } else {
104                     // skip the cache and simply read the stream into the model
105                     model.read( in, url );
106                 }
107             }
108         } catch ( ClosedByInterruptException e ) {
109             throw e;
110         } catch ( IOException e ) {
111             log.error( "Failed to load ontology model for {}, will attempt to load from disk.", url, e );
112             attemptToLoadFromDisk = true;
113         } finally {
114             if ( urlc instanceof HttpURLConnection ) {
115                 ( ( HttpURLConnection ) urlc ).disconnect();
116             }
117         }
118 
119         if ( cacheName != null ) {
120             File f = getDiskCachePath( cacheName );
121             File tempFile = getTmpDiskCachePath( cacheName );
122             File oldFile = getOldDiskCachePath( cacheName );
123             if ( attemptToLoadFromDisk ) {
124                 // Attempt to load from disk cache
125                 if ( f.isFile() ) {
126                     StopWatch timer = StopWatch.createStarted();
127                     try ( BufferedReader buf = new BufferedReader( new FileReader( f ) ) ) {
128                         model.read( buf, url );
129                         // We successfully loaded the cached ontology. Copy the loaded ontology to oldFile
130                         // so that we don't recreate indices during initialization based on a false change in
131                         // the ontology.
132                         FileUtils.createParentDirectories( oldFile );
133                         Files.copy( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
134                         log.debug( "Load model from disk took {} ms", timer.getTime() );
135                     }
136                 } else {
137                     throw new RuntimeException(
138                         "Ontology failed load from URL (" + url + ") and disk cache does not exist: " + cacheName );
139                 }
140             } else if ( tempFile.exists() ) {
141                 // Model was successfully loaded into memory from URL with given cacheName
142                 // Save cache to disk (rename temp file)
143                 log.debug( "Caching ontology to disk: {} under {}", cacheName, f.getAbsolutePath() );
144                 try {
145                     // Need to compare previous to current so instead of overwriting we'll move the old file
146                     if ( f.exists() ) {
147                         FileUtils.createParentDirectories( oldFile );
148                         Files.move( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
149                     } else {
150                         FileUtils.createParentDirectories( f );
151                     }
152                     Files.move( tempFile.toPath(), f.toPath(), StandardCopyOption.REPLACE_EXISTING );
153                 } catch ( IOException e ) {
154                     log.error( "Failed to cache ontology {} to disk.", url, e );
155                 }
156             }
157         }
158     }
159 
160     /**
161      * Create an ontology model for a TDB.
162      * @param dataset        TDB dataset
163      * @param name           name of the model to load, or null for the default model
164      * @param processImports whether to process imports or not, it is preferable not to if your TDB directory already
165      *                       contains all the necessary definitions.
166      * @param spec           spec to use to create the ontology model
167      */
168     public static OntModel createTdbModel( Dataset dataset, @Nullable String name, boolean processImports, OntModelSpec spec ) {
169         ModelMaker maker = ModelFactory.createMemModelMaker();
170         Model base;
171         if ( name != null ) {
172             base = dataset.getNamedModel( name );
173         } else {
174             base = dataset.getDefaultModel();
175         }
176         if ( base.isEmpty() ) {
177             throw new IllegalStateException( String.format( "The %s at %s is empty.",
178                 name != null ? "named model " + name : "default model", dataset ) );
179         }
180         return getModel( maker, base, processImports, spec );
181     }
182 
183     /**
184      * ModelFactory.createMemModelMaker()
185      * Get model that is entirely in memory.
186      */
187     private static OntModel getModel( String name, boolean processImports, OntModelSpec spec ) {
188         ModelMaker maker = ModelFactory.createMemModelMaker();
189         Model base = maker.createModel( name, false );
190         return getModel( maker, base, processImports, spec );
191     }
192 
193     private static OntModel getModel( ModelMaker maker, Model base, boolean processImports, OntModelSpec spec ) {
194         // the spec is a shallow copy, so we need to copy the document manager as well to modify it
195         spec = new OntModelSpec( spec );
196         spec.setImportModelMaker( maker );
197         spec.setDocumentManager( new OntDocumentManager() );
198         spec.getDocumentManager().setProcessImports( processImports );
199         spec.setImportModelGetter( new ModelGetter() {
200             @Override
201             public Model getModel( String URL ) {
202                 return null;
203             }
204 
205             @Override
206             public Model getModel( String URL, ModelReader loadIfAbsent ) {
207                 Model model = maker.createModel( URL );
208                 URLConnection urlc = null;
209                 try {
210                     urlc = openConnection( URL );
211                     try ( InputStream in = urlc.getInputStream() ) {
212                         return model.read( in, URL );
213                     }
214                 } catch ( JenaException | IOException e ) {
215                     throw new CannotCreateException( String.format( "Failed to resolve import for %s.", URL ), e );
216                 } finally {
217                     if ( urlc instanceof HttpURLConnection ) {
218                         ( ( HttpURLConnection ) urlc ).disconnect();
219                     }
220                 }
221             }
222         } );
223         OntModel model = ModelFactory.createOntologyModel( spec, base );
224         model.setStrictMode( false ); // fix for owl2 files
225         return model;
226     }
227 
228     private static URLConnection openConnection( String url ) throws IOException {
229         URLConnection urlc = openConnectionInternal( url );
230 
231         // this happens if there is a change of protocol (http:// -> https://)
232         if ( urlc instanceof HttpURLConnection ) {
233             int code = ( ( HttpURLConnection ) urlc ).getResponseCode();
234             String newUrl = urlc.getHeaderField( "Location" );
235             if ( code >= 300 && code < 400 ) {
236                 if ( StringUtils.isBlank( newUrl ) ) {
237                     throw new RuntimeException( String.format( "Redirect response for %s is lacking a 'Location' header.", url ) );
238                 }
239                 log.debug( "Redirect to {} from {}", newUrl, url );
240                 urlc = openConnectionInternal( newUrl );
241             }
242         }
243 
244         return urlc;
245     }
246 
247     private static URLConnection openConnectionInternal( String url ) throws IOException {
248         URLConnection urlc = new URL( url ).openConnection();
249         // help ensure mis-configured web servers aren't causing trouble.
250         urlc.setRequestProperty( "Accept", "application/rdf+xml" );
251         if ( urlc instanceof HttpURLConnection ) {
252             ( ( HttpURLConnection ) urlc ).setInstanceFollowRedirects( true );
253         }
254         log.debug( "Connecting to {}", url );
255         urlc.connect(); // Will error here on bad URL
256         return urlc;
257     }
258 
259     static boolean hasChanged( String cacheName ) {
260         // default
261         if ( StringUtils.isBlank( cacheName ) ) {
262             return false;
263         }
264         try {
265             File newFile = getDiskCachePath( cacheName );
266             File oldFile = getOldDiskCachePath( cacheName );
267             // This might be slow considering it calls IOUtils.contentsEquals which compares byte-by-byte
268             // in the worst case scenario.
269             // In this case consider using NIO for higher-performance IO using Channels and Buffers.
270             // Ex. Use a 4MB Memory-Mapped IO operation.
271             return !FileUtils.contentEquals( newFile, oldFile );
272         } catch ( IOException e ) {
273             log.error( "Failed to compare current and previous cached ontologies, will report as not changed.", e );
274             return false;
275         }
276     }
277 
278     static void deleteOldCache( String cacheName ) throws IOException {
279         File dir = getOldDiskCachePath( cacheName );
280         if ( dir.exists() ) {
281             FileUtils.delete( dir );
282         }
283     }
284 
285     /**
286      * Obtain the path for the ontology cache.
287      */
288     static File getDiskCachePath( String name ) {
289         if ( StringUtils.isBlank( name ) ) {
290             throw new IllegalArgumentException( "The ontology must have a suitable name for being loaded from cache." );
291         }
292         String ontologyDir = Configuration.getString( "ontology.cache.dir" ); // e.g., /something/gemmaData/ontologyCache
293         if ( StringUtils.isBlank( ontologyDir ) ) {
294             return Paths.get( System.getProperty( "java.io.tmpdir" ), "ontologyCache", "ontology", name ).toFile();
295         }
296         return Paths.get( ontologyDir, "ontology", name ).toFile();
297     }
298 
299     static File getOldDiskCachePath( String name ) {
300         File indexFile = getDiskCachePath( name );
301         return new File( indexFile.getAbsolutePath() + OLD_CACHE_SUFFIX );
302     }
303 
304     static File getTmpDiskCachePath( String name ) {
305         File indexFile = getDiskCachePath( name );
306         return new File( indexFile.getAbsolutePath() + TMP_CACHE_SUFFIX );
307     }
308 }