View Javadoc
1   /*
2    * The baseCode project
3    *
4    * Copyright (c) 2010 University of British Columbia
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.ontology.jena;
20  
21  import com.hp.hpl.jena.ontology.OntModel;
22  import com.hp.hpl.jena.ontology.OntModelSpec;
23  import com.hp.hpl.jena.rdf.model.*;
24  import org.apache.commons.io.FileUtils;
25  import org.apache.commons.lang3.StringUtils;
26  import org.apache.commons.lang3.time.StopWatch;
27  import org.slf4j.Logger;
28  import org.slf4j.LoggerFactory;
29  import ubic.basecode.util.Configuration;
30  
31  import javax.annotation.Nullable;
32  import java.io.*;
33  import java.net.HttpURLConnection;
34  import java.net.URL;
35  import java.net.URLConnection;
36  import java.nio.file.Files;
37  import java.nio.file.Paths;
38  import java.nio.file.StandardCopyOption;
39  
40  /**
41   * Reads ontologies from OWL resources
42   *
43   * @author paul
44   */
45  public class OntologyLoader {
46  
47      private static final Logger log = LoggerFactory.getLogger( OntologyLoader.class );
48      private static final int MAX_CONNECTION_TRIES = 3;
49      private static final String OLD_CACHE_SUFFIX = ".old";
50      private static final String TMP_CACHE_SUFFIX = ".tmp";
51  
52      /**
53       * Load an ontology into memory. Use this type of model when fast access is critical and memory is available.
54       */
55      public static OntModel loadMemoryModel( InputStream is, String url ) {
56          OntModel model = getMemoryModel( url );
57          model.read( is, null );
58          return model;
59      }
60  
61      /**
62       * Load an ontology into memory. Use this type of model when fast access is critical and memory is available.
63       *
64       * @see #loadMemoryModel(String, String)
65       */
66      public static OntModel loadMemoryModel( String url ) {
67          return loadMemoryModel( url, null );
68      }
69  
70      /**
71       * Load an ontology into memory. Use this type of model when fast access is critical and memory is available.
72       * If load from URL fails, attempt to load from disk cache under @cacheName.
73       * <p>
74       * Uses {@link OntModelSpec#OWL_MEM_TRANS_INF}.
75       *
76       * @param url       a URL where the OWL file is stored
77       * @param cacheName unique name of this ontology, will be used to load from disk in case of failed url connection
78       */
79      public static OntModel loadMemoryModel( String url, @Nullable String cacheName ) {
80          StopWatch timer = new StopWatch();
81          timer.start();
82          OntModel model = getMemoryModel( url );
83  
84          URLConnection urlc = openConnection( url );
85  
86          if ( urlc != null ) {
87              try ( InputStream in = urlc.getInputStream() ) {
88                  Reader reader;
89                  if ( cacheName != null ) {
90                      // write tmp to disk
91                      File tempFile = getTmpDiskCachePath( cacheName );
92                      tempFile.getParentFile().mkdirs();
93                      Files.copy( in, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
94                      reader = new FileReader( tempFile );
95  
96                  } else {
97                      // Skip the cache
98                      reader = new InputStreamReader( in );
99                  }
100 
101                 try ( BufferedReader buf = new BufferedReader( reader ) ) {
102                     model.read( buf, url );
103                 }
104 
105                 log.info( "Load model: " + timer.getTime() + "ms" );
106             } catch ( IOException e ) {
107                 log.error( e.getMessage(), e );
108             }
109         }
110 
111         if ( cacheName != null ) {
112 
113             File f = getDiskCachePath( cacheName );
114             File tempFile = getTmpDiskCachePath( cacheName );
115             File oldFile = getOldDiskCachePath( cacheName );
116 
117             if ( model.isEmpty() ) {
118                 // Attempt to load from disk cache
119 
120                 if ( f.exists() && !f.isDirectory() ) {
121                     try ( BufferedReader buf = new BufferedReader( new FileReader( f ) ) ) {
122                         model.read( buf, url );
123                         // We successfully loaded the cached ontology. Copy the loaded ontology to oldFile
124                         // so that we don't recreate indices during initialization based on a false change in
125                         // the ontology.
126                         Files.copy( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
127                         log.info( "Load model from disk: " + timer.getTime() + "ms" );
128                     } catch ( IOException e ) {
129                         log.error( e.getMessage(), e );
130                         throw new RuntimeException(
131                                 "Ontology failed load from URL (" + url + ") and disk cache: " + cacheName );
132                     }
133                 } else {
134                     throw new RuntimeException(
135                             "Ontology failed load from URL (" + url + ") and disk cache does not exist: " + cacheName );
136                 }
137 
138             } else {
139                 // Model was successfully loaded into memory from URL with given cacheName
140                 // Save cache to disk (rename temp file)
141                 log.info( "Caching ontology to disk: " + cacheName + " under " + f.getAbsolutePath() );
142                 try {
143                     // Need to compare previous to current so instead of overwriting we'll move the old file
144                     f.createNewFile();
145                     Files.move( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
146                     Files.move( tempFile.toPath(), f.toPath(), StandardCopyOption.REPLACE_EXISTING );
147                 } catch ( IOException e ) {
148                     log.error( e.getMessage(), e );
149                 }
150             }
151 
152         }
153 
154         assert !model.isEmpty();
155 
156         return model;
157     }
158 
159     public static boolean hasChanged( String cacheName ) {
160         boolean changed = false; // default
161         if ( StringUtils.isBlank( cacheName ) ) {
162             return changed;
163         }
164 
165         File newFile = getDiskCachePath( cacheName );
166         File oldFile = getOldDiskCachePath( cacheName );
167 
168         try {
169             // This might be slow considering it calls IOUtils.contentsEquals which compares byte-by-byte
170             // in the worst case scenario.
171             // In this case consider using NIO for higher-performance IO using Channels and Buffers.
172             // Ex. Use a 4MB Memory-Mapped IO operation.
173             changed = !FileUtils.contentEquals( newFile, oldFile );
174         } catch ( IOException e ) {
175             log.error( e.getMessage() );
176         }
177 
178         return changed;
179 
180     }
181 
182     public static void deleteOldCache( String cacheName ) {
183         getOldDiskCachePath( cacheName ).delete();
184     }
185 
186     /**
187      * Get model that is entirely in memory.
188      */
189     private static OntModel getMemoryModel( String url ) {
190         OntModelSpec spec = new OntModelSpec( OntModelSpec.OWL_MEM_TRANS_INF );
191         ModelMaker maker = ModelFactory.createMemModelMaker();
192         Model base = maker.createModel( url, false );
193         spec.setImportModelMaker( maker );
194         spec.getDocumentManager().setProcessImports( true );
195         spec.setImportModelGetter( new ModelGetter() {
196             @Override
197             public Model getModel( String URL ) {
198                 return null;
199             }
200 
201             @Override
202             public Model getModel( String URL, ModelReader loadIfAbsent ) {
203                 Model model = maker.createModel( URL );
204                 URLConnection urlc = openConnection( URL );
205                 if ( urlc != null ) {
206                     try ( InputStream in = urlc.getInputStream() ) {
207                         return model.read( in, URL );
208                     } catch ( IOException e ) {
209                         log.error( String.format( "Failed to load from %s.", URL ), e );
210                     }
211                 }
212                 return loadIfAbsent.readModel( model, URL );
213             }
214         } );
215         OntModel model = ModelFactory.createOntologyModel( spec, base );
216         model.setStrictMode( false ); // fix for owl2 files
217         return model;
218     }
219 
220     public static URLConnection openConnection( String url ) {
221         URLConnection urlc = null;
222         int tries = 0;
223         while ( tries < MAX_CONNECTION_TRIES ) {
224             try {
225                 urlc = new URL( url ).openConnection();
226                 // help ensure mis-configured web servers aren't causing trouble.
227                 urlc.setRequestProperty( "Accept", "application/rdf+xml" );
228 
229                 try {
230                     HttpURLConnection c = ( HttpURLConnection ) urlc;
231                     c.setInstanceFollowRedirects( true );
232                 } catch ( ClassCastException e ) {
233                     // not via http, using a FileURLConnection.
234                 }
235 
236                 if ( tries > 0 ) {
237                     log.info( "Retrying connecting to " + url + " [" + tries + "/" + MAX_CONNECTION_TRIES
238                             + " of max tries" );
239                 } else {
240                     log.info( "Connecting to " + url );
241                 }
242 
243                 urlc.connect(); // Will error here on bad URL
244 
245                 if ( urlc instanceof HttpURLConnection ) {
246                     String newUrl = urlc.getHeaderField( "Location" );
247 
248                     if ( StringUtils.isNotBlank( newUrl ) ) {
249                         log.info( "Redirect to " + newUrl );
250                         urlc = new URL( newUrl ).openConnection();
251                         // help ensure mis-configured web servers aren't causing trouble.
252                         urlc.setRequestProperty( "Accept", "application/rdf+xml" );
253                         urlc.connect();
254                     }
255                 }
256 
257                 break;
258             } catch ( IOException e ) {
259                 // try to recover.
260                 log.error( e + " retrying?" );
261                 tries++;
262             }
263         }
264 
265         return urlc;
266     }
267 
268     /**
269      * Obtain the path for the ontology cache.
270      */
271     public static File getDiskCachePath( String name ) {
272         String ontologyDir = Configuration.getString( "ontology.cache.dir" ); // e.g., /something/gemmaData/ontologyCache
273         if ( StringUtils.isBlank( ontologyDir ) ) {
274             throw new IllegalArgumentException( "The 'ontology.cache.dir' configuration must be set to cache ontologies." );
275         }
276         if ( StringUtils.isBlank( name ) ) {
277             throw new IllegalArgumentException( "The ontology must have a suitable name for being loaded from cache." );
278         }
279 
280         if ( !new File( ontologyDir ).exists() ) {
281             new File( ontologyDir ).mkdirs();
282         }
283 
284         return Paths.get( ontologyDir, "ontology", name ).toFile();
285     }
286 
287     private static File getOldDiskCachePath( String name ) {
288         File indexFile = getDiskCachePath( name );
289         return new File( indexFile.getAbsolutePath() + OLD_CACHE_SUFFIX );
290 
291     }
292 
293     private static File getTmpDiskCachePath( String name ) {
294         File indexFile = getDiskCachePath( name );
295         return new File( indexFile.getAbsolutePath() + TMP_CACHE_SUFFIX );
296 
297     }
298 
299 }