View Javadoc
1   /*
2    * The baseCode project
3    *
4    * Copyright (c) 2010 University of British Columbia
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  package ubic.basecode.ontology;
20  
21  import com.hp.hpl.jena.ontology.OntModel;
22  import com.hp.hpl.jena.ontology.OntModelSpec;
23  import com.hp.hpl.jena.rdf.model.Model;
24  import com.hp.hpl.jena.rdf.model.ModelFactory;
25  import com.hp.hpl.jena.rdf.model.ModelMaker;
26  import org.apache.commons.io.FileUtils;
27  import org.apache.commons.lang3.StringUtils;
28  import org.apache.commons.lang3.time.StopWatch;
29  import org.slf4j.Logger;
30  import org.slf4j.LoggerFactory;
31  import ubic.basecode.util.Configuration;
32  
33  import java.io.*;
34  import java.net.HttpURLConnection;
35  import java.net.URL;
36  import java.net.URLConnection;
37  import java.nio.file.Files;
38  import java.nio.file.StandardCopyOption;
39  
40  /**
41   * Reads ontologies from OWL resources
42   *
43   * @author paul
44   */
45  public class OntologyLoader {
46  
47      private static Logger log = LoggerFactory.getLogger( OntologyLoader.class );
48      private static final int MAX_CONNECTION_TRIES = 3;
49      private static final String OLD_CACHE_SUFFIX = ".old";
50      private static final String TMP_CACHE_SUFFIX = ".tmp";
51  
52      /**
53       * Load an ontology into memory. Use this type of model when fast access is critical and memory is available.
54       */
55      public static OntModel loadMemoryModel( InputStream is, String url ) {
56          OntModel model = getMemoryModel( url );
57          model.read( is, null );
58          return model;
59      }
60  
61      /**
62       * Load an ontology into memory. Use this type of model when fast access is critical and memory is available.
63       *
64       * @see #loadMemoryModel(String, String)
65       */
66      public static OntModel loadMemoryModel( String url ) {
67          return loadMemoryModel( url, null );
68      }
69  
70      /**
71       * Load an ontology into memory. Use this type of model when fast access is critical and memory is available.
72       * If load from URL fails, attempt to load from disk cache under @cacheName.
73       * <p>
74       * Uses {@link OntModelSpec#OWL_MEM_TRANS_INF}.
75       *
76       * @param url       a URL where the OWL file is stored
77       * @param cacheName unique name of this ontology, will be used to load from disk in case of failed url connection
78       */
79      public static OntModel loadMemoryModel( String url, String cacheName ) {
80          StopWatch timer = new StopWatch();
81          timer.start();
82          OntModel model = getMemoryModel( url );
83  
84          URLConnection urlc = null;
85          int tries = 0;
86          while ( tries < MAX_CONNECTION_TRIES ) {
87              try {
88                  urlc = new URL( url ).openConnection();
89                  // help ensure mis-configured web servers aren't causing trouble.
90                  urlc.setRequestProperty( "Accept", "application/rdf+xml" );
91  
92                  try {
93                      HttpURLConnection c = ( HttpURLConnection ) urlc;
94                      c.setInstanceFollowRedirects( true );
95                  } catch ( ClassCastException e ) {
96                      // not via http, using a FileURLConnection.
97                  }
98  
99                  if ( tries > 0 ) {
100                     log.info( "Retrying connecting to " + url + " [" + tries + "/" + MAX_CONNECTION_TRIES
101                             + " of max tries" );
102                 } else {
103                     log.info( "Connecting to " + url );
104                 }
105 
106                 urlc.connect(); // Will error here on bad URL
107 
108                 if ( urlc instanceof HttpURLConnection ) {
109                     String newUrl = urlc.getHeaderField( "Location" );
110 
111                     if ( StringUtils.isNotBlank( newUrl ) ) {
112                         log.info( "Redirect to " + newUrl );
113                         urlc = new URL( newUrl ).openConnection();
114                         // help ensure mis-configured web servers aren't causing trouble.
115                         urlc.setRequestProperty( "Accept", "application/rdf+xml" );
116                         urlc.connect();
117                     }
118                 }
119 
120                 break;
121             } catch ( IOException e ) {
122                 // try to recover.
123                 log.error( e + " retrying?" );
124                 tries++;
125             }
126         }
127 
128         if ( urlc != null ) {
129             try ( InputStream in = urlc.getInputStream(); ) {
130                 Reader reader;
131                 if ( cacheName != null ) {
132                     // write tmp to disk
133                     File tempFile = getTmpDiskCachePath( cacheName );
134                     if ( tempFile == null ) {
135                         reader = new InputStreamReader( in );
136                     } else {
137                         tempFile.getParentFile().mkdirs();
138                         Files.copy( in, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
139                         reader = new FileReader( tempFile );
140                     }
141 
142                 } else {
143                     // Skip the cache
144                     reader = new InputStreamReader( in );
145                 }
146 
147                 assert reader != null;
148                 try ( BufferedReader buf = new BufferedReader( reader ); ) {
149                     model.read( buf, url );
150                 }
151 
152                 log.info( "Load model: " + timer.getTime() + "ms" );
153             } catch ( IOException e ) {
154                 log.error( e.getMessage(), e );
155             }
156         }
157 
158         if ( cacheName != null ) {
159 
160             File f = getDiskCachePath( cacheName );
161             File tempFile = getTmpDiskCachePath( cacheName );
162             File oldFile = getOldDiskCachePath( cacheName );
163 
164             if ( model.isEmpty() ) {
165                 // Attempt to load from disk cache
166 
167                 if ( f == null ) {
168                     throw new RuntimeException(
169                             "Ontology cache directory required to load from disk: ontology.cache.dir" );
170                 }
171 
172                 if ( f.exists() && !f.isDirectory() ) {
173                     try ( BufferedReader buf = new BufferedReader( new FileReader( f ) ); ) {
174                         model.read( buf, url );
175                         // We successfully loaded the cached ontology. Copy the loaded ontology to oldFile
176                         // so that we don't recreate indices during initialization based on a false change in
177                         // the ontology.
178                         Files.copy( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
179                         log.info( "Load model from disk: " + timer.getTime() + "ms" );
180                     } catch ( IOException e ) {
181                         log.error( e.getMessage(), e );
182                         throw new RuntimeException(
183                                 "Ontology failed load from URL (" + url + ") and disk cache: " + cacheName );
184                     }
185                 } else {
186                     throw new RuntimeException(
187                             "Ontology failed load from URL (" + url + ") and disk cache does not exist: " + cacheName );
188                 }
189 
190             } else {
191                 // Model was successfully loaded into memory from URL with given cacheName
192                 // Save cache to disk (rename temp file)
193                 log.info( "Caching ontology to disk: " + cacheName );
194                 if ( f != null ) {
195                     try {
196                         // Need to compare previous to current so instead of overwriting we'll move the old file
197                         f.createNewFile();
198                         Files.move( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
199                         Files.move( tempFile.toPath(), f.toPath(), StandardCopyOption.REPLACE_EXISTING );
200                     } catch ( IOException e ) {
201                         log.error( e.getMessage(), e );
202                     }
203                 } else {
204                     log.warn( "Ontology cache directory required to save to disk: ontology.cache.dir" );
205                 }
206             }
207 
208         }
209 
210         assert !model.isEmpty();
211 
212         return model;
213     }
214 
215     public static boolean hasChanged( String cacheName ) {
216         boolean changed = false; // default
217         if ( StringUtils.isBlank( cacheName ) ) {
218             return changed;
219         }
220 
221         File newFile = getDiskCachePath( cacheName );
222         File oldFile = getOldDiskCachePath( cacheName );
223 
224         try {
225             // This might be slow considering it calls IOUtils.contentsEquals which compares byte-by-byte
226             // in the worst case scenario.
227             // In this case consider using NIO for higher-performance IO using Channels and Buffers.
228             // Ex. Use a 4MB Memory-Mapped IO operation.
229             if ( newFile != null && oldFile != null )
230                 changed = !FileUtils.contentEquals( newFile, oldFile );
231         } catch ( IOException e ) {
232             log.error( e.getMessage() );
233         }
234 
235         return changed;
236 
237     }
238 
239     public static boolean deleteOldCache( String cacheName ) {
240         File f = getOldDiskCachePath( cacheName );
241         if ( f != null )
242             return f.delete();
243         return false;
244     }
245 
246     /**
247      * Get model that is entirely in memory.
248      *
249      * @param url
250      * @return
251      */
252     private static OntModel getMemoryModel( String url ) {
253         OntModelSpec spec = new OntModelSpec( OntModelSpec.OWL_MEM_TRANS_INF );
254         ModelMaker maker = ModelFactory.createMemModelMaker();
255         Model base = maker.createModel( url, false );
256         spec.setImportModelMaker( maker );
257         spec.getDocumentManager().setProcessImports( false );
258 
259         OntModel model = ModelFactory.createOntologyModel( spec, base );
260         model.setStrictMode( false ); // fix for owl2 files
261         return model;
262     }
263 
264     /**
265      * @param name
266      * @return
267      */
268     public static File getDiskCachePath( String name ) {
269         String ontologyDir = Configuration.getString( "ontology.cache.dir" ); // e.g., /something/gemmaData/ontologyCache
270         if ( StringUtils.isBlank( ontologyDir ) || StringUtils.isBlank( name ) ) {
271             return null;
272         }
273 
274         if ( !new File( ontologyDir ).exists() ) {
275             new File( ontologyDir ).mkdirs();
276         }
277 
278         assert ontologyDir != null;
279 
280         String path = ontologyDir + File.separator + "ontology" + File.separator + name;
281 
282         File indexFile = new File( path );
283 
284         return indexFile;
285     }
286 
287     static File getOldDiskCachePath( String name ) {
288         File indexFile = getDiskCachePath( name );
289         if ( indexFile == null ) {
290             return null;
291         }
292         return new File( indexFile.getAbsolutePath() + OLD_CACHE_SUFFIX );
293 
294     }
295 
296     static File getTmpDiskCachePath( String name ) {
297         File indexFile = getDiskCachePath( name );
298         if ( indexFile == null ) {
299             return null;
300         }
301         return new File( indexFile.getAbsolutePath() + TMP_CACHE_SUFFIX );
302 
303     }
304 
305 }