package ubic.basecode.ontology.jena;

import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.ontology.OntModelSpec;
import com.hp.hpl.jena.rdf.model.*;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ubic.basecode.util.Configuration;

import javax.annotation.Nullable;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
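
/**
 * Loads ontologies into memory-backed Jena {@link OntModel}s, optionally keeping a cached copy of the
 * downloaded document on disk so an ontology can still be loaded when its source URL is unavailable.
 * <p>
 * A typical usage, assuming the ontology cache directory is configured; the URL and cache name below are
 * purely illustrative:
 * <pre>{@code
 * OntModel model = OntologyLoader.loadMemoryModel( "http://purl.obolibrary.org/obo/go.owl", "go" );
 * if ( OntologyLoader.hasChanged( "go" ) ) {
 *     // e.g. rebuild downstream indices, then drop the stale comparison copy
 *     OntologyLoader.deleteOldCache( "go" );
 * }
 * }</pre>
 */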
public class OntologyLoader {

    private static final Logger log = LoggerFactory.getLogger( OntologyLoader.class );
    private static final int MAX_CONNECTION_TRIES = 3;
    private static final String OLD_CACHE_SUFFIX = ".old";
    private static final String TMP_CACHE_SUFFIX = ".tmp";

    /**
     * Load an ontology into memory from the given input stream.
     *
     * @param is  stream to read the ontology from
     * @param url URL the ontology originates from, used to name the underlying model
     */
    public static OntModel loadMemoryModel( InputStream is, String url ) {
        OntModel model = getMemoryModel( url );
        model.read( is, null );
        return model;
    }

    /**
     * Load an ontology into memory from a URL, without caching it on disk.
     */
    public static OntModel loadMemoryModel( String url ) {
        return loadMemoryModel( url, null );
    }

    /**
     * Load an ontology into memory from a URL, optionally caching the downloaded document on disk.
     * <p>
     * If the download fails and a disk cache named {@code cacheName} already exists, the ontology is loaded
     * from the cached copy instead.
     *
     * @param url       URL to load the ontology from
     * @param cacheName name of the disk cache, or null to disable caching
     */
    public static OntModel loadMemoryModel( String url, @Nullable String cacheName ) {
        StopWatch timer = new StopWatch();
        timer.start();
        OntModel model = getMemoryModel( url );

        URLConnection urlc = openConnection( url );

        if ( urlc != null ) {
            try ( InputStream in = urlc.getInputStream() ) {
                Reader reader;
                if ( cacheName != null ) {
                    // write the download to a temporary file first; it is only promoted to the real cache
                    // file further below, once the model has been read successfully
                    File tempFile = getTmpDiskCachePath( cacheName );
                    tempFile.getParentFile().mkdirs();
                    Files.copy( in, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
                    reader = new FileReader( tempFile );
                } else {
                    // no caching requested, read the response directly
                    reader = new InputStreamReader( in );
                }

                try ( BufferedReader buf = new BufferedReader( reader ) ) {
                    model.read( buf, url );
                }

                log.info( "Load model: " + timer.getTime() + "ms" );
            } catch ( IOException e ) {
                log.error( e.getMessage(), e );
            }
        }

        if ( cacheName != null ) {
            File f = getDiskCachePath( cacheName );
            File tempFile = getTmpDiskCachePath( cacheName );
            File oldFile = getOldDiskCachePath( cacheName );

            if ( model.isEmpty() ) {
                // loading from the URL failed; fall back to the on-disk cache if we have one
                if ( f.exists() && !f.isDirectory() ) {
                    try ( BufferedReader buf = new BufferedReader( new FileReader( f ) ) ) {
                        model.read( buf, url );
                        // keep the ".old" copy in sync with what we just read so hasChanged() does not
                        // report a spurious change
                        Files.copy( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
                        log.info( "Load model from disk: " + timer.getTime() + "ms" );
                    } catch ( IOException e ) {
                        log.error( e.getMessage(), e );
                        throw new RuntimeException(
                                "Ontology failed to load from URL (" + url + ") and disk cache: " + cacheName );
                    }
                } else {
                    throw new RuntimeException(
                            "Ontology failed to load from URL (" + url + ") and disk cache does not exist: " + cacheName );
                }
            } else {
                // the model was loaded from the URL: promote the temporary file to the cache file, keeping
                // the previous cache as the ".old" copy so hasChanged() can detect whether it changed
                log.info( "Caching ontology to disk: " + cacheName + " under " + f.getAbsolutePath() );
                try {
                    // ensure the cache file exists so moving it aside succeeds on a first run
                    f.createNewFile();
                    Files.move( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING );
                    Files.move( tempFile.toPath(), f.toPath(), StandardCopyOption.REPLACE_EXISTING );
                } catch ( IOException e ) {
                    log.error( e.getMessage(), e );
                }
            }
        }

        assert !model.isEmpty();

        return model;
    }

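    /**
     * Check whether the cached copy of an ontology differs from the previous ("{@code .old}") copy kept by
     * {@link #loadMemoryModel(String, String)}.
     *
     * @return true if the two copies differ (or only one of them exists), false if they are identical or no
     *         cache name was given
     */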
    public static boolean hasChanged( String cacheName ) {
        boolean changed = false;
        if ( StringUtils.isBlank( cacheName ) ) {
            return changed;
        }

        File newFile = getDiskCachePath( cacheName );
        File oldFile = getOldDiskCachePath( cacheName );

        try {
            // FileUtils.contentEquals also handles the cases where one or both files are missing
            changed = !FileUtils.contentEquals( newFile, oldFile );
        } catch ( IOException e ) {
            log.error( e.getMessage(), e );
        }

        return changed;
    }

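    /**
     * Delete the previous ("{@code .old}") copy of a cached ontology kept for change detection.
     */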
    public static void deleteOldCache( String cacheName ) {
        getOldDiskCachePath( cacheName ).delete();
    }

    /**
     * Create an in-memory ontology model (using {@link OntModelSpec#OWL_MEM_TRANS_INF}) named after the given
     * URL; imports are resolved by fetching them through {@link #openConnection(String)}.
     */
    private static OntModel getMemoryModel( String url ) {
        OntModelSpec spec = new OntModelSpec( OntModelSpec.OWL_MEM_TRANS_INF );
        ModelMaker maker = ModelFactory.createMemModelMaker();
        Model base = maker.createModel( url, false );
        spec.setImportModelMaker( maker );
        spec.getDocumentManager().setProcessImports( true );
        spec.setImportModelGetter( new ModelGetter() {
            @Override
            public Model getModel( String URL ) {
                // no locally cached model to offer
                return null;
            }

            @Override
            public Model getModel( String URL, ModelReader loadIfAbsent ) {
                Model model = maker.createModel( URL );
                URLConnection urlc = openConnection( URL );
                if ( urlc != null ) {
                    try ( InputStream in = urlc.getInputStream() ) {
                        return model.read( in, URL );
                    } catch ( IOException e ) {
                        log.error( String.format( "Failed to load from %s.", URL ), e );
                    }
                }
                // fall back to the supplied reader if the connection could not be opened or read
                return loadIfAbsent.readModel( model, URL );
            }
        } );
        OntModel model = ModelFactory.createOntologyModel( spec, base );
        model.setStrictMode( false );
        return model;
    }

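    /**
     * Open a connection to the given URL, requesting RDF/XML and retrying up to {@code MAX_CONNECTION_TRIES}
     * times; a single "Location" redirect is followed explicitly.
     *
     * @return the connection, which may be null if one could not be created
     */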
    public static URLConnection openConnection( String url ) {
        URLConnection urlc = null;
        int tries = 0;
        while ( tries < MAX_CONNECTION_TRIES ) {
            try {
                urlc = new URL( url ).openConnection();

                // request RDF/XML
                urlc.setRequestProperty( "Accept", "application/rdf+xml" );

                if ( urlc instanceof HttpURLConnection ) {
                    ( ( HttpURLConnection ) urlc ).setInstanceFollowRedirects( true );
                }

                if ( tries > 0 ) {
                    log.info( "Retrying connecting to " + url + " [attempt " + ( tries + 1 ) + "/" + MAX_CONNECTION_TRIES + "]" );
                } else {
                    log.info( "Connecting to " + url );
                }

                urlc.connect();

                if ( urlc instanceof HttpURLConnection ) {
                    String newUrl = urlc.getHeaderField( "Location" );

                    if ( StringUtils.isNotBlank( newUrl ) ) {
                        log.info( "Redirect to " + newUrl );
                        urlc = new URL( newUrl ).openConnection();

                        urlc.setRequestProperty( "Accept", "application/rdf+xml" );
                        urlc.connect();
                    }
                }

                break;
            } catch ( IOException e ) {
                log.error( "Failed to connect to " + url + " on attempt " + ( tries + 1 ) + "/" + MAX_CONNECTION_TRIES + ".", e );
                tries++;
            }
        }

        return urlc;
    }

    /**
     * Resolve the disk-cache file for an ontology, creating the base cache directory if it does not already
     * exist. The location is determined by the {@code ontology.cache.dir} configuration setting.
     */
    public static File getDiskCachePath( String name ) {
        String ontologyDir = Configuration.getString( "ontology.cache.dir" );
        if ( StringUtils.isBlank( ontologyDir ) ) {
            throw new IllegalArgumentException( "The 'ontology.cache.dir' configuration must be set to cache ontologies." );
        }
        if ( StringUtils.isBlank( name ) ) {
            throw new IllegalArgumentException( "The ontology must have a suitable name for being loaded from cache." );
        }

        if ( !new File( ontologyDir ).exists() ) {
            new File( ontologyDir ).mkdirs();
        }

        return Paths.get( ontologyDir, "ontology", name ).toFile();
    }

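    /**
     * Path of the previous ("{@code .old}") copy of a cached ontology, used for change detection.
     */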
    private static File getOldDiskCachePath( String name ) {
        File indexFile = getDiskCachePath( name );
        return new File( indexFile.getAbsolutePath() + OLD_CACHE_SUFFIX );
    }

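    /**
     * Path of the temporary ("{@code .tmp}") file a download is written to before being promoted to the cache.
     */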
    private static File getTmpDiskCachePath( String name ) {
        File indexFile = getDiskCachePath( name );
        return new File( indexFile.getAbsolutePath() + TMP_CACHE_SUFFIX );
    }

}