View Javadoc
1   /*
2    * The basecode project
3    *
4    * Copyright (c) 2007-2019 University of British Columbia
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  
20  package ubic.basecode.ontology.jena;
21  
22  import com.hp.hpl.jena.ontology.*;
23  import com.hp.hpl.jena.rdf.arp.ARPErrorNumbers;
24  import com.hp.hpl.jena.rdf.arp.ParseException;
25  import com.hp.hpl.jena.rdf.model.Property;
26  import com.hp.hpl.jena.rdf.model.Resource;
27  import com.hp.hpl.jena.shared.JenaException;
28  import com.hp.hpl.jena.util.iterator.ExtendedIterator;
29  import org.apache.commons.lang3.RandomStringUtils;
30  import org.apache.commons.lang3.StringUtils;
31  import org.apache.commons.lang3.time.StopWatch;
32  import org.slf4j.Logger;
33  import org.slf4j.LoggerFactory;
34  import ubic.basecode.ontology.jena.search.OntologyIndexer;
35  import ubic.basecode.ontology.jena.search.OntologySearch;
36  import ubic.basecode.ontology.jena.search.SearchIndex;
37  import ubic.basecode.ontology.jena.vocabulary.BFO;
38  import ubic.basecode.ontology.jena.vocabulary.RO;
39  import ubic.basecode.ontology.model.OntologyIndividual;
40  import ubic.basecode.ontology.model.OntologyResource;
41  import ubic.basecode.ontology.model.OntologyTerm;
42  import ubic.basecode.ontology.providers.OntologyService;
43  import ubic.basecode.ontology.search.OntologySearchException;
44  import ubic.basecode.util.Configuration;
45  
46  import javax.annotation.Nullable;
47  import java.io.InputStream;
48  import java.util.*;
49  import java.util.concurrent.locks.Lock;
50  import java.util.concurrent.locks.ReadWriteLock;
51  import java.util.concurrent.locks.ReentrantReadWriteLock;
52  import java.util.stream.Collectors;
53  
54  import static ubic.basecode.ontology.jena.JenaUtils.where;
55  
56  /**
57   * Base class for Jena-based ontology services.
58   *
59   * @author kelsey
60   */
61  @SuppressWarnings("unused")
62  public abstract class AbstractOntologyService implements OntologyService {
63  
64      protected static Logger log = LoggerFactory.getLogger( AbstractOntologyService.class );
65  
66      /**
67       * Properties through which propagation is allowed for {@link #getParents(Collection, boolean, boolean, boolean)}
68       */
69      private static final Set<Property> additionalProperties;
70  
71      static {
72          additionalProperties = new HashSet<>();
73          additionalProperties.add( BFO.partOf );
74          additionalProperties.add( RO.properPartOf );
75      }
76  
77      /**
78       * Lock used to prevent reads while the ontology is being initialized.
79       */
80      private final ReadWriteLock rwLock = new ReentrantReadWriteLock();
81  
82      /* internal state protected by rwLock */
83      private OntModel model;
84      private Map<String, String> alternativeIDs;
85  
86      @Nullable
87      private SearchIndex index;
88  
89      private Set<Restriction> additionalRestrictions;
90  
91      private boolean isInitialized = false;
92  
93      public void initialize( boolean forceLoad, boolean forceIndexing ) {
94          initialize( null, forceLoad, forceIndexing );
95      }
96  
97      public void initialize( InputStream stream, boolean forceIndexing ) {
98          initialize( stream, true, forceIndexing );
99      }
100 
101     private void initialize( @Nullable InputStream stream, boolean forceLoad, boolean forceIndexing ) {
102         if ( !forceLoad && isInitialized ) {
103             log.warn( "{} is already loaded, and force=false, not restarting", this );
104             return;
105         }
106 
107         String ontologyUrl = getOntologyUrl();
108         String ontologyName = getOntologyName();
109         String cacheName = getCacheName();
110 
111         boolean loadOntology = isEnabled();
112 
113         // If loading ontologies is disabled in the configuration, return
114         if ( !forceLoad && !loadOntology ) {
115             log.debug( "Loading {} is disabled (force=false, Configuration load.{}=false)",
116                     this, ontologyName );
117             return;
118         }
119 
120         // Detect configuration problems.
121         if ( StringUtils.isBlank( ontologyUrl ) ) {
122             throw new IllegalStateException( "URL not defined for %s: ontology cannot be loaded. (" + this + ")" );
123         }
124 
125         // This thread indexes ontology and creates local cache for uri->ontology terms mappings.
126         if ( !forceIndexing ) {
127             log.info( "{} index will *not* be refreshed unless the ontology has changed or the index is missing", this );
128         }
129 
130         log.info( "Loading ontology: {}...", this );
131         StopWatch loadTime = StopWatch.createStarted();
132 
133         // use temporary variables, so that we can minimize the critical region for replacing the service's state
134         OntModel model;
135         SearchIndex index;
136 
137         // loading the model from disk or URL is lengthy
138         if ( checkIfInterrupted() )
139             return;
140 
141         model = stream != null ? loadModelFromStream( stream ) : loadModel(); // can take a while.
142         assert model != null;
143 
144         // retrieving restrictions is lengthy
145         if ( checkIfInterrupted() )
146             return;
147 
148         // compute additional restrictions
149         Set<Restriction> additionalRestrictions = model.listRestrictions()
150                 .filterKeep( new RestrictionWithOnPropertyFilter( additionalProperties ) )
151                 .toSet();
152 
153         //Checks if the current ontology has changed since it was last loaded.
154         boolean changed = cacheName == null || OntologyLoader.hasChanged( cacheName );
155         boolean indexExists = cacheName != null && OntologyIndexer.getSubjectIndex( cacheName ) != null;
156         boolean forceReindexing = forceLoad && forceIndexing;
157 
158         /*
159          * Indexing is slow, don't do it if we don't have to.
160          */
161         boolean force = forceReindexing || changed || !indexExists;
162 
163         // indexing is lengthy, don't bother if we're interrupted
164         if ( checkIfInterrupted() )
165             return;
166 
167         if ( cacheName != null ) {
168             index = OntologyIndexer.indexOntology( cacheName, model, force );
169         } else {
170             index = null;
171         }
172 
173         // if interrupted, we don't need to replace the model and clear the *old* cache
174         if ( checkIfInterrupted() )
175             return;
176 
177         Lock lock = rwLock.writeLock();
178         try {
179             lock.lock();
180             this.model = model;
181             this.additionalRestrictions = additionalRestrictions;
182             this.index = index;
183             this.isInitialized = true;
184             if ( cacheName != null ) {
185                 // now that the terms have been replaced, we can clear old caches
186                 OntologyLoader.deleteOldCache( cacheName );
187             }
188         } finally {
189             lock.unlock();
190         }
191 
192         loadTime.stop();
193 
194         log.info( "Finished loading {} in {}s", this, String.format( "%.2f", loadTime.getTime() / 1000.0 ) );
195     }
196 
197     private boolean checkIfInterrupted() {
198         if ( Thread.interrupted() ) {
199             log.warn( "The current thread is interrupted, initialization of {} will be stop.", this );
200             return true;
201         }
202         return false;
203     }
204 
205     /**
206      * Do not do this except before re-indexing.
207      */
208     public void closeIndex() {
209         if ( index == null ) return;
210         index.close();
211     }
212 
213     @Override
214     public Collection<OntologyIndividual> findIndividuals( String search, boolean keepObsoletes ) throws OntologySearchException {
215         Lock lock = rwLock.readLock();
216         try {
217             lock.lock();
218             if ( !isInitialized ) {
219                 log.warn( "Ontology {} is not ready, no individuals will be returned.", this );
220                 return Collections.emptySet();
221             }
222             if ( index == null ) {
223                 log.warn( "Attempt to search {} when index is null, no results will be returned.", this );
224                 return Collections.emptySet();
225             }
226             return OntologySearch.matchIndividuals( model, index, search )
227                     .mapWith( i -> ( OntologyIndividual ) new OntologyIndividualImpl( i.result, additionalRestrictions, i.score ) )
228                     .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.isObsolete() ) )
229                     .toSet();
230         } finally {
231             lock.unlock();
232         }
233     }
234 
235     @Override
236     public Collection<OntologyResource> findResources( String searchString, boolean keepObsoletes ) throws OntologySearchException {
237         Lock lock = rwLock.readLock();
238         try {
239             lock.lock();
240             if ( !isInitialized ) {
241                 log.warn( "Ontology {} is not ready, no resources will be returned.", this );
242                 return Collections.emptySet();
243             }
244             if ( index == null ) {
245                 log.warn( "Attempt to search {} when index is null, no results will be returned.", this );
246                 return Collections.emptySet();
247             }
248             return OntologySearch.matchResources( model, index, searchString )
249                     .filterKeep( where( r -> r.result.canAs( OntClass.class ) || r.result.canAs( Individual.class ) ) )
250                     .mapWith( r -> {
251                         OntologyResource res;
252                         if ( r.result.canAs( OntClass.class ) ) {
253                             res = new OntologyTermImpl( r.result.as( OntClass.class ), additionalRestrictions, r.score );
254                         } else {
255                             res = new OntologyIndividualImpl( r.result.as( Individual.class ), additionalRestrictions, r.score );
256                         }
257                         return res;
258                     } )
259                     .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.isObsolete() ) )
260                     .toSet();
261         } finally {
262             lock.unlock();
263         }
264     }
265 
266     @Override
267     public Collection<OntologyTerm> findTerm( String search, boolean keepObsoletes ) throws OntologySearchException {
268         if ( log.isDebugEnabled() ) log.debug( "Searching " + this + " for '" + search + "'" );
269         Lock lock = rwLock.readLock();
270         try {
271             lock.lock();
272             if ( !isInitialized ) {
273                 log.warn( "Ontology {} is not ready, no terms will be returned.", this );
274                 return Collections.emptySet();
275             }
276             if ( index == null ) {
277                 log.warn( "Attempt to search {} when index is null, no results will be returned.", this );
278                 return Collections.emptySet();
279             }
280             return OntologySearch.matchClasses( model, index, search )
281                     .mapWith( r -> ( OntologyTerm ) new OntologyTermImpl( r.result, additionalRestrictions, r.score ) )
282                     .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.isObsolete() ) )
283                     .toSet();
284         } finally {
285             lock.unlock();
286         }
287     }
288 
289     @Override
290     public OntologyTerm findUsingAlternativeId( String alternativeId ) {
291         Lock lock = alternativeIDs != null ? rwLock.readLock() : rwLock.writeLock();
292         try {
293             lock.lock();
294             if ( !isInitialized ) {
295                 log.warn( "Ontology {} is not ready, null will be returned for alternative ID match.", this );
296                 return null;
297             }
298             if ( alternativeIDs == null ) {
299                 log.info( "init search by alternativeID" );
300                 initSearchByAlternativeId();
301             }
302             String termUri = alternativeIDs.get( alternativeId );
303             return termUri != null ? getTerm( termUri ) : null;
304         } finally {
305             lock.unlock();
306         }
307     }
308 
309     @Override
310     public Set<String> getAllURIs() {
311         Lock lock = rwLock.readLock();
312         try {
313             lock.lock();
314             if ( !isInitialized ) {
315                 log.warn( "Ontology {} is not ready, no term  URIs will be returned.", this );
316                 return Collections.emptySet();
317             }
318             Set<String> allUris = new HashSet<>();
319             allUris.addAll( model.listClasses().mapWith( OntClass::getURI ).toSet() );
320             allUris.addAll( model.listIndividuals().mapWith( Individual::getURI ).toSet() );
321             return allUris;
322         } finally {
323             lock.unlock();
324         }
325     }
326 
327     @Override
328     public OntologyResource getResource( String uri ) {
329         Lock lock = rwLock.readLock();
330         try {
331             lock.lock();
332             if ( !isInitialized ) {
333                 return null;
334             }
335             OntologyResource res;
336             Resource resource = model.getResource( uri );
337             if ( resource.getURI() == null ) {
338                 return null;
339             }
340             if ( resource instanceof OntClass ) {
341                 // use the cached term
342                 res = new OntologyTermImpl( ( OntClass ) resource, additionalRestrictions );
343             } else if ( resource instanceof Individual ) {
344                 res = new OntologyIndividualImpl( ( Individual ) resource, additionalRestrictions );
345             } else if ( resource instanceof OntProperty ) {
346                 res = PropertyFactory.asProperty( ( ObjectProperty ) resource, additionalRestrictions );
347             } else {
348                 res = null;
349             }
350             return res;
351         } finally {
352             lock.unlock();
353         }
354     }
355 
356     @Override
357     public OntologyTerm getTerm( String uri ) {
358         Lock lock = rwLock.readLock();
359         try {
360             lock.lock();
361             if ( !isInitialized ) return null;
362             OntClass ontCls = model.getOntClass( uri );
363             // null or bnode
364             if ( ontCls == null || ontCls.getURI() == null ) {
365                 return null;
366             }
367             return new OntologyTermImpl( ontCls, additionalRestrictions );
368         } finally {
369             lock.unlock();
370         }
371     }
372 
373     @Override
374     public Collection<OntologyIndividual> getTermIndividuals( String uri ) {
375         Lock lock = rwLock.readLock();
376         try {
377             lock.lock();
378             if ( !isInitialized ) {
379                 return Collections.emptySet();
380             }
381             OntologyTerm term = getTerm( uri );
382             if ( term == null ) {
383                 /*
384                  * Either the ontology hasn't been loaded, or the id was not valid.
385                  */
386                 log.warn( "No term for URI={} in {}; make sure ontology is loaded and uri is valid", uri, this );
387                 return Collections.emptySet();
388             }
389             return term.getIndividuals( true );
390         } finally {
391             lock.unlock();
392         }
393     }
394 
395     @Override
396     public Set<OntologyTerm> getParents( Collection<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties, boolean keepObsoletes ) {
397         Lock lock = rwLock.readLock();
398         try {
399             lock.lock();
400             if ( !isInitialized ) {
401                 return Collections.emptySet();
402             }
403             return JenaUtils.getParents( model, getOntClassesFromTerms( terms ), direct, includeAdditionalProperties ? additionalRestrictions : null )
404                     .stream()
405                     .map( o -> new OntologyTermImpl( o, additionalRestrictions ) )
406                     .filter( o -> keepObsoletes || !o.isObsolete() )
407                     .collect( Collectors.toSet() );
408         } finally {
409             lock.unlock();
410         }
411     }
412 
413     @Override
414     public Set<OntologyTerm> getChildren( Collection<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties, boolean keepObsoletes ) {
415         Lock lock = rwLock.readLock();
416         try {
417             lock.lock();
418             if ( !isInitialized ) {
419                 return Collections.emptySet();
420             }
421             return JenaUtils.getChildren( model, getOntClassesFromTerms( terms ), direct, includeAdditionalProperties ? additionalRestrictions : null )
422                     .stream()
423                     .map( o -> new OntologyTermImpl( o, additionalRestrictions ) )
424                     .filter( o -> keepObsoletes || !o.isObsolete() )
425                     .collect( Collectors.toSet() );
426         } finally {
427             lock.unlock();
428         }
429     }
430 
431     @Override
432     public boolean isEnabled() {
433         // quick path: just lookup the configuration
434         String configParameter = "load." + getOntologyName();
435         if ( Configuration.getBoolean( configParameter ) ) {
436             return true;
437         }
438         // could have forced, without setting config
439         Lock lock = rwLock.readLock();
440         try {
441             lock.lock();
442             return isInitialized;
443         } finally {
444             lock.unlock();
445         }
446     }
447 
448     @Override
449     public boolean isOntologyLoaded() {
450         // it's fine not to use the read lock here
451         return isInitialized;
452     }
453 
454     private Thread initializationThread = null;
455 
456     @Override
457     public synchronized void startInitializationThread( boolean forceLoad, boolean forceIndexing ) {
458         if ( initializationThread != null && initializationThread.isAlive() ) {
459             log.warn( " Initialization thread for {} is currently running, not restarting.", this );
460             return;
461         }
462         // create and start the initialization thread
463         initializationThread = new Thread( () -> {
464             try {
465                 this.initialize( forceLoad, forceIndexing );
466             } catch ( JenaException e ) {
467                 if ( !( e.getCause() instanceof ParseException ) || ( ( ParseException ) e.getCause() ).getErrorNumber() != ARPErrorNumbers.ERR_INTERRUPTED ) {
468                     throw e;
469                 }
470             } catch ( Exception e ) {
471                 log.error( e.getMessage(), e );
472                 this.isInitialized = false;
473             }
474         }, getOntologyName() + "_load_thread_" + RandomStringUtils.randomAlphanumeric( 5 ) );
475         // To prevent VM from waiting on this thread to shut down (if shutting down).
476         initializationThread.setDaemon( true );
477         initializationThread.start();
478     }
479 
480     @Override
481     public boolean isInitializationThreadAlive() {
482         return initializationThread != null && initializationThread.isAlive();
483     }
484 
485     @Override
486     public boolean isInitializationThreadCancelled() {
487         return initializationThread != null && initializationThread.isInterrupted();
488     }
489 
490     /**
491      * Cancel the initialization thread.
492      */
493     @Override
494     public void cancelInitializationThread() {
495         if ( initializationThread == null ) {
496             throw new IllegalStateException( "The initialization thread has not started. Invoke startInitializationThread() first." );
497         }
498         initializationThread.interrupt();
499     }
500 
501     @Override
502     public void waitForInitializationThread() throws InterruptedException {
503         if ( initializationThread == null ) {
504             throw new IllegalStateException( "The initialization thread has not started. Invoke startInitializationThread() first." );
505         }
506         initializationThread.join();
507     }
508 
509     /**
510      * The simple name of the ontology. Used for indexing purposes (i.e. this will determine the name of the underlying
511      * index for searching the ontology).
512      */
513     protected abstract String getOntologyName();
514 
515     /**
516      * Defines the location of the ontology eg: <a href="http://mged.sourceforge.net/ontologies/MGEDOntology.owl">MGED</a>
517      */
518     protected abstract String getOntologyUrl();
519 
520     /**
521      * Delegates the call as to load the model into memory or leave it on disk. Simply delegates to either
522      * OntologyLoader.loadMemoryModel( url ); OR OntologyLoader.loadPersistentModel( url, spec );
523      */
524     protected abstract OntModel loadModel();
525 
526 
527     /**
528      * Load a model from a given input stream.
529      */
530     protected abstract OntModel loadModelFromStream( InputStream stream );
531 
532     /**
533      * A name for caching this ontology, or null to disable caching.
534      * <p>
535      * Note that if null is returned, the ontology will not have full-text search capabilities.
536      */
537     @Nullable
538     protected String getCacheName() {
539         return getOntologyName();
540     }
541 
542     @Override
543     public void index( boolean force ) {
544         String cacheName = getCacheName();
545         if ( cacheName == null ) {
546             log.warn( "This ontology does not support indexing; assign a cache name to be used." );
547             return;
548         }
549         SearchIndex index;
550         Lock lock = rwLock.readLock();
551         try {
552             lock.lock();
553             if ( !isInitialized ) {
554                 log.warn( "Ontology {} is not initialized, cannot index it.", this );
555                 return;
556             }
557             index = OntologyIndexer.indexOntology( getCacheName(), model, force );
558         } finally {
559             lock.unlock();
560         }
561         // now we replace the index
562         lock = rwLock.writeLock();
563         try {
564             lock.lock();
565             this.index = index;
566         } finally {
567             lock.unlock();
568         }
569     }
570 
571     /**
572      * Initialize alternative IDs mapping.
573      * <p>
574      * this add alternative id in 2 ways
575      * <p>
576      * Example :
577      * <p>
578      * <a href="http://purl.obolibrary.org/obo/HP_0000005">HP_0000005</a> with alternative id : HP:0001453
579      * <p>
580      * by default way use in file 1- HP:0001453 -----> <a href="http://purl.obolibrary.org/obo/HP_0000005">HP_0000005</a>
581      * <p>
582      * trying <a href=" to use the value uri 2- http://purl.obol">HP_0001453</a>ibrary.org/obo/HP_0001453 ----->
583      * <a href="http://purl.obolibrary.org/obo/HP_0000005">HP_0000005</a>
584      */
585     private void initSearchByAlternativeId() {
586         alternativeIDs = new HashMap<>();
587         // for all Ontology terms that exist in the tree
588         ExtendedIterator<OntClass> iterator = model.listClasses();
589         while ( iterator.hasNext() ) {
590             OntClass ind = iterator.next();
591             OntologyTerm ontologyTerm = new OntologyTermImpl( ind, additionalRestrictions );
592             // lets find the baseUri, to change to valueUri
593             String baseOntologyUri = ontologyTerm.getUri().substring( 0, ontologyTerm.getUri().lastIndexOf( "/" ) + 1 );
594             for ( String alternativeId : ontologyTerm.getAlternativeIds() ) {
595                 // first way
596                 alternativeIDs.put( alternativeId, ontologyTerm.getUri() );
597                 // second way
598                 String alternativeIdModified = alternativeId.replace( ':', '_' );
599                 alternativeIDs.put( baseOntologyUri + alternativeIdModified, ontologyTerm.getUri() );
600             }
601         }
602     }
603 
604     @Override
605     public void loadTermsInNameSpace( InputStream is, boolean forceIndex ) {
606         // wait for the initialization thread to finish
607         if ( initializationThread != null && initializationThread.isAlive() ) {
608             log.warn( "{} initialization is already running, trying to cancel ...", this );
609             initializationThread.interrupt();
610             // wait for the thread to die.
611             int maxWait = 10;
612             int wait = 0;
613             while ( initializationThread.isAlive() ) {
614                 try {
615                     initializationThread.join( 5000 );
616                     log.warn( "Waiting for auto-initialization to stop so manual initialization can begin ..." );
617                 } catch ( InterruptedException e ) {
618                     Thread.currentThread().interrupt();
619                     log.warn( "Got interrupted while waiting for the initialization thread of {} to finish.", this );
620                     return;
621                 }
622                 ++wait;
623                 if ( wait >= maxWait && !initializationThread.isAlive() ) {
624                     throw new RuntimeException( String.format( "Got tired of waiting for %s's initialization thread.", this ) );
625                 }
626             }
627         }
628         initialize( is, forceIndex );
629     }
630 
631     @Override
632     public String toString() {
633         return String.format( "%s [%s]", getOntologyName(), getOntologyUrl() );
634     }
635 
636     private Set<OntClass> getOntClassesFromTerms( Collection<OntologyTerm> terms ) {
637         return terms.stream()
638                 .map( OntologyTerm::getUri )
639                 .filter( Objects::nonNull )
640                 .map( model::getOntClass )
641                 .filter( Objects::nonNull )
642                 .collect( Collectors.toSet() );
643     }
644 }