View Javadoc
1   /*
2    * The basecode project
3    *
4    * Copyright (c) 2007-2019 University of British Columbia
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   *
18   */
19  
20  package ubic.basecode.ontology.providers;
21  
22  import java.io.IOException;
23  import java.lang.Thread.State;
24  import java.util.Collection;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.Map;
28  import java.util.Set;
29  import java.util.concurrent.atomic.AtomicBoolean;
30  
31  import org.apache.commons.lang3.RandomStringUtils;
32  import org.apache.commons.lang3.StringUtils;
33  import org.apache.commons.lang3.time.StopWatch;
34  import org.slf4j.Logger;
35  import org.slf4j.LoggerFactory;
36  
37  import com.hp.hpl.jena.ontology.OntModel;
38  
39  import ubic.basecode.ontology.OntologyLoader;
40  import ubic.basecode.ontology.model.OntologyIndividual;
41  import ubic.basecode.ontology.model.OntologyResource;
42  import ubic.basecode.ontology.model.OntologyTerm;
43  import ubic.basecode.ontology.search.OntologyIndexer;
44  import ubic.basecode.ontology.search.OntologySearch;
45  import ubic.basecode.ontology.search.OntologySearchException;
46  import ubic.basecode.ontology.search.SearchIndex;
47  import ubic.basecode.util.Configuration;
48  
49  /**
50   * @author kelsey
51   */
52  public abstract class AbstractOntologyService {
53  
54      protected class OntologyInitializationThread extends Thread {
55  
56          AtomicBoolean cancel = new AtomicBoolean( false );
57  
58          private boolean forceReindexing = false;
59  
60          public OntologyInitializationThread( boolean forceRefresh ) {
61              super();
62              this.forceReindexing = forceRefresh;
63          }
64  
65          public void cancel() {
66              this.cancel.set( true );
67              this.interrupt();
68          }
69  
70          public boolean isCancelled() {
71              return cancel.get();
72          }
73  
74          public boolean isForceReindexing() {
75              return forceReindexing;
76          }
77  
78          @Override
79          public void run() {
80  
81              terms = new HashMap<>();
82              individuals = new HashMap<>();
83  
84              if ( isCancelled() ) {
85                  log.warn( "Cancelled initialization" );
86                  return;
87              }
88  
89              log.info( "Loading ontology: " + getOntologyName() + " from " + getOntologyUrl() + " ..." );
90              StopWatch loadTime = new StopWatch();
91              loadTime.start();
92  
93              model = getModel(); // can take a while.
94              assert model != null;
95  
96              try {
97  
98                  //Checks if the current ontology has changed since it was last loaded.
99                  boolean changed = OntologyLoader.hasChanged( getOntologyName() );
100                 boolean indexExists = OntologyIndexer.getSubjectIndex( getOntologyName() ) != null;
101 
102                 /*
103                  * Indexing is slow, don't do it if we don't have to.
104                  */
105                 index( forceReindexing || changed || !indexExists );
106 
107                 indexReady.set( true );
108 
109                 if ( isCancelled() ) {
110                     log.error( "Cancelled initialization" );
111                     return;
112                 }
113 
114                 /*
115                  * This creates a cache of URI (String) --> OntologyTerms. ?? Does Jena provide an easier way to do
116                  * this?
117                  */
118 
119                 loadTermsInNameSpace( getOntologyUrl(), model );
120 
121                 cleanup();
122 
123                 cacheReady.set( true );
124 
125                 isInitialized.set( true );
126                 loadTime.stop();
127 
128                 log.info( "Finished loading " + getOntologyName() + " in " + String.format( "%.2f", loadTime.getTime() / 1000.0 )
129                         + "s" );
130 
131             } catch ( Exception e ) {
132                 log.error( e.getMessage(), e );
133                 isInitialized.set( false );
134             } finally {
135                 // no-op
136             }
137         }
138 
139         public void setForceReindexing( boolean forceReindexing ) {
140             this.forceReindexing = forceReindexing;
141         }
142 
143         private void cleanup() {
144             OntologyLoader.deleteOldCache( getOntologyName() );
145         }
146     }
147 
148     protected static Logger log = LoggerFactory.getLogger( AbstractOntologyService.class );
149 
150     protected AtomicBoolean cacheReady = new AtomicBoolean( false );
151 
152     protected SearchIndex index;
153 
154     protected AtomicBoolean indexReady = new AtomicBoolean( false );
155     protected Map<String, OntologyIndividual> individuals;
156 
157     protected OntologyInitializationThread initializationThread;
158     protected AtomicBoolean isInitialized = new AtomicBoolean( false );
159     protected OntModel model = null;
160 
161     protected AtomicBoolean modelReady = new AtomicBoolean( false );
162 
163     protected Map<String, OntologyTerm> terms = null;
164 
165     private Map<String, OntologyTerm> alternativeIDs = new HashMap<>();
166 
167     /**
168      *
169      */
170     public AbstractOntologyService() {
171         super();
172 
173         initializationThread = new OntologyInitializationThread( false );
174         initializationThread.setName( getOntologyName() + "_load_thread_" + RandomStringUtils.randomAlphanumeric( 5 ) );
175         // To prevent VM from waiting on this thread to shutdown (if shutting down).
176         initializationThread.setDaemon( true );
177 
178     }
179 
180     // private boolean enabled = false;
181 
182     /**
183      * Do not do this except before re-indexing.
184      */
185     public void closeIndex() {
186         if ( index == null ) return;
187         index.close();
188     }
189 
190     /**
191      * Looks for any OntologyIndividuals that match the given search string.
192      *
193      * @param  search
194      * @return
195      */
196     public Collection<OntologyIndividual> findIndividuals( String search ) throws OntologySearchException {
197 
198         if ( !isOntologyLoaded() ) return null;
199 
200         if ( index == null ) {
201             log.warn( "attempt to search " + this.getOntologyName() + " when index is null" );
202             return null;
203         }
204 
205         OntModel m = getModel();
206 
207         Collection<OntologyIndividual> indis = OntologySearch.matchIndividuals( m, index, search );
208 
209         return indis;
210     }
211 
212     /**
213      * Looks for any OntologyIndividuals or ontologyTerms that match the given search string
214      *
215      * @param  search
216      * @return        results, or an empty collection if the results are empty OR the ontology is not available to be
217      *                searched.
218      */
219     public Collection<OntologyResource> findResources( String searchString ) throws OntologySearchException {
220 
221         if ( !isOntologyLoaded() ) {
222             log.warn( "Ontology is not ready: " + this.getClass() );
223             return new HashSet<>();
224         }
225 
226         assert index != null : "attempt to search " + this.getOntologyName() + " when index is null";
227 
228         OntModel m = getModel();
229 
230         Collection<OntologyResource> results = OntologySearch.matchResources( m, index, searchString );
231 
232         return results;
233     }
234 
235     /**
236      * Looks for any ontologyTerms that match the given search string. Obsolete terms are filtered out.
237      *
238      * @param  search
239      * @return
240      */
241     public Collection<OntologyTerm> findTerm( String search ) throws OntologySearchException {
242 
243         if ( !isOntologyLoaded() ) return new HashSet<>();
244 
245         if ( log.isDebugEnabled() ) log.debug( "Searching " + this.getOntologyName() + " for '" + search + "'" );
246 
247         assert index != null : "attempt to search " + this.getOntologyName() + " when index is null";
248 
249         OntModel m = getModel();
250 
251         Collection<OntologyTerm> matches = OntologySearch.matchClasses( m, index, search );
252 
253         return matches;
254     }
255 
256     public OntologyTerm findUsingAlternativeId( String alternativeId ) {
257 
258         if ( alternativeIDs.isEmpty() ) {
259             log.info( "init search by alternativeID" );
260             initSearchByAlternativeId();
261         }
262 
263         if ( alternativeIDs.get( alternativeId ) != null ) {
264             return alternativeIDs.get( alternativeId );
265         }
266 
267         return null;
268     }
269 
270     public Set<String> getAllURIs() {
271         if ( terms == null ) return null;
272         return new HashSet<>( terms.keySet() );
273     }
274 
275     /**
276      * Looks through both Terms and Individuals for a OntologyResource that has a uri matching the uri given. If no
277      * OntologyTerm is found only then will ontologyIndividuals be searched. returns null if nothing is found.
278      *
279      * @param  uri
280      * @return
281      */
282     public OntologyResource getResource( String uri ) {
283 
284         if ( ( uri == null ) || ( !isInitialized.get() ) ) return null;
285 
286         OntologyResource resource = terms.get( uri );
287 
288         if ( resource == null ) resource = individuals.get( uri );
289 
290         return resource;
291     }
292 
293     /**
294      * Looks for a OntologyTerm that has the match in URI given
295      *
296      * @param  uri
297      * @return
298      */
299     public OntologyTerm getTerm( String uri ) {
300 
301         if ( !isInitialized.get() || terms == null ) return null;
302 
303         if ( uri == null ) throw new IllegalArgumentException( "URI cannot be null" );
304 
305         OntologyTerm term = terms.get( uri );
306 
307         return term;
308     }
309 
310     /**
311      * @param  uri
312      * @return
313      */
314     public Collection<OntologyIndividual> getTermIndividuals( String uri ) {
315 
316         if ( terms == null ) {
317             log.warn( "No term for URI=" + uri + " in " + this.getOntologyName()
318                     + " no terms loaded; make sure ontology is loaded and uri is valid" );
319             return new HashSet<>();
320         }
321 
322         OntologyTerm term = terms.get( uri );
323         if ( term == null ) {
324             /*
325              * Either the ontology hasn't been loaded, or the id was not valid.
326              */
327             log.warn( "No term for URI=" + uri + " in " + this.getOntologyName()
328                     + "; make sure ontology is loaded and uri is valid" );
329             return new HashSet<>();
330         }
331         return term.getIndividuals( true );
332 
333     }
334 
335     /**
336      * Create the search index.
337      *
338      * @param force
339      */
340     public void index( boolean force ) {
341         StopWatch timer = new StopWatch();
342         timer.start();
343         OntModel m = getModel();
344         assert m != null;
345 
346         index = OntologyIndexer.indexOntology( getOntologyName(), m, force );
347 
348     }
349 
350     /**
351      * @return
352      */
353     public boolean isEnabled() {
354         if ( isOntologyLoaded() ) return true; // could have forced, without setting config
355         String configParameter = "load." + getOntologyName();
356         return Configuration.getBoolean( configParameter );
357     }
358 
359     public boolean isInitializationThreadAlive() {
360         return initializationThread.isAlive();
361     }
362 
363     /**
364      * Used for determining if the Ontology has finished loading into memory. Although calls like getParents,
365      * getChildren will still work (its much faster once the ontologies have been preloaded into memory.)
366      *
367      * @returns boolean
368      */
369     public boolean isOntologyLoaded() {
370         return isInitialized.get();
371     }
372 
373     /**
374      * 
375      * @param forceLoad
376      * @param forceIndexing If forceLoad is also true, indexing will be performed. If you know the index is
377      *                      up to date, there's no need to do it again. Normally indexing is only done if there is no
378      *                      index, or if the ontology has changed since last loaded.
379      */
380     public void startInitializationThread( boolean forceLoad, boolean forceIndexing ) {
381         assert initializationThread != null;
382         synchronized ( initializationThread ) {
383             if ( initializationThread.isAlive() ) {
384                 log.warn( getOntologyName() + " initialization is already running, not restarting." );
385                 return;
386             } else if ( initializationThread.isInterrupted() ) {
387                 log.warn( getOntologyName() + " initialization was interrupted, not restarting." );
388                 return;
389             } else if ( !initializationThread.getState().equals( State.NEW ) ) {
390                 log.warn( getOntologyName() + " initialization was not ready to run: state="
391                         + initializationThread.getState() + ", not restarting." );
392                 return;
393             }
394 
395             if ( !forceLoad && this.isOntologyLoaded() ) {
396                 log.warn( getOntologyName() + " is already loaded, and force=false, not restarting" );
397                 return;
398             }
399 
400             boolean loadOntology = isEnabled();
401 
402             // If loading ontologies is disabled in the configuration, return
403             if ( !forceLoad && !loadOntology ) {
404                 log.debug( "Loading " + getOntologyName() + " is disabled (force=" + forceLoad + ", "
405                         + "Configuration load." + getOntologyName() + "=" + loadOntology + ")" );
406                 return;
407             }
408 
409             // Detect configuration problems.
410             if ( StringUtils.isBlank( this.getOntologyUrl() ) ) {
411                 throw new IllegalStateException( "URL not defined, ontology cannot be loaded ("
412                         + this.getClass().getSimpleName() + ")" );
413             }
414 
415             // This thread indexes ontology and creates local cache for uri->ontology terms mappings.
416             if ( !forceIndexing ) {
417                 log.info( getOntologyName() + " index will *not* be refreshed unless the ontology "
418                         + "has changed or the index is misssing" );
419             }
420             initializationThread.setForceReindexing( forceLoad && forceIndexing );
421             initializationThread.start();
422         }
423     }
424 
425     /**
426      * @param newTerms
427      */
428     protected void addTerms( Collection<OntologyResource> newTerms ) {
429 
430         if ( newTerms == null || newTerms.isEmpty() ) {
431             log.warn( "No terms!" );
432             return;
433         }
434 
435         if ( terms == null ) terms = new HashMap<>();
436         if ( individuals == null ) individuals = new HashMap<>();
437 
438         int i = 0;
439         for ( OntologyResource term : newTerms ) {
440             if ( term.getUri() == null ) continue;
441             if ( term instanceof OntologyTerm ) terms.put( term.getUri(), ( OntologyTerm ) term );
442             if ( term instanceof OntologyIndividual ) individuals.put( term.getUri(), ( OntologyIndividual ) term );
443 
444             if ( ++i % 1000 == 0 && initializationThread.isCancelled() ) {
445                 log.error( "Cancelled initialization" );
446                 this.isInitialized.set( false );
447                 return;
448             }
449         }
450     }
451 
452     protected synchronized OntModel getModel() {
453         if ( model == null ) {
454             model = loadModel();
455         }
456         return model;
457     }
458 
459     /**
460      * The simple name of the ontology. Used for indexing purposes. (ie this will determine the name of the underlying
461      * index for searching the ontology)
462      *
463      * @return
464      */
465     protected abstract String getOntologyName();
466 
467     /**
468      * Defines the location of the ontology eg: http://mged.sourceforge.net/ontologies/MGEDOntology.owl
469      *
470      * @return
471      */
472     protected abstract String getOntologyUrl();
473 
474     /**
475      * Delegates the call as to load the model into memory or leave it on disk. Simply delegates to either
476      * OntologyLoader.loadMemoryModel( url ); OR OntologyLoader.loadPersistentModel( url, spec );
477      *
478      * @param  url
479      * @return
480      * @throws IOException
481      */
482     protected abstract OntModel loadModel();
483 
484     /**
485      * @param  url
486      * @param  m
487      * @throws IOException
488      */
489     protected void loadTermsInNameSpace( String url, OntModel m ) {
490         Collection<OntologyResource> t = OntologyLoader.initialize( url, m );
491         addTerms( t );
492     }
493 
494     /*
495      * this add alternative id in 2 ways
496      *
497      * Example :
498      *
499      * http://purl.obolibrary.org/obo/HP_0000005 with alternative id : HP:0001453
500      *
501      * by default way use in file 1- HP:0001453 -----> http://purl.obolibrary.org/obo/HP_0000005
502      *
503      * trying to use the value uri 2- http://purl.obolibrary.org/obo/HP_0001453 ----->
504      * http://purl.obolibrary.org/obo/HP_0000005
505      */
506     private void initSearchByAlternativeId() {
507 
508         // lets find the baseUrl, to change to valueUri
509         String randomUri = terms.values().iterator().next().getUri();
510         String baseOntologyUri = randomUri.substring( 0, randomUri.lastIndexOf( "/" ) + 1 );
511 
512         // for all Ontology terms that exist in the tree
513         for ( OntologyTerm ontologyTerm : terms.values() ) {
514 
515             for ( String alternativeId : ontologyTerm.getAlternativeIds() ) {
516                 // first way
517                 alternativeIDs.put( alternativeId, ontologyTerm );
518 
519                 String alternativeIdModified = alternativeId.replace( ':', '_' );
520 
521                 // second way
522                 alternativeIDs.put( baseOntologyUri + alternativeIdModified, ontologyTerm );
523             }
524         }
525     }
526 
527 }