1 package ubic.basecode.ontology.ncbo;
2
3 import org.apache.commons.lang3.StringUtils;
4 import org.apache.commons.lang3.exception.ExceptionUtils;
5 import org.slf4j.Logger;
6 import org.slf4j.LoggerFactory;
7 import org.w3c.dom.Document;
8 import org.w3c.dom.Element;
9 import org.w3c.dom.Node;
10 import org.w3c.dom.NodeList;
11 import org.xml.sax.SAXException;
12 import ubic.basecode.util.Configuration;
13
14 import javax.xml.parsers.DocumentBuilder;
15 import javax.xml.parsers.DocumentBuilderFactory;
16 import javax.xml.parsers.ParserConfigurationException;
17 import java.io.IOException;
18 import java.io.InputStream;
19 import java.net.ConnectException;
20 import java.net.URL;
21 import java.util.Collection;
22 import java.util.TreeSet;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25
26
27
28
29
30 public class AnnotatorClient {
31
32
33 public static final String HP_ONTOLOGY = "HP";
34 public static final String DOID_ONTOLOGY = "DOID";
35
36 private final static int MAX_TRIES = 3;
37
38 private static Logger log = LoggerFactory.getLogger( AnnotatorClient.class );
39
40
41 private static String API_KEY = Configuration.getString( "ncbo.api.key" );
42
43 private static String ANNOTATOR_URL = "http://data.bioontology.org/annotator?";
44
45
46 private static String ontologies = HP_ONTOLOGY + "," + DOID_ONTOLOGY;
47
48
49
50
51
52
53
54
55
56
57 public static Collection<AnnotatorResponse> findTerm( String term )
58 throws IOException, ParserConfigurationException, IllegalStateException, SAXException {
59 if ( StringUtils.isBlank( API_KEY ) ) {
60 throw new IllegalStateException( "NCBO ncbo.api.key needs to be configured" );
61 }
62
63 Collection<AnnotatorResponse> responsesFound = new TreeSet<>();
64
65 String termClean = removeSpecialCharacters( term );
66
67 if ( StringUtils.isBlank( termClean ) ) return responsesFound;
68
69 String url = ANNOTATOR_URL + "apikey=" + API_KEY + "&max_level=0&ontologies=" + ontologies
70 + "&format=xml&text=" + termClean;
71
72 if ( log.isDebugEnabled() ) log.debug( "request url: " + url );
73
74 int tries = 0;
75
76 InputStream response = null;
77 while ( response == null && tries < MAX_TRIES ) {
78 try {
79 response = new URL( url ).openStream();
80 } catch ( IOException e ) {
81 try {
82 Thread.sleep( 10000 );
83 } catch ( InterruptedException e1 ) {
84
85 }
86 tries++;
87 }
88 }
89
90 if ( response == null ) {
91 log.warn( "Failed to get a response for " + url + " (original query=" + term + ")" );
92 return responsesFound;
93 }
94
95 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
96 DocumentBuilder builder = factory.newDocumentBuilder();
97 Document document = builder.parse( response );
98 NodeList nodes = document.getElementsByTagName( "annotation" );
99
100
101 for ( int temp = 0; temp < nodes.getLength(); temp++ ) {
102
103 Node nNode = nodes.item( temp );
104 Element eElement = ( Element ) nNode;
105
106
107 String valueUri = eElement.getElementsByTagName( "id" ).item( 0 ).getTextContent();
108
109
110 for ( int i = 0; i < eElement.getElementsByTagName( "annotations" ).getLength(); i++ ) {
111
112 Element infoE = ( Element ) eElement.getElementsByTagName( "annotations" ).item( i );
113
114 String matchType = infoE.getElementsByTagName( "matchType" ).item( 0 ).getTextContent();
115 String txtMatched = infoE.getElementsByTagName( "text" ).item( 0 ).getTextContent();
116 String ontologyUsed = findOntologyUsed( valueUri );
117
118 Integer from = new Integer( infoE.getElementsByTagName( "from" ).item( 0 ).getTextContent() );
119 Integer to = new Integer( infoE.getElementsByTagName( "to" ).item( 0 ).getTextContent() );
120
121 AnnotatorResponse annotatorResponse = new AnnotatorResponse( valueUri, matchType, txtMatched, from, to,
122 ontologyUsed, termClean );
123
124 responsesFound.add( annotatorResponse );
125 }
126 }
127
128 return responsesFound;
129 }
130
131
132
133
134
135
136
137 private static String findOntologyUsed( String url ) {
138
139 if ( url.indexOf( HP_ONTOLOGY ) != -1 ) {
140 return HP_ONTOLOGY;
141 } else if ( url.indexOf( DOID_ONTOLOGY ) != -1 ) {
142 return DOID_ONTOLOGY;
143 }
144
145 return "UNKNOWN";
146
147 }
148
149
150
151
152
153
154
155
156 public static String findLabelForIdentifier( String ontologyId, String identifier ) {
157
158 if ( StringUtils.isBlank( API_KEY ) ) {
159 throw new IllegalStateException( "NCBO ncbo.api.key needs to be configured" );
160 }
161
162
163
164
165
166
167
168
169
170
171
172
173 String url;
174
175
176 switch ( ontologyId ) {
177 case "OMIM":
178 case "MESH":
179 url = "http://data.bioontology.org/ontologies/" + ontologyId
180 + "/classes/http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FMESH%2F"
181 + identifier
182 + "/?apikey=" + API_KEY + "&format=xml";
183 break;
184 case "DOID":
185 url = "http://data.bioontology.org/ontologies/" + ontologyId + "/classes/http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2F"
186 + identifier
187 + "/?apikey=" + API_KEY + "&format=xml";
188 break;
189 default:
190 throw new IllegalArgumentException( "Don't know how to deal with " + ontologyId );
191 }
192
193 log.debug( url );
194
195 for ( int i = 0; i < MAX_TRIES; i++ ) {
196 try ( InputStream response = new URL( url ).openStream() ) {
197 return findLabel( response );
198 } catch ( ConnectException ce ) {
199 try {
200 Thread.sleep( 500 );
201 } catch ( InterruptedException e ) {
202 }
203 } catch ( Exception e ) {
204 log.error( "Identifier: '" + identifier + "'" );
205 log.error( ExceptionUtils.getStackTrace( e ) );
206 }
207 }
208 return null;
209 }
210
211
212
213
214 private static String findLabel( InputStream response ) throws Exception {
215 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
216 DocumentBuilder builder = factory.newDocumentBuilder();
217 try ( InputStream content = response ) {
218 Document document = builder.parse( content );
219 NodeList nodes = document.getElementsByTagName( "prefLabel" );
220 if ( nodes == null ) {
221 log.debug( "No definition found" );
222 return null;
223 }
224 String labelName = ( ( Element ) nodes.item( 0 ) ).getTextContent();
225
226 return labelName;
227 }
228 }
229
230
231 public static String removeSpecialCharacters( String txt ) {
232
233 String simpleTxt = txt.trim();
234
235
236 int index1 = simpleTxt.indexOf( "(" );
237 int index2 = simpleTxt.indexOf( ")" );
238 if ( index1 != -1 && index2 != -1 ) {
239 simpleTxt = simpleTxt.substring( 0, index1 ) + simpleTxt.substring( index2 + 1, simpleTxt.length() );
240 }
241
242
243 Pattern pt = Pattern.compile( "[^\\w\\s-,]+" );
244 Matcher match = pt.matcher( simpleTxt );
245 while ( match.find() ) {
246 String s = match.group();
247 simpleTxt = simpleTxt.replaceAll( "\\" + s, "" );
248 }
249
250 return simpleTxt.trim().replaceAll( "\\s+", "+" );
251 }
252
253 }