24 | | private CrawledMetadata crawledMetadata = null; |
25 | | |
26 | | public String getEndpointURI(){ |
27 | | return crawledMetadata.getEndpointURI(); |
28 | | } |
29 | | |
30 | | public String[] getGraphURIs(){ |
31 | | return crawledMetadata.getGraphURIs(); |
32 | | } |
33 | | |
34 | | /* |
35 | | public static void main(String[] args) throws Exception{ |
36 | | |
37 | | JenaModelGenerator jmGene = new JenaModelGenerator("c:\\temp\\biosamplesF11.ttl"); |
38 | | // JenaModelGenerator jmGene = new JenaModelGenerator("c:\\temp\\reactomeF18s.ttl"); |
39 | | AcquiredStructureAnalyzer impl |
40 | | = new AcquiredStructureAnalyzer(jmGene.getEndpointURI(), jmGene.getGraphURIs(), jmGene.getModel()); |
41 | | |
42 | | System.out.println("--------------------------"); |
43 | | SClass[] scs = impl.getOWLClasses(null, null, null, true); |
44 | | System.out.println("list classes:---------------"); |
45 | | for(SClass sc: scs){ |
46 | | System.out.println(sc.toString()); |
47 | | } |
48 | | System.out.println("--------------------------"); |
49 | | |
50 | | // ClassLink[] cls = impl.getNextClass(null,"http://www.biopax.org/release/biopax-level3.owl#Protein",100,true ); |
51 | | // for(ClassLink cl: cls){ |
52 | | // System.out.println(cl.toString()); |
53 | | // } |
54 | | // System.out.println("--------------------------"); |
55 | | |
56 | | } |
57 | | */ |
58 | | |
/**
 * Creates an analyzer that answers every query from the given crawled metadata.
 *
 * @param crawledMetadata metadata object stored as-is (no copy, no null check)
 */
public AcquiredStructureAnalyzer(CrawledMetadata crawledMetadata) {
    super();
    this.crawledMetadata = crawledMetadata;
}
62 | | |
63 | | private String filterGraphURIs(String orgGraphURIs){ |
64 | | // TODO |
65 | | return orgGraphURIs; |
66 | | } |
67 | | |
68 | | |
69 | | public SClass[] listClasses(String graphURI, boolean countInstances) throws Exception{ |
70 | | return getOWLClasses(graphURI, null, null, countInstances); |
71 | | } |
72 | | |
73 | | |
74 | | |
75 | | |
76 | | public SClass[] getOWLClasses(String graphURI, String[] keywords, String language, boolean countInstances) throws Exception{ |
77 | | return getOWLClassList(graphURI, keywords, language, countInstances).toArray(new SClass[0]); |
78 | | } |
79 | | |
80 | | public List<SClass> getOWLClassList(String graphURI, String[] keywords, String language, boolean countInstances) throws Exception{ |
81 | | String targetGraphURI = filterGraphURIs(graphURI); |
82 | | |
83 | | Dataset dataset = crawledMetadata.getDataset(targetGraphURI); |
84 | | |
85 | | |
86 | | StringBuffer queryStr = new StringBuffer(); |
87 | | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n"); |
88 | | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"); |
89 | | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"); |
90 | | queryStr.append("SELECT DISTINCT ?c ?pLabel ?entities\n"); |
91 | | if (targetGraphURI != null) { |
92 | | queryStr.append("FROM <"); |
93 | | queryStr.append(targetGraphURI); |
94 | | queryStr.append(">\n"); |
95 | | } |
96 | | queryStr.append("WHERE{\n"); |
97 | | queryStr.append(" ?cp <").append(URICollection.PROPERTY_VOID_CLASS).append("> ?c. \n"); |
98 | | queryStr.append(" ?cp <"); |
99 | | queryStr.append(URICollection.PROPERTY_VOID_ENTITIES); |
100 | | queryStr.append("> ?entities. \n"); |
101 | | queryStr.append(" OPTIONAL{ ?c <"); |
102 | | queryStr.append(URICollection.PROPERTY_RDFS_LABEL); |
103 | | queryStr.append("> ?pLabel. }\n"); |
104 | | |
105 | | if (keywords != null && keywords.length != 0) { |
106 | | |
107 | | queryStr.append(" ?c rdfs:label "); |
108 | | queryStr.append("?keywords").append(".\n"); |
109 | | queryStr.append(" filter((LANG(?keywords) = \'").append(language); |
110 | | queryStr.append("\') && \n ("); |
111 | | |
112 | | for (int i = 0; i < keywords.length; i++) { |
113 | | if (i > 0) |
114 | | queryStr.append(" || \n "); |
115 | | |
116 | | queryStr.append("regex(str(").append("?keywords") |
117 | | .append("),\""); |
118 | | queryStr.append(keywords[i]); |
119 | | queryStr.append("\", \"i\" )"); |
120 | | |
121 | | } |
122 | | queryStr.append("))\n"); |
123 | | |
124 | | } |
125 | | queryStr.append("}"); |
126 | | System.out.println(queryStr.toString()); |
127 | | |
128 | | Query query = QueryFactory.create(queryStr.toString()); |
129 | | |
130 | | QueryExecution qexec = null; |
131 | | ResultSet results = null; |
132 | | try { |
133 | | // long start = System.currentTimeMillis(); |
134 | | qexec = QueryExecutionFactory.create(query, dataset.getModel()); |
135 | | results = qexec.execSelect(); |
136 | | // long end = System.currentTimeMillis(); |
137 | | // System.out.println("EXEC TIME: " + (end - start)); |
138 | | } catch (Exception ex) { |
139 | | ex.printStackTrace(); |
140 | | throw ex; |
141 | | } |
142 | | |
143 | | HashMap<String, SClass> classMap = new HashMap<String, SClass>(); |
144 | | for (; results.hasNext();) { |
145 | | QuerySolution sol = results.next(); |
146 | | Resource res = sol.getResource("c"); |
147 | | if (res != null && res.getURI() != null) { |
148 | | String uri = res.getURI(); |
149 | | int numOfInstances = 0; |
150 | | if (countInstances) { |
151 | | numOfInstances = sol.getLiteral("entities").getInt(); |
152 | | } // |
153 | | Literal labelLiteral = sol.getLiteral("pLabel"); |
154 | | SClass sClass = null; |
155 | | if (classMap.containsKey(uri)) { |
156 | | sClass = classMap.get(uri); |
157 | | } else { |
158 | | sClass = new SClass(uri, null, numOfInstances); |
159 | | classMap.put(uri, sClass); |
160 | | } |
161 | | if (labelLiteral != null) { |
162 | | String label = labelLiteral.getString(); |
163 | | String lang = labelLiteral.getLanguage(); |
164 | | sClass.addLabel(new Label(label, lang)); |
165 | | } |
166 | | } |
167 | | } |
168 | | qexec.close(); |
169 | | return new LinkedList<SClass>(classMap.values()); |
170 | | } |
| 30 | // key: endpointURI, value: crawled metadata |
| 31 | private HashMap<String, CrawledMetadata> crawledMetadataTable= null; |
| 32 | |
| 33 | public String[] getEndpointURIs(){ |
| 34 | if( crawledMetadataTable == null ){ |
| 35 | return new String[0]; |
| 36 | }else{ |
| 37 | return crawledMetadataTable.keySet().toArray(new String[0]); |
| 38 | } |
| 39 | } |
| 40 | |
| 41 | public String[] getGraphURIs(String endpointURI){ |
| 42 | if( crawledMetadataTable == null ){ |
| 43 | return new String[0]; |
| 44 | }else{ |
| 45 | CrawledMetadata crawledMetadata = crawledMetadataTable.get(endpointURI); |
| 46 | if( crawledMetadata == null ){ |
| 47 | return new String[0]; |
| 48 | }else{ |
| 49 | return crawledMetadata.getGraphURIs(); |
| 50 | } |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | |
| 55 | public AcquiredStructureAnalyzer(MetadataManager metadataManager){ |
| 56 | CrawledMetadata[] cmList = metadataManager.getCrawlerMetadataList(); |
| 57 | crawledMetadataTable = new HashMap<String, CrawledMetadata>(); |
| 58 | |
| 59 | if( cmList != null ){ |
| 60 | for(CrawledMetadata cm: cmList){ |
| 61 | String endpointURI = cm.getEndpointURI(); |
| 62 | crawledMetadataTable.put(endpointURI, cm); |
| 63 | } |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | |
| 68 | |
| 69 | public SClass[] listClasses() throws Exception{ |
| 70 | return getOWLClasses(null, null); |
| 71 | } |
| 72 | |
| 73 | |
| 74 | public SClass[] getOWLClasses(String[] keywords, String language) throws Exception{ |
| 75 | return getOWLClassList(keywords, language).toArray(new SClass[0]); |
| 76 | } |
| 77 | |
| 78 | |
| 79 | public List<SClass> getOWLClassList(String[] keywords, String language) throws Exception{ |
| 80 | ArrayList<SClass> classList = new ArrayList<SClass>(); |
| 81 | |
| 82 | Set<String> endpointURISet = crawledMetadataTable.keySet(); |
| 83 | for(String endpointURI: endpointURISet){ |
| 84 | CrawledMetadata cm = crawledMetadataTable.get(endpointURI); |
| 85 | // default |
| 86 | Dataset dataset = cm.getDefaultDataset(); |
| 87 | List<SClass> tempClassList = getOWLClassList(dataset, keywords, language); |
| 88 | for(SClass sClass: tempClassList){ |
| 89 | classList.add(sClass); |
| 90 | } |
| 91 | |
| 92 | // graphs |
| 93 | String[] graphURIs = cm.getGraphURIs(); |
| 94 | if( graphURIs != null ){ |
| 95 | for(String graphURI: graphURIs){ |
| 96 | dataset = cm.getDataset(graphURI); |
| 97 | tempClassList = getOWLClassList(dataset, keywords, language); |
| 98 | for(SClass sClass: tempClassList){ |
| 99 | classList.add(sClass); |
| 100 | } |
| 101 | } |
| 102 | } |
| 103 | } |
| 104 | return classList; |
| 105 | } |
| 106 | |
| 107 | private List<SClass> getOWLClassList(Dataset dataset, String[] keywords, String language) throws Exception{ |
| 108 | ArrayList<SClass> results = new ArrayList<SClass>(); |
| 109 | |
| 110 | ClassPartition[] classPartitionList = dataset.getClassPartitions(); |
| 111 | if( classPartitionList == null || classPartitionList.length == 0 ){ |
| 112 | return new ArrayList<SClass>(); |
| 113 | } |
| 114 | for( ClassPartition cp: classPartitionList){ |
| 115 | String classURI = cp.classDef.classURI; |
| 116 | Label[] rLabels = cp.classDef.labels; |
| 117 | org.biohackathon.SPARQLBuilder.OWL.Label[] labels = null; |
| 118 | if( rLabels == null ){ |
| 119 | labels = new org.biohackathon.SPARQLBuilder.OWL.Label[0]; |
| 120 | }else{ |
| 121 | labels = new org.biohackathon.SPARQLBuilder.OWL.Label[rLabels.length]; |
| 122 | for(int i = 0; i < rLabels.length; i++ ) { |
| 123 | labels[i] = new org.biohackathon.SPARQLBuilder.OWL.Label(rLabels[i].value, rLabels[i].language); |
| 124 | } |
| 125 | } |
| 126 | int entities = cp.entities; |
| 127 | |
| 128 | if( keywords == null || keywords.length == 0 ){ |
| 129 | SClass sClass = new SClass(classURI, labels, entities); |
| 130 | results.add(sClass); |
| 131 | }else{ |
| 132 | boolean hit = false; |
| 133 | for(org.biohackathon.SPARQLBuilder.OWL.Label label: labels){ |
| 134 | if( language == null || label.getLanguage().equals(language)){ |
| 135 | String value = label.getLabel(); |
| 136 | if( value != null ){ |
| 137 | value = value.toLowerCase().trim(); |
| 138 | for(String keyword: keywords){ |
| 139 | if( value.contains(keyword.toLowerCase().trim())){ |
| 140 | hit = true; |
| 141 | break; |
| 142 | } |
| 143 | } |
| 144 | } |
| 145 | } |
| 146 | if( hit ){ |
| 147 | SClass sClass = new SClass(classURI, labels, entities); |
| 148 | results.add(sClass); |
| 149 | } |
| 150 | } |
| 151 | } |
| 152 | } |
| 153 | return results; |
| 154 | } |
172 | | /* |
173 | | |
174 | | public Instance[] getInstances(String[] graphURIs, String keyword) throws Exception; |
175 | | */ |
176 | | |
177 | | |
178 | | public ClassLink[] getNextClass(String graphURI, String originClass, int limit, boolean countLinks) throws Exception{ |
179 | | String targetGraphURI = filterGraphURIs(graphURI); |
180 | | Dataset dataset = crawledMetadata.getDataset(targetGraphURI); |
181 | | |
182 | | |
183 | | StringBuffer queryStr = new StringBuffer(); |
184 | | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n"); |
185 | | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"); |
186 | | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"); |
187 | | |
188 | | // SELECT |
189 | | queryStr.append("SELECT DISTINCT ?indPropCat ?c ?dat ?d ?p ?numLnkInsStart ?numLnkInsEnd ?numInsDom ?numInsRan ?numTriples\n"); |
190 | | |
191 | | if (targetGraphURI != null) { |
192 | | queryStr.append("FROM <"); |
193 | | queryStr.append(targetGraphURI); |
194 | | queryStr.append(">\n"); |
195 | | } |
196 | | |
197 | | queryStr.append("WHERE{\n"); |
198 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_RDF_TYPE).append("> <").append(URICollection.RESOURCE_SB_CLASS_RELATION).append(">. \n"); |
199 | | queryStr.append(" ?propPart <").append(URICollection.PROPERTY_SB_CLASS_RELATION).append("> ?cr. \n"); |
200 | | queryStr.append(" ?propPart <").append(URICollection.PROPERTY_VOID_PROPERTY).append("> ?p. \n"); |
201 | | |
202 | | queryStr.append(" {"); |
203 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_SUBJECT_CLASS).append("> <" + originClass + ">. \n"); |
204 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_OBJECT_CLASS).append("> ?d. \n"); |
205 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS).append("> ?numLnkInsStart. \n"); |
206 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_OBJECTS).append("> ?numLnkInsEnd. \n"); |
207 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_TRIPLES).append("> ?numTriples. \n"); |
208 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/startClassLimitedQ> ?isStartClsLim. \n"); |
209 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/endClassLimitedQ> ?isEndClsLim. \n"); |
210 | | queryStr.append("}\n"); |
211 | | queryStr.append(" UNION\n"); |
212 | | queryStr.append(" {"); |
213 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_SUBJECT_CLASS).append("> <" + originClass + ">. \n"); |
214 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_OBJECT_DATATYPE).append("> ?dat. \n"); |
215 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS).append("> ?numLnkInsStart. \n"); |
216 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_OBJECTS).append("> ?numLnkInsEnd. \n"); |
217 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_TRIPLES).append("> ?numTriples. \n"); |
218 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/startClassLimitedQ> ?isStartClsLim. \n"); |
219 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/endClassLimitedQ> ?isEndClsLim. \n"); |
220 | | queryStr.append("}\n"); |
221 | | queryStr.append(" UNION\n"); |
222 | | queryStr.append(" {"); |
223 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_OBJECT_CLASS).append("> <" + originClass + ">. \n"); |
224 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_SUBJECT_CLASS).append("> ?c. \n"); |
225 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS).append("> ?numLnkInsEnd. \n"); |
226 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_OBJECTS).append("> ?numLnkInsStart. \n"); |
227 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_TRIPLES).append("> ?numTriples. \n"); |
228 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/startClassLimitedQ> ?isEndClsLim. \n"); |
229 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/endClassLimitedQ> ?isStartClsLim. \n"); |
230 | | queryStr.append("}\n"); |
231 | | |
232 | | // queryStr.append(" ?propPart <").append(URICollection.PROPERTY_RDF_TYPE).append("> <").append(URICollection.PROPERTY_VOID_PROPERTY_PARTITION).append(">. \n"); |
233 | | queryStr.append(" ?propPart <").append(URICollection.PROPERTY_SB_PROPERTY_CATEGORY).append("> ?indPropCat. \n"); |
234 | | |
235 | | queryStr.append(" ?propPart <").append(URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS).append("> ?numInsDom. \n"); |
236 | | queryStr.append(" ?propPart <").append(URICollection.PROPERTY_VOID_DISTINCT_OBJECTS).append("> ?numInsRan. \n"); |
237 | | |
238 | | queryStr.append("}\n"); |
239 | | |
240 | | |
241 | | if (limit > 0) { |
242 | | queryStr.append("limit "); |
243 | | queryStr.append(limit); |
244 | | queryStr.append("\n"); |
245 | | } |
246 | | |
247 | | // System.out.println("getNextClasses SPARQL Query: "); |
248 | | // System.out.println(queryStr.toString()); |
249 | | |
250 | | Query query = QueryFactory.create(queryStr.toString()); |
251 | | QueryExecution qexec = null; |
252 | | ResultSet results = null; |
253 | | try { |
254 | | long start = System.currentTimeMillis(); |
255 | | qexec = QueryExecutionFactory.create(query, dataset.getModel()); |
256 | | results = qexec.execSelect(); |
257 | | long end = System.currentTimeMillis(); |
258 | | System.out.println("EXEC TIME: " + (end - start)); |
259 | | } catch (Exception ex) { |
260 | | ex.printStackTrace(); |
261 | | throw ex; |
262 | | } |
263 | | |
264 | | ArrayList<ClassLink> solCLs = new ArrayList<ClassLink>(); |
265 | | for (; results.hasNext();) { |
266 | | QuerySolution sol = results.next(); |
267 | | Resource pro = sol.getResource("p"); |
268 | | String clsURI = null; |
269 | | String datURI = null; |
270 | | if (pro != null) { |
271 | | int indPropCat = 4; |
272 | | Literal indPropCatLit = sol.getLiteral("indPropCat"); |
273 | | if( indPropCatLit != null ){ |
274 | | indPropCat = indPropCatLit.getInt(); |
275 | | } |
276 | | if( indPropCat < 4 ) { |
277 | | String proURI = pro.getURI(); |
278 | | Resource ccls = sol.getResource("c"); |
279 | | Resource dcls = sol.getResource("d"); |
280 | | Resource dat = sol.getResource("dat"); |
281 | | Direction direction = null; |
282 | | if(ccls != null && dcls == null ){ |
283 | | // direction forward |
284 | | direction = Direction.reverse; |
285 | | clsURI = ccls.getURI(); |
286 | | }else{ |
287 | | if( ccls == null && dcls != null ){ |
288 | | direction = Direction.forward; |
289 | | clsURI = dcls.getURI(); |
290 | | }else{ |
291 | | if( ccls == null && dat != null && dcls == null ){ |
292 | | clsURI = null; |
293 | | direction = Direction.forward; |
294 | | datURI = dat.getURI(); |
295 | | } |
296 | | } |
297 | | } |
298 | | |
299 | | |
300 | | int numTriples = 0; |
301 | | Literal numTriplesLit = sol.getLiteral("numTriples"); |
302 | | if( numTriplesLit != null ){ |
303 | | numTriples = numTriplesLit.getInt(); |
304 | | } |
305 | | |
306 | | int numLnkInsStart = 0; |
307 | | Literal numInsStartLit = sol.getLiteral("numLnkInsStart"); |
308 | | if( numInsStartLit != null ){ |
309 | | numLnkInsStart = numInsStartLit.getInt(); |
310 | | } |
311 | | int numLnkInsEnd = 0; |
312 | | Literal numInsEndLit = sol.getLiteral("numLnkInsEnd"); |
313 | | if( numInsEndLit != null ){ |
314 | | numLnkInsEnd = numInsEndLit.getInt(); |
315 | | } |
316 | | |
317 | | int numInsDom = 0; |
318 | | Literal numInsDomLit = sol.getLiteral("numInsDom"); |
319 | | if( numInsDomLit != null ){ |
320 | | numInsDom = numInsDomLit.getInt(); |
321 | | } |
322 | | int numInsRan = 0; |
323 | | Literal numInsRanLit = sol.getLiteral("numInsRan"); |
324 | | if( numInsRanLit != null ){ |
325 | | numInsRan = numInsRanLit.getInt(); |
326 | | } |
327 | | |
328 | | boolean isStartClsLim = false; |
329 | | Literal isStartClsLimLit = sol.getLiteral("isStartClsLim"); |
330 | | if( isStartClsLimLit != null ){ |
331 | | isStartClsLim = isStartClsLimLit.getBoolean(); |
332 | | } |
333 | | boolean isEndClsLim = false; |
334 | | Literal isEndClsLimLit = sol.getLiteral("isEndClsLim"); |
335 | | if( isEndClsLimLit != null ){ |
336 | | isEndClsLim = isEndClsLimLit.getBoolean(); |
337 | | } |
338 | | |
339 | | ClassLink cl = new ClassLink(proURI, clsURI, datURI, direction, |
340 | | numTriples, numInsDom, numInsRan, numLnkInsStart, numLnkInsEnd, isStartClsLim, isEndClsLim); |
341 | | solCLs.add(cl); |
342 | | |
343 | | } |
344 | | } |
345 | | } |
346 | | qexec.close(); |
347 | | return solCLs.toArray(new ClassLink[0]); |
348 | | } |
349 | | |
350 | | |
351 | | |
352 | | /* |
353 | | |
354 | | public ClassLink[] getNextClassViaInstanceLink(String[] graphURIs, String originClass, int limit) throws Exception; |
355 | | |
356 | | public Path[] getPaths(String startClass, String endClass, int mode, boolean countLinks) throws Exception; |
357 | | |
358 | | public String createSPARQL(Path path) throws Exception; |
359 | | |
360 | | InstanceLink[] getNextInstancesViaInstanceLink(String[] graphURIs, String originInstance, |
361 | | int limit) throws Exception; |
362 | | */ |
363 | | |
364 | | public LabelMap[] getLabels(String graphURI, String[] resourceURIs, |
365 | | String language) throws Exception { |
366 | | // if (resourceURI == null || resourceURIs.length == 0) { |
367 | | // return new LabelMap[0]; |
368 | | // } |
369 | | |
370 | | String targetGraphURI = filterGraphURIs(graphURI); |
371 | | Dataset dataset = crawledMetadata.getDataset(targetGraphURI); |
372 | | |
373 | | |
374 | | StringBuffer queryStr = new StringBuffer(); |
375 | | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n"); |
376 | | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"); |
377 | | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"); |
378 | | queryStr.append("SELECT DISTINCT ?res ?label \n"); |
379 | | if (targetGraphURI != null) { |
380 | | queryStr.append("FROM <"); |
381 | | queryStr.append(targetGraphURI); |
382 | | queryStr.append(">\n"); |
383 | | } |
384 | | queryStr.append("WHERE{\n"); |
385 | | queryStr.append(" ?res rdfs:label ?label.\n"); |
386 | | queryStr.append(" FILTER(?res IN ("); |
387 | | boolean f = false; |
388 | | for (String resourceURI : resourceURIs) { |
389 | | if (f) { |
390 | | queryStr.append(", "); |
391 | | } |
392 | | f = true; |
393 | | queryStr.append("<"); |
394 | | queryStr.append(resourceURI); |
395 | | queryStr.append(">"); |
396 | | } |
397 | | queryStr.append("))\n"); |
398 | | queryStr.append("}"); |
399 | | |
400 | | // System.out.println(queryStr.toString()); |
401 | | |
402 | | Query query = QueryFactory.create(queryStr.toString()); |
403 | | QueryExecution qexec = QueryExecutionFactory.create(query, dataset.getModel()); |
404 | | |
405 | | ResultSet results = qexec.execSelect(); |
406 | | HashMap<String, LabelMap> lMap = new HashMap<String, LabelMap>(); |
407 | | for (; results.hasNext();) { |
408 | | QuerySolution sol = results.next(); |
409 | | String uri = sol.getResource("res").getURI(); |
410 | | Literal literal = sol.getLiteral("label"); |
411 | | if (literal != null) { |
412 | | String label = literal.getString(); |
413 | | String lang = literal.getLanguage(); |
414 | | if (language != null && language.equals(lang)) { |
415 | | Label lbl = new Label(label, lang); |
416 | | if (lMap.containsKey(uri)) { |
417 | | LabelMap lm = lMap.get(uri); |
418 | | lm.addLabel(lbl); |
419 | | } else { |
420 | | LabelMap lm = new LabelMap(uri, new Label[] { lbl }); |
421 | | lMap.put(uri, lm); |
422 | | } |
423 | | } |
424 | | } |
425 | | } |
426 | | return lMap.values().toArray(new LabelMap[0]); |
427 | | } |
428 | | |
429 | | /* |
430 | | public ClassLink[] countLinks(String[] graphURIs, String startClassURI, ClassLink[] classLinks) throws Exception; |
431 | | public SClass[] countInstances(String[] graphURIs, SClass[] classes) throws Exception; |
432 | | |
433 | | */ |
| 156 | |
| 157 | |
| 158 | |
| 159 | |
| 160 | public ClassLink[] getNextClass(String originClass, int limit) throws Exception{ |
| 161 | |
| 162 | ArrayList<ClassLink> classLinkList = new ArrayList<ClassLink>(); |
| 163 | |
| 164 | Set<String> endpointURISet = crawledMetadataTable.keySet(); |
| 165 | for(String endpointURI: endpointURISet){ |
| 166 | CrawledMetadata cm = crawledMetadataTable.get(endpointURI); |
| 167 | // default |
| 168 | Dataset dataset = cm.getDefaultDataset(); |
| 169 | List<ClassLink> tempClassLinkList = getNextClass(endpointURI, null, dataset, originClass, limit); |
| 170 | for(ClassLink classLink: tempClassLinkList){ |
| 171 | classLinkList.add(classLink); |
| 172 | } |
| 173 | |
| 174 | // graphs |
| 175 | String[] graphURIs = cm.getGraphURIs(); |
| 176 | if( graphURIs != null ){ |
| 177 | for(String graphURI: graphURIs){ |
| 178 | dataset = cm.getDataset(graphURI); |
| 179 | tempClassLinkList = getNextClass(endpointURI, graphURI, dataset, originClass, limit); |
| 180 | for(ClassLink classLink: tempClassLinkList){ |
| 181 | classLinkList.add(classLink); |
| 182 | } |
| 183 | } |
| 184 | } |
| 185 | } |
| 186 | return classLinkList.toArray(new ClassLink[0]); |
| 187 | } |
| 188 | |
| 189 | |
| 190 | |
| 191 | private List<ClassLink> getNextClass(String endpointURI, String graphURI, Dataset dataset, String originClass, int limit) throws Exception{ |
| 192 | ArrayList<ClassLink> classLinkList = new ArrayList<ClassLink>(); |
| 193 | |
| 194 | PropertyPartition[] pps = dataset.getPropertyPartitions(); |
| 195 | if( pps == null ){ |
| 196 | return classLinkList; |
| 197 | } |
| 198 | |
| 199 | for(PropertyPartition pp: pps){ |
| 200 | ClassRelation[] classRelations = pp.classRelations; |
| 201 | if( classRelations != null ){ |
| 202 | for(ClassRelation classRelation: classRelations){ |
| 203 | String subjClassURI = classRelation.subjectClassURI; |
| 204 | String objClassURI = classRelation.objectClassURI; |
| 205 | boolean forward = false; |
| 206 | boolean reverse = false; |
| 207 | if( objClassURI != null && objClassURI.equals(originClass) ){ |
| 208 | if( subjClassURI != null ){ |
| 209 | reverse = true; |
| 210 | } |
| 211 | } |
| 212 | if(subjClassURI != null && subjClassURI.equals(originClass)){ |
| 213 | if( objClassURI != null || classRelation.objectDatatypeURI != null ){ |
| 214 | forward = true; |
| 215 | } |
| 216 | } |
| 217 | ClassLink classLink = null; |
| 218 | if( forward && !reverse ){ |
| 219 | classLink = new ClassLink(); |
| 220 | classLink.setDirection(Direction.forward); |
| 221 | classLink.setNumOfOriginClassInstances(classRelation.distinctSubjects); |
| 222 | if( objClassURI != null ){ |
| 223 | classLink.setLinkedClassURI(objClassURI); |
| 224 | classLink.setNumOfLinkedClassInstances(classRelation.distinctObjects); |
| 225 | }else{ |
| 226 | classLink.setLinkedLiteralDatatypeURI(classRelation.objectDatatypeURI); |
| 227 | classLink.setNumOfLinkedInstances(classRelation.triples); |
| 228 | } |
| 229 | classLink.setNumOfOriginInstances(pp.distinctSubjects); |
| 230 | classLink.setNumOfLinkedInstances(pp.distinctObjects); |
| 231 | |
| 232 | } |
| 233 | if( !forward && reverse ){ |
| 234 | classLink = new ClassLink(); |
| 235 | classLink.setDirection(Direction.reverse); |
| 236 | classLink.setLinkedClassURI(objClassURI); |
| 237 | classLink.setNumOfOriginClassInstances(classRelation.distinctObjects); |
| 238 | classLink.setNumOfOriginInstances(pp.distinctObjects); |
| 239 | classLink.setNumOfLinkedInstances(pp.distinctSubjects); |
| 240 | classLink.setNumOfLinkedClassInstances(classRelation.distinctSubjects); |
| 241 | } |
| 242 | if( forward && reverse){ |
| 243 | classLink = new ClassLink(); |
| 244 | classLink.setDirection(Direction.both); |
| 245 | classLink.setLinkedClassURI(objClassURI); |
| 246 | classLink.setNumOfOriginClassInstances(classRelation.distinctSubjects); |
| 247 | classLink.setNumOfOriginInstances(pp.distinctSubjects); |
| 248 | classLink.setNumOfLinkedInstances(pp.distinctObjects); |
| 249 | classLink.setNumOfLinkedClassInstances(classRelation.distinctObjects); |
| 250 | } |
| 251 | // hit |
| 252 | if( classLink != null ){ |
| 253 | classLink.setEndpointURI(endpointURI); |
| 254 | classLink.setGraphURI(graphURI); |
| 255 | classLink.setPropertyURI(pp.propertyDef.propertyURI); |
| 256 | classLink.setNumOfLinks(classRelation.triples); |
| 257 | classLinkList.add(classLink); |
| 258 | } |
| 259 | } |
| 260 | } |
| 261 | } |
| 262 | return classLinkList; |
| 263 | } |
| 264 | |
| 265 | |
| 266 | |
| 267 | |
| 268 | |
| 269 | |
| 270 | |
| 271 | public LabelMap[] getLabels(String[] resourceURIs, String language) throws Exception { |
| 272 | if( resourceURIs == null || resourceURIs.length == 0 ){ |
| 273 | return new LabelMap[0]; |
| 274 | } |
| 275 | |
| 276 | HashSet<String> resourceURIset = new HashSet<String>(); |
| 277 | for(String resourceURI: resourceURIs){ |
| 278 | resourceURIset.add(resourceURI); |
| 279 | } |
| 280 | |
| 281 | HashMap<String, LabelMap> labelMapTable = new HashMap<String, LabelMap>(); |
| 282 | |
| 283 | Set<String> endpointURISet = crawledMetadataTable.keySet(); |
| 284 | for(String endpointURI: endpointURISet){ |
| 285 | CrawledMetadata cm = crawledMetadataTable.get(endpointURI); |
| 286 | // default |
| 287 | Dataset dataset = cm.getDefaultDataset(); |
| 288 | labelMapTable = getLabels(dataset, resourceURIset, language, labelMapTable); |
| 289 | |
| 290 | // graphs |
| 291 | String[] graphURIs = cm.getGraphURIs(); |
| 292 | if( graphURIs != null ){ |
| 293 | for(String graphURI: graphURIs){ |
| 294 | dataset = cm.getDataset(graphURI); |
| 295 | labelMapTable = getLabels(dataset, resourceURIset, language, labelMapTable); |
| 296 | } |
| 297 | } |
| 298 | } |
| 299 | return labelMapTable.values().toArray(new LabelMap[0]); |
| 300 | } |
| 301 | |
| 302 | |
| 303 | private HashMap<String, LabelMap> getLabels(Dataset dataset, HashSet<String> resourceURISet , String language, HashMap<String, LabelMap> labelMapTable) throws Exception { |
| 304 | ClassPartition[] cps = dataset.getClassPartitions(); |
| 305 | if( cps != null ){ |
| 306 | for(ClassPartition cp: cps){ |
| 307 | String uri = cp.classDef.classURI; |
| 308 | if( resourceURISet.contains(uri)){ |
| 309 | Label[] rLabels = cp.classDef.labels; |
| 310 | if( rLabels != null ){ |
| 311 | for(Label rLabel: rLabels){ |
| 312 | if( language == null || ( rLabel.language == null || rLabel.language.equals(language))){ |
| 313 | LabelMap labelMap = null; |
| 314 | if( labelMapTable.containsKey(uri)){ |
| 315 | labelMap = labelMapTable.get(uri); |
| 316 | }else{ |
| 317 | labelMap = new LabelMap(); |
| 318 | labelMapTable.put(uri, labelMap); |
| 319 | labelMap.setResourceURI(uri); |
| 320 | } |
| 321 | labelMap.addLabel(new org.biohackathon.SPARQLBuilder.OWL.Label(rLabel.value, rLabel.language)); |
| 322 | } |
| 323 | } |
| 324 | } |
| 325 | } |
| 326 | } |
| 327 | } |
| 328 | |
| 329 | PropertyPartition[] pps = dataset.getPropertyPartitions(); |
| 330 | if( pps != null ){ |
| 331 | for(PropertyPartition pp: pps){ |
| 332 | String uri = pp.propertyDef.propertyURI; |
| 333 | if( resourceURISet.contains(uri)){ |
| 334 | Label[] rLabels = pp.propertyDef.labels; |
| 335 | if( rLabels != null ){ |
| 336 | for(Label rLabel: rLabels){ |
| 337 | if( language == null || ( rLabel.language == null || rLabel.language.equals(language))){ |
| 338 | LabelMap labelMap = null; |
| 339 | if( labelMapTable.containsKey(uri)){ |
| 340 | labelMap = labelMapTable.get(uri); |
| 341 | }else{ |
| 342 | labelMap = new LabelMap(); |
| 343 | labelMapTable.put(uri, labelMap); |
| 344 | labelMap.setResourceURI(uri); |
| 345 | } |
| 346 | labelMap.addLabel(new org.biohackathon.SPARQLBuilder.OWL.Label(rLabel.value, rLabel.language)); |
| 347 | } |
| 348 | } |
| 349 | } |
| 350 | } |
| 351 | } |
| 352 | } |
| 353 | return labelMapTable; |
| 354 | } |