| 24 | | private CrawledMetadata crawledMetadata = null; |
| 25 | | |
| 26 | | public String getEndpointURI(){ |
| 27 | | return crawledMetadata.getEndpointURI(); |
| 28 | | } |
| 29 | | |
| 30 | | public String[] getGraphURIs(){ |
| 31 | | return crawledMetadata.getGraphURIs(); |
| 32 | | } |
| 33 | | |
| 34 | | /* |
| 35 | | public static void main(String[] args) throws Exception{ |
| 36 | | |
| 37 | | JenaModelGenerator jmGene = new JenaModelGenerator("c:\\temp\\biosamplesF11.ttl"); |
| 38 | | // JenaModelGenerator jmGene = new JenaModelGenerator("c:\\temp\\reactomeF18s.ttl"); |
| 39 | | AcquiredStructureAnalyzer impl |
| 40 | | = new AcquiredStructureAnalyzer(jmGene.getEndpointURI(), jmGene.getGraphURIs(), jmGene.getModel()); |
| 41 | | |
| 42 | | System.out.println("--------------------------"); |
| 43 | | SClass[] scs = impl.getOWLClasses(null, null, null, true); |
| 44 | | System.out.println("list classes:---------------"); |
| 45 | | for(SClass sc: scs){ |
| 46 | | System.out.println(sc.toString()); |
| 47 | | } |
| 48 | | System.out.println("--------------------------"); |
| 49 | | |
| 50 | | // ClassLink[] cls = impl.getNextClass(null,"http://www.biopax.org/release/biopax-level3.owl#Protein",100,true ); |
| 51 | | // for(ClassLink cl: cls){ |
| 52 | | // System.out.println(cl.toString()); |
| 53 | | // } |
| 54 | | // System.out.println("--------------------------"); |
| 55 | | |
| 56 | | } |
| 57 | | */ |
| 58 | | |
/**
 * Creates an analyzer that works on a single crawled-metadata object.
 *
 * @param crawledMetadata metadata stored in this instance and used by all query methods
 */
public AcquiredStructureAnalyzer(CrawledMetadata crawledMetadata) {
    super();
    this.crawledMetadata = crawledMetadata;
}
| 62 | | |
| 63 | | private String filterGraphURIs(String orgGraphURIs){ |
| 64 | | // TODO |
| 65 | | return orgGraphURIs; |
| 66 | | } |
| 67 | | |
| 68 | | |
| 69 | | public SClass[] listClasses(String graphURI, boolean countInstances) throws Exception{ |
| 70 | | return getOWLClasses(graphURI, null, null, countInstances); |
| 71 | | } |
| 72 | | |
| 73 | | |
| 74 | | |
| 75 | | |
| 76 | | public SClass[] getOWLClasses(String graphURI, String[] keywords, String language, boolean countInstances) throws Exception{ |
| 77 | | return getOWLClassList(graphURI, keywords, language, countInstances).toArray(new SClass[0]); |
| 78 | | } |
| 79 | | |
| 80 | | public List<SClass> getOWLClassList(String graphURI, String[] keywords, String language, boolean countInstances) throws Exception{ |
| 81 | | String targetGraphURI = filterGraphURIs(graphURI); |
| 82 | | |
| 83 | | Dataset dataset = crawledMetadata.getDataset(targetGraphURI); |
| 84 | | |
| 85 | | |
| 86 | | StringBuffer queryStr = new StringBuffer(); |
| 87 | | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n"); |
| 88 | | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"); |
| 89 | | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"); |
| 90 | | queryStr.append("SELECT DISTINCT ?c ?pLabel ?entities\n"); |
| 91 | | if (targetGraphURI != null) { |
| 92 | | queryStr.append("FROM <"); |
| 93 | | queryStr.append(targetGraphURI); |
| 94 | | queryStr.append(">\n"); |
| 95 | | } |
| 96 | | queryStr.append("WHERE{\n"); |
| 97 | | queryStr.append(" ?cp <").append(URICollection.PROPERTY_VOID_CLASS).append("> ?c. \n"); |
| 98 | | queryStr.append(" ?cp <"); |
| 99 | | queryStr.append(URICollection.PROPERTY_VOID_ENTITIES); |
| 100 | | queryStr.append("> ?entities. \n"); |
| 101 | | queryStr.append(" OPTIONAL{ ?c <"); |
| 102 | | queryStr.append(URICollection.PROPERTY_RDFS_LABEL); |
| 103 | | queryStr.append("> ?pLabel. }\n"); |
| 104 | | |
| 105 | | if (keywords != null && keywords.length != 0) { |
| 106 | | |
| 107 | | queryStr.append(" ?c rdfs:label "); |
| 108 | | queryStr.append("?keywords").append(".\n"); |
| 109 | | queryStr.append(" filter((LANG(?keywords) = \'").append(language); |
| 110 | | queryStr.append("\') && \n ("); |
| 111 | | |
| 112 | | for (int i = 0; i < keywords.length; i++) { |
| 113 | | if (i > 0) |
| 114 | | queryStr.append(" || \n "); |
| 115 | | |
| 116 | | queryStr.append("regex(str(").append("?keywords") |
| 117 | | .append("),\""); |
| 118 | | queryStr.append(keywords[i]); |
| 119 | | queryStr.append("\", \"i\" )"); |
| 120 | | |
| 121 | | } |
| 122 | | queryStr.append("))\n"); |
| 123 | | |
| 124 | | } |
| 125 | | queryStr.append("}"); |
| 126 | | System.out.println(queryStr.toString()); |
| 127 | | |
| 128 | | Query query = QueryFactory.create(queryStr.toString()); |
| 129 | | |
| 130 | | QueryExecution qexec = null; |
| 131 | | ResultSet results = null; |
| 132 | | try { |
| 133 | | // long start = System.currentTimeMillis(); |
| 134 | | qexec = QueryExecutionFactory.create(query, dataset.getModel()); |
| 135 | | results = qexec.execSelect(); |
| 136 | | // long end = System.currentTimeMillis(); |
| 137 | | // System.out.println("EXEC TIME: " + (end - start)); |
| 138 | | } catch (Exception ex) { |
| 139 | | ex.printStackTrace(); |
| 140 | | throw ex; |
| 141 | | } |
| 142 | | |
| 143 | | HashMap<String, SClass> classMap = new HashMap<String, SClass>(); |
| 144 | | for (; results.hasNext();) { |
| 145 | | QuerySolution sol = results.next(); |
| 146 | | Resource res = sol.getResource("c"); |
| 147 | | if (res != null && res.getURI() != null) { |
| 148 | | String uri = res.getURI(); |
| 149 | | int numOfInstances = 0; |
| 150 | | if (countInstances) { |
| 151 | | numOfInstances = sol.getLiteral("entities").getInt(); |
| 152 | | } // |
| 153 | | Literal labelLiteral = sol.getLiteral("pLabel"); |
| 154 | | SClass sClass = null; |
| 155 | | if (classMap.containsKey(uri)) { |
| 156 | | sClass = classMap.get(uri); |
| 157 | | } else { |
| 158 | | sClass = new SClass(uri, null, numOfInstances); |
| 159 | | classMap.put(uri, sClass); |
| 160 | | } |
| 161 | | if (labelLiteral != null) { |
| 162 | | String label = labelLiteral.getString(); |
| 163 | | String lang = labelLiteral.getLanguage(); |
| 164 | | sClass.addLabel(new Label(label, lang)); |
| 165 | | } |
| 166 | | } |
| 167 | | } |
| 168 | | qexec.close(); |
| 169 | | return new LinkedList<SClass>(classMap.values()); |
| 170 | | } |
| | 30 | // key: endpointURI, value: crawled metadata |
| | 31 | private HashMap<String, CrawledMetadata> crawledMetadataTable= null; |
| | 32 | |
| | 33 | public String[] getEndpointURIs(){ |
| | 34 | if( crawledMetadataTable == null ){ |
| | 35 | return new String[0]; |
| | 36 | }else{ |
| | 37 | return crawledMetadataTable.keySet().toArray(new String[0]); |
| | 38 | } |
| | 39 | } |
| | 40 | |
| | 41 | public String[] getGraphURIs(String endpointURI){ |
| | 42 | if( crawledMetadataTable == null ){ |
| | 43 | return new String[0]; |
| | 44 | }else{ |
| | 45 | CrawledMetadata crawledMetadata = crawledMetadataTable.get(endpointURI); |
| | 46 | if( crawledMetadata == null ){ |
| | 47 | return new String[0]; |
| | 48 | }else{ |
| | 49 | return crawledMetadata.getGraphURIs(); |
| | 50 | } |
| | 51 | } |
| | 52 | } |
| | 53 | |
| | 54 | |
/**
 * Builds the endpoint-to-metadata table from everything the manager exposes.
 *
 * @param metadataManager source of the crawled metadata list; a {@code null} list leaves
 *                        the table empty
 */
public AcquiredStructureAnalyzer(MetadataManager metadataManager) {
    CrawledMetadata[] available = metadataManager.getCrawlerMetadataList();
    crawledMetadataTable = new HashMap<String, CrawledMetadata>();
    if (available == null) {
        return;
    }
    for (int i = 0; i < available.length; i++) {
        CrawledMetadata metadata = available[i];
        // A later entry with the same endpoint URI replaces an earlier one.
        crawledMetadataTable.put(metadata.getEndpointURI(), metadata);
    }
}
| | 66 | |
| | 67 | |
| | 68 | |
| | 69 | public SClass[] listClasses() throws Exception{ |
| | 70 | return getOWLClasses(null, null); |
| | 71 | } |
| | 72 | |
| | 73 | |
| | 74 | public SClass[] getOWLClasses(String[] keywords, String language) throws Exception{ |
| | 75 | return getOWLClassList(keywords, language).toArray(new SClass[0]); |
| | 76 | } |
| | 77 | |
| | 78 | |
| | 79 | public List<SClass> getOWLClassList(String[] keywords, String language) throws Exception{ |
| | 80 | ArrayList<SClass> classList = new ArrayList<SClass>(); |
| | 81 | |
| | 82 | Set<String> endpointURISet = crawledMetadataTable.keySet(); |
| | 83 | for(String endpointURI: endpointURISet){ |
| | 84 | CrawledMetadata cm = crawledMetadataTable.get(endpointURI); |
| | 85 | // default |
| | 86 | Dataset dataset = cm.getDefaultDataset(); |
| | 87 | List<SClass> tempClassList = getOWLClassList(dataset, keywords, language); |
| | 88 | for(SClass sClass: tempClassList){ |
| | 89 | classList.add(sClass); |
| | 90 | } |
| | 91 | |
| | 92 | // graphs |
| | 93 | String[] graphURIs = cm.getGraphURIs(); |
| | 94 | if( graphURIs != null ){ |
| | 95 | for(String graphURI: graphURIs){ |
| | 96 | dataset = cm.getDataset(graphURI); |
| | 97 | tempClassList = getOWLClassList(dataset, keywords, language); |
| | 98 | for(SClass sClass: tempClassList){ |
| | 99 | classList.add(sClass); |
| | 100 | } |
| | 101 | } |
| | 102 | } |
| | 103 | } |
| | 104 | return classList; |
| | 105 | } |
| | 106 | |
| | 107 | private List<SClass> getOWLClassList(Dataset dataset, String[] keywords, String language) throws Exception{ |
| | 108 | ArrayList<SClass> results = new ArrayList<SClass>(); |
| | 109 | |
| | 110 | ClassPartition[] classPartitionList = dataset.getClassPartitions(); |
| | 111 | if( classPartitionList == null || classPartitionList.length == 0 ){ |
| | 112 | return new ArrayList<SClass>(); |
| | 113 | } |
| | 114 | for( ClassPartition cp: classPartitionList){ |
| | 115 | String classURI = cp.classDef.classURI; |
| | 116 | Label[] rLabels = cp.classDef.labels; |
| | 117 | org.biohackathon.SPARQLBuilder.OWL.Label[] labels = null; |
| | 118 | if( rLabels == null ){ |
| | 119 | labels = new org.biohackathon.SPARQLBuilder.OWL.Label[0]; |
| | 120 | }else{ |
| | 121 | labels = new org.biohackathon.SPARQLBuilder.OWL.Label[rLabels.length]; |
| | 122 | for(int i = 0; i < rLabels.length; i++ ) { |
| | 123 | labels[i] = new org.biohackathon.SPARQLBuilder.OWL.Label(rLabels[i].value, rLabels[i].language); |
| | 124 | } |
| | 125 | } |
| | 126 | int entities = cp.entities; |
| | 127 | |
| | 128 | if( keywords == null || keywords.length == 0 ){ |
| | 129 | SClass sClass = new SClass(classURI, labels, entities); |
| | 130 | results.add(sClass); |
| | 131 | }else{ |
| | 132 | boolean hit = false; |
| | 133 | for(org.biohackathon.SPARQLBuilder.OWL.Label label: labels){ |
| | 134 | if( language == null || label.getLanguage().equals(language)){ |
| | 135 | String value = label.getLabel(); |
| | 136 | if( value != null ){ |
| | 137 | value = value.toLowerCase().trim(); |
| | 138 | for(String keyword: keywords){ |
| | 139 | if( value.contains(keyword.toLowerCase().trim())){ |
| | 140 | hit = true; |
| | 141 | break; |
| | 142 | } |
| | 143 | } |
| | 144 | } |
| | 145 | } |
| | 146 | if( hit ){ |
| | 147 | SClass sClass = new SClass(classURI, labels, entities); |
| | 148 | results.add(sClass); |
| | 149 | } |
| | 150 | } |
| | 151 | } |
| | 152 | } |
| | 153 | return results; |
| | 154 | } |
| 172 | | /* |
| 173 | | |
| 174 | | public Instance[] getInstances(String[] graphURIs, String keyword) throws Exception; |
| 175 | | */ |
| 176 | | |
| 177 | | |
| 178 | | public ClassLink[] getNextClass(String graphURI, String originClass, int limit, boolean countLinks) throws Exception{ |
| 179 | | String targetGraphURI = filterGraphURIs(graphURI); |
| 180 | | Dataset dataset = crawledMetadata.getDataset(targetGraphURI); |
| 181 | | |
| 182 | | |
| 183 | | StringBuffer queryStr = new StringBuffer(); |
| 184 | | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n"); |
| 185 | | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"); |
| 186 | | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"); |
| 187 | | |
| 188 | | // SELECT |
| 189 | | queryStr.append("SELECT DISTINCT ?indPropCat ?c ?dat ?d ?p ?numLnkInsStart ?numLnkInsEnd ?numInsDom ?numInsRan ?numTriples\n"); |
| 190 | | |
| 191 | | if (targetGraphURI != null) { |
| 192 | | queryStr.append("FROM <"); |
| 193 | | queryStr.append(targetGraphURI); |
| 194 | | queryStr.append(">\n"); |
| 195 | | } |
| 196 | | |
| 197 | | queryStr.append("WHERE{\n"); |
| 198 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_RDF_TYPE).append("> <").append(URICollection.RESOURCE_SB_CLASS_RELATION).append(">. \n"); |
| 199 | | queryStr.append(" ?propPart <").append(URICollection.PROPERTY_SB_CLASS_RELATION).append("> ?cr. \n"); |
| 200 | | queryStr.append(" ?propPart <").append(URICollection.PROPERTY_VOID_PROPERTY).append("> ?p. \n"); |
| 201 | | |
| 202 | | queryStr.append(" {"); |
| 203 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_SUBJECT_CLASS).append("> <" + originClass + ">. \n"); |
| 204 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_OBJECT_CLASS).append("> ?d. \n"); |
| 205 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS).append("> ?numLnkInsStart. \n"); |
| 206 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_OBJECTS).append("> ?numLnkInsEnd. \n"); |
| 207 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_TRIPLES).append("> ?numTriples. \n"); |
| 208 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/startClassLimitedQ> ?isStartClsLim. \n"); |
| 209 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/endClassLimitedQ> ?isEndClsLim. \n"); |
| 210 | | queryStr.append("}\n"); |
| 211 | | queryStr.append(" UNION\n"); |
| 212 | | queryStr.append(" {"); |
| 213 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_SUBJECT_CLASS).append("> <" + originClass + ">. \n"); |
| 214 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_OBJECT_DATATYPE).append("> ?dat. \n"); |
| 215 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS).append("> ?numLnkInsStart. \n"); |
| 216 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_OBJECTS).append("> ?numLnkInsEnd. \n"); |
| 217 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_TRIPLES).append("> ?numTriples. \n"); |
| 218 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/startClassLimitedQ> ?isStartClsLim. \n"); |
| 219 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/endClassLimitedQ> ?isEndClsLim. \n"); |
| 220 | | queryStr.append("}\n"); |
| 221 | | queryStr.append(" UNION\n"); |
| 222 | | queryStr.append(" {"); |
| 223 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_OBJECT_CLASS).append("> <" + originClass + ">. \n"); |
| 224 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_SB_SUBJECT_CLASS).append("> ?c. \n"); |
| 225 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS).append("> ?numLnkInsEnd. \n"); |
| 226 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_DISTINCT_OBJECTS).append("> ?numLnkInsStart. \n"); |
| 227 | | queryStr.append(" ?cr <").append(URICollection.PROPERTY_VOID_TRIPLES).append("> ?numTriples. \n"); |
| 228 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/startClassLimitedQ> ?isEndClsLim. \n"); |
| 229 | | // queryStr.append(" ?cr <http://sparqlbuilder.org/endClassLimitedQ> ?isStartClsLim. \n"); |
| 230 | | queryStr.append("}\n"); |
| 231 | | |
| 232 | | // queryStr.append(" ?propPart <").append(URICollection.PROPERTY_RDF_TYPE).append("> <").append(URICollection.PROPERTY_VOID_PROPERTY_PARTITION).append(">. \n"); |
| 233 | | queryStr.append(" ?propPart <").append(URICollection.PROPERTY_SB_PROPERTY_CATEGORY).append("> ?indPropCat. \n"); |
| 234 | | |
| 235 | | queryStr.append(" ?propPart <").append(URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS).append("> ?numInsDom. \n"); |
| 236 | | queryStr.append(" ?propPart <").append(URICollection.PROPERTY_VOID_DISTINCT_OBJECTS).append("> ?numInsRan. \n"); |
| 237 | | |
| 238 | | queryStr.append("}\n"); |
| 239 | | |
| 240 | | |
| 241 | | if (limit > 0) { |
| 242 | | queryStr.append("limit "); |
| 243 | | queryStr.append(limit); |
| 244 | | queryStr.append("\n"); |
| 245 | | } |
| 246 | | |
| 247 | | // System.out.println("getNextClasses SPARQL Query: "); |
| 248 | | // System.out.println(queryStr.toString()); |
| 249 | | |
| 250 | | Query query = QueryFactory.create(queryStr.toString()); |
| 251 | | QueryExecution qexec = null; |
| 252 | | ResultSet results = null; |
| 253 | | try { |
| 254 | | long start = System.currentTimeMillis(); |
| 255 | | qexec = QueryExecutionFactory.create(query, dataset.getModel()); |
| 256 | | results = qexec.execSelect(); |
| 257 | | long end = System.currentTimeMillis(); |
| 258 | | System.out.println("EXEC TIME: " + (end - start)); |
| 259 | | } catch (Exception ex) { |
| 260 | | ex.printStackTrace(); |
| 261 | | throw ex; |
| 262 | | } |
| 263 | | |
| 264 | | ArrayList<ClassLink> solCLs = new ArrayList<ClassLink>(); |
| 265 | | for (; results.hasNext();) { |
| 266 | | QuerySolution sol = results.next(); |
| 267 | | Resource pro = sol.getResource("p"); |
| 268 | | String clsURI = null; |
| 269 | | String datURI = null; |
| 270 | | if (pro != null) { |
| 271 | | int indPropCat = 4; |
| 272 | | Literal indPropCatLit = sol.getLiteral("indPropCat"); |
| 273 | | if( indPropCatLit != null ){ |
| 274 | | indPropCat = indPropCatLit.getInt(); |
| 275 | | } |
| 276 | | if( indPropCat < 4 ) { |
| 277 | | String proURI = pro.getURI(); |
| 278 | | Resource ccls = sol.getResource("c"); |
| 279 | | Resource dcls = sol.getResource("d"); |
| 280 | | Resource dat = sol.getResource("dat"); |
| 281 | | Direction direction = null; |
| 282 | | if(ccls != null && dcls == null ){ |
| 283 | | // direction forward |
| 284 | | direction = Direction.reverse; |
| 285 | | clsURI = ccls.getURI(); |
| 286 | | }else{ |
| 287 | | if( ccls == null && dcls != null ){ |
| 288 | | direction = Direction.forward; |
| 289 | | clsURI = dcls.getURI(); |
| 290 | | }else{ |
| 291 | | if( ccls == null && dat != null && dcls == null ){ |
| 292 | | clsURI = null; |
| 293 | | direction = Direction.forward; |
| 294 | | datURI = dat.getURI(); |
| 295 | | } |
| 296 | | } |
| 297 | | } |
| 298 | | |
| 299 | | |
| 300 | | int numTriples = 0; |
| 301 | | Literal numTriplesLit = sol.getLiteral("numTriples"); |
| 302 | | if( numTriplesLit != null ){ |
| 303 | | numTriples = numTriplesLit.getInt(); |
| 304 | | } |
| 305 | | |
| 306 | | int numLnkInsStart = 0; |
| 307 | | Literal numInsStartLit = sol.getLiteral("numLnkInsStart"); |
| 308 | | if( numInsStartLit != null ){ |
| 309 | | numLnkInsStart = numInsStartLit.getInt(); |
| 310 | | } |
| 311 | | int numLnkInsEnd = 0; |
| 312 | | Literal numInsEndLit = sol.getLiteral("numLnkInsEnd"); |
| 313 | | if( numInsEndLit != null ){ |
| 314 | | numLnkInsEnd = numInsEndLit.getInt(); |
| 315 | | } |
| 316 | | |
| 317 | | int numInsDom = 0; |
| 318 | | Literal numInsDomLit = sol.getLiteral("numInsDom"); |
| 319 | | if( numInsDomLit != null ){ |
| 320 | | numInsDom = numInsDomLit.getInt(); |
| 321 | | } |
| 322 | | int numInsRan = 0; |
| 323 | | Literal numInsRanLit = sol.getLiteral("numInsRan"); |
| 324 | | if( numInsRanLit != null ){ |
| 325 | | numInsRan = numInsRanLit.getInt(); |
| 326 | | } |
| 327 | | |
| 328 | | boolean isStartClsLim = false; |
| 329 | | Literal isStartClsLimLit = sol.getLiteral("isStartClsLim"); |
| 330 | | if( isStartClsLimLit != null ){ |
| 331 | | isStartClsLim = isStartClsLimLit.getBoolean(); |
| 332 | | } |
| 333 | | boolean isEndClsLim = false; |
| 334 | | Literal isEndClsLimLit = sol.getLiteral("isEndClsLim"); |
| 335 | | if( isEndClsLimLit != null ){ |
| 336 | | isEndClsLim = isEndClsLimLit.getBoolean(); |
| 337 | | } |
| 338 | | |
| 339 | | ClassLink cl = new ClassLink(proURI, clsURI, datURI, direction, |
| 340 | | numTriples, numInsDom, numInsRan, numLnkInsStart, numLnkInsEnd, isStartClsLim, isEndClsLim); |
| 341 | | solCLs.add(cl); |
| 342 | | |
| 343 | | } |
| 344 | | } |
| 345 | | } |
| 346 | | qexec.close(); |
| 347 | | return solCLs.toArray(new ClassLink[0]); |
| 348 | | } |
| 349 | | |
| 350 | | |
| 351 | | |
| 352 | | /* |
| 353 | | |
| 354 | | public ClassLink[] getNextClassViaInstanceLink(String[] graphURIs, String originClass, int limit) throws Exception; |
| 355 | | |
| 356 | | public Path[] getPaths(String startClass, String endClass, int mode, boolean countLinks) throws Exception; |
| 357 | | |
| 358 | | public String createSPARQL(Path path) throws Exception; |
| 359 | | |
| 360 | | InstanceLink[] getNextInstancesViaInstanceLink(String[] graphURIs, String originInstance, |
| 361 | | int limit) throws Exception; |
| 362 | | */ |
| 363 | | |
| 364 | | public LabelMap[] getLabels(String graphURI, String[] resourceURIs, |
| 365 | | String language) throws Exception { |
| 366 | | // if (resourceURI == null || resourceURIs.length == 0) { |
| 367 | | // return new LabelMap[0]; |
| 368 | | // } |
| 369 | | |
| 370 | | String targetGraphURI = filterGraphURIs(graphURI); |
| 371 | | Dataset dataset = crawledMetadata.getDataset(targetGraphURI); |
| 372 | | |
| 373 | | |
| 374 | | StringBuffer queryStr = new StringBuffer(); |
| 375 | | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n"); |
| 376 | | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"); |
| 377 | | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"); |
| 378 | | queryStr.append("SELECT DISTINCT ?res ?label \n"); |
| 379 | | if (targetGraphURI != null) { |
| 380 | | queryStr.append("FROM <"); |
| 381 | | queryStr.append(targetGraphURI); |
| 382 | | queryStr.append(">\n"); |
| 383 | | } |
| 384 | | queryStr.append("WHERE{\n"); |
| 385 | | queryStr.append(" ?res rdfs:label ?label.\n"); |
| 386 | | queryStr.append(" FILTER(?res IN ("); |
| 387 | | boolean f = false; |
| 388 | | for (String resourceURI : resourceURIs) { |
| 389 | | if (f) { |
| 390 | | queryStr.append(", "); |
| 391 | | } |
| 392 | | f = true; |
| 393 | | queryStr.append("<"); |
| 394 | | queryStr.append(resourceURI); |
| 395 | | queryStr.append(">"); |
| 396 | | } |
| 397 | | queryStr.append("))\n"); |
| 398 | | queryStr.append("}"); |
| 399 | | |
| 400 | | // System.out.println(queryStr.toString()); |
| 401 | | |
| 402 | | Query query = QueryFactory.create(queryStr.toString()); |
| 403 | | QueryExecution qexec = QueryExecutionFactory.create(query, dataset.getModel()); |
| 404 | | |
| 405 | | ResultSet results = qexec.execSelect(); |
| 406 | | HashMap<String, LabelMap> lMap = new HashMap<String, LabelMap>(); |
| 407 | | for (; results.hasNext();) { |
| 408 | | QuerySolution sol = results.next(); |
| 409 | | String uri = sol.getResource("res").getURI(); |
| 410 | | Literal literal = sol.getLiteral("label"); |
| 411 | | if (literal != null) { |
| 412 | | String label = literal.getString(); |
| 413 | | String lang = literal.getLanguage(); |
| 414 | | if (language != null && language.equals(lang)) { |
| 415 | | Label lbl = new Label(label, lang); |
| 416 | | if (lMap.containsKey(uri)) { |
| 417 | | LabelMap lm = lMap.get(uri); |
| 418 | | lm.addLabel(lbl); |
| 419 | | } else { |
| 420 | | LabelMap lm = new LabelMap(uri, new Label[] { lbl }); |
| 421 | | lMap.put(uri, lm); |
| 422 | | } |
| 423 | | } |
| 424 | | } |
| 425 | | } |
| 426 | | return lMap.values().toArray(new LabelMap[0]); |
| 427 | | } |
| 428 | | |
| 429 | | /* |
| 430 | | public ClassLink[] countLinks(String[] graphURIs, String startClassURI, ClassLink[] classLinks) throws Exception; |
| 431 | | public SClass[] countInstances(String[] graphURIs, SClass[] classes) throws Exception; |
| 432 | | |
| 433 | | */ |
| | 156 | |
| | 157 | |
| | 158 | |
| | 159 | |
| | 160 | public ClassLink[] getNextClass(String originClass, int limit) throws Exception{ |
| | 161 | |
| | 162 | ArrayList<ClassLink> classLinkList = new ArrayList<ClassLink>(); |
| | 163 | |
| | 164 | Set<String> endpointURISet = crawledMetadataTable.keySet(); |
| | 165 | for(String endpointURI: endpointURISet){ |
| | 166 | CrawledMetadata cm = crawledMetadataTable.get(endpointURI); |
| | 167 | // default |
| | 168 | Dataset dataset = cm.getDefaultDataset(); |
| | 169 | List<ClassLink> tempClassLinkList = getNextClass(endpointURI, null, dataset, originClass, limit); |
| | 170 | for(ClassLink classLink: tempClassLinkList){ |
| | 171 | classLinkList.add(classLink); |
| | 172 | } |
| | 173 | |
| | 174 | // graphs |
| | 175 | String[] graphURIs = cm.getGraphURIs(); |
| | 176 | if( graphURIs != null ){ |
| | 177 | for(String graphURI: graphURIs){ |
| | 178 | dataset = cm.getDataset(graphURI); |
| | 179 | tempClassLinkList = getNextClass(endpointURI, graphURI, dataset, originClass, limit); |
| | 180 | for(ClassLink classLink: tempClassLinkList){ |
| | 181 | classLinkList.add(classLink); |
| | 182 | } |
| | 183 | } |
| | 184 | } |
| | 185 | } |
| | 186 | return classLinkList.toArray(new ClassLink[0]); |
| | 187 | } |
| | 188 | |
| | 189 | |
| | 190 | |
| | 191 | private List<ClassLink> getNextClass(String endpointURI, String graphURI, Dataset dataset, String originClass, int limit) throws Exception{ |
| | 192 | ArrayList<ClassLink> classLinkList = new ArrayList<ClassLink>(); |
| | 193 | |
| | 194 | PropertyPartition[] pps = dataset.getPropertyPartitions(); |
| | 195 | if( pps == null ){ |
| | 196 | return classLinkList; |
| | 197 | } |
| | 198 | |
| | 199 | for(PropertyPartition pp: pps){ |
| | 200 | ClassRelation[] classRelations = pp.classRelations; |
| | 201 | if( classRelations != null ){ |
| | 202 | for(ClassRelation classRelation: classRelations){ |
| | 203 | String subjClassURI = classRelation.subjectClassURI; |
| | 204 | String objClassURI = classRelation.objectClassURI; |
| | 205 | boolean forward = false; |
| | 206 | boolean reverse = false; |
| | 207 | if( objClassURI != null && objClassURI.equals(originClass) ){ |
| | 208 | if( subjClassURI != null ){ |
| | 209 | reverse = true; |
| | 210 | } |
| | 211 | } |
| | 212 | if(subjClassURI != null && subjClassURI.equals(originClass)){ |
| | 213 | if( objClassURI != null || classRelation.objectDatatypeURI != null ){ |
| | 214 | forward = true; |
| | 215 | } |
| | 216 | } |
| | 217 | ClassLink classLink = null; |
| | 218 | if( forward && !reverse ){ |
| | 219 | classLink = new ClassLink(); |
| | 220 | classLink.setDirection(Direction.forward); |
| | 221 | classLink.setNumOfOriginClassInstances(classRelation.distinctSubjects); |
| | 222 | if( objClassURI != null ){ |
| | 223 | classLink.setLinkedClassURI(objClassURI); |
| | 224 | classLink.setNumOfLinkedClassInstances(classRelation.distinctObjects); |
| | 225 | }else{ |
| | 226 | classLink.setLinkedLiteralDatatypeURI(classRelation.objectDatatypeURI); |
| | 227 | classLink.setNumOfLinkedInstances(classRelation.triples); |
| | 228 | } |
| | 229 | classLink.setNumOfOriginInstances(pp.distinctSubjects); |
| | 230 | classLink.setNumOfLinkedInstances(pp.distinctObjects); |
| | 231 | |
| | 232 | } |
| | 233 | if( !forward && reverse ){ |
| | 234 | classLink = new ClassLink(); |
| | 235 | classLink.setDirection(Direction.reverse); |
| | 236 | classLink.setLinkedClassURI(objClassURI); |
| | 237 | classLink.setNumOfOriginClassInstances(classRelation.distinctObjects); |
| | 238 | classLink.setNumOfOriginInstances(pp.distinctObjects); |
| | 239 | classLink.setNumOfLinkedInstances(pp.distinctSubjects); |
| | 240 | classLink.setNumOfLinkedClassInstances(classRelation.distinctSubjects); |
| | 241 | } |
| | 242 | if( forward && reverse){ |
| | 243 | classLink = new ClassLink(); |
| | 244 | classLink.setDirection(Direction.both); |
| | 245 | classLink.setLinkedClassURI(objClassURI); |
| | 246 | classLink.setNumOfOriginClassInstances(classRelation.distinctSubjects); |
| | 247 | classLink.setNumOfOriginInstances(pp.distinctSubjects); |
| | 248 | classLink.setNumOfLinkedInstances(pp.distinctObjects); |
| | 249 | classLink.setNumOfLinkedClassInstances(classRelation.distinctObjects); |
| | 250 | } |
| | 251 | // hit |
| | 252 | if( classLink != null ){ |
| | 253 | classLink.setEndpointURI(endpointURI); |
| | 254 | classLink.setGraphURI(graphURI); |
| | 255 | classLink.setPropertyURI(pp.propertyDef.propertyURI); |
| | 256 | classLink.setNumOfLinks(classRelation.triples); |
| | 257 | classLinkList.add(classLink); |
| | 258 | } |
| | 259 | } |
| | 260 | } |
| | 261 | } |
| | 262 | return classLinkList; |
| | 263 | } |
| | 264 | |
| | 265 | |
| | 266 | |
| | 267 | |
| | 268 | |
| | 269 | |
| | 270 | |
| | 271 | public LabelMap[] getLabels(String[] resourceURIs, String language) throws Exception { |
| | 272 | if( resourceURIs == null || resourceURIs.length == 0 ){ |
| | 273 | return new LabelMap[0]; |
| | 274 | } |
| | 275 | |
| | 276 | HashSet<String> resourceURIset = new HashSet<String>(); |
| | 277 | for(String resourceURI: resourceURIs){ |
| | 278 | resourceURIset.add(resourceURI); |
| | 279 | } |
| | 280 | |
| | 281 | HashMap<String, LabelMap> labelMapTable = new HashMap<String, LabelMap>(); |
| | 282 | |
| | 283 | Set<String> endpointURISet = crawledMetadataTable.keySet(); |
| | 284 | for(String endpointURI: endpointURISet){ |
| | 285 | CrawledMetadata cm = crawledMetadataTable.get(endpointURI); |
| | 286 | // default |
| | 287 | Dataset dataset = cm.getDefaultDataset(); |
| | 288 | labelMapTable = getLabels(dataset, resourceURIset, language, labelMapTable); |
| | 289 | |
| | 290 | // graphs |
| | 291 | String[] graphURIs = cm.getGraphURIs(); |
| | 292 | if( graphURIs != null ){ |
| | 293 | for(String graphURI: graphURIs){ |
| | 294 | dataset = cm.getDataset(graphURI); |
| | 295 | labelMapTable = getLabels(dataset, resourceURIset, language, labelMapTable); |
| | 296 | } |
| | 297 | } |
| | 298 | } |
| | 299 | return labelMapTable.values().toArray(new LabelMap[0]); |
| | 300 | } |
| | 301 | |
| | 302 | |
| | 303 | private HashMap<String, LabelMap> getLabels(Dataset dataset, HashSet<String> resourceURISet , String language, HashMap<String, LabelMap> labelMapTable) throws Exception { |
| | 304 | ClassPartition[] cps = dataset.getClassPartitions(); |
| | 305 | if( cps != null ){ |
| | 306 | for(ClassPartition cp: cps){ |
| | 307 | String uri = cp.classDef.classURI; |
| | 308 | if( resourceURISet.contains(uri)){ |
| | 309 | Label[] rLabels = cp.classDef.labels; |
| | 310 | if( rLabels != null ){ |
| | 311 | for(Label rLabel: rLabels){ |
| | 312 | if( language == null || ( rLabel.language == null || rLabel.language.equals(language))){ |
| | 313 | LabelMap labelMap = null; |
| | 314 | if( labelMapTable.containsKey(uri)){ |
| | 315 | labelMap = labelMapTable.get(uri); |
| | 316 | }else{ |
| | 317 | labelMap = new LabelMap(); |
| | 318 | labelMapTable.put(uri, labelMap); |
| | 319 | labelMap.setResourceURI(uri); |
| | 320 | } |
| | 321 | labelMap.addLabel(new org.biohackathon.SPARQLBuilder.OWL.Label(rLabel.value, rLabel.language)); |
| | 322 | } |
| | 323 | } |
| | 324 | } |
| | 325 | } |
| | 326 | } |
| | 327 | } |
| | 328 | |
| | 329 | PropertyPartition[] pps = dataset.getPropertyPartitions(); |
| | 330 | if( pps != null ){ |
| | 331 | for(PropertyPartition pp: pps){ |
| | 332 | String uri = pp.propertyDef.propertyURI; |
| | 333 | if( resourceURISet.contains(uri)){ |
| | 334 | Label[] rLabels = pp.propertyDef.labels; |
| | 335 | if( rLabels != null ){ |
| | 336 | for(Label rLabel: rLabels){ |
| | 337 | if( language == null || ( rLabel.language == null || rLabel.language.equals(language))){ |
| | 338 | LabelMap labelMap = null; |
| | 339 | if( labelMapTable.containsKey(uri)){ |
| | 340 | labelMap = labelMapTable.get(uri); |
| | 341 | }else{ |
| | 342 | labelMap = new LabelMap(); |
| | 343 | labelMapTable.put(uri, labelMap); |
| | 344 | labelMap.setResourceURI(uri); |
| | 345 | } |
| | 346 | labelMap.addLabel(new org.biohackathon.SPARQLBuilder.OWL.Label(rLabel.value, rLabel.language)); |
| | 347 | } |
| | 348 | } |
| | 349 | } |
| | 350 | } |
| | 351 | } |
| | 352 | } |
| | 353 | return labelMapTable; |
| | 354 | } |