[80] | 1 | package org.biohackathon.SPARQLBuilder.OWL;
|
---|
| 2 |
|
---|
| 3 | import java.util.ArrayList;
|
---|
| 4 | import java.util.HashMap;
|
---|
| 5 |
|
---|
| 6 | import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.JenaModelGenerator;
|
---|
| 7 |
|
---|
| 8 | import com.hp.hpl.jena.query.Query;
|
---|
| 9 | import com.hp.hpl.jena.query.QueryExecution;
|
---|
| 10 | import com.hp.hpl.jena.query.QueryExecutionFactory;
|
---|
| 11 | import com.hp.hpl.jena.query.QueryFactory;
|
---|
| 12 | import com.hp.hpl.jena.query.QuerySolution;
|
---|
| 13 | import com.hp.hpl.jena.query.ResultSet;
|
---|
| 14 | import com.hp.hpl.jena.rdf.model.Literal;
|
---|
| 15 | import com.hp.hpl.jena.rdf.model.Model;
|
---|
| 16 | import com.hp.hpl.jena.rdf.model.Resource;
|
---|
| 17 |
|
---|
| 18 | //public class OWLQueryBuilderForCrawlerImpl implements OWLQueryBuilder {
|
---|
| 19 | public class AcquiredStructureAnalyzer implements RDFSchemaAnalyzer {
|
---|
| 20 |
|
---|
| 21 | private Model model = null;
|
---|
| 22 | private String endpointURI = null;
|
---|
| 23 | private String[] graphURIs = null;
|
---|
| 24 |
|
---|
| 25 | public String getEndpointURI(){
|
---|
| 26 | return endpointURI;
|
---|
| 27 | }
|
---|
| 28 |
|
---|
| 29 | public String[] getGraphURIs(){
|
---|
| 30 | return graphURIs;
|
---|
| 31 | }
|
---|
| 32 |
|
---|
| 33 |
|
---|
| 34 | public static void main(String[] args) throws Exception{
|
---|
| 35 | JenaModelGenerator jmGene = new JenaModelGenerator("c:\\temp\\allie.ttl");
|
---|
| 36 | AcquiredStructureAnalyzer impl
|
---|
| 37 | = new AcquiredStructureAnalyzer(jmGene.getEndpointURI(), jmGene.getGraphURIs(), jmGene.getModel());
|
---|
| 38 | SClass[] scs = impl.getOWLClasses(null, null, null, true);
|
---|
| 39 | for(SClass sc: scs){
|
---|
| 40 | System.out.println(sc.toString());
|
---|
| 41 | }
|
---|
| 42 | ClassLink[] cls = impl.getNextClass(null,"http://purl.org/goodrelations/v1#Offering",100,true );
|
---|
| 43 | for(ClassLink cl: cls){
|
---|
| 44 | System.out.println(cl.toString());
|
---|
| 45 | }
|
---|
| 46 |
|
---|
| 47 | }
|
---|
| 48 |
|
---|
| 49 |
|
---|
| 50 | public AcquiredStructureAnalyzer(String endpointURI, String[] graphURIs, Model model){
|
---|
| 51 | this.model = model;
|
---|
| 52 | this.endpointURI = endpointURI;
|
---|
| 53 | this.graphURIs = graphURIs;
|
---|
| 54 | }
|
---|
| 55 |
|
---|
| 56 | private String[] filterGraphURIs(String[] orgGraphURIs){
|
---|
| 57 | // TODO
|
---|
| 58 | return graphURIs;
|
---|
| 59 | }
|
---|
| 60 |
|
---|
| 61 |
|
---|
| 62 | public SClass[] listClasses(String[] graphURIs, boolean countInstances) throws Exception{
|
---|
| 63 | return getOWLClasses(graphURIs, null, null, countInstances);
|
---|
| 64 | }
|
---|
| 65 |
|
---|
| 66 |
|
---|
[93] | 67 |
|
---|
| 68 |
|
---|
[80] | 69 | public SClass[] getOWLClasses(String[] graphURIs, String[] keywords, String language, boolean countInstances) throws Exception{
|
---|
| 70 | String[] targetGraphURIs = filterGraphURIs(graphURIs);
|
---|
| 71 |
|
---|
| 72 | StringBuffer queryStr = new StringBuffer();
|
---|
| 73 | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n");
|
---|
| 74 | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n");
|
---|
| 75 | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
|
---|
| 76 | queryStr.append("SELECT DISTINCT ?c ?pLabel ?numOfInstances\n");
|
---|
| 77 | if (targetGraphURIs != null) {
|
---|
| 78 | for (String graphURI : targetGraphURIs) {
|
---|
| 79 | queryStr.append("FROM <");
|
---|
| 80 | queryStr.append(graphURI);
|
---|
| 81 | queryStr.append(">\n");
|
---|
| 82 | }
|
---|
| 83 | }
|
---|
| 84 | queryStr.append("WHERE{\n");
|
---|
| 85 |
|
---|
| 86 | //
|
---|
| 87 | queryStr.append(" ?c rdf:type rdfs:Class. \n");
|
---|
| 88 | queryStr.append(" ?c <http://sparqlbuilder.org/numberOfInstances> ?numOfInstances. \n");
|
---|
| 89 | queryStr.append(" OPTIONAL{ ?c rdfs:label ?pLabel. }\n");
|
---|
| 90 |
|
---|
| 91 | if (keywords != null && keywords.length != 0) {
|
---|
| 92 |
|
---|
| 93 | queryStr.append(" ?c rdfs:label ");
|
---|
| 94 | queryStr.append("?keywords").append(".\n");
|
---|
| 95 | queryStr.append(" filter((LANG(?keywords) = \'").append(language);
|
---|
| 96 | queryStr.append("\') && \n (");
|
---|
| 97 |
|
---|
| 98 | // (LANG(?keywords) = 'en') &&
|
---|
| 99 |
|
---|
| 100 | for (int i = 0; i < keywords.length; i++) {
|
---|
| 101 | if (i > 0)
|
---|
| 102 | queryStr.append(" || \n ");
|
---|
| 103 |
|
---|
| 104 | queryStr.append("regex(str(").append("?keywords")
|
---|
| 105 | .append("),\"");
|
---|
| 106 | queryStr.append(keywords[i]);
|
---|
| 107 | queryStr.append("\", \"i\" )");
|
---|
| 108 |
|
---|
| 109 | }
|
---|
| 110 | queryStr.append("))\n");
|
---|
| 111 |
|
---|
| 112 | }
|
---|
| 113 | queryStr.append("}");
|
---|
| 114 | System.out.println(queryStr.toString());
|
---|
| 115 |
|
---|
| 116 | Query query = QueryFactory.create(queryStr.toString());
|
---|
| 117 |
|
---|
| 118 | QueryExecution qexec = null;
|
---|
| 119 | ResultSet results = null;
|
---|
| 120 | try {
|
---|
| 121 | long start = System.currentTimeMillis();
|
---|
| 122 | qexec = QueryExecutionFactory.create(query, model);
|
---|
| 123 | results = qexec.execSelect();
|
---|
| 124 | long end = System.currentTimeMillis();
|
---|
| 125 | System.out.println("EXEC TIME: " + (end - start));
|
---|
| 126 | } catch (Exception ex) {
|
---|
| 127 | ex.printStackTrace();
|
---|
| 128 | throw ex;
|
---|
| 129 | }
|
---|
| 130 |
|
---|
| 131 | HashMap<String, SClass> classMap = new HashMap<String, SClass>();
|
---|
| 132 | for (; results.hasNext();) {
|
---|
| 133 | QuerySolution sol = results.next();
|
---|
| 134 | Resource res = sol.getResource("c");
|
---|
| 135 | if (res != null) {
|
---|
| 136 | String uri = res.getURI();
|
---|
| 137 | int numOfInstances = 0;
|
---|
| 138 | if (countInstances) {
|
---|
| 139 | numOfInstances = sol.getLiteral("numOfInstances").getInt();
|
---|
| 140 | } //
|
---|
| 141 | Literal labelLiteral = sol.getLiteral("pLabel");
|
---|
| 142 | SClass sClass = null;
|
---|
| 143 | if (classMap.containsKey(uri)) {
|
---|
| 144 | sClass = classMap.get(uri);
|
---|
| 145 | } else {
|
---|
| 146 | sClass = new SClass(uri, null, numOfInstances);
|
---|
| 147 | classMap.put(uri, sClass);
|
---|
| 148 | }
|
---|
| 149 | if (labelLiteral != null) {
|
---|
| 150 | String label = labelLiteral.getString();
|
---|
| 151 | String lang = labelLiteral.getLanguage();
|
---|
| 152 | sClass.addLabel(new Label(label, lang));
|
---|
| 153 | }
|
---|
| 154 | }
|
---|
| 155 | }
|
---|
| 156 | qexec.close();
|
---|
| 157 | return classMap.values().toArray(new SClass[0]);
|
---|
| 158 |
|
---|
| 159 | }
|
---|
| 160 |
|
---|
| 161 | /*
|
---|
| 162 |
|
---|
| 163 | public Instance[] getInstances(String[] graphURIs, String keyword) throws Exception;
|
---|
| 164 | */
|
---|
| 165 |
|
---|
| 166 |
|
---|
| 167 | public ClassLink[] getNextClass(String[] graphURIs, String originClass, int limit, boolean countLinks) throws Exception{
|
---|
| 168 | String[] targetGraphURIs = filterGraphURIs(graphURIs);
|
---|
| 169 |
|
---|
| 170 | StringBuffer queryStr = new StringBuffer();
|
---|
| 171 | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n");
|
---|
| 172 | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n");
|
---|
| 173 | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
|
---|
| 174 |
|
---|
| 175 | // SELECT
|
---|
| 176 | queryStr.append("SELECT DISTINCT ?c ?d ?p ?numInsStart ?numInsEnd ?numTriples \n");
|
---|
| 177 |
|
---|
| 178 | if (targetGraphURIs != null) {
|
---|
| 179 | for (String graphURI : targetGraphURIs) {
|
---|
| 180 | queryStr.append("FROM <");
|
---|
| 181 | queryStr.append(graphURI);
|
---|
| 182 | queryStr.append(">\n");
|
---|
| 183 | }
|
---|
| 184 | }
|
---|
| 185 |
|
---|
| 186 | queryStr.append("WHERE{\n");
|
---|
| 187 | queryStr.append(" ?cr rdf:type <http://sparqlbuilder.org/ClassRelation>. \n");
|
---|
[88] | 188 | queryStr.append(" <" + originClass + "> <http://sparqlbuilder.org/numberOfInstances> ?numInsStart. \n");
|
---|
[80] | 189 | queryStr.append(" {");
|
---|
| 190 | queryStr.append(" ?cr <http://sparqlbuilder.org/startClass> <" + originClass + ">. \n");
|
---|
| 191 | queryStr.append(" ?cr <http://sparqlbuilder.org/endClass> ?c. \n");
|
---|
| 192 | queryStr.append(" ?cr <http://sparqlbuilder.org/property> ?p. \n");
|
---|
| 193 | queryStr.append(" ?cr <http://sparqlbuilder.org/numberOfTriples> ?numTriples. \n");
|
---|
| 194 | queryStr.append(" ?c <http://sparqlbuilder.org/numberOfInstances> ?numInsEnd. \n");
|
---|
| 195 | queryStr.append("}\n");
|
---|
| 196 | queryStr.append(" UNION\n");
|
---|
| 197 | queryStr.append(" {");
|
---|
| 198 | queryStr.append(" ?cr <http://sparqlbuilder.org/endClass> <" + originClass + ">. \n");
|
---|
| 199 | queryStr.append(" ?cr <http://sparqlbuilder.org/startClass> ?d. \n");
|
---|
| 200 | queryStr.append(" ?cr <http://sparqlbuilder.org/property> ?p. \n");
|
---|
| 201 | queryStr.append(" ?cr <http://sparqlbuilder.org/numberOfTriples> ?numTriples.\n");
|
---|
| 202 | queryStr.append(" ?d <http://sparqlbuilder.org/numberOfInstances> ?numInsEnd. \n");
|
---|
| 203 | queryStr.append("}\n");
|
---|
| 204 | queryStr.append("}\n");
|
---|
| 205 |
|
---|
| 206 |
|
---|
| 207 | if (limit > 0) {
|
---|
| 208 | queryStr.append("limit ");
|
---|
| 209 | queryStr.append(limit);
|
---|
| 210 | queryStr.append("\n");
|
---|
| 211 | }
|
---|
| 212 |
|
---|
| 213 | // System.out.println("getNextClasses SPARQL Query: ");
|
---|
| 214 | // System.out.println(queryStr.toString());
|
---|
| 215 |
|
---|
| 216 | Query query = QueryFactory.create(queryStr.toString());
|
---|
| 217 | QueryExecution qexec = null;
|
---|
| 218 | ResultSet results = null;
|
---|
| 219 | try {
|
---|
| 220 | long start = System.currentTimeMillis();
|
---|
| 221 | qexec = QueryExecutionFactory.create(query, model);
|
---|
| 222 | results = qexec.execSelect();
|
---|
| 223 | long end = System.currentTimeMillis();
|
---|
| 224 | System.out.println("EXEC TIME: " + (end - start));
|
---|
| 225 | } catch (Exception ex) {
|
---|
| 226 | ex.printStackTrace();
|
---|
| 227 | throw ex;
|
---|
| 228 | }
|
---|
| 229 |
|
---|
| 230 | ArrayList<ClassLink> solCLs = new ArrayList<ClassLink>();
|
---|
| 231 | for (; results.hasNext();) {
|
---|
| 232 | QuerySolution sol = results.next();
|
---|
| 233 | Resource pro = sol.getResource("p");
|
---|
| 234 | String clsURI = null;
|
---|
| 235 | if (pro != null) {
|
---|
| 236 | String proURI = pro.getURI();
|
---|
| 237 | Resource ccls = sol.getResource("c");
|
---|
| 238 | Resource dcls = sol.getResource("d");
|
---|
| 239 | Direction direction = null;
|
---|
| 240 | if(ccls != null && dcls == null ){
|
---|
| 241 | // direction forward
|
---|
| 242 | direction = Direction.forward;
|
---|
| 243 | clsURI = ccls.getURI();
|
---|
| 244 | }else{
|
---|
| 245 | if( ccls == null && dcls != null ){
|
---|
| 246 | direction = Direction.reverse;
|
---|
| 247 | clsURI = dcls.getURI();
|
---|
| 248 | }
|
---|
| 249 | }
|
---|
| 250 | int numTriples = 0;
|
---|
| 251 | Literal numTriplesLit = sol.getLiteral("numTriples");
|
---|
| 252 | if( numTriplesLit != null ){
|
---|
| 253 | numTriples = numTriplesLit.getInt();
|
---|
| 254 | }
|
---|
[88] | 255 | int numInsStart = 0;
|
---|
| 256 | Literal numInsStartLit = sol.getLiteral("numInsStart");
|
---|
| 257 | if( numInsStartLit != null ){
|
---|
| 258 | numInsStart = numInsStartLit.getInt();
|
---|
| 259 | }
|
---|
| 260 | int numInsEnd = 0;
|
---|
| 261 | Literal numInsEndLit = sol.getLiteral("numInsEnd");
|
---|
| 262 | if( numInsEndLit != null ){
|
---|
| 263 | numInsEnd = numInsEndLit.getInt();
|
---|
| 264 | }
|
---|
[80] | 265 | ClassLink cl = new ClassLink(proURI, clsURI, direction,
|
---|
[88] | 266 | numTriples, 0, 0, numInsEnd, numInsStart);
|
---|
[80] | 267 | solCLs.add(cl);
|
---|
| 268 | }
|
---|
| 269 | }
|
---|
| 270 | qexec.close();
|
---|
| 271 | return solCLs.toArray(new ClassLink[0]);
|
---|
| 272 | }
|
---|
| 273 |
|
---|
| 274 |
|
---|
| 275 |
|
---|
| 276 | /*
|
---|
| 277 |
|
---|
| 278 | public ClassLink[] getNextClassViaInstanceLink(String[] graphURIs, String originClass, int limit) throws Exception;
|
---|
| 279 |
|
---|
| 280 | public Path[] getPaths(String startClass, String endClass, int mode, boolean countLinks) throws Exception;
|
---|
| 281 |
|
---|
| 282 | public String createSPARQL(Path path) throws Exception;
|
---|
| 283 |
|
---|
| 284 | InstanceLink[] getNextInstancesViaInstanceLink(String[] graphURIs, String originInstance,
|
---|
| 285 | int limit) throws Exception;
|
---|
| 286 | */
|
---|
| 287 |
|
---|
| 288 | public LabelMap[] getLabels(String[] graphURIs, String[] resourceURIs,
|
---|
| 289 | String language) throws Exception {
|
---|
| 290 | if (resourceURIs == null || resourceURIs.length == 0) {
|
---|
| 291 | return new LabelMap[0];
|
---|
| 292 | }
|
---|
| 293 | StringBuffer queryStr = new StringBuffer();
|
---|
| 294 | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n");
|
---|
| 295 | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n");
|
---|
| 296 | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
|
---|
| 297 | queryStr.append("SELECT DISTINCT ?res ?label \n");
|
---|
| 298 | if (graphURIs != null) {
|
---|
| 299 | for (String graphURI : graphURIs) {
|
---|
| 300 | queryStr.append("FROM <");
|
---|
| 301 | queryStr.append(graphURI);
|
---|
| 302 | queryStr.append(">\n");
|
---|
| 303 | }
|
---|
| 304 | }
|
---|
| 305 | queryStr.append("WHERE{\n");
|
---|
| 306 | queryStr.append(" ?res rdfs:label ?label.\n");
|
---|
| 307 | queryStr.append(" FILTER(?res IN (");
|
---|
| 308 | boolean f = false;
|
---|
| 309 | for (String resourceURI : resourceURIs) {
|
---|
| 310 | if (f) {
|
---|
| 311 | queryStr.append(", ");
|
---|
| 312 | }
|
---|
| 313 | f = true;
|
---|
| 314 | queryStr.append("<");
|
---|
| 315 | queryStr.append(resourceURI);
|
---|
| 316 | queryStr.append(">");
|
---|
| 317 | }
|
---|
| 318 | queryStr.append("))\n");
|
---|
| 319 | queryStr.append("}");
|
---|
| 320 |
|
---|
| 321 | System.out.println(queryStr.toString());
|
---|
| 322 |
|
---|
| 323 | Query query = QueryFactory.create(queryStr.toString());
|
---|
| 324 | QueryExecution qexec = QueryExecutionFactory.create(query, model);
|
---|
| 325 |
|
---|
| 326 | ResultSet results = qexec.execSelect();
|
---|
| 327 | HashMap<String, LabelMap> lMap = new HashMap<String, LabelMap>();
|
---|
| 328 | for (; results.hasNext();) {
|
---|
| 329 | QuerySolution sol = results.next();
|
---|
| 330 | String uri = sol.getResource("res").getURI();
|
---|
| 331 | Literal literal = sol.getLiteral("label");
|
---|
| 332 | if (literal != null) {
|
---|
| 333 | String label = literal.getString();
|
---|
| 334 | String lang = literal.getLanguage();
|
---|
| 335 | if (language != null && language.equals(lang)) {
|
---|
| 336 | Label lbl = new Label(label, lang);
|
---|
| 337 | if (lMap.containsKey(uri)) {
|
---|
| 338 | LabelMap lm = lMap.get(uri);
|
---|
| 339 | lm.addLabel(lbl);
|
---|
| 340 | } else {
|
---|
| 341 | LabelMap lm = new LabelMap(uri, new Label[] { lbl });
|
---|
| 342 | lMap.put(uri, lm);
|
---|
| 343 | }
|
---|
| 344 | }
|
---|
| 345 | }
|
---|
| 346 | }
|
---|
| 347 | return lMap.values().toArray(new LabelMap[0]);
|
---|
| 348 | }
|
---|
| 349 | /*
|
---|
| 350 | public ClassLink[] countLinks(String[] graphURIs, String startClassURI,
|
---|
| 351 | ClassLink[] classLinks) throws Exception;
|
---|
| 352 |
|
---|
| 353 | public SClass[] countInstances(String[] graphURIs, SClass[] classes) throws Exception;
|
---|
| 354 |
|
---|
| 355 |
|
---|
| 356 | */
|
---|
| 357 | }
|
---|