[80] | 1 | package org.biohackathon.SPARQLBuilder.OWL;
|
---|
| 2 |
|
---|
| 3 | import java.util.ArrayList;
|
---|
| 4 | import java.util.HashMap;
|
---|
| 5 |
|
---|
| 6 | import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.JenaModelGenerator;
|
---|
| 7 |
|
---|
| 8 | import com.hp.hpl.jena.query.Query;
|
---|
| 9 | import com.hp.hpl.jena.query.QueryExecution;
|
---|
| 10 | import com.hp.hpl.jena.query.QueryExecutionFactory;
|
---|
| 11 | import com.hp.hpl.jena.query.QueryFactory;
|
---|
| 12 | import com.hp.hpl.jena.query.QuerySolution;
|
---|
| 13 | import com.hp.hpl.jena.query.ResultSet;
|
---|
| 14 | import com.hp.hpl.jena.rdf.model.Literal;
|
---|
| 15 | import com.hp.hpl.jena.rdf.model.Model;
|
---|
| 16 | import com.hp.hpl.jena.rdf.model.Resource;
|
---|
| 17 |
|
---|
| 18 | //public class OWLQueryBuilderForCrawlerImpl implements OWLQueryBuilder {
|
---|
| 19 | public class AcquiredStructureAnalyzer implements RDFSchemaAnalyzer {
|
---|
| 20 |
|
---|
| 21 | private Model model = null;
|
---|
| 22 | private String endpointURI = null;
|
---|
| 23 | private String[] graphURIs = null;
|
---|
| 24 |
|
---|
| 25 | public String getEndpointURI(){
|
---|
| 26 | return endpointURI;
|
---|
| 27 | }
|
---|
| 28 |
|
---|
| 29 | public String[] getGraphURIs(){
|
---|
| 30 | return graphURIs;
|
---|
| 31 | }
|
---|
| 32 |
|
---|
| 33 |
|
---|
| 34 | public static void main(String[] args) throws Exception{
|
---|
| 35 | JenaModelGenerator jmGene = new JenaModelGenerator("c:\\temp\\allie.ttl");
|
---|
| 36 | AcquiredStructureAnalyzer impl
|
---|
| 37 | = new AcquiredStructureAnalyzer(jmGene.getEndpointURI(), jmGene.getGraphURIs(), jmGene.getModel());
|
---|
| 38 | SClass[] scs = impl.getOWLClasses(null, null, null, true);
|
---|
| 39 | for(SClass sc: scs){
|
---|
| 40 | System.out.println(sc.toString());
|
---|
| 41 | }
|
---|
| 42 | ClassLink[] cls = impl.getNextClass(null,"http://purl.org/goodrelations/v1#Offering",100,true );
|
---|
| 43 | for(ClassLink cl: cls){
|
---|
| 44 | System.out.println(cl.toString());
|
---|
| 45 | }
|
---|
| 46 |
|
---|
| 47 | }
|
---|
| 48 |
|
---|
| 49 |
|
---|
/**
 * Creates an analyzer that runs its queries against the given model.
 * <p>
 * All three arguments are stored as passed; the graph URI array is not copied.
 *
 * @param endpointURI value later returned by {@code getEndpointURI()}
 * @param graphURIs value later returned by {@code getGraphURIs()}
 * @param model Jena model the queries are executed against
 */
public AcquiredStructureAnalyzer(String endpointURI, String[] graphURIs, Model model) {
    this.endpointURI = endpointURI;
    this.graphURIs = graphURIs;
    this.model = model;
}
|
---|
| 55 |
|
---|
| 56 | private String[] filterGraphURIs(String[] orgGraphURIs){
|
---|
| 57 | // TODO
|
---|
| 58 | return graphURIs;
|
---|
| 59 | }
|
---|
| 60 |
|
---|
| 61 |
|
---|
| 62 | public SClass[] listClasses(String[] graphURIs, boolean countInstances) throws Exception{
|
---|
| 63 | return getOWLClasses(graphURIs, null, null, countInstances);
|
---|
| 64 | }
|
---|
| 65 |
|
---|
| 66 |
|
---|
| 67 | public SClass[] getOWLClasses(String[] graphURIs, String[] keywords, String language, boolean countInstances) throws Exception{
|
---|
| 68 | String[] targetGraphURIs = filterGraphURIs(graphURIs);
|
---|
| 69 |
|
---|
| 70 | StringBuffer queryStr = new StringBuffer();
|
---|
| 71 | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n");
|
---|
| 72 | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n");
|
---|
| 73 | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
|
---|
| 74 | queryStr.append("SELECT DISTINCT ?c ?pLabel ?numOfInstances\n");
|
---|
| 75 | if (targetGraphURIs != null) {
|
---|
| 76 | for (String graphURI : targetGraphURIs) {
|
---|
| 77 | queryStr.append("FROM <");
|
---|
| 78 | queryStr.append(graphURI);
|
---|
| 79 | queryStr.append(">\n");
|
---|
| 80 | }
|
---|
| 81 | }
|
---|
| 82 | queryStr.append("WHERE{\n");
|
---|
| 83 |
|
---|
| 84 | //
|
---|
| 85 | queryStr.append(" ?c rdf:type rdfs:Class. \n");
|
---|
| 86 | queryStr.append(" ?c <http://sparqlbuilder.org/numberOfInstances> ?numOfInstances. \n");
|
---|
| 87 | queryStr.append(" OPTIONAL{ ?c rdfs:label ?pLabel. }\n");
|
---|
| 88 |
|
---|
| 89 | if (keywords != null && keywords.length != 0) {
|
---|
| 90 |
|
---|
| 91 | queryStr.append(" ?c rdfs:label ");
|
---|
| 92 | queryStr.append("?keywords").append(".\n");
|
---|
| 93 | queryStr.append(" filter((LANG(?keywords) = \'").append(language);
|
---|
| 94 | queryStr.append("\') && \n (");
|
---|
| 95 |
|
---|
| 96 | // (LANG(?keywords) = 'en') &&
|
---|
| 97 |
|
---|
| 98 | for (int i = 0; i < keywords.length; i++) {
|
---|
| 99 | if (i > 0)
|
---|
| 100 | queryStr.append(" || \n ");
|
---|
| 101 |
|
---|
| 102 | queryStr.append("regex(str(").append("?keywords")
|
---|
| 103 | .append("),\"");
|
---|
| 104 | queryStr.append(keywords[i]);
|
---|
| 105 | queryStr.append("\", \"i\" )");
|
---|
| 106 |
|
---|
| 107 | }
|
---|
| 108 | queryStr.append("))\n");
|
---|
| 109 |
|
---|
| 110 | }
|
---|
| 111 | queryStr.append("}");
|
---|
| 112 | System.out.println(queryStr.toString());
|
---|
| 113 |
|
---|
| 114 | Query query = QueryFactory.create(queryStr.toString());
|
---|
| 115 |
|
---|
| 116 | QueryExecution qexec = null;
|
---|
| 117 | ResultSet results = null;
|
---|
| 118 | try {
|
---|
| 119 | long start = System.currentTimeMillis();
|
---|
| 120 | qexec = QueryExecutionFactory.create(query, model);
|
---|
| 121 | results = qexec.execSelect();
|
---|
| 122 | long end = System.currentTimeMillis();
|
---|
| 123 | System.out.println("EXEC TIME: " + (end - start));
|
---|
| 124 | } catch (Exception ex) {
|
---|
| 125 | ex.printStackTrace();
|
---|
| 126 | throw ex;
|
---|
| 127 | }
|
---|
| 128 |
|
---|
| 129 | HashMap<String, SClass> classMap = new HashMap<String, SClass>();
|
---|
| 130 | for (; results.hasNext();) {
|
---|
| 131 | QuerySolution sol = results.next();
|
---|
| 132 | Resource res = sol.getResource("c");
|
---|
| 133 | if (res != null) {
|
---|
| 134 | String uri = res.getURI();
|
---|
| 135 | int numOfInstances = 0;
|
---|
| 136 | if (countInstances) {
|
---|
| 137 | numOfInstances = sol.getLiteral("numOfInstances").getInt();
|
---|
| 138 | } //
|
---|
| 139 | Literal labelLiteral = sol.getLiteral("pLabel");
|
---|
| 140 | SClass sClass = null;
|
---|
| 141 | if (classMap.containsKey(uri)) {
|
---|
| 142 | sClass = classMap.get(uri);
|
---|
| 143 | } else {
|
---|
| 144 | sClass = new SClass(uri, null, numOfInstances);
|
---|
| 145 | classMap.put(uri, sClass);
|
---|
| 146 | }
|
---|
| 147 | if (labelLiteral != null) {
|
---|
| 148 | String label = labelLiteral.getString();
|
---|
| 149 | String lang = labelLiteral.getLanguage();
|
---|
| 150 | sClass.addLabel(new Label(label, lang));
|
---|
| 151 | }
|
---|
| 152 | }
|
---|
| 153 | }
|
---|
| 154 | qexec.close();
|
---|
| 155 | return classMap.values().toArray(new SClass[0]);
|
---|
| 156 |
|
---|
| 157 | }
|
---|
| 158 |
|
---|
| 159 | /*
|
---|
| 160 |
|
---|
| 161 | public Instance[] getInstances(String[] graphURIs, String keyword) throws Exception;
|
---|
| 162 | */
|
---|
| 163 |
|
---|
| 164 |
|
---|
| 165 | public ClassLink[] getNextClass(String[] graphURIs, String originClass, int limit, boolean countLinks) throws Exception{
|
---|
| 166 | String[] targetGraphURIs = filterGraphURIs(graphURIs);
|
---|
| 167 |
|
---|
| 168 | StringBuffer queryStr = new StringBuffer();
|
---|
| 169 | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n");
|
---|
| 170 | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n");
|
---|
| 171 | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
|
---|
| 172 |
|
---|
| 173 | // SELECT
|
---|
| 174 | queryStr.append("SELECT DISTINCT ?c ?d ?p ?numInsStart ?numInsEnd ?numTriples \n");
|
---|
| 175 |
|
---|
| 176 | if (targetGraphURIs != null) {
|
---|
| 177 | for (String graphURI : targetGraphURIs) {
|
---|
| 178 | queryStr.append("FROM <");
|
---|
| 179 | queryStr.append(graphURI);
|
---|
| 180 | queryStr.append(">\n");
|
---|
| 181 | }
|
---|
| 182 | }
|
---|
| 183 |
|
---|
| 184 | queryStr.append("WHERE{\n");
|
---|
| 185 | queryStr.append(" ?cr rdf:type <http://sparqlbuilder.org/ClassRelation>. \n");
|
---|
| 186 | queryStr.append(" <" + originClass + "> <http://sparqlbuilder.org/numberOfInstances> ?numInsEnd. \n");
|
---|
| 187 | queryStr.append(" {");
|
---|
| 188 | queryStr.append(" ?cr <http://sparqlbuilder.org/startClass> <" + originClass + ">. \n");
|
---|
| 189 | queryStr.append(" ?cr <http://sparqlbuilder.org/endClass> ?c. \n");
|
---|
| 190 | queryStr.append(" ?cr <http://sparqlbuilder.org/property> ?p. \n");
|
---|
| 191 | queryStr.append(" ?cr <http://sparqlbuilder.org/numberOfTriples> ?numTriples. \n");
|
---|
| 192 | queryStr.append(" ?c <http://sparqlbuilder.org/numberOfInstances> ?numInsEnd. \n");
|
---|
| 193 | queryStr.append("}\n");
|
---|
| 194 | queryStr.append(" UNION\n");
|
---|
| 195 | queryStr.append(" {");
|
---|
| 196 | queryStr.append(" ?cr <http://sparqlbuilder.org/endClass> <" + originClass + ">. \n");
|
---|
| 197 | queryStr.append(" ?cr <http://sparqlbuilder.org/startClass> ?d. \n");
|
---|
| 198 | queryStr.append(" ?cr <http://sparqlbuilder.org/property> ?p. \n");
|
---|
| 199 | queryStr.append(" ?cr <http://sparqlbuilder.org/numberOfTriples> ?numTriples.\n");
|
---|
| 200 | queryStr.append(" ?d <http://sparqlbuilder.org/numberOfInstances> ?numInsEnd. \n");
|
---|
| 201 | queryStr.append("}\n");
|
---|
| 202 | queryStr.append("}\n");
|
---|
| 203 |
|
---|
| 204 |
|
---|
| 205 | if (limit > 0) {
|
---|
| 206 | queryStr.append("limit ");
|
---|
| 207 | queryStr.append(limit);
|
---|
| 208 | queryStr.append("\n");
|
---|
| 209 | }
|
---|
| 210 |
|
---|
| 211 | // System.out.println("getNextClasses SPARQL Query: ");
|
---|
| 212 | // System.out.println(queryStr.toString());
|
---|
| 213 |
|
---|
| 214 | Query query = QueryFactory.create(queryStr.toString());
|
---|
| 215 | QueryExecution qexec = null;
|
---|
| 216 | ResultSet results = null;
|
---|
| 217 | try {
|
---|
| 218 | long start = System.currentTimeMillis();
|
---|
| 219 | qexec = QueryExecutionFactory.create(query, model);
|
---|
| 220 | results = qexec.execSelect();
|
---|
| 221 | long end = System.currentTimeMillis();
|
---|
| 222 | System.out.println("EXEC TIME: " + (end - start));
|
---|
| 223 | } catch (Exception ex) {
|
---|
| 224 | ex.printStackTrace();
|
---|
| 225 | throw ex;
|
---|
| 226 | }
|
---|
| 227 |
|
---|
| 228 | ArrayList<ClassLink> solCLs = new ArrayList<ClassLink>();
|
---|
| 229 | for (; results.hasNext();) {
|
---|
| 230 | QuerySolution sol = results.next();
|
---|
| 231 | Resource pro = sol.getResource("p");
|
---|
| 232 | String clsURI = null;
|
---|
| 233 | if (pro != null) {
|
---|
| 234 | String proURI = pro.getURI();
|
---|
| 235 | Resource ccls = sol.getResource("c");
|
---|
| 236 | Resource dcls = sol.getResource("d");
|
---|
| 237 | Direction direction = null;
|
---|
| 238 | if(ccls != null && dcls == null ){
|
---|
| 239 | // direction forward
|
---|
| 240 | direction = Direction.forward;
|
---|
| 241 | clsURI = ccls.getURI();
|
---|
| 242 | }else{
|
---|
| 243 | if( ccls == null && dcls != null ){
|
---|
| 244 | direction = Direction.reverse;
|
---|
| 245 | clsURI = dcls.getURI();
|
---|
| 246 | }
|
---|
| 247 | }
|
---|
| 248 | int numTriples = 0;
|
---|
| 249 | Literal numTriplesLit = sol.getLiteral("numTriples");
|
---|
| 250 | if( numTriplesLit != null ){
|
---|
| 251 | numTriples = numTriplesLit.getInt();
|
---|
| 252 | }
|
---|
| 253 | ClassLink cl = new ClassLink(proURI, clsURI, direction,
|
---|
| 254 | numTriples, 0, 0, 0, 0);
|
---|
| 255 | solCLs.add(cl);
|
---|
| 256 | }
|
---|
| 257 | }
|
---|
| 258 | qexec.close();
|
---|
| 259 | return solCLs.toArray(new ClassLink[0]);
|
---|
| 260 | }
|
---|
| 261 |
|
---|
| 262 |
|
---|
| 263 |
|
---|
| 264 | /*
|
---|
| 265 |
|
---|
| 266 | public ClassLink[] getNextClassViaInstanceLink(String[] graphURIs, String originClass, int limit) throws Exception;
|
---|
| 267 |
|
---|
| 268 | public Path[] getPaths(String startClass, String endClass, int mode, boolean countLinks) throws Exception;
|
---|
| 269 |
|
---|
| 270 | public String createSPARQL(Path path) throws Exception;
|
---|
| 271 |
|
---|
| 272 | InstanceLink[] getNextInstancesViaInstanceLink(String[] graphURIs, String originInstance,
|
---|
| 273 | int limit) throws Exception;
|
---|
| 274 | */
|
---|
| 275 |
|
---|
| 276 | public LabelMap[] getLabels(String[] graphURIs, String[] resourceURIs,
|
---|
| 277 | String language) throws Exception {
|
---|
| 278 | if (resourceURIs == null || resourceURIs.length == 0) {
|
---|
| 279 | return new LabelMap[0];
|
---|
| 280 | }
|
---|
| 281 | StringBuffer queryStr = new StringBuffer();
|
---|
| 282 | queryStr.append("PREFIX owl: <http://www.w3.org/2002/07/owl#>\n");
|
---|
| 283 | queryStr.append("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n");
|
---|
| 284 | queryStr.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
|
---|
| 285 | queryStr.append("SELECT DISTINCT ?res ?label \n");
|
---|
| 286 | if (graphURIs != null) {
|
---|
| 287 | for (String graphURI : graphURIs) {
|
---|
| 288 | queryStr.append("FROM <");
|
---|
| 289 | queryStr.append(graphURI);
|
---|
| 290 | queryStr.append(">\n");
|
---|
| 291 | }
|
---|
| 292 | }
|
---|
| 293 | queryStr.append("WHERE{\n");
|
---|
| 294 | queryStr.append(" ?res rdfs:label ?label.\n");
|
---|
| 295 | queryStr.append(" FILTER(?res IN (");
|
---|
| 296 | boolean f = false;
|
---|
| 297 | for (String resourceURI : resourceURIs) {
|
---|
| 298 | if (f) {
|
---|
| 299 | queryStr.append(", ");
|
---|
| 300 | }
|
---|
| 301 | f = true;
|
---|
| 302 | queryStr.append("<");
|
---|
| 303 | queryStr.append(resourceURI);
|
---|
| 304 | queryStr.append(">");
|
---|
| 305 | }
|
---|
| 306 | queryStr.append("))\n");
|
---|
| 307 | queryStr.append("}");
|
---|
| 308 |
|
---|
| 309 | System.out.println(queryStr.toString());
|
---|
| 310 |
|
---|
| 311 | Query query = QueryFactory.create(queryStr.toString());
|
---|
| 312 | QueryExecution qexec = QueryExecutionFactory.create(query, model);
|
---|
| 313 |
|
---|
| 314 | ResultSet results = qexec.execSelect();
|
---|
| 315 | HashMap<String, LabelMap> lMap = new HashMap<String, LabelMap>();
|
---|
| 316 | for (; results.hasNext();) {
|
---|
| 317 | QuerySolution sol = results.next();
|
---|
| 318 | String uri = sol.getResource("res").getURI();
|
---|
| 319 | Literal literal = sol.getLiteral("label");
|
---|
| 320 | if (literal != null) {
|
---|
| 321 | String label = literal.getString();
|
---|
| 322 | String lang = literal.getLanguage();
|
---|
| 323 | if (language != null && language.equals(lang)) {
|
---|
| 324 | Label lbl = new Label(label, lang);
|
---|
| 325 | if (lMap.containsKey(uri)) {
|
---|
| 326 | LabelMap lm = lMap.get(uri);
|
---|
| 327 | lm.addLabel(lbl);
|
---|
| 328 | } else {
|
---|
| 329 | LabelMap lm = new LabelMap(uri, new Label[] { lbl });
|
---|
| 330 | lMap.put(uri, lm);
|
---|
| 331 | }
|
---|
| 332 | }
|
---|
| 333 | }
|
---|
| 334 | }
|
---|
| 335 | return lMap.values().toArray(new LabelMap[0]);
|
---|
| 336 | }
|
---|
| 337 | /*
|
---|
| 338 | public ClassLink[] countLinks(String[] graphURIs, String startClassURI,
|
---|
| 339 | ClassLink[] classLinks) throws Exception;
|
---|
| 340 |
|
---|
| 341 | public SClass[] countInstances(String[] graphURIs, SClass[] classes) throws Exception;
|
---|
| 342 |
|
---|
| 343 |
|
---|
| 344 | */
|
---|
| 345 | }
|
---|