package org.biohackathon.SPARQLBuilder.OWL;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.JenaModelGenerator;
import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.URICollection;

import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;

/**
 * {@link RDFSchemaAnalyzer} implementation that answers schema questions
 * (classes, links between classes, labels) by running SPARQL SELECT queries
 * against an in-memory Jena {@link Model} handed to the constructor.
 *
 * <p>The queries are built from the property/resource URIs defined in
 * {@link URICollection}. The endpoint URI and graph URIs are stored and
 * exposed through getters; only {@link #getLabels} turns graph URIs (its own
 * argument) into {@code FROM} clauses.
 */
public class AcquiredStructureAnalyzer implements RDFSchemaAnalyzer {

    /** Standard W3C namespace declarations prepended to every query. */
    private static final String PREFIXES =
            "PREFIX owl: <http://www.w3.org/2002/07/owl#>\n"
            + "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"
            + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n";

    private final Model model;
    private final String endpointURI;
    private final String[] graphURIs;

    /**
     * Creates an analyzer over the given model.
     *
     * @param endpointURI endpoint URI reported by {@link #getEndpointURI()}
     * @param graphURIs   graph URIs reported by {@link #getGraphURIs()}
     * @param model       model every query of this class is executed against
     */
    public AcquiredStructureAnalyzer(String endpointURI, String[] graphURIs, Model model) {
        this.model = model;
        this.endpointURI = endpointURI;
        this.graphURIs = graphURIs;
    }

    /** @return the endpoint URI given to the constructor */
    public String getEndpointURI() {
        return endpointURI;
    }

    /** @return the graph URI array given to the constructor (not copied) */
    public String[] getGraphURIs() {
        return graphURIs;
    }

    /**
     * Manual smoke test: loads a Turtle file from a hard-coded local path and
     * prints every class found in it.
     */
    public static void main(String[] args) throws Exception {
        JenaModelGenerator jmGene = new JenaModelGenerator("c:\\temp\\biosamplesF11.ttl");
        AcquiredStructureAnalyzer impl = new AcquiredStructureAnalyzer(
                jmGene.getEndpointURI(), jmGene.getGraphURIs(), jmGene.getModel());
        System.out.println("--------------------------");
        SClass[] scs = impl.getOWLClasses(null, null, null, true);
        System.out.println("list classes:---------------");
        for (SClass sc : scs) {
            System.out.println(sc.toString());
        }
        System.out.println("--------------------------");
    }

    /**
     * Placeholder for restricting caller-supplied graph URIs (TODO).
     * Currently ignores its argument and returns the graph URIs configured on
     * this analyzer. It is not called at present because the class queries do
     * not emit FROM clauses.
     */
    private String[] filterGraphURIs(String[] orgGraphURIs) {
        // TODO
        return graphURIs;
    }

    /**
     * Lists all classes; shorthand for {@link #getOWLClasses} without keyword
     * or language restriction.
     *
     * @param graphURIs      passed through to {@link #getOWLClasses}
     * @param countInstances whether instance counts are read from the results
     * @return the classes found, in no particular order
     * @throws Exception if the query cannot be parsed or executed
     */
    public SClass[] listClasses(String[] graphURIs, boolean countInstances) throws Exception {
        return getOWLClasses(graphURIs, null, null, countInstances);
    }

    /**
     * Retrieves the classes described in the model, one {@link SClass} per
     * distinct class URI, with every label found for that class attached.
     *
     * @param graphURIs      currently not used to restrict the query
     * @param keywords       if non-empty, only classes with an rdfs:label
     *                       matching (case-insensitive regex) at least one
     *                       keyword are returned; may be {@code null}
     * @param language       language tag the matching label must carry; only
     *                       used when {@code keywords} is non-empty
     * @param countInstances if {@code true} the entity count bound in the
     *                       result is stored in the class, otherwise 0
     * @return the classes found, in no particular order
     * @throws Exception if the query cannot be parsed or executed
     */
    public SClass[] getOWLClasses(String[] graphURIs, String[] keywords, String language,
            boolean countInstances) throws Exception {
        StringBuilder queryStr = new StringBuilder(PREFIXES);
        queryStr.append("SELECT DISTINCT ?c ?pLabel ?entities\n");
        // No FROM clauses: the query runs against the model held by this object.
        queryStr.append("WHERE{\n");
        appendTriple(queryStr, "?cp", URICollection.PROPERTY_VOID_CLASS, "?c");
        appendTriple(queryStr, "?cp", URICollection.PROPERTY_VOID_ENTITIES, "?entities");
        queryStr.append(" OPTIONAL{ ?c <").append(URICollection.PROPERTY_RDFS_LABEL)
                .append("> ?pLabel. }\n");
        if (keywords != null && keywords.length != 0) {
            queryStr.append(" ?c rdfs:label ?keywords.\n");
            // NOTE(review): a null language is rendered as the text "null",
            // which matches no label; confirm whether that is intended.
            queryStr.append(" filter((LANG(?keywords) = '")
                    .append(escapeForSparqlString(language))
                    .append("') && \n (");
            for (int i = 0; i < keywords.length; i++) {
                if (i > 0) {
                    queryStr.append(" || \n ");
                }
                queryStr.append("regex(str(?keywords),\"")
                        .append(escapeForSparqlString(keywords[i]))
                        .append("\", \"i\" )");
            }
            queryStr.append("))\n");
        }
        queryStr.append("}");
        System.out.println(queryStr.toString());

        Query query = QueryFactory.create(queryStr.toString());
        QueryExecution qexec = QueryExecutionFactory.create(query, model);
        try {
            ResultSet results = qexec.execSelect();
            Map<String, SClass> classMap = new HashMap<String, SClass>();
            while (results.hasNext()) {
                QuerySolution sol = results.next();
                Resource res = sol.getResource("c");
                if (res == null || res.getURI() == null) {
                    continue; // unbound or anonymous class: nothing to key on
                }
                String uri = res.getURI();
                int numOfInstances = 0;
                if (countInstances) {
                    numOfInstances = sol.getLiteral("entities").getInt();
                }
                Literal labelLiteral = sol.getLiteral("pLabel");
                // One row per (class, label) pair: reuse the class created
                // for an earlier row and just add the further label.
                SClass sClass = classMap.get(uri);
                if (sClass == null) {
                    sClass = new SClass(uri, null, numOfInstances);
                    classMap.put(uri, sClass);
                }
                if (labelLiteral != null) {
                    sClass.addLabel(new Label(labelLiteral.getString(), labelLiteral.getLanguage()));
                }
            }
            return classMap.values().toArray(new SClass[0]);
        } finally {
            qexec.close();
        }
    }

    /**
     * Finds the class relations that start or end at {@code originClass}.
     *
     * <p>Three patterns are united: relations whose subject class is the
     * origin and whose object is a class ({@code ?d}, forward), relations
     * whose subject class is the origin and whose object is a datatype
     * ({@code ?dat}, forward), and relations whose object class is the origin
     * ({@code ?c}, reverse). Rows whose property category is missing or not
     * below 4 are skipped.
     *
     * @param graphURIs   currently not used to restrict the query
     * @param originClass URI of the class to start from; inserted into the
     *                    query between angle brackets as given
     * @param limit       maximum number of result rows; no LIMIT clause is
     *                    emitted when not positive
     * @param countLinks  not used by this implementation
     * @return one {@link ClassLink} per accepted result row
     * @throws Exception if the query cannot be parsed or executed
     */
    public ClassLink[] getNextClass(String[] graphURIs, String originClass, int limit,
            boolean countLinks) throws Exception {
        String origin = "<" + originClass + ">";

        StringBuilder queryStr = new StringBuilder(PREFIXES);
        queryStr.append("SELECT DISTINCT ?indPropCat ?c ?dat ?d ?p ?numLnkInsStart ?numLnkInsEnd ?numInsDom ?numInsRan ?numTriples\n");
        // No FROM clauses: the query runs against the model held by this object.
        queryStr.append("WHERE{\n");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_RDF_TYPE,
                "<" + URICollection.RESOURCE_SB_CLASS_RELATION + ">");
        appendTriple(queryStr, "?propPart", URICollection.PROPERTY_SB_CLASS_RELATION, "?cr");
        appendTriple(queryStr, "?propPart", URICollection.PROPERTY_VOID_PROPERTY, "?p");

        // origin --p--> class ?d
        queryStr.append(" {");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_SB_SUBJECT_CLASS, origin);
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_SB_OBJECT_CLASS, "?d");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS, "?numLnkInsStart");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_VOID_DISTINCT_OBJECTS, "?numLnkInsEnd");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_VOID_TRIPLES, "?numTriples");
        queryStr.append("}\n");
        queryStr.append(" UNION\n");

        // origin --p--> datatype ?dat
        queryStr.append(" {");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_SB_SUBJECT_CLASS, origin);
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_SB_OBJECT_DATATYPE, "?dat");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS, "?numLnkInsStart");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_VOID_DISTINCT_OBJECTS, "?numLnkInsEnd");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_VOID_TRIPLES, "?numTriples");
        queryStr.append("}\n");
        queryStr.append(" UNION\n");

        // class ?c --p--> origin; start/end counts are swapped because the
        // origin sits on the object side of the relation here.
        queryStr.append(" {");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_SB_OBJECT_CLASS, origin);
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_SB_SUBJECT_CLASS, "?c");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS, "?numLnkInsEnd");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_VOID_DISTINCT_OBJECTS, "?numLnkInsStart");
        appendTriple(queryStr, "?cr", URICollection.PROPERTY_VOID_TRIPLES, "?numTriples");
        queryStr.append("}\n");

        appendTriple(queryStr, "?propPart", URICollection.PROPERTY_SB_PROPERTY_CATEGORY, "?indPropCat");
        appendTriple(queryStr, "?propPart", URICollection.PROPERTY_VOID_DISTINCT_SUBJECTS, "?numInsDom");
        appendTriple(queryStr, "?propPart", URICollection.PROPERTY_VOID_DISTINCT_OBJECTS, "?numInsRan");
        queryStr.append("}\n");
        if (limit > 0) {
            queryStr.append("limit ");
            queryStr.append(limit);
            queryStr.append("\n");
        }

        Query query = QueryFactory.create(queryStr.toString());
        QueryExecution qexec = QueryExecutionFactory.create(query, model);
        try {
            long start = System.currentTimeMillis();
            ResultSet results = qexec.execSelect();
            long end = System.currentTimeMillis();
            System.out.println("EXEC TIME: " + (end - start));

            List<ClassLink> solCLs = new ArrayList<ClassLink>();
            while (results.hasNext()) {
                QuerySolution sol = results.next();
                Resource pro = sol.getResource("p");
                if (pro == null) {
                    continue;
                }
                // A missing category counts as 4, i.e. the row is rejected.
                if (intValue(sol, "indPropCat", 4) >= 4) {
                    continue;
                }

                String proURI = pro.getURI();
                Resource ccls = sol.getResource("c");
                Resource dcls = sol.getResource("d");
                Resource dat = sol.getResource("dat");

                String clsURI = null;
                String datURI = null;
                Direction direction = null;
                if (ccls != null && dcls == null) {
                    // origin is the object class: link is followed backwards
                    direction = Direction.reverse;
                    clsURI = ccls.getURI();
                } else if (ccls == null && dcls != null) {
                    direction = Direction.forward;
                    clsURI = dcls.getURI();
                } else if (ccls == null && dat != null && dcls == null) {
                    // datatype-valued property: no target class
                    direction = Direction.forward;
                    datURI = dat.getURI();
                }

                int numTriples = intValue(sol, "numTriples", 0);
                int numLnkInsStart = intValue(sol, "numLnkInsStart", 0);
                int numLnkInsEnd = intValue(sol, "numLnkInsEnd", 0);
                int numInsDom = intValue(sol, "numInsDom", 0);
                int numInsRan = intValue(sol, "numInsRan", 0);
                // The query above never binds ?isStartClsLim / ?isEndClsLim,
                // so these currently always come out as false.
                boolean isStartClsLim = booleanValue(sol, "isStartClsLim", false);
                boolean isEndClsLim = booleanValue(sol, "isEndClsLim", false);

                solCLs.add(new ClassLink(proURI, clsURI, datURI, direction, numTriples,
                        numInsDom, numInsRan, numLnkInsStart, numLnkInsEnd,
                        isStartClsLim, isEndClsLim));
            }
            return solCLs.toArray(new ClassLink[0]);
        } finally {
            qexec.close();
        }
    }

    /**
     * Looks up the rdfs:label values of the given resources in one language.
     *
     * @param graphURIs    if non-null, each entry is emitted as a FROM clause
     * @param resourceURIs URIs to look up; inserted into the query between
     *                     angle brackets as given
     * @param language     only labels whose language tag equals this value
     *                     are kept; with {@code null} no label is kept
     * @return one {@link LabelMap} per resource that has at least one label
     *         in the requested language; empty when {@code resourceURIs} is
     *         {@code null} or empty
     * @throws Exception if the query cannot be parsed or executed
     */
    public LabelMap[] getLabels(String[] graphURIs, String[] resourceURIs, String language)
            throws Exception {
        if (resourceURIs == null || resourceURIs.length == 0) {
            return new LabelMap[0];
        }

        StringBuilder queryStr = new StringBuilder(PREFIXES);
        queryStr.append("SELECT DISTINCT ?res ?label \n");
        if (graphURIs != null) {
            for (String graphURI : graphURIs) {
                queryStr.append("FROM <");
                queryStr.append(graphURI);
                queryStr.append(">\n");
            }
        }
        queryStr.append("WHERE{\n");
        queryStr.append(" ?res rdfs:label ?label.\n");
        queryStr.append(" FILTER(?res IN (");
        for (int i = 0; i < resourceURIs.length; i++) {
            if (i > 0) {
                queryStr.append(", ");
            }
            queryStr.append("<").append(resourceURIs[i]).append(">");
        }
        queryStr.append("))\n");
        queryStr.append("}");

        Query query = QueryFactory.create(queryStr.toString());
        QueryExecution qexec = QueryExecutionFactory.create(query, model);
        try {
            ResultSet results = qexec.execSelect();
            Map<String, LabelMap> lMap = new HashMap<String, LabelMap>();
            while (results.hasNext()) {
                QuerySolution sol = results.next();
                String uri = sol.getResource("res").getURI();
                Literal literal = sol.getLiteral("label");
                if (literal == null) {
                    continue;
                }
                String lang = literal.getLanguage();
                if (language == null || !language.equals(lang)) {
                    continue;
                }
                Label lbl = new Label(literal.getString(), lang);
                LabelMap known = lMap.get(uri);
                if (known != null) {
                    known.addLabel(lbl);
                } else {
                    lMap.put(uri, new LabelMap(uri, new Label[] { lbl }));
                }
            }
            return lMap.values().toArray(new LabelMap[0]);
        } finally {
            qexec.close();
        }
    }

    /** Appends the triple pattern {@code subject <predicate> object. } plus a line break. */
    private static void appendTriple(StringBuilder sb, String subject, Object predicate,
            String object) {
        sb.append(" ").append(subject).append(" <").append(predicate).append("> ")
                .append(object).append(". \n");
    }

    /** Returns the int value bound to {@code varName}, or {@code defaultValue} when unbound. */
    private static int intValue(QuerySolution sol, String varName, int defaultValue) {
        Literal lit = sol.getLiteral(varName);
        return lit != null ? lit.getInt() : defaultValue;
    }

    /** Returns the boolean value bound to {@code varName}, or {@code defaultValue} when unbound. */
    private static boolean booleanValue(QuerySolution sol, String varName, boolean defaultValue) {
        Literal lit = sol.getLiteral(varName);
        return lit != null ? lit.getBoolean() : defaultValue;
    }

    /**
     * Escapes text so it can be placed inside a quoted SPARQL string literal
     * without terminating it or forming an invalid escape sequence.
     * {@code null} is rendered as the text {@code null}.
     */
    private static String escapeForSparqlString(String raw) {
        String text = String.valueOf(raw);
        StringBuilder out = new StringBuilder(text.length() + 8);
        for (int i = 0; i < text.length(); i++) {
            char ch = text.charAt(i);
            switch (ch) {
            case '\\':
                out.append("\\\\");
                break;
            case '"':
                out.append("\\\"");
                break;
            case '\'':
                out.append("\\'");
                break;
            case '\n':
                out.append("\\n");
                break;
            case '\r':
                out.append("\\r");
                break;
            default:
                out.append(ch);
                break;
            }
        }
        return out.toString();
    }
}