package org.biohackathon.SPARQLBuilder.OWL;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;

import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Resource;

/**
 * Core class providing the schema-inspection queries used to build SPARQL
 * queries against a remote endpoint.
 *
 * <p>
 * Every public method assembles a SPARQL SELECT query as text, sends it to the
 * endpoint given at construction time and converts the result rows into the
 * project's value objects. Class/instance URIs and keywords passed by callers
 * are inserted into the query text verbatim (no escaping is applied).
 *
 * @author Norio KOBAYASHI
 * @since 28.01.2014
 * @version 29.01.2014
 */
public class EndpointAnalyzer implements RDFSchemaAnalyzer {

    /** Namespace declarations placed at the top of every generated query. */
    private static final String PREFIXES =
            "PREFIX owl: <http://www.w3.org/2002/07/owl#>\n"
            + "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"
            + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n";

    /** URI of the SPARQL endpoint all queries are sent to. */
    private final String endpointURI;

    /**
     * Creates an analyzer bound to one SPARQL endpoint.
     *
     * @param endpointURI
     *            URI of the SPARQL endpoint to access
     * @since 28.01.2014
     */
    public EndpointAnalyzer(String endpointURI) {
        this.endpointURI = endpointURI;
    }

    /**
     * Manual smoke test: searches a fixed endpoint for classes by keyword and
     * prints them before and after instance counting.
     *
     * <p>
     * Not used by the production query builder.
     *
     * @param args
     *            not used
     * @throws Exception
     *             if a query fails
     * @since 28.01.2014
     */
    public static void main(String[] args) throws Exception {
        String sparqlEndpoint = "http://lsd.dbcls.jp/sparql";
        String[] keyword = { "Frequency", "Class" };

        EndpointAnalyzer builder = new EndpointAnalyzer(sparqlEndpoint);
        SClass[] clz = builder.getOWLClasses(null, keyword, "en", false);
        if (clz != null) {
            for (SClass cls : clz) {
                System.out.println(cls);
            }
            clz = builder.countInstances(null, clz);
            for (SClass cls : clz) {
                System.out.println(cls);
            }
        }
    }

    /**
     * Finds labelled resources whose label matches any of the given keywords.
     *
     * <p>
     * The query selects every {@code ?c} that has an {@code rdfs:label}; no
     * {@code rdf:type rdfs:Class}/{@code owl:Class} restriction is applied.
     * When keywords are given, at least one label in {@code language} must
     * match one of them as a case-insensitive regular expression. All labels
     * of each hit are attached to the returned {@link SClass}.
     *
     * @param graphURIs
     *            graphs to search (may be null or empty)
     * @param keywords
     *            regular expressions, OR-combined; inserted into the query
     *            verbatim. If null or empty no keyword filter is applied.
     * @param language
     *            language tag the matching label must carry; only used when
     *            keywords are given
     * @param countInstances
     *            if true, additionally aggregates {@code COUNT(?i)} over
     *            {@code ?i rdf:type ?c}
     * @return the matching classes, one entry per URI
     * @throws Exception
     *             if the query cannot be parsed or executed
     * @since 28.01.2014
     */
    public SClass[] getOWLClasses(String[] graphURIs, String[] keywords,
            String language, boolean countInstances) throws Exception {
        StringBuilder queryStr = new StringBuilder(PREFIXES);
        if (countInstances) {
            queryStr.append("SELECT DISTINCT ?c ?pLabel (COUNT(?i) AS ?numOfInstances)\n");
        } else {
            queryStr.append("SELECT DISTINCT ?c ?pLabel \n");
        }
        appendFromClauses(queryStr, graphURIs);
        queryStr.append("WHERE{\n");
        if (countInstances) {
            queryStr.append(" ?c rdfs:label ?label.\n");
        }
        queryStr.append(" ?c rdfs:label ?pLabel.\n");
        if (countInstances) {
            queryStr.append(" ?i rdf:type ?c.\n");
        }

        // Keywords are combined with OR inside a single FILTER.
        if (keywords != null && keywords.length != 0) {
            queryStr.append(" ?c rdfs:label ?keywords.\n");
            queryStr.append(" filter((LANG(?keywords) = '").append(language);
            queryStr.append("') && \n (");
            for (int i = 0; i < keywords.length; i++) {
                if (i > 0) {
                    queryStr.append(" || \n ");
                }
                queryStr.append("regex(str(?keywords),\"");
                queryStr.append(keywords[i]);
                queryStr.append("\", \"i\" )");
            }
            queryStr.append("))\n");
        }

        queryStr.append(countInstances ? "} GROUP BY ?c ?pLabel" : "}");
        System.out.println(queryStr.toString());

        QueryExecution qexec = open(queryStr.toString());
        try {
            return collectClasses(timedSelect(qexec), "c", countInstances);
        } finally {
            qexec.close();
        }
    }

    /**
     * Lists every labelled resource that is used as a class.
     *
     * <p>
     * A resource counts as a class when it is typed {@code rdfs:Class} or
     * {@code owl:Class}, is the object of {@code rdf:type},
     * {@code rdfs:domain} or {@code rdfs:range}, or is the subject of
     * {@code rdfs:subClassOf}. Only resources with an {@code rdfs:label} are
     * returned.
     *
     * @param graphURIs
     *            graphs to search (may be null or empty)
     * @param countInstances
     *            if true, additionally aggregates {@code COUNT(?i)} over
     *            {@code ?i rdf:type ?cls}
     * @return the classes found, one entry per URI
     * @throws Exception
     *             if the query cannot be parsed or executed
     */
    public SClass[] listClasses(String[] graphURIs, boolean countInstances)
            throws Exception {
        StringBuilder queryStr = new StringBuilder(PREFIXES);
        if (countInstances) {
            queryStr.append("SELECT DISTINCT ?cls ?pLabel (COUNT(?i) AS ?numOfInstances)\n");
        } else {
            queryStr.append("SELECT DISTINCT ?cls ?pLabel \n");
        }
        appendFromClauses(queryStr, graphURIs);
        queryStr.append("WHERE{\n");
        queryStr.append("\t{ ?cls rdf:type rdfs:Class. }\n");
        queryStr.append("\tUNION\n");
        queryStr.append("\t{ ?cls rdf:type owl:Class. }\n");
        queryStr.append("\tUNION\n");
        queryStr.append("\t{ [] rdf:type ?cls. }\n");
        queryStr.append("\tUNION\n");
        queryStr.append("\t{ [] rdfs:domain ?cls. }\n");
        queryStr.append("\tUNION\n");
        queryStr.append("\t{ [] rdfs:range ?cls. }\n");
        queryStr.append("\tUNION\n");
        queryStr.append("\t{ ?cls rdfs:subClassOf []. }\n");
        queryStr.append("\t?cls rdfs:label ?pLabel.\n");
        if (countInstances) {
            // Must use the projected variable, otherwise the aggregate query
            // selects a variable that is not a group key.
            queryStr.append("\t?i rdf:type ?cls.\n");
            queryStr.append("} GROUP BY ?cls ?pLabel");
        } else {
            queryStr.append("}");
        }
        System.out.println(queryStr.toString());

        QueryExecution qexec = open(queryStr.toString());
        try {
            return collectClasses(timedSelect(qexec), "cls", countInstances);
        } finally {
            qexec.close();
        }
    }

    /**
     * Finds instances by exact label.
     *
     * <p>
     * An instance here is anything that appears as the subject of
     * {@code rdf:type}; subjects typed {@code rdf:Property} are excluded for
     * that type.
     *
     * @param graphURIs
     *            graphs to search (may be null or empty)
     * @param keyword
     *            the label as a complete SPARQL term, e.g.
     *            {@code "\"A.C. Reed\"@en"}; inserted into the query verbatim
     *            (must not be null or empty)
     * @return one {@link Instance} per subject URI, carrying all class URIs
     *         found for it
     * @throws Exception
     *             if the query cannot be parsed or executed
     * @since 28.01.2014
     */
    public Instance[] getInstances(String[] graphURIs, String keyword)
            throws Exception {
        StringBuilder queryStr = new StringBuilder(PREFIXES);
        queryStr.append("SELECT DISTINCT ?ins ?c \n");
        appendFromClauses(queryStr, graphURIs);
        queryStr.append("WHERE{\n");
        queryStr.append(" ?ins rdf:type ?c.\n");
        queryStr.append(" ?ins rdfs:label ");
        queryStr.append(keyword);
        queryStr.append(".\n");
        queryStr.append(" FILTER (?c != rdf:Property)");
        queryStr.append("}");

        HashMap<String, HashSet<String>> instanceMap = new HashMap<String, HashSet<String>>();
        QueryExecution qexec = open(queryStr.toString());
        try {
            ResultSet results = qexec.execSelect();
            while (results.hasNext()) {
                QuerySolution sol = results.next();
                String clsURI = sol.getResource("c").getURI();
                String insURI = sol.getResource("ins").getURI();
                HashSet<String> classes = instanceMap.get(insURI);
                if (classes == null) {
                    classes = new HashSet<String>();
                    instanceMap.put(insURI, classes);
                }
                classes.add(clsURI);
            }
        } finally {
            qexec.close();
        }

        Set<String> keySet = instanceMap.keySet();
        ArrayList<Instance> instanceList = new ArrayList<Instance>();
        for (String key : keySet) {
            instanceList.add(new Instance(key, instanceMap.get(key).toArray(new String[0])));
        }
        return instanceList.toArray(new Instance[0]);
    }

    /**
     * Starting from a class, follows the explicitly declared
     * {@code rdfs:domain}/{@code rdfs:range} constraints and returns the
     * classes reachable through them.
     *
     * @param graphURIs
     *            graphs to search (may be null or empty)
     * @param originClass
     *            URI of the starting class (must not be null)
     * @param limit
     *            maximum number of result rows; 0 or less means no limit
     * @param countLinks
     *            if true, the result is passed through
     *            {@link #countLinks(String[], String, ClassLink[])}
     * @return the links found, each with property, linked class and direction
     * @throws Exception
     *             if a query cannot be parsed or executed
     * @since 28.01.2014
     */
    public ClassLink[] getNextClass(String[] graphURIs, String originClass,
            int limit, boolean countLinks) throws Exception {
        // Counting is always done by the separate per-link queries.
        ClassLink[] cLinks = getNextClassSub(graphURIs, originClass, limit, false);
        if (countLinks) {
            cLinks = countLinks(graphURIs, originClass, cLinks);
        }
        return cLinks;
    }

    /**
     * Runs the domain/range query behind {@link #getNextClass}.
     *
     * @param graphURIs
     *            graphs to search (may be null or empty)
     * @param originClass
     *            URI of the starting class
     * @param limit
     *            maximum number of result rows; 0 or less means no limit
     * @param countLinks
     *            if true, uses the aggregating SELECT and reads
     *            {@code ?numOfLinks} from each row
     * @return the links found
     * @throws Exception
     *             if the query cannot be parsed or executed
     */
    private ClassLink[] getNextClassSub(String[] graphURIs, String originClass,
            int limit, boolean countLinks) throws Exception {
        StringBuilder queryStr = new StringBuilder(PREFIXES);
        if (countLinks) {
            queryStr.append("SELECT DISTINCT ?p ?pDirection ?c (COUNT(?oci) AS ?numOfOriginalClassInstances) (COUNT(?lci) AS ?numOfLinkedClassInstances) (COUNT(?s) AS ?numOfLinks) (COUNT(DISTINCT(?s)) AS ?numOfLinkedInstances) \n");
        } else {
            queryStr.append("SELECT DISTINCT ?p ?pDirection ?c \n");
        }
        appendFromClauses(queryStr, graphURIs);
        queryStr.append("WHERE{\n");

        // Properties whose domain is the origin class.
        queryStr.append(" { ?p rdfs:domain <");
        queryStr.append(originClass);
        queryStr.append(">.\n ?p rdfs:range ?c.\n");
        if (countLinks) {
            queryStr.append(" ?o ?p ?s.");
        } else {
            queryStr.append("filter(exists{\n");
            queryStr.append(" ?o rdf:type ?c.\n");
            queryStr.append(" ?s rdf:type <");
            queryStr.append(originClass);
            queryStr.append(">.\n");
            queryStr.append(" ?s ?p ?o.})\n");
        }
        queryStr.append("}\n UNION\n");

        // Properties whose range is the origin class.
        queryStr.append(" { ?p rdfs:range <");
        queryStr.append(originClass);
        queryStr.append(">.\n");
        queryStr.append(" ?p rdfs:domain ?c.\n");
        if (countLinks) {
            queryStr.append(" ?s ?p ?o.");
        } else {
            queryStr.append("filter(exists{\n");
            queryStr.append(" ?s rdf:type ?c.\n");
            queryStr.append(" ?o rdf:type <");
            queryStr.append(originClass);
            queryStr.append(">.\n");
            queryStr.append(" ?s ?p ?o.})\n");
        }
        queryStr.append("}\n");
        queryStr.append(" ?p ?pDirection ?c.\n");
        if (countLinks) {
            queryStr.append("}\nGROUP BY ?p ?pDirection ?c\n");
        } else {
            queryStr.append("}\n");
        }
        appendLimit(queryStr, limit);

        System.out.println("getNextClasses SPARQL Query: ");
        System.out.println(queryStr.toString());

        ArrayList<ClassLink> solCLs = new ArrayList<ClassLink>();
        QueryExecution qexec = open(queryStr.toString());
        try {
            ResultSet results = timedSelect(qexec);
            while (results.hasNext()) {
                QuerySolution sol = results.next();
                Resource pro = sol.getResource("p");
                Resource cls = sol.getResource("c");
                Resource dir = sol.getResource("pDirection");
                if (pro == null || cls == null || dir == null) {
                    continue;
                }
                String dirURI = dir.getURI();

                // ?c is the property's domain  -> the link points at the origin class.
                // ?c is the property's range   -> the link leaves the origin class.
                // Any other predicate linking ?p and ?c leaves the direction null.
                Direction direction = null;
                if (dirURI.equals("http://www.w3.org/2000/01/rdf-schema#domain")
                        || dirURI.equals("rdfs:domain")) {
                    direction = Direction.reverse;
                } else if (dirURI.equals("http://www.w3.org/2000/01/rdf-schema#range")
                        || dirURI.equals("rdfs:range")) {
                    direction = Direction.forward;
                }

                int numOfLinks = 0;
                if (countLinks) {
                    numOfLinks = sol.getLiteral("numOfLinks").getInt();
                }
                solCLs.add(new ClassLink(pro.getURI(), cls.getURI(), direction,
                        numOfLinks, 0, 0, 0, 0));
            }
        } finally {
            qexec.close();
        }
        return solCLs.toArray(new ClassLink[0]);
    }

    /**
     * Starting from a class, finds the instances linked to its instances and
     * returns the classes of those linked instances.
     *
     * <p>
     * An instance here is anything that appears as the subject of
     * {@code rdf:type}. Rows are grouped by forward property, reverse property
     * and linked class.
     *
     * @param graphURIs
     *            graphs to search (may be null or empty)
     * @param originClass
     *            URI of the starting class (must not be null)
     * @param limit
     *            maximum number of result rows; 0 or less means no limit
     * @return the links found, each with property, linked class, direction and
     *         counts
     * @throws Exception
     *             if the query cannot be parsed or executed
     * @since 28.01.2014
     */
    public ClassLink[] getNextClassViaInstanceLink(String[] graphURIs,
            String originClass, int limit) throws Exception {
        StringBuilder queryStr = new StringBuilder(PREFIXES);
        queryStr.append("SELECT DISTINCT ?pf ?pr (COUNT(?pf) AS ?numOfForwardLinks) (COUNT(?pr) AS ?numOfReverseLinks) (COUNT(DISTINCT(?insOrg)) AS ?numOfOriginInstances) (COUNT(DISTINCT(?ins)) AS ?numOfLinkedInstances) ?c \n");
        appendFromClauses(queryStr, graphURIs);
        queryStr.append("WHERE{\n");
        queryStr.append(" ?ins rdf:type ?c.\n");
        queryStr.append(" ?insOrg rdf:type <");
        queryStr.append(originClass);
        queryStr.append(">.\n");
        queryStr.append(" { ?ins ?pr ?insOrg. }\n UNION { ?insOrg ?pf ?ins. }\n");
        queryStr.append("}\n");
        queryStr.append("GROUP BY ?pf ?pr ?c\n");
        appendLimit(queryStr, limit);
        System.out.println(queryStr.toString());

        ArrayList<ClassLink> solCLs = new ArrayList<ClassLink>();
        QueryExecution qexec = open(queryStr.toString());
        try {
            ResultSet results = timedSelect(qexec);
            while (results.hasNext()) {
                QuerySolution sol = results.next();
                Resource proForward = sol.getResource("pf");
                Resource proReverse = sol.getResource("pr");
                Resource cls = sol.getResource("c");

                Direction direction;
                String propURI;
                int numOfLinks;
                int numOfLinkedInstances;
                int numOfOriginInstances;
                if (proForward != null) {
                    numOfLinks = sol.getLiteral("numOfForwardLinks").getInt();
                    if (proReverse != null) {
                        numOfLinks += sol.getLiteral("numOfReverseLinks").getInt();
                        direction = Direction.both;
                    } else {
                        direction = Direction.forward;
                    }
                    numOfLinkedInstances = sol.getLiteral("numOfLinkedInstances").getInt();
                    numOfOriginInstances = sol.getLiteral("numOfOriginInstances").getInt();
                    propURI = proForward.getURI();
                } else {
                    direction = Direction.reverse;
                    propURI = proReverse.getURI();
                    // NOTE(review): origin/linked counts are read from the
                    // opposite columns for reverse links; kept as-is, confirm
                    // this swap is intended.
                    numOfLinkedInstances = sol.getLiteral("numOfOriginInstances").getInt();
                    numOfOriginInstances = sol.getLiteral("numOfLinkedInstances").getInt();
                    numOfLinks = sol.getLiteral("numOfReverseLinks").getInt();
                }

                solCLs.add(new ClassLink(propURI, cls.getURI(), direction,
                        numOfLinks, numOfOriginInstances, numOfLinkedInstances, 0, 0));
            }
        } finally {
            qexec.close();
        }
        return solCLs.toArray(new ClassLink[0]);
    }

    /**
     * Starting from an instance, returns the instances linked to it together
     * with the classes of each linked instance.
     *
     * <p>
     * An instance here is anything that appears as the subject of
     * {@code rdf:type}; the type {@code rdf:Property} is excluded. Rows that
     * share property, instance and direction are merged into one
     * {@link InstanceLink} carrying all class URIs.
     *
     * @param graphURIs
     *            graphs to search (may be null or empty)
     * @param originInstance
     *            URI of the starting instance (must not be null)
     * @param limit
     *            maximum number of result rows; 0 or less means no limit
     * @return the links found, each with property, linked instance, its
     *         classes and direction
     * @throws Exception
     *             if the query cannot be parsed or executed
     * @since 28.01.2014
     */
    public InstanceLink[] getNextInstancesViaInstanceLink(String[] graphURIs,
            String originInstance, int limit) throws Exception {
        StringBuilder queryStr = new StringBuilder(PREFIXES);
        queryStr.append("SELECT DISTINCT ?pf ?pr ?ins ?c \n");
        appendFromClauses(queryStr, graphURIs);
        queryStr.append("WHERE{\n");
        queryStr.append(" ?ins rdf:type ?c.\n");
        queryStr.append(" { ?ins ?pr <");
        queryStr.append(originInstance);
        queryStr.append(">. }\n UNION { <");
        queryStr.append(originInstance);
        queryStr.append("> ?pf ?ins. }\n");
        queryStr.append(" FILTER (?c != rdf:Property)");
        queryStr.append("}\n");
        appendLimit(queryStr, limit);

        HashMap<String, InstanceLink> insLinkMap = new HashMap<String, InstanceLink>();
        QueryExecution qexec = open(queryStr.toString());
        try {
            ResultSet results = qexec.execSelect();
            while (results.hasNext()) {
                QuerySolution sol = results.next();
                Resource proForward = sol.getResource("pf");
                Resource proReverse = sol.getResource("pr");
                Resource ins = sol.getResource("ins");
                Resource cls = sol.getResource("c");

                Direction direction;
                String propURI;
                if (proForward != null) {
                    direction = (proReverse != null) ? Direction.both : Direction.forward;
                    propURI = proForward.getURI();
                } else {
                    direction = Direction.reverse;
                    propURI = proReverse.getURI();
                }

                String clsURI = cls.getURI();
                String insURI = ins.getURI();
                String key = propURI + "\t" + insURI + "\t" + direction;
                InstanceLink insLink = insLinkMap.get(key);
                if (insLink != null) {
                    insLink.addLinkedClassURI(clsURI);
                } else {
                    insLinkMap.put(key, new InstanceLink(propURI, insURI,
                            new String[] { clsURI }, direction));
                }
            }
        } finally {
            qexec.close();
        }
        Collection<InstanceLink> values = insLinkMap.values();
        return values.toArray(new InstanceLink[0]);
    }

    /**
     * Fetches the {@code rdfs:label}s of the given resources in one language.
     *
     * @param graphURIs
     *            graphs to search (may be null or empty)
     * @param resourceURIs
     *            URIs to look up; null or empty yields an empty result
     * @param language
     *            language tag a label must carry to be returned; if null no
     *            label is returned
     * @return one {@link LabelMap} per resource that has at least one label in
     *         the requested language
     * @throws Exception
     *             if the query cannot be parsed or executed
     */
    public LabelMap[] getLabels(String[] graphURIs, String[] resourceURIs,
            String language) throws Exception {
        if (resourceURIs == null || resourceURIs.length == 0) {
            return new LabelMap[0];
        }
        StringBuilder queryStr = new StringBuilder(PREFIXES);
        queryStr.append("SELECT DISTINCT ?res ?label \n");
        appendFromClauses(queryStr, graphURIs);
        queryStr.append("WHERE{\n");
        queryStr.append(" ?res rdfs:label ?label.\n");
        queryStr.append(" FILTER(?res IN (");
        appendIriList(queryStr, resourceURIs);
        queryStr.append("))\n");
        queryStr.append("}");
        System.out.println(queryStr.toString());

        HashMap<String, LabelMap> lMap = new HashMap<String, LabelMap>();
        QueryExecution qexec = open(queryStr.toString());
        try {
            ResultSet results = qexec.execSelect();
            while (results.hasNext()) {
                QuerySolution sol = results.next();
                String uri = sol.getResource("res").getURI();
                Literal literal = sol.getLiteral("label");
                if (literal == null) {
                    continue;
                }
                String lang = literal.getLanguage();
                // The language restriction is applied client-side.
                if (language == null || !language.equals(lang)) {
                    continue;
                }
                Label lbl = new Label(literal.getString(), lang);
                LabelMap lm = lMap.get(uri);
                if (lm != null) {
                    lm.addLabel(lbl);
                } else {
                    lMap.put(uri, new LabelMap(uri, new Label[] { lbl }));
                }
            }
        } finally {
            qexec.close();
        }
        return lMap.values().toArray(new LabelMap[0]);
    }

    /**
     * Fills in the link and instance counts of each given class link.
     *
     * <p>
     * Three queries are issued per link: one counting the links and the
     * distinct instances on each side, and one each counting all instances of
     * the two classes involved. For forward links the start class is on the
     * subject side; for every other direction the linked class is. Reverse
     * links are counted along the property from the linked class to the start
     * class; links with any other direction are counted in both directions.
     *
     * @param graphURIs
     *            graphs to search (may be null or empty)
     * @param startClassURI
     *            URI of the class the links start from
     * @param classLinks
     *            links to update in place; null or empty yields an empty array
     * @return the same array, with counts set
     * @throws Exception
     *             if a query cannot be parsed or executed
     */
    public ClassLink[] countLinks(String[] graphURIs, String startClassURI,
            ClassLink[] classLinks) throws Exception {
        if (classLinks == null || classLinks.length == 0) {
            return new ClassLink[0];
        }
        for (ClassLink classLink : classLinks) {
            Direction direction = classLink.getDirection();
            boolean forward = direction == Direction.forward;
            String linkedClassURI = classLink.getLinkedClassURI();
            String propertyURI = classLink.getPropertyURI();

            StringBuilder queryStr = new StringBuilder(PREFIXES);
            queryStr.append("SELECT (COUNT(?os) AS ?numOfLinks) (COUNT(DISTINCT(?is)) AS ?numOfOriginInstances) (COUNT(DISTINCT(?os)) AS ?numOfLinkedInstances) \n");
            appendFromClauses(queryStr, graphURIs);
            queryStr.append("WHERE{\n");
            // ?is is always the subject side of the property.
            queryStr.append(forward ? "?is" : "?os");
            queryStr.append(" rdf:type <").append(startClassURI).append(">.\n");
            queryStr.append(forward ? "?os" : "?is");
            queryStr.append(" rdf:type <").append(linkedClassURI).append(">.\n");
            if (forward || direction == Direction.reverse) {
                queryStr.append("?is <").append(propertyURI).append("> ?os.\n");
            } else {
                queryStr.append("{?is <").append(propertyURI).append("> ?os.}\n");
                queryStr.append("UNION\n");
                queryStr.append("{?os <").append(propertyURI).append("> ?is.}\n");
            }
            queryStr.append("}");
            System.out.println(queryStr.toString());

            QueryExecution qexec = open(queryStr.toString());
            try {
                ResultSet results = qexec.execSelect();
                if (results.hasNext()) {
                    QuerySolution sol = results.next();
                    Literal lit = sol.getLiteral("numOfLinks");
                    if (lit != null) {
                        classLink.setNumOfLinks(lit.getInt());
                    }
                    lit = sol.getLiteral("numOfLinkedInstances");
                    if (lit != null) {
                        classLink.setNumOfLinkedInstances(lit.getInt());
                    }
                    lit = sol.getLiteral("numOfOriginInstances");
                    if (lit != null) {
                        classLink.setNumOfOriginInstances(lit.getInt());
                    }
                }
            } finally {
                qexec.close();
            }

            // Total instances of the class on the subject side ...
            Integer originClassInstances = countTypedInstances(graphURIs, "ics",
                    "numOfOriginClassInstances",
                    forward ? startClassURI : linkedClassURI);
            if (originClassInstances != null) {
                classLink.setNumOfOriginClassInstances(originClassInstances.intValue());
            }

            // ... and of the class on the object side.
            Integer linkedClassInstances = countTypedInstances(graphURIs, "ocs",
                    "numOfLinkedClassInstances",
                    forward ? linkedClassURI : startClassURI);
            if (linkedClassInstances != null) {
                classLink.setNumOfLinkedClassInstances(linkedClassInstances.intValue());
            }
        }
        return classLinks;
    }

    /**
     * Sets the instance count of each given class using one grouped query.
     *
     * @param graphURIs
     *            graphs to search (may be null or empty)
     * @param classes
     *            classes to update in place; null or empty yields an empty
     *            array
     * @return the same array; classes for which the endpoint returns no row
     *         keep their previous count
     * @throws Exception
     *             if the query cannot be parsed or executed
     */
    public SClass[] countInstances(String[] graphURIs, SClass[] classes)
            throws Exception {
        if (classes == null || classes.length == 0) {
            return new SClass[0];
        }
        HashMap<String, SClass> classMap = new HashMap<String, SClass>();
        for (SClass sc : classes) {
            classMap.put(sc.getClassURI(), sc);
        }

        StringBuilder queryStr = new StringBuilder(PREFIXES);
        queryStr.append("SELECT DISTINCT ?cls (COUNT(?is) AS ?numOfInstances)\n");
        appendFromClauses(queryStr, graphURIs);
        queryStr.append("WHERE{\n");
        queryStr.append(" ?is rdf:type ?cls.\n");
        queryStr.append(" FILTER(?cls IN (");
        appendIriList(queryStr, classMap.keySet().toArray(new String[0]));
        queryStr.append("))\n");
        queryStr.append("} GROUP BY ?cls");
        System.out.println(queryStr.toString());

        QueryExecution qexec = open(queryStr.toString());
        try {
            ResultSet results = qexec.execSelect();
            while (results.hasNext()) {
                QuerySolution sol = results.next();
                SClass sc = classMap.get(sol.getResource("cls").getURI());
                Literal lit = sol.getLiteral("numOfInstances");
                if (sc != null && lit != null) {
                    sc.setNumOfInstances(lit.getInt());
                }
            }
        } finally {
            qexec.close();
        }
        return classes;
    }

    /** Appends one {@code FROM <graph>} line per graph URI; null means none. */
    private static void appendFromClauses(StringBuilder queryStr, String[] graphURIs) {
        if (graphURIs == null) {
            return;
        }
        for (String graphURI : graphURIs) {
            queryStr.append("FROM <");
            queryStr.append(graphURI);
            queryStr.append(">\n");
        }
    }

    /** Appends a {@code limit n} line when {@code limit} is positive. */
    private static void appendLimit(StringBuilder queryStr, int limit) {
        if (limit > 0) {
            queryStr.append("limit ");
            queryStr.append(limit);
            queryStr.append("\n");
        }
    }

    /** Appends the URIs as a comma-separated list of {@code <iri>} terms. */
    private static void appendIriList(StringBuilder queryStr, String[] iris) {
        for (int i = 0; i < iris.length; i++) {
            if (i > 0) {
                queryStr.append(", ");
            }
            queryStr.append("<");
            queryStr.append(iris[i]);
            queryStr.append(">");
        }
    }

    /** Parses the query text and prepares its execution against the endpoint. */
    private QueryExecution open(String queryText) {
        Query query = QueryFactory.create(queryText);
        return QueryExecutionFactory.sparqlService(endpointURI, query);
    }

    /**
     * Executes the SELECT and prints the elapsed milliseconds to stdout; a
     * failure is traced to stderr and rethrown unchanged.
     */
    private static ResultSet timedSelect(QueryExecution qexec) {
        try {
            long start = System.currentTimeMillis();
            ResultSet results = qexec.execSelect();
            long end = System.currentTimeMillis();
            System.out.println("EXEC TIME: " + (end - start));
            return results;
        } catch (RuntimeException ex) {
            ex.printStackTrace();
            throw ex;
        }
    }

    /**
     * Converts class/label rows into {@link SClass} objects, merging all
     * labels of the same URI into one object.
     *
     * @param results
     *            rows binding {@code classVar}, {@code ?pLabel} and, when
     *            counting, {@code ?numOfInstances}
     * @param classVar
     *            name of the variable holding the class
     * @param countInstances
     *            whether {@code ?numOfInstances} is present and must be read
     */
    private static SClass[] collectClasses(ResultSet results, String classVar,
            boolean countInstances) {
        HashMap<String, SClass> classMap = new HashMap<String, SClass>();
        while (results.hasNext()) {
            QuerySolution sol = results.next();
            Resource res = sol.getResource(classVar);
            if (res == null) {
                continue;
            }
            String uri = res.getURI();
            int numOfInstances = 0;
            if (countInstances) {
                numOfInstances = sol.getLiteral("numOfInstances").getInt();
            }
            SClass sClass = classMap.get(uri);
            if (sClass == null) {
                sClass = new SClass(uri, null, numOfInstances);
                classMap.put(uri, sClass);
            }
            Literal labelLiteral = sol.getLiteral("pLabel");
            if (labelLiteral != null) {
                sClass.addLabel(new Label(labelLiteral.getString(),
                        labelLiteral.getLanguage()));
            }
        }
        return classMap.values().toArray(new SClass[0]);
    }

    /**
     * Counts the subjects typed with the given class.
     *
     * @param graphURIs
     *            graphs to search (may be null or empty)
     * @param var
     *            variable name (without {@code ?}) used for the counted subject
     * @param alias
     *            variable name (without {@code ?}) the count is bound to
     * @param classURI
     *            URI of the class whose instances are counted
     * @return the count, or null if the endpoint returned no value
     */
    private Integer countTypedInstances(String[] graphURIs, String var,
            String alias, String classURI) {
        StringBuilder queryStr = new StringBuilder(PREFIXES);
        queryStr.append("SELECT (COUNT(?").append(var).append(") AS ?")
                .append(alias).append(") \n");
        appendFromClauses(queryStr, graphURIs);
        queryStr.append("WHERE{\n");
        queryStr.append("?").append(var).append(" rdf:type <");
        queryStr.append(classURI);
        queryStr.append(">.\n");
        queryStr.append("}");
        System.out.println(queryStr.toString());

        QueryExecution qexec = open(queryStr.toString());
        try {
            ResultSet results = qexec.execSelect();
            if (results.hasNext()) {
                Literal lit = results.next().getLiteral(alias);
                if (lit != null) {
                    return Integer.valueOf(lit.getInt());
                }
            }
            return null;
        } finally {
            qexec.close();
        }
    }
}