package hozo.sparql; import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLEncoder; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import com.hp.hpl.jena.query.Query; import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.ResultSetFactory; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.impl.ResourceImpl; /** * Sparqlとのデータ * @author kato * */ public class PlainSparqlAccessor implements ThreadedSparqlAccessor { private EndpointSettings setting; private SparqlQueryListener queryListener; public PlainSparqlAccessor(EndpointSettings endpoint, SparqlQueryListener queryListener){ this.queryListener = queryListener; this.setting = endpoint; } public PlainSparqlAccessor(EndpointSettings endpoint){ this(endpoint, null); } public EndpointSettings getSetting(){ return this.setting; } /** * queryを叩いて結果を返す * @param query * @return */ protected QueryExecution makeQuery(String queryString){ System.out.println("query:["+queryString+"]"); Query query = QueryFactory.create(queryString); QueryExecution qe = QueryExecutionFactory.sparqlService(getSetting().getEndpoint(), query); return qe; } /** * Threadを起動してquery実行を行う。結果はlistenerに返る。 * @param queryString query * @param resultListener 結果を受け取るlistener */ public boolean executeQuery(String queryString, SparqlResultListener resultListener){ Thread thread = new QueryThread(queryString, resultListener){ public void run(){ try { getSparqlResultListener().resultReceived(new SparqlResultSet(executeQuery(getQueryString()))); } catch (Exception e) { throw new RuntimeException(e); } } }; thread.setUncaughtExceptionHandler(resultListener); thread.start(); return true; } /** * query実行を行う。結果は戻り値として返る。 * @param queryString * @return * @throws Exception */ public List> executeQuery(String queryString) throws Exception{ List> ret = new ArrayList>(); QueryExecution qe = makeQuery(queryString); if (qe == null){ throw new Exception("Can't connect to endpoint"); } try { // System.out.println("query:"+queryString); if (this.queryListener != null){ queryListener.sparqlExecuted(queryString); } ResultSet results = null; try { if (!setting.isUseCustomParam()){ results = qe.execSelect(); } else { results = customQuery(queryString); } List keys = results.getResultVars(); while(results.hasNext()){ QuerySolution result = results.next(); HashMap map = new HashMap(); for (String key : keys){ RDFNode node = result.get(key); map.put(key, node); } ret.add(map); } } catch(Exception e){ e.printStackTrace(); // results = customQuery(queryString); } } catch(Exception e){ e.printStackTrace(); throw e; } finally { qe.close(); } return ret; } private ResultSet customQuery(String query) throws Exception { URL url = new URL(this.setting.getEndpoint() + "?" +setting.getQueryKey() + "=" + URLEncoder.encode(query, setting.getEncoding()) + "&" + setting.getOption());//POSTするデータ HttpURLConnection conn = (HttpURLConnection)url.openConnection(); conn.setRequestProperty("Accept-Language", "ja");// ヘッダを設定 conn.setRequestProperty("Referer", setting.getEndpoint());// ヘッダを設定 int resultType = setting.getResultType(); ResultSet ret = null; if (resultType == EndpointSettings.RESULT_TYPE_JSON){ ret = ResultSetFactory.fromJSON(conn.getInputStream()); } else if (resultType == EndpointSettings.RESULT_TYPE_XML){ ret = ResultSetFactory.fromXML(conn.getInputStream()); } else if (resultType == EndpointSettings.RESULT_TYPE_SSE){ ret = ResultSetFactory.fromSSE(conn.getInputStream()); } else if (resultType == EndpointSettings.RESULT_TYPE_TSV){ ret = ResultSetFactory.fromTSV(conn.getInputStream()); } conn.disconnect(); return ret; } /** * word文字列を含むsubjectを取得して返す * @param word * @param fullMatch 完全一致検索か * @param limit 検索最大数 * @param offset 検索オフセット * @param type 検索対象種別 * @return * @throws Exception */ public SparqlResultSet findSubject(String word, boolean fullMatch, Integer limit, Integer offset, int type, String[] propList) throws Exception{ String query; word = word.replace(" ", "%20"); if (!fullMatch){ if (type == PlainSparqlAccessor.FIND_TARGET_SUBJECT){ query = "select distinct ?s where {\n" + "?s ?o \n" + "FILTER(regex(str(?s), \""+word+"\", \"m\"))\n" + "}"; } else if (type == PlainSparqlAccessor.FIND_TARGET_OBJECT){ query = "select distinct ?s where {\n" + "?s ?p ?o \n" + "FILTER(regex(str(?o), \""+word+"\", \"m\"))\n" + "}"; } else if (type == PlainSparqlAccessor.FIND_TARGET_SPECIFIC_OBJECT){ query = "select distinct ?s where {\n"; query += getPropertySparql(propList); query += "FILTER(regex(str(?o), \""+word+"\", \"m\"))\n" + "}"; } else { // TODO query = "select distinct ?s where {\n"; query += getPropertySparql(propList); query += "FILTER(regex(str(?o), \""+word+"\", \"m\"))\n" + "}"; } } else { if (type == PlainSparqlAccessor.FIND_TARGET_OBJECT){ query = "select distinct ?s where \n{" + "{?s ?p \""+word+"\"} UNION \n" + "{?s ?p \""+word+"\"@en} UNION \n" + "{?s ?p \""+word+"\"@ja} "; String[] namespaces = setting.getNamespaceList(); if (namespaces != null && namespaces.length > 0){ query += "UNION \n"; } for (int i=0; i} "; if (i != namespaces.length-1){ query += "UNION \n"; } else { query += "\n"; } } query += "}"; } else if (type == PlainSparqlAccessor.FIND_TARGET_SPECIFIC_OBJECT){ query = // TODO "select distinct ?s where \n{"; query += getPropertySparql(propList, word); query += "}"; } else if (type == PlainSparqlAccessor.FIND_TARGET_SUBJECT){ String[] namespaces = setting.getNamespaceList(); List> ret = new ArrayList>(); for (int i=0; i ?p ?o \n"; query += "}"; List> temp = executeQuery(query); if (temp != null && temp.size() > 0){ HashMap node = new HashMap(); node.put("s", new ResourceImpl(ns+"/"+word)); ret.add(node); } } // TODO 結果の最大数はnamespaces.sizeなので厳密にはLIMIT, OFFSETも定義できるようにしておいた方が良い return new SparqlResultSet(ret, false); } else { // TODO query = "select distinct ?s where {\n"; query += getPropertySparql(propList, word); query += "}"; } } if (limit != null && limit > 0){ query +="\n LIMIT " + String.valueOf(limit+1); } if (offset != null && offset > 0){ query += "\n OFFSET " + String.valueOf(offset); } List> result = executeQuery(query); SparqlResultSet ret = new SparqlResultSet(result); if (limit != null){ if (result != null && result.size() > limit){ result = result.subList(0, limit); ret.setDefaultResult(result); ret.setHasNext(true); } } return ret; } private String getPropertySparql(String[] propList){ StringBuilder sb = new StringBuilder(); if (propList == null){ // 念のため propList = new String[]{"http://www.w3.org/2000/01/rdf-schema#label"}; } if (propList.length == 1){ sb.append("?s <"); sb.append(propList[0]); sb.append("> ?o \n"); } else { sb.append("{\n"); for (int i=0; i ?o "); if (i == propList.length-1){ sb.append("}\n"); } else { sb.append("} UNION \n"); } } sb.append("}"); } return sb.toString(); } private String getPropertySparql(String[] propList, String word){ StringBuilder sb = new StringBuilder(); if (propList == null){ // 念のため propList = new String[]{"http://www.w3.org/2000/01/rdf-schema#label"}; } sb.append("{\n"); for (int i=0; i \""+word+"\"} UNION \n"); sb.append("{?s <" +propList[i] + "> \""+word+"\"@en} UNION \n"); sb.append("{?s <" +propList[i] + "> \""+word+"\"@ja"); if (i == propList.length-1){ sb.append("}\n"); } else { sb.append("} UNION \n"); } } sb.append("}"); return sb.toString(); } /** * Threadを起動してword文字列を含むsubjectを取得して返す。結果はlistenerに返る。 * @param word 検索文字列 * @param resultListener 結果を受け取るlistener */ public boolean findSubject(String word, boolean fullMatch, Integer limit, Integer offset, int type, String[] propList, SparqlResultListener resultListener){ Thread thread = new QueryThread(word, new Object[]{new Boolean(fullMatch), limit, offset, new Integer(type), propList}, resultListener){ public void run(){ try { Boolean fullMatch = (Boolean)((Object[])getOption())[0]; Integer limit = (Integer)((Object[])getOption())[1]; Integer offset = (Integer)((Object[])getOption())[2]; Integer type = (Integer)((Object[])getOption())[3]; String[] propList = (String[])((Object[])getOption())[4]; getSparqlResultListener().resultReceived(findSubject(getQueryString(), fullMatch, limit, offset, type, propList)); } catch(Exception e){ throw new RuntimeException(e); } } }; thread.setUncaughtExceptionHandler(resultListener); thread.start(); return true; } /** * 指定されたsubjectを持つtriple(subjectは確定しているのでpropertyとobjectのみ)を返す * @param triple * @return * @throws Exception */ public List> findTripleFromSubject(String subject) throws Exception{ subject = subject.replace(" ", "%20"); String query = "select ?p ?o where {\n" + "<" + subject + "> ?p ?o\n"+ "}"; return executeQuery(query); } public boolean findTripleFromSubject(String subject, SparqlResultListener listener){ Thread thread = new QueryThread(subject, listener){ public void run(){ try { getSparqlResultListener().resultReceived(new SparqlResultSet(findTripleFromSubject(getQueryString()))); } catch(Exception e){ throw new RuntimeException(e); } } }; thread.setUncaughtExceptionHandler(listener); thread.start(); return true; } @Override public List> findPropertyList() throws Exception { // TODO 本来はdistinctにしたいがそれだと重いendpointがある List> ret = new ArrayList>(); Map result = new HashMap(); ret.add(result); String query = "select ?s {\n" + "?s ?p ?o\n"+ "} LIMIT 10"; List> subjects = executeQuery(query); for (Map subjectMap : subjects){ RDFNode subject = subjectMap.get("s"); query = "select distinct ?p where {\n" + "<" + subject + "> ?p ?o\n"+ "}"; List> properties = executeQuery(query); for (Map propertyMap : properties){ RDFNode property = propertyMap.get("p"); // propertyの場合は、戻りのMapの形式を変える // key:要素種別、 value:要素 ではなく、 // key:要素の文字列表現、 value:要素 とする。 // (要素種別が確定しているのと、要素の重複を削除するため) result.put(property.toString(), property); System.out.println("p["+property.toString()+"]"); } } return ret; } @Override public boolean findPropertyList(SparqlResultListener listener) { Thread thread = new QueryThread(null, listener){ public void run(){ try { getSparqlResultListener().resultReceived(new SparqlResultSet(findPropertyList())); } catch(Exception e){ throw new RuntimeException(e); } } }; thread.setUncaughtExceptionHandler(listener); thread.start(); return true; } public static void main(String[] args){ try { URL url = new URL("http://www.wikipediaontology.org/query/?q=" + URLEncoder.encode("select * {?s ?p ?o}", "UTF-8") + "&type=xml&LIMIT=100");//POSTするデータ HttpURLConnection conn = (HttpURLConnection)url.openConnection(); conn.setRequestProperty("User-Agent", "test");// ヘッダを設定 conn.setRequestProperty("Accept-Language", "ja");// ヘッダを設定 conn.setRequestProperty("Referer", "http://www.wikipediaontology.org/query/");// ヘッダを設定 InputStreamReader isr = new java.io.InputStreamReader(conn.getInputStream(), "UTF-8"); BufferedReader br = new java.io.BufferedReader(isr); // 受信したストリームを表示 String line = null; while (null != (line = br.readLine())) { System.out.println(line); } // ストリームならびに接続をクローズ br.close(); conn.disconnect(); } catch(Exception e){ e.printStackTrace(); } } } class QueryThread extends Thread { private String queryString; private Object option; private SparqlResultListener listener; public QueryThread(String queryString, Object option, SparqlResultListener listener){ this.queryString = queryString; this.option = option; this.listener = listener; } public QueryThread(String queryString, SparqlResultListener listener){ this(queryString, null, listener); } protected String getQueryString(){ return this.queryString; } protected Object getOption(){ return this.option; } protected SparqlResultListener getSparqlResultListener(){ return this.listener; } }