/*
 * CompareSubject - looks up the same subjects on several SPARQL endpoints and
 * writes the results side by side as comma separated text.
 *
 * What the visible code does:
 *  - Input 1 (csvFile) is read as text with the "SJIS" charset name. Its first
 *    row is the header: column 0 is dropped and every remaining column is
 *    treated as an endpoint name (getEndpoints). Each endpoint gets a
 *    PlainSparqlAccessor built from EndpointSettingsManager and cached in
 *    accessorMap (setEndpoint / addEndpoint).
 *  - Input 2 (propertyFile, passed to the constructor as endpointFile) is read
 *    as "UTF-8" and split on ",". Rows with at least two columns register a
 *    list under the first column (the endpoint); the loop that fills the list
 *    is not readable in this copy, presumably it adds the remaining columns
 *    as property URIs - TODO confirm against the original file.
 *  - For one data row, column 0 is the label and the following columns are
 *    read per endpoint, in order, as the subject to query. getResource runs
 *    "select distinct ?o where { <subject> <property> ?o }" once per property
 *    when the subject cell is not blank, and stores toString() of each ?o.
 *    Exceptions raised while querying are printed with printStackTrace and
 *    the property keeps whatever was collected so far.
 *  - getResult returns "" for no values, the single value for one value, and
 *    otherwise a joined string; the separator literal is lost in this copy.
 *  - The output header row is an empty cell followed, per endpoint, by the
 *    endpoint name and two empty cells. Data rows append, per endpoint,
 *    subject cell, property, joined result - or three bare separators when
 *    that endpoint has no further property or its subject cell is blank.
 *  - outputResult(File) creates missing parent directories and the file,
 *    returns false when the file is not writable, and otherwise writes
 *    through an "SJIS" PrintWriter that outputResult(PrintWriter) flushes and
 *    closes. outputResult(PrintWriter) always returns true (marked TODO).
 *  - outputResult(File, CompareResultListener) does the same work on a
 *    QueryThread, hands the boolean to listener.resultReceived, wraps any
 *    exception in a RuntimeException, and installs the listener as the
 *    thread's uncaught exception handler.
 *  - stop() logs a message and sets finalizeThread; the code that reads the
 *    flag is not visible in this copy.
 *  - getCurrentTime formats "now" as yyyy/MM/dd HH:mm:ss followed by ":".
 *
 * English reading of the inline Japanese comments, in order of appearance:
 *  "the first column is the endpoint"; "the first row is the header";
 *  "write the header"; "fetch the resources for each endpoint";
 *  "result for each property"; "TODO: take the maximum size of the
 *  properToResultHash maps and add that many rows"; (diagram) "this method is
 *  responsible for this range", i.e. the rows of one label; "TODO: separator";
 *  "@return map from property to its results".
 *
 * NOTE(review): this copy of the file looks damaged and should be restored
 * from version control before any code change is attempted:
 *  - line breaks are collapsed, so each end-of-line comment now runs into the
 *    code that originally followed it, and the "Stop Request" string literal
 *    is split across two physical lines;
 *  - text from a '<' up to the next '>' appears to be missing: generic type
 *    arguments are gone (for example "HashMap>") and several
 *    "for (int i=...; i" headers run straight into unrelated later text, which
 *    loses the end of readPropertyFile, the signatures of the two methods that
 *    walk the CSV rows, part of the row-writing loop and the separator in
 *    getResult;
 *  - the text ends inside a commented-out loop in main, and the print/println
 *    helpers that the class calls are not visible.
 * NOTE(review): in the method invoked as getResources(propertyHash, out),
 * endpoints stays null when the CSV yields no lines, and the header loop then
 * iterates over it - confirm against the intact source whether that path is
 * guarded.
 * NOTE(review): main uses hard-coded absolute Windows paths.
 */
package hozo.sparql.plugin.compare; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.io.UnsupportedEncodingException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import com.hp.hpl.jena.rdf.model.RDFNode; import com.ibm.icu.text.SimpleDateFormat; import hozo.sparql.EndpointSettings; import hozo.sparql.EndpointSettingsManager; import hozo.sparql.PlainSparqlAccessor; import hozo.sparql.SparqlQueryListener; public class CompareSubject { private File csvFile; private File propertyFile; private HashMap accessorMap; private static final String SEPARATOR = ","; private Thread thread; private boolean finalizeThread = false; private SparqlQueryListener listener; public CompareSubject(File csvFile, File endpointFile){ this(csvFile, endpointFile, null); } public CompareSubject(File csvFile, File endpointFile, SparqlQueryListener listener){ this.csvFile = csvFile; this.propertyFile = endpointFile; this.listener = listener; this.accessorMap = new HashMap(); } public void outputResult(File out, CompareResultListener listener){ thread = new QueryThread(new Object[]{out}, listener){ public void run(){ try { File file = (File)((Object[])getOption())[0]; getCompareResultListener().resultReceived(outputResult(file)); } catch(Exception e){ throw new RuntimeException(e); } } }; thread.setUncaughtExceptionHandler(listener); thread.start(); } private boolean outputResult(File out) throws IOException { if (out.getParentFile() != null && !out.getParentFile().exists()){ out.getParentFile().mkdirs(); } out.createNewFile(); if (!out.canWrite()){ return false; } return outputResult(new PrintWriter(new OutputStreamWriter(new FileOutputStream(out), "SJIS"))); } public void stop(){ println(getCurrentTime()+"Stop 
Request"); finalizeThread = true; } private boolean outputResult(PrintWriter out){ HashMap> propertyHash = readPropertyFile(); getResources(propertyHash, out); out.flush(); out.close(); println(getCurrentTime()+"query end."); return true; // TODO } private HashMap> readPropertyFile(){ HashMap> propertyHash = new HashMap>(); List contents = FileUtil.readFileText(propertyFile, "UTF-8"); for (String line : contents){ List property = FileUtil.splitLine(line, SEPARATOR); if (property != null && property.size() > 1){ // 一カラム目はendpoint List properties = new ArrayList(); propertyHash.put(property.get(0), properties); for (int i=1; i> propertyHash, PrintWriter out){ List endpoints = null; List files = FileUtil.readFileText(csvFile, "SJIS"); println(); println(getCurrentTime()+"query start."); if (files != null && files.size() > 0){ // 一行目はヘッダ endpoints = getEndpoints(files.get(0)); setEndpoint(endpoints); } // header出力 StringBuilder line = new StringBuilder(); line.append(""); line.append(SEPARATOR); for (String endpoint : endpoints){ line.append(endpoint); line.append(SEPARATOR); line.append(""); line.append(SEPARATOR); line.append(""); line.append(SEPARATOR); } out.append(line.toString()); out.append("\n"); for (int i=1; i endpoints, List contents, HashMap> propertyHash, PrintWriter out){ int index=1; int max = 0; String label = contents.get(0); List> ret = new ArrayList>(); print(getCurrentTime()+"searching ["+label+"]"); for (String endpoint : endpoints){ print("*"); HashMap properToResultHash = new HashMap(); ret.add(properToResultHash); // エンドポイントごとのリソース取得 List properties = propertyHash.get(endpoint); if (properties != null){ HashMap> results = getResource(endpoint, contents.get(index), properties); if (max < results.size()){ max = results.size(); } for (String property : results.keySet()){ // propertyごとの結果 String result = getResult(results.get(property)); properToResultHash.put(property, result); } } index++; } println(); // TODO properToResultHashの最大値を取得する // 
その数だけ行を追加する。 /* 1, endpoint1, , endpoint2, ,..... * label1, property1-1, object1-1, property2-1, object2-1 ,....o  ↑ * label1, property1-2, object1-2, property2-2, object2-2 ,....o この範囲を司る * label1, property1-3, object1-3, , ,....o  ↓ * label2, property1-4, object1-1, property2-3, object2-3 ,..... * label2, property1-5, object1-2, property2-4, object2-4 ,..... * label2, , , property2-5, object2-5 ,..... */ for (int i=0; i values : ret){ String[] hoge = values.keySet().toArray(new String[]{}); if (hoge.length > i && !contents.get(lp).trim().isEmpty()){ String resultProperty = hoge[i]; String resultObject = values.get(resultProperty); line.append(contents.get(lp)); line.append(SEPARATOR); line.append(resultProperty); line.append(SEPARATOR); line.append(resultObject); line.append(SEPARATOR); } else { line.append(SEPARATOR); line.append(SEPARATOR); line.append(SEPARATOR); } lp++; } out.append(line.toString()); out.append("\n"); } } private String getResult(List result){ if (result.size() == 0){ return ""; } if (result.size() == 1){ return result.get(0); } StringBuilder sb = new StringBuilder(); for (int i=0; i"); // TODO セパレータ } sb.append(result.get(result.size()-1)); return sb.toString(); } /** * * @param endpoint * @param content * @param properties * @return propertyとそのresultのmap */ private HashMap> getResource(String endpoint, String content, List properties){ HashMap> ret = new HashMap>(); PlainSparqlAccessor sa = accessorMap.get(endpoint); for (String property : properties){ List res = new ArrayList(); ret.put(property, res); if (!content.trim().isEmpty()){ String query = makeQuery(content, property); try { List> results = sa.executeQuery(query); if (results != null){ for (Map result : results){ RDFNode s = result.get("o"); res.add(s.toString()); } } } catch (Exception e) { e.printStackTrace(); } } } return ret; } private String makeQuery(String s, String p){ return "select distinct ?o where {\n" + "<" + s.trim() + "> <" + p.trim() + "> ?o \n" + "}"; } public 
void addEndpoint(String endpoint){ EndpointSettings settings = EndpointSettingsManager.instance.getSetting(endpoint); PlainSparqlAccessor sa = new PlainSparqlAccessor(settings); accessorMap.put(endpoint, sa); } private void setEndpoint(List endpoints){ for (String ep : endpoints){ addEndpoint(ep); } } private List getEndpoints(String line){ List headers = FileUtil.splitLine(line, SEPARATOR); if (headers.size() > 0){ headers.remove(0); } return headers; } private String getCurrentTime(){ Timestamp time = new Timestamp(new Date().getTime()); return new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").format(time) + ":"; } private class QueryThread extends Thread { private Object option; private CompareResultListener listener; public QueryThread(Object option, CompareResultListener listener){ this.option = option; this.listener = listener; } protected Object getOption(){ return this.option; } protected CompareResultListener getCompareResultListener(){ return this.listener; } } public static void main(String[] args){ String epFile = "C:\\Users\\kato\\Desktop\\out\\endp_prop.txt"; String inFile = "C:\\works\\daily\\20130926\\input\\動詞索引.csv"; String outFile = "C:\\works\\daily\\20130926\\input\\動詞索引_out.csv"; try { /* for (int i=0; i