package org.biohackathon.SPARQLBuilder.OWL;

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import jp.riken.accc.db.rdf.crawler.dataStructure.SchemaCategory;
import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.JenaModelGenerator;
import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.RDFsCrawlerImpl;

/**
 * Crawls a SPARQL endpoint, writes the resulting {@link SchemaCategory} to a
 * file below a data location, and lists the files already stored there
 * together with the endpoint URI each of them reports.
 */
public class StructureCrawler {

    /** Minimum prefix length accepted by {@link File#createTempFile(String, String, File)}. */
    private static final int MIN_TEMP_PREFIX_LENGTH = 3;

    /**
     * Directory (or single file) holding the acquired structure files. May be
     * {@code null}; {@link #crawl(String, String)} then resolves output names
     * without a parent directory.
     */
    private final File dataDir;

    // this is just for a test
    /*
    public static void main(String[] args) throws Exception{
        StructureCrawler sc = new StructureCrawler(new File("c:\\cdata"));
//      sc.crawl("http://dbe-rdf.biosciencedbc.jp/sparql", "biosciencedbc.ttl");
//      System.out.println("done");
        Map<String, String> acTable = sc.getAcquiredStructureFiles();
        Set<String> keySet = acTable.keySet();
        for(String key: keySet){
            String val = acTable.get(key);
            System.out.println("File: " + key + " --- " + val);
        }
    }
    */

    /**
     * Creates a crawler that stores and looks up structure files at the given
     * location.
     *
     * @param dataDir directory that receives crawl results; a plain file is
     *                also accepted by {@link #getAcquiredStructureFiles()}
     * @throws Exception never thrown by this implementation; the clause is
     *                   kept so existing callers keep compiling unchanged
     */
    public StructureCrawler(File dataDir) throws Exception {
        this.dataDir = dataDir;
    }

    /**
     * Builds a table of the structure files found at the data location.
     *
     * <p>If the data location is a directory, every entry returned by
     * {@link File#listFiles()} is handed to {@link JenaModelGenerator}; if it
     * is a single file, only that file is. Entries that cannot be read, or
     * that report no endpoint URI, are skipped. When two files report the same
     * URI the one processed last wins.
     *
     * @return a new mutable map from endpoint URI to the absolute path of the
     *         file that reported it; empty when nothing could be read, never
     *         {@code null}
     */
    public Map<String, String> getAcquiredStructureFiles() {
        Map<String, String> table = new HashMap<String, String>();
        if (dataDir == null) {
            return table;
        }

        if (dataDir.isDirectory()) {
            File[] files = dataDir.listFiles();
            if (files == null) {
                // listFiles() yields null on an I/O error or missing permission.
                return table;
            }
            for (File file : files) {
                register(table, file, true);
            }
        } else if (dataDir.isFile()) {
            register(table, dataDir, false);
        }
        return table;
    }

    /**
     * Crawls the given endpoint and writes the determined schema category to a
     * file in Turtle syntax.
     *
     * <p>When {@code outFileName} is {@code null} the file name is taken from
     * the part of {@code endPURI} after its last {@code '/'} (a trailing slash
     * is ignored). If a file of that name already exists, a uniquely named
     * file with that name as prefix is created instead, so existing results
     * are not overwritten. An explicitly given {@code outFileName} is used
     * as-is, overwriting any existing file of that name.
     *
     * @param endPURI     URI of the SPARQL endpoint to crawl
     * @param outFileName name of the output file relative to the data
     *                    location, or {@code null} to derive it from
     *                    {@code endPURI}
     * @throws Exception if the crawler, the temporary-file creation or the
     *                   writer fails
     */
    public void crawl(String endPURI, String outFileName) throws Exception {
        RDFsCrawlerImpl impl = new RDFsCrawlerImpl(endPURI);

        // Disabled exploratory calls kept from the original implementation:
        // Resource[] res = impl.getRDFProperties();
        // Resource[] res = impl.getInferedRDFsClassesFromInstances();
        // Resource[] res = impl.getDomainRangeDeclaredRDFProperties();
        // Resource[] res = impl.getDeclaredRDFsClasses();
        // for(Resource r: res){
        //     System.out.println(r.getURI().toString());
        // }
        // Model model = impl.getProperiesFromDomainRangeDecls();
        // Model model = impl.getPropertiesFromInstanceDecls();
        // RDFWriter writer = model.getWriter("N3");
        // writer.setProperty("showXMLDeclaration","true");
        // writer.write(model,System.out,"");
        // model.close();

        SchemaCategory sc = impl.determineSchemaCategory();

        // Format names noted by the original author: RDF/XML, RDF/XML-ABBREV, N-TRIPLE, N3
        File outFile;
        if (outFileName == null) {
            String tFileName = deriveFileName(endPURI);
            outFile = new File(dataDir, tFileName);
            if (outFile.exists()) {
                outFile = File.createTempFile(toTempPrefix(tFileName), "", dataDir);
            }
        } else {
            outFile = new File(dataDir, outFileName);
        }
        sc.write2File(outFile.getAbsolutePath(), "Turtle");
        // System.out.println("Category:" + sc.getCategory());
    }

    /**
     * Reads the endpoint URI of one file and, if present, records it in the table.
     *
     * @param table   map receiving the URI-to-path entry
     * @param file    candidate structure file
     * @param echoUri whether to print the URI to standard output; the
     *                directory scan has always done so while the single-file
     *                path has not, and the flag keeps that output unchanged
     */
    private static void register(Map<String, String> table, File file, boolean echoUri) {
        String path = file.getAbsolutePath();
        String uri = null;
        try {
            JenaModelGenerator generator = new JenaModelGenerator(path);
            uri = generator.getEndpointURI();
            if (echoUri) {
                System.out.println("URI: " + uri);
            }
        } catch (Exception ex) {
            // Best effort: an unreadable entry must not abort the scan, but
            // say why it was skipped instead of hiding the failure.
            System.err.println("Skipping structure file " + path + ": " + ex);
        }
        if (uri != null) {
            table.put(uri, path);
        }
    }

    /**
     * Derives an output file name from an endpoint URI: the text after the
     * last {@code '/'} that is not the final character, or the whole URI when
     * there is no such slash beyond index 0.
     */
    private static String deriveFileName(String endPURI) {
        // Start the search one character before the end so that a trailing
        // slash is not taken as the separator.
        int slash = endPURI.lastIndexOf("/", endPURI.length() - 2);
        String name = slash > 0 ? endPURI.substring(slash + 1) : endPURI;
        // java.io.File drops a trailing separator anyway; removing it here
        // also keeps it out of the temp-file prefix.
        if (name.length() > 1 && name.endsWith("/")) {
            name = name.substring(0, name.length() - 1);
        }
        return name;
    }

    /**
     * Pads a name with underscores up to the minimum prefix length that
     * {@link File#createTempFile(String, String, File)} accepts.
     */
    private static String toTempPrefix(String name) {
        StringBuilder prefix = new StringBuilder(name);
        while (prefix.length() < MIN_TEMP_PREFIX_LENGTH) {
            prefix.append('_');
        }
        return prefix.toString();
    }
}