package org.biohackathon.SPARQLBuilder.OWL;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import jp.riken.accc.db.rdf.crawler.dataStructure.SchemaCategory;
import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.JenaModelGenerator;
import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.RDFsCrawlerImpl;

| 12 | public class StructureCrawler {
|
---|
| 13 |
|
---|
| 14 | private File dataDir = null;
|
---|
| 15 |
|
---|
[93] | 16 | // this is just for a test
|
---|
[85] | 17 | public static void main(String[] args) throws Exception{
|
---|
[103] | 18 | StructureCrawler sc = new StructureCrawler(new File("c:\\cdata"));
|
---|
| 19 | // sc.crawl("http://dbe-rdf.biosciencedbc.jp/sparql", "biosciencedbc.ttl");
|
---|
[85] | 20 |
|
---|
[103] | 21 | // System.out.println("done");
|
---|
[86] | 22 |
|
---|
| 23 |
|
---|
[85] | 24 | Map<String,String> acTable = sc.getAcquiredStructureFiles();
|
---|
| 25 | Set<String> keySet = acTable.keySet();
|
---|
| 26 | for(String key: keySet){
|
---|
| 27 | String val = acTable.get(key);
|
---|
| 28 | System.out.println("File: " + key + " --- " + val);
|
---|
| 29 | }
|
---|
| 30 | }
|
---|
| 31 |
|
---|
| 32 |
|
---|
| 33 |
|
---|
| 34 | public Map<String, String> getAcquiredStructureFiles(){
|
---|
| 35 | Map<String, String> table = new HashMap<String, String>();
|
---|
| 36 | if( dataDir.isDirectory() ){
|
---|
| 37 | // read files
|
---|
| 38 | File[] files = dataDir.listFiles();
|
---|
| 39 | for(File file: files){
|
---|
| 40 | String uri = null;
|
---|
| 41 | try{
|
---|
| 42 | JenaModelGenerator jmGene = new JenaModelGenerator(file.getAbsolutePath());
|
---|
| 43 | uri = jmGene.getEndpointURI();
|
---|
[88] | 44 | System.out.println("URI: "+ uri);
|
---|
[85] | 45 | }catch(Exception ex){
|
---|
| 46 | //
|
---|
| 47 | }
|
---|
| 48 | if( uri != null ){
|
---|
| 49 | table.put(uri, file.getAbsolutePath());
|
---|
| 50 | }
|
---|
| 51 | }
|
---|
| 52 | }else{
|
---|
| 53 | if( dataDir.isFile() ){
|
---|
| 54 | String uri = null;
|
---|
| 55 | try{
|
---|
| 56 | JenaModelGenerator jmGene = new JenaModelGenerator(dataDir.getAbsolutePath());
|
---|
| 57 | uri = jmGene.getEndpointURI();
|
---|
[88] | 58 |
|
---|
[85] | 59 | }catch(Exception ex){
|
---|
| 60 | //
|
---|
| 61 | }
|
---|
| 62 | if( uri != null ){
|
---|
| 63 | table.put(uri, dataDir.getAbsolutePath());
|
---|
| 64 | }
|
---|
| 65 | }
|
---|
| 66 | }
|
---|
| 67 | return table;
|
---|
| 68 | }
|
---|
| 69 |
|
---|
| 70 |
|
---|
| 71 |
|
---|
| 72 | public StructureCrawler(File dataDir) throws Exception {
|
---|
| 73 | this.dataDir = dataDir;
|
---|
| 74 | }
|
---|
| 75 |
|
---|
| 76 | public void crawl(String endPURI, String outFileName) throws Exception {
|
---|
| 77 | RDFsCrawlerImpl impl = new RDFsCrawlerImpl(endPURI);
|
---|
| 78 | // Resource[] res = impl.getRDFProperties();
|
---|
| 79 | // Resource[] res = impl.getInferedRDFsClassesFromInstances();
|
---|
| 80 | // Resource[] res = impl.getDomainRangeDeclaredRDFProperties();
|
---|
| 81 | // Resource[] res = impl.getDeclaredRDFsClasses();
|
---|
| 82 | // for(Resource r: res){
|
---|
| 83 | // System.out.println(r.getURI().toString());
|
---|
| 84 | // }
|
---|
| 85 | // Model model = impl.getProperiesFromDomainRangeDecls();
|
---|
| 86 | // Model model = impl.getPropertiesFromInstanceDecls();
|
---|
| 87 | // RDFWriter writer = model.getWriter("N3");
|
---|
| 88 | // writer.setProperty("showXMLDeclaration","true");
|
---|
| 89 | // writer.write(model,System.out,"");
|
---|
| 90 | // model.close();
|
---|
| 91 |
|
---|
| 92 | SchemaCategory sc = impl.determineSchemaCategory();
|
---|
| 93 |
|
---|
| 94 | // RDF/XML, RDF/XML-ABBREV, N-TRIPLE, N3
|
---|
[86] | 95 | File outFile = null;
|
---|
[85] | 96 | if (outFileName == null) {
|
---|
[86] | 97 | String tFileName = null;
|
---|
[85] | 98 | if (endPURI.lastIndexOf("/", endPURI.length() - 2) > 0) {
|
---|
| 99 | tFileName = endPURI.substring(
|
---|
| 100 | endPURI.lastIndexOf("/", endPURI.length() - 2) + 1,
|
---|
| 101 | endPURI.length());
|
---|
| 102 | } else {
|
---|
| 103 | tFileName = endPURI;
|
---|
| 104 | }
|
---|
[86] | 105 | outFile = new File(dataDir, tFileName);
|
---|
[85] | 106 | if (outFile.exists()) {
|
---|
| 107 | outFile = File.createTempFile(tFileName, "", dataDir);
|
---|
| 108 | }
|
---|
[86] | 109 | } else {
|
---|
| 110 | outFile = new File(dataDir, outFileName);
|
---|
[85] | 111 | }
|
---|
[86] | 112 | sc.write2File(outFile.getAbsolutePath(), "Turtle");
|
---|
| 113 | // System.out.println("Category:" + sc.getCategory());
|
---|
[85] | 114 | }
|
---|
| 115 |
|
---|
| 116 | }
|
---|