package org.biohackathon.SPARQLBuilder.OWL;

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import jp.riken.accc.db.rdf.crawler.dataStructure.SchemaCategory;
import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.JenaModelGenerator;
import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.RDFsCrawlerImpl;

| 12 | public class StructureCrawler {
|
---|
| 13 |
|
---|
| 14 | private File dataDir = null;
|
---|
| 15 |
|
---|
[93] | 16 | // this is just for a test
|
---|
[85] | 17 | public static void main(String[] args) throws Exception{
|
---|
| 18 | StructureCrawler sc = new StructureCrawler(new File("c:\\temp"));
|
---|
[89] | 19 | sc.crawl("http://dbe-rdf.biosciencedbc.jp/sparql", "biosciencedbc.ttl");
|
---|
[85] | 20 |
|
---|
[86] | 21 | System.out.println("done");
|
---|
| 22 |
|
---|
| 23 |
|
---|
[85] | 24 | Map<String,String> acTable = sc.getAcquiredStructureFiles();
|
---|
| 25 | Set<String> keySet = acTable.keySet();
|
---|
| 26 | for(String key: keySet){
|
---|
| 27 | String val = acTable.get(key);
|
---|
| 28 | System.out.println("File: " + key + " --- " + val);
|
---|
| 29 | }
|
---|
| 30 | }
|
---|
| 31 |
|
---|
| 32 |
|
---|
| 33 |
|
---|
| 34 | public Map<String, String> getAcquiredStructureFiles(){
|
---|
| 35 | Map<String, String> table = new HashMap<String, String>();
|
---|
| 36 | if( dataDir.isDirectory() ){
|
---|
| 37 | // read files
|
---|
| 38 | File[] files = dataDir.listFiles();
|
---|
| 39 | for(File file: files){
|
---|
| 40 | String uri = null;
|
---|
| 41 | try{
|
---|
| 42 | JenaModelGenerator jmGene = new JenaModelGenerator(file.getAbsolutePath());
|
---|
| 43 | uri = jmGene.getEndpointURI();
|
---|
[88] | 44 |
|
---|
| 45 | System.out.println("URI: "+ uri);
|
---|
[85] | 46 | }catch(Exception ex){
|
---|
| 47 | //
|
---|
| 48 | }
|
---|
| 49 | if( uri != null ){
|
---|
| 50 | table.put(uri, file.getAbsolutePath());
|
---|
| 51 | }
|
---|
| 52 | }
|
---|
| 53 | }else{
|
---|
| 54 | if( dataDir.isFile() ){
|
---|
| 55 | String uri = null;
|
---|
| 56 | try{
|
---|
| 57 | JenaModelGenerator jmGene = new JenaModelGenerator(dataDir.getAbsolutePath());
|
---|
| 58 | uri = jmGene.getEndpointURI();
|
---|
[88] | 59 |
|
---|
[85] | 60 | }catch(Exception ex){
|
---|
| 61 | //
|
---|
| 62 | }
|
---|
| 63 | if( uri != null ){
|
---|
| 64 | table.put(uri, dataDir.getAbsolutePath());
|
---|
| 65 | }
|
---|
| 66 | }
|
---|
| 67 | }
|
---|
| 68 | return table;
|
---|
| 69 | }
|
---|
| 70 |
|
---|
| 71 |
|
---|
| 72 |
|
---|
| 73 | public StructureCrawler(File dataDir) throws Exception {
|
---|
| 74 | this.dataDir = dataDir;
|
---|
| 75 | }
|
---|
| 76 |
|
---|
| 77 | public void crawl(String endPURI, String outFileName) throws Exception {
|
---|
| 78 | RDFsCrawlerImpl impl = new RDFsCrawlerImpl(endPURI);
|
---|
| 79 | // Resource[] res = impl.getRDFProperties();
|
---|
| 80 | // Resource[] res = impl.getInferedRDFsClassesFromInstances();
|
---|
| 81 | // Resource[] res = impl.getDomainRangeDeclaredRDFProperties();
|
---|
| 82 | // Resource[] res = impl.getDeclaredRDFsClasses();
|
---|
| 83 | // for(Resource r: res){
|
---|
| 84 | // System.out.println(r.getURI().toString());
|
---|
| 85 | // }
|
---|
| 86 | // Model model = impl.getProperiesFromDomainRangeDecls();
|
---|
| 87 | // Model model = impl.getPropertiesFromInstanceDecls();
|
---|
| 88 | // RDFWriter writer = model.getWriter("N3");
|
---|
| 89 | // writer.setProperty("showXMLDeclaration","true");
|
---|
| 90 | // writer.write(model,System.out,"");
|
---|
| 91 | // model.close();
|
---|
| 92 |
|
---|
| 93 | SchemaCategory sc = impl.determineSchemaCategory();
|
---|
| 94 |
|
---|
| 95 | // RDF/XML, RDF/XML-ABBREV, N-TRIPLE, N3
|
---|
[86] | 96 | File outFile = null;
|
---|
[85] | 97 | if (outFileName == null) {
|
---|
[86] | 98 | String tFileName = null;
|
---|
[85] | 99 | if (endPURI.lastIndexOf("/", endPURI.length() - 2) > 0) {
|
---|
| 100 | tFileName = endPURI.substring(
|
---|
| 101 | endPURI.lastIndexOf("/", endPURI.length() - 2) + 1,
|
---|
| 102 | endPURI.length());
|
---|
| 103 | } else {
|
---|
| 104 | tFileName = endPURI;
|
---|
| 105 | }
|
---|
[86] | 106 | outFile = new File(dataDir, tFileName);
|
---|
[85] | 107 | if (outFile.exists()) {
|
---|
| 108 | outFile = File.createTempFile(tFileName, "", dataDir);
|
---|
| 109 | }
|
---|
[86] | 110 | } else {
|
---|
| 111 | outFile = new File(dataDir, outFileName);
|
---|
[85] | 112 | }
|
---|
[86] | 113 | sc.write2File(outFile.getAbsolutePath(), "Turtle");
|
---|
| 114 | // System.out.println("Category:" + sc.getCategory());
|
---|
[85] | 115 | }
|
---|
| 116 |
|
---|
| 117 | }
|
---|