Context Navigation

StructureCrawler.java

リビジョン 269, 2.9 KB (コミッタ: nori, 10 年前)
クローラー結果として CrawledDataset を扱えるようにするための暫定版

行番号
1	package org.biohackathon.SPARQLBuilder.OWL;
2
3	import java.io.File;
4	import java.util.HashMap;
5	import java.util.Map;
6	import java.util.Set;
7
8	import jp.riken.accc.db.sparqlBuilderMetadata.crawler.dataStructure.SchemaCategory;
9	import jp.riken.accc.db.sparqlBuilderMetadata.crawler.dataStructure.sparql.JenaModelGenerator;
10	import jp.riken.accc.db.sparqlBuilderMetadata.crawler.dataStructure.sparql.RDFsCrawlerImpl;
11
12	public class StructureCrawler {
13
14	private File dataDir = null;
15
16	// this is just for a test
17	/*
18	public static void main(String[] args) throws Exception{
19	StructureCrawler sc = new StructureCrawler(new File("c:\\cdata"));
20	// sc.crawl("http://dbe-rdf.biosciencedbc.jp/sparql", "biosciencedbc.ttl");
21
22	// System.out.println("done");
23
24
25	Map<String,String> acTable = sc.getAcquiredStructureFiles();
26	Set<String> keySet = acTable.keySet();
27	for(String key: keySet){
28	String val = acTable.get(key);
29	System.out.println("File: " + key + " --- " + val);
30	}
31	}
32	*/
33
34
35	public Map<String, String> getAcquiredStructureFiles(){
36	Map<String, String> table = new HashMap<String, String>();
37	if( dataDir.isDirectory() ){
38	// read files
39	File[] files = dataDir.listFiles();
40	for(File file: files){
41	String uri = null;
42	try{
43	JenaModelGenerator jmGene = new JenaModelGenerator(file.getAbsolutePath());
44	uri = jmGene.getEndpointURI();
45	System.out.println("URI: "+ uri);
46	}catch(Exception ex){
47	//
48	}
49	if( uri != null ){
50	table.put(uri, file.getAbsolutePath());
51	}
52	}
53	}else{
54	if( dataDir.isFile() ){
55	String uri = null;
56	try{
57	JenaModelGenerator jmGene = new JenaModelGenerator(dataDir.getAbsolutePath());
58	uri = jmGene.getEndpointURI();
59
60	}catch(Exception ex){
61	//
62	}
63	if( uri != null ){
64	table.put(uri, dataDir.getAbsolutePath());
65	}
66	}
67	}
68	return table;
69	}
70
71
72
73	public StructureCrawler(File dataDir) throws Exception {
74	this.dataDir = dataDir;
75	}
76
77	public void crawl(String endPURI, String crawlName, String outFileName) throws Exception {
78	RDFsCrawlerImpl impl = new RDFsCrawlerImpl(endPURI, crawlName);
79	SchemaCategory sc = impl.determineSchemaCategory();
80
81	// RDF/XML, RDF/XML-ABBREV, N-TRIPLE, N3
82	File outFile = null;
83	if (outFileName == null) {
84	String tFileName = null;
85	if (endPURI.lastIndexOf("/", endPURI.length() - 2) > 0) {
86	tFileName = endPURI.substring(
87	endPURI.lastIndexOf("/", endPURI.length() - 2) + 1,
88	endPURI.length());
89	} else {
90	tFileName = endPURI;
91	}
92	outFile = new File(dataDir, tFileName);
93	if (outFile.exists()) {
94	outFile = File.createTempFile(tFileName, "", dataDir);
95	}
96	} else {
97	outFile = new File(dataDir, outFileName);
98	}
99	sc.write2File(outFile.getAbsolutePath(), "Turtle");
100	// System.out.println("Category:" + sc.getCategory());
101	}
102
103	}

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/SPARQLBuilderWWW2016/src/java/org/biohackathon/SPARQLBuilder/OWL/StructureCrawler.java

異なるフォーマットでダウンロード: