package org.biohackathon.SPARQLBuilder.endpointMetadata;

import java.io.File;
import java.util.Calendar;

import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.URICollection;

import org.apache.jena.riot.RDFDataMgr;

import com.hp.hpl.jena.datatypes.xsd.XSDDateTime;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.NodeIterator;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;

/**
 * Loads an RDF metadata file into a Jena {@link Model} and exposes a handful
 * of values extracted from it: the endpoint URI, the crawl start and end
 * times, and the triple and class counts attached to the default data set.
 *
 * <p>All values are read once, in the constructor. A value whose property is
 * not present in the file keeps its default ({@code null} for objects,
 * {@code 0} for counts).
 */
public class MetadataFile {

    Model model = null;
    String endpointURI = null;
    Calendar startDateTime = null;
    Calendar endDateTime = null;
    long numTriples = 0;
    long numClasses = 0;

    /**
     * Reads the given RDF file and extracts the metadata values.
     *
     * @param file the RDF file to load; its absolute path is handed to
     *             {@code RDFDataMgr.loadModel}
     * @throws Exception if loading fails or a value does not have the
     *                   expected node kind or datatype
     */
    public MetadataFile(File file) throws Exception {
        readFile(file);
    }

    /**
     * @return the value of the {@code PROPERTY_VOID_TRIPLES} property, or
     *         {@code 0} if the file did not provide one
     */
    public long getNumTriples() {
        return numTriples;
    }

    /**
     * @return the value of the {@code PROPERTY_VOID_CLASSES} property, or
     *         {@code 0} if the file did not provide one
     */
    public long getNumClasses() {
        return numClasses;
    }

    /**
     * @return the URI of the resource found as object of the
     *         {@code PROPERTY_SD_ENDPOINT} property, or {@code null} if the
     *         file has no such statement
     */
    public String getEndpointURI() {
        return endpointURI;
    }

    /**
     * @return the value of the {@code PROPERTY_SB_CRAWL_START_TIME} property
     *         as a calendar, or {@code null} if absent
     */
    public Calendar getStartDateTime() {
        return startDateTime;
    }

    /**
     * @return the value of the {@code PROPERTY_SB_CRAWL_END_TIME} property
     *         as a calendar, or {@code null} if absent
     */
    public Calendar getEndDateTime() {
        return endDateTime;
    }

    /**
     * Loads the model from {@code file} and populates every field.
     *
     * <p>For each property only the first object returned by the model is
     * used; any further values are ignored.
     */
    private void readFile(File file) throws Exception {
        model = RDFDataMgr.loadModel(file.getAbsolutePath());

        // Endpoint URI.
        RDFNode endpointNode = firstObject(model.getProperty(URICollection.PROPERTY_SD_ENDPOINT));
        endpointURI = (endpointNode == null) ? null : endpointNode.asResource().getURI();

        // Default data set: the subject the counts below are read from.
        RDFNode dataSetNode =
                firstObject(model.getProperty(URICollection.PROPERTY_SD_DEFAULT_DATA_SET));
        Resource defaultDataSet = (dataSetNode == null) ? null : dataSetNode.asResource();

        // Crawl start and end times, looked up model-wide (any subject).
        startDateTime = firstDateTime(URICollection.PROPERTY_SB_CRAWL_START_TIME);
        endDateTime = firstDateTime(URICollection.PROPERTY_SB_CRAWL_END_TIME);

        // NOTE(review): when no default data set was found, defaultDataSet is
        // null here; Jena appears to treat a null subject as "any subject",
        // so the counts may then come from an arbitrary resource -- confirm
        // whether that fallback is intended.
        RDFNode triplesNode =
                firstObject(defaultDataSet, model.getProperty(URICollection.PROPERTY_VOID_TRIPLES));
        if (triplesNode != null) {
            numTriples = triplesNode.asLiteral().getLong();
        }

        RDFNode classesNode =
                firstObject(defaultDataSet, model.getProperty(URICollection.PROPERTY_VOID_CLASSES));
        if (classesNode != null) {
            numClasses = classesNode.asLiteral().getLong();
        }
    }

    /** Returns the first object of {@code property} across the model, or {@code null} if none. */
    private RDFNode firstObject(Property property) {
        return firstAndClose(model.listObjectsOfProperty(property));
    }

    /** Returns the first object of {@code property} on {@code subject}, or {@code null} if none. */
    private RDFNode firstObject(Resource subject, Property property) {
        return firstAndClose(model.listObjectsOfProperty(subject, property));
    }

    /**
     * Takes the first element of {@code iterator}, if any, and always closes
     * the iterator, since it is abandoned before being run to completion.
     */
    private static RDFNode firstAndClose(NodeIterator iterator) {
        try {
            return iterator.hasNext() ? iterator.next() : null;
        } finally {
            iterator.close();
        }
    }

    /**
     * Reads the first value of the property identified by {@code propertyUri}
     * as a calendar. The value must be a literal whose Java value is an
     * {@link XSDDateTime}; otherwise the conversion throws.
     *
     * @return the calendar, or {@code null} if the property has no value
     */
    private Calendar firstDateTime(String propertyUri) {
        RDFNode node = firstObject(model.getProperty(propertyUri));
        if (node == null) {
            return null;
        }
        Literal literal = node.asLiteral();
        return ((XSDDateTime) literal.getValue()).asCalendar();
    }
}