package org.biohackathon.SPARQLBuilder.endpointMetadata;

import java.io.File;
import java.util.ArrayList;
import java.util.Calendar;

import jp.riken.accc.db.rdf.crawler.dataStructure.sparql.URICollection;

import org.apache.jena.riot.RDFDataMgr;

import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.NodeIterator;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;

/**
 * Loads an RDF metadata file into a Jena {@link Model} and exposes selected
 * values from it (endpoint URI, crawl start/end time, triple and class counts).
 *
 * <p>The static accessors can also be used directly on a model or on resources
 * obtained from one. Which RDF property each accessor reads is determined by
 * the corresponding constant in {@link URICollection}.
 */
public class MetadataFile {

    Model model = null;
    File path = null;
    String endpointURI = null;
    Calendar startDateTime = null;
    Calendar endDateTime = null;
    long numTriples = 0;
    long numClasses = 0;

    /**
     * Reads the given metadata file and caches its summary values.
     *
     * @param file the metadata file to load
     * @throws Exception if the file cannot be loaded or a value has an unexpected node type
     */
    public MetadataFile(File file) throws Exception {
        readFile(file);
        this.path = file;
    }

    /** @return the triple count read from the file, or 0 if none was found */
    public long getNumTriples() {
        return numTriples;
    }

    /** @return the class count read from the file, or 0 if none was found */
    public long getNumClasses() {
        return numClasses;
    }

    /** @return the absolute path of the file this instance was built from */
    public String getFilePath() {
        return path.getAbsolutePath();
    }

    /** @return the endpoint URI read from the file, or {@code null} if none was found */
    public String getEndpointURI() {
        return endpointURI;
    }

    /** @return the crawl start time read from the file, or {@code null} if none was found */
    public Calendar getStartDateTime() {
        return startDateTime;
    }

    /** @return the crawl end time read from the file, or {@code null} if none was found */
    public Calendar getEndDateTime() {
        return endDateTime;
    }

    /**
     * Checks a metadata file. Currently this only verifies that the file can be
     * loaded and that the endpoint lookup does not fail.
     *
     * @param file the metadata file to check
     * @return always {@code true} when no exception is thrown
     * @throws Exception if the file cannot be loaded or the endpoint lookup fails
     */
    public static boolean validate(File file) throws Exception {
        Model tmpModel = RDFDataMgr.loadModel(file.getAbsolutePath());
        // The result is not inspected yet; the call is kept so that a malformed
        // endpoint statement still surfaces as an exception.
        getEndpointURI(tmpModel);
        // TODO: add real validation rules.
        return true;
    }

    /**
     * Finds the first object of {@code URICollection.PROPERTY_SD_DEFAULT_DATA_SET}
     * anywhere in the model.
     *
     * @param model the model to search
     * @return the default dataset resource, or {@code null} if the property is absent
     * @throws Exception if the object found is not a resource
     */
    public static Resource getDefaultDataset(Model model) throws Exception {
        Property sdDefaultDataset = model.getProperty(URICollection.PROPERTY_SD_DEFAULT_DATA_SET);
        RDFNode node = firstOrNull(model.listObjectsOfProperty(sdDefaultDataset));
        return node == null ? null : node.asResource();
    }

    /**
     * Reads {@code URICollection.PROPERTY_VOID_TRIPLES} of the given dataset.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return the value as a long, or 0 if the property is absent
     * @throws Exception if the value is not a literal readable as a long
     */
    public static long getTriples(Resource defaultDataset) throws Exception {
        return longValue(defaultDataset, URICollection.PROPERTY_VOID_TRIPLES);
    }

    /**
     * Reads {@code URICollection.PROPERTY_VOID_PROPERTIES} of the given dataset.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return the value as a long, or 0 if the property is absent
     * @throws Exception if the value is not a literal readable as a long
     */
    public static long getProperties(Resource defaultDataset) throws Exception {
        return longValue(defaultDataset, URICollection.PROPERTY_VOID_PROPERTIES);
    }

    /**
     * Reads {@code URICollection.PROPERTY_VOID_CLASSES} of the given dataset.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return the value as a long, or 0 if the property is absent
     * @throws Exception if the value is not a literal readable as a long
     */
    public static long getClasses(Resource defaultDataset) throws Exception {
        return longValue(defaultDataset, URICollection.PROPERTY_VOID_CLASSES);
    }

    /**
     * Reads {@code URICollection.PROPERTY_SB_DATATYPES} of the given dataset.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return the value as a long, or 0 if the property is absent
     * @throws Exception if the value is not a literal readable as a long
     */
    public static long getDatatypes(Resource defaultDataset) throws Exception {
        return longValue(defaultDataset, URICollection.PROPERTY_SB_DATATYPES);
    }

    /**
     * Reads {@code URICollection.PROPERTY_SB_ENDPOINT_CATEGORY} of the given dataset.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return the value as a long, or 0 if the property is absent
     * @throws Exception if the value is not a literal readable as a long
     */
    public static long getEndpointCategory(Resource defaultDataset) throws Exception {
        return longValue(defaultDataset, URICollection.PROPERTY_SB_ENDPOINT_CATEGORY);
    }

    /**
     * Reads {@code URICollection.PROPERTY_SB_PROPERTY_CATEGORY} of the given dataset.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return the value as a long, or 0 if the property is absent
     * @throws Exception if the value is not a literal readable as a long
     */
    public static long getPropertyCategory(Resource defaultDataset) throws Exception {
        return longValue(defaultDataset, URICollection.PROPERTY_SB_PROPERTY_CATEGORY);
    }

    /**
     * Misspelled predecessor of {@link #getPropertyCategory(Resource)}, kept so
     * that existing callers continue to compile.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return the value as a long, or 0 if the property is absent
     * @throws Exception if the value is not a literal readable as a long
     * @deprecated use {@link #getPropertyCategory(Resource)}
     */
    @Deprecated
    public static long getPropertyategory(Resource defaultDataset) throws Exception {
        return getPropertyCategory(defaultDataset);
    }

    /**
     * Reads {@code URICollection.PROPERTY_SB_CLASS_CATEGORY} of the given dataset.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return the value as a long, or 0 if the property is absent
     * @throws Exception if the value is not a literal readable as a long
     */
    public static long getClassCategory(Resource defaultDataset) throws Exception {
        return longValue(defaultDataset, URICollection.PROPERTY_SB_CLASS_CATEGORY);
    }

    /**
     * Lists every object of {@code URICollection.PROPERTY_VOID_PROPERTY_PARTITION}
     * on the given dataset.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return all matching resources; empty if there are none
     * @throws Exception if an object found is not a resource
     */
    public static Resource[] getPropertyPartitions(Resource defaultDataset) throws Exception {
        return resourceValues(defaultDataset, URICollection.PROPERTY_VOID_PROPERTY_PARTITION);
    }

    /**
     * Lists every object of {@code URICollection.PROPERTY_VOID_CLASS_PARTITION}
     * on the given dataset.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return all matching resources; empty if there are none
     * @throws Exception if an object found is not a resource
     */
    public static Resource[] getClassPartitions(Resource defaultDataset) throws Exception {
        return resourceValues(defaultDataset, URICollection.PROPERTY_VOID_CLASS_PARTITION);
    }

    /**
     * Lists every object of {@code URICollection.PROPERTY_SD_NAMED_GRAPH} on the
     * given dataset.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return all matching resources; empty if there are none
     * @throws Exception if an object found is not a resource
     */
    public static Resource[] getNamedGraphs(Resource defaultDataset) throws Exception {
        return resourceValues(defaultDataset, URICollection.PROPERTY_SD_NAMED_GRAPH);
    }

    /**
     * Reads {@code URICollection.PROPERTY_SD_NAME} of the given named graph.
     *
     * @param namedGraph the named graph resource (must not be {@code null})
     * @return the first value as a string, or {@code null} if the property is absent
     * @throws Exception if the value is not a literal
     */
    public static String getName(Resource namedGraph) throws Exception {
        Model model = namedGraph.getModel();
        Property sdName = model.getProperty(URICollection.PROPERTY_SD_NAME);
        RDFNode node = firstOrNull(model.listObjectsOfProperty(namedGraph, sdName));
        if (node == null) {
            return null;
        }
        Literal nameLit = node.asLiteral();
        return nameLit.getString();
    }

    /**
     * Lists every object of {@code URICollection.PROPERTY_SB_PROPERTY_CATEGORY_SUBSET}
     * on the given dataset.
     *
     * @param defaultDataset the dataset resource (must not be {@code null})
     * @return all matching resources; empty if there are none
     * @throws Exception if an object found is not a resource
     */
    public static Resource[] getPropertyCategorySubsets(Resource defaultDataset) throws Exception {
        return resourceValues(defaultDataset, URICollection.PROPERTY_SB_PROPERTY_CATEGORY_SUBSET);
    }

    /**
     * Lists every subject in the model typed (via
     * {@code URICollection.PROPERTY_RDF_TYPE}) as
     * {@code URICollection.CLASS_RDF_PROPERTY}, returned as Jena properties.
     *
     * @param model the model to search
     * @return all matching properties; empty if there are none
     * @throws Exception if Jena rejects a subject URI as a property URI
     */
    public static Property[] getProperties(Model model) throws Exception {
        Resource rdfProperty = model.getResource(URICollection.CLASS_RDF_PROPERTY);
        Property rdfType = model.getProperty(URICollection.PROPERTY_RDF_TYPE);
        ResIterator rit = model.listSubjectsWithProperty(rdfType, rdfProperty);
        ArrayList<Property> properties = new ArrayList<Property>();
        try {
            while (rit.hasNext()) {
                String propertyURI = rit.next().getURI();
                // Anonymous subjects have no URI and cannot be turned into a Property.
                if (propertyURI != null) {
                    properties.add(model.getProperty(propertyURI));
                }
            }
        } finally {
            rit.close();
        }
        return properties.toArray(new Property[0]);
    }

    /**
     * Lists every subject in the model typed (via
     * {@code URICollection.PROPERTY_RDF_TYPE}) as
     * {@code URICollection.RESOURCE_RDFS_CLASS}.
     *
     * @param model the model to search
     * @return all matching resources; empty if there are none
     * @throws Exception declared for interface compatibility
     */
    public static Resource[] getClasses(Model model) throws Exception {
        Resource rdfsClass = model.getResource(URICollection.RESOURCE_RDFS_CLASS);
        Property rdfType = model.getProperty(URICollection.PROPERTY_RDF_TYPE);
        ResIterator rit = model.listSubjectsWithProperty(rdfType, rdfsClass);
        ArrayList<Resource> classes = new ArrayList<Resource>();
        try {
            while (rit.hasNext()) {
                classes.add(rit.next());
            }
        } finally {
            rit.close();
        }
        return classes.toArray(new Resource[0]);
    }

    /**
     * Lists every object of {@code URICollection.PROPERTY_SB_CLASS_RELATION} on
     * the given property partition.
     *
     * @param propertyPartition the partition resource (must not be {@code null})
     * @return all matching resources; empty if there are none
     * @throws Exception if an object found is not a resource
     */
    public static Resource[] getClassRelations(Resource propertyPartition) throws Exception {
        return resourceValues(propertyPartition, URICollection.PROPERTY_SB_CLASS_RELATION);
    }

    /**
     * Finds the first object of {@code URICollection.PROPERTY_SD_ENDPOINT}
     * anywhere in the model and returns its URI.
     *
     * @param model the model to search
     * @return the endpoint URI, or {@code null} if the property is absent
     * @throws Exception if the object found is not a resource
     */
    public static String getEndpointURI(Model model) throws Exception {
        Property sdEndpoint = model.getProperty(URICollection.PROPERTY_SD_ENDPOINT);
        RDFNode endPointNode = firstOrNull(model.listObjectsOfProperty(sdEndpoint));
        return endPointNode == null ? null : endPointNode.asResource().getURI();
    }

    /**
     * Finds the first object of {@code URICollection.PROPERTY_SB_CRAWL_LOG} in
     * the model that owns the given dataset.
     *
     * NOTE(review): the lookup is model-wide; {@code defaultDataset} is only used
     * to reach the model, not as the statement subject. Confirm this is intended.
     */
    private Resource getCrawlLogBlankNode(Resource defaultDataset) throws Exception {
        Model owner = defaultDataset.getModel();
        Property sbmCrawlLog = owner.getProperty(URICollection.PROPERTY_SB_CRAWL_LOG);
        RDFNode node = firstOrNull(owner.listObjectsOfProperty(sbmCrawlLog));
        return node == null ? null : node.asResource();
    }

    /**
     * Finds the first object of {@code URICollection.PROPERTY_SB_CRAWL_START_TIME}
     * in the model that owns the given node.
     *
     * NOTE(review): the lookup is model-wide; {@code crawlLogBlankNode} is only
     * used to reach the model, not as the statement subject. Confirm this is intended.
     */
    private Calendar getStartTime(Resource crawlLogBlankNode) throws Exception {
        return firstCalendar(crawlLogBlankNode.getModel(), URICollection.PROPERTY_SB_CRAWL_START_TIME);
    }

    /**
     * Finds the first object of {@code URICollection.PROPERTY_SB_CRAWL_END_TIME}
     * in the model that owns the given node.
     *
     * NOTE(review): the lookup is model-wide; {@code crawlLogBlankNode} is only
     * used to reach the model, not as the statement subject. Confirm this is intended.
     */
    private Calendar getEndTime(Resource crawlLogBlankNode) throws Exception {
        return firstCalendar(crawlLogBlankNode.getModel(), URICollection.PROPERTY_SB_CRAWL_END_TIME);
    }

    /**
     * Loads the file into {@link #model} and fills the cached summary fields.
     */
    private void readFile(File file) throws Exception {
        System.out.println("readfile: " + file.getAbsolutePath());
        model = RDFDataMgr.loadModel(file.getAbsolutePath());

        endpointURI = getEndpointURI(model);
        Resource defaultDataSet = getDefaultDataset(model);

        // Crawl start / end are looked up model-wide, not relative to a crawl-log node.
        startDateTime = firstCalendar(model, URICollection.PROPERTY_SB_CRAWL_START_TIME);
        endDateTime = firstCalendar(model, URICollection.PROPERTY_SB_CRAWL_END_TIME);

        // NOTE(review): defaultDataSet is null when the file declares no default
        // dataset; it is passed to Jena as-is, which presumably treats a null
        // subject as "any subject" — confirm.
        Property voidTriples = model.getProperty(URICollection.PROPERTY_VOID_TRIPLES);
        numTriples = firstLong(model.listObjectsOfProperty(defaultDataSet, voidTriples));

        Property voidClasses = model.getProperty(URICollection.PROPERTY_VOID_CLASSES);
        numClasses = firstLong(model.listObjectsOfProperty(defaultDataSet, voidClasses));
    }

    /** Returns the first node of the iterator (or {@code null} if empty) and closes it. */
    private static RDFNode firstOrNull(NodeIterator nit) {
        try {
            return nit.hasNext() ? nit.next() : null;
        } finally {
            nit.close();
        }
    }

    /** Returns the first node of the iterator as a long, or 0 if the iterator is empty. */
    private static long firstLong(NodeIterator nit) {
        RDFNode node = firstOrNull(nit);
        return node == null ? 0L : node.asLiteral().getLong();
    }

    /** Reads the first value of the given property on {@code subject} as a long (0 if absent). */
    private static long longValue(Resource subject, String propertyUri) {
        Model owner = subject.getModel();
        Property property = owner.getProperty(propertyUri);
        return firstLong(owner.listObjectsOfProperty(subject, property));
    }

    /** Collects every object of the given property on {@code subject} as a resource array. */
    private static Resource[] resourceValues(Resource subject, String propertyUri) {
        Model owner = subject.getModel();
        Property property = owner.getProperty(propertyUri);
        NodeIterator nit = owner.listObjectsOfProperty(subject, property);
        ArrayList<Resource> found = new ArrayList<Resource>();
        try {
            while (nit.hasNext()) {
                found.add(nit.next().asResource());
            }
        } finally {
            nit.close();
        }
        return found.toArray(new Resource[0]);
    }

    /** Reads the first model-wide value of the given property as a calendar ({@code null} if absent). */
    private static Calendar firstCalendar(Model source, String propertyUri) {
        Property property = source.getProperty(propertyUri);
        RDFNode node = firstOrNull(source.listObjectsOfProperty(property));
        if (node == null) {
            return null;
        }
        Object value = node.asLiteral().getValue();
        return ((com.hp.hpl.jena.datatypes.xsd.XSDDateTime) value).asCalendar();
    }
}