package org.biohackathon.SPARQLBuilder.OWL; import java.util.*; import org.biohackathon.SPARQLBuilder.endpointMetadata.MetadataManager; import jp.riken.accc.db.sparqlBuilderMetadata.crawler.dataStructure.sparql.crawler.ClassPartition; import jp.riken.accc.db.sparqlBuilderMetadata.crawler.dataStructure.sparql.crawler.ClassRelation; import jp.riken.accc.db.sparqlBuilderMetadata.crawler.dataStructure.sparql.crawler.CrawledMetadata; import jp.riken.accc.db.sparqlBuilderMetadata.crawler.dataStructure.sparql.crawler.Dataset; import jp.riken.accc.db.sparqlBuilderMetadata.crawler.dataStructure.sparql.crawler.Label; import jp.riken.accc.db.sparqlBuilderMetadata.crawler.dataStructure.sparql.crawler.PropertyPartition; //public class OWLQueryBuilderForCrawlerImpl implements OWLQueryBuilder { public class AcquiredStructureAnalyzer implements RDFSchemaAnalyzer { // key: endpointURI, value: crawled metadata private HashMap crawledMetadataTable= null; public String[] getEndpointURIs(){ if( crawledMetadataTable == null ){ return new String[0]; }else{ return crawledMetadataTable.keySet().toArray(new String[0]); } } public String[] getGraphURIs(String endpointURI){ if( crawledMetadataTable == null ){ return new String[0]; }else{ CrawledMetadata crawledMetadata = crawledMetadataTable.get(endpointURI); if( crawledMetadata == null ){ return new String[0]; }else{ return crawledMetadata.getGraphURIs(); } } } public AcquiredStructureAnalyzer(MetadataManager metadataManager){ CrawledMetadata[] cmList = metadataManager.getCrawlerMetadataList(); crawledMetadataTable = new HashMap(); if( cmList != null ){ for(CrawledMetadata cm: cmList){ String endpointURI = cm.getEndpointURI(); crawledMetadataTable.put(endpointURI, cm); } } } public SClass[] listClasses() throws Exception{ return getOWLClasses(null, null); } public SClass[] getOWLClasses(String[] keywords, String language) throws Exception{ return getOWLClassList(keywords, language).toArray(new SClass[0]); } public List getOWLClassList(String[] keywords, String language) throws Exception{ ArrayList classList = new ArrayList(); Set endpointURISet = crawledMetadataTable.keySet(); for(String endpointURI: endpointURISet){ CrawledMetadata cm = crawledMetadataTable.get(endpointURI); // default Dataset dataset = cm.getDefaultDataset(); List tempClassList = getOWLClassList(endpointURI, null, dataset, keywords, language); for(SClass sClass: tempClassList){ classList.add(sClass); } // graphs String[] graphURIs = cm.getGraphURIs(); if( graphURIs != null ){ for(String graphURI: graphURIs){ dataset = cm.getDataset(graphURI); tempClassList = getOWLClassList(endpointURI, graphURI, dataset, keywords, language); for(SClass sClass: tempClassList){ classList.add(sClass); } } } } return classList; } private List getOWLClassList(String endpointURI, String graphURI, Dataset dataset, String[] keywords, String language) throws Exception{ ArrayList results = new ArrayList(); ClassPartition[] classPartitionList = dataset.getClassPartitions(); if( classPartitionList == null || classPartitionList.length == 0 ){ return new ArrayList(); } for( ClassPartition cp: classPartitionList){ String classURI = cp.classDef.classURI; Label[] rLabels = cp.classDef.labels; org.biohackathon.SPARQLBuilder.OWL.Label[] labels = null; if( rLabels == null ){ labels = new org.biohackathon.SPARQLBuilder.OWL.Label[0]; }else{ labels = new org.biohackathon.SPARQLBuilder.OWL.Label[rLabels.length]; for(int i = 0; i < rLabels.length; i++ ) { labels[i] = new org.biohackathon.SPARQLBuilder.OWL.Label(rLabels[i].value, rLabels[i].language); } } int entities = cp.entities; if( keywords == null || keywords.length == 0 ){ SClass sClass = new SClass(classURI, labels, entities, endpointURI, graphURI); results.add(sClass); }else{ boolean hit = false; for(org.biohackathon.SPARQLBuilder.OWL.Label label: labels){ if( language == null || label.getLanguage().equals(language)){ String value = label.getLabel(); if( value != null ){ value = value.toLowerCase().trim(); for(String keyword: keywords){ if( value.contains(keyword.toLowerCase().trim())){ hit = true; break; } } } } if( hit ){ SClass sClass = new SClass(classURI, labels, entities, endpointURI, graphURI); results.add(sClass); } } } } return results; } public ClassLink[] getNextClass(String originClass, int limit) throws Exception{ ArrayList classLinkList = new ArrayList(); Set endpointURISet = crawledMetadataTable.keySet(); for(String endpointURI: endpointURISet){ CrawledMetadata cm = crawledMetadataTable.get(endpointURI); // default Dataset dataset = cm.getDefaultDataset(); List tempClassLinkList = getNextClass(endpointURI, null, dataset, originClass, limit); for(ClassLink classLink: tempClassLinkList){ classLinkList.add(classLink); } // graphs String[] graphURIs = cm.getGraphURIs(); if( graphURIs != null ){ for(String graphURI: graphURIs){ dataset = cm.getDataset(graphURI); tempClassLinkList = getNextClass(endpointURI, graphURI, dataset, originClass, limit); for(ClassLink classLink: tempClassLinkList){ classLinkList.add(classLink); } } } } return classLinkList.toArray(new ClassLink[0]); } private List getNextClass(String endpointURI, String graphURI, Dataset dataset, String originClass, int limit) throws Exception{ ArrayList classLinkList = new ArrayList(); PropertyPartition[] pps = dataset.getPropertyPartitions(); if( pps == null ){ return classLinkList; } for(PropertyPartition pp: pps){ ClassRelation[] classRelations = pp.classRelations; if( classRelations != null ){ for(ClassRelation classRelation: classRelations){ String subjClassURI = classRelation.subjectClassURI; String objClassURI = classRelation.objectClassURI; boolean forward = false; boolean reverse = false; if( objClassURI != null && objClassURI.equals(originClass) ){ if( subjClassURI != null ){ reverse = true; } } if(subjClassURI != null && subjClassURI.equals(originClass)){ if( objClassURI != null || classRelation.objectDatatypeURI != null ){ forward = true; } } ClassLink classLink = null; if( forward && !reverse ){ classLink = new ClassLink(); classLink.setDirection(Direction.forward); classLink.setNumOfOriginClassInstances(classRelation.distinctSubjects); if( objClassURI != null ){ classLink.setLinkedClassURI(objClassURI); classLink.setNumOfLinkedClassInstances(classRelation.distinctObjects); }else{ classLink.setLinkedLiteralDatatypeURI(classRelation.objectDatatypeURI); classLink.setNumOfLinkedInstances(classRelation.triples); } classLink.setNumOfOriginInstances(pp.distinctSubjects); classLink.setNumOfLinkedInstances(pp.distinctObjects); } if( !forward && reverse ){ classLink = new ClassLink(); classLink.setDirection(Direction.reverse); classLink.setLinkedClassURI(objClassURI); classLink.setNumOfOriginClassInstances(classRelation.distinctObjects); classLink.setNumOfOriginInstances(pp.distinctObjects); classLink.setNumOfLinkedInstances(pp.distinctSubjects); classLink.setNumOfLinkedClassInstances(classRelation.distinctSubjects); } if( forward && reverse){ classLink = new ClassLink(); classLink.setDirection(Direction.both); classLink.setLinkedClassURI(objClassURI); classLink.setNumOfOriginClassInstances(classRelation.distinctSubjects); classLink.setNumOfOriginInstances(pp.distinctSubjects); classLink.setNumOfLinkedInstances(pp.distinctObjects); classLink.setNumOfLinkedClassInstances(classRelation.distinctObjects); } // hit if( classLink != null ){ classLink.setEndpointURI(endpointURI); classLink.setGraphURI(graphURI); classLink.setPropertyURI(pp.propertyDef.propertyURI); classLink.setNumOfLinks(classRelation.triples); classLinkList.add(classLink); } } } } return classLinkList; } public LabelMap[] getLabels(String[] resourceURIs, String language) throws Exception { if( resourceURIs == null || resourceURIs.length == 0 ){ return new LabelMap[0]; } HashSet resourceURIset = new HashSet(); for(String resourceURI: resourceURIs){ resourceURIset.add(resourceURI); } HashMap labelMapTable = new HashMap(); Set endpointURISet = crawledMetadataTable.keySet(); for(String endpointURI: endpointURISet){ CrawledMetadata cm = crawledMetadataTable.get(endpointURI); // default Dataset dataset = cm.getDefaultDataset(); labelMapTable = getLabels(dataset, resourceURIset, language, labelMapTable); // graphs String[] graphURIs = cm.getGraphURIs(); if( graphURIs != null ){ for(String graphURI: graphURIs){ dataset = cm.getDataset(graphURI); labelMapTable = getLabels(dataset, resourceURIset, language, labelMapTable); } } } return labelMapTable.values().toArray(new LabelMap[0]); } private HashMap getLabels(Dataset dataset, HashSet resourceURISet , String language, HashMap labelMapTable) throws Exception { ClassPartition[] cps = dataset.getClassPartitions(); if( cps != null ){ for(ClassPartition cp: cps){ String uri = cp.classDef.classURI; if( resourceURISet.contains(uri)){ Label[] rLabels = cp.classDef.labels; if( rLabels != null ){ for(Label rLabel: rLabels){ if( language == null || ( rLabel.language == null || rLabel.language.equals(language))){ LabelMap labelMap = null; if( labelMapTable.containsKey(uri)){ labelMap = labelMapTable.get(uri); }else{ labelMap = new LabelMap(); labelMapTable.put(uri, labelMap); labelMap.setResourceURI(uri); } labelMap.addLabel(new org.biohackathon.SPARQLBuilder.OWL.Label(rLabel.value, rLabel.language)); } } } } } } PropertyPartition[] pps = dataset.getPropertyPartitions(); if( pps != null ){ for(PropertyPartition pp: pps){ String uri = pp.propertyDef.propertyURI; if( resourceURISet.contains(uri)){ Label[] rLabels = pp.propertyDef.labels; if( rLabels != null ){ for(Label rLabel: rLabels){ if( language == null || ( rLabel.language == null || rLabel.language.equals(language))){ LabelMap labelMap = null; if( labelMapTable.containsKey(uri)){ labelMap = labelMapTable.get(uri); }else{ labelMap = new LabelMap(); labelMapTable.put(uri, labelMap); labelMap.setResourceURI(uri); } labelMap.addLabel(new org.biohackathon.SPARQLBuilder.OWL.Label(rLabel.value, rLabel.language)); } } } } } } return labelMapTable; } }