| 1 | """ |
|---|
| 2 | XML format classes |
|---|
| 3 | """ |
|---|
| 4 | import data |
|---|
| 5 | import logging |
|---|
| 6 | from galaxy.datatypes.sniff import * |
|---|
| 7 | |
|---|
| 8 | log = logging.getLogger(__name__) |
|---|
| 9 | |
|---|
class BlastXml( data.Text ):
    """NCBI Blast XML Output data"""
    file_ext = "blastxml"

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set the peek and blurb text on ``dataset``.

        If the underlying dataset has not been purged, the peek is read from
        ``dataset.file_name`` with ``data.get_file_peek`` (``is_multi_byte``
        is passed straight through) and the blurb names the format.
        Otherwise both fields are set to placeholder text saying the file
        is gone.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'NCBI Blast XML data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Determines whether the file is blastxml

        The first three lines of the file (ignoring trailing newline /
        carriage-return characters) must match the expected XML declaration,
        DOCTYPE and ``<BlastOutput>`` root tag exactly, and at least one
        further line must follow them. Returns True in that case and False
        otherwise, including for empty files and files that end at or before
        the last header line.

        >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' )
        >>> BlastXml().sniff( fname )
        True
        >>> fname = get_test_fname( 'interval.interval' )
        >>> BlastXml().sniff( fname )
        False
        """
        blastxml_header = [ '<?xml version="1.0"?>',
                            '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
                            '<BlastOutput>' ]
        # open() replaces the Python-2-only file() builtin; try/finally
        # guarantees the handle is closed on every return path without
        # relying on syntax newer than the rest of this module uses.
        handle = open( filename )
        try:
            for i, line in enumerate( handle ):
                if i >= len( blastxml_header ):
                    # Every header line matched and content follows it.
                    return True
                if line.rstrip( '\n\r' ) != blastxml_header[ i ]:
                    return False
        finally:
            handle.close()
        # The file ran out before a line past the header was seen. Return an
        # explicit False instead of implicitly falling through with None.
        return False