[2] | 1 |
|
---|
def load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' ):
    """
    Parse ``microbial_data.loc`` and return the organism records grouped by kingdom.

    The file ``<GALAXY_DATA_INDEX_DIR>/microbial_data.loc`` is read line by line and each
    line is split on ``sep``. Blank lines and lines starting with '#' are ignored. The first
    field (compared case-insensitively) selects the record type; the remaining fields are,
    in order:

        ORG   org_num, name, kingdom, group, chromosomes, info_url, link_site
        CHR   org_num, chr_acc, name, length, gi, gb, info_url
        DATA  uid, org_num, chr_acc, feature, filetype, path

    Extra trailing fields are ignored. Lines of any other type, records with too few fields,
    and DATA records whose chromosome has not been defined by an earlier CHR record are
    skipped. Organisms that never received an ORG record (and so have no kingdom) are left
    out of the result.

    Returns a dict of the form ``microbe_info[ kingdom ][ org_num ] = org`` where ``org`` has
    the keys 'name', 'kingdom', 'group', 'chromosomes', 'info_url', 'link_site' and 'chrs'.
    ``org[ 'chrs' ][ chr_acc ]`` holds 'name', 'length', 'gi', 'gb', 'info_url' and, once a
    DATA record was read for it, 'data', which maps uid -> { 'filetype', 'path', 'feature' }.
    All values are the raw strings from the file.

    Raises IOError (OSError on Python 3) if the .loc file cannot be opened.
    """
    # FIXME: this function is duplicated in the DynamicOptions class. It is used here only to
    # set data.name in exec_after_process().
    orgs = {}

    filename = "%s/microbial_data.loc" % GALAXY_DATA_INDEX_DIR
    # The context manager closes the file even if reading fails part-way through.
    with open( filename ) as loc_file:
        for line in loc_file:
            line = line.rstrip( '\r\n' )
            if not line or line.startswith( '#' ):
                continue
            fields = line.split( sep )
            info_type = fields.pop( 0 ).upper()
            # Read each line; if there are not enough fields, go to the next line. Unpacking
            # happens before anything is stored, so a short record leaves no partial state.
            try:
                if info_type == "ORG":
                    #ORG 12521 Clostridium perfringens SM101 bacteria Firmicutes CP000312,CP000313,CP000314,CP000315 http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=12521
                    org_num, name, kingdom, group, chromosomes, info_url, link_site = fields[ :7 ]
                    org = orgs.setdefault( org_num, { 'chrs': {} } )
                    org[ 'name' ] = name
                    org[ 'kingdom' ] = kingdom
                    org[ 'group' ] = group
                    org[ 'chromosomes' ] = chromosomes
                    org[ 'info_url' ] = info_url
                    org[ 'link_site' ] = link_site
                elif info_type == "CHR":
                    #CHR 12521 CP000315 Clostridium perfringens phage phiSM101, complete genome 38092 110684521 CP000315.1
                    org_num, chr_acc, name, length, gi, gb, info_url = fields[ :7 ]
                    org = orgs.setdefault( org_num, { 'chrs': {} } )
                    org[ 'chrs' ][ chr_acc ] = {
                        'name': name,
                        'length': length,
                        'gi': gi,
                        'gb': gb,
                        'info_url': info_url,
                    }
                elif info_type == "DATA":
                    #DATA 12521_12521_CDS 12521 CP000315 CDS bed /home/djb396/alignments/playground/bacteria/12521/CP000315.CDS.bed
                    uid, org_num, chr_acc, feature, filetype, path = fields[ :6 ]
                    org = orgs.setdefault( org_num, { 'chrs': {} } )
                    # KeyError here means no CHR record defined this chromosome yet.
                    chrom = org[ 'chrs' ][ chr_acc ]
                    chrom.setdefault( 'data', {} )[ uid ] = {
                        'filetype': filetype,
                        'path': path,
                        'feature': feature,
                    }
            except ( ValueError, KeyError ):
                # ValueError: too few fields to unpack; KeyError: unknown chromosome.
                continue

    microbe_info = {}
    for org_num, org in orgs.items():
        kingdom = org.get( 'kingdom' )
        if kingdom is None:
            # Only CHR/DATA records were seen for this organism; without an ORG record
            # there is no kingdom to file it under, so skip it instead of raising KeyError.
            continue
        microbe_info.setdefault( kingdom, {} )[ org_num ] = org
    return microbe_info
|
---|
| 81 |
|
---|
| 82 | #post processing, set build for data and add additional data to history
|
---|
| 83 | from galaxy import datatypes, config, jobs, tools
|
---|
| 84 | from shutil import copyfile
|
---|
| 85 |
|
---|
def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
    """
    Post-process the tool run: rename/retype the first output dataset and add extra datasets.

    The tool's ``stdout`` is scanned line by line; each line is split on tabs and handled
    according to its first field:

        #File1    description, chr, dbkey, file_type
                  Updates the first dataset in ``out_data``: refreshes its size, appends
                  " (<feature> for <organism name>:<chr>)" to its name, sets dbkey and info,
                  changes its datatype to ``file_type`` and regenerates metadata and peek.
        #NewFile  description, chr, dbkey, filepath, file_type
                  Creates a new HistoryDatasetAssociation named after the first dataset's
                  original name, copies the permissions of the first output dataset onto it,
                  adds it to the history and copies ``filepath`` into its file. If the copy
                  fails the new dataset is put in the error state with an explanatory info.

    The feature and organism names come from load_microbial_data(), looked up with the
    'kingdom' and 'org' entries of ``param_dict``. ``inp_data``, ``tool`` and ``stderr`` are
    not used. Returns None; returns early (after printing a message) when the first output
    dataset has no history.
    """
    # list() keeps the indexing valid when items() returns a view rather than a list.
    base_dataset = list( out_data.items() )[0][1]
    history = base_dataset.history
    if history is None:
        print( "unknown history!" )
        return
    kingdom = param_dict.get( 'kingdom', None )
    org = param_dict.get( 'org', None )

    if not (kingdom or org):
        # NOTE(review): execution continues after this message, so the microbe_info lookups
        # below raise KeyError if a "#File1"/"#NewFile" line is present -- confirm intended.
        print( "Parameters are not available." )
    #workflow passes galaxy.tools.parameters.basic.UnvalidatedValue instead of values
    if isinstance( kingdom, tools.parameters.basic.UnvalidatedValue ):
        kingdom = kingdom.value
    if isinstance( org, tools.parameters.basic.UnvalidatedValue ):
        org = org.value

    GALAXY_DATA_INDEX_DIR = app.config.tool_data_path
    microbe_info = load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' )
    # Name of the first dataset before it is decorated; reused as prefix for new datasets.
    basic_name = ""
    for line in stdout.split( "\n" ):
        fields = line.split( "\t" )
        if fields[0] == "#File1":
            description = fields[1]
            chrom = fields[2]
            dbkey = fields[3]
            file_type = fields[4]
            data = list( out_data.items() )[0][1]
            data.set_size()
            basic_name = data.name
            org_info = microbe_info[kingdom][org]
            data.name = data.name + " (" + org_info['chrs'][chrom]['data'][description]['feature'] + " for " + org_info['name'] + ":" + chrom + ")"
            data.dbkey = dbkey
            data.info = data.name
            data = app.datatypes_registry.change_datatype( data, file_type )
            data.init_meta()
            data.set_peek()
            app.model.context.add( data )
            app.model.context.flush()
        elif fields[0] == "#NewFile":
            description = fields[1]
            chrom = fields[2]
            dbkey = fields[3]
            filepath = fields[4]
            file_type = fields[5]
            newdata = app.model.HistoryDatasetAssociation( create_dataset = True, sa_session = app.model.context ) #This import should become a library
            newdata.set_size()
            newdata.extension = file_type
            org_info = microbe_info[kingdom][org]
            newdata.name = basic_name + " (" + org_info['chrs'][chrom]['data'][description]['feature'] + " for " + org_info['name'] + ":" + chrom + ")"
            app.model.context.add( newdata )
            app.model.context.flush()
            app.security_agent.copy_dataset_permissions( base_dataset.dataset, newdata.dataset )
            history.add_dataset( newdata )
            app.model.context.add( history )
            app.model.context.flush()
            try:
                copyfile( filepath, newdata.file_name )
                newdata.info = newdata.name
                newdata.state = jobs.JOB_OK
            except Exception:
                # Best effort: any failure to copy marks the dataset as errored rather than
                # aborting the hook. Exception (not a bare except) lets KeyboardInterrupt
                # and SystemExit propagate.
                newdata.info = "The requested file is missing from the system."
                newdata.state = jobs.JOB_ERROR
            newdata.dbkey = dbkey
            newdata.init_meta()
            newdata.set_peek()
            app.model.context.flush()
|
---|