#!/usr/bin/env python
"""
Fetch jobs using gops_intersect, gops_merge, gops_subtract, gops_complement, gops_coverage
wherein the second dataset doesn't have chr, start and end in standard columns 1, 2 and 3.
"""

# NOTE: import order matters here -- galaxy.eggs must be imported first so that
# Galaxy's bundled eggs are on sys.path before pkg_resources is consulted.
from galaxy import eggs
import sys
import os
import ConfigParser
import tempfile
import galaxy.app
import galaxy.model.mapping
import pkg_resources

pkg_resources.require( "SQLAlchemy >= 0.4" )
import sqlalchemy as sa

# This is a Python 2 script; fail early on anything older than 2.4.
assert sys.version_info[:2] >= ( 2, 4 )
class TestApplication( object ):
    """
    Encapsulates the minimal state of a Universe (Galaxy) application needed to
    query the model: a database connection string and the dataset file path.

    Raises an Exception from __init__ if either required value is missing.
    """
    def __init__( self, database_connection=None, file_path=None ):
        # Log the python path to stderr to aid debugging of egg/module resolution.
        print >> sys.stderr, "python path is: " + ", ".join( sys.path )
        # Fail fast with messages that name THIS class (the originals were
        # copy-pasted from CleanupDatasetsApplication and misled the reader).
        if database_connection is None:
            raise Exception( "TestApplication requires a database_connection value" )
        if file_path is None:
            raise Exception( "TestApplication requires a file_path value" )
        self.database_connection = database_connection
        self.file_path = file_path
        # Set up the database engine and ORM mappings; the tables must already
        # exist (create_tables=False).
        self.model = galaxy.model.mapping.init( self.file_path, self.database_connection, engine_options={}, create_tables=False )
| 31 | def main(): |
---|
| 32 | ini_file = sys.argv[1] |
---|
| 33 | conf_parser = ConfigParser.ConfigParser( {'here':os.getcwd()} ) |
---|
| 34 | conf_parser.read( ini_file ) |
---|
| 35 | configuration = {} |
---|
| 36 | for key, value in conf_parser.items( "app:main" ): |
---|
| 37 | configuration[key] = value |
---|
| 38 | database_connection = configuration['database_connection'] |
---|
| 39 | file_path = configuration['file_path'] |
---|
| 40 | app = TestApplication( database_connection=database_connection, file_path=file_path ) |
---|
| 41 | jobs = {} |
---|
| 42 | try: |
---|
| 43 | for job in app.model.Job.filter( sa.and_( app.model.Job.table.c.create_time.between( '2008-05-23', '2008-11-29' ), |
---|
| 44 | app.model.Job.table.c.state == 'ok', |
---|
| 45 | sa.or_( |
---|
| 46 | sa.and_( sa.or_( app.model.Job.table.c.tool_id == 'gops_intersect_1', |
---|
| 47 | app.model.Job.table.c.tool_id == 'gops_subtract_1', |
---|
| 48 | app.model.Job.table.c.tool_id == 'gops_coverage_1', |
---|
| 49 | ), |
---|
| 50 | sa.not_( app.model.Job.table.c.command_line.like( '%-2 1,2,3%' ) ) |
---|
| 51 | ), |
---|
| 52 | sa.and_( sa.or_( app.model.Job.table.c.tool_id == 'gops_complement_1', |
---|
| 53 | app.model.Job.table.c.tool_id == 'gops_merge_1', |
---|
| 54 | ), |
---|
| 55 | sa.not_( app.model.Job.table.c.command_line.like( '%-1 1,2,3%' ) ) |
---|
| 56 | ) |
---|
| 57 | ) |
---|
| 58 | ) |
---|
| 59 | ).all(): |
---|
| 60 | print "# processing job id %s" % str( job.id ) |
---|
| 61 | for jtoda in job.output_datasets: |
---|
| 62 | print "# --> processing JobToOutputDatasetAssociation id %s" % str( jtoda.id ) |
---|
| 63 | hda = app.model.HistoryDatasetAssociation.get( jtoda.dataset_id ) |
---|
| 64 | print "# ----> processing HistoryDatasetAssociation id %s" % str( hda.id ) |
---|
| 65 | if not hda.deleted: |
---|
| 66 | # Probably don't need this check, since the job state should suffice, but... |
---|
| 67 | if hda.dataset.state == 'ok': |
---|
| 68 | history = app.model.History.get( hda.history_id ) |
---|
| 69 | print "# ------> processing history id %s" % str( history.id ) |
---|
| 70 | if history.user_id: |
---|
| 71 | cmd_line = str( job.command_line ) |
---|
| 72 | new_output = tempfile.NamedTemporaryFile('w') |
---|
| 73 | if job.tool_id in ['gops_intersect_1','gops_subtract_1','gops_coverage_1']: |
---|
| 74 | new_cmd_line = " ".join(map(str,cmd_line.split()[:4])) + " " + new_output.name + " " + " ".join(map(str,cmd_line.split()[5:])) |
---|
| 75 | job_output = cmd_line.split()[4] |
---|
| 76 | else: |
---|
| 77 | new_cmd_line = " ".join(map(str,cmd_line.split()[:3])) + " " + new_output.name + " " + " ".join(map(str,cmd_line.split()[4:])) |
---|
| 78 | job_output = cmd_line.split()[3] |
---|
| 79 | try: |
---|
| 80 | os.system(new_cmd_line) |
---|
| 81 | except: |
---|
| 82 | pass |
---|
| 83 | diff_status = os.system('diff %s %s >> /dev/null' %(new_output.name, job_output)) |
---|
| 84 | if diff_status == 0: |
---|
| 85 | continue |
---|
| 86 | print "# --------> Outputs differ" |
---|
| 87 | user = app.model.User.get( history.user_id ) |
---|
| 88 | jobs[ job.id ] = {} |
---|
| 89 | jobs[ job.id ][ 'hda_id' ] = hda.id |
---|
| 90 | jobs[ job.id ][ 'hda_name' ] = hda.name |
---|
| 91 | jobs[ job.id ][ 'hda_info' ] = hda.info |
---|
| 92 | jobs[ job.id ][ 'history_id' ] = history.id |
---|
| 93 | jobs[ job.id ][ 'history_name' ] = history.name |
---|
| 94 | jobs[ job.id ][ 'history_update_time' ] = history.update_time |
---|
| 95 | jobs[ job.id ][ 'user_email' ] = user.email |
---|
| 96 | except Exception, e: |
---|
| 97 | print "# caught exception: %s" % str( e ) |
---|
| 98 | |
---|
| 99 | print "\n\n# Number of incorrect Jobs: %d\n\n" % ( len( jobs ) ) |
---|
| 100 | print "#job_id\thda_id\thda_name\thda_info\thistory_id\thistory_name\thistory_update_time\tuser_email" |
---|
| 101 | for jid in jobs: |
---|
| 102 | print '%s\t%s\t"%s"\t"%s"\t%s\t"%s"\t"%s"\t%s' % \ |
---|
| 103 | ( str( jid ), |
---|
| 104 | str( jobs[ jid ][ 'hda_id' ] ), |
---|
| 105 | jobs[ jid ][ 'hda_name' ], |
---|
| 106 | jobs[ jid ][ 'hda_info' ], |
---|
| 107 | str( jobs[ jid ][ 'history_id' ] ), |
---|
| 108 | jobs[ jid ][ 'history_name' ], |
---|
| 109 | jobs[ jid ][ 'history_update_time' ], |
---|
| 110 | jobs[ jid ][ 'user_email' ] |
---|
| 111 | ) |
---|
| 112 | sys.exit(0) |
---|
| 113 | |
---|
# Script entry point: expects the path to the Galaxy ini file as sys.argv[1].
if __name__ == "__main__":
    main()