[3] | 1 | """ |
---|
| 2 | Data Generator |
---|
| 3 | """ |
---|
| 4 | import random |
---|
| 5 | import pathfix |
---|
| 6 | from testutil import commify |
---|
| 7 | |
---|
def fasta_generator(fname, seqnum, seqlen):
    """
    Generates a FASTA file

    Writes ``seqnum`` records to ``fname``. Each record is a header line
    of the form ``>idNNNNNN`` (zero-padded counter starting at 0) followed
    by a single line holding ``seqlen`` characters drawn from "ATGC".

    One random sequence is drawn up front and then reshuffled in place for
    every record, so all records contain the same multiset of bases and
    differ only in their order.

    fname  -- path of the output file; opened in text write mode, so an
              existing file at that path is truncated
    seqnum -- number of records to write
    seqlen -- number of bases per record
    """
    # open() replaces the legacy file() builtin, and the with-block closes
    # the handle even when a write fails part-way through.
    with open(fname, 'wt') as fp:
        bases = [random.choice("ATGC") for _ in range(seqlen)]

        # commify is a project helper; its output is only used for display.
        snum, slen = commify(seqnum), commify(seqlen)
        # Parenthesised single-argument form prints the same text whether
        # print is a statement or a function.
        print("Fasta Generator N=%s L=%s into '%s' " % (snum, slen, fname))

        for index in range(seqnum):
            header = "id%06d" % index
            random.shuffle(bases)
            fp.write('>%s\n' % header)
            fp.write('%s\n' % "".join(bases))
---|
| 26 | |
---|
def run(seqnum=10**5, seqlen=10**2, fname='data/100K.fasta'):
    """
    Generates the default FASTA data set

    Called without arguments this writes 10**5 records of 10**2 bases
    each to 'data/100K.fasta'. The parameters allow other sizes or
    destinations to be produced through the same entry point.

    seqnum -- number of records to write
    seqlen -- number of bases per record
    fname  -- path of the output file; nothing here creates the target
              directory, so it has to exist already
    """
    fasta_generator(fname, seqnum, seqlen)
---|
| 32 | |
---|
# Script entry point: generate the data file only when this module is
# executed directly, not when it is imported.
if __name__ == "__main__":
    run()
---|