"""
Data Generator
"""
---|
4 | import random |
---|
5 | import pathfix |
---|
6 | from testutil import commify |
---|
7 | |
---|
def fasta_generator(fname, seqnum, seqlen):
    """
    Generates a FASTA file

    Writes ``seqnum`` records to ``fname``. Each record consists of a
    header line ``>idNNNNNN`` (the zero-padded record index) followed by
    one sequence line of ``seqlen`` characters drawn from "ATGC".

    A single list of random bases is built once and reshuffled in place
    for every record, so all records have the same base composition and
    differ only in the order of the bases.

    Parameters:
        fname  -- path of the output file; opened in 'wt' mode, so an
                  existing file is overwritten
        seqnum -- number of records to write
        seqlen -- number of bases in each sequence line
    """
    # The context manager guarantees the file is closed even when a write
    # (or anything else below) raises part-way through.
    with open(fname, 'wt') as fp:
        bases = [random.choice("ATGC") for _ in range(seqlen)]

        # commify is imported from testutil; presumably it formats the
        # counts with thousands separators for display -- verify there.
        snum, slen = commify(seqnum), commify(seqlen)

        # Single parenthesised argument: prints identically on Python 2
        # and Python 3.
        print("Fasta Generator N=%s L=%s into '%s' " % (snum, slen, fname))

        for index in range(seqnum):
            random.shuffle(bases)
            fp.write('>id%06d\n' % index)
            fp.write('%s\n' % "".join(bases))
---|
26 | |
---|
def run(seqnum=10**5, seqlen=10**2, fname='data/100K.fasta'):
    """
    Generates a FASTA data set by delegating to fasta_generator.

    Called without arguments it writes 10**5 sequences of 10**2 bases
    each to 'data/100K.fasta', exactly as before; the keyword arguments
    allow other sizes or destinations without editing this module.

    Parameters:
        seqnum -- number of sequences to generate
        seqlen -- length of each sequence
        fname  -- output path; the default is a relative path, so it is
                  resolved against the current working directory
    """
    fasta_generator(fname, seqnum, seqlen)
---|
32 | |
---|
# Script entry point: build the default data set only when this module is
# executed directly, not when it is imported.
if __name__ == "__main__":
    run()
---|