1 | #!/usr/bin/env python |
---|
2 | # Kanwei Li, 2010 |
---|
3 | # Selects N random lines from a file and outputs to another file |
---|
4 | |
---|
5 | import random, sys |
---|
6 | |
---|
def main():
    """Write a uniform random sample of lines from one file to another.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input file
        2. number of lines to select (an integer >= 1)
        3. path of the output file

    The sample is drawn in a single pass with reservoir sampling, so only
    the currently selected lines are held in memory.  The selected lines
    are written joined by newlines, with no trailing newline, and not
    necessarily in the order they appeared in the input.

    Exits with status 1, after writing a message to stderr, when fewer than
    one line is requested or when the input holds fewer lines than
    requested.  In both cases the output file is not written.
    """
    total_lines = int(sys.argv[2])

    # Validate the request before touching any file.
    if total_lines < 1:
        sys.stderr.write("Must select at least one line.")
        sys.exit(1)

    kept = []
    n = 0  # stays 0 when the input file is empty
    with open(sys.argv[1], 'r') as infile:
        for n, line in enumerate(infile, 1):
            line = line.rstrip("\n")
            if n <= total_lines:
                # Fill the reservoir with the first `total_lines` lines.
                kept.append(line)
            elif random.randint(1, n) <= total_lines:
                # Line n is accepted with probability total_lines / n and
                # overwrites a uniformly chosen slot of the reservoir.
                kept[random.randrange(total_lines)] = line

    if n < total_lines:
        sys.stderr.write("Error: asked to select more lines than there were in the file.")
        sys.exit(1)

    with open(sys.argv[3], 'w') as outfile:
        outfile.write("\n".join(kept))
---|
31 | |
---|
# Run the sampler only when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == "__main__":
    main()
---|