#!/usr/bin/env python
# Kanwei Li, 2010
# Selects N random lines from a file and outputs to another file
---|
| 4 | |
---|
| 5 | import random, sys |
---|
| 6 | |
---|
def main():
    """Select N random lines from a file and write them to another file.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input file
        2. number of lines to select (an integer >= 1)
        3. path of the output file

    The input is read in a single pass while a reservoir of at most N lines
    is kept.  The selected lines are written in the order they appear in the
    input, joined by newlines, with no trailing newline.

    Exits with status 1, after writing a message to stderr, when N is less
    than one or when the input holds fewer than N lines.  The output file is
    not opened in either case.
    """
    total_lines = int(sys.argv[2])

    if total_lines < 1:
        sys.stderr.write("Must select at least one line.\n")
        sys.exit(1)

    kept = []
    n = 0  # stays 0 when the input file is empty
    with open(sys.argv[1], 'r') as infile:
        for n, line in enumerate(infile, 1):
            line = line.rstrip("\n")
            if n <= total_lines:
                # Fill the reservoir with the first N lines.
                kept.append(line)
            elif random.randint(1, n) <= total_lines:
                # Line n is accepted with probability N/n and evicts a
                # uniformly chosen entry.  pop + append (instead of
                # overwriting the slot) keeps the reservoir in input order.
                kept.pop(random.randint(0, total_lines - 1))
                kept.append(line)

    if n < total_lines:
        sys.stderr.write("Error: asked to select more lines than there were in the file.\n")
        sys.exit(1)

    with open(sys.argv[3], 'w') as outfile:
        outfile.write("\n".join(kept))
---|
| 31 | |
---|
# Run the sampler only when executed as a script, so importing this module
# has no side effects.
if __name__ == "__main__":
    main()
---|