#!/usr/bin/env bash
#
# Download the NCBI "nt" and "wgs" FASTA databases, rewrite their FASTA title
# lines with convert_title.py, and format each one into BLAST database chunks
# with formatdb.
#
# Usage:    run without arguments from the directory that contains
#           convert_title.py; formatted files (base names nt.chunk and
#           wgs.chunk) are written to the current directory.
# Requires: wget, gunzip, python and formatdb on PATH.

# Abort on the first failing command, unset variable, or failing pipeline
# stage, so a broken download or format step never reaches the "rm" below.
set -euo pipefail

readonly NCBI_FASTA_URL="ftp://ftp.ncbi.nlm.nih.gov/blast/db/FASTA"
readonly TITLE_CONVERTER="convert_title.py"

# Print a message to stderr and exit with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Fail fast if a required tool or the title converter script is missing,
# instead of finding out after a multi-hour download.
# Globals:   TITLE_CONVERTER (read)
# Arguments: none
# Returns:   0 if everything is available; exits non-zero otherwise
#######################################
preflight() {
  local tool
  for tool in wget gunzip python formatdb; do
    command -v "${tool}" >/dev/null || die "required tool not found: ${tool}"
  done
  [[ -r "${TITLE_CONVERTER}" ]] \
    || die "cannot read ${TITLE_CONVERTER} in the current directory"
}

#######################################
# Download one FASTA database archive, convert its title lines, format it
# with formatdb, and delete the archive afterwards.
# Globals:   NCBI_FASTA_URL, TITLE_CONVERTER (read)
# Arguments: $1 - database name on the NCBI server (e.g. "nt" or "wgs")
# Outputs:   progress messages on stdout; formatted database files with the
#            base name "<name>.chunk" in the current directory
# Returns:   0 on success; the script aborts on any failing step
#######################################
build_database() {
  local db_name=$1
  local archive="${db_name}.gz"

  echo "Getting ${db_name} database from NCBI..."
  # -O pins the output file name: without it, wget saves to "<archive>.1"
  # when a file from an earlier (possibly interrupted) run already exists,
  # and the pipeline below would then read the stale file.
  wget -O "${archive}" "${NCBI_FASTA_URL}/${archive}"

  echo "Changing fasta title lines to >ginumber_seqlength..."
  echo "Formatting ${db_name} database to chunks of 2GB each..."
  # Stream the archive so the uncompressed FASTA never has to be stored on
  # disk. formatdb flags: -p F marks the input as nucleotide, -n sets the
  # output base name, -v 2000 sets the volume size (the 2GB chunks announced
  # above).
  gunzip -c "${archive}" \
    | python "${TITLE_CONVERTER}" \
    | formatdb -i stdin -p F -n "${db_name}.chunk" -v 2000

  echo "Remove the zip file, keep the formatted files."
  # Only reached when every pipeline stage succeeded (set -e + pipefail).
  rm -- "${archive}"
}

main() {
  echo "This will take several hours to finish due to the size of the databases (about 30GB)..."
  preflight

  build_database nt
  build_database wgs

  echo "Job finished"
}

main "$@"