Small script correction
This commit is contained in:
parent
17253595a3
commit
120587d98d
4
wiki.sh
4
wiki.sh
@@ -1,7 +1,7 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# Download the latest Wikipedia dump file
|
# Download the latest Wikipedia dump file
|
||||||
#wget https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
|
wget https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
|
||||||
|
|
||||||
# Extract the text from the dump file using native Bash commands
|
# Extract the text from the dump file using native Bash commands
|
||||||
bzcat enwiki-latest-pages-articles.xml.bz2 | \
|
bzcat enwiki-latest-pages-articles.xml.bz2 | \
|
||||||
@@ -14,4 +14,4 @@ bzcat enwiki-latest-pages-articles.xml.bz2 | \
|
|||||||
enwiki-latest-corpus.txt
|
enwiki-latest-corpus.txt
|
||||||
|
|
||||||
# Clean up
|
# Clean up
|
||||||
#rm enwiki-latest-pages-articles.xml.bz2
|
rm enwiki-latest-pages-articles.xml.bz2
|
||||||
|
Loading…
Reference in New Issue
Block a user