Small script correction

This commit is contained in:
Kuba 2023-03-16 00:07:03 +01:00
parent 17253595a3
commit 120587d98d

View File

@@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
# Download the latest Wikipedia dump file # Download the latest Wikipedia dump file
#wget https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2 wget https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
# Extract the text from the dump file using native Bash commands # Extract the text from the dump file using native Bash commands
bzcat enwiki-latest-pages-articles.xml.bz2 | \ bzcat enwiki-latest-pages-articles.xml.bz2 | \
@@ -14,4 +14,4 @@ bzcat enwiki-latest-pages-articles.xml.bz2 | \
enwiki-latest-corpus.txt enwiki-latest-corpus.txt
# Clean up # Clean up
#rm enwiki-latest-pages-articles.xml.bz2 rm enwiki-latest-pages-articles.xml.bz2