Small script correction

This commit is contained in:
Kuba 2023-03-16 00:07:03 +01:00
parent 17253595a3
commit 120587d98d

View File

@ -1,7 +1,7 @@
#!/bin/bash
# Download the latest English Wikipedia "pages-articles" dump (bzip2-compressed
# XML) from dumps.wikimedia.org. No output path is given to wget, so the file
# is saved under its remote name, enwiki-latest-pages-articles.xml.bz2.
#wget https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
wget https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
# Decompress the dump with bzcat and extract the text using a shell pipeline
bzcat enwiki-latest-pages-articles.xml.bz2 | \
@ -14,4 +14,4 @@ bzcat enwiki-latest-pages-articles.xml.bz2 | \
enwiki-latest-corpus.txt
# Clean up: delete the downloaded compressed dump archive from the
# current directory.
#rm enwiki-latest-pages-articles.xml.bz2
rm enwiki-latest-pages-articles.xml.bz2