This commit is contained in:
jakubknczny 2021-11-29 21:42:16 +01:00
commit e5ecb0eb49
3 changed files with 17 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
mt-summit-corpora

BIN
mt-summit-corpora.tar.gz Normal file

Binary file not shown.

16
prepare-dataset.sh Normal file
View File

@@ -0,0 +1,16 @@
#!/bin/bash
#
# Unpack mt-summit-corpora.tar.gz and split each tab-separated parallel file
# into one file per column: column 1 goes to <prefix>.en, column 2 goes to
# <prefix>.pl. The combined source files are deleted afterwards.
#
# Paths are relative, so run this from the directory that contains
# mt-summit-corpora.tar.gz.

# Abort on the first failing command. Without this, a failed extraction or a
# failed split would still fall through to the rm calls at the bottom and
# delete the only intact copy of the data.
set -euo pipefail

readonly CORPUS_DIR="mt-summit-corpora"

#######################################
# Split a two-column, tab-separated file into per-column files.
# Arguments:
#   $1 - path of the combined source file
#   $2 - output path prefix; writes "$2.en" (column 1) and "$2.pl" (column 2)
# Returns:
#   non-zero if either cut invocation fails (aborts the script under set -e)
#######################################
split_columns() {
  local src=$1
  local prefix=$2
  cut -f1 -- "${src}" > "${prefix}.en"
  cut -f2 -- "${src}" > "${prefix}.pl"
}

tar xvf "${CORPUS_DIR}.tar.gz"

split_columns "${CORPUS_DIR}/train/train.en-pl" "${CORPUS_DIR}/train/train"
split_columns "${CORPUS_DIR}/dev/dev.en-pl" "${CORPUS_DIR}/dev/dev"
split_columns "${CORPUS_DIR}/test/constrained-scenario-test.tsv" \
  "${CORPUS_DIR}/test/constrained-scenario-test"
split_columns "${CORPUS_DIR}/test/general-scenario-test.tsv" \
  "${CORPUS_DIR}/test/general-scenario-test"

# Only reached when every split above succeeded, so removing the combined
# files is safe here.
rm -- "${CORPUS_DIR}/train/train.en-pl"
rm -- "${CORPUS_DIR}"/test/*.tsv
rm -- "${CORPUS_DIR}/dev/dev.en-pl"