Merge branch 'master' of git.wmi.amu.edu.pl:s470607/mt-summit-corpora

This commit is contained in:
jakubknczny 2022-01-18 10:28:15 +01:00
commit 5e3a91e2e4
2 changed files with 40 additions and 0 deletions

1
prepare-dataset.sh Normal file → Executable file
View File

@ -11,6 +11,7 @@ cut -f2 mt-summit-corpora/test/constrained-scenario-test.tsv > mt-summit-corpora
cut -f2 mt-summit-corpora/test/general-scenario-test.tsv > mt-summit-corpora/test/general-scenario-test.pl cut -f2 mt-summit-corpora/test/general-scenario-test.tsv > mt-summit-corpora/test/general-scenario-test.pl
cut -f1 mt-summit-corpora/test/general-scenario-test.tsv > mt-summit-corpora/test/general-scenario-test.en cut -f1 mt-summit-corpora/test/general-scenario-test.tsv > mt-summit-corpora/test/general-scenario-test.en
rm mt-summit-corpora.tar.gz
rm mt-summit-corpora/train/train.en-pl rm mt-summit-corpora/train/train.en-pl
rm mt-summit-corpora/test/*.tsv rm mt-summit-corpora/test/*.tsv
rm mt-summit-corpora/dev/dev.en-pl rm mt-summit-corpora/dev/dev.en-pl

39
prepare-gonito.sh Executable file
View File

@ -0,0 +1,39 @@
#!/bin/bash
#
# Repackages the mt-summit-corpora archive into the directory layout
# train/, dev-0/, test-A/, test-B/ with in.tsv / expected.tsv files.
#
# Expects in the current working directory:
#   mt-summit-corpora.tar.gz  archive expected to unpack to mt-summit-corpora/
#                             containing train/, dev/ and test/
#   kompendium.tsv            renamed to glossary.tsv as the last step
#
# Produces in the current working directory:
#   train/in.tar.gz, train/expected.tar.gz  (compressed train in/expected)
#   dev-0/in.tsv,    dev-0/expected.tsv
#   test-A/in.tsv,   test-A/expected.tsv    (general scenario test set)
#   test-B/in.tsv,   test-B/expected.tsv    (constrained scenario test set)
#   glossary.tsv
#
# Column 1 of every two-column source file becomes the "in" side (.en),
# column 2 becomes the "expected" side (.pl). The archive and the unpacked
# mt-summit-corpora/ tree are deleted along the way.

# Abort on the first failing step: every later step deletes or moves files,
# so continuing after a failure would destroy data or act in the wrong place.
set -euo pipefail

readonly corpus="mt-summit-corpora"

#######################################
# Splits a two-column TSV into per-column files.
# Arguments: $1 - path of the tab-separated source file
#            $2 - output path stem; writes <stem>.en (col 1), <stem>.pl (col 2)
# Returns:   non-zero if either cut fails
#######################################
split_columns() {
  local tsv=$1
  local stem=$2
  cut -f1 -- "${tsv}" > "${stem}.en"
  cut -f2 -- "${tsv}" > "${stem}.pl"
}

# Unpack quietly, but keep tar's error output visible so a broken or missing
# archive is reported instead of being silently ignored.
tar -xf "${corpus}.tar.gz"

split_columns "${corpus}/train/train.en-pl" "${corpus}/train/train"
split_columns "${corpus}/dev/dev.en-pl" "${corpus}/dev/dev"
split_columns "${corpus}/test/constrained-scenario-test.tsv" \
  "${corpus}/test/constrained-scenario-test"
split_columns "${corpus}/test/general-scenario-test.tsv" \
  "${corpus}/test/general-scenario-test"

# The combined sources are no longer needed once they have been split.
rm -- "${corpus}.tar.gz"
rm -- "${corpus}/train/train.en-pl"
rm -- "${corpus}"/test/*.tsv
rm -- "${corpus}/dev/dev.en-pl"

# Move the splits to their final directory names and drop the empty shell.
mv -- "${corpus}/dev" ./dev-0
mv -- "${corpus}/train" ./train
mv -- "${corpus}/test" ./test-A
rm -r -- "${corpus:?}"

mv -- dev-0/dev.en dev-0/in.tsv
mv -- dev-0/dev.pl dev-0/expected.tsv
mv -- train/train.en train/in.tsv
mv -- train/train.pl train/expected.tsv

# Compress the train files inside a subshell: a failed cd can then never
# cause the rm below (or any later step) to run in the wrong directory, and
# the uncompressed files are removed only after both archives were written.
(
  cd train
  tar -czf in.tar.gz in.tsv
  tar -czf expected.tar.gz expected.tsv
  rm -- expected.tsv in.tsv
)

# General scenario stays in test-A; constrained scenario moves to test-B.
mv -- test-A/general-scenario-test.en test-A/in.tsv
mv -- test-A/general-scenario-test.pl test-A/expected.tsv
mkdir -p test-B
mv -- test-A/constrained-scenario-test.en test-B/in.tsv
mv -- test-A/constrained-scenario-test.pl test-B/expected.tsv

mv -- kompendium.tsv glossary.tsv