From f5b51392775afe389301ac73696855df174bc639 Mon Sep 17 00:00:00 2001
From: Jakub Konieczny
Date: Tue, 30 Nov 2021 11:20:38 +0000
Subject: [PATCH 1/4] add prepare-dataset

---
 prepare-dataset.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 prepare-dataset.sh

diff --git a/prepare-dataset.sh b/prepare-dataset.sh
old mode 100644
new mode 100755

From 5917c2799cb125be5d12d5cf166678571925daef Mon Sep 17 00:00:00 2001
From: Jakub Konieczny
Date: Fri, 14 Jan 2022 18:52:41 +0100
Subject: [PATCH 2/4] adjust prepare script

---
 prepare-dataset.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/prepare-dataset.sh b/prepare-dataset.sh
index e368ef9..14a9aac 100755
--- a/prepare-dataset.sh
+++ b/prepare-dataset.sh
@@ -11,6 +11,7 @@ cut -f2 mt-summit-corpora/test/constrained-scenario-test.tsv > mt-summit-corpora
 cut -f2 mt-summit-corpora/test/general-scenario-test.tsv > mt-summit-corpora/test/general-scenario-test.pl
 cut -f1 mt-summit-corpora/test/general-scenario-test.tsv > mt-summit-corpora/test/general-scenario-test.en
 
+rm mt-summit-corpora.tar.gz
 rm mt-summit-corpora/train/train.en-pl
 rm mt-summit-corpora/test/*.tsv
 rm mt-summit-corpora/dev/dev.en-pl

From a4b31d2f0c9018f2ee81304a2a2057debd579a64 Mon Sep 17 00:00:00 2001
From: jakubknczny
Date: Sat, 15 Jan 2022 22:21:56 +0100
Subject: [PATCH 3/4] add prepare-gonito

---
 prepare-gonito.sh | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100755 prepare-gonito.sh

diff --git a/prepare-gonito.sh b/prepare-gonito.sh
new file mode 100755
index 0000000..02e20b3
--- /dev/null
+++ b/prepare-gonito.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+tar xvf mt-summit-corpora.tar.gz >> /dev/null 2>&1
+
+cut -f1 mt-summit-corpora/train/train.en-pl > mt-summit-corpora/train/train.en
+cut -f2 mt-summit-corpora/train/train.en-pl > mt-summit-corpora/train/train.pl
+cut -f1 mt-summit-corpora/dev/dev.en-pl > mt-summit-corpora/dev/dev.en
+cut -f2 mt-summit-corpora/dev/dev.en-pl > mt-summit-corpora/dev/dev.pl
+cut -f1 mt-summit-corpora/test/constrained-scenario-test.tsv > mt-summit-corpora/test/constrained-scenario-test.en
+cut -f2 mt-summit-corpora/test/constrained-scenario-test.tsv > mt-summit-corpora/test/constrained-scenario-test.pl
+cut -f2 mt-summit-corpora/test/general-scenario-test.tsv > mt-summit-corpora/test/general-scenario-test.pl
+cut -f1 mt-summit-corpora/test/general-scenario-test.tsv > mt-summit-corpora/test/general-scenario-test.en
+
+rm mt-summit-corpora.tar.gz
+rm mt-summit-corpora/train/train.en-pl
+rm mt-summit-corpora/test/*.tsv
+rm mt-summit-corpora/dev/dev.en-pl
+
+mv mt-summit-corpora/dev ./dev-0
+mv mt-summit-corpora/train ./train
+mv mt-summit-corpora/test ./test-A
+rm -r mt-summit-corpora
+
+mv dev-0/dev.en dev-0/in.tsv
+mv dev-0/dev.pl dev-0/expected.tsv
+mv train/train.en train/in.tsv
+mv train/train.pl train/expected.tsv
+mv test-A/general-scenario-test.en test-A/in.tsv
+mv test-A/general-scenario-test.pl test-A/expected.tsv
+mkdir test-B
+mv test-A/constrained-scenario-test.en test-B/in.tsv
+mv test-A/constrained-scenario-test.pl test-B/expected.tsv
+mv kompendium.tsv glossary.tsv
+

From b10119c69a13630f5422d7e0cf617982cfb81b28 Mon Sep 17 00:00:00 2001
From: jakubknczny
Date: Sat, 15 Jan 2022 22:42:55 +0100
Subject: [PATCH 4/4] tar train gonito

---
 prepare-gonito.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/prepare-gonito.sh b/prepare-gonito.sh
index 02e20b3..0259f42 100755
--- a/prepare-gonito.sh
+++ b/prepare-gonito.sh
@@ -25,10 +25,15 @@ mv dev-0/dev.en dev-0/in.tsv
 mv dev-0/dev.pl dev-0/expected.tsv
 mv train/train.en train/in.tsv
 mv train/train.pl train/expected.tsv
+cd train
+tar -czf in.tar.gz in.tsv
+tar -czf expected.tar.gz expected.tsv
+rm expected.tsv
+rm in.tsv
+cd ..
 mv test-A/general-scenario-test.en test-A/in.tsv
 mv test-A/general-scenario-test.pl test-A/expected.tsv
 mkdir test-B
 mv test-A/constrained-scenario-test.en test-B/in.tsv
 mv test-A/constrained-scenario-test.pl test-B/expected.tsv
 mv kompendium.tsv glossary.tsv
-