diff --git a/TaskE05/description.txt b/TaskE05/description.txt new file mode 100644 index 0000000..28dbf83 --- /dev/null +++ b/TaskE05/description.txt @@ -0,0 +1,21 @@ +Text normalization for a TTS +============================ + +The task is to write a Thrax grammar for normalizing text for a +text-to-speech system, i.e. the text should be converted to a form +closer to speech (but without phonetic transcription). + +For instance, "I bought 21 books from prof. Smith" should +be transformed into "i bought twenty-one books from professor smith" + +You should: + +- convert numbers 0-99 into words +- convert Roman numerals I-X into adjectives (the first, the second, etc.) +- expand abbreviations: "e.g." ("for example"), "prof." ("professor"), "dr." ("doctor"), + "p." ("page"), "pp." ("pages") +- remove punctuation (except for hyphens) +- lower-case everything + +POINTS: 10 +DEADLINE: 2020-01-28 23:59 diff --git a/TaskE05/test.exp b/TaskE05/test.exp new file mode 100644 index 0000000..f38a27d --- /dev/null +++ b/TaskE05/test.exp @@ -0,0 +1,8 @@ +Input string: Output string: i bought twenty-one books from professor smith +Input string: Output string: doctor +Input string: Output string: for example world war the second and other things +Input string: Output string: ninety-nine helium-oxygen balloons +Input string: Output string: four four four +Input string: Output string: nothing to normalize +Input string: Output string: thirteen +Input string: \ No newline at end of file diff --git a/TaskE05/test.in b/TaskE05/test.in new file mode 100644 index 0000000..7c5d370 --- /dev/null +++ b/TaskE05/test.in @@ -0,0 +1,7 @@ +I bought 21 books from prof. Smith! +dr. +E.g. World War II, and other things. 
+99 helium-oxygen balloons +4 4 4 +nothing to normalize +13 diff --git a/blend.sh b/blend.sh index 738bc75..e69bb8c 100755 --- a/blend.sh +++ b/blend.sh @@ -19,7 +19,7 @@ cp "${PREFIX}/count-points.pl" arena/ cp "${PREFIX}/overrides.txt" arena/ cp "${PREFIX}/Makefile" arena/ -for TX in X01 X02 X03 X04 X05 X06 X07 B00 B01 B02 B03 B04 B05 B06 C00 C01 C02 C03 C04 C05 C06 E01 E02 E03 E04 # X05 X06 X07 X08 X09 X10 B03 B04 X10 +for TX in X01 X02 X03 X04 X05 X06 X07 B00 B01 B02 B03 B04 B05 B06 C00 C01 C02 C03 C04 C05 C06 E01 E02 E03 E04 E05 # X05 X06 X07 X08 X09 X10 B03 B04 X10 do mkdir -p arena/Task$TX done