Fixes in README

This commit is contained in:
Filip Gralinski 2020-02-19 06:55:24 +01:00
parent 561f568437
commit 40a3b26658

View File

@ -534,9 +534,9 @@ be given to generate another type of toy challenge:
challenge to be submitted. The suggested way to do this will be challenge to be submitted. The suggested way to do this will be
presented as a [Makefile](https://en.wikipedia.org/wiki/Makefile), but presented as a [Makefile](https://en.wikipedia.org/wiki/Makefile), but
of course you could use any other scripting language and the commands of course you could use any other scripting language and the commands
should be clear if you know Bash and some basic facts about Makefile: should be clear if you know Bash and some basic facts about Makefiles:
* a Makefile consists of rules, each rule specify how to build a _target_ out _dependencies_ using * a Makefile consists of rules; each rule specifies how to build a _target_ out of its _dependencies_ using
shell commands shell commands
* `$@` is the (first) target, whereas `$<` — the first dependency * `$@` is the (first) target, whereas `$<` — the first dependency
* the indentation should be done with TABs, not spaces! * the indentation should be done with TABs, not spaces!
@ -550,10 +550,14 @@ SHELL=/bin/bash
# the directory where the challenge will be created # the directory where the challenge will be created
output_directory=... output_directory=...
# let's define which files are necessary, other files will be created if needed # let's define which files are necessary, other files will be created if needed;
# we'll compress the input files with xz and leave `expected.tsv` files uncompressed
# (but you could decide otherwise)
all: $(output_directory)/train/in.tsv.xz $(output_directory)/train/expected.tsv \ all: $(output_directory)/train/in.tsv.xz $(output_directory)/train/expected.tsv \
$(output_directory)/dev-0/in.tsv.xz $(output_directory)/dev-0/expected.tsv \ $(output_directory)/dev-0/in.tsv.xz $(output_directory)/dev-0/expected.tsv \
$(output_directory)/test-A/in.tsv.xz $(output_directory)/test-A/expected.tsv $(output_directory)/test-A/in.tsv.xz $(output_directory)/test-A/expected.tsv
# always validate the challenge
geval --validate --expected-directory $(output_directory)
$(output_directory)/config.txt: $(output_directory)/config.txt:
mkdir -p $(output_directory) mkdir -p $(output_directory)
@ -564,7 +568,7 @@ $(output_directory)/config.txt:
rm -f $(output_directory)/{train,dev-0,test-A}/{in,expected}.tsv rm -f $(output_directory)/{train,dev-0,test-A}/{in,expected}.tsv
# a "total" TSV containing all the data, we'll split it later # a "total" TSV containing all the data, we'll split it later
all-data.tsv.xz: some-other-files all-data.tsv.xz: prepare.py some-other-files
# the data are generated using your script, let's say prepare.py and # the data are generated using your script, let's say prepare.py and
# some other files (of course, it depends on your task); # some other files (of course, it depends on your task);
# the file will be compressed with xz # the file will be compressed with xz
@ -577,7 +581,7 @@ all-data.tsv.xz: some-other-files
# but _stable_ manner, the set into which an item is assigned should depend on the MD5 sum # but _stable_ manner, the set into which an item is assigned should depend on the MD5 sum
# of some field in the input data (a field unlikely to change). Let's assume # of some field in the input data (a field unlikely to change). Let's assume
# that you created a script `filter.py` that takes as an argument a regular expression that will be applied # that you created a script `filter.py` that takes as an argument a regular expression that will be applied
# for the MD5 sum (written in the hexadecimal format). # to the MD5 sum (written in the hexadecimal format).
$(output_directory)/train/in.tsv.xz $(output_directory)/train/expected.tsv: all-data.tsv.xz filter.py config.txt $(output_directory)/train/in.tsv.xz $(output_directory)/train/expected.tsv: all-data.tsv.xz filter.py config.txt
# 1. xzcat for decompression # 1. xzcat for decompression
@ -588,11 +592,11 @@ $(output_directory)/train/in.tsv.xz $(output_directory)/train/expected.tsv: all-
xzcat $< | ./filter.py '[0-9abcd]$' | tee >(cut -f 1 > $(output_directory)/train/expected.tsv) | cut -f 2- | xz > $@ xzcat $< | ./filter.py '[0-9abcd]$' | tee >(cut -f 1 > $(output_directory)/train/expected.tsv) | cut -f 2- | xz > $@
$(output_directory)/dev-0/in.tsv.xz $(output_directory)/dev-0/expected.tsv: all-data.tsv.xz filter.py config.txt $(output_directory)/dev-0/in.tsv.xz $(output_directory)/dev-0/expected.tsv: all-data.tsv.xz filter.py config.txt
# 1/16 of items for dev-0 set # 1/16 of the items go to the dev-0 set
xzcat $< | ./filter.py 'e$' | tee >(cut -f 1 > $(output_directory)/dev-0/expected.tsv) | cut -f 2- | xz > $@ xzcat $< | ./filter.py 'e$' | tee >(cut -f 1 > $(output_directory)/dev-0/expected.tsv) | cut -f 2- | xz > $@
$(output_directory)/test-A/in.tsv.xz $(output_directory)/test-A/expected.tsv: all-data.tsv.xz filter.py config.txt $(output_directory)/test-A/in.tsv.xz $(output_directory)/test-A/expected.tsv: all-data.tsv.xz filter.py config.txt
# ( other)1/16 of items for test-A set # (another) 1/16 of the items go to the test-A set
xzcat $< | ./filter.py 'f$' | tee >(cut -f 1 > $(output_directory)/test-A/expected.tsv) | cut -f 2- | xz > $@ xzcat $< | ./filter.py 'f$' | tee >(cut -f 1 > $(output_directory)/test-A/expected.tsv) | cut -f 2- | xz > $@
# wiping out the challenge, if you are desperate # wiping out the challenge, if you are desperate