concordia-server/tests/lemmatizer-test/tokenize.sh

8 lines
239 B
Bash
Raw Permalink Normal View History

2018-12-31 11:13:16 +01:00
#!/bin/bash
# Run concordia-sentence-tokenizer over every entry in the cleaned corpus
# directory whose name ends in "txt". Each file is fed to the tokenizer on
# stdin and the tokenizer's stdout is written to corpora/<same file name>,
# relative to the current working directory (corpora/ is not created here).
#
# The glob is expanded by the shell directly rather than by parsing `ls`
# output, so paths containing whitespace or glob characters stay intact.
for corpus_file in /mnt/storage/rjawor_storage/copycat_corpus/cleaned/*txt; do
  # When nothing matches, bash leaves the literal pattern in place; skip it
  # instead of handing a non-existent path to the tokenizer.
  [[ -e "$corpus_file" ]] || continue
  # Strip everything up to the last slash (the directory part) without
  # forking an external basename process.
  a=${corpus_file##*/}
  concordia-sentence-tokenizer -c /home/rjawor/concordia-server/concordia.cfg \
    < "$corpus_file" > "corpora/$a"
done