concordia-server/concordia.cfg.in

24 lines
857 B
INI
Raw Permalink Normal View History

2015-06-09 13:01:42 +02:00
#----------------------------
# Concordia configuration file
#----------------------------
#
# File containing all HTML tags (one per line)
2017-04-28 13:49:30 +02:00
html_tags_path = "@RESOURCES_DIRECTORY@/tokenizer/html_tags.txt"
2015-06-09 13:01:42 +02:00
# File containing all symbols to be replaced by spaces
2017-04-28 13:49:30 +02:00
space_symbols_path = "@RESOURCES_DIRECTORY@/tokenizer/space_symbols.txt"
2015-06-09 13:01:42 +02:00
# If set to true, words from the predefined stop words list are removed
stop_words_enabled = "@STOP_WORDS_ENABLED@"
# If stop_words_enabled is true, uncomment stop_words_path and set it to the stop words file
2017-04-28 13:49:30 +02:00
#stop_words_path = "@RESOURCES_DIRECTORY@/tokenizer/stop_words.txt"
2015-06-09 13:01:42 +02:00
# File containing regular expressions that match named entities
2017-04-28 13:49:30 +02:00
named_entities_path = "@RESOURCES_DIRECTORY@/tokenizer/named_entities.txt"
2015-06-09 13:01:42 +02:00
# File containing special symbols (one per line) to be removed
2017-04-28 13:49:30 +02:00
stop_symbols_path = "@RESOURCES_DIRECTORY@/tokenizer/stop_symbols.txt"
2015-06-09 13:01:42 +02:00
### eof