# Concordia configuration file.
#
# Values written as @NAME@ are placeholders; they are presumably substituted
# by the build system before this file is used (TODO confirm against the
# build scripts).

# List of all HTML tags, one tag per line.
html_tags_path = "@RESOURCES_DIRECTORY@/tokenizer/html_tags.txt"

# List of symbols that are replaced by spaces.
space_symbols_path = "@RESOURCES_DIRECTORY@/tokenizer/space_symbols.txt"

# When set to true, words from a predefined list are removed.
# NOTE(review): the value is a quoted string, not a native boolean; it is
# kept that way on the assumption that the reader expects a string - confirm.
stop_words_enabled = "@STOP_WORDS_ENABLED@"

# Path to the stop words file. Uncomment and set this when
# stop_words_enabled is true.
#stop_words_path = "@RESOURCES_DIRECTORY@/tokenizer/stop_words.txt"

# Regular expressions that match named entities.
named_entities_path = "@RESOURCES_DIRECTORY@/tokenizer/named_entities.txt"

# List of special symbols to be removed, one symbol per line.
stop_symbols_path = "@RESOURCES_DIRECTORY@/tokenizer/stop_symbols.txt"

# eof