// concordia-library/concordia-sentence-tokenizer/concordia-sentence-tokenizer.cpp
// 2017-04-26 17:02:18 +02:00
//
// 71 lines
// 2.2 KiB
// C++

#include <iostream>
#include <fstream>
#include <boost/program_options.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/foreach.hpp>
#include "concordia/concordia_config.hpp"
#include "concordia/sentence_tokenizer.hpp"
#include "concordia/tokenized_sentence.hpp"
#include "concordia/common/config.hpp"
#include "concordia/common/utils.hpp"
// Short alias for Boost.Program_options, used by main() for option parsing.
namespace po = boost::program_options;
int main(int argc, char** argv) {
po::options_description desc("Allowed options");
desc.add_options()
("help,h", "Display this message")
("config,c", boost::program_options::value<std::string>(),
"Concordia configuration file (required)");
po::variables_map cli;
po::store(po::parse_command_line(argc, argv, desc), cli);
po::notify(cli);
if (cli.count("help")) {
std::cerr << desc << std::endl;
return 1;
}
std::string configFile;
if (cli.count("config")) {
configFile = cli["config"].as<std::string>();
} else {
std::cerr << "No Concordia configuration file given. Terminating."
<< std::endl;
return 1;
}
try {
boost::shared_ptr<ConcordiaConfig> config =
boost::shared_ptr<ConcordiaConfig> (
new ConcordiaConfig(configFile));
SentenceTokenizer sentenceTokenizer(config);
for (std::string line; std::getline(std::cin, line);) {
TokenizedSentence ts = sentenceTokenizer.tokenize(line);
std::cout << ts.getTokenizedSentence() << std::endl;
}
} catch(ConcordiaException & e) {
std::cerr << "ConcordiaException caught with message: "
<< std::endl
<< e.what()
<< std::endl
<< "Terminating execution."
<< std::endl;
return 1;
} catch(std::exception & e) {
std::cerr << "Unexpected exception caught with message: "
<< std::endl
<< e.what()
<< std::endl
<< "Terminating execution."
<< std::endl;
return 1;
}
return 0;
}