changes in CMakeLists.txt
This commit is contained in:
parent
cf7b1592f7
commit
c3826919ba
5
TODO.txt
5
TODO.txt
@ -1,5 +1,4 @@
|
||||
---------------------------- Developer's private notes (language may vary, bo tak czasem wygodniej) -----------------------------
|
||||
- concordia helper - aplikacja windowsowa, która wyszukuje w Concordii zdanie zaznaczone w dowolnej aplikacji: https://www.jayway.com/2013/02/06/how-to-get-selected-text-from-another-windows-program/
|
||||
- document analysis - concordia score should be calculated for a document. Idea - for each fragment: (fragment length/document length) * log penalty at sentence level.
|
||||
- multiple indexes based on different hashes. One can be word-net base forms, other - pos-tags and so on. Develop a method of combining results. It may be a way to implement lemmatization.
|
||||
IN PROGRESS - document the code (classes, cfg files) and update tutorial
|
||||
@ -10,6 +9,7 @@ IN PROGRESS - document the code (classes, cfg files) and update tutorial
|
||||
|
||||
|
||||
---------------------------- Archive -----------------------------
|
||||
DONE - concordia helper - aplikacja windowsowa, która wyszukuje w Concordii zdanie zaznaczone w dowolnej aplikacji: https://www.jayway.com/2013/02/06/how-to-get-selected-text-from-another-windows-program/
|
||||
DONE - rethink passing variables such as TokenizedSentence by smart pointers. Consider using references in getters.
|
||||
DONE - moving/extending concordia matches on demand - powered by concordia-server
|
||||
DONE - testy zużycia pamięci
|
||||
@ -36,7 +36,7 @@ DONE - wyłączyć stopWords
|
||||
|
||||
DONE - Przy concordia search dodatkowo obliczany ma być zestaw optymalnego pokrycia patternu. Może siłowo? (jeśli przyjąć max dł. zdania 500 tokenów, to nie powinno być źle)
|
||||
|
||||
DONE - wyszukiwanie zdania: wyszukanie najdłuższych pasujących fragmentów Anubisem, 1D (approximate) bin packing. Nazwijmy to concordia search. Wyszukiwane są wszystkie najdłuższe dopasowania patternu dzięki LCP search. Zwracany jest wynik w postaci listy najdłuższych dopasowanych fragmentów, posortowanych malejąco po długości, z maksymalnie 3 przedstawicielami każdej długości.
|
||||
DONE - wyszukiwanie zdania: wyszukanie najdłuższych pasujących fragmentów Anubisem, 1D (approximate) bin packing. Nazwijmy to concordia search. Wyszukiwane są wszystkie najdłuższe dopasowania patternu dzięki LCP search. Zwracany jest wynik w postaci listy najdłuższych dopasowanych fragmentów, posortowanych malejąco po długości, z maksymalnie 3 przedstawicielami każdej długości.
|
||||
|
||||
DONE 1. lokalizowane to_lower (wykorzystać utf8case, naprawić testy)
|
||||
DONE 2. anonimizacja zdań
|
||||
@ -55,4 +55,3 @@ DONE - !important! rezygnacja z ptr_vector
|
||||
DONE - zwracanie wektorów
|
||||
DONE - powyrzucać using namespace std
|
||||
DONE - profiling
|
||||
|
||||
|
@ -5,62 +5,30 @@ foreach(dir ${ALL_DIRECTORIES})
|
||||
add_subdirectory(${dir})
|
||||
endforeach(dir)
|
||||
|
||||
file(GLOB main_sources "*.cpp")
|
||||
file(GLOB common_sources "common/*.cpp")
|
||||
|
||||
|
||||
add_library(concordia SHARED
|
||||
token_annotation.cpp
|
||||
tokenized_sentence.cpp
|
||||
concordia_search_result.cpp
|
||||
matched_pattern_fragment.cpp
|
||||
concordia_searcher.cpp
|
||||
regex_rule.cpp
|
||||
sentence_tokenizer.cpp
|
||||
interval.cpp
|
||||
tm_matches.cpp
|
||||
anubis_search_result.cpp
|
||||
substring_occurence.cpp
|
||||
example.cpp
|
||||
index_searcher.cpp
|
||||
concordia_index.cpp
|
||||
word_map.cpp
|
||||
hash_generator.cpp
|
||||
concordia.cpp
|
||||
concordia_config.cpp
|
||||
concordia_exception.cpp
|
||||
common/logging.cpp
|
||||
common/utils.cpp
|
||||
common/text_utils.cpp
|
||||
${main_sources}
|
||||
${common_sources}
|
||||
)
|
||||
|
||||
add_subdirectory(t)
|
||||
# =====================================
|
||||
|
||||
install(TARGETS concordia DESTINATION lib/)
|
||||
install(FILES
|
||||
token_annotation.hpp
|
||||
tokenized_sentence.hpp
|
||||
concordia_search_result.hpp
|
||||
matched_pattern_fragment.hpp
|
||||
concordia_searcher.hpp
|
||||
regex_rule.hpp
|
||||
sentence_tokenizer.hpp
|
||||
interval.hpp
|
||||
tm_matches.hpp
|
||||
anubis_search_result.hpp
|
||||
substring_occurence.hpp
|
||||
example.hpp
|
||||
index_searcher.hpp
|
||||
concordia_index.hpp
|
||||
word_map.hpp
|
||||
hash_generator.hpp
|
||||
concordia.hpp
|
||||
concordia_config.hpp
|
||||
concordia_exception.hpp
|
||||
|
||||
|
||||
file(GLOB main_headers "*.hpp")
|
||||
file(GLOB common_headers "common/*.hpp")
|
||||
|
||||
install(FILES
|
||||
${main_headers}
|
||||
DESTINATION include/concordia/)
|
||||
|
||||
install(FILES
|
||||
common/config.hpp
|
||||
common/logging.hpp
|
||||
common/utils.hpp
|
||||
common/text_utils.hpp
|
||||
install(FILES
|
||||
${common_headers}
|
||||
DESTINATION include/concordia/common/)
|
||||
|
||||
# ----------------------------------------------------
|
||||
@ -75,11 +43,7 @@ if(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
|
||||
link_directories(${LIBCONFIG_LIB})
|
||||
endif(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
|
||||
|
||||
target_link_libraries(concordia config++)
|
||||
target_link_libraries(concordia log4cpp)
|
||||
target_link_libraries(concordia ${Boost_LIBRARIES})
|
||||
target_link_libraries(concordia divsufsort)
|
||||
target_link_libraries(concordia utf8case)
|
||||
target_link_libraries(concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case ${Boost_LIBRARIES})
|
||||
|
||||
if (WITH_RE2)
|
||||
target_link_libraries(concordia re2)
|
||||
@ -91,4 +55,3 @@ else(WITH_RE2)
|
||||
target_link_libraries(concordia pcrecpp)
|
||||
endif(WITH_PCRE)
|
||||
endif(WITH_RE2)
|
||||
|
||||
|
@ -1,22 +1,7 @@
|
||||
file(GLOB test_sources "*.cpp")
|
||||
|
||||
add_library(concordia-tests
|
||||
test_hash_generator.cpp
|
||||
test_regex_rule.cpp
|
||||
test_tokenized_sentence.cpp
|
||||
test_concordia_searcher.cpp
|
||||
test_sentence_tokenizer.cpp
|
||||
test_text_utils.cpp
|
||||
test_example.cpp
|
||||
test_tm_matches.cpp
|
||||
test_interval.cpp
|
||||
test_logging.cpp
|
||||
test_utils.cpp
|
||||
test_word_map.cpp
|
||||
test_concordia_index.cpp
|
||||
test_concordia_config.cpp
|
||||
test_concordia.cpp
|
||||
range_based_case_converter_tests.cpp
|
||||
simple_convert_tests.cpp
|
||||
special_casing_converter_tests.cpp
|
||||
${test_sources}
|
||||
)
|
||||
|
||||
target_link_libraries(concordia-tests concordia ${LIBCONFIG_LIB} concordia-tests-common utf8case)
|
||||
|
@ -4,8 +4,8 @@ echo "CONCORDIA RUNNER: Running Concordia"
|
||||
|
||||
rm ../prod/resources/temp/*
|
||||
echo "CONCORDIA RUNNER: reading from file"
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/medium.txt
|
||||
concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/medium.txt
|
||||
echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\""
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own"
|
||||
concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own"
|
||||
echo "CONCORDIA RUNNER: searching for pattern: \"it is\""
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n
|
||||
concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n
|
||||
|
Loading…
Reference in New Issue
Block a user