diff --git a/CONCORDIA_OPTIMAL_MATCH.txt b/CONCORDIA_OPTIMAL_MATCH.txt deleted file mode 100644 index 751ac6d..0000000 --- a/CONCORDIA_OPTIMAL_MATCH.txt +++ /dev/null @@ -1,25 +0,0 @@ -S = set of concordia results -maxCoverage = empty // max coverage - -/* -getPossibleCoverages - param A - set of concordia results, current coverage - return isTerminal - returns true if nothing from S can be added to A -*/ -void getPossibleCoverages(A) { - allTerminal = true - for s in S: // to consider - sort intervals in S and always search from the last interval in A - // however - how to sort the intervals? maybe by their ends? - if not A intersects {s} // given the above, this check would only require to check if s overlaps with the last interval in A - getPossibleCoverages(A+{s}) - allTerminal = false - - if allTerminal then - score = scoreCoverage(A) - if score > scoreCoverage(maxCoverage) - maxCoverage = A - -} - - - diff --git a/TODO.txt b/TODO.txt index f1fc806..848cce2 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,3 +1,6 @@ +---------------------------- Developer's private notes ----------------------------- + + IN PROGRESS - concordia search zwraca pozycje tokenów z hash'a. Jak to odnieść do examples w korpusie? - testy zużycia pamięci - Prawdopodobnie długość example w markers będzie potrzebna tylko anubisowi (który, jak się okazuje, jest wolny). Pomyśleć, do czego można wykorzystać markery, bo ich idea wydaje się niezła. diff --git a/concordia-anubissearch-jrc.sh b/concordia-anubissearch-jrc.sh deleted file mode 100755 index 9988208..0000000 --- a/concordia-anubissearch-jrc.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - - -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -a "$1" - diff --git a/concordia-clear-jrc.sh b/concordia-clear-jrc.sh deleted file mode 100755 index 55427ae..0000000 --- a/concordia-clear-jrc.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -rm prod/resources/temp/* -rm prod/resources/text-files/jrc_smaller.txt diff --git a/concordia-concordiasearch-jrc.sh b/concordia-concordiasearch-jrc.sh deleted file mode 100755 index 57cce90..0000000 --- a/concordia-concordiasearch-jrc.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - - -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -x "$1" - diff --git a/concordia-index-jrc.sh b/concordia-index-jrc.sh deleted file mode 100755 index 197026f..0000000 --- a/concordia-index-jrc.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -echo "CONCORDIA INDEXER: Decompressing test file" - -xz -dk prod/resources/text-files/jrc_smaller.txt.xz - -echo "CONCORDIA INDEXER: Running Concordia" - -rm prod/resources/temp/* - -echo "CONCORDIA INDEXER: reading from file" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -r prod/resources/text-files/jrc_smaller.txt - - diff --git a/concordia-runner-jrc.sh b/concordia-runner-jrc.sh deleted file mode 100755 index f9622dc..0000000 --- a/concordia-runner-jrc.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh - -echo "CONCORDIA RUNNER: Decompressing test file" - -xz -dk prod/resources/text-files/jrc_smaller.txt.xz - -echo "CONCORDIA RUNNER: Running Concordia" - -rm prod/resources/temp/* - -echo "CONCORDIA RUNNER: reading from file" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -r prod/resources/text-files/jrc_smaller.txt - - -echo "CONCORDIA RUNNER: concordia searching for pattern: \"Współpraca Państw Członkowskich i Komisji Europejskiej\"" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -x "Współpraca Państw Członkowskich i Komisji Europejskiej" -echo "CONCORDIA RUNNER: concordia searching for pattern: \"8. W odniesieniu do artykułu 45 ustęp 12\"" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -x "8. W odniesieniu do artykułu 45 ustęp 12" -echo "CONCORDIA RUNNER: concordia searching for pattern: \"Prawo europejskie umożliwia handel zagraniczny\"" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -x "Prawo europejskie umożliwia handel zagraniczny" - - -echo "CONCORDIA RUNNER: searching for pattern: \"Parlamentu Europejskiego\"" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "Parlamentu Europejskiego" -n -echo "CONCORDIA RUNNER: searching for pattern: \"Dostęp do zatrudnienia\"" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "Dostęp do zatrudnienia" -n -echo "CONCORDIA RUNNER: searching for pattern: \"Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem\"" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem" -n - -#rm prod/resources/text-files/jrc_smaller.txt diff --git a/concordia-runner-large.sh b/concordia-runner-large.sh deleted file mode 100755 index 60dcb0c..0000000 --- a/concordia-runner-large.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh - -echo "CONCORDIA RUNNER: Decompressing test file" - -bunzip2 --keep prod/resources/text-files/large.txt.bz2 - -echo "CONCORDIA RUNNER: Running Concordia" - -rm prod/resources/temp/* - -echo "CONCORDIA RUNNER: reading from file" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -r prod/resources/text-files/large.txt -echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\"" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -n -echo "CONCORDIA RUNNER: searching for pattern: \"it is\"" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "it is" -n - -rm prod/resources/text-files/large.txt diff --git a/concordia-runner.sh b/concordia-runner.sh deleted file mode 100755 index 63898d6..0000000 --- a/concordia-runner.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -echo "CONCORDIA RUNNER: Running Concordia" - -rm prod/resources/temp/* -echo "CONCORDIA RUNNER: reading from file" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -r prod/resources/text-files/medium.txt -echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\"" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -echo "CONCORDIA RUNNER: searching for pattern: \"it is\"" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "it is" -n diff --git a/concordia-search-jrc.sh b/concordia-search-jrc.sh deleted file mode 100755 index 785d6a9..0000000 --- a/concordia-search-jrc.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - - -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "$1" -n - diff --git a/concordia-test-jrc.sh b/concordia-test-jrc.sh deleted file mode 100755 index 083c29d..0000000 --- a/concordia-test-jrc.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -echo "CONCORDIA RUNNER: Decompressing test file" - -xz -dk prod/resources/text-files/jrc_smaller.txt.xz - -echo "CONCORDIA RUNNER: Running Concordia" - -rm prod/resources/temp/* - -echo "CONCORDIA RUNNER: testing" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -t prod/resources/text-files/jrc_smaller.txt - -#rm prod/resources/text-files/jrc_smaller.txt diff --git a/concordia-test-medium.sh b/concordia-test-medium.sh deleted file mode 100755 index 8ae78a7..0000000 --- a/concordia-test-medium.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh - -echo "CONCORDIA RUNNER: Running Concordia" - -rm prod/resources/temp/* - -echo "CONCORDIA RUNNER: testing" -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -t prod/resources/text-files/medium.txt - diff --git a/getSentenceFromJRC.sh b/getSentenceFromJRC.sh deleted file mode 100755 index 3401a89..0000000 --- a/getSentenceFromJRC.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -head -$1 prod/resources/text-files/jrc_smaller.txt | tail -1 diff --git a/callgrind.out.3952 b/profiling/callgrind.out.3952 similarity index 100% rename from callgrind.out.3952 rename to profiling/callgrind.out.3952 diff --git a/valgrind.txt b/profiling/valgrind.txt similarity index 100% rename from valgrind.txt rename to profiling/valgrind.txt diff --git a/scripts/concordia-anubissearch-jrc.sh b/scripts/concordia-anubissearch-jrc.sh new file mode 100755 index 0000000..5846737 --- /dev/null +++ b/scripts/concordia-anubissearch-jrc.sh @@ -0,0 +1,5 @@ +#!/bin/sh + + +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -a "$1" + diff --git a/scripts/concordia-clear-jrc.sh b/scripts/concordia-clear-jrc.sh new file mode 100755 index 0000000..521f4cb --- /dev/null +++ b/scripts/concordia-clear-jrc.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +rm ../prod/resources/temp/* +rm ../prod/resources/text-files/jrc_smaller.txt diff --git a/scripts/concordia-concordiasearch-jrc.sh b/scripts/concordia-concordiasearch-jrc.sh new file mode 100755 index 0000000..899ad1e --- /dev/null +++ b/scripts/concordia-concordiasearch-jrc.sh @@ -0,0 +1,5 @@ +#!/bin/sh + + +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "$1" + diff --git a/scripts/concordia-index-jrc.sh b/scripts/concordia-index-jrc.sh new file mode 100755 index 0000000..695443d --- /dev/null +++ b/scripts/concordia-index-jrc.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +echo "CONCORDIA INDEXER: Decompressing test file" + +xz -dk ../prod/resources/text-files/jrc_smaller.txt.xz + +echo "CONCORDIA INDEXER: Running Concordia" + +rm ../prod/resources/temp/* + +echo "CONCORDIA INDEXER: reading from file" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt + + diff --git a/scripts/concordia-runner-jrc.sh b/scripts/concordia-runner-jrc.sh new file mode 100755 index 0000000..1811b5f --- /dev/null +++ b/scripts/concordia-runner-jrc.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +echo "CONCORDIA RUNNER: Decompressing test file" + +xz -dk ../prod/resources/text-files/jrc_smaller.txt.xz + +echo "CONCORDIA RUNNER: Running Concordia" + +rm ../prod/resources/temp/* + +echo "CONCORDIA RUNNER: reading from file" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt + + +echo "CONCORDIA RUNNER: concordia searching for pattern: \"Współpraca Państw Członkowskich i Komisji Europejskiej\"" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "Współpraca Państw Członkowskich i Komisji Europejskiej" +echo "CONCORDIA RUNNER: concordia searching for pattern: \"8. W odniesieniu do artykułu 45 ustęp 12\"" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "8. W odniesieniu do artykułu 45 ustęp 12" +echo "CONCORDIA RUNNER: concordia searching for pattern: \"Prawo europejskie umożliwia handel zagraniczny\"" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "Prawo europejskie umożliwia handel zagraniczny" + + +echo "CONCORDIA RUNNER: searching for pattern: \"Parlamentu Europejskiego\"" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Parlamentu Europejskiego" -n +echo "CONCORDIA RUNNER: searching for pattern: \"Dostęp do zatrudnienia\"" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Dostęp do zatrudnienia" -n +echo "CONCORDIA RUNNER: searching for pattern: \"Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem\"" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem" -n + +rm ../prod/resources/text-files/jrc_smaller.txt diff --git a/scripts/concordia-runner-large.sh b/scripts/concordia-runner-large.sh new file mode 100755 index 0000000..e8c15be --- /dev/null +++ b/scripts/concordia-runner-large.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +echo "CONCORDIA RUNNER: Decompressing test file" + +bunzip2 --keep ../prod/resources/text-files/large.txt.bz2 + +echo "CONCORDIA RUNNER: Running Concordia" + +rm ../prod/resources/temp/* + +echo "CONCORDIA RUNNER: reading from file" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/large.txt +echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\"" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -n +echo "CONCORDIA RUNNER: searching for pattern: \"it is\"" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n + +rm ../prod/resources/text-files/large.txt diff --git a/scripts/concordia-runner.sh b/scripts/concordia-runner.sh new file mode 100755 index 0000000..3cab4c5 --- /dev/null +++ b/scripts/concordia-runner.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +echo "CONCORDIA RUNNER: Running Concordia" + +rm ../prod/resources/temp/* +echo "CONCORDIA RUNNER: reading from file" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/medium.txt +echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\"" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own" +echo "CONCORDIA RUNNER: searching for pattern: \"it is\"" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n diff --git a/scripts/concordia-search-jrc.sh b/scripts/concordia-search-jrc.sh new file mode 100755 index 0000000..0e729b7 --- /dev/null +++ b/scripts/concordia-search-jrc.sh @@ -0,0 +1,5 @@ +#!/bin/sh + + +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "$1" -n + diff --git a/scripts/concordia-test-jrc.sh b/scripts/concordia-test-jrc.sh new file mode 100755 index 0000000..2824f08 --- /dev/null +++ b/scripts/concordia-test-jrc.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +echo "CONCORDIA RUNNER: Decompressing test file" + +xz -dk ../prod/resources/text-files/jrc_smaller.txt.xz + +echo "CONCORDIA RUNNER: Running Concordia" + +rm ../prod/resources/temp/* + +echo "CONCORDIA RUNNER: testing" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/jrc_smaller.txt + +rm ../prod/resources/text-files/jrc_smaller.txt diff --git a/scripts/concordia-test-medium.sh b/scripts/concordia-test-medium.sh new file mode 100755 index 0000000..3138e3e --- /dev/null +++ b/scripts/concordia-test-medium.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +echo "CONCORDIA RUNNER: Running Concordia" + +rm ../prod/resources/temp/* + +echo "CONCORDIA RUNNER: testing" +../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/medium.txt + diff --git a/scripts/getSentenceFromJRC.sh b/scripts/getSentenceFromJRC.sh new file mode 100755 index 0000000..c6ce92f --- /dev/null +++ b/scripts/getSentenceFromJRC.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +head -$1 ../prod/resources/text-files/jrc_smaller.txt | tail -1