From ac7bc4cdbe5de3c29fbc9a91f7c288082483dc79 Mon Sep 17 00:00:00 2001 From: rjawor Date: Fri, 21 Aug 2015 08:37:54 +0200 Subject: [PATCH] working concordia server --- TODO.txt | 2 ++ clearIndex.sh | 5 +++++ db/concordiaDb.sh | 2 +- db/recreateDb.sh | 4 ++-- tests/addFile.py | 4 ++-- tests/addJrc.sh | 4 ++++ tests/addingError2.txt | 23 +++++++++++++++++++++++ tests/performanceAdding.txt | 7 +++++++ 8 files changed, 46 insertions(+), 5 deletions(-) create mode 100755 clearIndex.sh create mode 100755 tests/addJrc.sh create mode 100644 tests/addingError2.txt create mode 100644 tests/performanceAdding.txt diff --git a/TODO.txt b/TODO.txt index a6677de..fdb70ff 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,2 +1,4 @@ +- report SA generation time and possible errors on concordia-server startup. Rethink the starting scripts. + DONE - check the parameters and return types (should be const ref) DONE - implement connection pooling with PgBouncer diff --git a/clearIndex.sh b/clearIndex.sh new file mode 100755 index 0000000..df5335d --- /dev/null +++ b/clearIndex.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +rm index/* +cd db +./recreateDb.sh diff --git a/db/concordiaDb.sh b/db/concordiaDb.sh index 511ef18..b3bf759 100755 --- a/db/concordiaDb.sh +++ b/db/concordiaDb.sh @@ -1,3 +1,3 @@ #!/bin/sh -sudo -u concordia psql concordia_server +psql -U concordia concordia_server diff --git a/db/recreateDb.sh b/db/recreateDb.sh index 6f899bb..36b2ce8 100755 --- a/db/recreateDb.sh +++ b/db/recreateDb.sh @@ -1,13 +1,13 @@ #!/bin/sh echo "Recreating database schema..." -psql -W -U concordia concordia_server -f concordia_server.sql +psql -U concordia concordia_server -f concordia_server.sql echo "Inserting initial data..." for initFile in `ls init/*` do echo "Init file:" $initFile - psql -W -U concordia concordia_server -f $initFile + psql -U concordia concordia_server -f $initFile done echo "Concordia server database recreation complete!" 
diff --git a/tests/addFile.py b/tests/addFile.py index 08f43c7..fe17005 100755 --- a/tests/addFile.py +++ b/tests/addFile.py @@ -7,6 +7,8 @@ import urllib2 import sys import time +BUFFER_SIZE = 500 + def file_len(fname): with open(fname) as f: for i, l in enumerate(f): @@ -27,8 +29,6 @@ if file_len(targetFile) != totalLines: print "File lengths do not match" sys.exit(1) -BUFFER_SIZE = 1000 - data = { 'operation': 'addSentences' } diff --git a/tests/addJrc.sh b/tests/addJrc.sh new file mode 100755 index 0000000..3526eff --- /dev/null +++ b/tests/addJrc.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +./addFile.py ~/projects/corpora/jrc/jrc_pl.txt ~/projects/corpora/jrc/jrc_en.txt 1 + diff --git a/tests/addingError2.txt b/tests/addingError2.txt new file mode 100644 index 0000000..2de7239 --- /dev/null +++ b/tests/addingError2.txt @@ -0,0 +1,23 @@ +Added 1134500 of 1254468 sentences. Time elapsed: 1164.9647 s, current speed: 973.8492 sentences/second +Added 1135000 of 1254468 sentences. Time elapsed: 1165.3810 s, current speed: 973.9304 sentences/second +Added 1135500 of 1254468 sentences. Time elapsed: 1165.8651 s, current speed: 973.9548 sentences/second +Added 1136000 of 1254468 sentences. 
Time elapsed: 1166.2967 s, current speed: 974.0232 sentences/second +Traceback (most recent call last): + File "./addFile.py", line 48, in <module> + add_data(data) + File "./addFile.py", line 21, in add_data + urllib2.urlopen(req, json.dumps(data)).read() + File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen + return _opener.open(url, data, timeout) + File "/usr/lib/python2.7/urllib2.py", line 410, in open + response = meth(req, response) + File "/usr/lib/python2.7/urllib2.py", line 523, in http_response + 'http', request, response, code, msg, hdrs) + File "/usr/lib/python2.7/urllib2.py", line 448, in error + return self._call_chain(*args) + File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain + result = func(*args) + File "/usr/lib/python2.7/urllib2.py", line 531, in http_error_default + raise HTTPError(req.get_full_url(), code, msg, hdrs, fp) +urllib2.HTTPError: HTTP Error 413: Request Entity Too Large + diff --git a/tests/performanceAdding.txt b/tests/performanceAdding.txt new file mode 100644 index 0000000..c89162a --- /dev/null +++ b/tests/performanceAdding.txt @@ -0,0 +1,7 @@ +Added 999500 of 1000000 sentences. Time elapsed: 1019.4110 s, current speed: 980.4681 sentences/second +Added 1000000 of 1000000 sentences. Time elapsed: 1023.4541 s, current speed: 977.0834 sentences/second +Added all 1000000 sentences. Time elapsed: 1023.4542 s, overall speed: 977.0832 sentences/second +Generating index... +Index regeneration complete. The operation took 9.2251 s + +memory: concordia_server 676MB