working concordia server

This commit is contained in:
rjawor 2015-08-21 08:37:54 +02:00
parent 9eae5aa5b1
commit ac7bc4cdbe
8 changed files with 46 additions and 5 deletions

View File

@ -1,2 +1,4 @@
- report SA generation time and possible errors on concordia-server startup. Rethink the starting scripts.
DONE - check the parameters and return types (should be const ref)
DONE - implement connection pooling with PgBouncer

5
clearIndex.sh Executable file
View File

@ -0,0 +1,5 @@
#!/bin/sh
# Clear the generated index files and rebuild the database.
# Paths are relative: run this from the directory that contains both
# index/ and db/.

# Delete every (non-hidden) file in index/.
rm index/*

# recreateDb.sh is invoked by relative path from inside db/. Abort if the
# directory change fails instead of continuing in the wrong directory.
cd db || exit 1
./recreateDb.sh

View File

@ -1,3 +1,3 @@
#!/bin/sh
# Open a psql session on the concordia_server database.
# NOTE(review): the two commands below look like the before/after sides of a
# one-line change in this diff view; presumably only one of them belongs in
# the actual script — confirm against the repository.

# Variant that runs psql as the operating-system user "concordia" via sudo.
sudo -u concordia psql concordia_server
# Variant that connects with the database user name "concordia" (-U), no sudo.
psql -U concordia concordia_server

View File

@ -1,13 +1,13 @@
#!/bin/sh
# Recreate the concordia_server schema from concordia_server.sql, then load
# every file found under init/ into the same database.
# NOTE(review): each psql command appears twice (with and without -W); this
# looks like the removed/added sides of a diff, so presumably only one line
# of each pair belongs in the actual script — confirm against the repository.
echo "Recreating database schema..."
# -W makes psql prompt for a password before connecting.
psql -W -U concordia concordia_server -f concordia_server.sql
# Same command without the forced password prompt.
psql -U concordia concordia_server -f concordia_server.sql
echo "Inserting initial data..."
# NOTE(review): iterating over `ls` output and using $initFile unquoted
# word-splits file names that contain whitespace; a plain glob
# (for initFile in init/*) with "$initFile" quoted would avoid that.
for initFile in `ls init/*`
do
echo "Init file:" $initFile
# -W variant: prompts for a password for each init file.
psql -W -U concordia concordia_server -f $initFile
# Variant without the forced password prompt.
psql -U concordia concordia_server -f $initFile
done
echo "Concordia server database recreation complete!"

View File

@ -7,6 +7,8 @@ import urllib2
import sys
import time
BUFFER_SIZE = 500
def file_len(fname):
with open(fname) as f:
for i, l in enumerate(f):
@ -27,8 +29,6 @@ if file_len(targetFile) != totalLines:
print "File lengths do not match"
sys.exit(1)
BUFFER_SIZE = 1000
data = {
'operation': 'addSentences'
}

4
tests/addJrc.sh Executable file
View File

@ -0,0 +1,4 @@
#!/bin/sh
# Run addFile.py on the two JRC corpus files (jrc_pl.txt and jrc_en.txt),
# passing 1 as the third argument.
# Must be started from the directory that contains addFile.py.

# Both input files live in the same corpus directory.
jrc_dir=~/projects/corpora/jrc

./addFile.py "$jrc_dir/jrc_pl.txt" "$jrc_dir/jrc_en.txt" 1

23
tests/addingError2.txt Normal file
View File

@ -0,0 +1,23 @@
Added 1134500 of 1254468 sentences. Time elapsed: 1164.9647 s, current speed: 973.8492 sentences/second
Added 1135000 of 1254468 sentences. Time elapsed: 1165.3810 s, current speed: 973.9304 sentences/second
Added 1135500 of 1254468 sentences. Time elapsed: 1165.8651 s, current speed: 973.9548 sentences/second
Added 1136000 of 1254468 sentences. Time elapsed: 1166.2967 s, current speed: 974.0232 sentences/second
Traceback (most recent call last):
File "./addFile.py", line 48, in <module>
add_data(data)
File "./addFile.py", line 21, in add_data
urllib2.urlopen(req, json.dumps(data)).read()
File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 410, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 523, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 448, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 531, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 413: Request Entity Too Large

View File

@ -0,0 +1,7 @@
Added 999500 of 1000000 sentences. Time elapsed: 1019.4110 s, current speed: 980.4681 sentences/second
Added 1000000 of 1000000 sentences. Time elapsed: 1023.4541 s, current speed: 977.0834 sentences/second
Added all 1000000 sentences. Time elapsed: 1023.4542 s, overall speed: 977.0832 sentences/second
Generating index...
Index regeneration complete. The operation took 9.2251 s
memory: concordia_server 676MB