finished adding multiple sentences, memory leak

commit 9eae5aa5b1
parent 823c1fbdb2
@@ -57,6 +57,8 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
                 }
             }
             _indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmIds);
+        } else if (operation == REFRESH_INDEX_OP) {
+            _indexController->refreshIndexFromRAM(jsonWriter);
         } else if (operation == SIMPLE_SEARCH_OP) {
             std::string pattern = d[PATTERN_PARAM].GetString();
             _searcherController->simpleSearch(jsonWriter, pattern);
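For reference, the three operations dispatched above take JSON bodies like the ones below. This is a sketch pieced together from the test scripts added later in this commit (tests/addSentence.py, tests/addFile.py, tests/simpleSearch.py); the localhost URL and the [tmId, source, target] triple layout are taken from those scripts, the example strings are invented.

    import json
    import urllib2

    def send(data):
        req = urllib2.Request('http://localhost')
        req.add_header('Content-Type', 'application/json')
        return urllib2.urlopen(req, json.dumps(data)).read()

    # addSentences: each sentence is a [tmId, source, target] triple.
    send({'operation': 'addSentences',
          'sentences': [[1, 'source sentence', 'target sentence']]})

    # refreshIndex: no further parameters; rebuilds the index.
    send({'operation': 'refreshIndex'})

    # simpleSearch: looks up a pattern in the index.
    send({'operation': 'simpleSearch', 'pattern': 'sentence'})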
@@ -17,6 +17,7 @@
 
 #define ADD_SENTENCE_OP "addSentence"
 #define ADD_SENTENCES_OP "addSentences"
+#define REFRESH_INDEX_OP "refreshIndex"
 #define SIMPLE_SEARCH_OP "simpleSearch"
 #define CONCORDIA_SEARCH_OP "concordiaSearch"
 
@@ -45,7 +45,6 @@ void IndexController::addSentences(
         std::vector<TokenizedSentence> tokenizedSentences = _concordia->tokenizeAll(sourceSentences);
         std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmIds);
         _concordia->addAllTokenizedExamples(tokenizedSentences, sentenceIds);
-        _concordia->refreshSAfromRAM();
 
         jsonWriter.StartObject();
         jsonWriter.String("status");
@@ -58,3 +57,20 @@ void IndexController::addSentences(
     }
 }
 
+void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter) {
+    try {
+        _concordia->refreshSAfromRAM();
+
+        jsonWriter.StartObject();
+        jsonWriter.String("status");
+        jsonWriter.String("success");
+        jsonWriter.EndObject();
+    } catch (ConcordiaException & e) {
+        std::stringstream errorstream;
+        errorstream << "concordia error: " << e.what();
+        JsonGenerator::signalError(jsonWriter, errorstream.str());
+    }
+
+}
@@ -30,6 +30,8 @@ public:
                       const std::vector<std::string> & targetSentences,
                       const std::vector<int> & tmIds);
 
+    void refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter);
+
 private:
     boost::shared_ptr<Concordia> _concordia;
 
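The net effect of the IndexController changes above: addSentences no longer calls refreshSAfromRAM() after every batch, and the rebuild is instead exposed as the separate refreshIndex operation, so a bulk import pays for a single index rebuild at the end. tests/addFile.py below follows exactly this pattern; a minimal sketch (payload shape as before, batch contents invented for illustration):

    import json
    import urllib2

    def send(data):
        req = urllib2.Request('http://localhost')
        req.add_header('Content-Type', 'application/json')
        return urllib2.urlopen(req, json.dumps(data)).read()

    # Add ten batches; no index rebuild happens during these calls any more.
    for i in range(10):
        batch = [[1, 'source sentence %d' % i, 'target sentence %d' % i]]
        send({'operation': 'addSentences', 'sentences': batch})

    # One explicit rebuild at the end instead of one per batch.
    send({'operation': 'refreshIndex'})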
@@ -64,6 +64,9 @@ std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<Matc
                 connection.getStringValue(result,0,1),
                 connection.getStringValue(result,0,2)));
         connection.clearResult(result);
+        BOOST_FOREACH (QueryParam * param, params) {
+            delete param;
+        }
     }
     connection.endTransaction();
     return results;
@@ -1,13 +1,13 @@
 #!/bin/sh
 
 echo "Recreating database schema..."
-sudo -u concordia psql concordia_server -f concordia_server.sql
+psql -W -U concordia concordia_server -f concordia_server.sql
 
 echo "Inserting initial data..."
 for initFile in `ls init/*`
 do
     echo "Init file:" $initFile
-    sudo -u concordia psql concordia_server -f $initFile
+    psql -W -U concordia concordia_server -f $initFile
 done
 
 echo "Concordia server database recreation complete!"
tests/addFile.py (new executable file, 73 lines)
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+
+def file_len(fname):
+    with open(fname) as f:
+        for i, l in enumerate(f):
+            pass
+    return i + 1
+
+def add_data(data):
+    req = urllib2.Request('http://localhost')
+    req.add_header('Content-Type', 'application/json')
+    urllib2.urlopen(req, json.dumps(data)).read()
+
+sourceFile = sys.argv[1]
+targetFile = sys.argv[2]
+tmId = int(sys.argv[3])
+
+totalLines = file_len(sourceFile)
+if file_len(targetFile) != totalLines:
+    print "File lengths do not match"
+    sys.exit(1)
+
+BUFFER_SIZE = 1000
+
+data = {
+    'operation': 'addSentences'
+}
+
+sentences = []
+start = time.time()
+with open(sys.argv[1]) as sourceSentences:
+    with open(sys.argv[2]) as targetSentences:
+        lineNumber = 0
+        for sourceSentence in sourceSentences:
+            lineNumber += 1
+            targetSentence = targetSentences.readline()
+            sentences.append([tmId, sourceSentence, targetSentence])
+            if lineNumber % BUFFER_SIZE == 0:
+                data['sentences'] = sentences
+                sentences = []
+                add_data(data)
+                mark = time.time()
+                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % (lineNumber, totalLines, mark-start, lineNumber/(mark-start))
+
+
+if len(sentences) > 0:
+    data['sentences'] = sentences
+    add_data(data)
+
+end = time.time()
+print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (lineNumber, end-start, lineNumber/(end-start))
+
+print "Generating index..."
+start = time.time()
+data = {
+    'operation': 'refreshIndex'
+}
+req = urllib2.Request('http://localhost')
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data)).read()
+
+end = time.time()
+print "Index regeneration complete. The operation took %.4f s" % (end - start)
tests/addSentence.py (new executable file, 27 lines)
@@ -0,0 +1,27 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+
+data = {
+    'operation': 'addSentence',
+    'sourceSentence':sys.argv[1],
+    'targetSentence':sys.argv[2],
+    'tmId':sys.argv[3]
+}
+
+start = time.time()
+req = urllib2.Request('http://localhost')
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
tests/addingError.txt (new file, 20 lines)
@@ -0,0 +1,20 @@
+Added 1101000 of 1254468 sentences. Time elapsed: 984.7707 s, current speed: 1118.0268 sentences/second
+Traceback (most recent call last):
+  File "./addFile.py", line 48, in <module>
+    add_data(data)
+  File "./addFile.py", line 19, in add_data
+    urllib2.urlopen(req, json.dumps(data)).read()
+  File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
+    return _opener.open(url, data, timeout)
+  File "/usr/lib/python2.7/urllib2.py", line 410, in open
+    response = meth(req, response)
+  File "/usr/lib/python2.7/urllib2.py", line 523, in http_response
+    'http', request, response, code, msg, hdrs)
+  File "/usr/lib/python2.7/urllib2.py", line 448, in error
+    return self._call_chain(*args)
+  File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
+    result = func(*args)
+  File "/usr/lib/python2.7/urllib2.py", line 531, in http_error_default
+    raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
+urllib2.HTTPError: HTTP Error 413: Request Entity Too Large
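The 413 above means a single 1000-sentence batch exceeded the web server's request body limit partway through the import. Besides raising that limit on the server side, a defensive variant of add_data (hypothetical, not part of this commit) could split an oversized batch in half and retry:

    import json
    import urllib2

    def add_data_safe(data):
        req = urllib2.Request('http://localhost')
        req.add_header('Content-Type', 'application/json')
        try:
            urllib2.urlopen(req, json.dumps(data)).read()
        except urllib2.HTTPError as e:
            if e.code == 413 and len(data['sentences']) > 1:
                # Request entity too large: halve the batch and retry each part.
                half = len(data['sentences']) // 2
                add_data_safe({'operation': data['operation'],
                               'sentences': data['sentences'][:half]})
                add_data_safe({'operation': data['operation'],
                               'sentences': data['sentences'][half:]})
            else:
                raise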
tests/simpleSearch.py (new executable file, 25 lines)
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+
+data = {
+    'operation': 'simpleSearch',
+    'pattern':sys.argv[1]
+}
+
+start = time.time()
+req = urllib2.Request('http://localhost')
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response