finished adding multiple sentences, memory leak
This commit is contained in:
parent
823c1fbdb2
commit
9eae5aa5b1
@ -57,6 +57,8 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmIds);
|
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmIds);
|
||||||
|
} else if (operation == REFRESH_INDEX_OP) {
|
||||||
|
_indexController->refreshIndexFromRAM(jsonWriter);
|
||||||
} else if (operation == SIMPLE_SEARCH_OP) {
|
} else if (operation == SIMPLE_SEARCH_OP) {
|
||||||
std::string pattern = d[PATTERN_PARAM].GetString();
|
std::string pattern = d[PATTERN_PARAM].GetString();
|
||||||
_searcherController->simpleSearch(jsonWriter, pattern);
|
_searcherController->simpleSearch(jsonWriter, pattern);
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
|
|
||||||
#define ADD_SENTENCE_OP "addSentence"
|
#define ADD_SENTENCE_OP "addSentence"
|
||||||
#define ADD_SENTENCES_OP "addSentences"
|
#define ADD_SENTENCES_OP "addSentences"
|
||||||
|
#define REFRESH_INDEX_OP "refreshIndex"
|
||||||
#define SIMPLE_SEARCH_OP "simpleSearch"
|
#define SIMPLE_SEARCH_OP "simpleSearch"
|
||||||
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
|
|
||||||
#define ADD_SENTENCE_OP "addSentence"
|
#define ADD_SENTENCE_OP "addSentence"
|
||||||
#define ADD_SENTENCES_OP "addSentences"
|
#define ADD_SENTENCES_OP "addSentences"
|
||||||
|
#define REFRESH_INDEX_OP "refreshIndex"
|
||||||
#define SIMPLE_SEARCH_OP "simpleSearch"
|
#define SIMPLE_SEARCH_OP "simpleSearch"
|
||||||
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
||||||
|
|
||||||
|
@ -45,7 +45,6 @@ void IndexController::addSentences(
|
|||||||
std::vector<TokenizedSentence> tokenizedSentences = _concordia->tokenizeAll(sourceSentences);
|
std::vector<TokenizedSentence> tokenizedSentences = _concordia->tokenizeAll(sourceSentences);
|
||||||
std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmIds);
|
std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmIds);
|
||||||
_concordia->addAllTokenizedExamples(tokenizedSentences, sentenceIds);
|
_concordia->addAllTokenizedExamples(tokenizedSentences, sentenceIds);
|
||||||
_concordia->refreshSAfromRAM();
|
|
||||||
|
|
||||||
jsonWriter.StartObject();
|
jsonWriter.StartObject();
|
||||||
jsonWriter.String("status");
|
jsonWriter.String("status");
|
||||||
@ -58,3 +57,20 @@ void IndexController::addSentences(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter) {
|
||||||
|
try {
|
||||||
|
_concordia->refreshSAfromRAM();
|
||||||
|
|
||||||
|
jsonWriter.StartObject();
|
||||||
|
jsonWriter.String("status");
|
||||||
|
jsonWriter.String("success");
|
||||||
|
jsonWriter.EndObject();
|
||||||
|
} catch (ConcordiaException & e) {
|
||||||
|
std::stringstream errorstream;
|
||||||
|
errorstream << "concordia error: " << e.what();
|
||||||
|
JsonGenerator::signalError(jsonWriter, errorstream.str());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,6 +30,8 @@ public:
|
|||||||
const std::vector<std::string> & targetSentences,
|
const std::vector<std::string> & targetSentences,
|
||||||
const std::vector<int> & tmIds);
|
const std::vector<int> & tmIds);
|
||||||
|
|
||||||
|
void refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
boost::shared_ptr<Concordia> _concordia;
|
boost::shared_ptr<Concordia> _concordia;
|
||||||
|
|
||||||
|
@ -64,6 +64,9 @@ std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<Matc
|
|||||||
connection.getStringValue(result,0,1),
|
connection.getStringValue(result,0,1),
|
||||||
connection.getStringValue(result,0,2)));
|
connection.getStringValue(result,0,2)));
|
||||||
connection.clearResult(result);
|
connection.clearResult(result);
|
||||||
|
BOOST_FOREACH (QueryParam * param, params) {
|
||||||
|
delete param;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
connection.endTransaction();
|
connection.endTransaction();
|
||||||
return results;
|
return results;
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
echo "Recreating database schema..."
|
echo "Recreating database schema..."
|
||||||
sudo -u concordia psql concordia_server -f concordia_server.sql
|
psql -W -U concordia concordia_server -f concordia_server.sql
|
||||||
|
|
||||||
echo "Inserting initial data..."
|
echo "Inserting initial data..."
|
||||||
for initFile in `ls init/*`
|
for initFile in `ls init/*`
|
||||||
do
|
do
|
||||||
echo "Init file:" $initFile
|
echo "Init file:" $initFile
|
||||||
sudo -u concordia psql concordia_server -f $initFile
|
psql -W -U concordia concordia_server -f $initFile
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "Concordia server database recreation complete!"
|
echo "Concordia server database recreation complete!"
|
||||||
|
73
tests/addFile.py
Executable file
73
tests/addFile.py
Executable file
@ -0,0 +1,73 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import json
|
||||||
|
import urllib2
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
def file_len(fname):
|
||||||
|
with open(fname) as f:
|
||||||
|
for i, l in enumerate(f):
|
||||||
|
pass
|
||||||
|
return i + 1
|
||||||
|
|
||||||
|
def add_data(data):
|
||||||
|
req = urllib2.Request('http://localhost')
|
||||||
|
req.add_header('Content-Type', 'application/json')
|
||||||
|
urllib2.urlopen(req, json.dumps(data)).read()
|
||||||
|
|
||||||
|
sourceFile = sys.argv[1]
|
||||||
|
targetFile = sys.argv[2]
|
||||||
|
tmId = int(sys.argv[3])
|
||||||
|
|
||||||
|
totalLines = file_len(sourceFile)
|
||||||
|
if file_len(targetFile) != totalLines:
|
||||||
|
print "File lengths do not match"
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
BUFFER_SIZE = 1000
|
||||||
|
|
||||||
|
data = {
|
||||||
|
'operation': 'addSentences'
|
||||||
|
}
|
||||||
|
|
||||||
|
sentences = []
|
||||||
|
start = time.time()
|
||||||
|
with open(sys.argv[1]) as sourceSentences:
|
||||||
|
with open(sys.argv[2]) as targetSentences:
|
||||||
|
lineNumber = 0
|
||||||
|
for sourceSentence in sourceSentences:
|
||||||
|
lineNumber += 1
|
||||||
|
targetSentence = targetSentences.readline()
|
||||||
|
sentences.append([tmId, sourceSentence, targetSentence])
|
||||||
|
if lineNumber % BUFFER_SIZE == 0:
|
||||||
|
data['sentences'] = sentences
|
||||||
|
sentences = []
|
||||||
|
add_data(data)
|
||||||
|
mark = time.time()
|
||||||
|
print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % (lineNumber, totalLines, mark-start, lineNumber/(mark-start))
|
||||||
|
|
||||||
|
|
||||||
|
if len(sentences) > 0:
|
||||||
|
data['sentences'] = sentences
|
||||||
|
add_data(data)
|
||||||
|
|
||||||
|
end = time.time()
|
||||||
|
print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (lineNumber, end-start, lineNumber/(end-start))
|
||||||
|
|
||||||
|
print "Generating index..."
|
||||||
|
start = time.time()
|
||||||
|
data = {
|
||||||
|
'operation': 'refreshIndex'
|
||||||
|
}
|
||||||
|
req = urllib2.Request('http://localhost')
|
||||||
|
req.add_header('Content-Type', 'application/json')
|
||||||
|
urllib2.urlopen(req, json.dumps(data)).read()
|
||||||
|
|
||||||
|
end = time.time()
|
||||||
|
print "Index regeneration complete. The operation took %.4f s" % (end - start)
|
||||||
|
|
||||||
|
|
||||||
|
|
27
tests/addSentence.py
Executable file
27
tests/addSentence.py
Executable file
@ -0,0 +1,27 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import json
|
||||||
|
import urllib2
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
data = {
|
||||||
|
'operation': 'addSentence',
|
||||||
|
'sourceSentence':sys.argv[1],
|
||||||
|
'targetSentence':sys.argv[2],
|
||||||
|
'tmId':sys.argv[3]
|
||||||
|
}
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
req = urllib2.Request('http://localhost')
|
||||||
|
req.add_header('Content-Type', 'application/json')
|
||||||
|
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
|
||||||
|
end = time.time()
|
||||||
|
|
||||||
|
print "Execution time: %.4f seconds." % (end-start)
|
||||||
|
print "Result: "
|
||||||
|
print response
|
||||||
|
|
||||||
|
|
20
tests/addingError.txt
Normal file
20
tests/addingError.txt
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
Added 1101000 of 1254468 sentences. Time elapsed: 984.7707 s, current speed: 1118.0268 sentences/second
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "./addFile.py", line 48, in <module>
|
||||||
|
add_data(data)
|
||||||
|
File "./addFile.py", line 19, in add_data
|
||||||
|
urllib2.urlopen(req, json.dumps(data)).read()
|
||||||
|
File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
|
||||||
|
return _opener.open(url, data, timeout)
|
||||||
|
File "/usr/lib/python2.7/urllib2.py", line 410, in open
|
||||||
|
response = meth(req, response)
|
||||||
|
File "/usr/lib/python2.7/urllib2.py", line 523, in http_response
|
||||||
|
'http', request, response, code, msg, hdrs)
|
||||||
|
File "/usr/lib/python2.7/urllib2.py", line 448, in error
|
||||||
|
return self._call_chain(*args)
|
||||||
|
File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
|
||||||
|
result = func(*args)
|
||||||
|
File "/usr/lib/python2.7/urllib2.py", line 531, in http_error_default
|
||||||
|
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
|
||||||
|
urllib2.HTTPError: HTTP Error 413: Request Entity Too Large
|
||||||
|
|
25
tests/simpleSearch.py
Executable file
25
tests/simpleSearch.py
Executable file
@ -0,0 +1,25 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import json
|
||||||
|
import urllib2
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
data = {
|
||||||
|
'operation': 'simpleSearch',
|
||||||
|
'pattern':sys.argv[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
req = urllib2.Request('http://localhost')
|
||||||
|
req.add_header('Content-Type', 'application/json')
|
||||||
|
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
|
||||||
|
end = time.time()
|
||||||
|
|
||||||
|
print "Execution time: %.4f seconds." % (end-start)
|
||||||
|
print "Result: "
|
||||||
|
print response
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user