add sources

Rafał Jaworski 2019-03-03 15:24:34 +01:00
parent 250d82ebf8
commit c46be0d3e5
6 changed files with 89 additions and 19 deletions

View File

@@ -312,6 +312,25 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
         jsonWriter.String("newSourceId");
         jsonWriter.Int(newId);
         jsonWriter.EndObject();
+    } else if (operation == ADD_SOURCES_OP) {
+        std::vector<int> externalIds;
+        std::vector<std::string> names;
+        std::vector<std::string> links;
+        const rapidjson::Value & sourcesArray = d[SOURCES_PARAM];
+        for (rapidjson::SizeType i = 0; i < sourcesArray.Size(); i++) {
+            externalIds.push_back(sourcesArray[i][0].GetInt());
+            names.push_back(sourcesArray[i][1].GetString());
+            links.push_back(sourcesArray[i][2].GetString());
+        }
+        _sourceDAO.addSources(externalIds, names, links);
+        jsonWriter.StartObject();
+        jsonWriter.String("status");
+        jsonWriter.String("success");
+        jsonWriter.EndObject();
     } else {
         JsonGenerator::signalError(jsonWriter, "no such operation: " + operation);
     }
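
For reference, a request for the new addSources operation can be built the same way as the existing client scripts in this repository build theirs; a minimal sketch (assuming the server address comes from host.py as elsewhere in the scripts; the ids, names and links below are made-up examples):

    import json
    import urllib2
    import host

    address = 'http://' + host.concordia_host
    if len(host.concordia_port) > 0:
        address += ':' + host.concordia_port

    # Each source is a triple [externalId, name, link], in the order the handler
    # above reads it: sourcesArray[i][0] as int, [1] as name, [2] as link.
    data = {
        'operation': 'addSources',
        'sources': [
            [17, 'Example source', 'http://example.com/17'],
            [18, 'Another source', 'http://example.com/18']
        ]
    }
    req = urllib2.Request(address)
    req.add_header('Content-Type', 'application/json')
    response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
    print(response)  # the handler replies with {"status": "success"}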

View File

@@ -30,6 +30,7 @@
 #define INTERVALS_PARAM "intervals"
 #define EXTERNAL_ID_PARAM "externalId"
 #define LINK_PARAM "link"
+#define SOURCES_PARAM "sources"
 #define ADD_SENTENCE_OP "addSentence"
 #define ADD_SENTENCES_OP "addSentences"
@@ -49,5 +50,6 @@
 #define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch"
 #define ADD_TM_OP "addTm"
 #define ADD_SOURCE_OP "addSource"
+#define ADD_SOURCES_OP "addSources"
 #define LEMMATIZER_DELIMITER "@#@"

View File

@@ -34,4 +34,38 @@ int SourceDAO::addSource(const int externalId, const std::string & name, const s
     return newId;
 }
+
+void SourceDAO::addSources(const std::vector<int> & externalIds,
+                           const std::vector<std::string> & names,
+                           const std::vector<std::string> & links) {
+    DBconnection connection;
+    connection.startTransaction();
+    for (int i = 0; i < externalIds.size(); i++) {
+        int externalId = externalIds.at(i);
+        std::string name = names.at(i);
+        std::string link = links.at(i);
+        std::string query = "INSERT INTO source(external_id, name, link) values($1::integer,$2::text,$3::text) RETURNING id";
+        std::vector<QueryParam*> params;
+        params.push_back(new IntParam(externalId));
+        params.push_back(new StringParam(name));
+        params.push_back(new StringParam(link));
+        PGresult * result = connection.execute(query, params);
+        connection.clearResult(result);
+        BOOST_FOREACH (QueryParam * param, params) {
+            delete param;
+        }
+    }
+    connection.endTransaction();
+}

View File

@@ -17,6 +17,10 @@ public:
     int addSource(const int externalId, const std::string & name, const std::string & link);
+
+    void addSources(const std::vector<int> & externalIds,
+                    const std::vector<std::string> & names,
+                    const std::vector<std::string> & links);
 private:
 };

View File

@@ -24,7 +24,7 @@ def file_len(fname):
 def add_examples(examplesData):
     req = urllib2.Request(address)
     req.add_header('Content-Type', 'application/json')
-    response = json.loads(urllib2.urlopen(req, json.dumps(examplesData), 3600).read())
+    response = json.loads(urllib2.urlopen(req, json.dumps(examplesData), timeout = 3600).read())
     print(response)
     if response['status'] == 'error':
         raise Exception(response['message'])
@@ -64,7 +64,7 @@ data = {
 req = urllib2.Request(address)
 req.add_header('Content-Type', 'application/json')
-response = json.loads(urllib2.urlopen(req, json.dumps(data), 3600).read())
+response = json.loads(urllib2.urlopen(req, json.dumps(data), timeout = 3600).read())
 print(response)
 tmId = int(response['newTmId'])
 print "Added new tm: %d" % tmId
@@ -111,7 +111,7 @@ data = {
 }
 req = urllib2.Request(address)
 req.add_header('Content-Type', 'application/json')
-urllib2.urlopen(req, json.dumps(data), 3600).read()
+urllib2.urlopen(req, json.dumps(data), timeout = 3600).read()
 end = time.time()
 print "Index regeneration complete. The operation took %.4f s" % (end - start)

View File

@@ -7,6 +7,20 @@ import sys
 import time
 import host
+
+BUFFER_SIZE = 500
+
+def addSources(sources_buffer):
+    data = {
+        'operation': 'addSources',
+        'sources': sources_buffer
+    }
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    urllib2.urlopen(req, json.dumps(data))
+
 address = 'http://'+host.concordia_host
 if len(host.concordia_port) > 0:
     address += ':'+host.concordia_port
@@ -14,23 +28,20 @@ if len(host.concordia_port) > 0:
 with open(sys.argv[1]) as sources_file:
     counter = 0
+    sources_buffer = []
     for line in sources_file:
         counter += 1
         idStr, link, name = line.rstrip().split('\t')
+        sources_buffer.append([int(idStr), name, link])
+        if len(sources_buffer) == BUFFER_SIZE:
+            addSources(sources_buffer)
+            sources_buffer = []
+            print("Added %d sources" % counter)
+    if len(sources_buffer) > 0:
+        addSources(sources_buffer)
+    print("Added all %d sources" % counter)
-        data = {
-            'operation': 'addSource',
-            'externalId':int(idStr),
-            'name':name,
-            'link':link
-        }
-        req = urllib2.Request(address)
-        req.add_header('Content-Type', 'application/json')
-        urllib2.urlopen(req, json.dumps(data)).read()
-        if counter % 1000 == 0:
-            print("Done %d sources" % counter)