add sources
This commit is contained in:
parent
250d82ebf8
commit
c46be0d3e5
@ -312,6 +312,25 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
||||
jsonWriter.String("newSourceId");
|
||||
jsonWriter.Int(newId);
|
||||
jsonWriter.EndObject();
|
||||
} else if (operation == ADD_SOURCES_OP) {
|
||||
|
||||
std::vector<int> externalIds;
|
||||
std::vector<std::string> names;
|
||||
std::vector<std::string> links;
|
||||
|
||||
const rapidjson::Value & sourcesArray = d[SOURCES_PARAM];
|
||||
for (rapidjson::SizeType i = 0; i < sourcesArray.Size(); i++) {
|
||||
externalIds.push_back(sourcesArray[i][0].GetInt());
|
||||
names.push_back(sourcesArray[i][1].GetString());
|
||||
links.push_back(sourcesArray[i][2].GetString());
|
||||
}
|
||||
|
||||
_sourceDAO.addSources(externalIds, names, links);
|
||||
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.String("success");
|
||||
jsonWriter.EndObject();
|
||||
} else {
|
||||
JsonGenerator::signalError(jsonWriter, "no such operation: " + operation);
|
||||
}
|
||||
|
@ -30,6 +30,7 @@
|
||||
#define INTERVALS_PARAM "intervals"
|
||||
#define EXTERNAL_ID_PARAM "externalId"
|
||||
#define LINK_PARAM "link"
|
||||
#define SOURCES_PARAM "sources"
|
||||
|
||||
#define ADD_SENTENCE_OP "addSentence"
|
||||
#define ADD_SENTENCES_OP "addSentences"
|
||||
@ -49,5 +50,6 @@
|
||||
#define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch"
|
||||
#define ADD_TM_OP "addTm"
|
||||
#define ADD_SOURCE_OP "addSource"
|
||||
#define ADD_SOURCES_OP "addSources"
|
||||
|
||||
#define LEMMATIZER_DELIMITER "@#@"
|
||||
|
@ -34,4 +34,38 @@ int SourceDAO::addSource(const int externalId, const std::string & name, const s
|
||||
|
||||
return newId;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void SourceDAO::addSources(const std::vector<int> & externalIds,
|
||||
const std::vector<std::string> & names,
|
||||
const std::vector<std::string> & links) {
|
||||
|
||||
|
||||
|
||||
DBconnection connection;
|
||||
connection.startTransaction();
|
||||
|
||||
for(int i=0;i<externalIds.size();i++) {
|
||||
int externalId = externalIds.at(i);
|
||||
std::string name = names.at(i);
|
||||
std::string link = links.at(i);
|
||||
|
||||
std::string query = "INSERT INTO source(external_id, name, link) values($1::integer,$2::text,$3::text) RETURNING id";
|
||||
std::vector<QueryParam*> params;
|
||||
params.push_back(new IntParam(externalId));
|
||||
params.push_back(new StringParam(name));
|
||||
params.push_back(new StringParam(link));
|
||||
|
||||
PGresult * result = connection.execute(query, params);
|
||||
connection.clearResult(result);
|
||||
BOOST_FOREACH (QueryParam * param, params) {
|
||||
delete param;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
connection.endTransaction();
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
@ -17,6 +17,10 @@ public:
|
||||
|
||||
int addSource(const int externalId, const std::string & name, const std::string & link);
|
||||
|
||||
void addSources(const std::vector<int> & externalIds,
|
||||
const std::vector<std::string> & names,
|
||||
const std::vector<std::string> & links);
|
||||
|
||||
private:
|
||||
|
||||
};
|
||||
|
@ -24,7 +24,7 @@ def file_len(fname):
|
||||
def add_examples(examplesData):
|
||||
req = urllib2.Request(address)
|
||||
req.add_header('Content-Type', 'application/json')
|
||||
response = json.loads(urllib2.urlopen(req, json.dumps(examplesData), 3600).read())
|
||||
response = json.loads(urllib2.urlopen(req, json.dumps(examplesData), timeout = 3600).read())
|
||||
print(response)
|
||||
if response['status'] == 'error':
|
||||
raise Exception(response['message'])
|
||||
@ -64,7 +64,7 @@ data = {
|
||||
|
||||
req = urllib2.Request(address)
|
||||
req.add_header('Content-Type', 'application/json')
|
||||
response = json.loads(urllib2.urlopen(req, json.dumps(data), 3600).read())
|
||||
response = json.loads(urllib2.urlopen(req, json.dumps(data), timeout = 3600).read())
|
||||
print(response)
|
||||
tmId = int(response['newTmId'])
|
||||
print "Added new tm: %d" % tmId
|
||||
@ -111,7 +111,7 @@ data = {
|
||||
}
|
||||
req = urllib2.Request(address)
|
||||
req.add_header('Content-Type', 'application/json')
|
||||
urllib2.urlopen(req, json.dumps(data), 3600).read()
|
||||
urllib2.urlopen(req, json.dumps(data), timeout = 3600).read()
|
||||
|
||||
end = time.time()
|
||||
print "Index regeneration complete. The operation took %.4f s" % (end - start)
|
||||
|
@ -7,6 +7,20 @@ import sys
|
||||
import time
|
||||
import host
|
||||
|
||||
BUFFER_SIZE = 500
|
||||
|
||||
|
||||
def addSources(sources_buffer):
    """Send one batch of sources to the server in a single 'addSources' request.

    sources_buffer -- list of rows, each row being the tab-split fields of one
                      line of the input file.
                      NOTE(review): rows are assumed to be (id, link, name),
                      the column order of the input file -- confirm.

    The server reads every entry of 'sources' positionally as
    [externalId (integer), name, link], so each row is converted to that
    layout before it is sent.

    Raises Exception when the server answers with an error status.
    """
    sources = []
    for id_str, link, name in sources_buffer:
        # The external id must be a JSON number, not a string.
        sources.append([int(id_str), name, link])

    data = {
        'operation': 'addSources',
        'sources': sources
    }

    req = urllib2.Request(address)
    req.add_header('Content-Type', 'application/json')
    # Read the reply so that a failure reported by the server is not ignored.
    response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
    if response.get('status') == 'error':
        raise Exception(response.get('message'))
|
||||
|
||||
|
||||
address = 'http://'+host.concordia_host
|
||||
if len(host.concordia_port) > 0:
|
||||
address += ':'+host.concordia_port
|
||||
@ -14,23 +28,20 @@ if len(host.concordia_port) > 0:
|
||||
|
||||
with open(sys.argv[1]) as sources_file:
|
||||
counter = 0
|
||||
sources_buffer = []
|
||||
for line in sources_file:
|
||||
counter += 1
|
||||
idStr, link, name = line.rstrip().split('\t')
|
||||
sources_buffer.append(line.rstrip().split('\t'))
|
||||
if len(sources_buffer) == BUFFER_SIZE:
|
||||
addSources(sources_buffer)
|
||||
sources_buffer = []
|
||||
print("Added %d sources" % counter)
|
||||
|
||||
|
||||
if len(sources_buffer) > 0:
|
||||
addSources(sources_buffer)
|
||||
|
||||
print("Added all %d sources" % counter)
|
||||
|
||||
|
||||
data = {
|
||||
'operation': 'addSource',
|
||||
'externalId':int(idStr),
|
||||
'name':name,
|
||||
'link':link
|
||||
}
|
||||
|
||||
req = urllib2.Request(address)
|
||||
req.add_header('Content-Type', 'application/json')
|
||||
urllib2.urlopen(req, json.dumps(data)).read()
|
||||
|
||||
if counter % 1000 == 0:
|
||||
print("Done %d sources" % counter)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user