add sources
This commit is contained in:
parent
250d82ebf8
commit
c46be0d3e5
@ -312,6 +312,25 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
|||||||
jsonWriter.String("newSourceId");
|
jsonWriter.String("newSourceId");
|
||||||
jsonWriter.Int(newId);
|
jsonWriter.Int(newId);
|
||||||
jsonWriter.EndObject();
|
jsonWriter.EndObject();
|
||||||
|
} else if (operation == ADD_SOURCES_OP) {
|
||||||
|
|
||||||
|
std::vector<int> externalIds;
|
||||||
|
std::vector<std::string> names;
|
||||||
|
std::vector<std::string> links;
|
||||||
|
|
||||||
|
const rapidjson::Value & sourcesArray = d[SOURCES_PARAM];
|
||||||
|
for (rapidjson::SizeType i = 0; i < sourcesArray.Size(); i++) {
|
||||||
|
externalIds.push_back(sourcesArray[i][0].GetInt());
|
||||||
|
names.push_back(sourcesArray[i][1].GetString());
|
||||||
|
links.push_back(sourcesArray[i][2].GetString());
|
||||||
|
}
|
||||||
|
|
||||||
|
_sourceDAO.addSources(externalIds, names, links);
|
||||||
|
|
||||||
|
jsonWriter.StartObject();
|
||||||
|
jsonWriter.String("status");
|
||||||
|
jsonWriter.String("success");
|
||||||
|
jsonWriter.EndObject();
|
||||||
} else {
|
} else {
|
||||||
JsonGenerator::signalError(jsonWriter, "no such operation: " + operation);
|
JsonGenerator::signalError(jsonWriter, "no such operation: " + operation);
|
||||||
}
|
}
|
||||||
|
@ -30,6 +30,7 @@
|
|||||||
#define INTERVALS_PARAM "intervals"
|
#define INTERVALS_PARAM "intervals"
|
||||||
#define EXTERNAL_ID_PARAM "externalId"
|
#define EXTERNAL_ID_PARAM "externalId"
|
||||||
#define LINK_PARAM "link"
|
#define LINK_PARAM "link"
|
||||||
|
#define SOURCES_PARAM "sources"
|
||||||
|
|
||||||
#define ADD_SENTENCE_OP "addSentence"
|
#define ADD_SENTENCE_OP "addSentence"
|
||||||
#define ADD_SENTENCES_OP "addSentences"
|
#define ADD_SENTENCES_OP "addSentences"
|
||||||
@ -49,5 +50,6 @@
|
|||||||
#define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch"
|
#define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch"
|
||||||
#define ADD_TM_OP "addTm"
|
#define ADD_TM_OP "addTm"
|
||||||
#define ADD_SOURCE_OP "addSource"
|
#define ADD_SOURCE_OP "addSource"
|
||||||
|
#define ADD_SOURCES_OP "addSources"
|
||||||
|
|
||||||
#define LEMMATIZER_DELIMITER "@#@"
|
#define LEMMATIZER_DELIMITER "@#@"
|
||||||
|
@ -35,3 +35,37 @@ int SourceDAO::addSource(const int externalId, const std::string & name, const s
|
|||||||
return newId;
|
return newId;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SourceDAO::addSources(const std::vector<int> & externalIds,
|
||||||
|
const std::vector<std::string> & names,
|
||||||
|
const std::vector<std::string> & links) {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
DBconnection connection;
|
||||||
|
connection.startTransaction();
|
||||||
|
|
||||||
|
for(int i=0;i<externalIds.size();i++) {
|
||||||
|
int externalId = externalIds.at(i);
|
||||||
|
std::string name = names.at(i);
|
||||||
|
std::string link = links.at(i);
|
||||||
|
|
||||||
|
std::string query = "INSERT INTO source(external_id, name, link) values($1::integer,$2::text,$3::text) RETURNING id";
|
||||||
|
std::vector<QueryParam*> params;
|
||||||
|
params.push_back(new IntParam(externalId));
|
||||||
|
params.push_back(new StringParam(name));
|
||||||
|
params.push_back(new StringParam(link));
|
||||||
|
|
||||||
|
PGresult * result = connection.execute(query, params);
|
||||||
|
connection.clearResult(result);
|
||||||
|
BOOST_FOREACH (QueryParam * param, params) {
|
||||||
|
delete param;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
connection.endTransaction();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
@ -17,6 +17,10 @@ public:
|
|||||||
|
|
||||||
int addSource(const int externalId, const std::string & name, const std::string & link);
|
int addSource(const int externalId, const std::string & name, const std::string & link);
|
||||||
|
|
||||||
|
// Batch variant of addSource: inserts one row into the `source` table per
// index i, taken from externalIds[i], names[i] and links[i], inside a single
// transaction. Unlike addSource, it does not return the ids of the new rows.
void addSources(const std::vector<int> & externalIds,
|
||||||
|
const std::vector<std::string> & names,
|
||||||
|
const std::vector<std::string> & links);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -24,7 +24,7 @@ def file_len(fname):
|
|||||||
def add_examples(examplesData):
|
def add_examples(examplesData):
|
||||||
req = urllib2.Request(address)
|
req = urllib2.Request(address)
|
||||||
req.add_header('Content-Type', 'application/json')
|
req.add_header('Content-Type', 'application/json')
|
||||||
response = json.loads(urllib2.urlopen(req, json.dumps(examplesData), 3600).read())
|
response = json.loads(urllib2.urlopen(req, json.dumps(examplesData), timeout = 3600).read())
|
||||||
print(response)
|
print(response)
|
||||||
if response['status'] == 'error':
|
if response['status'] == 'error':
|
||||||
raise Exception(response['message'])
|
raise Exception(response['message'])
|
||||||
@ -64,7 +64,7 @@ data = {
|
|||||||
|
|
||||||
req = urllib2.Request(address)
|
req = urllib2.Request(address)
|
||||||
req.add_header('Content-Type', 'application/json')
|
req.add_header('Content-Type', 'application/json')
|
||||||
response = json.loads(urllib2.urlopen(req, json.dumps(data), 3600).read())
|
response = json.loads(urllib2.urlopen(req, json.dumps(data), timeout = 3600).read())
|
||||||
print(response)
|
print(response)
|
||||||
tmId = int(response['newTmId'])
|
tmId = int(response['newTmId'])
|
||||||
print "Added new tm: %d" % tmId
|
print "Added new tm: %d" % tmId
|
||||||
@ -111,7 +111,7 @@ data = {
|
|||||||
}
|
}
|
||||||
req = urllib2.Request(address)
|
req = urllib2.Request(address)
|
||||||
req.add_header('Content-Type', 'application/json')
|
req.add_header('Content-Type', 'application/json')
|
||||||
urllib2.urlopen(req, json.dumps(data), 3600).read()
|
urllib2.urlopen(req, json.dumps(data), timeout = 3600).read()
|
||||||
|
|
||||||
end = time.time()
|
end = time.time()
|
||||||
print "Index regeneration complete. The operation took %.4f s" % (end - start)
|
print "Index regeneration complete. The operation took %.4f s" % (end - start)
|
||||||
|
@ -7,6 +7,20 @@ import sys
|
|||||||
import time
|
import time
|
||||||
import host
|
import host
|
||||||
|
|
||||||
|
BUFFER_SIZE = 500
|
||||||
|
|
||||||
|
|
||||||
|
def addSources(sources_buffer):
    """Send one batch of sources to the Concordia server in a single request.

    sources_buffer -- list of source records; it is serialized unchanged as
    the 'sources' array of an 'addSources' JSON request.

    Relies on the module-level `address`, which the script assigns before
    the first call. The server response is not read or checked.
    """
    # NOTE(review): the server-side handler reads every record as
    # [externalId (JSON integer), name, link]. The caller in this script
    # passes the raw tab-split line, i.e. strings only, and the previous
    # version of the script unpacked the columns as (id, link, name).
    # Confirm the records are converted/reordered before they get here.
    data = {
        'operation': 'addSources',
        # Was `sources`, an undefined name that raised NameError on the
        # first call; the batch to send is the function's own parameter.
        'sources': sources_buffer
    }

    req = urllib2.Request(address)
    req.add_header('Content-Type', 'application/json')
    urllib2.urlopen(req, json.dumps(data))
|
||||||
|
|
||||||
|
|
||||||
address = 'http://'+host.concordia_host
|
address = 'http://'+host.concordia_host
|
||||||
if len(host.concordia_port) > 0:
|
if len(host.concordia_port) > 0:
|
||||||
address += ':'+host.concordia_port
|
address += ':'+host.concordia_port
|
||||||
@ -14,23 +28,20 @@ if len(host.concordia_port) > 0:
|
|||||||
|
|
||||||
with open(sys.argv[1]) as sources_file:
|
with open(sys.argv[1]) as sources_file:
|
||||||
counter = 0
|
counter = 0
|
||||||
|
sources_buffer = []
|
||||||
for line in sources_file:
|
for line in sources_file:
|
||||||
counter += 1
|
counter += 1
|
||||||
idStr, link, name = line.rstrip().split('\t')
|
sources_buffer.append(line.rstrip().split('\t'))
|
||||||
|
if len(sources_buffer) == BUFFER_SIZE:
|
||||||
|
addSources(sources_buffer)
|
||||||
|
sources_buffer = []
|
||||||
|
print("Added %d sources" % counter)
|
||||||
|
|
||||||
|
|
||||||
data = {
|
if len(sources_buffer) > 0:
|
||||||
'operation': 'addSource',
|
addSources(sources_buffer)
|
||||||
'externalId':int(idStr),
|
|
||||||
'name':name,
|
print("Added all %d sources" % counter)
|
||||||
'link':link
|
|
||||||
}
|
|
||||||
|
|
||||||
req = urllib2.Request(address)
|
|
||||||
req.add_header('Content-Type', 'application/json')
|
|
||||||
urllib2.urlopen(req, json.dumps(data)).read()
|
|
||||||
|
|
||||||
if counter % 1000 == 0:
|
|
||||||
print("Done %d sources" % counter)
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user