example source
commit 0fc4558ea7
parent d900e806d9
@@ -66,7 +66,8 @@ function presentFullSearchResults(data) {
     for (j=0;j<data['result']['occurrences'].length;j++) {
         var occurence = data['result']['occurrences'][j];
-        result += '<table class="example"><tr><td>';
+        result += '<table class="example">';
+        result += '<tr><td>';
 
         // source segment
         var sourceSegment = occurence['sourceSegment'];
@@ -88,7 +89,8 @@ function presentFullSearchResults(data) {
             currStart = occurence['targetFragments'][i][1];
         }
         result += targetSegment.slice(currStart);
-        result += '</td></tr></table>';
+        result += '</td></tr>';
+        result += '<tr><td colspan="2" style="text-align:right;font-style:italic;font-size:70%">Źródło: <a target="_blank" href="'+occurence['sourceLink']+'">'+occurence['sourceName']+'</a><td></tr></table>';
     }
 
     $('#occurrences').html(result);
cat/versions_available/opensubtitles_plen.cfg (new file)
@@ -0,0 +1,8 @@
+dir@#@opensubtitles_plen
+concordia_host@#@concordia.poleng
+concordia_port@#@8800
+tmid@#@2
+desc@#@Witamy w interaktywnym demo systemu Concordia. System znajduje najdłuższe fragmenty zdania wejściowego w pamięci tłumaczeń. Proszę wpisać polskie zdanie w poniższe pole i nacisnąć Enter (albo użyć przycisku "search"). Aby zapoznać się z systemem możesz użyć wcześniej przygotowanych przykładów - po prostu kliknij link "apply" przy wybranym przykładzie. Po wyszukaniu, kliknij na wybrany podświetlony fragment, aby zobaczyć jego kontekst.
+enjoy@#@Życzymy udanej pracy z systemem!
+prompt@#@Wprowadź zdanie (po polsku):
+suggestion@#@Nawet zepsute zegary pokazują dwa razy dziennie właściwą godzinę
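
The new configuration file uses a plain key@#@value line format; the Polish desc, enjoy, prompt and suggestion entries are the UI strings shown by the demo (a welcome description, a sign-off, an input prompt and a sample sentence). As a rough illustration only, a parser for this format could look like the sketch below; the actual loader used by the cat frontend is not part of this commit, and readConfig is a hypothetical name.

// Hypothetical sketch, not from this commit: read key@#@value lines into a map.
#include <fstream>
#include <map>
#include <string>

std::map<std::string, std::string> readConfig(const std::string & path) {
    std::map<std::string, std::string> config;
    std::ifstream file(path.c_str());
    std::string line;
    const std::string separator = "@#@";
    while (std::getline(file, line)) {
        std::string::size_type pos = line.find(separator);
        if (pos != std::string::npos) {
            // everything before the separator is the key, the rest is the value
            config[line.substr(0, pos)] = line.substr(pos + separator.length());
        }
    }
    return config;
}
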
cat/versions_enabled/opensubtitles_plen.cfg (new symbolic link)
@@ -0,0 +1 @@
+../versions_available/opensubtitles_plen.cfg
@@ -5,12 +5,16 @@ ExampleOccurrence::ExampleOccurrence(
     const int matchedExampleStart,
     const int matchedExampleEnd,
     const std::string & sourceSegment,
-    const std::string & targetSegment):
+    const std::string & targetSegment,
+    const std::string & sourceName,
+    const std::string & sourceLink):
         _id(id),
         _matchedExampleStart(matchedExampleStart),
         _matchedExampleEnd(matchedExampleEnd),
         _sourceSegment(sourceSegment),
-        _targetSegment(targetSegment) {
+        _targetSegment(targetSegment),
+        _sourceName(sourceName),
+        _sourceLink(sourceLink) {
 }
 
 ExampleOccurrence::~ExampleOccurrence() {
@@ -12,7 +12,9 @@ public:
         const int matchedExampleStart,
         const int matchedExampleEnd,
         const std::string & sourceSegment,
-        const std::string & targetSegment
+        const std::string & targetSegment,
+        const std::string & sourceName,
+        const std::string & sourceLink
     );
     /*! Destructor.
     */
@@ -42,6 +44,14 @@ public:
         return _targetFragments;
     }
 
+    const std::string & getSourceName() const {
+        return _sourceName;
+    }
+
+    const std::string & getSourceLink() const {
+        return _sourceLink;
+    }
+
     void addMatchedTargetFragment(const std::pair<int,int> & targetFragment);
 
 private:
@@ -56,6 +66,10 @@ private:
     std::string _targetSegment;
 
     std::vector<std::pair<int,int> > _targetFragments;
+
+    std::string _sourceName;
+
+    std::string _sourceLink;
 };
 
 #endif
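
With the two extra constructor parameters, every ExampleOccurrence now carries the name and link of the corpus it came from, exposed through getSourceName() and getSourceLink(). A minimal usage sketch follows, assuming the header name below (the actual file path is not shown in this diff); it rebuilds the same attribution markup that presentFullSearchResults produces on the client side.

// Sketch only; "example_occurrence.hpp" is an assumed header name.
#include <string>
#include "example_occurrence.hpp"

// Build the source attribution link ("Źródło") for one occurrence,
// mirroring the JavaScript in presentFullSearchResults.
std::string sourceAttribution(const ExampleOccurrence & occurrence) {
    return "<a target=\"_blank\" href=\"" + occurrence.getSourceLink() + "\">"
           + occurrence.getSourceName() + "</a>";
}
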
@@ -72,10 +72,12 @@ void IndexController::addSentences(rapidjson::Writer<rapidjson::StringBuffer> &
     try {
         boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
         if (it != _concordiasMap->end()) {
-            std::vector<TokenizedSentence> tokenizedLemmatizedSentences = it->second->tokenizeAll(_lemmatizerFacade->lemmatizeSentencesIfNeeded(sourceSentences, tmId));
-            std::vector<TokenizedSentence> tokenizedSentences = it->second->tokenizeAll(sourceSentences);
-            std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmId);
-            it->second->addAllTokenizedExamples(tokenizedLemmatizedSentences, sentenceIds);
+            std::vector<TokenizedSentence> tokenizedSourceSentences = it->second->tokenizeAll(sourceSentences, false, false);
+            std::vector<TokenizedSentence> tokenizedLemmatizedSourceSentences = it->second->tokenizeAll(lemmatizedSourceSentences, true, true);
+            std::vector<TokenizedSentence> tokenizedTargetSentences = it->second->tokenizeAll(targetSentences, false, false);
+
+            std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addAlignedSentences(tokenizedSourceSentences, tokenizedTargetSentences, alignments, sourceIds, tmId);
+            it->second->addAllTokenizedExamples(tokenizedLemmatizedSourceSentences, sentenceIds);
 
             jsonWriter.StartObject();
             jsonWriter.String("status");
@@ -82,6 +82,10 @@ void JsonGenerator::writeFullSearchResult(rapidjson::Writer<rapidjson::StringBuf
     jsonWriter.String(occurrence.getSourceSegment().c_str());
     jsonWriter.String("targetSegment");
     jsonWriter.String(occurrence.getTargetSegment().c_str());
+    jsonWriter.String("sourceName");
+    jsonWriter.String(occurrence.getSourceName().c_str());
+    jsonWriter.String("sourceLink");
+    jsonWriter.String(occurrence.getSourceLink().c_str());
     jsonWriter.String("targetFragments");
     jsonWriter.StartArray(); // all target fragments
     for (std::vector<std::pair<int,int> >::const_iterator it = occurrence.getTargetFragments().begin();
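
The two new key/value pairs are what the updated presentFullSearchResults reads as occurence['sourceName'] and occurence['sourceLink']. A self-contained rapidjson sketch of the per-occurrence JSON shape (all field values below are made up for illustration):

// Standalone sketch of the JSON produced per occurrence; values are examples only.
#include <iostream>
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"

int main() {
    rapidjson::StringBuffer buffer;
    rapidjson::Writer<rapidjson::StringBuffer> jsonWriter(buffer);
    jsonWriter.StartObject();
    jsonWriter.String("sourceSegment");
    jsonWriter.String("przykładowe zdanie");
    jsonWriter.String("targetSegment");
    jsonWriter.String("an example sentence");
    jsonWriter.String("sourceName");
    jsonWriter.String("opensubtitles_plen");             // hypothetical value
    jsonWriter.String("sourceLink");
    jsonWriter.String("http://www.opensubtitles.org/");  // hypothetical value
    jsonWriter.String("targetFragments");
    jsonWriter.StartArray();  // all target fragments (left empty here)
    jsonWriter.EndArray();
    jsonWriter.EndObject();
    std::cout << buffer.GetString() << std::endl;
    return 0;
}
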
@@ -54,6 +54,7 @@ std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
     return newIds;
 }
 
+
 std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addAlignedSentences(
     const std::vector<TokenizedSentence> & sourceSentences,
     const std::vector<TokenizedSentence> & targetSentences,
@@ -72,6 +73,26 @@ std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addAlignedSentences(
     return newIds;
 }
 
+std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addAlignedSentences(
+    const std::vector<TokenizedSentence> & sourceSentences,
+    const std::vector<TokenizedSentence> & targetSentences,
+    const std::vector<std::vector<std::vector<int> > > & allAlignments,
+    const std::vector<int> & sourceIds,
+    const int tmId) throw (ConcordiaException) {
+
+    DBconnection connection;
+    std::vector<SUFFIX_MARKER_TYPE> newIds;
+    connection.startTransaction();
+
+    for (int i=0; i< sourceSentences.size(); i++) {
+        newIds.push_back(_addAlignedUnit(connection, sourceSentences.at(i), targetSentences.at(i), allAlignments.at(i), sourceIds.at(i), tmId));
+    }
+
+    connection.endTransaction();
+    return newIds;
+}
+
+
 SimpleSearchResult UnitDAO::getSimpleSearchResult(const MatchedPatternFragment & fragment) {
     SimpleSearchResult result(fragment.getStart(), fragment.getEnd());
     TokenizedSentence ts("");
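
addAlignedSentences expects one alignments entry per sentence, and _addAlignedUnit (added further below) checks that each per-sentence entry has exactly one inner vector per source token. A small sketch of that nested structure, under the assumption (not stated in the diff) that each inner vector lists the target-token indices aligned to the corresponding source token:

// Sketch of the allAlignments argument; the inner-index semantics are an assumption.
#include <vector>

int main() {
    // one sentence with three source tokens
    std::vector<std::vector<int> > sentenceAlignments;
    std::vector<int> firstToken(1, 0);      // source token 0 -> target token 0
    std::vector<int> secondToken;           // source token 1 -> target tokens 1 and 2
    secondToken.push_back(1);
    secondToken.push_back(2);
    std::vector<int> thirdToken;            // source token 2 left unaligned
    sentenceAlignments.push_back(firstToken);
    sentenceAlignments.push_back(secondToken);
    sentenceAlignments.push_back(thirdToken);

    // the batch argument: one entry per sentence, parallel to sourceSentences and sourceIds
    std::vector<std::vector<std::vector<int> > > allAlignments;
    allAlignments.push_back(sentenceAlignments);
    return 0;
}
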
@@ -146,7 +167,8 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
 }
 
 ExampleOccurrence UnitDAO::_getExampleOccurrence(DBconnection & connection, const SubstringOccurrence sOccurrence, const int matchedLength) {
-    std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer], target_tokens, alignments FROM unit WHERE id = $3::integer;";
+    Logger::log("_getExampleOccurence");
+    std::string query = "SELECT unit.id, unit.source_segment, unit.target_segment, unit.source_tokens[$1::integer], unit.source_tokens[$2::integer], unit.target_tokens, unit.alignments, source.name, source.link FROM unit left join source on unit.source_id = source.external_id where unit.id = $3::integer;";
     std::vector<QueryParam*> params;
     params.push_back(new IntParam(2*sOccurrence.getOffset()+1));
     params.push_back(new IntParam(2*(sOccurrence.getOffset()+matchedLength)));
@@ -156,7 +178,9 @@ ExampleOccurrence UnitDAO::_getExampleOccurrence(DBconnection & connection, cons
         connection.getIntValue(result,0,3), // matched example start
         connection.getIntValue(result,0,4), // matched example end
         connection.getStringValue(result,0,1), // source segment
-        connection.getStringValue(result,0,2)); // target segment
+        connection.getStringValue(result,0,2), // target segment
+        connection.getStringValue(result,0,7), // source name
+        connection.getStringValue(result,0,8)); // source link
     std::string targetTokensRaw = connection.getStringValue(result,0,5);
     std::string alignmentsRaw = connection.getStringValue(result,0,6);
 
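
The numeric column indices above follow the SELECT list of the new query: 0 = unit.id, 1 = unit.source_segment, 2 = unit.target_segment, 3 and 4 = the two source_tokens elements, 5 = unit.target_tokens, 6 = unit.alignments, 7 = source.name, 8 = source.link, which is why the joined name and link are read with getStringValue(result,0,7) and getStringValue(result,0,8). A purely illustrative sketch of named constants for that mapping (the commit itself uses the raw numbers):

// Illustrative only: named column indices matching the SELECT list of the query above.
namespace occurrence_columns {
    const int ID = 0;
    const int SOURCE_SEGMENT = 1;
    const int TARGET_SEGMENT = 2;
    const int MATCHED_START = 3;   // source_tokens[$1]
    const int MATCHED_END = 4;     // source_tokens[$2]
    const int TARGET_TOKENS = 5;
    const int ALIGNMENTS = 6;
    const int SOURCE_NAME = 7;
    const int SOURCE_LINK = 8;
}
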
@@ -273,6 +297,43 @@ int UnitDAO::_addAlignedUnit (
     return newId;
 }
 
+int UnitDAO::_addAlignedUnit (
+    DBconnection & connection,
+    const TokenizedSentence & sourceSentence,
+    const TokenizedSentence & targetSentence,
+    const std::vector<std::vector<int> > & alignments,
+    const int sourceId,
+    const int tmId) throw(ConcordiaException) {
+
+    if (sourceSentence.getTokens().size() != alignments.size()) {
+        // Here we check if the source sentence, taken from src.tok,
+        // is shorter than alignments array.
+        std::stringstream ss;
+        ss << "The size of source sentence is different than the size of alignments array. Source sentence: " << sourceSentence.getSentence() << ", alignments size:" << alignments.size();
+        throw ConcordiaException(ss.str());
+    }
+
+    std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens, target_tokens, alignments, source_id) values($1::text,$2::text,$3::integer,$4,$5,$6,$7) RETURNING id";
+    std::vector<QueryParam*> params;
+    params.push_back(new StringParam(sourceSentence.getOriginalSentence()));
+    params.push_back(new StringParam(targetSentence.getOriginalSentence()));
+    params.push_back(new IntParam(tmId));
+    params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence)));
+    params.push_back(new IntArrayParam(_getTokenPositions(targetSentence)));
+    params.push_back(new Int2DArrayParam(alignments));
+    params.push_back(new IntParam(sourceId));
+
+    PGresult * result = connection.execute(query, params);
+    int newId = connection.getIntValue(result, 0, 0);
+    connection.clearResult(result);
+    BOOST_FOREACH (QueryParam * param, params) {
+        delete param;
+    }
+
+    return newId;
+}
+
+
 std::vector<int> UnitDAO::_getArray(std::string arrayString) {
     std::vector<int> result;
     if (arrayString.length()>2) {
@@ -43,6 +43,13 @@ public:
         const std::vector<std::vector<std::vector<int> > > & allAlignments,
         const int tmId) throw (ConcordiaException);
 
+    std::vector<SUFFIX_MARKER_TYPE> addAlignedSentences(
+        const std::vector<TokenizedSentence> & sourceSentences,
+        const std::vector<TokenizedSentence> & targetSentences,
+        const std::vector<std::vector<std::vector<int> > > & allAlignments,
+        const std::vector<int> & sourceIds,
+        const int tmId) throw (ConcordiaException);
+
     SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment);
 
     FullSearchResult getFullSearchResult(const OccurrencesList & occurrencesList, const int patternLength);
@@ -78,6 +85,14 @@ private:
         const std::vector<std::vector<int> > & alignments,
         const int tmId) throw(ConcordiaException);
 
+    int _addAlignedUnit(
+        DBconnection & connection,
+        const TokenizedSentence & sourceSentence,
+        const TokenizedSentence & targetSentence,
+        const std::vector<std::vector<int> > & alignments,
+        const int sourceId,
+        const int tmId) throw(ConcordiaException);
+
     std::vector<int> _getArray(std::string arrayString);
 
     std::vector<std::vector<int> > _get2DArray(std::string arrayString);