example source
This commit is contained in:
parent
d900e806d9
commit
0fc4558ea7
@ -66,7 +66,8 @@ function presentFullSearchResults(data) {
|
||||
|
||||
for (j=0;j<data['result']['occurrences'].length;j++) {
|
||||
var occurence = data['result']['occurrences'][j];
|
||||
result += '<table class="example"><tr><td>';
|
||||
result += '<table class="example">';
|
||||
result += '<tr><td>';
|
||||
|
||||
// source segment
|
||||
var sourceSegment = occurence['sourceSegment'];
|
||||
@ -88,7 +89,8 @@ function presentFullSearchResults(data) {
|
||||
currStart = occurence['targetFragments'][i][1];
|
||||
}
|
||||
result += targetSegment.slice(currStart);
|
||||
result += '</td></tr></table>';
|
||||
result += '</td></tr>';
|
||||
result += '<tr><td colspan="2" style="text-align:right;font-style:italic;font-size:70%">Źródło: <a target="_blank" href="'+occurence['sourceLink']+'">'+occurence['sourceName']+'</a><td></tr></table>';
|
||||
}
|
||||
|
||||
$('#occurrences').html(result);
|
||||
|
8
cat/versions_available/opensubtitles_plen.cfg
Normal file
8
cat/versions_available/opensubtitles_plen.cfg
Normal file
@ -0,0 +1,8 @@
|
||||
dir@#@opensubtitles_plen
|
||||
concordia_host@#@concordia.poleng
|
||||
concordia_port@#@8800
|
||||
tmid@#@2
|
||||
desc@#@Witamy w interaktywnym demo systemu Concordia. System znajduje najdłuższe fragmenty zdania wejściowego w pamięci tłumaczeń. Proszę wpisać polskie zdanie w poniższe pole i nacisnąć Enter (albo użyć przycisku "search"). Aby zapoznać się z systemem możesz użyć wcześniej przygotowanych przykładów - po prostu kliknij link "apply" przy wybranym przykładzie. Po wyszukaniu, kliknij na wybrany podświetlony fragment, aby zobaczyć jego kontekst.
|
||||
enjoy@#@Życzymy udanej pracy z systemem!
|
||||
prompt@#@Wprowadź zdanie (po polsku):
|
||||
suggestion@#@Nawet zepsute zegary pokazują dwa razy dziennie właściwą godzinę
|
1
cat/versions_enabled/opensubtitles_plen.cfg
Symbolic link
1
cat/versions_enabled/opensubtitles_plen.cfg
Symbolic link
@ -0,0 +1 @@
|
||||
../versions_available/opensubtitles_plen.cfg
|
@ -5,12 +5,16 @@ ExampleOccurrence::ExampleOccurrence(
|
||||
const int matchedExampleStart,
|
||||
const int matchedExampleEnd,
|
||||
const std::string & sourceSegment,
|
||||
const std::string & targetSegment):
|
||||
const std::string & targetSegment,
|
||||
const std::string & sourceName,
|
||||
const std::string & sourceLink):
|
||||
_id(id),
|
||||
_matchedExampleStart(matchedExampleStart),
|
||||
_matchedExampleEnd(matchedExampleEnd),
|
||||
_sourceSegment(sourceSegment),
|
||||
_targetSegment(targetSegment) {
|
||||
_targetSegment(targetSegment),
|
||||
_sourceName(sourceName),
|
||||
_sourceLink(sourceLink) {
|
||||
}
|
||||
|
||||
ExampleOccurrence::~ExampleOccurrence() {
|
||||
|
@ -12,7 +12,9 @@ public:
|
||||
const int matchedExampleStart,
|
||||
const int matchedExampleEnd,
|
||||
const std::string & sourceSegment,
|
||||
const std::string & targetSegment
|
||||
const std::string & targetSegment,
|
||||
const std::string & sourceName,
|
||||
const std::string & sourceLink
|
||||
);
|
||||
/*! Destructor.
|
||||
*/
|
||||
@ -42,6 +44,14 @@ public:
|
||||
return _targetFragments;
|
||||
}
|
||||
|
||||
/*! Getter for the source name.
  \returns read-only reference to the _sourceName member,
           i.e. the sourceName value this object stores
*/
const std::string & getSourceName() const {
|
||||
return _sourceName;
|
||||
}
|
||||
|
||||
/*! Getter for the source link.
  \returns read-only reference to the _sourceLink member,
           i.e. the sourceLink value this object stores
*/
const std::string & getSourceLink() const {
|
||||
return _sourceLink;
|
||||
}
|
||||
|
||||
void addMatchedTargetFragment(const std::pair<int,int> & targetFragment);
|
||||
|
||||
private:
|
||||
@ -56,6 +66,10 @@ private:
|
||||
std::string _targetSegment;
|
||||
|
||||
std::vector<std::pair<int,int> > _targetFragments;
|
||||
|
||||
std::string _sourceName;
|
||||
|
||||
std::string _sourceLink;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -72,10 +72,12 @@ void IndexController::addSentences(rapidjson::Writer<rapidjson::StringBuffer> &
|
||||
try {
|
||||
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
|
||||
if (it != _concordiasMap->end()) {
|
||||
std::vector<TokenizedSentence> tokenizedLemmatizedSentences = it->second->tokenizeAll(_lemmatizerFacade->lemmatizeSentencesIfNeeded(sourceSentences, tmId));
|
||||
std::vector<TokenizedSentence> tokenizedSentences = it->second->tokenizeAll(sourceSentences);
|
||||
std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmId);
|
||||
it->second->addAllTokenizedExamples(tokenizedLemmatizedSentences, sentenceIds);
|
||||
std::vector<TokenizedSentence> tokenizedSourceSentences = it->second->tokenizeAll(sourceSentences, false, false);
|
||||
std::vector<TokenizedSentence> tokenizedLemmatizedSourceSentences = it->second->tokenizeAll(lemmatizedSourceSentences, true, true);
|
||||
std::vector<TokenizedSentence> tokenizedTargetSentences = it->second->tokenizeAll(targetSentences, false, false);
|
||||
|
||||
std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addAlignedSentences(tokenizedSourceSentences, tokenizedTargetSentences, alignments, sourceIds, tmId);
|
||||
it->second->addAllTokenizedExamples(tokenizedLemmatizedSourceSentences, sentenceIds);
|
||||
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
|
@ -82,6 +82,10 @@ void JsonGenerator::writeFullSearchResult(rapidjson::Writer<rapidjson::StringBuf
|
||||
jsonWriter.String(occurrence.getSourceSegment().c_str());
|
||||
jsonWriter.String("targetSegment");
|
||||
jsonWriter.String(occurrence.getTargetSegment().c_str());
|
||||
jsonWriter.String("sourceName");
|
||||
jsonWriter.String(occurrence.getSourceName().c_str());
|
||||
jsonWriter.String("sourceLink");
|
||||
jsonWriter.String(occurrence.getSourceLink().c_str());
|
||||
jsonWriter.String("targetFragments");
|
||||
jsonWriter.StartArray(); // all target fragments
|
||||
for (std::vector<std::pair<int,int> >::const_iterator it = occurrence.getTargetFragments().begin();
|
||||
|
@ -54,6 +54,7 @@ std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
|
||||
return newIds;
|
||||
}
|
||||
|
||||
|
||||
std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addAlignedSentences(
|
||||
const std::vector<TokenizedSentence> & sourceSentences,
|
||||
const std::vector<TokenizedSentence> & targetSentences,
|
||||
@ -72,6 +73,26 @@ std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addAlignedSentences(
|
||||
return newIds;
|
||||
}
|
||||
|
||||
std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addAlignedSentences(
|
||||
const std::vector<TokenizedSentence> & sourceSentences,
|
||||
const std::vector<TokenizedSentence> & targetSentences,
|
||||
const std::vector<std::vector<std::vector<int> > > & allAlignments,
|
||||
const std::vector<int> & sourceIds,
|
||||
const int tmId) throw (ConcordiaException) {
|
||||
|
||||
DBconnection connection;
|
||||
std::vector<SUFFIX_MARKER_TYPE> newIds;
|
||||
connection.startTransaction();
|
||||
|
||||
for (int i=0; i< sourceSentences.size(); i++) {
|
||||
newIds.push_back(_addAlignedUnit(connection, sourceSentences.at(i), targetSentences.at(i), allAlignments.at(i), sourceIds.at(i), tmId));
|
||||
}
|
||||
|
||||
connection.endTransaction();
|
||||
return newIds;
|
||||
}
|
||||
|
||||
|
||||
SimpleSearchResult UnitDAO::getSimpleSearchResult(const MatchedPatternFragment & fragment) {
|
||||
SimpleSearchResult result(fragment.getStart(), fragment.getEnd());
|
||||
TokenizedSentence ts("");
|
||||
@ -146,7 +167,8 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
|
||||
}
|
||||
|
||||
ExampleOccurrence UnitDAO::_getExampleOccurrence(DBconnection & connection, const SubstringOccurrence sOccurrence, const int matchedLength) {
|
||||
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer], target_tokens, alignments FROM unit WHERE id = $3::integer;";
|
||||
Logger::log("_getExampleOccurence");
|
||||
std::string query = "SELECT unit.id, unit.source_segment, unit.target_segment, unit.source_tokens[$1::integer], unit.source_tokens[$2::integer], unit.target_tokens, unit.alignments, source.name, source.link FROM unit left join source on unit.source_id = source.external_id where unit.id = $3::integer;";
|
||||
std::vector<QueryParam*> params;
|
||||
params.push_back(new IntParam(2*sOccurrence.getOffset()+1));
|
||||
params.push_back(new IntParam(2*(sOccurrence.getOffset()+matchedLength)));
|
||||
@ -156,7 +178,9 @@ ExampleOccurrence UnitDAO::_getExampleOccurrence(DBconnection & connection, cons
|
||||
connection.getIntValue(result,0,3), // matched example start
|
||||
connection.getIntValue(result,0,4), // matched example end
|
||||
connection.getStringValue(result,0,1), // source segment
|
||||
connection.getStringValue(result,0,2)); // target segment
|
||||
connection.getStringValue(result,0,2), // target segment
|
||||
connection.getStringValue(result,0,7), // source name
|
||||
connection.getStringValue(result,0,8)); // source link
|
||||
std::string targetTokensRaw = connection.getStringValue(result,0,5);
|
||||
std::string alignmentsRaw = connection.getStringValue(result,0,6);
|
||||
|
||||
@ -273,6 +297,43 @@ int UnitDAO::_addAlignedUnit (
|
||||
return newId;
|
||||
}
|
||||
|
||||
/*! Inserts a single aligned unit, including its source id, into the unit table.
  \param connection database connection used to execute the INSERT
  \param sourceSentence tokenized source sentence; its original text and
                        token positions are stored
  \param targetSentence tokenized target sentence; its original text and
                        token positions are stored
  \param alignments alignment rows; their number must equal the number
                    of source tokens
  \param sourceId value stored in the source_id column
  \param tmId value stored in the tm_id column
  \returns id of the inserted row (value of RETURNING id)
  \throws ConcordiaException when the number of source tokens differs
          from the number of alignment rows
*/
int UnitDAO::_addAlignedUnit (
         DBconnection & connection,
         const TokenizedSentence & sourceSentence,
         const TokenizedSentence & targetSentence,
         const std::vector<std::vector<int> > & alignments,
         const int sourceId,
         const int tmId) throw(ConcordiaException) {

    if (sourceSentence.getTokens().size() != alignments.size()) {
        // The source sentence (taken from src.tok) must have exactly
        // as many tokens as there are rows in the alignments array.
        std::stringstream ss;
        ss << "The size of source sentence is different than the size of alignments array. Source sentence: " << sourceSentence.getSentence() << ", alignments size:" << alignments.size();
        throw ConcordiaException(ss.str());
    }

    // Owns the heap-allocated query parameters, so that they are released
    // on every exit path, including an exception thrown while executing
    // the query or reading its result.
    struct ParamsOwner {
        std::vector<QueryParam*> items;
        ~ParamsOwner() {
            for (std::size_t i = 0; i < items.size(); ++i) {
                delete items[i];
            }
        }
    } params;

    std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens, target_tokens, alignments, source_id) values($1::text,$2::text,$3::integer,$4,$5,$6,$7) RETURNING id";
    params.items.push_back(new StringParam(sourceSentence.getOriginalSentence()));
    params.items.push_back(new StringParam(targetSentence.getOriginalSentence()));
    params.items.push_back(new IntParam(tmId));
    params.items.push_back(new IntArrayParam(_getTokenPositions(sourceSentence)));
    params.items.push_back(new IntArrayParam(_getTokenPositions(targetSentence)));
    params.items.push_back(new Int2DArrayParam(alignments));
    params.items.push_back(new IntParam(sourceId));

    PGresult * result = connection.execute(query, params.items);

    int newId;
    try {
        newId = connection.getIntValue(result, 0, 0);
    } catch (...) {
        // Do not leak the result set when the id cannot be read.
        connection.clearResult(result);
        throw;
    }
    connection.clearResult(result);

    return newId;
}
|
||||
|
||||
|
||||
std::vector<int> UnitDAO::_getArray(std::string arrayString) {
|
||||
std::vector<int> result;
|
||||
if (arrayString.length()>2) {
|
||||
|
@ -43,6 +43,13 @@ public:
|
||||
const std::vector<std::vector<std::vector<int> > > & allAlignments,
|
||||
const int tmId) throw (ConcordiaException);
|
||||
|
||||
/*! Adds aligned sentence pairs together with their source ids.
  Element i of each input vector is combined into one unit; the
  definition processes as many units as there are source sentences.
  \param sourceSentences tokenized source sentences
  \param targetSentences tokenized target sentences
  \param allAlignments alignments, one entry per sentence pair
  \param sourceIds source ids, one entry per sentence pair
  \param tmId id passed on with every added unit
  \returns ids of the added units, in input order
  \throws ConcordiaException (as declared by the exception specification)
*/
std::vector<SUFFIX_MARKER_TYPE> addAlignedSentences(
|
||||
const std::vector<TokenizedSentence> & sourceSentences,
|
||||
const std::vector<TokenizedSentence> & targetSentences,
|
||||
const std::vector<std::vector<std::vector<int> > > & allAlignments,
|
||||
const std::vector<int> & sourceIds,
|
||||
const int tmId) throw (ConcordiaException);
|
||||
|
||||
SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment);
|
||||
|
||||
FullSearchResult getFullSearchResult(const OccurrencesList & occurrencesList, const int patternLength);
|
||||
@ -78,6 +85,14 @@ private:
|
||||
const std::vector<std::vector<int> > & alignments,
|
||||
const int tmId) throw(ConcordiaException);
|
||||
|
||||
/*! Inserts a single aligned unit, including its source id, into the database.
  \param connection database connection used to execute the insert
  \param sourceSentence tokenized source sentence
  \param targetSentence tokenized target sentence
  \param alignments alignment rows; the definition requires their number
                    to equal the number of source tokens
  \param sourceId id of the source, stored with the unit
  \param tmId id stored in the unit's tm_id column
  \returns id of the inserted unit
  \throws ConcordiaException when the number of source tokens differs
          from the number of alignment rows
*/
int _addAlignedUnit(
|
||||
DBconnection & connection,
|
||||
const TokenizedSentence & sourceSentence,
|
||||
|
||||
const TokenizedSentence & targetSentence,
|
||||
const std::vector<std::vector<int> > & alignments,
|
||||
const int sourceId,
|
||||
const int tmId) throw(ConcordiaException);
|
||||
|
||||
std::vector<int> _getArray(std::string arrayString);
|
||||
|
||||
std::vector<std::vector<int> > _get2DArray(std::string arrayString);
|
||||
|
Loading…
Reference in New Issue
Block a user