occurrence refactoring

This commit is contained in:
Rafał Jaworski 2019-01-22 14:32:25 +01:00
parent e4b4a77de3
commit 66a51192a7
17 changed files with 115 additions and 105 deletions

View File

@ -2,7 +2,14 @@
$url = 'http://@concordia_host@:@concordia_port@';
$intervalsArray = array();
$data = array("operation" => $_POST["operation"],"tmId" => intval($_POST["tmId"]),"limit" => intval($_POST["limit"]),"offset" => intval($_POST["offset"]),"pattern" => $_POST["pattern"],"intervals" => $intervalsArray);
$data = array("operation" => $_POST["operation"],"tmId" => intval($_POST["tmId"]),"pattern" => $_POST["pattern"],"intervals" => $intervalsArray);
if(array_key_exists("limit", $_POST)) {
$data["limit"] = intval($_POST["limit"]);
}
if(array_key_exists("offset", $_POST)) {
$data["offset"] = intval($_POST["offset"]);
}
// use key 'http' even if you send the request to https://...
$options = array(

View File

@ -47,7 +47,7 @@
</div>
<input type="hidden" id="current-offset" value="0" />
<div id="occurences">
<div id="occurrences">
</div>
</div>
</body>

View File

@ -64,8 +64,8 @@ function presentFullSearchResults(data) {
result += '<button onclick=prevPage()>previous page</button>&nbsp;';
result += '<button onclick=nextPage()>next page</button><br>';
for (j=0;j<data['result']['occurences'].length;j++) {
var occurence = data['result']['occurences'][j];
for (j=0;j<data['result']['occurrences'].length;j++) {
var occurence = data['result']['occurrences'][j];
result += '<table class="example"><tr><td>';
// source segment
@ -91,7 +91,7 @@ function presentFullSearchResults(data) {
result += '</td></tr></table>';
}
$('#occurences').html(result);
$('#occurrences').html(result);
}
@ -168,7 +168,7 @@ function phraseSearchHandle(tmid, intervals) {
}
function renderResult(data) {
$('#occurences').html('');
$('#occurrences').html('');
var res = '';
var disablePhraseSearch = true;
@ -222,8 +222,8 @@ function htmlEncode(value){
function renderFragment(fragment, number) {
var result = '<div style="display:none" id="fragment'+number+'" class="fragmentDetails">';
for (j=0;j<fragment['occurences'].length;j++) {
var occurence = fragment['occurences'][j];
for (j=0;j<fragment['occurrences'].length;j++) {
var occurence = fragment['occurrences'][j];
result += '<table class="example"><tr><td>';
// source segment

View File

@ -1,7 +1,7 @@
dir@#@stocznia_plen
concordia_host@#@concordia.poleng
concordia_port@#@8800
tmid@#@2
tmid@#@1
desc@#@Witamy w interaktywnym demo systemu Concordia. System znajduje najdłuższe fragmenty zdania wejściowego w pamięci tłumaczeń. Proszę wpisać polskie zdanie w poniższe pole i nacisnąć Enter (albo użyć przycisku "search"). Aby zapoznać się z systemem możesz użyć wcześniej przygotowanych przykładów - po prostu kliknij link "apply" przy wybranym przykładzie. Po wyszukaniu, kliknij na wybrany podświetlony fragment, aby zobaczyć jego kontekst.
enjoy@#@Życzymy udanej pracy z systemem!
prompt@#@Wprowadź zdanie (po polsku):

View File

@ -0,0 +1 @@
../versions_available/stocznia_plen.cfg

View File

@ -1,6 +1,6 @@
#include "example_occurence.hpp"
#include "example_occurrence.hpp"
ExampleOccurence::ExampleOccurence(
ExampleOccurrence::ExampleOccurrence(
const int id,
const int matchedExampleStart,
const int matchedExampleEnd,
@ -13,9 +13,9 @@ ExampleOccurence::ExampleOccurence(
_targetSegment(targetSegment) {
}
ExampleOccurence::~ExampleOccurence() {
// Destructor with an empty body: no explicit cleanup is performed here.
ExampleOccurrence::~ExampleOccurrence() {}
void ExampleOccurence::addMatchedTargetFragment(const std::pair<int,int> & targetFragment) {
/*! Appends a matched target fragment to this occurrence.
    \param targetFragment pair of ints describing the fragment; a copy of it
           is stored at the end of the internal fragment list
*/
void ExampleOccurrence::addMatchedTargetFragment(const std::pair<int,int> & targetFragment) {
    // Inserting at end() is equivalent to push_back().
    _targetFragments.insert(_targetFragments.end(), targetFragment);
}

View File

@ -1,14 +1,14 @@
#ifndef EXAMPLE_OCCURENCE_HDR
#define EXAMPLE_OCCURENCE_HDR
#ifndef EXAMPLE_OCCURRENCE_HDR
#define EXAMPLE_OCCURRENCE_HDR
#include <string>
#include <vector>
class ExampleOccurence {
class ExampleOccurrence {
public:
/*! Constructor.
*/
ExampleOccurence (const int id,
ExampleOccurrence (const int id,
const int matchedExampleStart,
const int matchedExampleEnd,
const std::string & sourceSegment,
@ -16,7 +16,7 @@ public:
);
/*! Destructor.
*/
virtual ~ExampleOccurence();
virtual ~ExampleOccurrence();
int getId() const {
return _id;

View File

@ -8,6 +8,6 @@ FullSearchResult::FullSearchResult(
// Destructor with an empty body: no explicit cleanup is performed here.
FullSearchResult::~FullSearchResult() {}
void FullSearchResult::addOccurence(const ExampleOccurence & occurence) {
_occurences.push_back(occurence);
/*! Appends an example occurrence to this full search result.
    \param occurrence the occurrence to add; a copy of it is stored at the
           end of the internal occurrences list
*/
void FullSearchResult::addOccurrence(const ExampleOccurrence & occurrence) {
    // Inserting at end() is equivalent to push_back().
    _occurrences.insert(_occurrences.end(), occurrence);
}

View File

@ -1,7 +1,7 @@
#ifndef FULL_SEARCH_RESULT_HDR
#define FULL_SEARCH_RESULT_HDR
#include "example_occurence.hpp"
#include "example_occurrence.hpp"
#include <string>
#include <vector>
@ -18,16 +18,16 @@ public:
return _totalCount;
}
std::vector<ExampleOccurence> getOccurences() const {
return _occurences;
/*! Getter for the occurrences collected in this result.
    \returns a copy of the internal occurrences vector (returned by value)
*/
std::vector<ExampleOccurrence> getOccurrences() const {
    std::vector<ExampleOccurrence> occurrencesCopy(_occurrences);
    return occurrencesCopy;
}
void addOccurence(const ExampleOccurence & occurence);
void addOccurrence(const ExampleOccurrence & occurrence);
void offsetPattern(int offset);
private:
std::vector<ExampleOccurence> _occurences;
std::vector<ExampleOccurrence> _occurrences;
int _totalCount;
};

View File

@ -1,7 +1,7 @@
#include "json_generator.hpp"
#include <boost/foreach.hpp>
#include "example_occurence.hpp"
#include "example_occurrence.hpp"
JsonGenerator::JsonGenerator() {
}
@ -21,42 +21,44 @@ void JsonGenerator::signalError(rapidjson::Writer<rapidjson::StringBuffer> & jso
}
void JsonGenerator::writeSimpleSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const SimpleSearchResult & result) {
const SimpleSearchResult & result, bool writeOccurrences) {
jsonWriter.StartObject();
jsonWriter.String("matchedPatternStart");
jsonWriter.Int(result.getMatchedPatternStart());
jsonWriter.String("matchedPatternEnd");
jsonWriter.Int(result.getMatchedPatternEnd());
jsonWriter.String("occurences");
jsonWriter.StartArray();
BOOST_FOREACH(ExampleOccurence occurence, result.getOccurences()) {
jsonWriter.StartObject();
jsonWriter.String("id");
jsonWriter.Int(occurence.getId());
jsonWriter.String("matchedExampleStart");
jsonWriter.Int(occurence.getMatchedExampleStart());
jsonWriter.String("matchedExampleEnd");
jsonWriter.Int(occurence.getMatchedExampleEnd());
jsonWriter.String("sourceSegment");
jsonWriter.String(occurence.getSourceSegment().c_str());
jsonWriter.String("targetSegment");
jsonWriter.String(occurence.getTargetSegment().c_str());
jsonWriter.String("targetFragments");
jsonWriter.StartArray(); // all target fragments
for (std::vector<std::pair<int,int> >::const_iterator it = occurence.getTargetFragments().begin();
it != occurence.getTargetFragments().end(); it++) {
jsonWriter.StartArray(); // single target fragment
jsonWriter.Int(it->first);
jsonWriter.Int(it->second);
jsonWriter.EndArray(); // single target fragment
if (writeOccurrences) {
jsonWriter.String("occurrences");
jsonWriter.StartArray();
BOOST_FOREACH(ExampleOccurrence occurrence, result.getOccurrences()) {
jsonWriter.StartObject();
jsonWriter.String("id");
jsonWriter.Int(occurrence.getId());
jsonWriter.String("matchedExampleStart");
jsonWriter.Int(occurrence.getMatchedExampleStart());
jsonWriter.String("matchedExampleEnd");
jsonWriter.Int(occurrence.getMatchedExampleEnd());
jsonWriter.String("sourceSegment");
jsonWriter.String(occurrence.getSourceSegment().c_str());
jsonWriter.String("targetSegment");
jsonWriter.String(occurrence.getTargetSegment().c_str());
jsonWriter.String("targetFragments");
jsonWriter.StartArray(); // all target fragments
for (std::vector<std::pair<int,int> >::const_iterator it = occurrence.getTargetFragments().begin();
it != occurrence.getTargetFragments().end(); it++) {
jsonWriter.StartArray(); // single target fragment
jsonWriter.Int(it->first);
jsonWriter.Int(it->second);
jsonWriter.EndArray(); // single target fragment
}
jsonWriter.EndArray(); // all target fragments
jsonWriter.EndObject(); // occurrence
}
jsonWriter.EndArray(); // all target fragments
jsonWriter.EndObject(); // occurence
jsonWriter.EndArray(); //occurrences
}
jsonWriter.EndArray(); //occurences
jsonWriter.EndObject(); //simple search result
}
@ -65,35 +67,35 @@ void JsonGenerator::writeFullSearchResult(rapidjson::Writer<rapidjson::StringBuf
jsonWriter.StartObject();
jsonWriter.String("totalCount");
jsonWriter.Int(result.getTotalCount());
jsonWriter.String("occurences");
jsonWriter.String("occurrences");
jsonWriter.StartArray();
BOOST_FOREACH(ExampleOccurence occurence, result.getOccurences()) {
BOOST_FOREACH(ExampleOccurrence occurrence, result.getOccurrences()) {
jsonWriter.StartObject();
jsonWriter.String("id");
jsonWriter.Int(occurence.getId());
jsonWriter.Int(occurrence.getId());
jsonWriter.String("matchedExampleStart");
jsonWriter.Int(occurence.getMatchedExampleStart());
jsonWriter.Int(occurrence.getMatchedExampleStart());
jsonWriter.String("matchedExampleEnd");
jsonWriter.Int(occurence.getMatchedExampleEnd());
jsonWriter.Int(occurrence.getMatchedExampleEnd());
jsonWriter.String("sourceSegment");
jsonWriter.String(occurence.getSourceSegment().c_str());
jsonWriter.String(occurrence.getSourceSegment().c_str());
jsonWriter.String("targetSegment");
jsonWriter.String(occurence.getTargetSegment().c_str());
jsonWriter.String(occurrence.getTargetSegment().c_str());
jsonWriter.String("targetFragments");
jsonWriter.StartArray(); // all target fragments
for (std::vector<std::pair<int,int> >::const_iterator it = occurence.getTargetFragments().begin();
it != occurence.getTargetFragments().end(); it++) {
for (std::vector<std::pair<int,int> >::const_iterator it = occurrence.getTargetFragments().begin();
it != occurrence.getTargetFragments().end(); it++) {
jsonWriter.StartArray(); // single target fragment
jsonWriter.Int(it->first);
jsonWriter.Int(it->second);
jsonWriter.EndArray(); // single target fragment
}
jsonWriter.EndArray(); // all target fragments
jsonWriter.EndObject(); // occurence
jsonWriter.EndObject(); // occurrence
}
jsonWriter.EndArray(); //occurences
jsonWriter.EndArray(); //occurrences
jsonWriter.EndObject(); //full search result
}
@ -103,13 +105,13 @@ void JsonGenerator::writeLexiconSearchResult(rapidjson::Writer<rapidjson::String
const SimpleSearchResult & result) {
jsonWriter.StartArray();
BOOST_FOREACH(ExampleOccurence occurence, result.getOccurences()) {
BOOST_FOREACH(ExampleOccurrence occurrence, result.getOccurrences()) {
jsonWriter.StartObject();
jsonWriter.String("sourceSegment");
jsonWriter.String(occurence.getSourceSegment().c_str());
jsonWriter.String(occurrence.getSourceSegment().c_str());
jsonWriter.String("targetSegment");
jsonWriter.String(occurence.getTargetSegment().c_str());
jsonWriter.EndObject(); // occurence
jsonWriter.String(occurrence.getTargetSegment().c_str());
jsonWriter.EndObject(); // occurrence
}
jsonWriter.EndArray(); //simple search result

View File

@ -21,7 +21,7 @@ public:
const std::string & message);
static void writeSimpleSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const SimpleSearchResult & result);
const SimpleSearchResult & result, bool writeOccurrences = true);
static void writeFullSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const FullSearchResult & result);

View File

@ -102,7 +102,7 @@ void SearcherController::concordiaPhraseSearch(rapidjson::Writer<rapidjson::Stri
jsonWriter.String("status");
jsonWriter.String("success");
jsonWriter.String("found");
if (shortPatternResult.getOccurences().size() > 0) {
if (shortPatternResult.getOccurrences().size() > 0) {
jsonWriter.Bool(true);
@ -169,7 +169,7 @@ void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuff
jsonWriter.String("bestOverlay");
jsonWriter.StartArray();
BOOST_FOREACH(const SimpleSearchResult & simpleResult, result.getBestOverlay()) {
JsonGenerator::writeSimpleSearchResult(jsonWriter, simpleResult);
JsonGenerator::writeSimpleSearchResult(jsonWriter, simpleResult, false);
}
jsonWriter.EndArray();
jsonWriter.EndObject();

View File

@ -10,8 +10,8 @@ SimpleSearchResult::SimpleSearchResult(
// Destructor with an empty body: no explicit cleanup is performed here.
SimpleSearchResult::~SimpleSearchResult() {}
void SimpleSearchResult::addOccurence(const ExampleOccurence & occurence) {
_occurences.push_back(occurence);
/*! Appends an example occurrence to this simple search result.
    \param occurrence the occurrence to add; a copy of it is stored at the
           end of the internal occurrences list
*/
void SimpleSearchResult::addOccurrence(const ExampleOccurrence & occurrence) {
    // Inserting at end() is equivalent to push_back().
    _occurrences.insert(_occurrences.end(), occurrence);
}
void SimpleSearchResult::offsetPattern(int offset) {

View File

@ -1,7 +1,7 @@
#ifndef SIMPLE_SEARCH_RESULT_HDR
#define SIMPLE_SEARCH_RESULT_HDR
#include "example_occurence.hpp"
#include "example_occurrence.hpp"
#include <string>
#include <vector>
@ -31,16 +31,16 @@ public:
_matchedPatternEnd = newEnd;
}
std::vector<ExampleOccurence> getOccurences() const {
return _occurences;
/*! Getter for the occurrences attached to this result.
    \returns a copy of the internal occurrences vector (returned by value)
*/
std::vector<ExampleOccurrence> getOccurrences() const {
    std::vector<ExampleOccurrence> occurrencesCopy(_occurrences);
    return occurrencesCopy;
}
void addOccurence(const ExampleOccurence & occurence);
void addOccurrence(const ExampleOccurrence & occurrence);
void offsetPattern(int offset);
private:
std::vector<ExampleOccurence> _occurences;
std::vector<ExampleOccurrence> _occurrences;
int _matchedPatternStart;

View File

@ -12,7 +12,7 @@
#include "int_array_param.hpp"
#include "int_2d_array_param.hpp"
#include "logger.hpp"
#include "example_occurence.hpp"
#include "example_occurrence.hpp"
#include <libpq-fe.h>
#include <boost/foreach.hpp>
@ -78,13 +78,13 @@ SimpleSearchResult UnitDAO::getSimpleSearchResult(const MatchedPatternFragment &
return _getResultFromFragment(fragment, ts, true);
}
FullSearchResult UnitDAO::getFullSearchResult(const OccurencesList & occurencesList, const int patternLength) {
FullSearchResult UnitDAO::getFullSearchResult(const OccurrencesList & occurrencesList, const int patternLength) {
FullSearchResult result(occurencesList.getTotalCount());
FullSearchResult result(occurrencesList.getTotalCount());
DBconnection connection;
connection.startTransaction();
BOOST_FOREACH(SubstringOccurence sOccurence, occurencesList.getOccurences()) {
result.addOccurence(_getExampleOccurence(connection, sOccurence, patternLength));
BOOST_FOREACH(SubstringOccurrence sOccurrence, occurrencesList.getOccurrences()) {
result.addOccurrence(_getExampleOccurrence(connection, sOccurrence, patternLength));
}
connection.endTransaction();
@ -117,7 +117,7 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
SimpleSearchResult UnitDAO::_getResultFromFragment(
const MatchedPatternFragment & fragment,
const TokenizedSentence & tokenizedPattern,
bool getOccurences) {
bool getOccurrences) {
DBconnection connection;
connection.startTransaction();
@ -134,9 +134,9 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
if (getOccurences) {
BOOST_FOREACH(SubstringOccurence sOccurence, fragment.getOccurences()) {
ssResult.addOccurence(_getExampleOccurence(connection, sOccurence, fragment.getMatchedLength()));
if (getOccurrences) {
BOOST_FOREACH(SubstringOccurrence sOccurrence, fragment.getOccurrences()) {
ssResult.addOccurrence(_getExampleOccurrence(connection, sOccurrence, fragment.getMatchedLength()));
}
}
@ -145,14 +145,14 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
return ssResult;
}
ExampleOccurence UnitDAO::_getExampleOccurence(DBconnection & connection, const SubstringOccurence sOccurence, const int matchedLength) {
ExampleOccurrence UnitDAO::_getExampleOccurrence(DBconnection & connection, const SubstringOccurrence sOccurrence, const int matchedLength) {
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer], target_tokens, alignments FROM unit WHERE id = $3::integer;";
std::vector<QueryParam*> params;
params.push_back(new IntParam(2*sOccurence.getOffset()+1));
params.push_back(new IntParam(2*(sOccurence.getOffset()+matchedLength)));
params.push_back(new IntParam(sOccurence.getId()));
params.push_back(new IntParam(2*sOccurrence.getOffset()+1));
params.push_back(new IntParam(2*(sOccurrence.getOffset()+matchedLength)));
params.push_back(new IntParam(sOccurrence.getId()));
PGresult * result = connection.execute(query, params);
ExampleOccurence occurence(connection.getIntValue(result,0,0), // example id
ExampleOccurrence occurrence(connection.getIntValue(result,0,0), // example id
connection.getIntValue(result,0,3), // matched example start
connection.getIntValue(result,0,4), // matched example end
connection.getStringValue(result,0,1), // source segment
@ -169,7 +169,7 @@ ExampleOccurence UnitDAO::_getExampleOccurence(DBconnection & connection, const
std::vector<std::vector<int> > alignments = _get2DArray(alignmentsRaw);
std::set<int> matchedTargetTokens;
for(int sourceTokenIndex = sOccurence.getOffset(); sourceTokenIndex < sOccurence.getOffset()+matchedLength; sourceTokenIndex++) {
for(int sourceTokenIndex = sOccurrence.getOffset(); sourceTokenIndex < sOccurrence.getOffset()+matchedLength; sourceTokenIndex++) {
BOOST_FOREACH(int & targetTokenIndex, alignments.at(sourceTokenIndex)) {
matchedTargetTokens.insert(targetTokenIndex);
}
@ -188,7 +188,7 @@ ExampleOccurence UnitDAO::_getExampleOccurence(DBconnection & connection, const
if (prevPos < targetPos - 1) { // beginning of detached fragment
// check if there is a fragment to end
if (currStart >= 0) {
occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
occurrence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
}
currStart = targetStart;
}
@ -200,10 +200,10 @@ ExampleOccurence UnitDAO::_getExampleOccurence(DBconnection & connection, const
// check if there are remaining fragments
if (currStart >= 0) {
occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
occurrence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
}
return occurence;
return occurrence;
}

View File

@ -6,9 +6,9 @@
#include <concordia/common/config.hpp>
#include <concordia/tokenized_sentence.hpp>
#include <concordia/substring_occurence.hpp>
#include <concordia/substring_occurrence.hpp>
#include <concordia/matched_pattern_fragment.hpp>
#include <concordia/occurences_list.hpp>
#include <concordia/occurrences_list.hpp>
#include <concordia/concordia_search_result.hpp>
#include <concordia/concordia_exception.hpp>
#include <boost/shared_ptr.hpp>
@ -45,7 +45,7 @@ public:
SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment);
FullSearchResult getFullSearchResult(const OccurencesList & occurencesList, const int patternLength);
FullSearchResult getFullSearchResult(const OccurrencesList & occurrencesList, const int patternLength);
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
@ -59,11 +59,11 @@ private:
SimpleSearchResult _getResultFromFragment(
const MatchedPatternFragment & fragment,
const TokenizedSentence & tokenizedPattern,
bool getOccurences);
bool getOccurrences);
std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
ExampleOccurence _getExampleOccurence(DBconnection & connection, const SubstringOccurence sOccurence, const int matchedLength);
ExampleOccurrence _getExampleOccurrence(DBconnection & connection, const SubstringOccurrence sOccurrence, const int matchedLength);
int _addSingleSentence(
DBconnection & connection,

View File

@ -1,7 +1,7 @@
#!/bin/sh
CORPUS_NAME="europarl_sample"
SRC_LANG_ID=2
TRG_LANG_ID=1
CORPUS_NAME="stocznia_plen"
SRC_LANG_ID=1
TRG_LANG_ID=2
./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt