From bbe7a6853d7ec354e909bdaf952869c731fd715d Mon Sep 17 00:00:00 2001 From: Kacper Date: Sat, 4 Jun 2022 14:24:51 +0200 Subject: [PATCH] NLU, DST updates, directory structurization --- DST_DP_lab_9-10/DST.py | 51 - .../__pycache__/dialogue_state.cpython-37.pyc | Bin 1078 -> 0 bytes NLU_lab_7-8/NLU.py | 115 - NLU_lab_7-8/evaluation.txt | 32 - NLU_lab_7-8/test-pl.conllu | 545 ---- __pycache__/dialogue_state.cpython-38.pyc | Bin 0 -> 541 bytes data/NLU_data_intent/train.tsv | 285 +++ .../NLU_data_slots}/dialog-05-02-01.tsv | 0 .../NLU_data_slots}/dialog-05-04-01.tsv | 0 .../NLU_data_slots}/dialog-05-06-01.tsv | 0 .../NLU_data_slots}/dialog-05-08-01.tsv | 0 .../NLU_data_slots}/dialog-05-10-01.tsv | 0 .../NLU_data_slots}/dialog-05-12-01.tsv | 0 .../NLU_data_slots}/dialog-05-14-01.tsv | 0 .../NLU_data_slots}/dialog-05-16-01.tsv | 0 .../NLU_data_slots}/dialog-05-18-01.tsv | 0 .../NLU_data_slots}/dialog-06-03-01.tsv | 0 .../NLU_data_slots}/dialog-06-05-01.tsv | 0 .../NLU_data_slots}/dialog-06-07-01.tsv | 0 .../NLU_data_slots}/dialog-06-09-01.tsv | 0 .../NLU_data_slots}/dialog-06-11-01.tsv | 0 .../NLU_data_slots}/dialog-06-15-01.tsv | 0 .../NLU_data_slots}/dialog-06-17-01.tsv | 0 .../NLU_data_slots}/dialog-06-19-01.tsv | 0 .../NLU_data_slots}/dialog-07-04-01.tsv | 0 .../NLU_data_slots}/dialog-07-04-02.tsv | 0 .../NLU_data_slots}/dialog-07-08-01.tsv | 0 .../NLU_data_slots}/dialog-07-10-01.tsv | 0 .../NLU_data_slots}/dialog-07-12-01.tsv | 0 .../NLU_data_slots}/dialog-07-14-01.tsv | 0 .../NLU_data_slots}/dialog-07-16-01.tsv | 0 .../NLU_data_slots}/dialog-07-18-01.tsv | 0 .../NLU_data_slots}/dialog-07-18-02.tsv | 0 data/create_intent_dataset.py | 21 + .../create_slot_datasets.py | 20 +- .../data_cleaner}/dialog-05-02-01.tsv | 0 .../data_cleaner}/dialog-05-04-01.tsv | 0 .../data_cleaner}/dialog-05-06-01.tsv | 0 .../data_cleaner}/dialog-05-08-01.tsv | 0 .../data_cleaner}/dialog-05-10-01.tsv | 0 .../data_cleaner}/dialog-05-12-01.tsv | 0 .../data_cleaner}/dialog-05-14-01.tsv | 0 .../data_cleaner}/dialog-05-16-01.tsv | 0 .../data_cleaner}/dialog-05-18-01.tsv | 0 .../data_cleaner}/dialog-06-03-01.tsv | 0 .../data_cleaner}/dialog-06-05-01.tsv | 0 .../data_cleaner}/dialog-06-07-01.tsv | 0 .../data_cleaner}/dialog-06-09-01.tsv | 0 .../data_cleaner}/dialog-06-11-01.tsv | 0 .../data_cleaner}/dialog-06-15-01.tsv | 0 .../data_cleaner}/dialog-06-17-01.tsv | 0 .../data_cleaner}/dialog-06-19-01.tsv | 0 .../data_cleaner}/dialog-07-04-01.tsv | 0 .../data_cleaner}/dialog-07-04-02.tsv | 0 .../data_cleaner}/dialog-07-08-01.tsv | 0 .../data_cleaner}/dialog-07-10-01.tsv | 0 .../data_cleaner}/dialog-07-12-01.tsv | 0 .../data_cleaner}/dialog-07-14-01.tsv | 0 .../data_cleaner}/dialog-07-16-01.tsv | 0 .../data_cleaner}/dialog-07-18-01.tsv | 0 .../data_cleaner}/dialog-07-18-02.tsv | 0 {dane => data/data_dirty}/dialog-05-02-01.tsv | 0 {dane => data/data_dirty}/dialog-05-04-01.tsv | 0 .../data_dirty}/dialog-05-06-01.tsv | 0 {dane => data/data_dirty}/dialog-05-08-01.tsv | 0 {dane => data/data_dirty}/dialog-05-10-01.tsv | 0 {dane => data/data_dirty}/dialog-05-12-01.tsv | 0 {dane => data/data_dirty}/dialog-05-14-01.tsv | 0 {dane => data/data_dirty}/dialog-05-16-01.tsv | 0 {dane => data/data_dirty}/dialog-05-18-01.tsv | 0 {dane => data/data_dirty}/dialog-06-03-01.tsv | 0 {dane => data/data_dirty}/dialog-06-05-01.tsv | 0 {dane => data/data_dirty}/dialog-06-07-01.tsv | 0 {dane => data/data_dirty}/dialog-06-09-01.tsv | 0 {dane => data/data_dirty}/dialog-06-11-01.tsv | 0 {dane => data/data_dirty}/dialog-06-15-01.tsv | 0 {dane => data/data_dirty}/dialog-06-17-01.tsv | 0 {dane => data/data_dirty}/dialog-06-19-01.tsv | 0 {dane => data/data_dirty}/dialog-07-04-01.tsv | 0 {dane => data/data_dirty}/dialog-07-04-02.tsv | 0 {dane => data/data_dirty}/dialog-07-08-01.tsv | 0 {dane => data/data_dirty}/dialog-07-10-01.tsv | 0 {dane => data/data_dirty}/dialog-07-12-01.tsv | 0 {dane => data/data_dirty}/dialog-07-14-01.tsv | 0 {dane => data/data_dirty}/dialog-07-16-01.tsv | 0 {dane => data/data_dirty}/dialog-07-18-01.tsv | 0 {dane => data/data_dirty}/dialog-07-18-02.tsv | 0 data/intent_test_and_train_version/test.tsv | 51 + data/intent_test_and_train_version/train.tsv | 234 ++ data/test-pl.conllu | 472 ++++ data/train+test-pl.conllu | 2231 +++++++++++++++++ {NLU_lab_7-8 => data}/train-pl.conllu | 2035 +++++++-------- dialogue_system.py | 58 + {DST_DP_lab_9-10 => modules}/DP.py | 0 modules/DST.py | 71 + {NLG_lab_11 => modules}/NLG.py | 0 modules/NLU.py | 184 ++ {NLU_lab_7-8 => modules}/README.md | 0 modules/__pycache__/DP.cpython-38.pyc | Bin 0 -> 2755 bytes modules/__pycache__/DST.cpython-38.pyc | Bin 0 -> 1785 bytes modules/__pycache__/NLG.cpython-38.pyc | Bin 0 -> 3764 bytes modules/__pycache__/NLU.cpython-38.pyc | Bin 0 -> 5592 bytes .../__pycache__/dialogue_state.cpython-38.pyc | Bin 0 -> 569 bytes .../dialogue_state.py | 9 +- modules/nlu_evaluation.txt | 28 + {DST_DP_lab_9-10 => modules}/value_dict.json | 0 106 files changed, 4713 insertions(+), 1729 deletions(-) delete mode 100644 DST_DP_lab_9-10/DST.py delete mode 100644 DST_DP_lab_9-10/__pycache__/dialogue_state.cpython-37.pyc delete mode 100644 NLU_lab_7-8/NLU.py delete mode 100644 NLU_lab_7-8/evaluation.txt delete mode 100644 NLU_lab_7-8/test-pl.conllu create mode 100644 __pycache__/dialogue_state.cpython-38.pyc create mode 100644 data/NLU_data_intent/train.tsv rename {dane_NLU_input => data/NLU_data_slots}/dialog-05-02-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-05-04-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-05-06-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-05-08-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-05-10-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-05-12-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-05-14-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-05-16-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-05-18-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-06-03-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-06-05-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-06-07-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-06-09-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-06-11-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-06-15-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-06-17-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-06-19-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-07-04-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-07-04-02.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-07-08-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-07-10-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-07-12-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-07-14-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-07-16-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-07-18-01.tsv (100%) rename {dane_NLU_input => data/NLU_data_slots}/dialog-07-18-02.tsv (100%) create mode 100644 data/create_intent_dataset.py rename NLU_lab_7-8/create_datasets.py => data/create_slot_datasets.py (86%) rename {dane_cleaner => data/data_cleaner}/dialog-05-02-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-05-04-01.tsv (100%) rename {dane => data/data_cleaner}/dialog-05-06-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-05-08-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-05-10-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-05-12-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-05-14-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-05-16-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-05-18-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-06-03-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-06-05-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-06-07-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-06-09-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-06-11-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-06-15-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-06-17-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-06-19-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-07-04-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-07-04-02.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-07-08-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-07-10-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-07-12-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-07-14-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-07-16-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-07-18-01.tsv (100%) rename {dane_cleaner => data/data_cleaner}/dialog-07-18-02.tsv (100%) rename {dane => data/data_dirty}/dialog-05-02-01.tsv (100%) rename {dane => data/data_dirty}/dialog-05-04-01.tsv (100%) rename {dane_cleaner => data/data_dirty}/dialog-05-06-01.tsv (100%) rename {dane => data/data_dirty}/dialog-05-08-01.tsv (100%) rename {dane => data/data_dirty}/dialog-05-10-01.tsv (100%) rename {dane => data/data_dirty}/dialog-05-12-01.tsv (100%) rename {dane => data/data_dirty}/dialog-05-14-01.tsv (100%) rename {dane => data/data_dirty}/dialog-05-16-01.tsv (100%) rename {dane => data/data_dirty}/dialog-05-18-01.tsv (100%) rename {dane => data/data_dirty}/dialog-06-03-01.tsv (100%) rename {dane => data/data_dirty}/dialog-06-05-01.tsv (100%) rename {dane => data/data_dirty}/dialog-06-07-01.tsv (100%) rename {dane => data/data_dirty}/dialog-06-09-01.tsv (100%) rename {dane => data/data_dirty}/dialog-06-11-01.tsv (100%) rename {dane => data/data_dirty}/dialog-06-15-01.tsv (100%) rename {dane => data/data_dirty}/dialog-06-17-01.tsv (100%) rename {dane => data/data_dirty}/dialog-06-19-01.tsv (100%) rename {dane => data/data_dirty}/dialog-07-04-01.tsv (100%) rename {dane => data/data_dirty}/dialog-07-04-02.tsv (100%) rename {dane => data/data_dirty}/dialog-07-08-01.tsv (100%) rename {dane => data/data_dirty}/dialog-07-10-01.tsv (100%) rename {dane => data/data_dirty}/dialog-07-12-01.tsv (100%) rename {dane => data/data_dirty}/dialog-07-14-01.tsv (100%) rename {dane => data/data_dirty}/dialog-07-16-01.tsv (100%) rename {dane => data/data_dirty}/dialog-07-18-01.tsv (100%) rename {dane => data/data_dirty}/dialog-07-18-02.tsv (100%) create mode 100644 data/intent_test_and_train_version/test.tsv create mode 100644 data/intent_test_and_train_version/train.tsv create mode 100644 data/test-pl.conllu create mode 100644 data/train+test-pl.conllu rename {NLU_lab_7-8 => data}/train-pl.conllu (65%) create mode 100644 dialogue_system.py rename {DST_DP_lab_9-10 => modules}/DP.py (100%) create mode 100644 modules/DST.py rename {NLG_lab_11 => modules}/NLG.py (100%) create mode 100644 modules/NLU.py rename {NLU_lab_7-8 => modules}/README.md (100%) create mode 100644 modules/__pycache__/DP.cpython-38.pyc create mode 100644 modules/__pycache__/DST.cpython-38.pyc create mode 100644 modules/__pycache__/NLG.cpython-38.pyc create mode 100644 modules/__pycache__/NLU.cpython-38.pyc create mode 100644 modules/__pycache__/dialogue_state.cpython-38.pyc rename {DST_DP_lab_9-10 => modules}/dialogue_state.py (78%) create mode 100644 modules/nlu_evaluation.txt rename {DST_DP_lab_9-10 => modules}/value_dict.json (100%) diff --git a/DST_DP_lab_9-10/DST.py b/DST_DP_lab_9-10/DST.py deleted file mode 100644 index 15a8904..0000000 --- a/DST_DP_lab_9-10/DST.py +++ /dev/null @@ -1,51 +0,0 @@ -from dialogue_state import default_state - - -# Monitor stanu dialogu -class DST: - def __init__(self): - self.state = default_state() - - def update(self, user_act=None): - for intent, domain, slot, value in user_act: - domain = domain.lower() - intent = intent.lower() - slot = slot.lower() - - k = slot - - if 'inform' in intent: - - if k is None: - continue - - domain_dic = self.state['belief_state'][domain] - - if k in domain_dic['semi']: - self.state['belief_state'][domain]['semi'][k] = value - elif k in domain_dic['book']: - self.state['belief_state'][domain]['book'][k] = value - - if 'request' in intent: - - if domain not in self.state['request_state']: - self.state['request_state'][domain] = {} - if k not in self.state['request_state'][domain]: - self.state['request_state'][domain][k] = 0 - - self.state['user_action'].append([intent, domain, slot, value]) - - return self.state - - def init_session(self): - self.state = default_state() - - -# Przykładowe uruchomienie dla kodu w izolacji -""" -dst = DST() -print(dst.state) - -dst.update([['hello_inform', 'Cinema', 'Price', '15 zł'], ['Inform', 'Cinema', 'Movie', 'Batman']]) -print(dst.state) -""" diff --git a/DST_DP_lab_9-10/__pycache__/dialogue_state.cpython-37.pyc b/DST_DP_lab_9-10/__pycache__/dialogue_state.cpython-37.pyc deleted file mode 100644 index 2d2fdfe54a3791a85881c5a704a2a026644f2c8a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1078 zcmZuv&2H2%5Oxyhr)jtU2jC5ISQd^*6@pT6tdPpdmz8%t-L;y;!EshBDmSiugS-F_ z#Kk8h9s!91C&o#(KY*tmG)f7s@vocNw=F_`IAybZ6kZ|KJIDkP-WS4% z@GnRf%+b;<5BVIR>GD8-)C2rv@5PcJUMqLEmrY#+$UJu(yM6EAk^zhIF z7d8-EZG49w;-qcxz-#-S*AB|J(Zxsv(z~bkF9^hLkLvR7;afKLKr=-P&bT1l|f ztSiWTOI;j`j)l}Aen>{ULq{m<7#RjNe4WKQLB&$8fwQ>c`bw4yn|7f!FBhQQWUxe* z_~H_bc2kfA-O;(~J!kkgx{(EBl+jD2gx*KY9h#*AFq0{uG+1(+17E>wW48r1$TbDK zX}5YzqroOzYq^5cb>`m{k%jueMLl%P6{L?HJ3^KvHk!*43eKWR<=7+^E|r0trQ8@j zd{?S58*i{k!;+<{D`|MnsNo+a3lSwt?Z!IuEERHY@N}Iv@RjICX#agSS*iji_*oM! z4a}DlA^9MVYcbCGY;tz-etPz9isJOe`1v!3$kn2OehQveYdaEf$(wwb3zx92kP(l1 z{;(6;OHr_~_+A=De)J!Echlk0@Szua1de*EyWPmXcTDvY%z2)9-PzT5k3Ggao17F% NG&#I7s|m2ncXHQKm-qvg9tAm+bAisl7LV` zc1C7T$vGI4N>UMwspB!e2eCnjEj4}_1gtj5=U3oafd!Nc+;SDzclYUG)mS~5xz5#)@d`#<5JeBxQ8st<3j6e zDl*Y_vx#m@&Mb15M@rvH%p%WfuJA6#p5n5~qp6W4`nYaXfyOg8n_?Dv*3R+NsibND zD+_3(EFHyYaoH=F9m;c-S<$O_(Pk<=Zt(XJ{prl70!O^5 zmd|QG+V;soz=H5kzv509{xg=W;Sctm?3+F67-qpDF0NT~yHxYZk?\[\]^_`{|}~]+", '', slot[3]) + slot[3] = re.sub("[!\"#$%&\'()*+,.;:<=>?\[\]^_`{|}~]+$", '', slot[3]) + return out_list + +def main(): + nlu = NLU() + dst = DST() + #dp = DP() + #nlg = NLG() + + nlu.train_slot_model('data/train+test-pl.conllu', 'data/train+test-pl.conllu') + nlu.train_intent_model('data/NLU_data_intent') + # nlu.load_slot_model('slot-model-pl') + # nlu.load_intent_model('intent-model-pl') + + print('===========================================') + print('### By otrzymać pomoc, wpisz /pomoc ###') + print('### By zakończyć rozmowę, wpisz /koniec ###') + print('Witaj, jestem Usher - system do rezerwacji biletów kinowych. W czym mogę Ci pomóc?') + + # WIP + while True: + user_input = input('> ') + if user_input == '/pomoc': + print('TEKST_POMOCY_WIP') + elif user_input == '/koniec': + print('Dziękuję za skorzystanie z moich usług. Miłego dnia!') + break + else: + slots = nlu.predict_slots(user_input) + intent = nlu.predict_intent(user_input) + formatted_prediction = format_prediction(slots, intent) + print(formatted_prediction) # NLU output + dst.update(formatted_prediction) + print(dst.state) # DST output + #DP, NLG... + + +if __name__ == '__main__': + main() diff --git a/DST_DP_lab_9-10/DP.py b/modules/DP.py similarity index 100% rename from DST_DP_lab_9-10/DP.py rename to modules/DP.py diff --git a/modules/DST.py b/modules/DST.py new file mode 100644 index 0000000..038486f --- /dev/null +++ b/modules/DST.py @@ -0,0 +1,71 @@ +from modules.dialogue_state import default_state +import re + + +# Monitor stanu dialogu +class DST: + def __init__(self): + self.state = default_state() + + def txt2num(self, text): + mapping = {'jeden': '1', 'dwa': '2', 'trzy': '3', 'cztery': '4', 'pięć': '5', 'sześć': '6', 'siedem': '7', 'osiem': '8', 'dziewięć': '9', 'dziesięć': '10'} + for key in mapping: + if key in text: + text = text.replace(key, mapping[key]) + return text + + def update(self, user_act=None): + for intent, domain, slot, value in user_act: + domain = domain.lower() + intent = intent.lower() + slot = slot.lower() + + k = slot + + if intent == 'inform': + + if k is None: + continue + + domain_dic = self.state['belief_state'][domain] + + if k in domain_dic['semi']: + if k == 'quantity': + value = self.txt2num(value) + try: + self.state['belief_state'][domain]['semi'][k] += int(re.sub('[^0-9]', '', value)) + except: + pass + elif k == 'tickettype': + self.state['belief_state'][domain]['semi'][k] += ' ' + value + else: + self.state['belief_state'][domain]['semi'][k] = value + elif k in domain_dic['book']: + if k == 'seat' or k == 'row': + self.state['belief_state'][domain]['book'][k] += ' ' + value + else: + self.state['belief_state'][domain]['book'][k] = value + + if intent == 'request': + + if domain not in self.state['request_state']: + self.state['request_state'][domain] = {} + if k not in self.state['request_state'][domain]: + self.state['request_state'][domain][k] = 0 + + self.state['user_action'].append([intent, domain, slot, value]) + + return self.state + + def init_session(self): + self.state = default_state() + + +# Przykładowe uruchomienie dla kodu w izolacji +""" +dst = DST() +print(dst.state) + +dst.update([['hello_inform', 'Cinema', 'Price', '15 zł'], ['Inform', 'Cinema', 'Movie', 'Batman']]) +print(dst.state) +""" diff --git a/NLG_lab_11/NLG.py b/modules/NLG.py similarity index 100% rename from NLG_lab_11/NLG.py rename to modules/NLG.py diff --git a/modules/NLU.py b/modules/NLU.py new file mode 100644 index 0000000..eea9f40 --- /dev/null +++ b/modules/NLU.py @@ -0,0 +1,184 @@ +import re +from conllu import parse_incr +from flair.data import Corpus, Sentence, Token +from flair.datasets import SentenceDataset +from flair.embeddings import StackedEmbeddings +from flair.models import SequenceTagger +from flair.trainers import ModelTrainer +import random +import torch +from flair.datasets import CSVClassificationCorpus +from flair.embeddings import WordEmbeddings, FlairEmbeddings, CharacterEmbeddings, DocumentRNNEmbeddings +from flair.models import TextClassifier +import os + + +class NLU: + def __init__(self): + self.slot_model = None + self.intent_model = None + + def nolabel2o(self, line, i): + return 'O' if line[i] == 'NoLabel' else line[i] + + def conllu2flair(self, sentences, label=None): + fsentences = [] + for sentence in sentences: + fsentence = Sentence() + for token in sentence: + ftoken = Token(token['form']) + if label: + ftoken.add_tag(label, token[label]) + fsentence.add_token(ftoken) + fsentences.append(fsentence) + return SentenceDataset(fsentences) + + def load_slot_model(self, model_path): + try: + self.slot_model = SequenceTagger.load(f'{model_path}/best-model.pt') + except: + self.slot_model = SequenceTagger.load(f'{model_path}/final-model.pt') + + def train_slot_model(self, train_path, test_path): + fields = ['id', 'form', 'frame', 'slot'] + + with open(train_path, encoding='utf-8') as trainfile: + trainset = list(parse_incr(trainfile, fields=fields, field_parsers={'slot': self.nolabel2o})) + with open(test_path, encoding='utf-8') as testfile: + testset = list(parse_incr(testfile, fields=fields, field_parsers={'slot': self.nolabel2o})) + + random.seed(42) + torch.manual_seed(42) + + if torch.cuda.is_available(): + torch.cuda.manual_seed(0) + torch.cuda.manual_seed_all(0) + torch.backends.cudnn.enabled = False + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + + corpus = Corpus(train=self.conllu2flair(trainset, 'slot'), test=self.conllu2flair(testset, 'slot')) + tag_dictionary = corpus.make_tag_dictionary(tag_type='slot') + + embedding_types = [ + WordEmbeddings('pl'), + FlairEmbeddings('pl-forward'), + FlairEmbeddings('pl-backward'), + CharacterEmbeddings(), + ] + + embeddings = StackedEmbeddings(embeddings=embedding_types) + tagger = SequenceTagger(hidden_size=512, embeddings=embeddings, + tag_dictionary=tag_dictionary, + tag_type='slot', use_crf=True) + trainer = ModelTrainer(tagger, corpus) + + dirpath = 'slot-model-pl' + + if not os.path.isdir(dirpath): + trainer.train(dirpath, + learning_rate=0.1, + mini_batch_size=32, + max_epochs=20, + train_with_dev=True) + + self.load_slot_model(dirpath) + + # Tworzenie osobnego pliku z metrykami dla modelu + log_file = open('slot-model-pl/training.log', encoding='utf-8') + log_lines = log_file.readlines() + log_file.close() + with open('slot-model-pl/training.log', encoding='utf-8') as log_file, open('nlu_evaluation.txt', 'w', + encoding='utf-8') \ + as eval_file: + for num, line in enumerate(log_file): + if line == 'Results:\n': + lines_to_write_start = num + eval_file.write('*** This evaluation file was generated automatically by the training script ***\n\n') + for line in log_lines[lines_to_write_start:]: + eval_file.write(line) + + def predict_slots(self, sentence): + sentence = sentence.split() + csentence = [{'form': word} for word in sentence] + fsentence = self.conllu2flair([csentence])[0] + self.slot_model.predict(fsentence) + return [(token, ftoken.get_tag('slot').value) for token, ftoken in zip(sentence, fsentence)] + + def load_intent_model(self, model_path): + try: + self.intent_model = TextClassifier.load(f'{model_path}/best-model.pt') + except: + self.intent_model = TextClassifier.load(f'{model_path}/final-model.pt') + + def train_intent_model(self, data_path): + column_name_map = {0: "text", 1: "label_intent"} + corpus = CSVClassificationCorpus(data_path, + column_name_map, + skip_header=False, + delimiter='\t', label_type='label_intent' + ) + label_dict = corpus.make_label_dictionary(label_type='label_intent') + + word_embeddings = [ + WordEmbeddings('pl'), + FlairEmbeddings('polish-forward'), + FlairEmbeddings('polish-backward'), + CharacterEmbeddings(), + ] + + document_embeddings = DocumentRNNEmbeddings(word_embeddings, hidden_size=512) + classifier = TextClassifier(document_embeddings, label_dictionary=label_dict, label_type='label_intent') + trainer = ModelTrainer(classifier, corpus) + + dirpath = 'intent-model-pl' + + if not os.path.isdir(dirpath): + trainer.train(dirpath, + learning_rate=0.1, + mini_batch_size=32, + anneal_factor=0.5, + patience=5, + max_epochs=20) + + self.load_intent_model(dirpath) + + def predict_intent(self, sentence): + sentence = Sentence(sentence) + self.intent_model.predict(sentence) + label_text = sentence.labels[0].value + return label_text + + +def format_prediction(prediction, intent): + out_list = [] + for idx, tup in enumerate(prediction): + if tup[1][0] == 'B': + slot_list = [intent, 'Cinema', tup[1][2:], tup[0]] + for tup in prediction[idx + 1:]: + if tup[1][0] != 'I': + break + else: + slot_list[3] += ' ' + tup[0] + out_list.append(slot_list) + for slot in out_list: + slot[3] = re.sub("^[!\"#$%&\'()*+,.;:<=>?\[\]^_`{|}~]+", '', slot[3]) + slot[3] = re.sub("[!\"#$%&\'()*+,.;:<=>?\[\]^_`{|}~]+$", '', slot[3]) + return out_list + + +# Testy +""" +nlu = NLU() +# raz: +nlu.train_slot_model('../data/train+test-pl.conllu', '../data/train+test-pl.conllu') +nlu.train_intent_model('../data/intent_data') +# potem: +# nlu.load_slot_model('slot-model-pl') +# nlu.load_intent_model('intent-model-pl') +sentence = "3 studenckie, miejsca 2-5, rząd 7" +slots = nlu.predict_slots(sentence) +intent = nlu.predict_intent(sentence) +formatted_prediction = format_prediction(slots, intent) +print(formatted_prediction) +""" diff --git a/NLU_lab_7-8/README.md b/modules/README.md similarity index 100% rename from NLU_lab_7-8/README.md rename to modules/README.md diff --git a/modules/__pycache__/DP.cpython-38.pyc b/modules/__pycache__/DP.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9afafe8d5f8a7482d742c025ce937a7a3d0b7c09 GIT binary patch literal 2755 zcmZ`*TW{RP6`mPhmdjnqj;%UYng*(qI9ViK1$`*;Ah>a@r0qktg5(4UVS}J{W@UDH zSq-^%ByfQO@()OWJOz1>9{bwg&|fjHeQJL~fujA+aAiq`Lt+lkT+Yn7eP{U9%1X%a zO#k}h{_pyX{gXPEpB_4&p;h04NG5s4avpI`yvRiEM2_X1%+0;Xv%H)6c_->v-phi# z8+9%3XJH;jA^46AWLJi=C;M_muF7ljx_slOZpx$HADFx$Z@ysirt(gls4uVYvBBEE za5@`s)01kF%(Ggi;~L#SDm5FIvm@~SPMM`+;)3m@PWF>ZfnV84XDUkz_257iB;suE zJu?17jO2-N>!X=6-b|-s6?s*i=*qZ7k}KoL9iOII@D<$q{t1f|2)SEBLt;#MWh@#$?LnN1*c=TJ;`?ViA`t5|T^tf<=yW zq>HaBJ?Z1?$qw8R7_q%G=2(9I5bvNpM5}Ir)a;Zu>_`SlJ{q{%#k>jP zI4#mTj>84DRC3~-u_p)SvGcV;l zQ`P47&Tw|5*C2Kit)dec5BYHq6Xyqw$75+Pz15%RJ9P09gSbm*`OTNyw z*^@hu-G#gG7XG5sIE~l16S2-toyKoE6G3JkzSB5Zx3&lxcIq|(R`EuhxtRH)_N0)` z3)VQN{uy>V;U`@G(6GNS{ey&SiIYC|>%h&OGjSjecnU2jOCcprgH~$MZMx7X++s}^ z&g_j$Z(b=KC*wLTiz@*k4^XS9eF)O9CmRdiaOimJu~>+O(+J6?z78e-nnBC(P)LDY zoq-d%$fzTI9b)}b>0@|or&@9wYfX0xnXbN_>HF`ZTalw( zCYX;mV8_3rNNGj*8E$3eILWH}!Hla2xNy{oXlTJ9PKjBx~*R^;-}(zAS;S!uV-X z1FZU&7}RgmjQ1?b=IT{T!>uf>>T#LR?tg$)RRH3%(3*Pu&T9>QDPUHrqpDW~zrP7q9#-)|UPi8Y!NV7Jd$)r~C6&d3s1)y$KtBiA?YU2SXc{K<%K@fqeA*9~} zuh)tEn#cwbvP$0pF~NBTMZJsNt5;3!EMrE%tjJl6`#j)*SmU=vz;8IW(0^Opz}Rtk zVfmP_eg04>Cl^`BZ6T%&M{EG8w$?PLHS4VLPT3h-XVL{6_%iJ~wE#pMz{<`xfXE4| zOYAki^rk}pqxLcHop9-+H$fg?r35gySn-%5Zy~(92=HmNlZn!i|EFS7>fChZ#k44&7m@$)X^G2m;Au*o$opeit1Jp1)U|G_Uew;ZDhhY8 zT{726QAgcf1z6^Qj=lylO{q8N880iJE3H4KA##}h1&HyWh(6Nq6YpfIFyZd!U&MRg z?8Vy;^hdOe&}|$jJLp+tX{t6Yq{uNX!ZwH_gqhyBEU*}*1ZXCtMg8f1Oyx4Rce_8f zHofX+s?u~^MYv=@(j9)=3HT~s6@8#=m4EDnwzt7UfxGR^pNYX};M46&N5!!TNpPa8Li{f|=RBq+$E9zXH+uEq<}+rP#bqvtf?1m_9F$hjPCS6;+k!?$4ir z|AeeOh!gtd(v-ED?g{-V5i;p@wwhq8-S9r$Hq~8_KDRHt-**HTxWtyU{r=bepWD6u G+y4Wcq>cFi literal 0 HcmV?d00001 diff --git a/modules/__pycache__/DST.cpython-38.pyc b/modules/__pycache__/DST.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a335a4836fa51225578bb8210b27d08d9f10b0b GIT binary patch literal 1785 zcma)7OK%)S5bo}IdUkD06d7zGghYh23ijIJkwjQ13YIt^?ZqHqh#8Hy+sxU84W2#o~0~gUlML{`1ZKC%IqjV@iDKi_;ha-rmm=LeClV4TFE_dmi%fI}f_LLY ziT=YbPvR-NTJqI2-tA@uuToKV!SmX1Wc(;f@eF zhJIw|$A*4l=%HuR36cMaQE?*yUqNN|w@o`-R0D>)v8Zhx$V991raFib9fx_DaE z35>-r&o7>aZkfP|EOZKBvI^^ij}vj&XkISMdI^{8Nij_0zUbJ}2UTM$@kALfi-*G` z-#6AkjB0=zg2#=go~T=SmEod9Lg)(h;dhOWTL|A&-n#-HT1Vv?zMtW;HQgf5=+27= z_JmEWi9K<&bxO_%z{ct?({}x}wLNh)RMvn!qq3)6P6ocbe_(6!2(Dlmpj{>tPrKUV zzyaWYf{s04@`Wa+Ogm?E;*8g4wNX#I4p}D3Que8XR|tC65xq?;vPr%r2Y*n4zwd$j zj^y{jq6ZeuzgywHcHSb**?dVm|J2$f`GY#bIgD_cM}QfX*THtH&h5^XbD-J8_@CBm ztxW>>v}+qDP-uwDJoxdz&*KChQM1+-=(&HdrWz-FVRlcupoJ&iSSF`{JJ_?evu~e4 z+h{O{(p~HsH_7)3nT73LktSlV=@R%Tvn2EnsyJ7P8jal_eq3L@yJJWgsH8s-N{xo% zC4DcncZ*^Wf_kisC5uC>#DhwdDx9A(jYWxlA#7EpkWt)MNs)IvVXU&+H7-=I$a%-FYdZ8V4`a+qfTqifEKYKWHZ7EK9>-}V3>}0)vmf!K|9aKC z)sRCQd=tEtn?MMZI~KjqY!)z|GPc0luns7r>(s4fH`v>BfnKLK*b?Y0*8DZL!o~qU zlg8lx?|~9KkaUpXgMU51cHPWMQI>!aMtIrM7SY+PutR%*A+*0d&O{WMAc~sS2z)z= z;O^2!BC+4q0Lz=meR$O^HeQKuTH;>;UPg(p95AtbFSuFncIu|RIQ_7>@qQx87vDR6t~ztB_Pn;lA|q#jCuGjHDh-tT?P z+ac$7)6-Q0J~!|DWuyI~Vf+&pqn`>aZooJB7zSoAGc=kwTcH_QP0KX6@ukfjW4yH|D4R z4IYfud9$#Yi1Jr!wff5XeSc-m59x+~bz!kl|AdAa_gT$@udr9|-ENtH8-7qmf?M!SuqezN7(V0*aBHv)e>3C+;Qq5%-x3k8cQ~ulAmy#CIt!>47ZSDr@z*24vXCcr@LKEki;C|D zagh4Hi{8+!Vdhi#aQL95l=uKXp8l%XK6eabV6GTD&L2Me*m-0=nFATb^Vc>?rJ|TE9BhJPyEIq@|;3Caz`V7 zsE|JzB8QoQ&#xh(^D2(7A-f-IN#(b5WPjmUCYC&}?ND;qD`1CGX=loRX}?70aJj6c zVl4e3x5It{b|@Y9_v#6MSUHT-f*LyPr&=0T<~5bCC7Jg7GnMb_!+hi6&i?!a@6$ux zr@;G$;(dBp$9c{Bcg_0?#rw-c-r0<0Urzo?~>ZRuFHHdK0u#_C^G=wXj6pFn>PXzW&7eh=tNT5{QC z=t5imL&yBqc6J8Y%2lbh8hD zj3vh@j8VW?$g^vwF&^wm!8+M!oR>#Qb(l4h&4KwUR40#VNb`foJR0I^r0ISrJ_&d> zdlIu5gi%%_x%+DnNLB*`+0Yf~L2Y{hPeU5_$##}XK?G?Fs1|O+(&Dv?4XB9wo6Suw ziav1ADbZy0~X?nr-u_Pj1pQqOdJc z^D96LWq&W)$iQ&|D)7`$o>EDJ7=G?)Kerx`{OUNzh{cfPz91NCuivFHL-wUcN#!t_ z^4wjC14{D7IL}xt4UAkxXK%uqB>R6Ny|LcMcE)aA9&7DO&;l=G`$SaX`>ieAKH#mYymdp`;?1h^r{s)` zdt4^lP?9ls(2f@L=4;1+xGCBbZwu{-L#>cKuY6d(z$Bj@p@ayZSG#%S1*%)QCt5TO zpe$J7@+2WaLV}pw#fDm=Jvb`JQhgaZsClLY__!tctVZJkO+MgZ7>J~B(jemVlexQi z37Q3_rEXj%3YXHsTx&H7aOWNDVXg`8iHYq$2hj3_T>kx$M>iu0j zp+$_H--Uvwu{hWRxy027oC5p1o$_7JDS>{Mc|S=hCYwW^Nsb+tB?phQHZ)8+!V9Qg z08|Mu5I7p5AkI>FV{^{RI>AW?zXhlI|9>6NTN2N#qIz_5nD-?9n})`RczNX0R$6#) z;lPu~3*r>&0c{(Kv~ZP?`iox<)v^!L0RF5E|29o#36fO$6>p0yAo>sKjSDW@08(R$6J}YjaAGk L^Q?LHo@f3SG*b!` literal 0 HcmV?d00001 diff --git a/modules/__pycache__/NLU.cpython-38.pyc b/modules/__pycache__/NLU.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6966565980a3e0cf6de111da6bc1bb372fea9d26 GIT binary patch literal 5592 zcmb_gOOxDI71q_O)f)9-uH;V;LK7_3INkM_wRNsD(v8??o7`+$+-lq0 zR%K>fYCGIfx)qn(E_an~$1`n@drB|Gm3Ea^mCoWCuc`j)ypFqr_OpCe)s@kHj?cB{ z`FwkUFSHl=qAGXM*AibEwLiv>y{d^B;oZ}Ox1;mpq9UsIG=4&`HLY3O$N06?Cd=Jk zAd}FKIxX2Wa{EeG_WDWgtc9I4?6kt%y4JlJc3@Z=7hMg~APG|x&90?E>t-mv+g=Za zh&r1I3ae}3tv)rq7Hn>Y5+&Xbx+09P$sp>e=VMpa-n|kBNfK>DtssrMo#L%cGp}Cn z%Bk-1+ADDo$%Fp$`72w23|eV8UATC)+v>M5;8h|;jNwIZTmq@GX2`8Zf#K{x$4Ojaie@a1`|SKHXC?UJ+5le@{E6^PsV&44;jn!6F31kby*ks+UOZ$H&%IRwfjb}j$yMO zsu4B~Igci}8N&r~7OBEQEK+&7)5RFW_(GQ+6MB!v1{E62q-OQda2!~aaH%EaP&T!p zk+G}Vjn{@|s_ke)D>L!5JhW4m*@B(WcG$3#>GEP$+SM~XvuXHyII@|wYh=r;qF2*E;%p z@Y*%)Gh=m7U0Dy4^t^J{YFdCEU<^yqxk+1@mVMhdqTmay;ZAD@Qb zuvE{?40V~dZA46z?!yCy)^+V!?JezX?L90)rcdnQS+KXiVqcoFH+O*=Y=-ZE?m^iY)E3HJD@b2qFCv&#v zgefZb*|40>i|P(b7er0TI^^PxhFQDmQs#Z=vnx5D`x%*I7-D97yWHqa7pUSFYQ7nmLus?p^!s*d=AEtV85OJbdn_(@h?VjG# zhIQN@*;CFluKRdI3x5vTR|hx%}CIG@dnQ#bWbn0%1U4HrIP zw;p7(yJy71^p>oiEeP!wIJ>Z~!xlB!Jk`MNF9$1speaB-y_)NhkcjD8Zf(e*9RflD zuLG-}Zk&G}i0U9_bt#AedF_oTj77pL>JHCDh?L3xIkc7p-mhGPk(E+J<7NuCr}h8? zKfT)vbG;W2++KVhm~lIhVlV@J9f6V_VCaUEaJf&1uxaHpTagf9$4{a`C|78VRq9Pd zEsBnTyerS6keB+1)GfI&sL;@hlXboq|NX(I^!wvW`wdv_6C5CvR^l*_9n8m$bKMUR6`^Z%65tFT#%oClB?%qH56-&e`T*t`qnD@S`B^ ztI)li-br(Idr*EmO!{$}TyzI-ojZ4~acwI~8V6>LjVKNqw}Ygy8G;31M4}P&({3A` zw1POk+gQKbNVmeq_+5>pC8J*2KwqxgJe!+cKr021B$b3*@+3rV%b+8=ZH{ml3Ia~q z+Tt_qpwkaxpUSw|>WhGTQQ`+51rZQ94tZ_Lz{ktEL)!!721diY)5%L=hb#pzufr|2 z+JU^uDZJ}G!dLy3NmHZd zW!m+k)WE=1jN%Kb&QH6(nt`9BflQC0eZ82^0o>C}l644;d3u$dWw>;gxwt66L4$sZ zd4`QpU^DXzv*hBgGhNgFV;in+qLq54vM+4gH9Uk!hb=JuOIz0wC|x|cxDOU6kQ^Pr zp@2!jpDqtGlLVH9u{+%h#mo{Sxx z?7=Ur)kEx%yqjLLhD*ivO@QnT53j)sBlau(mPrgzVVPP%V3EH3G&gU9J0D$A_Y$3$ zR=3@|yh;60$2L%B@HkDkCqu#*6=0L)OK}%RF}Vz7+(x0@q0y1s6TeZTYd}$>BjBc~ zW8%AD1bb|oZRnGcJ^{0)&P$tNO2;I(2t7g&g72Y1K0=~Ff{?2a6tv~{A+~Xi25l2k zT1hXCQg!YWhsaB#IpoG5>NQPuFyzB%#mlWRxss=;yTiTbl|z#c={+ul27jn30yUWZ z$`T!N2im9q6MvFt(HGXEhMRnp#2ksoNE{jVtl~P;*+2}d!<=Yw!ilEs4h^2BN<2#W z@&hOZEBc*2)QJ;;<(P=iI+&3K4>q=mGnK#?%v2W+n1mt9O1hAyVigwnvPB6ec!-_a z;6?CtFcD)|N=sRZ{C$W_3B1JFF7LV-SWogx@D*YoHOibQYZ43hkuc)~c~noSEL?pN z{5wS=$2~=$k+@JQ)YO%8?yGqGIV(2}K9k&xdj1yhGnB~2!Z>OJEoJT&?I<|o+`NO? z%4LW_wbzBGZ%qPqZS*`2*79jsp~W+F(|Nt0{QmmV zKBWxuS%{`3zeW0U5WEs}Iw4^t656ih4lp&M%vnB9mK8~x#1~a``O2%0#Fr{{s@#X- zi+qX1%Ot){LPeLipf%l5Rw9tZt7yS%t!~_Jcl-`Gk>3t_l8}v$mW<}gYXmI*Bs3&^ zUX1eX=|ZU!|A6M&gL~&J_SV?UoW|39Xg}95xB33BCVqThplS;Y<>mdk1LF z^ncly!i7dDJA zvpA5e&~jVtMIzt8!zi&W0B7uGaMh#pu8zT`804?fbaZ4XVMD4mH;pxg5q$*kC{b5C zIX3QrJmf>IJI(~w57V>s|5YJcHSd;6v#w~?UVyY(hOreta8(lbKOJJ=^=&)Mty zGL@r6q-UViDr4?eu{Bmc)g$%a3@@no*;e~BOB z&sFO16m_7>FgKF^db5O&Y(=02na}NFcXOxPPr;ayG&dq~CpXf5FE1K>2?i|5Kngb!O`);ZzPg&SV>P&6ASi8wb~;PX~%W<1++( zKv*UqD7dV0a{?+}J*WajOw{)bUM~s{U_fzN?iIxEb0bYdz1 literal 0 HcmV?d00001 diff --git a/modules/__pycache__/dialogue_state.cpython-38.pyc b/modules/__pycache__/dialogue_state.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f7aa68b582b231b334f1885d283ac238b6d4a99 GIT binary patch literal 569 zcmYjOy>8nu5I)M3tT@q>RP}E3be?WrRbt1pv8+#SQH(SYM~<8vSu$(zd)WK z@4_`x<_rOnp*`8I;XB;VcZd5Uzh~J9h<<diq0q}UC}g9+RK%@|#r z=Seth9okD#d1ab1bDi_3w=f({u~v9>>Adh5GK<$OI=_MC$fMP&p^OZ}xpLmvu1qS` zpcVnBGxx$R0+)R)pPSYO?ojwJu;ynOYx6_lGu;At9C#x%{z`DHg{sSVX;p3P0 zmG8pHt7?JXcT3Epz^BGgUx!-R{snVJLsaF-#_LTVz#dclZBopQ#$ti85XxgUFQgJ% zckJZ27Slo-+1BX#Ol@W@ZjSq~>_R5-O0@O%-2MyPM^eBzi~jR_GK``fV$mBv*r9j+ f%KxUv_LOQaa<;bXz4p`z