From ac5ed0c0fc21e679150a6392bebba3226c3ed5ae Mon Sep 17 00:00:00 2001 From: s444417 Date: Mon, 23 May 2022 07:53:43 +0200 Subject: [PATCH] add form token --- ...g-semantyczny-uczenie(zmodyfikowany).ipynb | 99 ++++++++++--------- tasks/zad8/pl/test.conllu | 10 +- tasks/zad8/pl/train.conllu | 49 ++++----- 3 files changed, 82 insertions(+), 76 deletions(-) diff --git a/lab/08-parsing-semantyczny-uczenie(zmodyfikowany).ipynb b/lab/08-parsing-semantyczny-uczenie(zmodyfikowany).ipynb index 068fd49..5bdbcf1 100644 --- a/lab/08-parsing-semantyczny-uczenie(zmodyfikowany).ipynb +++ b/lab/08-parsing-semantyczny-uczenie(zmodyfikowany).ipynb @@ -82,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -155,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -172,7 +172,7 @@ "'\\n\\n\\n\\n\\n
1wybieraminformO
2batmana informB-title
'" ] }, - "execution_count": 14, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -184,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -202,7 +202,7 @@ "'\\n\\n\\n\\n\\n\\n
1chcę informO
2zarezerwowaćinformB-goal
3bilety informO
'" ] }, - "execution_count": 15, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -213,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -232,7 +232,7 @@ "'\\n\\n\\n\\n\\n\\n\\n
1chciałbym informO
2anulować informO
3rezerwacjęinformO
4biletu informO
'" ] }, - "execution_count": 16, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -251,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -288,20 +288,20 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Corpus: 345 train + 38 dev + 32 test sentences\n", - "Dictionary with 20 tags: , O, B-interval, I-interval, B-title, B-date, I-date, B-time, B-quantity, B-area, I-area, B-goal, I-goal, I-title, I-time, I-quantity, B-seats, I-seats, , \n" + "Corpus: 346 train + 38 dev + 32 test sentences\n", + "Dictionary with 78 tags: , O, O/reqmore, B-interval/reqmore, I-interval/reqmore, O/inform, B-title/inform, B-date/inform, I-date/inform, B-time/inform, B-quantity/inform, B-area/inform, I-area/inform, B-goal/inform, O/bye, O/hello, O/reqmore inform, B-goal/reqmore inform, I-goal/reqmore inform, B-date/reqmore inform, B-interval/reqmore inform, O/null, O/help, B-goal/reqmore, I-goal/reqmore, B-title/reqmore, B-title/reqmore inform, I-title/reqmore inform, O/ack, O/reqalts\n" ] } ], "source": [ - "def conllu2flair(sentences, label=None):\n", + "def conllu2flair(sentences, label1=None, label2=None):\n", " fsentences = []\n", "\n", " for sentence in sentences:\n", @@ -310,16 +310,19 @@ " for token in sentence:\n", " ftoken = Token(token['form'])\n", "\n", - " if label:\n", - " ftoken.add_tag(label, token[label])\n", - "\n", + " if label1:\n", + " if label2:\n", + " ftoken.add_tag(label1, token[label1] + \"/\" + token[label2])\n", + " else:\n", + " ftoken.add_tag(label1, token[label1])\n", + " \n", " fsentence.add_token(ftoken)\n", "\n", " fsentences.append(fsentence)\n", "\n", " return SentenceDataset(fsentences)\n", "\n", - "corpus = Corpus(train=conllu2flair(trainset, 'slot'), test=conllu2flair(testset, 'slot'))\n", + "corpus = Corpus(train=conllu2flair(trainset, 'slot', \"frame\"), test=conllu2flair(testset, 'slot', \"frame\"))\n", "print(corpus)\n", "tag_dictionary = corpus.make_tag_dictionary(tag_type='slot')\n", "print(tag_dictionary)" @@ -334,7 +337,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -361,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -396,7 +399,7 @@ " (locked_dropout): LockedDropout(p=0.5)\n", " (embedding2nn): Linear(in_features=4446, out_features=4446, bias=True)\n", " (rnn): LSTM(4446, 256, batch_first=True, bidirectional=True)\n", - " (linear): Linear(in_features=512, out_features=20, bias=True)\n", + " (linear): Linear(in_features=512, out_features=78, bias=True)\n", " (beta): 1.0\n", " (weights): None\n", " (weight_tensor) None\n", @@ -417,7 +420,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -479,14 +482,14 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2022-05-05 17:34:34,767 loading file slot-model/final-model.pt\n" + "2022-05-22 15:25:19,970 loading file slot-model/final-model.pt\n" ] } ], @@ -504,16 +507,16 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[('kiedy', 'O'), ('gracie', 'O'), ('film', 'O'), ('zorro', 'B-title')]" + "[('co', 'O/reqmore'), ('gracie', 'O/reqmore'), ('obecnie', 'O/reqmore')]" ] }, - "execution_count": 42, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -525,7 +528,7 @@ " model.predict(fsentence)\n", " return [(token, ftoken.get_tag('slot').value) for token, ftoken in zip(sentence, fsentence)]\n", "\n", - "predict(model, 'kiedy gracie film zorro'.split())" + "predict(model, 'co gracie obecnie'.split())" ] }, { @@ -538,7 +541,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -546,18 +549,18 @@ "text/html": [ "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", "
kiedy O
gracieO
film O
zorro B-title
kiedy O/reqmore
gracieO/reqmore
film O/reqmore
zorro O/reqmore
" ], "text/plain": [ - "'\\n\\n\\n\\n\\n\\n\\n
kiedy O
gracieO
film O
zorro B-title
'" + "'\\n\\n\\n\\n\\n\\n\\n
kiedy O/reqmore
gracieO/reqmore
film O/reqmore
zorro O/reqmore
'" ] }, - "execution_count": 24, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -568,17 +571,19 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 2, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "stats: \n", - "precision: 0.8076923076923077\n", - "recall: 0.4117647058823529\n", - "f1: 0.5454545454545454\n" + "ename": "NameError", + "evalue": "name 'testset' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Develop\\wmi\\AITECH\\sem1\\Systemy dialogowe\\lab\\08-parsing-semantyczny-uczenie(zmodyfikowany).ipynb Cell 25'\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 37\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mrecall: \u001b[39m\u001b[39m\"\u001b[39m, recallScore)\n\u001b[0;32m 38\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mf1: \u001b[39m\u001b[39m\"\u001b[39m, f1Score)\n\u001b[1;32m---> 40\u001b[0m \u001b[39meval\u001b[39;49m()\n", + "\u001b[1;32mc:\\Develop\\wmi\\AITECH\\sem1\\Systemy dialogowe\\lab\\08-parsing-semantyczny-uczenie(zmodyfikowany).ipynb Cell 25'\u001b[0m in \u001b[0;36meval\u001b[1;34m()\u001b[0m\n\u001b[0;32m 14\u001b[0m fp \u001b[39m=\u001b[39m \u001b[39m0\u001b[39m\n\u001b[0;32m 15\u001b[0m fn \u001b[39m=\u001b[39m \u001b[39m0\u001b[39m\n\u001b[1;32m---> 16\u001b[0m sentences \u001b[39m=\u001b[39m [sentence \u001b[39mfor\u001b[39;00m sentence \u001b[39min\u001b[39;00m testset]\n\u001b[0;32m 17\u001b[0m \u001b[39mfor\u001b[39;00m sentence \u001b[39min\u001b[39;00m sentences:\n\u001b[0;32m 18\u001b[0m \u001b[39m# get sentence as terms list\u001b[39;00m\n\u001b[0;32m 19\u001b[0m termsList \u001b[39m=\u001b[39m [w[\u001b[39m\"\u001b[39m\u001b[39mform\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39mfor\u001b[39;00m w \u001b[39min\u001b[39;00m sentence]\n", + "\u001b[1;31mNameError\u001b[0m: name 'testset' is not defined" ] } ], @@ -605,13 +610,13 @@ " # predict tags\n", " predTags = [tag[1] for tag in predict(model, termsList)]\n", " \n", - " expTags = [token[\"slot\"] for token in sentence]\n", + " expTags = [token[\"slot\"] + \"/\" + token[\"frame\"] for token in sentence]\n", " for i in range(len(predTags)):\n", - " if (expTags[i] == \"O\" and expTags[i] != predTags[i]):\n", + " if (expTags[i][0] == \"O\" and expTags[i] != predTags[i]):\n", " fp += 1\n", - " elif ((expTags[i] != \"O\") & (predTags[i] == \"O\")):\n", + " elif ((expTags[i][0] != \"O\") & (predTags[i][0] == \"O\")):\n", " fn += 1\n", - " elif ((expTags[i] != \"O\") & (predTags[i] == expTags[i])):\n", + " elif ((expTags[i][0] != \"O\") & (predTags[i] == expTags[i])):\n", " tp += 1\n", "\n", " precisionScore = precision(tp, fp)\n", @@ -645,7 +650,7 @@ "author": "Marek Kubis", "email": "mkubis@amu.edu.pl", "interpreter": { - "hash": "2be5faf79681da6f2a61fdfdd5405d65d042280f7fba6178067603e3a2925119" + "hash": "2f9d6cf1e3d8195079a65c851de355134a77367bcd714b1a5d498c42d3c07114" }, "jupytext": { "cell_metadata_filter": "-all", @@ -653,7 +658,7 @@ "notebook_metadata_filter": "-all" }, "kernelspec": { - "display_name": "Python 3.10.4 64-bit", + "display_name": "Python 3.8.3 64-bit", "language": "python", "name": "python3" }, diff --git a/tasks/zad8/pl/test.conllu b/tasks/zad8/pl/test.conllu index 4674a20..5f2286c 100644 --- a/tasks/zad8/pl/test.conllu +++ b/tasks/zad8/pl/test.conllu @@ -19,9 +19,9 @@ # text: o której godzinie w piątek mogę zobaczyć na noże # intent: reqmore inform # slots: októrejgodzinie:goal,wpiątek:date,nanoże:title -1 o reqmore inform B-goal -2 której reqmore inform I-goal -3 godzinie reqmore inform I-goal +1 o reqmore inform B-time +2 której reqmore inform I-time +3 godzinie reqmore inform I-time 4 w reqmore inform NoLabel 5 piątek reqmore inform B-date 6 mogę reqmore inform NoLabel @@ -237,11 +237,11 @@ # slots: 1:quantity 1 1 inform B-quantity -# text: jakie sš dostępne ulgi +# text: jakie są dostępne ulgi # intent: nan # slots: 1 jakie NoLabel -2 sš NoLabel +2 są NoLabel 3 dostępne NoLabel 4 ulgi NoLabel diff --git a/tasks/zad8/pl/train.conllu b/tasks/zad8/pl/train.conllu index 5f8e9ad..29391c2 100644 --- a/tasks/zad8/pl/train.conllu +++ b/tasks/zad8/pl/train.conllu @@ -561,12 +561,12 @@ 2 potrafisz help NoLabel # text: jakie są najbliższe seanse -# intent: inform +# intent: reqmore # slots: -1 jakie inform B-goal -2 są inform I-goal -3 najbliższe inform I-goal -4 seanse inform I-goal +1 jakie reqmore NoLabel +2 są reqmore NoLabel +3 najbliższe reqmore B-interval +4 seanse reqmore NoLabel # text: a jakie są dostępne # intent: reqmore @@ -729,7 +729,7 @@ 5 grają reqmore NoLabel 6 jakieś reqmore NoLabel 7 stare reqmore NoLabel -8 filmy reqmore NoLabel +8 filmy reqmore B-title # text: to dziękuję # intent: thankyou @@ -808,14 +808,14 @@ 3 film inform NoLabel # text: jakie filmy są teraz w kinach -# intent: request +# intent: reqmore # slots: jakiefilmysą:goal -1 jakie request B-goal -2 filmy request I-goal -3 są request I-goal -4 teraz request NoLabel -5 w request NoLabel -6 kinach request NoLabel +1 jakie reqmore B-goal +2 filmy reqmore I-goal +3 są reqmore I-goal +4 teraz reqmore B-interval +5 w reqmore NoLabel +6 kinach reqmore NoLabel # text: poproszę 3x bilet na ambulans # intent: inform @@ -1824,13 +1824,13 @@ # slots: 1 witam hello NoLabel -# text: jakie sš najbliższe seanse +# text: jakie są najbliższe seanse # intent: reqmore # slots: najbliższeseanse:goal 1 jakie reqmore NoLabel -2 sš reqmore NoLabel -3 najbliższe reqmore B-goal -4 seanse reqmore I-goal +2 są reqmore NoLabel +3 najbliższe reqmore B-interval +4 seanse reqmore B-goal # text: w jakim to kinie # intent: reqmore @@ -2525,7 +2525,7 @@ 1 niech ack NoLabel 2 będzie ack NoLabel -# text: jakieś potwierdzenie rezerwacji dostanę czy mogę po prostu wejść i usišść +# text: jakieś potwierdzenie rezerwacji dostanę czy mogę po prostu wejść i usiąść # intent: request # slots: 1 jakieś request NoLabel @@ -2538,7 +2538,7 @@ 8 prostu request NoLabel 9 wejść request NoLabel 10 i request NoLabel -11 usišść request NoLabel +11 usiąść request NoLabel # text: wybieram batmana o 16:20 # intent: inform @@ -2853,14 +2853,15 @@ 2 filmy inform B-goal 3 są inform NoLabel 4 wieczorem inform B-interval + # text: co gracie za trzy dni # intent: inform # slots: zatrzydni:date -1 co inform NoLabel -2 gracie inform NoLabel -3 za inform B-date -4 trzy inform I-date -5 dni inform I-date +1 co reqmore NoLabel +2 gracie reqmore NoLabel +3 za reqmore B-date +4 trzy reqmore I-date +5 dni reqmore I-date # text: co gracie za dwa dni # intent: inform