from transformers import BartConfig, BartForSequenceClassification, BartModel
from torch import nn


class BartForClassification(BartForSequenceClassification):
    """BART sequence classifier whose output size is driven by the config.

    The previous constructor skipped ``super().__init__()`` and then tried to
    assign a submodule, which ``torch.nn.Module`` rejects with
    ``AttributeError: cannot assign module before Module.__init__() call``.
    It also attached a hand-built ``nn.Linear(768, 4)`` as ``out_proj`` on a
    nested model, an attribute that no forward pass reads.

    Args:
        config: BART configuration. Set ``config.num_labels = 4`` to get the
            four-way output layer that used to be hard-coded.
    """

    def __init__(self, config: BartConfig):
        # The parent constructor must run first: it initialises the
        # nn.Module machinery and stores ``config`` on the instance.
        super().__init__(config)
        # Size the final projection from the config rather than the literals
        # 768 and 4, so the class works for any BART width / label count.
        # NOTE(review): assumes the parent exposes its head as
        # ``classification_head.out_proj`` (true for the Transformers 4.26
        # release listed in this commit's README) -- confirm on upgrade.
        self.classification_head.out_proj = nn.Linear(
            config.d_model, config.num_labels
        )
import torch
from torch import nn
from transformers import GPT2PreTrainedModel, GPT2Model
from transformers.modeling_outputs import SequenceClassifierOutputWithPast


class GPT2ForSequenceClassification(GPT2PreTrainedModel):
    """GPT-2 backbone plus a bias-free linear scoring layer.

    Only the constructor is defined here; this class does not provide its own
    ``forward``. It serves as the base for
    ``GPT2ForSequenceClassificationCustom`` below.
    """

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.transformer = GPT2Model(config)
        self.score = nn.Linear(config.n_embd, self.num_labels, bias=False)

        # Model parallel
        self.model_parallel = False
        self.device_map = None

        # Initialize weights and apply final processing
        self.post_init()


class GPT2ClassificationHeadCustom(nn.Module):
    """Two-branch MLP head that maps token features to per-token label logits.

    One branch projects the input features, the other projects the last entry
    of ``hidden_states`` (or zeros when none is given). Both are widened to
    ``2 * n_embd``, concatenated to ``4 * n_embd``, passed through three dense
    layers and finally projected to ``config.num_labels``.
    """

    def __init__(self, config):
        super().__init__()
        hidden_size = config.n_embd
        self.dense_1_input = nn.Linear(hidden_size, 2 * hidden_size)
        self.dense_1_hidden = nn.Linear(hidden_size, 2 * hidden_size)
        self.dense_2 = nn.Linear(4 * hidden_size, 4 * hidden_size)
        self.dense_3 = nn.Linear(4 * hidden_size, 4 * hidden_size)
        self.dense_4 = nn.Linear(4 * hidden_size, hidden_size)
        self.dropout = nn.Dropout(config.resid_pdrop)
        self.out_proj = nn.Linear(hidden_size, config.num_labels, bias=False)

    def forward(self, x, hidden_states=None, **kwargs):
        """Compute logits for every position of ``x``.

        Args:
            x: Feature tensor whose last dimension is ``config.n_embd``.
            hidden_states: Optional sequence of tensors shaped like ``x``;
                only the last element is used. ``None`` selects an all-zero
                second branch.
            **kwargs: Ignored; accepted for call-site compatibility.

        Returns:
            Tensor shaped like ``x`` except that the last dimension is
            ``config.num_labels``.
        """
        if hidden_states is not None:
            # Get hidden states from last layer
            # NOTE(review): when the caller passes the backbone's own
            # hidden-state tuple, this entry is presumably the same tensor as
            # ``x`` -- confirm that duplicating it is intended.
            hidden = hidden_states[-1]
        else:
            hidden = torch.zeros_like(x)

        x = self.dropout(torch.relu(self.dense_1_input(x)))
        hidden = self.dropout(torch.relu(self.dense_1_hidden(hidden)))

        # Join on the feature axis; -1 rather than a fixed 2 so inputs without
        # a leading batch dimension also work.
        x = torch.cat((x, hidden), dim=-1)
        x = torch.relu(self.dense_2(x))
        x = torch.relu(self.dense_3(x))
        x = torch.relu(self.dense_4(x))
        x = self.dropout(x)

        return self.out_proj(x)


class GPT2ForSequenceClassificationCustom(GPT2ForSequenceClassification):
    """GPT-2 sequence classifier that scores tokens with the custom MLP head."""

    def __init__(self, config):
        # The parent constructor already builds the GPT-2 backbone and sets
        # ``num_labels``, ``model_parallel`` and ``device_map``; building a
        # second GPT2Model here only allocated and initialised a throw-away
        # copy of the whole network.
        super().__init__(config)

        self.score = GPT2ClassificationHeadCustom(config)

        # Re-run weight initialisation so the replacement head is covered.
        self.post_init()

    def forward(
        self,
        input_ids=None,
        past_key_values=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        use_cache=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[0, ...,
            config.num_labels - 1]`. If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),
            If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Returns a :obj:`SequenceClassifierOutputWithPast` when ``return_dict`` resolves to true, otherwise a tuple
        ``(loss?, pooled_logits, *remaining backbone outputs)``.

        Raises :obj:`ValueError` if no padding token is configured and the batch holds more than one sequence.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # Always ask the backbone for a ModelOutput so ``hidden_states`` can be
        # read by name; the caller's ``return_dict`` choice only affects how
        # the result is packaged at the end. Previously the tuple path hit
        # ``raise NotImplemented(...)``, which itself fails with TypeError
        # because ``NotImplemented`` is a constant, not an exception class.
        transformer_outputs = self.transformer(
            input_ids,
            past_key_values=past_key_values,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=True,
        )
        hidden_states = transformer_outputs[0]
        # ``transformer_outputs.hidden_states`` is None unless hidden states
        # were requested; the head then falls back to a zero second branch.
        logits = self.score(hidden_states, hidden_states=transformer_outputs.hidden_states)

        if input_ids is not None:
            batch_size = input_ids.shape[0]
        else:
            batch_size = inputs_embeds.shape[0]

        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if self.config.pad_token_id is None and batch_size != 1:
            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")

        if self.config.pad_token_id is not None and input_ids is not None:
            # Index of the last non-pad token per row.
            # NOTE(review): this counts non-pad tokens, so it presumably
            # expects right-padded batches -- confirm against the tokenizer.
            sequence_lengths = torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1
        else:
            # Without pad information, pool the final position.
            sequence_lengths = -1

        pooled_logits = logits[torch.arange(batch_size, device=logits.device), sequence_lengths]

        loss = None
        if labels is not None:
            if self.num_labels == 1:
                # We are doing regression
                loss_fct = nn.MSELoss()
                loss = loss_fct(pooled_logits.view(-1), labels.to(self.dtype).view(-1))
            else:
                loss_fct = nn.CrossEntropyLoss()
                loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            # Slicing a ModelOutput yields a plain tuple of its non-None fields.
            output = (pooled_logits,) + transformer_outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutputWithPast(
            loss=loss,
            logits=pooled_logits,
            past_key_values=transformer_outputs.past_key_values,
            hidden_states=transformer_outputs.hidden_states,
            attentions=transformer_outputs.attentions,
        )
+It achieves the following results on the evaluation set: +- eval_loss: 0.3020 +- eval_accuracy: 0.9195 +- eval_runtime: 24.1139 +- eval_samples_per_second: 82.94 +- eval_steps_per_second: 10.367 +- step: 0 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: linear +- training_steps: 2500 + +### Framework versions + +- Transformers 4.26.1 +- Pytorch 1.13.1+cu117 +- Datasets 2.9.0 +- Tokenizers 0.13.2 diff --git a/out/gpt2_results/all_results.json b/out/gpt2_results/all_results.json new file mode 100644 index 0000000..0a68305 --- /dev/null +++ b/out/gpt2_results/all_results.json @@ -0,0 +1,8 @@ +{ + "eval_accuracy": 0.9194999933242798, + "eval_loss": 0.3020096719264984, + "eval_runtime": 24.1139, + "eval_samples": 2000, + "eval_samples_per_second": 82.94, + "eval_steps_per_second": 10.367 +} \ No newline at end of file diff --git a/out/gpt2_results/eval_results.json b/out/gpt2_results/eval_results.json new file mode 100644 index 0000000..0a68305 --- /dev/null +++ b/out/gpt2_results/eval_results.json @@ -0,0 +1,8 @@ +{ + "eval_accuracy": 0.9194999933242798, + "eval_loss": 0.3020096719264984, + "eval_runtime": 24.1139, + "eval_samples": 2000, + "eval_samples_per_second": 82.94, + "eval_steps_per_second": 10.367 +} \ No newline at end of file diff --git a/out/gpt2_results/predict_results_None.txt b/out/gpt2_results/predict_results_None.txt new file mode 100644 index 0000000..c1f964f --- /dev/null +++ b/out/gpt2_results/predict_results_None.txt @@ -0,0 +1,3801 @@ +index prediction +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 +10 0 +11 1 +12 2 +13 0 +14 0 +15 0 +16 0 +17 
0 +18 0 +19 3 +20 0 +21 0 +22 0 +23 0 +24 3 +25 0 +26 0 +27 2 +28 0 +29 0 +30 0 +31 0 +32 0 +33 0 +34 0 +35 0 +36 0 +37 0 +38 0 +39 0 +40 0 +41 0 +42 0 +43 3 +44 0 +45 0 +46 0 +47 0 +48 0 +49 2 +50 0 +51 0 +52 0 +53 1 +54 1 +55 0 +56 0 +57 0 +58 0 +59 0 +60 0 +61 0 +62 0 +63 0 +64 0 +65 0 +66 0 +67 0 +68 0 +69 0 +70 0 +71 0 +72 0 +73 0 +74 0 +75 0 +76 0 +77 0 +78 0 +79 0 +80 0 +81 0 +82 0 +83 0 +84 0 +85 0 +86 0 +87 0 +88 0 +89 1 +90 0 +91 0 +92 0 +93 0 +94 0 +95 0 +96 0 +97 0 +98 0 +99 0 +100 0 +101 0 +102 0 +103 0 +104 1 +105 0 +106 3 +107 0 +108 0 +109 0 +110 0 +111 0 +112 0 +113 0 +114 1 +115 0 +116 0 +117 0 +118 0 +119 0 +120 0 +121 0 +122 0 +123 2 +124 0 +125 0 +126 0 +127 0 +128 0 +129 0 +130 0 +131 0 +132 0 +133 0 +134 0 +135 0 +136 0 +137 0 +138 1 +139 0 +140 0 +141 0 +142 0 +143 0 +144 0 +145 0 +146 0 +147 0 +148 3 +149 0 +150 0 +151 0 +152 1 +153 0 +154 0 +155 0 +156 0 +157 2 +158 0 +159 0 +160 0 +161 0 +162 0 +163 0 +164 0 +165 0 +166 0 +167 0 +168 0 +169 0 +170 0 +171 0 +172 0 +173 0 +174 0 +175 0 +176 0 +177 0 +178 2 +179 0 +180 0 +181 0 +182 0 +183 0 +184 0 +185 0 +186 0 +187 0 +188 0 +189 0 +190 0 +191 0 +192 0 +193 0 +194 0 +195 0 +196 0 +197 0 +198 0 +199 0 +200 0 +201 0 +202 0 +203 0 +204 0 +205 0 +206 0 +207 0 +208 0 +209 3 +210 0 +211 0 +212 0 +213 0 +214 0 +215 0 +216 0 +217 0 +218 2 +219 0 +220 0 +221 0 +222 0 +223 0 +224 0 +225 1 +226 0 +227 0 +228 0 +229 2 +230 0 +231 0 +232 0 +233 0 +234 0 +235 0 +236 0 +237 0 +238 0 +239 0 +240 0 +241 0 +242 0 +243 0 +244 0 +245 0 +246 0 +247 0 +248 0 +249 0 +250 2 +251 0 +252 0 +253 0 +254 0 +255 0 +256 0 +257 0 +258 1 +259 0 +260 0 +261 3 +262 0 +263 0 +264 0 +265 0 +266 0 +267 0 +268 3 +269 0 +270 0 +271 0 +272 0 +273 0 +274 3 +275 3 +276 0 +277 1 +278 0 +279 0 +280 0 +281 0 +282 0 +283 0 +284 0 +285 0 +286 0 +287 0 +288 0 +289 0 +290 0 +291 0 +292 0 +293 0 +294 0 +295 0 +296 0 +297 0 +298 0 +299 0 +300 0 +301 0 +302 0 +303 2 +304 0 +305 0 +306 0 +307 0 +308 0 +309 0 +310 0 +311 0 +312 0 +313 0 +314 0 
+315 0 +316 0 +317 0 +318 0 +319 0 +320 0 +321 0 +322 0 +323 0 +324 0 +325 0 +326 0 +327 0 +328 0 +329 0 +330 0 +331 0 +332 0 +333 0 +334 0 +335 2 +336 0 +337 0 +338 0 +339 0 +340 0 +341 0 +342 0 +343 0 +344 0 +345 0 +346 0 +347 0 +348 0 +349 0 +350 0 +351 0 +352 0 +353 0 +354 0 +355 3 +356 0 +357 0 +358 0 +359 2 +360 0 +361 1 +362 2 +363 0 +364 0 +365 0 +366 0 +367 0 +368 0 +369 0 +370 0 +371 0 +372 0 +373 0 +374 0 +375 0 +376 0 +377 0 +378 0 +379 0 +380 0 +381 0 +382 0 +383 0 +384 0 +385 0 +386 0 +387 0 +388 0 +389 0 +390 0 +391 0 +392 0 +393 0 +394 0 +395 0 +396 0 +397 0 +398 0 +399 0 +400 0 +401 0 +402 0 +403 0 +404 0 +405 0 +406 0 +407 0 +408 0 +409 0 +410 2 +411 0 +412 0 +413 0 +414 0 +415 0 +416 0 +417 0 +418 2 +419 0 +420 3 +421 0 +422 0 +423 0 +424 0 +425 0 +426 0 +427 0 +428 0 +429 0 +430 0 +431 0 +432 0 +433 0 +434 0 +435 0 +436 0 +437 0 +438 0 +439 0 +440 0 +441 0 +442 0 +443 0 +444 0 +445 0 +446 0 +447 0 +448 0 +449 0 +450 0 +451 0 +452 0 +453 0 +454 0 +455 0 +456 0 +457 0 +458 2 +459 0 +460 0 +461 0 +462 2 +463 0 +464 0 +465 0 +466 0 +467 0 +468 0 +469 0 +470 0 +471 0 +472 0 +473 0 +474 0 +475 0 +476 0 +477 0 +478 0 +479 0 +480 0 +481 0 +482 0 +483 0 +484 0 +485 2 +486 0 +487 0 +488 0 +489 0 +490 0 +491 0 +492 0 +493 0 +494 0 +495 0 +496 0 +497 0 +498 0 +499 2 +500 0 +501 0 +502 0 +503 0 +504 0 +505 0 +506 0 +507 0 +508 0 +509 3 +510 0 +511 0 +512 0 +513 0 +514 0 +515 0 +516 0 +517 0 +518 0 +519 3 +520 0 +521 0 +522 0 +523 0 +524 0 +525 0 +526 0 +527 0 +528 0 +529 2 +530 0 +531 2 +532 0 +533 0 +534 0 +535 1 +536 0 +537 0 +538 0 +539 0 +540 0 +541 2 +542 0 +543 0 +544 0 +545 0 +546 0 +547 3 +548 0 +549 0 +550 1 +551 0 +552 0 +553 0 +554 2 +555 0 +556 0 +557 0 +558 0 +559 0 +560 0 +561 3 +562 0 +563 0 +564 0 +565 0 +566 0 +567 0 +568 0 +569 0 +570 0 +571 0 +572 0 +573 0 +574 0 +575 0 +576 0 +577 0 +578 2 +579 0 +580 0 +581 0 +582 1 +583 1 +584 0 +585 0 +586 0 +587 2 +588 0 +589 0 +590 0 +591 0 +592 0 +593 3 +594 0 +595 0 +596 0 +597 0 +598 0 +599 1 +600 
0 +601 0 +602 0 +603 0 +604 0 +605 0 +606 0 +607 0 +608 0 +609 0 +610 0 +611 0 +612 0 +613 0 +614 2 +615 0 +616 0 +617 0 +618 0 +619 0 +620 2 +621 0 +622 0 +623 0 +624 0 +625 0 +626 3 +627 0 +628 0 +629 0 +630 0 +631 2 +632 0 +633 0 +634 0 +635 0 +636 0 +637 0 +638 0 +639 0 +640 3 +641 0 +642 0 +643 0 +644 0 +645 0 +646 0 +647 0 +648 0 +649 0 +650 0 +651 0 +652 0 +653 0 +654 0 +655 0 +656 0 +657 0 +658 0 +659 0 +660 0 +661 0 +662 0 +663 0 +664 0 +665 0 +666 0 +667 0 +668 0 +669 0 +670 0 +671 0 +672 0 +673 0 +674 0 +675 0 +676 0 +677 0 +678 0 +679 0 +680 0 +681 0 +682 0 +683 0 +684 0 +685 0 +686 0 +687 0 +688 0 +689 0 +690 2 +691 3 +692 0 +693 3 +694 0 +695 0 +696 0 +697 0 +698 0 +699 0 +700 0 +701 0 +702 0 +703 0 +704 0 +705 0 +706 3 +707 0 +708 0 +709 0 +710 3 +711 0 +712 0 +713 0 +714 0 +715 0 +716 0 +717 0 +718 0 +719 0 +720 0 +721 0 +722 0 +723 0 +724 2 +725 0 +726 0 +727 0 +728 0 +729 0 +730 2 +731 0 +732 3 +733 0 +734 0 +735 0 +736 0 +737 0 +738 0 +739 3 +740 0 +741 0 +742 3 +743 0 +744 2 +745 0 +746 0 +747 0 +748 0 +749 3 +750 2 +751 0 +752 0 +753 0 +754 0 +755 0 +756 2 +757 0 +758 0 +759 0 +760 0 +761 0 +762 0 +763 0 +764 0 +765 0 +766 0 +767 0 +768 0 +769 0 +770 0 +771 0 +772 0 +773 0 +774 0 +775 0 +776 0 +777 0 +778 0 +779 0 +780 0 +781 0 +782 0 +783 0 +784 0 +785 2 +786 3 +787 0 +788 0 +789 0 +790 0 +791 0 +792 0 +793 0 +794 0 +795 0 +796 0 +797 0 +798 2 +799 0 +800 2 +801 0 +802 0 +803 2 +804 3 +805 0 +806 0 +807 3 +808 0 +809 0 +810 0 +811 0 +812 0 +813 0 +814 0 +815 0 +816 0 +817 0 +818 0 +819 0 +820 0 +821 2 +822 0 +823 2 +824 0 +825 1 +826 0 +827 0 +828 0 +829 0 +830 0 +831 0 +832 0 +833 0 +834 3 +835 0 +836 0 +837 0 +838 0 +839 0 +840 0 +841 0 +842 0 +843 0 +844 3 +845 0 +846 0 +847 0 +848 0 +849 0 +850 0 +851 0 +852 0 +853 0 +854 0 +855 0 +856 0 +857 0 +858 0 +859 0 +860 0 +861 0 +862 0 +863 0 +864 1 +865 3 +866 0 +867 0 +868 0 +869 0 +870 0 +871 0 +872 0 +873 0 +874 0 +875 0 +876 0 +877 0 +878 0 +879 0 +880 0 +881 0 +882 0 +883 0 +884 0 +885 0 
+886 0 +887 0 +888 0 +889 0 +890 2 +891 0 +892 0 +893 0 +894 0 +895 0 +896 0 +897 0 +898 0 +899 0 +900 0 +901 0 +902 0 +903 0 +904 0 +905 0 +906 0 +907 0 +908 0 +909 0 +910 0 +911 0 +912 0 +913 0 +914 1 +915 0 +916 0 +917 0 +918 0 +919 0 +920 0 +921 0 +922 0 +923 0 +924 3 +925 0 +926 0 +927 0 +928 0 +929 0 +930 0 +931 0 +932 0 +933 0 +934 0 +935 0 +936 3 +937 0 +938 0 +939 0 +940 0 +941 0 +942 0 +943 0 +944 0 +945 2 +946 0 +947 0 +948 0 +949 0 +950 1 +951 1 +952 1 +953 1 +954 1 +955 1 +956 1 +957 1 +958 1 +959 1 +960 1 +961 1 +962 1 +963 1 +964 1 +965 1 +966 1 +967 1 +968 1 +969 1 +970 1 +971 1 +972 1 +973 1 +974 1 +975 1 +976 1 +977 1 +978 1 +979 1 +980 1 +981 1 +982 1 +983 1 +984 1 +985 1 +986 1 +987 1 +988 1 +989 1 +990 1 +991 1 +992 1 +993 1 +994 1 +995 1 +996 1 +997 1 +998 1 +999 1 +1000 1 +1001 1 +1002 1 +1003 1 +1004 1 +1005 1 +1006 1 +1007 1 +1008 1 +1009 1 +1010 1 +1011 1 +1012 1 +1013 1 +1014 1 +1015 1 +1016 1 +1017 1 +1018 1 +1019 1 +1020 1 +1021 1 +1022 1 +1023 1 +1024 1 +1025 1 +1026 1 +1027 1 +1028 1 +1029 1 +1030 1 +1031 1 +1032 1 +1033 1 +1034 1 +1035 1 +1036 1 +1037 1 +1038 1 +1039 1 +1040 1 +1041 1 +1042 1 +1043 1 +1044 1 +1045 1 +1046 1 +1047 1 +1048 1 +1049 1 +1050 1 +1051 1 +1052 1 +1053 1 +1054 1 +1055 1 +1056 1 +1057 1 +1058 1 +1059 1 +1060 1 +1061 1 +1062 1 +1063 1 +1064 1 +1065 1 +1066 1 +1067 1 +1068 1 +1069 1 +1070 2 +1071 1 +1072 1 +1073 1 +1074 1 +1075 1 +1076 1 +1077 1 +1078 1 +1079 1 +1080 1 +1081 1 +1082 1 +1083 1 +1084 1 +1085 1 +1086 1 +1087 1 +1088 1 +1089 1 +1090 1 +1091 1 +1092 1 +1093 1 +1094 1 +1095 1 +1096 1 +1097 1 +1098 1 +1099 1 +1100 1 +1101 1 +1102 1 +1103 1 +1104 1 +1105 1 +1106 1 +1107 1 +1108 1 +1109 1 +1110 1 +1111 1 +1112 1 +1113 1 +1114 1 +1115 1 +1116 1 +1117 1 +1118 1 +1119 1 +1120 1 +1121 1 +1122 1 +1123 1 +1124 1 +1125 1 +1126 1 +1127 1 +1128 1 +1129 1 +1130 1 +1131 1 +1132 1 +1133 1 +1134 1 +1135 1 +1136 1 +1137 1 +1138 1 +1139 1 +1140 1 +1141 1 +1142 1 +1143 1 +1144 1 +1145 1 +1146 1 +1147 1 +1148 1 +1149 1 
+1150 1 +1151 1 +1152 1 +1153 1 +1154 0 +1155 1 +1156 1 +1157 1 +1158 1 +1159 1 +1160 1 +1161 1 +1162 1 +1163 1 +1164 1 +1165 1 +1166 0 +1167 1 +1168 1 +1169 1 +1170 1 +1171 1 +1172 1 +1173 1 +1174 1 +1175 1 +1176 1 +1177 1 +1178 1 +1179 1 +1180 1 +1181 1 +1182 1 +1183 1 +1184 1 +1185 1 +1186 1 +1187 1 +1188 1 +1189 1 +1190 1 +1191 1 +1192 1 +1193 1 +1194 1 +1195 1 +1196 1 +1197 1 +1198 1 +1199 1 +1200 1 +1201 1 +1202 1 +1203 1 +1204 1 +1205 1 +1206 1 +1207 1 +1208 1 +1209 1 +1210 1 +1211 1 +1212 1 +1213 1 +1214 1 +1215 1 +1216 1 +1217 1 +1218 1 +1219 1 +1220 1 +1221 1 +1222 1 +1223 2 +1224 1 +1225 0 +1226 1 +1227 1 +1228 1 +1229 1 +1230 1 +1231 1 +1232 1 +1233 1 +1234 1 +1235 1 +1236 1 +1237 1 +1238 1 +1239 1 +1240 1 +1241 1 +1242 1 +1243 1 +1244 1 +1245 1 +1246 1 +1247 1 +1248 1 +1249 1 +1250 1 +1251 1 +1252 1 +1253 1 +1254 1 +1255 1 +1256 1 +1257 1 +1258 1 +1259 1 +1260 1 +1261 1 +1262 1 +1263 1 +1264 1 +1265 1 +1266 1 +1267 1 +1268 1 +1269 1 +1270 1 +1271 1 +1272 1 +1273 1 +1274 1 +1275 1 +1276 1 +1277 1 +1278 0 +1279 1 +1280 1 +1281 1 +1282 1 +1283 1 +1284 1 +1285 1 +1286 1 +1287 1 +1288 1 +1289 1 +1290 1 +1291 1 +1292 1 +1293 1 +1294 1 +1295 1 +1296 1 +1297 1 +1298 1 +1299 1 +1300 1 +1301 1 +1302 1 +1303 1 +1304 1 +1305 1 +1306 1 +1307 1 +1308 1 +1309 1 +1310 1 +1311 1 +1312 1 +1313 1 +1314 1 +1315 1 +1316 1 +1317 1 +1318 1 +1319 1 +1320 1 +1321 1 +1322 1 +1323 1 +1324 1 +1325 1 +1326 1 +1327 1 +1328 1 +1329 1 +1330 1 +1331 1 +1332 1 +1333 1 +1334 1 +1335 1 +1336 1 +1337 1 +1338 1 +1339 1 +1340 1 +1341 1 +1342 1 +1343 1 +1344 1 +1345 1 +1346 1 +1347 1 +1348 1 +1349 1 +1350 1 +1351 1 +1352 1 +1353 1 +1354 1 +1355 1 +1356 1 +1357 1 +1358 1 +1359 1 +1360 1 +1361 1 +1362 1 +1363 1 +1364 1 +1365 1 +1366 1 +1367 1 +1368 1 +1369 1 +1370 1 +1371 1 +1372 1 +1373 1 +1374 1 +1375 1 +1376 1 +1377 1 +1378 1 +1379 1 +1380 1 +1381 1 +1382 1 +1383 1 +1384 1 +1385 1 +1386 1 +1387 1 +1388 1 +1389 1 +1390 1 +1391 1 +1392 1 +1393 1 +1394 1 +1395 1 +1396 1 +1397 1 +1398 1 +1399 1 
+1400 1 +1401 1 +1402 1 +1403 1 +1404 1 +1405 1 +1406 1 +1407 1 +1408 1 +1409 1 +1410 1 +1411 1 +1412 1 +1413 1 +1414 1 +1415 1 +1416 1 +1417 1 +1418 1 +1419 1 +1420 1 +1421 1 +1422 1 +1423 1 +1424 2 +1425 1 +1426 1 +1427 1 +1428 1 +1429 1 +1430 1 +1431 1 +1432 1 +1433 1 +1434 1 +1435 1 +1436 1 +1437 1 +1438 1 +1439 1 +1440 1 +1441 1 +1442 1 +1443 1 +1444 1 +1445 1 +1446 1 +1447 1 +1448 1 +1449 1 +1450 1 +1451 1 +1452 1 +1453 1 +1454 1 +1455 1 +1456 1 +1457 1 +1458 1 +1459 1 +1460 1 +1461 1 +1462 1 +1463 1 +1464 2 +1465 1 +1466 1 +1467 1 +1468 1 +1469 1 +1470 1 +1471 1 +1472 1 +1473 1 +1474 1 +1475 1 +1476 1 +1477 1 +1478 1 +1479 1 +1480 1 +1481 1 +1482 1 +1483 1 +1484 1 +1485 1 +1486 1 +1487 1 +1488 1 +1489 1 +1490 1 +1491 1 +1492 1 +1493 1 +1494 1 +1495 1 +1496 1 +1497 1 +1498 1 +1499 1 +1500 1 +1501 1 +1502 1 +1503 1 +1504 1 +1505 1 +1506 1 +1507 1 +1508 1 +1509 1 +1510 1 +1511 1 +1512 1 +1513 1 +1514 1 +1515 1 +1516 1 +1517 1 +1518 1 +1519 1 +1520 1 +1521 1 +1522 1 +1523 1 +1524 1 +1525 1 +1526 1 +1527 1 +1528 1 +1529 1 +1530 1 +1531 1 +1532 1 +1533 1 +1534 1 +1535 1 +1536 1 +1537 1 +1538 1 +1539 1 +1540 1 +1541 1 +1542 1 +1543 1 +1544 1 +1545 1 +1546 1 +1547 1 +1548 1 +1549 1 +1550 1 +1551 1 +1552 1 +1553 1 +1554 1 +1555 1 +1556 1 +1557 1 +1558 1 +1559 1 +1560 1 +1561 1 +1562 1 +1563 1 +1564 1 +1565 1 +1566 1 +1567 1 +1568 1 +1569 3 +1570 1 +1571 1 +1572 1 +1573 1 +1574 1 +1575 1 +1576 1 +1577 1 +1578 1 +1579 1 +1580 1 +1581 1 +1582 1 +1583 1 +1584 1 +1585 1 +1586 1 +1587 1 +1588 1 +1589 1 +1590 1 +1591 1 +1592 0 +1593 1 +1594 1 +1595 1 +1596 1 +1597 1 +1598 1 +1599 1 +1600 1 +1601 1 +1602 1 +1603 1 +1604 1 +1605 1 +1606 1 +1607 1 +1608 1 +1609 1 +1610 1 +1611 1 +1612 1 +1613 1 +1614 1 +1615 1 +1616 1 +1617 1 +1618 1 +1619 1 +1620 1 +1621 1 +1622 1 +1623 1 +1624 1 +1625 1 +1626 1 +1627 1 +1628 1 +1629 1 +1630 1 +1631 1 +1632 1 +1633 1 +1634 1 +1635 1 +1636 1 +1637 1 +1638 1 +1639 1 +1640 1 +1641 1 +1642 1 +1643 1 +1644 1 +1645 1 +1646 1 +1647 1 +1648 1 +1649 1 
+1650 1 +1651 1 +1652 1 +1653 1 +1654 1 +1655 1 +1656 1 +1657 1 +1658 1 +1659 1 +1660 1 +1661 1 +1662 1 +1663 1 +1664 1 +1665 1 +1666 1 +1667 1 +1668 1 +1669 1 +1670 1 +1671 1 +1672 1 +1673 1 +1674 1 +1675 1 +1676 1 +1677 1 +1678 1 +1679 1 +1680 1 +1681 3 +1682 1 +1683 1 +1684 1 +1685 1 +1686 1 +1687 1 +1688 1 +1689 1 +1690 1 +1691 1 +1692 1 +1693 1 +1694 1 +1695 1 +1696 1 +1697 1 +1698 1 +1699 1 +1700 1 +1701 1 +1702 1 +1703 1 +1704 1 +1705 1 +1706 1 +1707 1 +1708 1 +1709 1 +1710 1 +1711 1 +1712 1 +1713 1 +1714 1 +1715 1 +1716 1 +1717 1 +1718 1 +1719 1 +1720 1 +1721 1 +1722 1 +1723 1 +1724 1 +1725 1 +1726 1 +1727 1 +1728 1 +1729 1 +1730 1 +1731 1 +1732 1 +1733 1 +1734 1 +1735 1 +1736 1 +1737 1 +1738 1 +1739 1 +1740 1 +1741 1 +1742 1 +1743 1 +1744 1 +1745 1 +1746 1 +1747 1 +1748 1 +1749 1 +1750 1 +1751 1 +1752 1 +1753 1 +1754 0 +1755 1 +1756 1 +1757 1 +1758 1 +1759 1 +1760 1 +1761 1 +1762 1 +1763 1 +1764 1 +1765 1 +1766 1 +1767 1 +1768 1 +1769 1 +1770 1 +1771 1 +1772 1 +1773 1 +1774 1 +1775 1 +1776 1 +1777 1 +1778 1 +1779 1 +1780 1 +1781 1 +1782 1 +1783 1 +1784 1 +1785 1 +1786 1 +1787 1 +1788 1 +1789 1 +1790 1 +1791 1 +1792 1 +1793 1 +1794 1 +1795 1 +1796 1 +1797 1 +1798 1 +1799 1 +1800 1 +1801 1 +1802 1 +1803 1 +1804 1 +1805 1 +1806 1 +1807 1 +1808 1 +1809 1 +1810 1 +1811 1 +1812 1 +1813 1 +1814 1 +1815 1 +1816 1 +1817 1 +1818 1 +1819 1 +1820 1 +1821 1 +1822 1 +1823 1 +1824 1 +1825 1 +1826 1 +1827 1 +1828 1 +1829 1 +1830 1 +1831 1 +1832 1 +1833 1 +1834 3 +1835 1 +1836 1 +1837 1 +1838 1 +1839 1 +1840 1 +1841 1 +1842 1 +1843 1 +1844 1 +1845 1 +1846 1 +1847 1 +1848 1 +1849 1 +1850 1 +1851 1 +1852 1 +1853 1 +1854 1 +1855 1 +1856 1 +1857 1 +1858 1 +1859 1 +1860 1 +1861 1 +1862 1 +1863 1 +1864 1 +1865 1 +1866 1 +1867 1 +1868 1 +1869 1 +1870 1 +1871 1 +1872 1 +1873 1 +1874 1 +1875 1 +1876 1 +1877 1 +1878 1 +1879 1 +1880 1 +1881 1 +1882 1 +1883 1 +1884 1 +1885 1 +1886 1 +1887 1 +1888 1 +1889 1 +1890 1 +1891 1 +1892 1 +1893 1 +1894 1 +1895 1 +1896 1 +1897 1 +1898 1 +1899 1 
+1900 2 +1901 2 +1902 2 +1903 2 +1904 2 +1905 2 +1906 2 +1907 2 +1908 2 +1909 2 +1910 2 +1911 2 +1912 2 +1913 2 +1914 2 +1915 2 +1916 2 +1917 2 +1918 2 +1919 2 +1920 2 +1921 2 +1922 2 +1923 2 +1924 2 +1925 2 +1926 2 +1927 2 +1928 2 +1929 2 +1930 2 +1931 2 +1932 2 +1933 2 +1934 2 +1935 2 +1936 2 +1937 2 +1938 2 +1939 2 +1940 3 +1941 2 +1942 2 +1943 2 +1944 2 +1945 2 +1946 2 +1947 2 +1948 2 +1949 2 +1950 2 +1951 2 +1952 2 +1953 2 +1954 2 +1955 2 +1956 2 +1957 2 +1958 2 +1959 2 +1960 2 +1961 2 +1962 2 +1963 2 +1964 2 +1965 2 +1966 2 +1967 2 +1968 2 +1969 2 +1970 2 +1971 2 +1972 2 +1973 2 +1974 2 +1975 2 +1976 2 +1977 0 +1978 3 +1979 2 +1980 2 +1981 2 +1982 2 +1983 2 +1984 2 +1985 2 +1986 2 +1987 2 +1988 2 +1989 2 +1990 2 +1991 2 +1992 2 +1993 2 +1994 2 +1995 2 +1996 2 +1997 2 +1998 2 +1999 3 +2000 2 +2001 2 +2002 2 +2003 2 +2004 2 +2005 2 +2006 2 +2007 2 +2008 2 +2009 2 +2010 2 +2011 2 +2012 2 +2013 2 +2014 2 +2015 2 +2016 2 +2017 2 +2018 2 +2019 2 +2020 2 +2021 2 +2022 2 +2023 2 +2024 2 +2025 2 +2026 2 +2027 2 +2028 2 +2029 2 +2030 3 +2031 2 +2032 2 +2033 2 +2034 2 +2035 2 +2036 2 +2037 2 +2038 2 +2039 2 +2040 2 +2041 2 +2042 2 +2043 2 +2044 2 +2045 2 +2046 2 +2047 2 +2048 3 +2049 2 +2050 2 +2051 3 +2052 2 +2053 2 +2054 2 +2055 2 +2056 3 +2057 2 +2058 2 +2059 2 +2060 2 +2061 2 +2062 2 +2063 2 +2064 2 +2065 2 +2066 2 +2067 2 +2068 2 +2069 2 +2070 2 +2071 2 +2072 2 +2073 3 +2074 2 +2075 2 +2076 2 +2077 2 +2078 2 +2079 3 +2080 3 +2081 2 +2082 2 +2083 2 +2084 2 +2085 2 +2086 2 +2087 2 +2088 2 +2089 2 +2090 2 +2091 2 +2092 2 +2093 2 +2094 2 +2095 2 +2096 2 +2097 2 +2098 2 +2099 2 +2100 3 +2101 1 +2102 3 +2103 2 +2104 2 +2105 2 +2106 2 +2107 2 +2108 2 +2109 1 +2110 2 +2111 2 +2112 2 +2113 2 +2114 2 +2115 2 +2116 2 +2117 2 +2118 2 +2119 3 +2120 2 +2121 2 +2122 2 +2123 2 +2124 2 +2125 2 +2126 1 +2127 2 +2128 2 +2129 1 +2130 2 +2131 2 +2132 2 +2133 2 +2134 2 +2135 2 +2136 2 +2137 2 +2138 2 +2139 2 +2140 2 +2141 2 +2142 2 +2143 2 +2144 2 +2145 2 +2146 2 +2147 2 +2148 2 +2149 2 
+2150 2 +2151 2 +2152 2 +2153 2 +2154 2 +2155 2 +2156 2 +2157 3 +2158 2 +2159 2 +2160 3 +2161 2 +2162 2 +2163 2 +2164 2 +2165 2 +2166 2 +2167 0 +2168 2 +2169 2 +2170 2 +2171 3 +2172 2 +2173 2 +2174 2 +2175 2 +2176 2 +2177 0 +2178 2 +2179 2 +2180 3 +2181 2 +2182 2 +2183 2 +2184 2 +2185 2 +2186 2 +2187 2 +2188 2 +2189 2 +2190 2 +2191 2 +2192 2 +2193 2 +2194 2 +2195 2 +2196 2 +2197 3 +2198 2 +2199 2 +2200 2 +2201 2 +2202 2 +2203 2 +2204 2 +2205 2 +2206 2 +2207 2 +2208 2 +2209 2 +2210 2 +2211 2 +2212 2 +2213 2 +2214 2 +2215 2 +2216 0 +2217 2 +2218 2 +2219 2 +2220 2 +2221 2 +2222 0 +2223 2 +2224 2 +2225 2 +2226 2 +2227 2 +2228 2 +2229 2 +2230 2 +2231 2 +2232 2 +2233 3 +2234 3 +2235 2 +2236 0 +2237 2 +2238 2 +2239 2 +2240 2 +2241 2 +2242 2 +2243 2 +2244 2 +2245 2 +2246 2 +2247 2 +2248 3 +2249 2 +2250 3 +2251 2 +2252 2 +2253 2 +2254 2 +2255 1 +2256 2 +2257 2 +2258 2 +2259 2 +2260 2 +2261 2 +2262 2 +2263 2 +2264 2 +2265 2 +2266 2 +2267 3 +2268 2 +2269 2 +2270 2 +2271 2 +2272 2 +2273 2 +2274 2 +2275 2 +2276 2 +2277 2 +2278 3 +2279 2 +2280 3 +2281 2 +2282 2 +2283 2 +2284 2 +2285 3 +2286 2 +2287 2 +2288 2 +2289 2 +2290 2 +2291 3 +2292 2 +2293 2 +2294 2 +2295 2 +2296 2 +2297 2 +2298 2 +2299 2 +2300 2 +2301 2 +2302 2 +2303 2 +2304 2 +2305 2 +2306 2 +2307 2 +2308 2 +2309 2 +2310 3 +2311 2 +2312 2 +2313 2 +2314 2 +2315 2 +2316 2 +2317 3 +2318 3 +2319 2 +2320 2 +2321 2 +2322 2 +2323 3 +2324 2 +2325 2 +2326 2 +2327 2 +2328 2 +2329 3 +2330 0 +2331 2 +2332 2 +2333 2 +2334 2 +2335 2 +2336 2 +2337 3 +2338 2 +2339 2 +2340 2 +2341 0 +2342 2 +2343 3 +2344 2 +2345 2 +2346 2 +2347 2 +2348 2 +2349 3 +2350 2 +2351 2 +2352 2 +2353 2 +2354 3 +2355 2 +2356 2 +2357 2 +2358 2 +2359 3 +2360 2 +2361 2 +2362 2 +2363 2 +2364 2 +2365 2 +2366 2 +2367 3 +2368 2 +2369 2 +2370 3 +2371 2 +2372 2 +2373 2 +2374 2 +2375 2 +2376 2 +2377 2 +2378 2 +2379 2 +2380 3 +2381 2 +2382 3 +2383 2 +2384 2 +2385 2 +2386 3 +2387 2 +2388 2 +2389 0 +2390 3 +2391 2 +2392 2 +2393 2 +2394 2 +2395 2 +2396 2 +2397 2 +2398 2 +2399 2 
+2400 3 +2401 0 +2402 2 +2403 2 +2404 2 +2405 2 +2406 2 +2407 2 +2408 3 +2409 2 +2410 2 +2411 2 +2412 2 +2413 2 +2414 2 +2415 2 +2416 2 +2417 2 +2418 2 +2419 2 +2420 2 +2421 3 +2422 2 +2423 0 +2424 2 +2425 2 +2426 2 +2427 2 +2428 3 +2429 2 +2430 2 +2431 2 +2432 2 +2433 3 +2434 3 +2435 2 +2436 3 +2437 2 +2438 2 +2439 2 +2440 2 +2441 2 +2442 2 +2443 2 +2444 0 +2445 2 +2446 2 +2447 2 +2448 2 +2449 2 +2450 2 +2451 3 +2452 2 +2453 2 +2454 3 +2455 2 +2456 3 +2457 2 +2458 2 +2459 2 +2460 2 +2461 2 +2462 2 +2463 2 +2464 2 +2465 2 +2466 3 +2467 3 +2468 2 +2469 2 +2470 2 +2471 2 +2472 2 +2473 2 +2474 2 +2475 2 +2476 1 +2477 2 +2478 2 +2479 2 +2480 2 +2481 2 +2482 3 +2483 2 +2484 2 +2485 2 +2486 3 +2487 2 +2488 2 +2489 2 +2490 2 +2491 2 +2492 2 +2493 2 +2494 2 +2495 2 +2496 2 +2497 2 +2498 2 +2499 2 +2500 2 +2501 1 +2502 2 +2503 2 +2504 2 +2505 3 +2506 2 +2507 2 +2508 2 +2509 2 +2510 2 +2511 2 +2512 2 +2513 2 +2514 2 +2515 2 +2516 2 +2517 2 +2518 3 +2519 2 +2520 3 +2521 2 +2522 3 +2523 2 +2524 2 +2525 2 +2526 2 +2527 2 +2528 3 +2529 2 +2530 2 +2531 2 +2532 2 +2533 2 +2534 2 +2535 2 +2536 2 +2537 0 +2538 2 +2539 2 +2540 1 +2541 2 +2542 2 +2543 2 +2544 2 +2545 2 +2546 2 +2547 2 +2548 2 +2549 2 +2550 2 +2551 2 +2552 2 +2553 2 +2554 2 +2555 2 +2556 2 +2557 2 +2558 2 +2559 2 +2560 2 +2561 2 +2562 2 +2563 2 +2564 2 +2565 2 +2566 2 +2567 2 +2568 2 +2569 2 +2570 3 +2571 2 +2572 2 +2573 2 +2574 3 +2575 2 +2576 3 +2577 3 +2578 2 +2579 2 +2580 2 +2581 2 +2582 2 +2583 3 +2584 2 +2585 2 +2586 2 +2587 2 +2588 2 +2589 2 +2590 2 +2591 2 +2592 2 +2593 2 +2594 2 +2595 2 +2596 2 +2597 2 +2598 2 +2599 2 +2600 2 +2601 2 +2602 2 +2603 2 +2604 2 +2605 2 +2606 3 +2607 2 +2608 2 +2609 2 +2610 2 +2611 2 +2612 2 +2613 2 +2614 2 +2615 2 +2616 2 +2617 2 +2618 3 +2619 3 +2620 2 +2621 2 +2622 2 +2623 2 +2624 2 +2625 2 +2626 2 +2627 2 +2628 3 +2629 2 +2630 3 +2631 2 +2632 2 +2633 2 +2634 2 +2635 2 +2636 2 +2637 2 +2638 2 +2639 2 +2640 2 +2641 2 +2642 2 +2643 2 +2644 2 +2645 2 +2646 2 +2647 2 +2648 2 +2649 2 
+2650 2 +2651 2 +2652 3 +2653 2 +2654 3 +2655 2 +2656 2 +2657 2 +2658 2 +2659 2 +2660 2 +2661 3 +2662 2 +2663 3 +2664 2 +2665 2 +2666 3 +2667 2 +2668 2 +2669 2 +2670 3 +2671 2 +2672 2 +2673 3 +2674 2 +2675 2 +2676 2 +2677 2 +2678 2 +2679 2 +2680 2 +2681 2 +2682 2 +2683 2 +2684 2 +2685 2 +2686 2 +2687 2 +2688 3 +2689 2 +2690 2 +2691 2 +2692 2 +2693 2 +2694 2 +2695 3 +2696 2 +2697 0 +2698 3 +2699 2 +2700 2 +2701 2 +2702 1 +2703 2 +2704 2 +2705 2 +2706 2 +2707 2 +2708 2 +2709 2 +2710 3 +2711 2 +2712 2 +2713 2 +2714 2 +2715 2 +2716 2 +2717 2 +2718 3 +2719 2 +2720 2 +2721 2 +2722 2 +2723 2 +2724 2 +2725 2 +2726 2 +2727 2 +2728 3 +2729 2 +2730 2 +2731 2 +2732 2 +2733 2 +2734 2 +2735 2 +2736 2 +2737 2 +2738 3 +2739 2 +2740 2 +2741 2 +2742 2 +2743 2 +2744 3 +2745 3 +2746 2 +2747 2 +2748 2 +2749 2 +2750 2 +2751 0 +2752 2 +2753 2 +2754 2 +2755 2 +2756 3 +2757 2 +2758 2 +2759 2 +2760 2 +2761 2 +2762 2 +2763 2 +2764 2 +2765 2 +2766 2 +2767 2 +2768 3 +2769 2 +2770 2 +2771 2 +2772 2 +2773 2 +2774 2 +2775 2 +2776 2 +2777 2 +2778 2 +2779 2 +2780 3 +2781 2 +2782 2 +2783 2 +2784 2 +2785 2 +2786 0 +2787 2 +2788 2 +2789 2 +2790 2 +2791 3 +2792 3 +2793 2 +2794 3 +2795 2 +2796 0 +2797 2 +2798 2 +2799 2 +2800 2 +2801 2 +2802 2 +2803 2 +2804 2 +2805 2 +2806 0 +2807 2 +2808 2 +2809 2 +2810 2 +2811 2 +2812 2 +2813 2 +2814 3 +2815 2 +2816 2 +2817 3 +2818 3 +2819 2 +2820 2 +2821 3 +2822 2 +2823 2 +2824 2 +2825 2 +2826 2 +2827 2 +2828 2 +2829 2 +2830 2 +2831 2 +2832 2 +2833 2 +2834 2 +2835 2 +2836 2 +2837 2 +2838 2 +2839 2 +2840 2 +2841 2 +2842 2 +2843 2 +2844 2 +2845 2 +2846 2 +2847 2 +2848 2 +2849 2 +2850 2 +2851 3 +2852 3 +2853 2 +2854 3 +2855 0 +2856 3 +2857 3 +2858 3 +2859 3 +2860 3 +2861 3 +2862 3 +2863 3 +2864 3 +2865 3 +2866 3 +2867 3 +2868 3 +2869 3 +2870 3 +2871 2 +2872 3 +2873 3 +2874 3 +2875 3 +2876 3 +2877 3 +2878 3 +2879 3 +2880 3 +2881 2 +2882 3 +2883 3 +2884 3 +2885 3 +2886 3 +2887 2 +2888 3 +2889 1 +2890 3 +2891 3 +2892 3 +2893 3 +2894 3 +2895 3 +2896 3 +2897 3 +2898 3 +2899 3 
+2900 3 +2901 3 +2902 0 +2903 3 +2904 3 +2905 3 +2906 3 +2907 3 +2908 3 +2909 3 +2910 3 +2911 2 +2912 3 +2913 3 +2914 3 +2915 3 +2916 2 +2917 3 +2918 3 +2919 3 +2920 3 +2921 3 +2922 3 +2923 3 +2924 3 +2925 3 +2926 3 +2927 3 +2928 3 +2929 2 +2930 3 +2931 3 +2932 3 +2933 3 +2934 3 +2935 3 +2936 3 +2937 3 +2938 3 +2939 3 +2940 3 +2941 3 +2942 3 +2943 2 +2944 3 +2945 3 +2946 3 +2947 3 +2948 3 +2949 3 +2950 3 +2951 3 +2952 3 +2953 3 +2954 3 +2955 3 +2956 3 +2957 3 +2958 3 +2959 3 +2960 3 +2961 2 +2962 3 +2963 3 +2964 3 +2965 3 +2966 3 +2967 3 +2968 3 +2969 3 +2970 3 +2971 3 +2972 0 +2973 3 +2974 3 +2975 3 +2976 3 +2977 3 +2978 3 +2979 3 +2980 3 +2981 3 +2982 3 +2983 3 +2984 3 +2985 3 +2986 3 +2987 3 +2988 3 +2989 3 +2990 3 +2991 3 +2992 2 +2993 3 +2994 3 +2995 3 +2996 3 +2997 3 +2998 3 +2999 3 +3000 3 +3001 3 +3002 2 +3003 3 +3004 2 +3005 3 +3006 3 +3007 3 +3008 3 +3009 3 +3010 3 +3011 3 +3012 3 +3013 3 +3014 3 +3015 3 +3016 3 +3017 3 +3018 3 +3019 3 +3020 3 +3021 3 +3022 3 +3023 3 +3024 3 +3025 3 +3026 3 +3027 3 +3028 2 +3029 2 +3030 3 +3031 3 +3032 3 +3033 3 +3034 2 +3035 0 +3036 3 +3037 3 +3038 2 +3039 3 +3040 3 +3041 3 +3042 3 +3043 3 +3044 3 +3045 3 +3046 3 +3047 3 +3048 3 +3049 3 +3050 3 +3051 3 +3052 3 +3053 3 +3054 3 +3055 3 +3056 3 +3057 3 +3058 3 +3059 3 +3060 3 +3061 3 +3062 3 +3063 3 +3064 3 +3065 3 +3066 3 +3067 3 +3068 3 +3069 3 +3070 3 +3071 3 +3072 3 +3073 3 +3074 3 +3075 3 +3076 3 +3077 2 +3078 3 +3079 3 +3080 3 +3081 3 +3082 3 +3083 3 +3084 3 +3085 3 +3086 3 +3087 3 +3088 3 +3089 3 +3090 3 +3091 2 +3092 3 +3093 3 +3094 3 +3095 3 +3096 3 +3097 3 +3098 3 +3099 1 +3100 3 +3101 3 +3102 3 +3103 0 +3104 3 +3105 3 +3106 3 +3107 3 +3108 3 +3109 3 +3110 3 +3111 3 +3112 3 +3113 3 +3114 3 +3115 3 +3116 2 +3117 3 +3118 3 +3119 3 +3120 3 +3121 3 +3122 3 +3123 3 +3124 3 +3125 0 +3126 3 +3127 3 +3128 3 +3129 3 +3130 3 +3131 3 +3132 3 +3133 3 +3134 0 +3135 3 +3136 3 +3137 3 +3138 3 +3139 3 +3140 3 +3141 3 +3142 3 +3143 3 +3144 3 +3145 3 +3146 3 +3147 2 +3148 3 +3149 3 
+3150 3 +3151 3 +3152 3 +3153 3 +3154 3 +3155 3 +3156 3 +3157 3 +3158 3 +3159 3 +3160 3 +3161 3 +3162 3 +3163 3 +3164 3 +3165 3 +3166 3 +3167 0 +3168 3 +3169 3 +3170 3 +3171 3 +3172 3 +3173 3 +3174 3 +3175 3 +3176 3 +3177 3 +3178 3 +3179 0 +3180 3 +3181 3 +3182 2 +3183 3 +3184 3 +3185 3 +3186 3 +3187 3 +3188 3 +3189 3 +3190 3 +3191 3 +3192 3 +3193 0 +3194 3 +3195 3 +3196 3 +3197 3 +3198 3 +3199 3 +3200 3 +3201 3 +3202 3 +3203 2 +3204 3 +3205 3 +3206 3 +3207 2 +3208 3 +3209 2 +3210 3 +3211 3 +3212 3 +3213 3 +3214 3 +3215 3 +3216 3 +3217 3 +3218 3 +3219 3 +3220 3 +3221 3 +3222 3 +3223 3 +3224 3 +3225 2 +3226 3 +3227 3 +3228 3 +3229 3 +3230 3 +3231 3 +3232 3 +3233 3 +3234 3 +3235 3 +3236 3 +3237 3 +3238 0 +3239 3 +3240 3 +3241 3 +3242 3 +3243 2 +3244 3 +3245 3 +3246 3 +3247 3 +3248 3 +3249 3 +3250 3 +3251 3 +3252 3 +3253 3 +3254 3 +3255 3 +3256 3 +3257 3 +3258 3 +3259 2 +3260 3 +3261 3 +3262 3 +3263 3 +3264 3 +3265 3 +3266 3 +3267 3 +3268 2 +3269 3 +3270 3 +3271 3 +3272 3 +3273 3 +3274 3 +3275 3 +3276 3 +3277 3 +3278 3 +3279 3 +3280 3 +3281 3 +3282 3 +3283 3 +3284 3 +3285 3 +3286 3 +3287 3 +3288 3 +3289 3 +3290 3 +3291 3 +3292 3 +3293 3 +3294 3 +3295 3 +3296 3 +3297 3 +3298 3 +3299 3 +3300 3 +3301 3 +3302 3 +3303 3 +3304 3 +3305 3 +3306 3 +3307 3 +3308 3 +3309 3 +3310 3 +3311 3 +3312 3 +3313 3 +3314 3 +3315 3 +3316 3 +3317 3 +3318 3 +3319 3 +3320 3 +3321 3 +3322 3 +3323 3 +3324 0 +3325 3 +3326 3 +3327 3 +3328 3 +3329 3 +3330 3 +3331 3 +3332 3 +3333 3 +3334 1 +3335 3 +3336 3 +3337 3 +3338 3 +3339 3 +3340 3 +3341 3 +3342 3 +3343 3 +3344 3 +3345 3 +3346 3 +3347 3 +3348 3 +3349 3 +3350 3 +3351 3 +3352 3 +3353 3 +3354 3 +3355 3 +3356 3 +3357 3 +3358 2 +3359 3 +3360 3 +3361 3 +3362 3 +3363 3 +3364 3 +3365 3 +3366 3 +3367 3 +3368 3 +3369 3 +3370 3 +3371 3 +3372 3 +3373 3 +3374 3 +3375 3 +3376 3 +3377 3 +3378 3 +3379 3 +3380 3 +3381 3 +3382 3 +3383 3 +3384 3 +3385 3 +3386 3 +3387 3 +3388 3 +3389 3 +3390 3 +3391 3 +3392 3 +3393 3 +3394 3 +3395 3 +3396 3 +3397 3 +3398 3 +3399 3 
+3400 1 +3401 1 +3402 3 +3403 3 +3404 3 +3405 3 +3406 3 +3407 3 +3408 3 +3409 3 +3410 3 +3411 3 +3412 3 +3413 3 +3414 3 +3415 3 +3416 3 +3417 3 +3418 3 +3419 3 +3420 3 +3421 3 +3422 3 +3423 3 +3424 3 +3425 3 +3426 3 +3427 3 +3428 3 +3429 3 +3430 3 +3431 3 +3432 3 +3433 3 +3434 3 +3435 3 +3436 3 +3437 3 +3438 2 +3439 3 +3440 3 +3441 3 +3442 3 +3443 3 +3444 3 +3445 3 +3446 2 +3447 2 +3448 3 +3449 3 +3450 3 +3451 3 +3452 3 +3453 3 +3454 3 +3455 3 +3456 3 +3457 3 +3458 3 +3459 3 +3460 3 +3461 3 +3462 3 +3463 3 +3464 3 +3465 3 +3466 3 +3467 3 +3468 3 +3469 3 +3470 2 +3471 3 +3472 3 +3473 3 +3474 3 +3475 3 +3476 3 +3477 3 +3478 3 +3479 3 +3480 2 +3481 2 +3482 3 +3483 3 +3484 3 +3485 3 +3486 3 +3487 3 +3488 3 +3489 3 +3490 3 +3491 3 +3492 3 +3493 3 +3494 3 +3495 3 +3496 3 +3497 2 +3498 3 +3499 3 +3500 0 +3501 3 +3502 3 +3503 3 +3504 3 +3505 3 +3506 3 +3507 3 +3508 3 +3509 3 +3510 3 +3511 3 +3512 3 +3513 3 +3514 3 +3515 3 +3516 3 +3517 3 +3518 3 +3519 2 +3520 3 +3521 3 +3522 3 +3523 3 +3524 3 +3525 3 +3526 3 +3527 3 +3528 3 +3529 3 +3530 3 +3531 3 +3532 3 +3533 3 +3534 3 +3535 3 +3536 3 +3537 3 +3538 3 +3539 3 +3540 3 +3541 3 +3542 3 +3543 2 +3544 3 +3545 3 +3546 3 +3547 3 +3548 3 +3549 3 +3550 3 +3551 3 +3552 3 +3553 3 +3554 3 +3555 3 +3556 3 +3557 3 +3558 3 +3559 3 +3560 3 +3561 3 +3562 3 +3563 3 +3564 3 +3565 3 +3566 3 +3567 3 +3568 3 +3569 3 +3570 3 +3571 3 +3572 3 +3573 3 +3574 3 +3575 3 +3576 3 +3577 3 +3578 3 +3579 3 +3580 3 +3581 3 +3582 3 +3583 3 +3584 3 +3585 3 +3586 3 +3587 3 +3588 3 +3589 3 +3590 3 +3591 3 +3592 3 +3593 3 +3594 3 +3595 3 +3596 3 +3597 3 +3598 3 +3599 3 +3600 3 +3601 3 +3602 3 +3603 3 +3604 3 +3605 3 +3606 3 +3607 3 +3608 3 +3609 3 +3610 3 +3611 2 +3612 3 +3613 3 +3614 3 +3615 3 +3616 3 +3617 3 +3618 3 +3619 0 +3620 3 +3621 3 +3622 3 +3623 3 +3624 3 +3625 3 +3626 3 +3627 3 +3628 3 +3629 3 +3630 3 +3631 3 +3632 3 +3633 3 +3634 3 +3635 3 +3636 3 +3637 3 +3638 3 +3639 3 +3640 3 +3641 3 +3642 3 +3643 3 +3644 3 +3645 3 +3646 3 +3647 3 +3648 3 +3649 3 
+3650 3 +3651 3 +3652 3 +3653 3 +3654 3 +3655 3 +3656 3 +3657 3 +3658 3 +3659 3 +3660 3 +3661 3 +3662 3 +3663 3 +3664 3 +3665 3 +3666 3 +3667 3 +3668 3 +3669 3 +3670 3 +3671 3 +3672 3 +3673 2 +3674 3 +3675 3 +3676 3 +3677 3 +3678 3 +3679 3 +3680 3 +3681 3 +3682 3 +3683 3 +3684 3 +3685 3 +3686 3 +3687 3 +3688 3 +3689 3 +3690 3 +3691 3 +3692 2 +3693 3 +3694 3 +3695 3 +3696 3 +3697 3 +3698 3 +3699 3 +3700 3 +3701 0 +3702 3 +3703 3 +3704 3 +3705 3 +3706 3 +3707 0 +3708 3 +3709 3 +3710 3 +3711 3 +3712 3 +3713 3 +3714 3 +3715 3 +3716 3 +3717 3 +3718 3 +3719 3 +3720 3 +3721 2 +3722 3 +3723 3 +3724 3 +3725 2 +3726 3 +3727 3 +3728 3 +3729 3 +3730 3 +3731 3 +3732 3 +3733 3 +3734 3 +3735 3 +3736 3 +3737 3 +3738 3 +3739 3 +3740 3 +3741 3 +3742 3 +3743 3 +3744 3 +3745 3 +3746 3 +3747 3 +3748 3 +3749 3 +3750 3 +3751 3 +3752 3 +3753 3 +3754 3 +3755 3 +3756 2 +3757 3 +3758 0 +3759 3 +3760 3 +3761 3 +3762 3 +3763 2 +3764 3 +3765 3 +3766 3 +3767 3 +3768 3 +3769 3 +3770 3 +3771 3 +3772 3 +3773 3 +3774 3 +3775 3 +3776 3 +3777 3 +3778 3 +3779 3 +3780 3 +3781 3 +3782 3 +3783 3 +3784 3 +3785 3 +3786 3 +3787 3 +3788 2 +3789 3 +3790 3 +3791 3 +3792 2 +3793 2 +3794 3 +3795 3 +3796 3 +3797 3 +3798 3 +3799 3 diff --git a/out/roberta_results/README.md b/out/roberta_results/README.md new file mode 100644 index 0000000..efb30d6 --- /dev/null +++ b/out/roberta_results/README.md @@ -0,0 +1,53 @@ +--- +tags: +- generated_from_trainer +model-index: +- name: roberta_results + results: [] +--- + + + +# roberta_results + +This model is a fine-tuned version of [out/roberta](https://huggingface.co/out/roberta) on an unknown dataset. 
+It achieves the following results on the evaluation set: +- eval_loss: 0.2960 +- eval_accuracy: 0.9230 +- eval_runtime: 17.8166 +- eval_samples_per_second: 112.255 +- eval_steps_per_second: 14.032 +- step: 0 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: linear +- training_steps: 2500 + +### Framework versions + +- Transformers 4.26.1 +- Pytorch 1.13.1+cu117 +- Datasets 2.9.0 +- Tokenizers 0.13.2 diff --git a/out/roberta_results/all_results.json b/out/roberta_results/all_results.json new file mode 100644 index 0000000..3fe0a29 --- /dev/null +++ b/out/roberta_results/all_results.json @@ -0,0 +1,8 @@ +{ + "eval_accuracy": 0.9229999780654907, + "eval_loss": 0.29598742723464966, + "eval_runtime": 17.8166, + "eval_samples": 2000, + "eval_samples_per_second": 112.255, + "eval_steps_per_second": 14.032 +} \ No newline at end of file diff --git a/out/roberta_results/eval_results.json b/out/roberta_results/eval_results.json new file mode 100644 index 0000000..3fe0a29 --- /dev/null +++ b/out/roberta_results/eval_results.json @@ -0,0 +1,8 @@ +{ + "eval_accuracy": 0.9229999780654907, + "eval_loss": 0.29598742723464966, + "eval_runtime": 17.8166, + "eval_samples": 2000, + "eval_samples_per_second": 112.255, + "eval_steps_per_second": 14.032 +} \ No newline at end of file diff --git a/out/roberta_results/predict_results_None.txt b/out/roberta_results/predict_results_None.txt new file mode 100644 index 0000000..f6c33da --- /dev/null +++ b/out/roberta_results/predict_results_None.txt @@ -0,0 +1,3801 @@ +index prediction +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 +10 0 +11 1 
+12 2 +13 0 +14 0 +15 0 +16 0 +17 0 +18 0 +19 3 +20 0 +21 0 +22 0 +23 0 +24 0 +25 0 +26 0 +27 2 +28 0 +29 1 +30 0 +31 0 +32 0 +33 0 +34 0 +35 0 +36 0 +37 0 +38 0 +39 0 +40 0 +41 2 +42 0 +43 3 +44 0 +45 0 +46 0 +47 0 +48 0 +49 2 +50 0 +51 0 +52 0 +53 1 +54 1 +55 0 +56 0 +57 0 +58 0 +59 0 +60 0 +61 0 +62 0 +63 0 +64 0 +65 0 +66 0 +67 0 +68 0 +69 0 +70 0 +71 0 +72 0 +73 0 +74 0 +75 0 +76 0 +77 0 +78 0 +79 0 +80 0 +81 0 +82 0 +83 0 +84 0 +85 0 +86 0 +87 0 +88 0 +89 1 +90 2 +91 1 +92 0 +93 0 +94 0 +95 0 +96 0 +97 0 +98 0 +99 0 +100 0 +101 0 +102 0 +103 0 +104 1 +105 0 +106 3 +107 0 +108 0 +109 0 +110 0 +111 0 +112 0 +113 0 +114 1 +115 0 +116 0 +117 0 +118 0 +119 0 +120 0 +121 0 +122 0 +123 2 +124 0 +125 0 +126 0 +127 0 +128 0 +129 0 +130 0 +131 0 +132 0 +133 0 +134 0 +135 0 +136 0 +137 0 +138 1 +139 0 +140 0 +141 0 +142 0 +143 0 +144 0 +145 0 +146 0 +147 0 +148 0 +149 0 +150 0 +151 0 +152 1 +153 0 +154 0 +155 0 +156 0 +157 3 +158 0 +159 0 +160 0 +161 0 +162 0 +163 0 +164 0 +165 0 +166 0 +167 0 +168 0 +169 0 +170 0 +171 0 +172 0 +173 0 +174 0 +175 0 +176 0 +177 0 +178 2 +179 0 +180 0 +181 0 +182 0 +183 0 +184 0 +185 0 +186 0 +187 0 +188 0 +189 0 +190 0 +191 0 +192 0 +193 0 +194 0 +195 0 +196 0 +197 0 +198 0 +199 0 +200 0 +201 0 +202 0 +203 0 +204 0 +205 0 +206 0 +207 0 +208 0 +209 3 +210 0 +211 0 +212 0 +213 0 +214 0 +215 0 +216 0 +217 0 +218 2 +219 0 +220 0 +221 0 +222 0 +223 0 +224 0 +225 1 +226 0 +227 0 +228 0 +229 2 +230 0 +231 0 +232 0 +233 0 +234 0 +235 0 +236 0 +237 0 +238 0 +239 0 +240 0 +241 0 +242 0 +243 0 +244 0 +245 0 +246 0 +247 0 +248 0 +249 0 +250 2 +251 0 +252 0 +253 0 +254 0 +255 0 +256 0 +257 0 +258 1 +259 0 +260 0 +261 3 +262 0 +263 0 +264 0 +265 0 +266 0 +267 0 +268 0 +269 0 +270 0 +271 0 +272 0 +273 0 +274 0 +275 3 +276 0 +277 0 +278 0 +279 0 +280 0 +281 0 +282 0 +283 0 +284 0 +285 0 +286 0 +287 0 +288 0 +289 0 +290 0 +291 0 +292 0 +293 0 +294 0 +295 0 +296 0 +297 0 +298 0 +299 0 +300 0 +301 0 +302 0 +303 3 +304 0 +305 0 +306 0 +307 0 +308 0 +309 0 
+310 0 +311 0 +312 0 +313 0 +314 0 +315 0 +316 0 +317 0 +318 1 +319 0 +320 0 +321 0 +322 0 +323 0 +324 0 +325 0 +326 0 +327 0 +328 0 +329 0 +330 0 +331 0 +332 0 +333 0 +334 0 +335 2 +336 0 +337 0 +338 0 +339 0 +340 0 +341 0 +342 0 +343 0 +344 0 +345 0 +346 0 +347 0 +348 0 +349 0 +350 0 +351 0 +352 0 +353 0 +354 0 +355 3 +356 0 +357 0 +358 0 +359 2 +360 0 +361 1 +362 2 +363 0 +364 0 +365 0 +366 0 +367 3 +368 0 +369 0 +370 0 +371 0 +372 0 +373 0 +374 0 +375 0 +376 0 +377 0 +378 0 +379 0 +380 0 +381 0 +382 0 +383 0 +384 0 +385 0 +386 0 +387 0 +388 0 +389 0 +390 0 +391 0 +392 0 +393 0 +394 0 +395 0 +396 0 +397 0 +398 0 +399 0 +400 0 +401 0 +402 0 +403 0 +404 0 +405 0 +406 0 +407 0 +408 0 +409 0 +410 2 +411 0 +412 0 +413 0 +414 0 +415 0 +416 0 +417 0 +418 2 +419 0 +420 3 +421 0 +422 0 +423 0 +424 0 +425 0 +426 0 +427 0 +428 0 +429 0 +430 0 +431 0 +432 0 +433 0 +434 0 +435 0 +436 0 +437 0 +438 0 +439 0 +440 0 +441 0 +442 0 +443 0 +444 0 +445 0 +446 0 +447 0 +448 0 +449 0 +450 0 +451 0 +452 0 +453 0 +454 0 +455 0 +456 0 +457 0 +458 2 +459 0 +460 0 +461 0 +462 0 +463 0 +464 0 +465 0 +466 0 +467 0 +468 0 +469 0 +470 0 +471 0 +472 0 +473 0 +474 0 +475 0 +476 0 +477 0 +478 0 +479 0 +480 0 +481 0 +482 0 +483 0 +484 0 +485 2 +486 0 +487 0 +488 0 +489 0 +490 0 +491 0 +492 0 +493 0 +494 0 +495 0 +496 0 +497 0 +498 0 +499 2 +500 0 +501 0 +502 0 +503 0 +504 0 +505 0 +506 0 +507 0 +508 0 +509 3 +510 0 +511 0 +512 0 +513 0 +514 0 +515 0 +516 0 +517 0 +518 0 +519 0 +520 0 +521 0 +522 0 +523 0 +524 0 +525 0 +526 0 +527 0 +528 0 +529 2 +530 0 +531 2 +532 0 +533 0 +534 0 +535 0 +536 0 +537 0 +538 0 +539 0 +540 0 +541 2 +542 0 +543 0 +544 0 +545 2 +546 0 +547 3 +548 0 +549 0 +550 1 +551 0 +552 0 +553 0 +554 2 +555 0 +556 0 +557 0 +558 0 +559 3 +560 0 +561 0 +562 0 +563 0 +564 0 +565 0 +566 0 +567 0 +568 0 +569 0 +570 0 +571 0 +572 0 +573 0 +574 0 +575 0 +576 0 +577 0 +578 2 +579 0 +580 0 +581 0 +582 1 +583 1 +584 0 +585 0 +586 0 +587 2 +588 0 +589 0 +590 0 +591 0 +592 0 +593 0 +594 0 +595 
0 +596 0 +597 0 +598 0 +599 1 +600 0 +601 0 +602 0 +603 0 +604 3 +605 0 +606 0 +607 0 +608 0 +609 0 +610 0 +611 0 +612 2 +613 0 +614 2 +615 0 +616 0 +617 0 +618 0 +619 0 +620 2 +621 0 +622 0 +623 0 +624 0 +625 0 +626 0 +627 0 +628 0 +629 0 +630 0 +631 0 +632 0 +633 0 +634 0 +635 0 +636 0 +637 0 +638 0 +639 0 +640 3 +641 0 +642 0 +643 0 +644 0 +645 0 +646 0 +647 0 +648 0 +649 0 +650 0 +651 0 +652 0 +653 0 +654 0 +655 0 +656 0 +657 0 +658 0 +659 0 +660 0 +661 0 +662 0 +663 0 +664 0 +665 0 +666 0 +667 0 +668 0 +669 0 +670 0 +671 0 +672 0 +673 0 +674 0 +675 0 +676 0 +677 0 +678 0 +679 0 +680 0 +681 0 +682 0 +683 0 +684 0 +685 0 +686 0 +687 0 +688 0 +689 0 +690 0 +691 3 +692 0 +693 3 +694 0 +695 0 +696 0 +697 0 +698 3 +699 0 +700 0 +701 0 +702 0 +703 0 +704 0 +705 0 +706 3 +707 0 +708 0 +709 0 +710 3 +711 0 +712 0 +713 0 +714 0 +715 0 +716 0 +717 0 +718 0 +719 0 +720 0 +721 0 +722 0 +723 0 +724 2 +725 0 +726 0 +727 0 +728 2 +729 0 +730 2 +731 0 +732 3 +733 0 +734 0 +735 1 +736 0 +737 0 +738 0 +739 3 +740 0 +741 0 +742 3 +743 0 +744 2 +745 0 +746 0 +747 0 +748 0 +749 3 +750 2 +751 0 +752 0 +753 0 +754 0 +755 0 +756 0 +757 0 +758 0 +759 0 +760 0 +761 0 +762 0 +763 0 +764 0 +765 0 +766 0 +767 0 +768 0 +769 0 +770 0 +771 0 +772 0 +773 0 +774 0 +775 0 +776 0 +777 0 +778 0 +779 0 +780 0 +781 0 +782 0 +783 0 +784 0 +785 0 +786 3 +787 0 +788 0 +789 0 +790 0 +791 0 +792 0 +793 0 +794 0 +795 0 +796 0 +797 0 +798 2 +799 0 +800 2 +801 0 +802 0 +803 0 +804 3 +805 0 +806 0 +807 3 +808 0 +809 0 +810 0 +811 0 +812 0 +813 0 +814 0 +815 0 +816 0 +817 0 +818 0 +819 0 +820 0 +821 0 +822 0 +823 0 +824 0 +825 0 +826 0 +827 0 +828 0 +829 0 +830 0 +831 0 +832 0 +833 0 +834 3 +835 0 +836 0 +837 0 +838 0 +839 0 +840 0 +841 3 +842 0 +843 0 +844 3 +845 0 +846 0 +847 0 +848 0 +849 0 +850 0 +851 0 +852 0 +853 0 +854 0 +855 0 +856 0 +857 0 +858 0 +859 0 +860 0 +861 0 +862 0 +863 0 +864 1 +865 3 +866 0 +867 0 +868 0 +869 0 +870 0 +871 0 +872 0 +873 0 +874 0 +875 0 +876 0 +877 0 +878 2 +879 0 +880 0 
+881 0 +882 0 +883 0 +884 0 +885 0 +886 0 +887 0 +888 0 +889 0 +890 2 +891 0 +892 0 +893 0 +894 0 +895 0 +896 0 +897 0 +898 0 +899 0 +900 0 +901 0 +902 0 +903 0 +904 0 +905 0 +906 0 +907 0 +908 0 +909 0 +910 0 +911 0 +912 0 +913 1 +914 1 +915 0 +916 0 +917 0 +918 0 +919 0 +920 0 +921 0 +922 0 +923 0 +924 3 +925 0 +926 0 +927 0 +928 0 +929 0 +930 0 +931 0 +932 0 +933 0 +934 0 +935 0 +936 3 +937 0 +938 0 +939 0 +940 0 +941 0 +942 0 +943 0 +944 0 +945 2 +946 0 +947 0 +948 0 +949 0 +950 1 +951 1 +952 1 +953 1 +954 1 +955 1 +956 1 +957 1 +958 1 +959 1 +960 1 +961 1 +962 1 +963 1 +964 1 +965 1 +966 1 +967 1 +968 1 +969 1 +970 1 +971 1 +972 1 +973 1 +974 1 +975 1 +976 1 +977 1 +978 1 +979 1 +980 1 +981 1 +982 1 +983 1 +984 1 +985 1 +986 1 +987 1 +988 1 +989 1 +990 1 +991 1 +992 1 +993 1 +994 1 +995 1 +996 1 +997 1 +998 1 +999 1 +1000 1 +1001 1 +1002 1 +1003 1 +1004 1 +1005 1 +1006 1 +1007 1 +1008 1 +1009 1 +1010 1 +1011 1 +1012 1 +1013 1 +1014 1 +1015 1 +1016 1 +1017 1 +1018 1 +1019 1 +1020 1 +1021 1 +1022 1 +1023 1 +1024 1 +1025 1 +1026 1 +1027 1 +1028 1 +1029 1 +1030 1 +1031 1 +1032 1 +1033 1 +1034 1 +1035 1 +1036 1 +1037 1 +1038 1 +1039 1 +1040 1 +1041 1 +1042 1 +1043 1 +1044 1 +1045 1 +1046 1 +1047 1 +1048 1 +1049 1 +1050 1 +1051 1 +1052 1 +1053 1 +1054 1 +1055 1 +1056 1 +1057 1 +1058 1 +1059 1 +1060 1 +1061 1 +1062 1 +1063 1 +1064 1 +1065 1 +1066 1 +1067 1 +1068 1 +1069 1 +1070 2 +1071 1 +1072 1 +1073 1 +1074 1 +1075 1 +1076 1 +1077 1 +1078 1 +1079 1 +1080 1 +1081 1 +1082 1 +1083 1 +1084 1 +1085 1 +1086 1 +1087 1 +1088 1 +1089 1 +1090 1 +1091 1 +1092 1 +1093 1 +1094 1 +1095 1 +1096 1 +1097 1 +1098 1 +1099 1 +1100 1 +1101 1 +1102 1 +1103 1 +1104 1 +1105 1 +1106 1 +1107 1 +1108 1 +1109 1 +1110 1 +1111 1 +1112 1 +1113 1 +1114 1 +1115 1 +1116 1 +1117 1 +1118 1 +1119 1 +1120 1 +1121 1 +1122 1 +1123 1 +1124 1 +1125 1 +1126 1 +1127 1 +1128 1 +1129 1 +1130 1 +1131 1 +1132 1 +1133 1 +1134 1 +1135 1 +1136 1 +1137 1 +1138 1 +1139 1 +1140 1 +1141 1 +1142 1 +1143 1 +1144 1 +1145 
1 +1146 1 +1147 1 +1148 1 +1149 1 +1150 1 +1151 1 +1152 1 +1153 1 +1154 0 +1155 1 +1156 1 +1157 1 +1158 1 +1159 1 +1160 1 +1161 1 +1162 1 +1163 1 +1164 1 +1165 1 +1166 0 +1167 1 +1168 1 +1169 1 +1170 1 +1171 1 +1172 1 +1173 1 +1174 1 +1175 1 +1176 1 +1177 1 +1178 1 +1179 1 +1180 1 +1181 1 +1182 1 +1183 1 +1184 1 +1185 1 +1186 1 +1187 1 +1188 1 +1189 1 +1190 1 +1191 1 +1192 1 +1193 1 +1194 1 +1195 1 +1196 1 +1197 1 +1198 1 +1199 1 +1200 1 +1201 1 +1202 1 +1203 1 +1204 1 +1205 1 +1206 1 +1207 1 +1208 1 +1209 1 +1210 1 +1211 1 +1212 1 +1213 1 +1214 1 +1215 1 +1216 1 +1217 1 +1218 1 +1219 1 +1220 1 +1221 1 +1222 1 +1223 2 +1224 1 +1225 1 +1226 1 +1227 1 +1228 1 +1229 1 +1230 1 +1231 1 +1232 1 +1233 1 +1234 1 +1235 1 +1236 1 +1237 1 +1238 1 +1239 1 +1240 1 +1241 1 +1242 1 +1243 1 +1244 1 +1245 1 +1246 1 +1247 1 +1248 1 +1249 1 +1250 1 +1251 1 +1252 1 +1253 1 +1254 1 +1255 1 +1256 1 +1257 1 +1258 1 +1259 1 +1260 1 +1261 1 +1262 1 +1263 1 +1264 1 +1265 1 +1266 1 +1267 1 +1268 1 +1269 1 +1270 1 +1271 1 +1272 1 +1273 1 +1274 1 +1275 1 +1276 1 +1277 1 +1278 1 +1279 1 +1280 1 +1281 1 +1282 1 +1283 1 +1284 1 +1285 1 +1286 1 +1287 1 +1288 1 +1289 1 +1290 1 +1291 1 +1292 1 +1293 1 +1294 1 +1295 1 +1296 1 +1297 1 +1298 1 +1299 1 +1300 1 +1301 1 +1302 1 +1303 1 +1304 1 +1305 1 +1306 1 +1307 1 +1308 1 +1309 1 +1310 1 +1311 1 +1312 1 +1313 1 +1314 1 +1315 1 +1316 1 +1317 1 +1318 1 +1319 1 +1320 1 +1321 1 +1322 1 +1323 1 +1324 1 +1325 1 +1326 1 +1327 1 +1328 1 +1329 1 +1330 1 +1331 1 +1332 1 +1333 1 +1334 1 +1335 1 +1336 1 +1337 1 +1338 1 +1339 1 +1340 1 +1341 1 +1342 1 +1343 1 +1344 1 +1345 1 +1346 1 +1347 1 +1348 1 +1349 1 +1350 1 +1351 1 +1352 1 +1353 1 +1354 1 +1355 1 +1356 1 +1357 1 +1358 1 +1359 1 +1360 1 +1361 1 +1362 1 +1363 1 +1364 1 +1365 1 +1366 1 +1367 1 +1368 1 +1369 1 +1370 1 +1371 1 +1372 1 +1373 1 +1374 1 +1375 1 +1376 1 +1377 1 +1378 1 +1379 1 +1380 1 +1381 1 +1382 1 +1383 1 +1384 1 +1385 1 +1386 1 +1387 1 +1388 1 +1389 1 +1390 1 +1391 1 +1392 1 +1393 1 +1394 1 +1395 
1 +1396 1 +1397 1 +1398 1 +1399 1 +1400 1 +1401 1 +1402 1 +1403 1 +1404 1 +1405 1 +1406 1 +1407 2 +1408 1 +1409 1 +1410 1 +1411 1 +1412 1 +1413 1 +1414 1 +1415 1 +1416 1 +1417 1 +1418 1 +1419 1 +1420 1 +1421 1 +1422 1 +1423 1 +1424 0 +1425 1 +1426 1 +1427 1 +1428 1 +1429 1 +1430 1 +1431 1 +1432 1 +1433 1 +1434 1 +1435 1 +1436 1 +1437 1 +1438 1 +1439 1 +1440 1 +1441 1 +1442 1 +1443 1 +1444 1 +1445 1 +1446 1 +1447 1 +1448 1 +1449 1 +1450 1 +1451 1 +1452 1 +1453 1 +1454 1 +1455 1 +1456 1 +1457 1 +1458 1 +1459 1 +1460 1 +1461 1 +1462 1 +1463 1 +1464 2 +1465 1 +1466 1 +1467 1 +1468 1 +1469 1 +1470 1 +1471 1 +1472 1 +1473 1 +1474 1 +1475 1 +1476 1 +1477 1 +1478 1 +1479 1 +1480 1 +1481 1 +1482 1 +1483 1 +1484 1 +1485 1 +1486 1 +1487 1 +1488 1 +1489 1 +1490 1 +1491 1 +1492 1 +1493 1 +1494 1 +1495 1 +1496 1 +1497 1 +1498 1 +1499 1 +1500 1 +1501 1 +1502 1 +1503 1 +1504 1 +1505 1 +1506 1 +1507 1 +1508 1 +1509 1 +1510 1 +1511 1 +1512 1 +1513 1 +1514 1 +1515 1 +1516 1 +1517 1 +1518 1 +1519 1 +1520 1 +1521 1 +1522 1 +1523 1 +1524 1 +1525 1 +1526 1 +1527 1 +1528 1 +1529 1 +1530 1 +1531 1 +1532 1 +1533 1 +1534 1 +1535 1 +1536 1 +1537 1 +1538 1 +1539 1 +1540 1 +1541 1 +1542 1 +1543 1 +1544 1 +1545 1 +1546 1 +1547 1 +1548 1 +1549 1 +1550 1 +1551 1 +1552 1 +1553 1 +1554 1 +1555 1 +1556 1 +1557 1 +1558 1 +1559 1 +1560 1 +1561 1 +1562 1 +1563 1 +1564 1 +1565 1 +1566 1 +1567 1 +1568 1 +1569 3 +1570 1 +1571 1 +1572 1 +1573 1 +1574 1 +1575 1 +1576 1 +1577 1 +1578 1 +1579 1 +1580 1 +1581 1 +1582 1 +1583 1 +1584 1 +1585 1 +1586 1 +1587 1 +1588 1 +1589 1 +1590 1 +1591 1 +1592 1 +1593 1 +1594 1 +1595 1 +1596 1 +1597 1 +1598 1 +1599 1 +1600 1 +1601 1 +1602 1 +1603 1 +1604 1 +1605 1 +1606 1 +1607 1 +1608 1 +1609 1 +1610 1 +1611 1 +1612 1 +1613 1 +1614 1 +1615 1 +1616 1 +1617 1 +1618 1 +1619 1 +1620 1 +1621 1 +1622 1 +1623 1 +1624 1 +1625 1 +1626 1 +1627 1 +1628 1 +1629 1 +1630 1 +1631 1 +1632 1 +1633 1 +1634 1 +1635 1 +1636 1 +1637 1 +1638 1 +1639 1 +1640 1 +1641 1 +1642 1 +1643 1 +1644 1 +1645 
1 +1646 1 +1647 1 +1648 1 +1649 1 +1650 1 +1651 1 +1652 1 +1653 1 +1654 1 +1655 1 +1656 1 +1657 1 +1658 1 +1659 1 +1660 1 +1661 1 +1662 1 +1663 1 +1664 1 +1665 1 +1666 1 +1667 1 +1668 1 +1669 1 +1670 1 +1671 1 +1672 1 +1673 1 +1674 1 +1675 1 +1676 1 +1677 1 +1678 1 +1679 1 +1680 1 +1681 3 +1682 1 +1683 1 +1684 1 +1685 1 +1686 1 +1687 1 +1688 1 +1689 1 +1690 1 +1691 1 +1692 1 +1693 1 +1694 1 +1695 1 +1696 1 +1697 1 +1698 1 +1699 1 +1700 1 +1701 1 +1702 1 +1703 0 +1704 1 +1705 1 +1706 1 +1707 1 +1708 1 +1709 1 +1710 1 +1711 1 +1712 1 +1713 1 +1714 1 +1715 1 +1716 1 +1717 1 +1718 1 +1719 1 +1720 1 +1721 1 +1722 1 +1723 1 +1724 1 +1725 1 +1726 1 +1727 1 +1728 1 +1729 1 +1730 1 +1731 1 +1732 1 +1733 1 +1734 1 +1735 1 +1736 1 +1737 1 +1738 1 +1739 1 +1740 1 +1741 1 +1742 1 +1743 1 +1744 1 +1745 1 +1746 1 +1747 1 +1748 1 +1749 1 +1750 1 +1751 1 +1752 1 +1753 1 +1754 2 +1755 1 +1756 1 +1757 1 +1758 1 +1759 1 +1760 1 +1761 1 +1762 1 +1763 1 +1764 1 +1765 1 +1766 1 +1767 1 +1768 1 +1769 1 +1770 1 +1771 1 +1772 1 +1773 1 +1774 1 +1775 1 +1776 1 +1777 1 +1778 1 +1779 1 +1780 1 +1781 1 +1782 1 +1783 1 +1784 1 +1785 1 +1786 1 +1787 1 +1788 1 +1789 1 +1790 1 +1791 1 +1792 1 +1793 1 +1794 1 +1795 1 +1796 1 +1797 1 +1798 1 +1799 1 +1800 1 +1801 1 +1802 1 +1803 1 +1804 1 +1805 1 +1806 1 +1807 1 +1808 1 +1809 1 +1810 1 +1811 1 +1812 1 +1813 1 +1814 1 +1815 1 +1816 1 +1817 1 +1818 1 +1819 1 +1820 0 +1821 1 +1822 1 +1823 1 +1824 1 +1825 1 +1826 1 +1827 1 +1828 1 +1829 1 +1830 1 +1831 1 +1832 1 +1833 1 +1834 3 +1835 1 +1836 1 +1837 1 +1838 1 +1839 1 +1840 1 +1841 1 +1842 1 +1843 1 +1844 1 +1845 1 +1846 1 +1847 1 +1848 1 +1849 1 +1850 1 +1851 1 +1852 1 +1853 1 +1854 1 +1855 1 +1856 1 +1857 1 +1858 1 +1859 1 +1860 1 +1861 1 +1862 1 +1863 1 +1864 1 +1865 1 +1866 1 +1867 1 +1868 1 +1869 1 +1870 1 +1871 1 +1872 1 +1873 1 +1874 1 +1875 1 +1876 1 +1877 1 +1878 1 +1879 1 +1880 1 +1881 1 +1882 1 +1883 1 +1884 1 +1885 1 +1886 1 +1887 1 +1888 1 +1889 1 +1890 1 +1891 1 +1892 1 +1893 1 +1894 1 +1895 
1 +1896 1 +1897 1 +1898 1 +1899 1 +1900 2 +1901 2 +1902 2 +1903 2 +1904 2 +1905 2 +1906 2 +1907 2 +1908 2 +1909 2 +1910 2 +1911 2 +1912 2 +1913 2 +1914 2 +1915 2 +1916 2 +1917 2 +1918 2 +1919 2 +1920 2 +1921 2 +1922 2 +1923 2 +1924 2 +1925 2 +1926 2 +1927 2 +1928 2 +1929 2 +1930 2 +1931 2 +1932 2 +1933 2 +1934 2 +1935 2 +1936 2 +1937 2 +1938 2 +1939 2 +1940 3 +1941 2 +1942 2 +1943 2 +1944 2 +1945 2 +1946 2 +1947 2 +1948 2 +1949 2 +1950 2 +1951 2 +1952 2 +1953 2 +1954 2 +1955 2 +1956 2 +1957 2 +1958 2 +1959 2 +1960 2 +1961 2 +1962 2 +1963 2 +1964 2 +1965 2 +1966 2 +1967 2 +1968 2 +1969 2 +1970 2 +1971 2 +1972 2 +1973 2 +1974 2 +1975 2 +1976 2 +1977 0 +1978 3 +1979 2 +1980 2 +1981 2 +1982 2 +1983 2 +1984 2 +1985 2 +1986 2 +1987 2 +1988 2 +1989 2 +1990 2 +1991 2 +1992 2 +1993 2 +1994 2 +1995 2 +1996 2 +1997 2 +1998 2 +1999 3 +2000 2 +2001 2 +2002 2 +2003 2 +2004 2 +2005 2 +2006 2 +2007 2 +2008 2 +2009 2 +2010 2 +2011 2 +2012 2 +2013 2 +2014 2 +2015 2 +2016 2 +2017 2 +2018 2 +2019 2 +2020 2 +2021 2 +2022 2 +2023 2 +2024 2 +2025 2 +2026 2 +2027 2 +2028 2 +2029 2 +2030 3 +2031 2 +2032 2 +2033 2 +2034 2 +2035 2 +2036 2 +2037 2 +2038 2 +2039 2 +2040 2 +2041 2 +2042 2 +2043 2 +2044 2 +2045 2 +2046 2 +2047 2 +2048 3 +2049 2 +2050 2 +2051 3 +2052 2 +2053 2 +2054 2 +2055 2 +2056 3 +2057 2 +2058 2 +2059 2 +2060 2 +2061 2 +2062 2 +2063 2 +2064 2 +2065 2 +2066 2 +2067 2 +2068 2 +2069 2 +2070 2 +2071 2 +2072 2 +2073 3 +2074 2 +2075 2 +2076 2 +2077 3 +2078 2 +2079 3 +2080 3 +2081 2 +2082 2 +2083 2 +2084 2 +2085 2 +2086 2 +2087 2 +2088 2 +2089 2 +2090 2 +2091 2 +2092 2 +2093 2 +2094 2 +2095 2 +2096 2 +2097 2 +2098 2 +2099 2 +2100 3 +2101 1 +2102 3 +2103 2 +2104 2 +2105 2 +2106 2 +2107 2 +2108 2 +2109 2 +2110 2 +2111 2 +2112 2 +2113 2 +2114 2 +2115 2 +2116 2 +2117 2 +2118 2 +2119 3 +2120 2 +2121 2 +2122 2 +2123 2 +2124 2 +2125 2 +2126 2 +2127 2 +2128 2 +2129 2 +2130 2 +2131 2 +2132 2 +2133 2 +2134 2 +2135 2 +2136 2 +2137 2 +2138 2 +2139 2 +2140 2 +2141 2 +2142 2 +2143 2 +2144 2 +2145 
2 +2146 2 +2147 2 +2148 2 +2149 2 +2150 2 +2151 2 +2152 2 +2153 2 +2154 2 +2155 2 +2156 2 +2157 2 +2158 2 +2159 2 +2160 3 +2161 2 +2162 2 +2163 2 +2164 2 +2165 2 +2166 2 +2167 2 +2168 2 +2169 2 +2170 2 +2171 2 +2172 2 +2173 2 +2174 2 +2175 2 +2176 3 +2177 0 +2178 2 +2179 2 +2180 2 +2181 2 +2182 2 +2183 2 +2184 2 +2185 2 +2186 2 +2187 2 +2188 2 +2189 2 +2190 2 +2191 2 +2192 2 +2193 2 +2194 2 +2195 2 +2196 2 +2197 3 +2198 2 +2199 2 +2200 2 +2201 2 +2202 2 +2203 2 +2204 2 +2205 2 +2206 2 +2207 2 +2208 2 +2209 2 +2210 2 +2211 2 +2212 2 +2213 2 +2214 2 +2215 2 +2216 0 +2217 2 +2218 2 +2219 2 +2220 2 +2221 2 +2222 0 +2223 2 +2224 2 +2225 2 +2226 2 +2227 2 +2228 2 +2229 2 +2230 2 +2231 2 +2232 2 +2233 3 +2234 3 +2235 2 +2236 0 +2237 2 +2238 2 +2239 2 +2240 2 +2241 2 +2242 3 +2243 2 +2244 2 +2245 2 +2246 2 +2247 3 +2248 3 +2249 2 +2250 2 +2251 2 +2252 2 +2253 2 +2254 2 +2255 1 +2256 2 +2257 2 +2258 2 +2259 2 +2260 2 +2261 2 +2262 2 +2263 2 +2264 2 +2265 2 +2266 2 +2267 3 +2268 2 +2269 2 +2270 2 +2271 2 +2272 2 +2273 2 +2274 2 +2275 2 +2276 2 +2277 2 +2278 3 +2279 2 +2280 3 +2281 2 +2282 2 +2283 2 +2284 2 +2285 3 +2286 2 +2287 2 +2288 2 +2289 2 +2290 2 +2291 3 +2292 2 +2293 2 +2294 2 +2295 2 +2296 2 +2297 2 +2298 2 +2299 2 +2300 2 +2301 2 +2302 2 +2303 2 +2304 2 +2305 2 +2306 2 +2307 2 +2308 2 +2309 2 +2310 2 +2311 2 +2312 2 +2313 2 +2314 2 +2315 2 +2316 2 +2317 3 +2318 3 +2319 2 +2320 2 +2321 2 +2322 2 +2323 2 +2324 2 +2325 2 +2326 2 +2327 2 +2328 2 +2329 3 +2330 0 +2331 2 +2332 2 +2333 2 +2334 2 +2335 2 +2336 2 +2337 2 +2338 2 +2339 2 +2340 2 +2341 0 +2342 2 +2343 3 +2344 2 +2345 2 +2346 2 +2347 2 +2348 2 +2349 3 +2350 2 +2351 2 +2352 2 +2353 2 +2354 3 +2355 2 +2356 2 +2357 2 +2358 2 +2359 3 +2360 2 +2361 2 +2362 2 +2363 2 +2364 2 +2365 2 +2366 2 +2367 3 +2368 2 +2369 2 +2370 3 +2371 2 +2372 2 +2373 2 +2374 2 +2375 2 +2376 2 +2377 2 +2378 2 +2379 2 +2380 3 +2381 2 +2382 3 +2383 2 +2384 2 +2385 2 +2386 3 +2387 2 +2388 2 +2389 0 +2390 3 +2391 2 +2392 2 +2393 2 +2394 2 +2395 
2 +2396 2 +2397 2 +2398 2 +2399 2 +2400 2 +2401 2 +2402 2 +2403 2 +2404 2 +2405 2 +2406 2 +2407 2 +2408 2 +2409 2 +2410 2 +2411 2 +2412 2 +2413 2 +2414 2 +2415 2 +2416 2 +2417 2 +2418 2 +2419 2 +2420 2 +2421 3 +2422 2 +2423 0 +2424 2 +2425 2 +2426 2 +2427 2 +2428 3 +2429 2 +2430 2 +2431 2 +2432 2 +2433 3 +2434 3 +2435 2 +2436 3 +2437 2 +2438 2 +2439 2 +2440 2 +2441 2 +2442 2 +2443 2 +2444 2 +2445 2 +2446 2 +2447 2 +2448 2 +2449 2 +2450 2 +2451 3 +2452 2 +2453 2 +2454 3 +2455 2 +2456 2 +2457 2 +2458 2 +2459 2 +2460 2 +2461 2 +2462 2 +2463 2 +2464 2 +2465 2 +2466 3 +2467 2 +2468 2 +2469 2 +2470 2 +2471 2 +2472 2 +2473 2 +2474 2 +2475 2 +2476 1 +2477 2 +2478 2 +2479 2 +2480 2 +2481 2 +2482 3 +2483 2 +2484 2 +2485 2 +2486 3 +2487 2 +2488 2 +2489 2 +2490 2 +2491 2 +2492 2 +2493 2 +2494 2 +2495 2 +2496 2 +2497 2 +2498 2 +2499 2 +2500 2 +2501 1 +2502 2 +2503 2 +2504 2 +2505 3 +2506 2 +2507 2 +2508 2 +2509 2 +2510 2 +2511 2 +2512 2 +2513 2 +2514 2 +2515 2 +2516 2 +2517 2 +2518 3 +2519 2 +2520 2 +2521 2 +2522 3 +2523 2 +2524 2 +2525 2 +2526 2 +2527 2 +2528 2 +2529 2 +2530 2 +2531 2 +2532 2 +2533 2 +2534 2 +2535 2 +2536 2 +2537 2 +2538 2 +2539 2 +2540 2 +2541 2 +2542 2 +2543 3 +2544 2 +2545 2 +2546 2 +2547 2 +2548 2 +2549 2 +2550 2 +2551 2 +2552 2 +2553 2 +2554 2 +2555 2 +2556 2 +2557 2 +2558 2 +2559 2 +2560 2 +2561 2 +2562 2 +2563 2 +2564 2 +2565 2 +2566 2 +2567 2 +2568 2 +2569 2 +2570 3 +2571 2 +2572 2 +2573 2 +2574 2 +2575 2 +2576 3 +2577 3 +2578 2 +2579 2 +2580 2 +2581 2 +2582 2 +2583 3 +2584 2 +2585 2 +2586 2 +2587 2 +2588 2 +2589 2 +2590 2 +2591 2 +2592 2 +2593 2 +2594 2 +2595 2 +2596 2 +2597 2 +2598 2 +2599 2 +2600 2 +2601 2 +2602 2 +2603 2 +2604 2 +2605 2 +2606 2 +2607 2 +2608 2 +2609 2 +2610 2 +2611 2 +2612 2 +2613 2 +2614 2 +2615 2 +2616 2 +2617 2 +2618 3 +2619 3 +2620 2 +2621 2 +2622 2 +2623 2 +2624 2 +2625 2 +2626 2 +2627 2 +2628 3 +2629 2 +2630 3 +2631 2 +2632 2 +2633 2 +2634 2 +2635 2 +2636 2 +2637 2 +2638 2 +2639 2 +2640 2 +2641 2 +2642 2 +2643 2 +2644 2 +2645 
2 +2646 2 +2647 2 +2648 2 +2649 2 +2650 2 +2651 2 +2652 3 +2653 2 +2654 3 +2655 2 +2656 2 +2657 2 +2658 2 +2659 2 +2660 2 +2661 3 +2662 2 +2663 3 +2664 0 +2665 2 +2666 3 +2667 2 +2668 2 +2669 2 +2670 2 +2671 2 +2672 2 +2673 3 +2674 2 +2675 2 +2676 2 +2677 2 +2678 2 +2679 2 +2680 2 +2681 2 +2682 2 +2683 2 +2684 2 +2685 2 +2686 2 +2687 0 +2688 3 +2689 2 +2690 2 +2691 2 +2692 2 +2693 2 +2694 2 +2695 2 +2696 2 +2697 0 +2698 3 +2699 2 +2700 2 +2701 2 +2702 0 +2703 2 +2704 2 +2705 2 +2706 2 +2707 2 +2708 2 +2709 2 +2710 3 +2711 2 +2712 2 +2713 2 +2714 2 +2715 2 +2716 2 +2717 3 +2718 3 +2719 2 +2720 2 +2721 2 +2722 2 +2723 2 +2724 2 +2725 2 +2726 2 +2727 2 +2728 3 +2729 2 +2730 2 +2731 2 +2732 2 +2733 2 +2734 2 +2735 2 +2736 2 +2737 2 +2738 3 +2739 2 +2740 2 +2741 2 +2742 2 +2743 2 +2744 3 +2745 3 +2746 2 +2747 2 +2748 2 +2749 2 +2750 2 +2751 0 +2752 2 +2753 2 +2754 2 +2755 2 +2756 2 +2757 2 +2758 2 +2759 2 +2760 2 +2761 2 +2762 2 +2763 2 +2764 2 +2765 2 +2766 2 +2767 2 +2768 3 +2769 2 +2770 2 +2771 2 +2772 2 +2773 2 +2774 2 +2775 2 +2776 2 +2777 2 +2778 2 +2779 2 +2780 3 +2781 2 +2782 2 +2783 2 +2784 2 +2785 2 +2786 2 +2787 2 +2788 2 +2789 2 +2790 2 +2791 2 +2792 2 +2793 2 +2794 3 +2795 2 +2796 0 +2797 2 +2798 2 +2799 2 +2800 2 +2801 2 +2802 2 +2803 2 +2804 2 +2805 2 +2806 0 +2807 2 +2808 2 +2809 2 +2810 2 +2811 2 +2812 3 +2813 2 +2814 3 +2815 2 +2816 2 +2817 2 +2818 3 +2819 2 +2820 2 +2821 3 +2822 2 +2823 2 +2824 2 +2825 2 +2826 2 +2827 2 +2828 2 +2829 2 +2830 2 +2831 2 +2832 2 +2833 2 +2834 2 +2835 2 +2836 2 +2837 2 +2838 2 +2839 2 +2840 2 +2841 2 +2842 2 +2843 2 +2844 2 +2845 2 +2846 2 +2847 2 +2848 2 +2849 2 +2850 3 +2851 3 +2852 3 +2853 2 +2854 3 +2855 0 +2856 3 +2857 3 +2858 3 +2859 3 +2860 3 +2861 3 +2862 3 +2863 3 +2864 3 +2865 3 +2866 3 +2867 3 +2868 3 +2869 3 +2870 3 +2871 3 +2872 3 +2873 3 +2874 3 +2875 3 +2876 3 +2877 3 +2878 3 +2879 3 +2880 3 +2881 2 +2882 3 +2883 3 +2884 0 +2885 3 +2886 3 +2887 2 +2888 3 +2889 1 +2890 3 +2891 3 +2892 3 +2893 3 +2894 3 +2895 
3 +2896 3 +2897 3 +2898 3 +2899 3 +2900 3 +2901 3 +2902 3 +2903 3 +2904 3 +2905 3 +2906 3 +2907 3 +2908 3 +2909 3 +2910 3 +2911 2 +2912 3 +2913 3 +2914 3 +2915 3 +2916 2 +2917 3 +2918 3 +2919 3 +2920 3 +2921 3 +2922 3 +2923 3 +2924 3 +2925 3 +2926 3 +2927 3 +2928 3 +2929 2 +2930 3 +2931 3 +2932 3 +2933 3 +2934 3 +2935 3 +2936 3 +2937 3 +2938 3 +2939 3 +2940 3 +2941 3 +2942 3 +2943 2 +2944 3 +2945 3 +2946 3 +2947 3 +2948 3 +2949 3 +2950 3 +2951 3 +2952 3 +2953 3 +2954 3 +2955 3 +2956 3 +2957 3 +2958 3 +2959 3 +2960 3 +2961 2 +2962 3 +2963 3 +2964 3 +2965 3 +2966 3 +2967 3 +2968 3 +2969 3 +2970 3 +2971 3 +2972 3 +2973 3 +2974 3 +2975 3 +2976 3 +2977 3 +2978 3 +2979 3 +2980 3 +2981 3 +2982 3 +2983 3 +2984 3 +2985 3 +2986 3 +2987 3 +2988 3 +2989 3 +2990 3 +2991 3 +2992 2 +2993 3 +2994 3 +2995 3 +2996 3 +2997 3 +2998 3 +2999 3 +3000 3 +3001 3 +3002 2 +3003 3 +3004 2 +3005 3 +3006 3 +3007 3 +3008 3 +3009 3 +3010 3 +3011 3 +3012 3 +3013 3 +3014 3 +3015 3 +3016 3 +3017 3 +3018 3 +3019 3 +3020 3 +3021 3 +3022 3 +3023 3 +3024 3 +3025 3 +3026 3 +3027 3 +3028 2 +3029 2 +3030 3 +3031 3 +3032 3 +3033 3 +3034 2 +3035 3 +3036 3 +3037 3 +3038 2 +3039 3 +3040 3 +3041 3 +3042 3 +3043 3 +3044 3 +3045 3 +3046 3 +3047 3 +3048 3 +3049 3 +3050 3 +3051 3 +3052 3 +3053 3 +3054 3 +3055 3 +3056 3 +3057 3 +3058 3 +3059 3 +3060 3 +3061 3 +3062 3 +3063 3 +3064 3 +3065 3 +3066 3 +3067 3 +3068 3 +3069 3 +3070 3 +3071 3 +3072 3 +3073 3 +3074 3 +3075 3 +3076 3 +3077 2 +3078 3 +3079 3 +3080 3 +3081 3 +3082 3 +3083 3 +3084 3 +3085 3 +3086 3 +3087 3 +3088 3 +3089 3 +3090 3 +3091 2 +3092 3 +3093 3 +3094 3 +3095 3 +3096 3 +3097 3 +3098 3 +3099 3 +3100 3 +3101 3 +3102 3 +3103 3 +3104 3 +3105 3 +3106 2 +3107 3 +3108 3 +3109 3 +3110 3 +3111 3 +3112 3 +3113 3 +3114 3 +3115 3 +3116 3 +3117 3 +3118 3 +3119 3 +3120 3 +3121 3 +3122 3 +3123 3 +3124 3 +3125 3 +3126 3 +3127 3 +3128 3 +3129 3 +3130 3 +3131 3 +3132 3 +3133 3 +3134 2 +3135 3 +3136 3 +3137 3 +3138 3 +3139 3 +3140 3 +3141 3 +3142 3 +3143 3 +3144 3 +3145 
3 +3146 3 +3147 2 +3148 3 +3149 3 +3150 3 +3151 3 +3152 3 +3153 3 +3154 3 +3155 3 +3156 3 +3157 3 +3158 3 +3159 3 +3160 3 +3161 3 +3162 3 +3163 3 +3164 3 +3165 3 +3166 3 +3167 0 +3168 3 +3169 3 +3170 3 +3171 3 +3172 3 +3173 2 +3174 3 +3175 3 +3176 3 +3177 3 +3178 3 +3179 0 +3180 3 +3181 3 +3182 2 +3183 0 +3184 3 +3185 3 +3186 3 +3187 3 +3188 3 +3189 3 +3190 3 +3191 3 +3192 3 +3193 0 +3194 3 +3195 3 +3196 3 +3197 3 +3198 3 +3199 3 +3200 3 +3201 3 +3202 3 +3203 2 +3204 3 +3205 3 +3206 3 +3207 2 +3208 3 +3209 3 +3210 3 +3211 3 +3212 3 +3213 3 +3214 3 +3215 3 +3216 3 +3217 3 +3218 3 +3219 3 +3220 3 +3221 3 +3222 3 +3223 3 +3224 3 +3225 2 +3226 3 +3227 3 +3228 3 +3229 3 +3230 3 +3231 3 +3232 3 +3233 3 +3234 3 +3235 3 +3236 3 +3237 3 +3238 0 +3239 3 +3240 3 +3241 3 +3242 3 +3243 3 +3244 3 +3245 3 +3246 3 +3247 3 +3248 3 +3249 3 +3250 3 +3251 3 +3252 3 +3253 3 +3254 3 +3255 3 +3256 3 +3257 3 +3258 3 +3259 2 +3260 3 +3261 3 +3262 3 +3263 3 +3264 3 +3265 3 +3266 3 +3267 3 +3268 2 +3269 3 +3270 3 +3271 3 +3272 2 +3273 3 +3274 3 +3275 3 +3276 3 +3277 3 +3278 3 +3279 3 +3280 3 +3281 3 +3282 3 +3283 3 +3284 3 +3285 3 +3286 3 +3287 3 +3288 3 +3289 3 +3290 3 +3291 3 +3292 3 +3293 3 +3294 3 +3295 3 +3296 3 +3297 3 +3298 2 +3299 3 +3300 3 +3301 3 +3302 3 +3303 3 +3304 3 +3305 3 +3306 3 +3307 3 +3308 3 +3309 3 +3310 3 +3311 3 +3312 3 +3313 3 +3314 3 +3315 3 +3316 3 +3317 3 +3318 3 +3319 3 +3320 3 +3321 3 +3322 3 +3323 3 +3324 1 +3325 3 +3326 3 +3327 3 +3328 1 +3329 3 +3330 3 +3331 3 +3332 3 +3333 3 +3334 1 +3335 3 +3336 3 +3337 3 +3338 3 +3339 3 +3340 3 +3341 3 +3342 3 +3343 3 +3344 3 +3345 3 +3346 3 +3347 3 +3348 3 +3349 3 +3350 3 +3351 3 +3352 3 +3353 3 +3354 3 +3355 3 +3356 3 +3357 3 +3358 2 +3359 3 +3360 3 +3361 3 +3362 3 +3363 3 +3364 3 +3365 3 +3366 3 +3367 3 +3368 3 +3369 3 +3370 3 +3371 3 +3372 3 +3373 3 +3374 3 +3375 3 +3376 3 +3377 3 +3378 3 +3379 3 +3380 3 +3381 3 +3382 3 +3383 3 +3384 3 +3385 3 +3386 3 +3387 3 +3388 3 +3389 3 +3390 3 +3391 3 +3392 3 +3393 3 +3394 3 +3395 
3 +3396 3 +3397 3 +3398 3 +3399 3 +3400 2 +3401 3 +3402 3 +3403 3 +3404 3 +3405 3 +3406 3 +3407 3 +3408 3 +3409 3 +3410 3 +3411 3 +3412 3 +3413 3 +3414 3 +3415 3 +3416 3 +3417 3 +3418 3 +3419 3 +3420 3 +3421 3 +3422 3 +3423 3 +3424 3 +3425 3 +3426 3 +3427 3 +3428 2 +3429 3 +3430 3 +3431 3 +3432 3 +3433 3 +3434 3 +3435 3 +3436 3 +3437 3 +3438 2 +3439 3 +3440 3 +3441 3 +3442 3 +3443 3 +3444 3 +3445 3 +3446 2 +3447 2 +3448 3 +3449 3 +3450 3 +3451 3 +3452 3 +3453 3 +3454 3 +3455 3 +3456 3 +3457 3 +3458 3 +3459 3 +3460 3 +3461 3 +3462 3 +3463 3 +3464 3 +3465 3 +3466 3 +3467 3 +3468 3 +3469 3 +3470 2 +3471 3 +3472 3 +3473 3 +3474 3 +3475 3 +3476 3 +3477 3 +3478 3 +3479 3 +3480 3 +3481 2 +3482 3 +3483 3 +3484 3 +3485 3 +3486 3 +3487 3 +3488 3 +3489 3 +3490 3 +3491 3 +3492 2 +3493 3 +3494 3 +3495 3 +3496 3 +3497 2 +3498 3 +3499 3 +3500 0 +3501 3 +3502 3 +3503 3 +3504 3 +3505 3 +3506 3 +3507 3 +3508 3 +3509 3 +3510 3 +3511 3 +3512 3 +3513 3 +3514 3 +3515 3 +3516 3 +3517 3 +3518 3 +3519 2 +3520 3 +3521 3 +3522 3 +3523 3 +3524 3 +3525 3 +3526 3 +3527 3 +3528 3 +3529 3 +3530 3 +3531 3 +3532 3 +3533 3 +3534 3 +3535 3 +3536 3 +3537 3 +3538 3 +3539 3 +3540 3 +3541 3 +3542 3 +3543 2 +3544 3 +3545 2 +3546 3 +3547 3 +3548 3 +3549 3 +3550 3 +3551 3 +3552 3 +3553 3 +3554 3 +3555 3 +3556 3 +3557 3 +3558 3 +3559 3 +3560 3 +3561 3 +3562 3 +3563 3 +3564 3 +3565 3 +3566 3 +3567 3 +3568 3 +3569 3 +3570 3 +3571 3 +3572 3 +3573 3 +3574 3 +3575 3 +3576 3 +3577 3 +3578 3 +3579 3 +3580 3 +3581 3 +3582 3 +3583 3 +3584 3 +3585 3 +3586 3 +3587 3 +3588 3 +3589 3 +3590 3 +3591 3 +3592 3 +3593 3 +3594 3 +3595 3 +3596 3 +3597 3 +3598 3 +3599 3 +3600 3 +3601 3 +3602 3 +3603 3 +3604 3 +3605 3 +3606 3 +3607 3 +3608 3 +3609 3 +3610 2 +3611 3 +3612 3 +3613 3 +3614 3 +3615 3 +3616 3 +3617 3 +3618 3 +3619 3 +3620 3 +3621 3 +3622 3 +3623 3 +3624 3 +3625 3 +3626 3 +3627 3 +3628 3 +3629 3 +3630 3 +3631 2 +3632 3 +3633 3 +3634 3 +3635 3 +3636 3 +3637 3 +3638 3 +3639 3 +3640 3 +3641 3 +3642 3 +3643 3 +3644 3 +3645 
3 +3646 3 +3647 3 +3648 2 +3649 3 +3650 3 +3651 3 +3652 3 +3653 3 +3654 3 +3655 3 +3656 3 +3657 3 +3658 3 +3659 3 +3660 3 +3661 3 +3662 3 +3663 3 +3664 3 +3665 3 +3666 3 +3667 3 +3668 3 +3669 3 +3670 3 +3671 3 +3672 3 +3673 2 +3674 3 +3675 3 +3676 3 +3677 3 +3678 3 +3679 3 +3680 3 +3681 3 +3682 3 +3683 3 +3684 3 +3685 3 +3686 3 +3687 3 +3688 3 +3689 3 +3690 3 +3691 3 +3692 2 +3693 3 +3694 3 +3695 3 +3696 3 +3697 3 +3698 3 +3699 3 +3700 3 +3701 0 +3702 3 +3703 3 +3704 3 +3705 3 +3706 3 +3707 0 +3708 3 +3709 3 +3710 3 +3711 3 +3712 3 +3713 3 +3714 3 +3715 3 +3716 3 +3717 3 +3718 3 +3719 3 +3720 3 +3721 2 +3722 3 +3723 3 +3724 3 +3725 2 +3726 3 +3727 3 +3728 3 +3729 3 +3730 3 +3731 3 +3732 3 +3733 3 +3734 3 +3735 3 +3736 3 +3737 3 +3738 3 +3739 3 +3740 3 +3741 3 +3742 3 +3743 3 +3744 3 +3745 3 +3746 3 +3747 3 +3748 3 +3749 3 +3750 3 +3751 3 +3752 3 +3753 3 +3754 3 +3755 3 +3756 2 +3757 3 +3758 0 +3759 3 +3760 3 +3761 3 +3762 3 +3763 2 +3764 3 +3765 2 +3766 3 +3767 3 +3768 3 +3769 3 +3770 3 +3771 3 +3772 3 +3773 3 +3774 3 +3775 3 +3776 3 +3777 3 +3778 3 +3779 3 +3780 3 +3781 3 +3782 3 +3783 3 +3784 3 +3785 3 +3786 3 +3787 3 +3788 2 +3789 3 +3790 3 +3791 3 +3792 2 +3793 2 +3794 3 +3795 3 +3796 3 +3797 3 +3798 3 +3799 3 diff --git a/out/t5_results/README.md b/out/t5_results/README.md new file mode 100644 index 0000000..e0f5354 --- /dev/null +++ b/out/t5_results/README.md @@ -0,0 +1,53 @@ +--- +tags: +- generated_from_trainer +model-index: +- name: t5_results + results: [] +--- + + + +# t5_results + +This model is a fine-tuned version of [out/t5](https://huggingface.co/out/t5) on an unknown dataset. 
+It achieves the following results on the evaluation set: +- eval_loss: 1.2139 +- eval_accuracy: 0.4675 +- eval_runtime: 40.5651 +- eval_samples_per_second: 49.303 +- eval_steps_per_second: 6.163 +- step: 0 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: linear +- training_steps: 2500 + +### Framework versions + +- Transformers 4.26.1 +- Pytorch 1.13.1+cu117 +- Datasets 2.9.0 +- Tokenizers 0.13.2 diff --git a/out/t5_results/all_results.json b/out/t5_results/all_results.json new file mode 100644 index 0000000..e900025 --- /dev/null +++ b/out/t5_results/all_results.json @@ -0,0 +1,8 @@ +{ + "eval_accuracy": 0.4675000011920929, + "eval_loss": 1.213880181312561, + "eval_runtime": 40.5651, + "eval_samples": 2000, + "eval_samples_per_second": 49.303, + "eval_steps_per_second": 6.163 +} \ No newline at end of file diff --git a/out/t5_results/eval_results.json b/out/t5_results/eval_results.json new file mode 100644 index 0000000..e900025 --- /dev/null +++ b/out/t5_results/eval_results.json @@ -0,0 +1,8 @@ +{ + "eval_accuracy": 0.4675000011920929, + "eval_loss": 1.213880181312561, + "eval_runtime": 40.5651, + "eval_samples": 2000, + "eval_samples_per_second": 49.303, + "eval_steps_per_second": 6.163 +} \ No newline at end of file diff --git a/out/t5_results/predict_results_None.txt b/out/t5_results/predict_results_None.txt new file mode 100644 index 0000000..01cd7c7 --- /dev/null +++ b/out/t5_results/predict_results_None.txt @@ -0,0 +1,3801 @@ +index prediction +0 0 +1 2 +2 0 +3 0 +4 0 +5 1 +6 0 +7 1 +8 1 +9 2 +10 0 +11 1 +12 0 +13 0 +14 0 +15 0 +16 0 +17 0 +18 0 +19 0 +20 0 
+21 0 +22 0 +23 0 +24 2 +25 3 +26 1 +27 2 +28 3 +29 0 +30 1 +31 2 +32 0 +33 0 +34 0 +35 0 +36 0 +37 1 +38 0 +39 0 +40 0 +41 0 +42 3 +43 0 +44 2 +45 0 +46 0 +47 0 +48 0 +49 1 +50 1 +51 1 +52 0 +53 0 +54 1 +55 0 +56 0 +57 0 +58 3 +59 0 +60 0 +61 1 +62 0 +63 0 +64 0 +65 0 +66 0 +67 0 +68 0 +69 0 +70 0 +71 0 +72 0 +73 3 +74 0 +75 0 +76 0 +77 0 +78 0 +79 0 +80 0 +81 1 +82 2 +83 2 +84 2 +85 2 +86 1 +87 0 +88 0 +89 1 +90 2 +91 0 +92 0 +93 0 +94 0 +95 0 +96 0 +97 1 +98 3 +99 1 +100 0 +101 1 +102 0 +103 0 +104 1 +105 1 +106 0 +107 1 +108 0 +109 1 +110 2 +111 3 +112 0 +113 0 +114 2 +115 1 +116 3 +117 1 +118 1 +119 0 +120 0 +121 0 +122 0 +123 2 +124 1 +125 0 +126 0 +127 1 +128 0 +129 2 +130 1 +131 0 +132 2 +133 0 +134 0 +135 0 +136 2 +137 0 +138 1 +139 1 +140 0 +141 0 +142 0 +143 0 +144 0 +145 0 +146 0 +147 0 +148 0 +149 0 +150 2 +151 2 +152 0 +153 3 +154 2 +155 0 +156 3 +157 0 +158 0 +159 1 +160 0 +161 1 +162 1 +163 0 +164 1 +165 0 +166 0 +167 0 +168 0 +169 0 +170 0 +171 2 +172 0 +173 0 +174 0 +175 0 +176 0 +177 1 +178 1 +179 0 +180 0 +181 0 +182 0 +183 2 +184 0 +185 0 +186 0 +187 0 +188 1 +189 0 +190 0 +191 0 +192 0 +193 1 +194 1 +195 1 +196 1 +197 0 +198 1 +199 0 +200 0 +201 0 +202 0 +203 0 +204 0 +205 0 +206 0 +207 0 +208 0 +209 1 +210 0 +211 0 +212 0 +213 2 +214 0 +215 0 +216 3 +217 0 +218 1 +219 0 +220 0 +221 0 +222 0 +223 0 +224 2 +225 0 +226 2 +227 3 +228 0 +229 0 +230 0 +231 0 +232 2 +233 1 +234 0 +235 0 +236 0 +237 0 +238 1 +239 3 +240 0 +241 0 +242 0 +243 0 +244 0 +245 1 +246 0 +247 3 +248 0 +249 0 +250 1 +251 1 +252 0 +253 0 +254 1 +255 0 +256 0 +257 1 +258 1 +259 0 +260 1 +261 1 +262 1 +263 0 +264 0 +265 1 +266 0 +267 2 +268 0 +269 0 +270 1 +271 0 +272 1 +273 0 +274 1 +275 1 +276 0 +277 0 +278 0 +279 2 +280 1 +281 0 +282 0 +283 1 +284 0 +285 1 +286 0 +287 0 +288 0 +289 2 +290 0 +291 0 +292 1 +293 0 +294 0 +295 0 +296 1 +297 1 +298 0 +299 0 +300 1 +301 0 +302 1 +303 0 +304 0 +305 0 +306 1 +307 0 +308 0 +309 3 +310 0 +311 0 +312 0 +313 2 +314 0 +315 0 +316 1 +317 0 
+318 3 +319 1 +320 0 +321 0 +322 0 +323 1 +324 0 +325 1 +326 0 +327 0 +328 3 +329 0 +330 1 +331 0 +332 1 +333 0 +334 1 +335 2 +336 0 +337 0 +338 2 +339 1 +340 2 +341 0 +342 1 +343 0 +344 1 +345 1 +346 0 +347 0 +348 0 +349 0 +350 1 +351 0 +352 0 +353 2 +354 0 +355 0 +356 0 +357 2 +358 0 +359 1 +360 0 +361 0 +362 0 +363 0 +364 1 +365 0 +366 0 +367 3 +368 0 +369 0 +370 0 +371 1 +372 0 +373 0 +374 0 +375 0 +376 0 +377 0 +378 0 +379 3 +380 0 +381 0 +382 0 +383 0 +384 0 +385 0 +386 0 +387 1 +388 0 +389 1 +390 1 +391 0 +392 1 +393 0 +394 1 +395 0 +396 1 +397 2 +398 0 +399 0 +400 0 +401 0 +402 3 +403 0 +404 1 +405 3 +406 0 +407 2 +408 1 +409 1 +410 0 +411 0 +412 2 +413 0 +414 0 +415 0 +416 0 +417 0 +418 1 +419 0 +420 1 +421 0 +422 1 +423 0 +424 1 +425 0 +426 0 +427 0 +428 0 +429 0 +430 0 +431 0 +432 0 +433 1 +434 0 +435 1 +436 0 +437 2 +438 2 +439 0 +440 1 +441 1 +442 0 +443 2 +444 0 +445 0 +446 0 +447 0 +448 0 +449 0 +450 0 +451 1 +452 0 +453 0 +454 3 +455 1 +456 0 +457 0 +458 0 +459 1 +460 0 +461 0 +462 0 +463 0 +464 0 +465 0 +466 0 +467 0 +468 0 +469 1 +470 1 +471 1 +472 0 +473 0 +474 2 +475 1 +476 2 +477 0 +478 0 +479 0 +480 0 +481 0 +482 1 +483 0 +484 3 +485 2 +486 1 +487 0 +488 1 +489 0 +490 0 +491 0 +492 0 +493 0 +494 1 +495 0 +496 1 +497 1 +498 0 +499 3 +500 0 +501 1 +502 0 +503 0 +504 0 +505 1 +506 0 +507 1 +508 0 +509 3 +510 1 +511 0 +512 0 +513 0 +514 0 +515 0 +516 1 +517 0 +518 0 +519 0 +520 0 +521 0 +522 1 +523 0 +524 0 +525 0 +526 1 +527 0 +528 0 +529 0 +530 1 +531 2 +532 0 +533 0 +534 1 +535 1 +536 1 +537 0 +538 1 +539 1 +540 0 +541 1 +542 0 +543 2 +544 0 +545 1 +546 1 +547 1 +548 0 +549 2 +550 1 +551 0 +552 0 +553 1 +554 0 +555 0 +556 0 +557 3 +558 0 +559 1 +560 0 +561 0 +562 0 +563 0 +564 3 +565 2 +566 0 +567 1 +568 0 +569 2 +570 2 +571 0 +572 1 +573 0 +574 1 +575 2 +576 2 +577 0 +578 1 +579 0 +580 0 +581 2 +582 1 +583 1 +584 1 +585 0 +586 0 +587 0 +588 0 +589 1 +590 1 +591 1 +592 0 +593 3 +594 2 +595 0 +596 0 +597 0 +598 1 +599 0 +600 0 +601 0 +602 1 +603 
0 +604 1 +605 2 +606 2 +607 0 +608 0 +609 2 +610 0 +611 0 +612 1 +613 0 +614 0 +615 0 +616 3 +617 0 +618 0 +619 3 +620 0 +621 0 +622 0 +623 0 +624 1 +625 0 +626 0 +627 0 +628 0 +629 1 +630 0 +631 1 +632 1 +633 0 +634 0 +635 0 +636 0 +637 0 +638 1 +639 1 +640 3 +641 0 +642 0 +643 0 +644 0 +645 1 +646 2 +647 0 +648 1 +649 1 +650 1 +651 1 +652 0 +653 0 +654 3 +655 0 +656 0 +657 0 +658 0 +659 0 +660 0 +661 1 +662 0 +663 0 +664 1 +665 1 +666 1 +667 0 +668 0 +669 1 +670 0 +671 0 +672 1 +673 2 +674 0 +675 1 +676 1 +677 3 +678 0 +679 0 +680 0 +681 2 +682 0 +683 0 +684 2 +685 0 +686 1 +687 0 +688 0 +689 0 +690 0 +691 2 +692 0 +693 3 +694 1 +695 0 +696 0 +697 1 +698 1 +699 0 +700 0 +701 0 +702 0 +703 2 +704 0 +705 0 +706 3 +707 2 +708 0 +709 0 +710 1 +711 0 +712 0 +713 0 +714 0 +715 0 +716 0 +717 1 +718 0 +719 1 +720 0 +721 1 +722 0 +723 1 +724 1 +725 0 +726 2 +727 0 +728 1 +729 1 +730 2 +731 0 +732 0 +733 3 +734 0 +735 2 +736 3 +737 1 +738 0 +739 0 +740 0 +741 1 +742 1 +743 0 +744 0 +745 1 +746 0 +747 0 +748 0 +749 3 +750 0 +751 0 +752 0 +753 1 +754 0 +755 1 +756 1 +757 2 +758 1 +759 0 +760 2 +761 1 +762 0 +763 0 +764 0 +765 0 +766 0 +767 0 +768 3 +769 0 +770 0 +771 0 +772 0 +773 0 +774 1 +775 3 +776 3 +777 1 +778 1 +779 0 +780 0 +781 0 +782 2 +783 0 +784 0 +785 3 +786 2 +787 0 +788 0 +789 0 +790 0 +791 0 +792 1 +793 1 +794 0 +795 0 +796 1 +797 0 +798 2 +799 0 +800 1 +801 0 +802 0 +803 3 +804 3 +805 1 +806 0 +807 2 +808 1 +809 1 +810 3 +811 0 +812 1 +813 1 +814 0 +815 0 +816 2 +817 2 +818 1 +819 1 +820 0 +821 2 +822 1 +823 1 +824 3 +825 0 +826 0 +827 0 +828 1 +829 0 +830 2 +831 0 +832 3 +833 1 +834 3 +835 0 +836 0 +837 0 +838 1 +839 0 +840 1 +841 0 +842 0 +843 0 +844 0 +845 0 +846 0 +847 2 +848 1 +849 0 +850 0 +851 0 +852 1 +853 2 +854 0 +855 0 +856 0 +857 0 +858 1 +859 0 +860 0 +861 1 +862 0 +863 1 +864 0 +865 1 +866 0 +867 0 +868 0 +869 0 +870 0 +871 0 +872 1 +873 1 +874 1 +875 1 +876 1 +877 0 +878 0 +879 0 +880 0 +881 0 +882 1 +883 0 +884 0 +885 0 +886 0 +887 0 +888 0 
+889 1 +890 2 +891 0 +892 0 +893 1 +894 0 +895 1 +896 0 +897 1 +898 0 +899 0 +900 0 +901 0 +902 0 +903 0 +904 0 +905 0 +906 0 +907 0 +908 0 +909 0 +910 3 +911 2 +912 0 +913 2 +914 0 +915 0 +916 0 +917 1 +918 0 +919 1 +920 2 +921 1 +922 0 +923 0 +924 1 +925 0 +926 0 +927 0 +928 0 +929 0 +930 0 +931 1 +932 0 +933 0 +934 2 +935 0 +936 1 +937 0 +938 0 +939 0 +940 0 +941 0 +942 0 +943 0 +944 0 +945 2 +946 0 +947 2 +948 0 +949 1 +950 1 +951 1 +952 1 +953 2 +954 1 +955 1 +956 2 +957 1 +958 0 +959 3 +960 0 +961 1 +962 1 +963 1 +964 0 +965 1 +966 3 +967 1 +968 1 +969 0 +970 0 +971 1 +972 0 +973 1 +974 1 +975 1 +976 1 +977 1 +978 0 +979 1 +980 0 +981 1 +982 1 +983 1 +984 3 +985 1 +986 1 +987 1 +988 2 +989 1 +990 1 +991 2 +992 0 +993 1 +994 0 +995 1 +996 1 +997 3 +998 0 +999 1 +1000 1 +1001 1 +1002 1 +1003 0 +1004 1 +1005 1 +1006 1 +1007 1 +1008 0 +1009 1 +1010 1 +1011 1 +1012 1 +1013 0 +1014 1 +1015 1 +1016 1 +1017 3 +1018 1 +1019 1 +1020 0 +1021 1 +1022 2 +1023 0 +1024 0 +1025 1 +1026 1 +1027 1 +1028 1 +1029 2 +1030 0 +1031 0 +1032 0 +1033 1 +1034 0 +1035 1 +1036 1 +1037 1 +1038 0 +1039 0 +1040 2 +1041 2 +1042 1 +1043 1 +1044 1 +1045 0 +1046 1 +1047 1 +1048 2 +1049 1 +1050 1 +1051 0 +1052 1 +1053 0 +1054 0 +1055 1 +1056 1 +1057 2 +1058 1 +1059 1 +1060 1 +1061 1 +1062 0 +1063 1 +1064 1 +1065 2 +1066 0 +1067 1 +1068 1 +1069 0 +1070 0 +1071 1 +1072 1 +1073 1 +1074 1 +1075 0 +1076 1 +1077 1 +1078 1 +1079 2 +1080 1 +1081 1 +1082 1 +1083 1 +1084 3 +1085 1 +1086 0 +1087 1 +1088 1 +1089 2 +1090 1 +1091 1 +1092 0 +1093 1 +1094 1 +1095 1 +1096 1 +1097 3 +1098 3 +1099 1 +1100 1 +1101 1 +1102 3 +1103 1 +1104 0 +1105 1 +1106 3 +1107 1 +1108 3 +1109 0 +1110 1 +1111 0 +1112 0 +1113 1 +1114 0 +1115 0 +1116 1 +1117 0 +1118 2 +1119 1 +1120 1 +1121 2 +1122 0 +1123 0 +1124 1 +1125 1 +1126 1 +1127 1 +1128 3 +1129 1 +1130 0 +1131 0 +1132 1 +1133 1 +1134 0 +1135 2 +1136 1 +1137 0 +1138 1 +1139 0 +1140 1 +1141 1 +1142 1 +1143 0 +1144 1 +1145 1 +1146 1 +1147 0 +1148 1 +1149 3 +1150 3 +1151 2 +1152 
1 +1153 0 +1154 0 +1155 1 +1156 0 +1157 1 +1158 1 +1159 0 +1160 1 +1161 0 +1162 3 +1163 1 +1164 1 +1165 1 +1166 0 +1167 0 +1168 1 +1169 1 +1170 1 +1171 0 +1172 1 +1173 1 +1174 1 +1175 3 +1176 1 +1177 1 +1178 3 +1179 1 +1180 1 +1181 2 +1182 1 +1183 1 +1184 0 +1185 0 +1186 2 +1187 2 +1188 1 +1189 1 +1190 2 +1191 0 +1192 1 +1193 1 +1194 1 +1195 1 +1196 1 +1197 1 +1198 0 +1199 3 +1200 1 +1201 1 +1202 1 +1203 0 +1204 2 +1205 1 +1206 0 +1207 1 +1208 1 +1209 0 +1210 1 +1211 1 +1212 0 +1213 1 +1214 2 +1215 1 +1216 0 +1217 3 +1218 1 +1219 1 +1220 0 +1221 2 +1222 0 +1223 3 +1224 0 +1225 1 +1226 1 +1227 3 +1228 0 +1229 1 +1230 1 +1231 1 +1232 1 +1233 0 +1234 1 +1235 1 +1236 0 +1237 0 +1238 2 +1239 0 +1240 2 +1241 1 +1242 2 +1243 0 +1244 1 +1245 1 +1246 1 +1247 1 +1248 1 +1249 1 +1250 0 +1251 1 +1252 1 +1253 3 +1254 1 +1255 0 +1256 1 +1257 1 +1258 2 +1259 1 +1260 1 +1261 2 +1262 1 +1263 1 +1264 3 +1265 2 +1266 1 +1267 0 +1268 0 +1269 0 +1270 1 +1271 1 +1272 0 +1273 1 +1274 0 +1275 1 +1276 1 +1277 0 +1278 2 +1279 2 +1280 3 +1281 2 +1282 1 +1283 1 +1284 1 +1285 1 +1286 3 +1287 1 +1288 0 +1289 2 +1290 1 +1291 1 +1292 0 +1293 1 +1294 1 +1295 1 +1296 1 +1297 3 +1298 1 +1299 0 +1300 0 +1301 1 +1302 1 +1303 3 +1304 1 +1305 0 +1306 0 +1307 3 +1308 1 +1309 1 +1310 1 +1311 0 +1312 1 +1313 0 +1314 2 +1315 2 +1316 1 +1317 1 +1318 1 +1319 1 +1320 0 +1321 1 +1322 2 +1323 1 +1324 0 +1325 1 +1326 1 +1327 1 +1328 1 +1329 1 +1330 1 +1331 1 +1332 0 +1333 2 +1334 1 +1335 2 +1336 0 +1337 1 +1338 1 +1339 3 +1340 3 +1341 3 +1342 1 +1343 0 +1344 0 +1345 1 +1346 0 +1347 1 +1348 2 +1349 1 +1350 3 +1351 3 +1352 1 +1353 1 +1354 2 +1355 1 +1356 1 +1357 1 +1358 0 +1359 1 +1360 0 +1361 1 +1362 1 +1363 1 +1364 3 +1365 1 +1366 1 +1367 1 +1368 1 +1369 2 +1370 3 +1371 1 +1372 2 +1373 2 +1374 2 +1375 0 +1376 0 +1377 1 +1378 3 +1379 0 +1380 1 +1381 2 +1382 1 +1383 1 +1384 0 +1385 1 +1386 1 +1387 0 +1388 0 +1389 1 +1390 1 +1391 1 +1392 3 +1393 2 +1394 3 +1395 1 +1396 1 +1397 2 +1398 3 +1399 1 +1400 0 +1401 0 +1402 
1 +1403 1 +1404 0 +1405 1 +1406 0 +1407 1 +1408 2 +1409 1 +1410 1 +1411 1 +1412 1 +1413 1 +1414 1 +1415 2 +1416 1 +1417 1 +1418 1 +1419 1 +1420 2 +1421 1 +1422 0 +1423 0 +1424 2 +1425 1 +1426 1 +1427 1 +1428 1 +1429 1 +1430 1 +1431 1 +1432 3 +1433 0 +1434 0 +1435 1 +1436 1 +1437 0 +1438 0 +1439 3 +1440 0 +1441 0 +1442 0 +1443 3 +1444 1 +1445 1 +1446 1 +1447 1 +1448 1 +1449 1 +1450 1 +1451 3 +1452 1 +1453 2 +1454 1 +1455 1 +1456 1 +1457 1 +1458 1 +1459 1 +1460 1 +1461 3 +1462 2 +1463 0 +1464 0 +1465 1 +1466 0 +1467 0 +1468 2 +1469 0 +1470 2 +1471 1 +1472 0 +1473 2 +1474 1 +1475 1 +1476 1 +1477 1 +1478 1 +1479 1 +1480 1 +1481 1 +1482 1 +1483 1 +1484 2 +1485 0 +1486 2 +1487 0 +1488 0 +1489 1 +1490 1 +1491 1 +1492 0 +1493 1 +1494 3 +1495 2 +1496 0 +1497 3 +1498 0 +1499 1 +1500 3 +1501 0 +1502 0 +1503 1 +1504 1 +1505 1 +1506 1 +1507 1 +1508 1 +1509 0 +1510 1 +1511 1 +1512 1 +1513 0 +1514 1 +1515 2 +1516 1 +1517 2 +1518 1 +1519 1 +1520 2 +1521 0 +1522 1 +1523 1 +1524 0 +1525 1 +1526 1 +1527 0 +1528 1 +1529 1 +1530 1 +1531 1 +1532 1 +1533 1 +1534 1 +1535 1 +1536 1 +1537 1 +1538 1 +1539 0 +1540 0 +1541 0 +1542 0 +1543 3 +1544 1 +1545 0 +1546 0 +1547 1 +1548 1 +1549 1 +1550 1 +1551 2 +1552 0 +1553 0 +1554 1 +1555 3 +1556 2 +1557 1 +1558 0 +1559 1 +1560 2 +1561 1 +1562 1 +1563 2 +1564 1 +1565 1 +1566 1 +1567 3 +1568 1 +1569 1 +1570 1 +1571 1 +1572 1 +1573 1 +1574 1 +1575 1 +1576 1 +1577 0 +1578 0 +1579 1 +1580 1 +1581 1 +1582 1 +1583 1 +1584 2 +1585 1 +1586 0 +1587 1 +1588 1 +1589 1 +1590 3 +1591 2 +1592 0 +1593 1 +1594 1 +1595 1 +1596 1 +1597 1 +1598 1 +1599 1 +1600 1 +1601 0 +1602 2 +1603 1 +1604 0 +1605 1 +1606 1 +1607 1 +1608 1 +1609 1 +1610 1 +1611 1 +1612 1 +1613 1 +1614 1 +1615 1 +1616 0 +1617 1 +1618 0 +1619 1 +1620 1 +1621 1 +1622 1 +1623 1 +1624 0 +1625 1 +1626 1 +1627 1 +1628 1 +1629 2 +1630 1 +1631 1 +1632 2 +1633 1 +1634 3 +1635 2 +1636 1 +1637 1 +1638 1 +1639 1 +1640 1 +1641 2 +1642 1 +1643 2 +1644 0 +1645 1 +1646 1 +1647 1 +1648 2 +1649 2 +1650 1 +1651 1 +1652 
1 +1653 0 +1654 0 +1655 1 +1656 1 +1657 1 +1658 1 +1659 1 +1660 1 +1661 1 +1662 0 +1663 1 +1664 0 +1665 1 +1666 1 +1667 1 +1668 0 +1669 0 +1670 0 +1671 1 +1672 1 +1673 1 +1674 0 +1675 0 +1676 1 +1677 1 +1678 1 +1679 1 +1680 1 +1681 1 +1682 0 +1683 2 +1684 1 +1685 1 +1686 0 +1687 1 +1688 2 +1689 0 +1690 0 +1691 1 +1692 0 +1693 1 +1694 0 +1695 1 +1696 1 +1697 0 +1698 1 +1699 1 +1700 0 +1701 1 +1702 0 +1703 0 +1704 1 +1705 1 +1706 1 +1707 1 +1708 2 +1709 1 +1710 3 +1711 1 +1712 1 +1713 1 +1714 1 +1715 1 +1716 2 +1717 0 +1718 0 +1719 1 +1720 1 +1721 1 +1722 1 +1723 1 +1724 1 +1725 2 +1726 0 +1727 3 +1728 1 +1729 1 +1730 1 +1731 1 +1732 1 +1733 0 +1734 1 +1735 1 +1736 1 +1737 0 +1738 1 +1739 1 +1740 0 +1741 0 +1742 1 +1743 0 +1744 1 +1745 1 +1746 0 +1747 1 +1748 0 +1749 1 +1750 1 +1751 1 +1752 0 +1753 3 +1754 1 +1755 1 +1756 1 +1757 1 +1758 1 +1759 3 +1760 1 +1761 0 +1762 1 +1763 1 +1764 0 +1765 1 +1766 0 +1767 0 +1768 2 +1769 0 +1770 1 +1771 3 +1772 3 +1773 2 +1774 0 +1775 1 +1776 1 +1777 1 +1778 2 +1779 2 +1780 0 +1781 0 +1782 1 +1783 1 +1784 1 +1785 1 +1786 1 +1787 3 +1788 0 +1789 1 +1790 1 +1791 3 +1792 1 +1793 1 +1794 1 +1795 1 +1796 1 +1797 1 +1798 1 +1799 1 +1800 1 +1801 1 +1802 2 +1803 1 +1804 1 +1805 1 +1806 1 +1807 3 +1808 1 +1809 1 +1810 2 +1811 0 +1812 1 +1813 2 +1814 0 +1815 1 +1816 1 +1817 2 +1818 2 +1819 0 +1820 0 +1821 2 +1822 0 +1823 0 +1824 1 +1825 1 +1826 1 +1827 1 +1828 1 +1829 1 +1830 0 +1831 1 +1832 1 +1833 2 +1834 2 +1835 1 +1836 1 +1837 1 +1838 1 +1839 1 +1840 1 +1841 1 +1842 0 +1843 1 +1844 1 +1845 0 +1846 2 +1847 1 +1848 1 +1849 2 +1850 2 +1851 1 +1852 1 +1853 0 +1854 0 +1855 1 +1856 1 +1857 0 +1858 1 +1859 1 +1860 1 +1861 1 +1862 1 +1863 0 +1864 3 +1865 3 +1866 1 +1867 1 +1868 1 +1869 2 +1870 1 +1871 0 +1872 1 +1873 0 +1874 2 +1875 2 +1876 1 +1877 2 +1878 2 +1879 0 +1880 1 +1881 0 +1882 1 +1883 1 +1884 1 +1885 1 +1886 1 +1887 1 +1888 0 +1889 2 +1890 0 +1891 0 +1892 1 +1893 2 +1894 1 +1895 1 +1896 0 +1897 1 +1898 1 +1899 0 +1900 3 +1901 2 +1902 
0 +1903 2 +1904 0 +1905 0 +1906 2 +1907 2 +1908 0 +1909 3 +1910 0 +1911 1 +1912 3 +1913 1 +1914 2 +1915 2 +1916 0 +1917 0 +1918 2 +1919 1 +1920 2 +1921 0 +1922 1 +1923 1 +1924 0 +1925 2 +1926 0 +1927 3 +1928 1 +1929 1 +1930 1 +1931 3 +1932 0 +1933 2 +1934 2 +1935 0 +1936 2 +1937 1 +1938 2 +1939 2 +1940 3 +1941 2 +1942 1 +1943 0 +1944 1 +1945 1 +1946 0 +1947 1 +1948 1 +1949 0 +1950 0 +1951 2 +1952 0 +1953 2 +1954 0 +1955 1 +1956 0 +1957 0 +1958 1 +1959 0 +1960 1 +1961 1 +1962 3 +1963 2 +1964 3 +1965 2 +1966 1 +1967 1 +1968 0 +1969 3 +1970 1 +1971 1 +1972 1 +1973 0 +1974 1 +1975 3 +1976 1 +1977 0 +1978 3 +1979 0 +1980 1 +1981 1 +1982 2 +1983 2 +1984 2 +1985 0 +1986 2 +1987 2 +1988 0 +1989 1 +1990 1 +1991 1 +1992 3 +1993 1 +1994 0 +1995 0 +1996 0 +1997 2 +1998 2 +1999 3 +2000 1 +2001 1 +2002 1 +2003 1 +2004 2 +2005 3 +2006 2 +2007 1 +2008 1 +2009 1 +2010 1 +2011 2 +2012 2 +2013 0 +2014 1 +2015 0 +2016 1 +2017 2 +2018 1 +2019 2 +2020 1 +2021 1 +2022 1 +2023 0 +2024 3 +2025 1 +2026 2 +2027 0 +2028 3 +2029 0 +2030 1 +2031 2 +2032 3 +2033 0 +2034 0 +2035 1 +2036 0 +2037 3 +2038 1 +2039 0 +2040 1 +2041 2 +2042 0 +2043 1 +2044 3 +2045 0 +2046 2 +2047 1 +2048 3 +2049 1 +2050 3 +2051 2 +2052 3 +2053 1 +2054 1 +2055 1 +2056 2 +2057 3 +2058 3 +2059 1 +2060 1 +2061 0 +2062 1 +2063 2 +2064 0 +2065 0 +2066 2 +2067 1 +2068 2 +2069 0 +2070 2 +2071 2 +2072 1 +2073 2 +2074 2 +2075 0 +2076 0 +2077 1 +2078 0 +2079 3 +2080 3 +2081 1 +2082 1 +2083 1 +2084 0 +2085 1 +2086 2 +2087 3 +2088 1 +2089 2 +2090 0 +2091 3 +2092 1 +2093 2 +2094 0 +2095 2 +2096 3 +2097 2 +2098 1 +2099 2 +2100 1 +2101 1 +2102 1 +2103 3 +2104 3 +2105 0 +2106 2 +2107 2 +2108 2 +2109 2 +2110 0 +2111 1 +2112 1 +2113 1 +2114 1 +2115 0 +2116 2 +2117 2 +2118 0 +2119 1 +2120 3 +2121 3 +2122 2 +2123 2 +2124 3 +2125 0 +2126 1 +2127 1 +2128 3 +2129 2 +2130 0 +2131 1 +2132 3 +2133 0 +2134 3 +2135 3 +2136 3 +2137 1 +2138 1 +2139 1 +2140 1 +2141 0 +2142 0 +2143 2 +2144 3 +2145 1 +2146 3 +2147 0 +2148 3 +2149 0 +2150 2 +2151 1 +2152 
2 +2153 2 +2154 3 +2155 1 +2156 2 +2157 1 +2158 2 +2159 1 +2160 1 +2161 2 +2162 1 +2163 3 +2164 2 +2165 2 +2166 2 +2167 1 +2168 1 +2169 2 +2170 3 +2171 2 +2172 3 +2173 0 +2174 2 +2175 1 +2176 0 +2177 3 +2178 1 +2179 3 +2180 3 +2181 3 +2182 0 +2183 1 +2184 3 +2185 2 +2186 2 +2187 2 +2188 3 +2189 3 +2190 1 +2191 1 +2192 2 +2193 0 +2194 2 +2195 1 +2196 1 +2197 3 +2198 2 +2199 2 +2200 2 +2201 0 +2202 1 +2203 2 +2204 0 +2205 3 +2206 2 +2207 1 +2208 3 +2209 0 +2210 3 +2211 1 +2212 1 +2213 2 +2214 2 +2215 3 +2216 1 +2217 1 +2218 0 +2219 1 +2220 0 +2221 0 +2222 0 +2223 0 +2224 0 +2225 1 +2226 1 +2227 2 +2228 3 +2229 1 +2230 2 +2231 1 +2232 1 +2233 0 +2234 3 +2235 0 +2236 3 +2237 0 +2238 2 +2239 2 +2240 3 +2241 2 +2242 2 +2243 3 +2244 0 +2245 3 +2246 2 +2247 2 +2248 3 +2249 3 +2250 0 +2251 1 +2252 1 +2253 3 +2254 2 +2255 1 +2256 0 +2257 3 +2258 2 +2259 2 +2260 3 +2261 3 +2262 2 +2263 1 +2264 2 +2265 1 +2266 0 +2267 3 +2268 2 +2269 1 +2270 2 +2271 1 +2272 1 +2273 3 +2274 0 +2275 3 +2276 0 +2277 2 +2278 3 +2279 3 +2280 2 +2281 0 +2282 2 +2283 2 +2284 1 +2285 1 +2286 0 +2287 1 +2288 1 +2289 3 +2290 0 +2291 1 +2292 2 +2293 2 +2294 2 +2295 2 +2296 3 +2297 2 +2298 3 +2299 2 +2300 1 +2301 0 +2302 2 +2303 3 +2304 1 +2305 0 +2306 1 +2307 1 +2308 1 +2309 2 +2310 2 +2311 2 +2312 2 +2313 1 +2314 3 +2315 2 +2316 2 +2317 3 +2318 3 +2319 0 +2320 1 +2321 2 +2322 0 +2323 2 +2324 0 +2325 1 +2326 0 +2327 0 +2328 2 +2329 2 +2330 1 +2331 0 +2332 3 +2333 1 +2334 0 +2335 2 +2336 2 +2337 0 +2338 0 +2339 3 +2340 0 +2341 1 +2342 2 +2343 1 +2344 1 +2345 0 +2346 0 +2347 1 +2348 2 +2349 3 +2350 1 +2351 2 +2352 2 +2353 2 +2354 3 +2355 1 +2356 3 +2357 3 +2358 2 +2359 3 +2360 3 +2361 1 +2362 3 +2363 2 +2364 3 +2365 3 +2366 1 +2367 3 +2368 0 +2369 1 +2370 3 +2371 1 +2372 0 +2373 0 +2374 2 +2375 3 +2376 1 +2377 2 +2378 1 +2379 0 +2380 3 +2381 1 +2382 1 +2383 1 +2384 3 +2385 3 +2386 3 +2387 1 +2388 1 +2389 0 +2390 0 +2391 3 +2392 2 +2393 2 +2394 3 +2395 1 +2396 0 +2397 2 +2398 2 +2399 3 +2400 3 +2401 3 +2402 
1 +2403 1 +2404 1 +2405 0 +2406 0 +2407 2 +2408 3 +2409 1 +2410 2 +2411 1 +2412 0 +2413 0 +2414 1 +2415 0 +2416 0 +2417 2 +2418 0 +2419 1 +2420 0 +2421 3 +2422 2 +2423 0 +2424 2 +2425 3 +2426 2 +2427 0 +2428 3 +2429 2 +2430 3 +2431 1 +2432 1 +2433 0 +2434 3 +2435 0 +2436 0 +2437 3 +2438 3 +2439 1 +2440 3 +2441 3 +2442 3 +2443 1 +2444 1 +2445 0 +2446 1 +2447 3 +2448 0 +2449 3 +2450 1 +2451 3 +2452 3 +2453 2 +2454 1 +2455 3 +2456 1 +2457 2 +2458 2 +2459 2 +2460 2 +2461 1 +2462 1 +2463 2 +2464 1 +2465 0 +2466 1 +2467 1 +2468 2 +2469 0 +2470 3 +2471 0 +2472 0 +2473 3 +2474 2 +2475 1 +2476 1 +2477 2 +2478 3 +2479 2 +2480 1 +2481 3 +2482 3 +2483 1 +2484 1 +2485 0 +2486 3 +2487 1 +2488 2 +2489 0 +2490 0 +2491 0 +2492 0 +2493 0 +2494 0 +2495 2 +2496 3 +2497 1 +2498 1 +2499 1 +2500 1 +2501 2 +2502 3 +2503 2 +2504 1 +2505 2 +2506 0 +2507 0 +2508 1 +2509 0 +2510 2 +2511 1 +2512 1 +2513 1 +2514 1 +2515 1 +2516 1 +2517 0 +2518 3 +2519 0 +2520 1 +2521 1 +2522 0 +2523 2 +2524 0 +2525 0 +2526 0 +2527 2 +2528 2 +2529 2 +2530 2 +2531 0 +2532 2 +2533 1 +2534 2 +2535 0 +2536 2 +2537 1 +2538 3 +2539 0 +2540 2 +2541 0 +2542 0 +2543 1 +2544 0 +2545 1 +2546 1 +2547 2 +2548 1 +2549 2 +2550 1 +2551 1 +2552 1 +2553 0 +2554 2 +2555 1 +2556 3 +2557 2 +2558 0 +2559 1 +2560 1 +2561 2 +2562 0 +2563 2 +2564 2 +2565 2 +2566 2 +2567 2 +2568 1 +2569 2 +2570 3 +2571 2 +2572 0 +2573 2 +2574 3 +2575 1 +2576 2 +2577 1 +2578 2 +2579 0 +2580 2 +2581 1 +2582 0 +2583 3 +2584 0 +2585 0 +2586 1 +2587 0 +2588 3 +2589 3 +2590 1 +2591 3 +2592 2 +2593 2 +2594 0 +2595 1 +2596 0 +2597 3 +2598 0 +2599 1 +2600 0 +2601 1 +2602 1 +2603 2 +2604 1 +2605 2 +2606 1 +2607 1 +2608 1 +2609 1 +2610 3 +2611 0 +2612 3 +2613 0 +2614 2 +2615 0 +2616 0 +2617 2 +2618 1 +2619 1 +2620 3 +2621 0 +2622 3 +2623 2 +2624 1 +2625 1 +2626 0 +2627 3 +2628 1 +2629 1 +2630 3 +2631 1 +2632 1 +2633 1 +2634 0 +2635 0 +2636 3 +2637 1 +2638 0 +2639 1 +2640 1 +2641 0 +2642 3 +2643 3 +2644 1 +2645 2 +2646 3 +2647 1 +2648 2 +2649 0 +2650 1 +2651 0 +2652 
3 +2653 1 +2654 1 +2655 2 +2656 1 +2657 1 +2658 0 +2659 0 +2660 2 +2661 3 +2662 1 +2663 3 +2664 0 +2665 2 +2666 3 +2667 0 +2668 1 +2669 1 +2670 1 +2671 2 +2672 2 +2673 1 +2674 0 +2675 3 +2676 3 +2677 2 +2678 3 +2679 2 +2680 2 +2681 1 +2682 2 +2683 0 +2684 0 +2685 1 +2686 1 +2687 1 +2688 3 +2689 2 +2690 1 +2691 3 +2692 2 +2693 0 +2694 2 +2695 2 +2696 2 +2697 1 +2698 1 +2699 1 +2700 1 +2701 3 +2702 1 +2703 0 +2704 2 +2705 0 +2706 2 +2707 0 +2708 3 +2709 0 +2710 3 +2711 2 +2712 0 +2713 1 +2714 1 +2715 0 +2716 2 +2717 1 +2718 0 +2719 2 +2720 2 +2721 3 +2722 1 +2723 2 +2724 0 +2725 1 +2726 0 +2727 0 +2728 3 +2729 0 +2730 2 +2731 3 +2732 3 +2733 2 +2734 2 +2735 2 +2736 1 +2737 1 +2738 3 +2739 2 +2740 3 +2741 1 +2742 3 +2743 0 +2744 0 +2745 1 +2746 2 +2747 3 +2748 2 +2749 3 +2750 2 +2751 0 +2752 1 +2753 1 +2754 1 +2755 1 +2756 3 +2757 3 +2758 0 +2759 0 +2760 0 +2761 1 +2762 0 +2763 0 +2764 0 +2765 0 +2766 0 +2767 0 +2768 3 +2769 2 +2770 3 +2771 3 +2772 1 +2773 3 +2774 1 +2775 2 +2776 1 +2777 3 +2778 2 +2779 2 +2780 1 +2781 2 +2782 2 +2783 2 +2784 2 +2785 3 +2786 0 +2787 1 +2788 0 +2789 0 +2790 3 +2791 2 +2792 3 +2793 3 +2794 0 +2795 0 +2796 0 +2797 3 +2798 1 +2799 3 +2800 1 +2801 2 +2802 0 +2803 2 +2804 0 +2805 3 +2806 0 +2807 2 +2808 0 +2809 0 +2810 0 +2811 2 +2812 0 +2813 0 +2814 1 +2815 1 +2816 2 +2817 3 +2818 3 +2819 2 +2820 2 +2821 2 +2822 2 +2823 1 +2824 0 +2825 1 +2826 1 +2827 1 +2828 0 +2829 1 +2830 3 +2831 1 +2832 2 +2833 3 +2834 3 +2835 2 +2836 1 +2837 3 +2838 0 +2839 0 +2840 3 +2841 0 +2842 0 +2843 1 +2844 2 +2845 0 +2846 1 +2847 0 +2848 2 +2849 0 +2850 3 +2851 3 +2852 3 +2853 1 +2854 3 +2855 0 +2856 0 +2857 2 +2858 3 +2859 1 +2860 3 +2861 3 +2862 2 +2863 3 +2864 1 +2865 3 +2866 1 +2867 3 +2868 0 +2869 1 +2870 3 +2871 3 +2872 3 +2873 3 +2874 2 +2875 3 +2876 3 +2877 0 +2878 3 +2879 3 +2880 3 +2881 3 +2882 1 +2883 3 +2884 1 +2885 3 +2886 1 +2887 3 +2888 3 +2889 2 +2890 3 +2891 1 +2892 2 +2893 3 +2894 1 +2895 1 +2896 3 +2897 2 +2898 1 +2899 3 +2900 3 +2901 1 +2902 
0 +2903 0 +2904 2 +2905 3 +2906 1 +2907 1 +2908 1 +2909 3 +2910 3 +2911 3 +2912 3 +2913 0 +2914 0 +2915 2 +2916 1 +2917 3 +2918 1 +2919 3 +2920 2 +2921 3 +2922 3 +2923 3 +2924 3 +2925 3 +2926 3 +2927 2 +2928 0 +2929 2 +2930 0 +2931 3 +2932 3 +2933 3 +2934 2 +2935 1 +2936 2 +2937 1 +2938 3 +2939 3 +2940 1 +2941 0 +2942 3 +2943 3 +2944 1 +2945 3 +2946 2 +2947 3 +2948 2 +2949 2 +2950 1 +2951 3 +2952 2 +2953 1 +2954 2 +2955 3 +2956 3 +2957 2 +2958 3 +2959 2 +2960 2 +2961 0 +2962 1 +2963 2 +2964 1 +2965 3 +2966 0 +2967 0 +2968 3 +2969 2 +2970 1 +2971 1 +2972 2 +2973 3 +2974 3 +2975 3 +2976 0 +2977 0 +2978 3 +2979 0 +2980 1 +2981 3 +2982 0 +2983 1 +2984 1 +2985 3 +2986 3 +2987 3 +2988 3 +2989 3 +2990 3 +2991 3 +2992 3 +2993 1 +2994 1 +2995 3 +2996 1 +2997 0 +2998 0 +2999 3 +3000 0 +3001 3 +3002 2 +3003 2 +3004 1 +3005 1 +3006 0 +3007 3 +3008 3 +3009 3 +3010 1 +3011 3 +3012 3 +3013 2 +3014 1 +3015 3 +3016 1 +3017 3 +3018 3 +3019 3 +3020 1 +3021 3 +3022 3 +3023 3 +3024 3 +3025 3 +3026 1 +3027 3 +3028 1 +3029 1 +3030 3 +3031 3 +3032 3 +3033 3 +3034 2 +3035 0 +3036 1 +3037 1 +3038 3 +3039 1 +3040 3 +3041 1 +3042 1 +3043 3 +3044 1 +3045 2 +3046 3 +3047 1 +3048 1 +3049 0 +3050 3 +3051 3 +3052 0 +3053 0 +3054 3 +3055 3 +3056 0 +3057 3 +3058 3 +3059 1 +3060 1 +3061 1 +3062 3 +3063 3 +3064 2 +3065 3 +3066 3 +3067 1 +3068 2 +3069 3 +3070 3 +3071 0 +3072 0 +3073 0 +3074 3 +3075 3 +3076 1 +3077 1 +3078 3 +3079 3 +3080 3 +3081 3 +3082 1 +3083 3 +3084 3 +3085 3 +3086 2 +3087 1 +3088 3 +3089 1 +3090 0 +3091 3 +3092 1 +3093 3 +3094 2 +3095 3 +3096 1 +3097 0 +3098 3 +3099 1 +3100 1 +3101 1 +3102 1 +3103 2 +3104 1 +3105 1 +3106 0 +3107 3 +3108 3 +3109 1 +3110 3 +3111 0 +3112 2 +3113 1 +3114 0 +3115 1 +3116 3 +3117 1 +3118 3 +3119 2 +3120 2 +3121 1 +3122 0 +3123 1 +3124 2 +3125 1 +3126 0 +3127 0 +3128 1 +3129 3 +3130 1 +3131 0 +3132 3 +3133 0 +3134 0 +3135 1 +3136 1 +3137 3 +3138 3 +3139 0 +3140 3 +3141 2 +3142 1 +3143 1 +3144 3 +3145 3 +3146 3 +3147 3 +3148 1 +3149 3 +3150 3 +3151 3 +3152 
3 +3153 3 +3154 3 +3155 3 +3156 3 +3157 3 +3158 3 +3159 3 +3160 3 +3161 3 +3162 3 +3163 1 +3164 1 +3165 2 +3166 3 +3167 1 +3168 0 +3169 3 +3170 0 +3171 2 +3172 3 +3173 1 +3174 1 +3175 1 +3176 3 +3177 3 +3178 3 +3179 3 +3180 0 +3181 3 +3182 0 +3183 1 +3184 1 +3185 3 +3186 1 +3187 0 +3188 3 +3189 3 +3190 0 +3191 1 +3192 1 +3193 2 +3194 3 +3195 3 +3196 1 +3197 1 +3198 0 +3199 1 +3200 3 +3201 3 +3202 3 +3203 2 +3204 1 +3205 3 +3206 1 +3207 1 +3208 1 +3209 1 +3210 2 +3211 1 +3212 0 +3213 3 +3214 3 +3215 3 +3216 1 +3217 3 +3218 3 +3219 1 +3220 3 +3221 3 +3222 1 +3223 3 +3224 0 +3225 3 +3226 2 +3227 3 +3228 2 +3229 2 +3230 2 +3231 0 +3232 3 +3233 1 +3234 3 +3235 1 +3236 1 +3237 3 +3238 0 +3239 3 +3240 3 +3241 3 +3242 3 +3243 0 +3244 1 +3245 3 +3246 1 +3247 3 +3248 1 +3249 3 +3250 3 +3251 1 +3252 2 +3253 1 +3254 3 +3255 2 +3256 3 +3257 3 +3258 3 +3259 2 +3260 3 +3261 3 +3262 1 +3263 1 +3264 2 +3265 0 +3266 2 +3267 0 +3268 2 +3269 0 +3270 3 +3271 2 +3272 2 +3273 0 +3274 3 +3275 3 +3276 3 +3277 3 +3278 1 +3279 3 +3280 1 +3281 3 +3282 3 +3283 3 +3284 3 +3285 1 +3286 1 +3287 3 +3288 3 +3289 3 +3290 0 +3291 3 +3292 0 +3293 1 +3294 3 +3295 1 +3296 3 +3297 2 +3298 1 +3299 1 +3300 3 +3301 1 +3302 0 +3303 1 +3304 3 +3305 2 +3306 2 +3307 0 +3308 1 +3309 1 +3310 1 +3311 3 +3312 3 +3313 3 +3314 1 +3315 0 +3316 3 +3317 2 +3318 3 +3319 1 +3320 0 +3321 1 +3322 0 +3323 3 +3324 2 +3325 3 +3326 1 +3327 3 +3328 1 +3329 3 +3330 0 +3331 3 +3332 1 +3333 3 +3334 1 +3335 1 +3336 3 +3337 2 +3338 3 +3339 1 +3340 3 +3341 3 +3342 3 +3343 3 +3344 1 +3345 3 +3346 1 +3347 1 +3348 3 +3349 3 +3350 3 +3351 3 +3352 3 +3353 3 +3354 3 +3355 3 +3356 3 +3357 1 +3358 1 +3359 1 +3360 3 +3361 1 +3362 1 +3363 0 +3364 1 +3365 3 +3366 2 +3367 3 +3368 3 +3369 2 +3370 1 +3371 0 +3372 3 +3373 1 +3374 3 +3375 3 +3376 0 +3377 2 +3378 0 +3379 1 +3380 3 +3381 0 +3382 3 +3383 3 +3384 3 +3385 3 +3386 3 +3387 1 +3388 3 +3389 0 +3390 3 +3391 3 +3392 3 +3393 3 +3394 0 +3395 3 +3396 0 +3397 3 +3398 3 +3399 2 +3400 2 +3401 3 +3402 
0 +3403 3 +3404 3 +3405 3 +3406 3 +3407 3 +3408 3 +3409 3 +3410 3 +3411 3 +3412 1 +3413 1 +3414 1 +3415 3 +3416 2 +3417 1 +3418 2 +3419 3 +3420 3 +3421 1 +3422 2 +3423 1 +3424 2 +3425 2 +3426 1 +3427 3 +3428 3 +3429 3 +3430 3 +3431 2 +3432 3 +3433 3 +3434 3 +3435 1 +3436 0 +3437 0 +3438 1 +3439 1 +3440 3 +3441 3 +3442 3 +3443 2 +3444 1 +3445 3 +3446 1 +3447 1 +3448 1 +3449 3 +3450 2 +3451 2 +3452 0 +3453 3 +3454 1 +3455 1 +3456 1 +3457 2 +3458 3 +3459 3 +3460 3 +3461 3 +3462 0 +3463 3 +3464 1 +3465 1 +3466 3 +3467 1 +3468 3 +3469 1 +3470 2 +3471 3 +3472 3 +3473 3 +3474 1 +3475 3 +3476 1 +3477 1 +3478 2 +3479 0 +3480 3 +3481 1 +3482 2 +3483 2 +3484 0 +3485 3 +3486 3 +3487 2 +3488 1 +3489 0 +3490 1 +3491 3 +3492 1 +3493 2 +3494 1 +3495 1 +3496 1 +3497 1 +3498 3 +3499 3 +3500 1 +3501 1 +3502 3 +3503 1 +3504 2 +3505 3 +3506 2 +3507 3 +3508 1 +3509 1 +3510 0 +3511 3 +3512 3 +3513 1 +3514 3 +3515 0 +3516 2 +3517 1 +3518 3 +3519 1 +3520 0 +3521 3 +3522 1 +3523 3 +3524 3 +3525 2 +3526 1 +3527 2 +3528 1 +3529 3 +3530 2 +3531 2 +3532 2 +3533 2 +3534 2 +3535 3 +3536 2 +3537 3 +3538 1 +3539 2 +3540 3 +3541 3 +3542 2 +3543 1 +3544 3 +3545 1 +3546 1 +3547 3 +3548 3 +3549 3 +3550 3 +3551 2 +3552 3 +3553 3 +3554 1 +3555 3 +3556 3 +3557 3 +3558 3 +3559 2 +3560 1 +3561 1 +3562 0 +3563 1 +3564 1 +3565 2 +3566 0 +3567 2 +3568 2 +3569 0 +3570 0 +3571 3 +3572 1 +3573 3 +3574 3 +3575 2 +3576 3 +3577 1 +3578 0 +3579 2 +3580 3 +3581 0 +3582 1 +3583 1 +3584 3 +3585 0 +3586 0 +3587 0 +3588 0 +3589 3 +3590 1 +3591 0 +3592 1 +3593 2 +3594 3 +3595 2 +3596 3 +3597 3 +3598 0 +3599 3 +3600 2 +3601 1 +3602 0 +3603 3 +3604 2 +3605 3 +3606 2 +3607 2 +3608 3 +3609 0 +3610 1 +3611 2 +3612 3 +3613 2 +3614 2 +3615 0 +3616 3 +3617 1 +3618 1 +3619 0 +3620 3 +3621 0 +3622 1 +3623 1 +3624 1 +3625 0 +3626 3 +3627 1 +3628 3 +3629 2 +3630 3 +3631 0 +3632 2 +3633 2 +3634 3 +3635 0 +3636 0 +3637 1 +3638 3 +3639 3 +3640 0 +3641 2 +3642 0 +3643 3 +3644 3 +3645 1 +3646 0 +3647 2 +3648 2 +3649 2 +3650 1 +3651 2 +3652 
3 +3653 0 +3654 0 +3655 3 +3656 1 +3657 3 +3658 1 +3659 2 +3660 3 +3661 1 +3662 3 +3663 3 +3664 2 +3665 0 +3666 3 +3667 2 +3668 2 +3669 3 +3670 3 +3671 1 +3672 3 +3673 3 +3674 1 +3675 3 +3676 1 +3677 0 +3678 1 +3679 3 +3680 3 +3681 2 +3682 3 +3683 1 +3684 3 +3685 0 +3686 0 +3687 0 +3688 2 +3689 3 +3690 3 +3691 3 +3692 3 +3693 1 +3694 3 +3695 1 +3696 3 +3697 3 +3698 1 +3699 3 +3700 3 +3701 1 +3702 3 +3703 3 +3704 3 +3705 1 +3706 3 +3707 1 +3708 1 +3709 1 +3710 0 +3711 1 +3712 3 +3713 3 +3714 3 +3715 1 +3716 3 +3717 3 +3718 3 +3719 3 +3720 0 +3721 2 +3722 0 +3723 3 +3724 1 +3725 1 +3726 1 +3727 3 +3728 1 +3729 2 +3730 3 +3731 3 +3732 3 +3733 3 +3734 3 +3735 1 +3736 1 +3737 3 +3738 1 +3739 0 +3740 1 +3741 3 +3742 1 +3743 3 +3744 2 +3745 1 +3746 3 +3747 3 +3748 3 +3749 3 +3750 3 +3751 3 +3752 1 +3753 1 +3754 2 +3755 0 +3756 3 +3757 3 +3758 3 +3759 3 +3760 3 +3761 3 +3762 1 +3763 1 +3764 3 +3765 3 +3766 1 +3767 3 +3768 3 +3769 1 +3770 1 +3771 3 +3772 3 +3773 1 +3774 2 +3775 3 +3776 3 +3777 1 +3778 1 +3779 3 +3780 2 +3781 2 +3782 1 +3783 3 +3784 1 +3785 3 +3786 0 +3787 3 +3788 3 +3789 3 +3790 2 +3791 3 +3792 3 +3793 1 +3794 3 +3795 3 +3796 0 +3797 3 +3798 1 +3799 3 diff --git a/preparer_ag_nenws.py b/preparer_ag_nenws.py new file mode 100644 index 0000000..ff77eef --- /dev/null +++ b/preparer_ag_nenws.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 + +import json +import logging +from pathlib import Path +from typing import List, Dict + +from datasets import load_dataset + +logger = logging.getLogger(__name__) + +MAP_LABEL_TRANSLATION = { + 0: 'world', + 1: 'sport', + 2: 'business', + 3: 'scitech' +} + + +def save_as_translations(original_save_path: Path, data_to_save: List[Dict]) -> None: + file_name = 's2s-' + original_save_path.name + file_path = original_save_path.parent / file_name + + print(f'Saving into: {file_path}') + with open(file_path, 'wt') as f_write: + for data_line in data_to_save: + label = data_line['label'] + new_label = MAP_LABEL_TRANSLATION[label] + 
data_line['label'] = new_label + data_line_str = json.dumps(data_line) + f_write.write(f'{data_line_str}\n') + + +def main() -> None: + loaded_data = load_dataset('ag_news') + logger.info(f'Loaded dataset ag_news: {loaded_data}') + + save_path = Path('data/') + save_train_path = save_path / 'train.json' + save_valid_path = save_path / 'valid.json' + save_test_path = save_path / 'test.json' + if not save_path.exists(): + save_path.mkdir() + + # Read train and validation data + data_train, data_valid, data_test = [], [], [] + for source_data, dataset, max_size in [ + (loaded_data['train'], data_train, None), + (loaded_data['test'], data_valid, None) + ]: + for i, data in enumerate(source_data): + if max_size is not None and i >= max_size: + break + data_line = { + 'label': int(data['label']), + 'text': data['text'], + } + dataset.append(data_line) + logger.info(f'Train: {len(data_train):6d}') + + # Split validation set into 2 classes for validation and test splitting + world, sport, business, scitech = [], [], [], [] + + for data in data_valid: + label = data['label'] + if label == 0: + world.append(data) + elif label == 1: + sport.append(data) + elif label == 2: + business.append(data) + elif label == 3: + scitech.append(data) + + logger.info(f'World: {len(world):6d}') + logger.info(f'Sport: {len(sport):6d}') + logger.info(f'Business: {len(business):6d}') + logger.info(f'Scitech: {len(scitech):6d}') + + print(world) + print(f'World: {len(world)}') + print(f'Sport: {len(sport):6d}') + print(f'Business: {len(business):6d}') + print(f'Scitech: {len(scitech):6d}') + + + # Split 2 classes into validation and test + size_half_world = int(len(world) / 2) + size_half_sport = int(len(sport) / 2) + size_half_business = int(len(business) / 2) + size_half_scitech = int(len(scitech) / 2) + logger.info(f'Valid: {len(data_valid):6d}') + logger.info(f'Test : {len(data_test):6d}') + + data_valid = world[:size_half_world] + sport[:size_half_sport] + business[:size_half_business] + 
scitech[:size_half_scitech] + data_test = world[size_half_world:] + sport[size_half_sport:] + business[size_half_business:] + scitech[size_half_scitech:] + + # Save files + for file_path, data_to_save in [ + (save_train_path, data_train), + (save_valid_path, data_valid), + (save_test_path, data_test) + ]: + print(f'Saving into: {file_path}') + with open(file_path, 'wt') as f_write: + for data_line in data_to_save: + data_line_str = json.dumps(data_line) + f_write.write(f'{data_line_str}\n') + + save_as_translations(file_path, data_to_save) + + +if __name__ == '__main__': + main() diff --git a/projektV2.ipynb b/projektV2.ipynb new file mode 100644 index 0000000..7450a9b --- /dev/null +++ b/projektV2.ipynb @@ -0,0 +1,7335 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install transformers torch datasets evaluate scikit-learn sacremoses sentencepiece ipywidgets > /dev/null" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Roberta" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Modifications" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Custom classification head with bigger hidden size\n", + "- Changed activation function to GELU" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from torch import nn\n", + "from transformers import RobertaForSequenceClassification, RobertaModel\n", + "\n", + "\n", + "# Simple version #\n", + "\n", + "class RobertaClassificationHeadCustomSimple(nn.Module):\n", + " \"\"\"Head for sentence-level classification tasks.\"\"\"\n", + "\n", + " def __init__(self, config):\n", + " super().__init__()\n", + " hidden_size = config.hidden_size\n", + " 
self.dense_1 = nn.Linear(hidden_size, 4 * hidden_size)\n", + " self.dense_2 = nn.Linear(4 * hidden_size, hidden_size)\n", + " classifier_dropout = (\n", + " config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob\n", + " )\n", + " self.dropout = nn.Dropout(classifier_dropout)\n", + " self.out_proj = nn.Linear(hidden_size, config.num_labels)\n", + " self.activation = nn.GELU()\n", + "\n", + " def forward(self, features, **kwargs):\n", + " x = features[:, 0, :] # take token (equiv. to [CLS])\n", + "\n", + " x = self.dense_1(x)\n", + " x = self.activation(x)\n", + " x = self.dropout(x)\n", + "\n", + " x = self.dense_2(x)\n", + " x = self.activation(x)\n", + " x = self.dropout(x)\n", + "\n", + " x = self.out_proj(x)\n", + " return x\n", + "\n", + "\n", + "class RobertaForSequenceClassificationCustomSimple(RobertaForSequenceClassification):\n", + " _keys_to_ignore_on_load_missing = [r\"position_ids\"]\n", + "\n", + " def __init__(self, config):\n", + " super().__init__(config)\n", + " self.num_labels = config.num_labels\n", + " self.config = config\n", + "\n", + " self.roberta = RobertaModel(config, add_pooling_layer=False)\n", + " self.classifier = RobertaClassificationHeadCustomSimple(config)\n", + "\n", + " # Initialize weights and apply final processing\n", + " self.post_init()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassificationCustomSimple: ['roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']\n", + "- This IS expected if you are initializing 
RobertaForSequenceClassificationCustomSimple from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForSequenceClassificationCustomSimple from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of RobertaForSequenceClassificationCustomSimple were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense_1.weight', 'classifier.out_proj.bias', 'classifier.dense_2.bias', 'classifier.dense_2.weight', 'classifier.out_proj.weight', 'classifier.dense_1.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + }, + { + "data": { + "text/plain": [ + "RobertaForSequenceClassificationCustomSimple(\n", + " (roberta): RobertaModel(\n", + " (embeddings): RobertaEmbeddings(\n", + " (word_embeddings): Embedding(50265, 768, padding_idx=1)\n", + " (position_embeddings): Embedding(514, 768, padding_idx=1)\n", + " (token_type_embeddings): Embedding(1, 768)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (encoder): RobertaEncoder(\n", + " (layer): ModuleList(\n", + " (0): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), 
eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (1): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (2): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): 
Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (3): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (4): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): 
Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (5): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (6): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, 
bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (7): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (8): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): 
Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (9): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (10): RobertaLayer(\n", 
+ " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (11): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " 
(dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (classifier): RobertaClassificationHeadCustomSimple(\n", + " (dense_1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (dense_2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (out_proj): Linear(in_features=768, out_features=2, bias=True)\n", + " (activation): GELU(approximate='none')\n", + " )\n", + ")" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "RobertaForSequenceClassificationCustomSimple.from_pretrained(\"roberta-base\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "02/16/2023 15:21:14 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "02/16/2023 15:21:14 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "auto_find_batch_size=False,\n", + "bf16=False,\n", + "bf16_full_eval=False,\n", + "data_seed=None,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_bucket_cap_mb=None,\n", + "ddp_find_unused_parameters=None,\n", + "ddp_timeout=1800,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=True,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_delay=0,\n", + "eval_steps=250,\n", + "evaluation_strategy=steps,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "fsdp=[],\n", + "fsdp_min_num_params=0,\n", 
+ "fsdp_transformer_layer_cls_to_wrap=None,\n", + "full_determinism=False,\n", + "gradient_accumulation_steps=1,\n", + "gradient_checkpointing=False,\n", + "greater_is_better=True,\n", + "group_by_length=False,\n", + "half_precision_backend=auto,\n", + "hub_model_id=None,\n", + "hub_private_repo=False,\n", + "hub_strategy=every_save,\n", + "hub_token=,\n", + "ignore_data_skip=False,\n", + "include_inputs_for_metrics=False,\n", + "jit_mode_eval=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=2e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=True,\n", + "local_rank=-1,\n", + "log_level=passive,\n", + "log_level_replica=passive,\n", + "log_on_each_node=True,\n", + "logging_dir=out/roberta/runs/Feb16_15-21-13_DESKTOP-R7JO8BQ,\n", + "logging_first_step=False,\n", + "logging_nan_inf_filter=True,\n", + "logging_steps=100,\n", + "logging_strategy=steps,\n", + "lr_scheduler_type=linear,\n", + "max_grad_norm=1.0,\n", + "max_steps=2500,\n", + "metric_for_best_model=accuracy,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=1.0,\n", + "optim=adamw_hf,\n", + "optim_args=None,\n", + "output_dir=out/roberta,\n", + "overwrite_output_dir=False,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=None,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=,\n", + "ray_scope=last,\n", + "remove_unused_columns=True,\n", + "report_to=[],\n", + "resume_from_checkpoint=None,\n", + "run_name=out/roberta,\n", + "save_on_each_node=False,\n", + "save_steps=250,\n", + "save_strategy=steps,\n", + "save_total_limit=5,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tf32=None,\n", + "torch_compile=False,\n", + "torch_compile_backend=None,\n", + "torch_compile_mode=None,\n", + "torchdynamo=None,\n", + "tpu_metrics_debug=False,\n", + 
"tpu_num_cores=None,\n", + "use_ipex=False,\n", + "use_legacy_prediction_loop=False,\n", + "use_mps_device=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + "xpu_backend=None,\n", + ")\n", + "02/16/2023 15:21:14 - INFO - __main__ - Checkpoint detected, resuming training at out/roberta/checkpoint-2500. To avoid this behavior, change the `--output_dir` or add `--overwrite_output_dir` to train from scratch.\n", + "02/16/2023 15:21:14 - INFO - __main__ - load a local file for train: data/train.json\n", + "02/16/2023 15:21:14 - INFO - __main__ - load a local file for validation: data/valid.json\n", + "02/16/2023 15:21:14 - INFO - __main__ - load a local file for test: data/test.json\n", + "02/16/2023 15:21:14 - WARNING - datasets.builder - Using custom data configuration default-f6e8039906850c57\n", + "02/16/2023 15:21:14 - INFO - datasets.info - Loading Dataset Infos from /home/jacob/anaconda3/envs/ugp/lib/python3.10/site-packages/datasets/packaged_modules/json\n", + "02/16/2023 15:21:14 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "02/16/2023 15:21:14 - INFO - datasets.info - Loading Dataset info from .cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\n", + "02/16/2023 15:21:14 - WARNING - datasets.builder - Found cached dataset json (/home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n", + "02/16/2023 15:21:14 - INFO - datasets.info - Loading Dataset info from /home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\n", + "100%|█████████████████████████████████████████████| 3/3 [00:00<00:00, 48.00it/s]\n", + "[INFO|configuration_utils.py:660] 2023-02-16 15:21:15,174 >> 
loading configuration file config.json from cache at .cache_training/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n", + "[INFO|configuration_utils.py:712] 2023-02-16 15:21:15,175 >> Model config RobertaConfig {\n", + " \"_name_or_path\": \"roberta-base\",\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"classifier_dropout\": null,\n", + " \"eos_token_id\": 2,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": \"LABEL_0\",\n", + " \"1\": \"LABEL_1\",\n", + " \"2\": \"LABEL_2\",\n", + " \"3\": \"LABEL_3\"\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"LABEL_0\": 0,\n", + " \"LABEL_1\": 1,\n", + " \"LABEL_2\": 2,\n", + " \"LABEL_3\": 3\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_auto.py:458] 2023-02-16 15:21:15,654 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n", + "[INFO|configuration_utils.py:660] 2023-02-16 15:21:16,123 >> loading configuration file config.json from cache at .cache_training/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n", + "[INFO|configuration_utils.py:712] 2023-02-16 15:21:16,123 >> Model config RobertaConfig {\n", + " \"_name_or_path\": \"roberta-base\",\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " 
\"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"classifier_dropout\": null,\n", + " \"eos_token_id\": 2,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:21:17,045 >> loading file vocab.json from cache at .cache_training/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:21:17,045 >> loading file merges.txt from cache at .cache_training/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:21:17,045 >> loading file tokenizer.json from cache at .cache_training/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:21:17,045 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:21:17,045 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:21:17,045 >> loading file tokenizer_config.json from cache at None\n", + "[INFO|configuration_utils.py:660] 2023-02-16 15:21:17,045 >> loading configuration file config.json from cache at .cache_training/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n", + 
"[INFO|configuration_utils.py:712] 2023-02-16 15:21:17,046 >> Model config RobertaConfig {\n", + " \"_name_or_path\": \"roberta-base\",\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"classifier_dropout\": null,\n", + " \"eos_token_id\": 2,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "02/16/2023 15:21:17 - INFO - __main__ - Using hidden states in model: False\n", + "-------------------------------------------------------- Using hidden: False\n", + "02/16/2023 15:21:17 - INFO - __main__ - Using implementation from class: RobertaForSequenceClassificationCustomSimple\n", + "[INFO|modeling_utils.py:2275] 2023-02-16 15:21:17,101 >> loading weights file pytorch_model.bin from cache at .cache_training/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin\n", + "[WARNING|modeling_utils.py:2847] 2023-02-16 15:21:22,965 >> Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassificationCustomSimple: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.bias']\n", + "- This IS expected if you are initializing RobertaForSequenceClassificationCustomSimple from the checkpoint of a model trained 
on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForSequenceClassificationCustomSimple from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:2859] 2023-02-16 15:21:22,965 >> Some weights of RobertaForSequenceClassificationCustomSimple were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense_1.bias', 'classifier.dense_2.weight', 'classifier.out_proj.weight', 'classifier.dense_2.bias', 'classifier.out_proj.bias', 'classifier.dense_1.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "RobertaForSequenceClassificationCustomSimple(\n", + " (roberta): RobertaModel(\n", + " (embeddings): RobertaEmbeddings(\n", + " (word_embeddings): Embedding(50265, 768, padding_idx=1)\n", + " (position_embeddings): Embedding(514, 768, padding_idx=1)\n", + " (token_type_embeddings): Embedding(1, 768)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (encoder): RobertaEncoder(\n", + " (layer): ModuleList(\n", + " (0): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): 
Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (1): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (2): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + 
" (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (3): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (4): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): 
RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (5): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (6): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, 
bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (7): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (8): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, 
out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (9): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (10): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): 
Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (11): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " 
)\n", + " (classifier): RobertaClassificationHeadCustomSimple(\n", + " (dense_1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (dense_2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (out_proj): Linear(in_features=768, out_features=4, bias=True)\n", + " (activation): GELU(approximate='none')\n", + " )\n", + ")\n", + "02/16/2023 15:21:22 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-204a6dc6fcae3352.arrow\n", + "Running tokenizer on dataset: 0%| | 0/4 [00:00> max_steps is given, it will override any value given in num_train_epochs\n", + "[INFO|trainer.py:1972] 2023-02-16 15:21:27,576 >> Loading model from out/roberta/checkpoint-2500.\n", + "[INFO|trainer.py:710] 2023-02-16 15:21:29,498 >> The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassificationCustomSimple.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomSimple.forward`, you can safely ignore this message.\n", + "/home/jacob/anaconda3/envs/ugp/lib/python3.10/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. 
Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", + " warnings.warn(\n", + "[INFO|trainer.py:1650] 2023-02-16 15:21:31,949 >> ***** Running training *****\n", + "[INFO|trainer.py:1651] 2023-02-16 15:21:31,950 >> Num examples = 120000\n", + "[INFO|trainer.py:1652] 2023-02-16 15:21:31,950 >> Num Epochs = 1\n", + "[INFO|trainer.py:1653] 2023-02-16 15:21:31,950 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1654] 2023-02-16 15:21:31,950 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1655] 2023-02-16 15:21:31,950 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1656] 2023-02-16 15:21:31,950 >> Total optimization steps = 2500\n", + "[INFO|trainer.py:1657] 2023-02-16 15:21:31,951 >> Number of trainable parameters = 128780548\n", + "[INFO|trainer.py:1679] 2023-02-16 15:21:31,951 >> Continuing training from checkpoint, will skip to saved global_step\n", + "[INFO|trainer.py:1680] 2023-02-16 15:21:31,951 >> Continuing training from epoch 0\n", + "[INFO|trainer.py:1681] 2023-02-16 15:21:31,951 >> Continuing training from global step 2500\n", + "[INFO|trainer.py:1683] 2023-02-16 15:21:31,951 >> Will skip the first 0 epochs then the first 2500 batches in the first epoch. If this takes a lot of time, you can add the `--ignore_data_skip` flag to your launch command, but you will resume the training on data already seen by your model.\n", + "Skipping the first batches: 0%| | 0/2500 [00:00> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "[INFO|trainer.py:2025] 2023-02-16 15:21:36,738 >> Loading best model from out/roberta/checkpoint-2500 (score: 0.9229999780654907).\n", + "\n", + "\u001b[A{'train_runtime': 5.7972, 'train_samples_per_second': 3449.95, 'train_steps_per_second': 431.244, 'train_loss': 3.2215512862971954e-06, 'epoch': 0.17}\n", + "\n", + "2501it [00:05, 431.57it/s]\u001b[A\n", + "[INFO|trainer.py:2709] 2023-02-16 15:21:37,750 >> Saving model checkpoint to out/roberta\n", + "[INFO|configuration_utils.py:453] 2023-02-16 15:21:37,751 >> Configuration saved in out/roberta/config.json\n", + "[INFO|modeling_utils.py:1704] 2023-02-16 15:21:38,719 >> Model weights saved in out/roberta/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:2160] 2023-02-16 15:21:38,742 >> tokenizer config file saved in out/roberta/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2167] 2023-02-16 15:21:38,743 >> Special tokens file saved in out/roberta/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 0.17\n", + " train_loss = 0.0\n", + " train_runtime = 0:00:05.79\n", + " train_samples = 120000\n", + " train_samples_per_second = 3449.95\n", + " train_steps_per_second = 431.244\n", + "02/16/2023 15:21:38 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:710] 2023-02-16 15:21:38,862 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassificationCustomSimple.forward` and have been ignored: text. 
If text are not expected by `RobertaForSequenceClassificationCustomSimple.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:2964] 2023-02-16 15:21:38,863 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2966] 2023-02-16 15:21:38,863 >> Num examples = 2000\n", + "[INFO|trainer.py:2969] 2023-02-16 15:21:38,863 >> Batch size = 8\n", + "100%|█████████████████████████████████████████| 250/250 [00:16<00:00, 14.75it/s]\n", + "***** eval metrics *****\n", + " epoch = 0.17\n", + " eval_accuracy = 0.923\n", + " eval_loss = 0.296\n", + " eval_runtime = 0:00:17.06\n", + " eval_samples = 2000\n", + " eval_samples_per_second = 117.168\n", + " eval_steps_per_second = 14.646\n", + "02/16/2023 15:21:55 - INFO - __main__ - *** Predict ***\n", + "[INFO|trainer.py:710] 2023-02-16 15:21:55,934 >> The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassificationCustomSimple.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomSimple.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:2964] 2023-02-16 15:21:55,935 >> ***** Running Prediction *****\n", + "[INFO|trainer.py:2966] 2023-02-16 15:21:55,935 >> Num examples = 3800\n", + "[INFO|trainer.py:2969] 2023-02-16 15:21:55,935 >> Batch size = 8\n", + "100%|█████████████████████████████████████████| 475/475 [00:32<00:00, 14.74it/s]\n", + "02/16/2023 15:22:28 - INFO - __main__ - ***** Predict results None *****\n", + "[INFO|modelcard.py:449] 2023-02-16 15:22:28,796 >> Dropping the following result as it does not have all the necessary fields:\n", + "{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9229999780654907}]}\n" + ] + } + ], + "source": [ + "!python run_glue.py \\\n", + " --cache_dir .cache_training \\\n", + " --model_name_or_path roberta-base \\\n", + " --custom_model roberta_simple \\\n", + " 
--train_file data/train.json \\\n", + " --validation_file data/valid.json \\\n", + " --test_file data/test.json \\\n", + " --per_device_train_batch_size 8 \\\n", + " --per_device_eval_batch_size 8 \\\n", + " --do_train \\\n", + " --do_eval \\\n", + " --do_predict \\\n", + " --max_seq_length 128 \\\n", + " --learning_rate 2e-5 \\\n", + " --max_eval_samples 2000 \\\n", + " --max_steps 2500 \\\n", + " --num_train_epochs 1 \\\n", + " --save_strategy steps \\\n", + " --save_steps 250 \\\n", + " --save_total_limit 5 \\\n", + " --logging_strategy steps \\\n", + " --logging_steps 100 \\\n", + " --eval_steps 250 \\\n", + " --evaluation_strategy steps \\\n", + " --metric_for_best_model accuracy \\\n", + " --greater_is_better True \\\n", + " --load_best_model_at_end True \\\n", + " --output_dir out/roberta" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "02/16/2023 16:46:49 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "02/16/2023 16:46:49 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "auto_find_batch_size=False,\n", + "bf16=False,\n", + "bf16_full_eval=False,\n", + "data_seed=None,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_bucket_cap_mb=None,\n", + "ddp_find_unused_parameters=None,\n", + "ddp_timeout=1800,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=True,\n", + "do_train=False,\n", + "eval_accumulation_steps=None,\n", + "eval_delay=0,\n", + "eval_steps=250,\n", + "evaluation_strategy=steps,\n", + 
"fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "fsdp=[],\n", + "fsdp_min_num_params=0,\n", + "fsdp_transformer_layer_cls_to_wrap=None,\n", + "full_determinism=False,\n", + "gradient_accumulation_steps=1,\n", + "gradient_checkpointing=False,\n", + "greater_is_better=True,\n", + "group_by_length=False,\n", + "half_precision_backend=auto,\n", + "hub_model_id=None,\n", + "hub_private_repo=False,\n", + "hub_strategy=every_save,\n", + "hub_token=,\n", + "ignore_data_skip=False,\n", + "include_inputs_for_metrics=False,\n", + "jit_mode_eval=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=2e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=True,\n", + "local_rank=-1,\n", + "log_level=passive,\n", + "log_level_replica=passive,\n", + "log_on_each_node=True,\n", + "logging_dir=out/roberta_results/runs/Feb16_16-46-48_DESKTOP-R7JO8BQ,\n", + "logging_first_step=False,\n", + "logging_nan_inf_filter=True,\n", + "logging_steps=100,\n", + "logging_strategy=steps,\n", + "lr_scheduler_type=linear,\n", + "max_grad_norm=1.0,\n", + "max_steps=2500,\n", + "metric_for_best_model=accuracy,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=1.0,\n", + "optim=adamw_hf,\n", + "optim_args=None,\n", + "output_dir=out/roberta_results,\n", + "overwrite_output_dir=False,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=None,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=,\n", + "ray_scope=last,\n", + "remove_unused_columns=True,\n", + "report_to=[],\n", + "resume_from_checkpoint=None,\n", + "run_name=out/roberta_results,\n", + "save_on_each_node=False,\n", + "save_steps=250,\n", + "save_strategy=steps,\n", + "save_total_limit=5,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tf32=None,\n", 
+ "torch_compile=False,\n", + "torch_compile_backend=None,\n", + "torch_compile_mode=None,\n", + "torchdynamo=None,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_ipex=False,\n", + "use_legacy_prediction_loop=False,\n", + "use_mps_device=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + "xpu_backend=None,\n", + ")\n", + "02/16/2023 16:46:49 - INFO - __main__ - load a local file for train: data/train.json\n", + "02/16/2023 16:46:49 - INFO - __main__ - load a local file for validation: data/valid.json\n", + "02/16/2023 16:46:49 - INFO - __main__ - load a local file for test: data/test.json\n", + "02/16/2023 16:46:50 - WARNING - datasets.builder - Using custom data configuration default-f6e8039906850c57\n", + "02/16/2023 16:46:50 - INFO - datasets.info - Loading Dataset Infos from /home/jacob/anaconda3/envs/ugp/lib/python3.10/site-packages/datasets/packaged_modules/json\n", + "02/16/2023 16:46:50 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "02/16/2023 16:46:50 - INFO - datasets.info - Loading Dataset info from .cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\n", + "02/16/2023 16:46:50 - WARNING - datasets.builder - Found cached dataset json (/home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n", + "02/16/2023 16:46:50 - INFO - datasets.info - Loading Dataset info from /home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\n", + "100%|████████████████████████████████████████████| 3/3 [00:00<00:00, 752.21it/s]\n", + "[INFO|configuration_utils.py:658] 2023-02-16 16:46:50,276 >> loading configuration file out/roberta/config.json\n", + 
"[INFO|configuration_utils.py:712] 2023-02-16 16:46:50,277 >> Model config RobertaConfig {\n", + " \"_name_or_path\": \"out/roberta\",\n", + " \"architectures\": [\n", + " \"RobertaForSequenceClassificationCustomSimple\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"classifier_dropout\": null,\n", + " \"eos_token_id\": 2,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"problem_type\": \"single_label_classification\",\n", + " \"torch_dtype\": \"float32\",\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"use_hidden_states\": false,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:46:50,283 >> loading file vocab.json\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:46:50,283 >> loading file merges.txt\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:46:50,284 >> loading file tokenizer.json\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:46:50,284 >> loading file added_tokens.json\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:46:50,284 >> loading file special_tokens_map.json\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:46:50,284 >> loading file tokenizer_config.json\n", + "02/16/2023 16:46:50 - INFO - __main__ - Using hidden 
states in model: False\n", + "-------------------------------------------------------- Using hidden: False\n", + "02/16/2023 16:46:50 - INFO - __main__ - Using implementation from class: RobertaForSequenceClassificationCustomSimple\n", + "[INFO|modeling_utils.py:2272] 2023-02-16 16:46:50,339 >> loading weights file out/roberta/pytorch_model.bin\n", + "[INFO|modeling_utils.py:2857] 2023-02-16 16:46:52,079 >> All model checkpoint weights were used when initializing RobertaForSequenceClassificationCustomSimple.\n", + "\n", + "[INFO|modeling_utils.py:2865] 2023-02-16 16:46:52,079 >> All the weights of RobertaForSequenceClassificationCustomSimple were initialized from the model checkpoint at out/roberta.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use RobertaForSequenceClassificationCustomSimple for predictions without further training.\n", + "RobertaForSequenceClassificationCustomSimple(\n", + " (roberta): RobertaModel(\n", + " (embeddings): RobertaEmbeddings(\n", + " (word_embeddings): Embedding(50265, 768, padding_idx=1)\n", + " (position_embeddings): Embedding(514, 768, padding_idx=1)\n", + " (token_type_embeddings): Embedding(1, 768)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (encoder): RobertaEncoder(\n", + " (layer): ModuleList(\n", + " (0): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, 
inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (1): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (2): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): 
LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (3): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (4): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): 
RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (5): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (6): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, 
bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (7): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (8): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, 
out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (9): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (10): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): 
Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (11): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " 
)\n", + " (classifier): RobertaClassificationHeadCustomSimple(\n", + " (dense_1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (dense_2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (out_proj): Linear(in_features=768, out_features=4, bias=True)\n", + " (activation): GELU(approximate='none')\n", + " )\n", + ")\n", + "02/16/2023 16:46:52 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-df96547ec55a44ce.arrow\n", + "02/16/2023 16:46:52 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-67b1030adaffbb4a.arrow\n", + "02/16/2023 16:46:52 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-ae09252df5e9bac1.arrow\n", + "02/16/2023 16:46:52 - INFO - __main__ - Set 500 samples for 0-class\n", + "02/16/2023 16:46:52 - INFO - __main__ - Set 500 samples for 1-class\n", + "02/16/2023 16:46:52 - INFO - __main__ - Set 500 samples for 2-class\n", + "02/16/2023 16:46:52 - INFO - __main__ - Set 500 samples for 3-class\n", + "[INFO|trainer.py:511] 2023-02-16 16:46:55,346 >> max_steps is given, it will override any value given in num_train_epochs\n", + "02/16/2023 16:46:55 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:710] 2023-02-16 16:46:55,346 >> The following columns in the evaluation set don't have a corresponding argument in 
`RobertaForSequenceClassificationCustomSimple.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomSimple.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:2964] 2023-02-16 16:46:55,348 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2966] 2023-02-16 16:46:55,348 >> Num examples = 2000\n", + "[INFO|trainer.py:2969] 2023-02-16 16:46:55,348 >> Batch size = 8\n", + "100%|█████████████████████████████████████████| 250/250 [00:17<00:00, 14.53it/s]\n", + "***** eval metrics *****\n", + " eval_accuracy = 0.923\n", + " eval_loss = 0.296\n", + " eval_runtime = 0:00:17.81\n", + " eval_samples = 2000\n", + " eval_samples_per_second = 112.255\n", + " eval_steps_per_second = 14.032\n", + "02/16/2023 16:47:13 - INFO - __main__ - *** Predict ***\n", + "[INFO|trainer.py:710] 2023-02-16 16:47:13,166 >> The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassificationCustomSimple.forward` and have been ignored: text. 
If text are not expected by `RobertaForSequenceClassificationCustomSimple.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:2964] 2023-02-16 16:47:13,167 >> ***** Running Prediction *****\n", + "[INFO|trainer.py:2966] 2023-02-16 16:47:13,167 >> Num examples = 3800\n", + "[INFO|trainer.py:2969] 2023-02-16 16:47:13,167 >> Batch size = 8\n", + "100%|█████████████████████████████████████████| 475/475 [00:32<00:00, 14.53it/s]\n", + "02/16/2023 16:47:45 - INFO - __main__ - ***** Predict results None *****\n", + "[INFO|modelcard.py:449] 2023-02-16 16:47:46,438 >> Dropping the following result as it does not have all the necessary fields:\n", + "{'task': {'name': 'Text Classification', 'type': 'text-classification'}}\n" + ] + } + ], + "source": [ + "!python run_glue.py \\\n", + " --cache_dir .cache_training \\\n", + " --model_name_or_path out/roberta \\\n", + " --custom_model roberta_simple \\\n", + " --train_file data/train.json \\\n", + " --validation_file data/valid.json \\\n", + " --test_file data/test.json \\\n", + " --per_device_train_batch_size 8 \\\n", + " --per_device_eval_batch_size 8 \\\n", + " --do_eval \\\n", + " --do_predict \\\n", + " --max_seq_length 128 \\\n", + " --learning_rate 2e-5 \\\n", + " --max_eval_samples 2000 \\\n", + " --max_steps 2500 \\\n", + " --num_train_epochs 1 \\\n", + " --save_strategy steps \\\n", + " --save_steps 250 \\\n", + " --save_total_limit 5 \\\n", + " --logging_strategy steps \\\n", + " --logging_steps 100 \\\n", + " --eval_steps 250 \\\n", + " --evaluation_strategy steps \\\n", + " --metric_for_best_model accuracy \\\n", + " --greater_is_better True \\\n", + " --load_best_model_at_end True \\\n", + " --output_dir out/roberta_results" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"\u001b[0;39m0.9229999780654907\u001b[0m\n" + ] + } + ], + "source": [ + "!cat out/roberta_results/eval_results.json | jq .eval_accuracy" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GPT2" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Modifications" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Custom classification head with 3 dense layers\n", + "- Using hidden states from last layer" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from torch import nn\n", + "from transformers import GPT2PreTrainedModel, GPT2Model\n", + "from transformers.modeling_outputs import SequenceClassifierOutputWithPast\n", + "\n", + "class GPT2ForSequenceClassification(GPT2PreTrainedModel):\n", + " def __init__(self, config):\n", + " super().__init__(config)\n", + " self.num_labels = config.num_labels\n", + " self.transformer = GPT2Model(config)\n", + " self.score = nn.Linear(config.n_embd, self.num_labels, bias=False)\n", + "\n", + " # Model parallel\n", + " self.model_parallel = False\n", + " self.device_map = None\n", + "\n", + " # Initialize weights and apply final processing\n", + " self.post_init()\n", + "\n", + "\n", + "class GPT2ClassificationHeadCustom(nn.Module):\n", + " def __init__(self, config):\n", + " super().__init__()\n", + " hidden_size = config.n_embd\n", + " self.dense_1_input = nn.Linear(hidden_size, 2 * hidden_size)\n", + " self.dense_1_hidden = nn.Linear(hidden_size, 2 * hidden_size)\n", + " self.dense_2 = nn.Linear(4 * hidden_size, hidden_size)\n", + " self.dropout = nn.Dropout(config.resid_pdrop)\n", + " self.out_proj = nn.Linear(hidden_size, config.num_labels, bias=False)\n", + "\n", + " def forward(self, x, **kwargs):\n", 
+ " if 'hidden_states' in kwargs and kwargs['hidden_states'] is not None:\n", + " hidden = kwargs['hidden_states'][-1]\n", + " else:\n", + " hidden = torch.zeros(x.size(), dtype=x.dtype, device=x.device)\n", + "\n", + " x = self.dense_1_input(x)\n", + " x = torch.relu(x)\n", + " x = self.dropout(x)\n", + "\n", + " hidden = self.dense_1_hidden(hidden)\n", + " hidden = torch.relu(hidden)\n", + " hidden = self.dropout(hidden)\n", + "\n", + " x = torch.cat((x, hidden), dim=2)\n", + " x = self.dense_2(x)\n", + " x = torch.relu(x)\n", + " x = self.dropout(x)\n", + "\n", + " x = self.out_proj(x)\n", + " return x\n", + "\n", + "class GPT2ForSequenceClassificationCustom(GPT2ForSequenceClassification):\n", + " def __init__(self, config):\n", + " super().__init__(config)\n", + " self.num_labels = config.num_labels\n", + " self.transformer = GPT2Model(config)\n", + " self.score = GPT2ClassificationHeadCustom(config)\n", + "\n", + " self.init_weights()\n", + "\n", + " # Model parallel\n", + " self.model_parallel = False\n", + " self.device_map = None\n", + "\n", + " def forward(\n", + " self,\n", + " input_ids=None,\n", + " past_key_values=None,\n", + " attention_mask=None,\n", + " token_type_ids=None,\n", + " position_ids=None,\n", + " head_mask=None,\n", + " inputs_embeds=None,\n", + " labels=None,\n", + " use_cache=None,\n", + " output_attentions=None,\n", + " output_hidden_states=None,\n", + " return_dict=None,\n", + " ):\n", + " r\"\"\"\n", + " labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):\n", + " Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[0, ...,\n", + " config.num_labels - 1]`. 
If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),\n", + " If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).\n", + " \"\"\"\n", + " return_dict = return_dict if return_dict is not None else self.config.use_return_dict\n", + "\n", + " transformer_outputs = self.transformer(\n", + " input_ids,\n", + " past_key_values=past_key_values,\n", + " attention_mask=attention_mask,\n", + " token_type_ids=token_type_ids,\n", + " position_ids=position_ids,\n", + " head_mask=head_mask,\n", + " inputs_embeds=inputs_embeds,\n", + " use_cache=use_cache,\n", + " output_attentions=output_attentions,\n", + " output_hidden_states=output_hidden_states,\n", + " return_dict=return_dict,\n", + " )\n", + " hidden_states = transformer_outputs[0]\n", + " if return_dict:\n", + " logits = self.score(hidden_states, hidden_states=transformer_outputs.hidden_states)\n", + " else:\n", + " raise NotImplemented('Not implemented for using non-dictionary object')\n", + "\n", + " if input_ids is not None:\n", + " batch_size, sequence_length = input_ids.shape[:2]\n", + " else:\n", + " batch_size, sequence_length = inputs_embeds.shape[:2]\n", + "\n", + " assert (\n", + " self.config.pad_token_id is not None or batch_size == 1\n", + " ), \"Cannot handle batch sizes > 1 if no padding token is defined.\"\n", + " if self.config.pad_token_id is None:\n", + " sequence_lengths = -1\n", + " else:\n", + " if input_ids is not None:\n", + " sequence_lengths = torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1\n", + " else:\n", + " sequence_lengths = -1\n", + "\n", + " pooled_logits = logits[range(batch_size), sequence_lengths]\n", + "\n", + " loss = None\n", + " if labels is not None:\n", + " if self.num_labels == 1:\n", + " # We are doing regression\n", + " loss_fct = nn.MSELoss()\n", + " loss = loss_fct(pooled_logits.view(-1), labels.to(self.dtype).view(-1))\n", + " else:\n", + " loss_fct = nn.CrossEntropyLoss()\n", + " loss = 
loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))\n", + "\n", + " if not return_dict:\n", + " output = (pooled_logits,) + transformer_outputs[1:]\n", + " return ((loss,) + output) if loss is not None else output\n", + "\n", + " return SequenceClassifierOutputWithPast(\n", + " loss=loss,\n", + " logits=pooled_logits,\n", + " past_key_values=transformer_outputs.past_key_values,\n", + " hidden_states=transformer_outputs.hidden_states,\n", + " attentions=transformer_outputs.attentions,\n", + " )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4f980b257c2b453797f63ddc89c98923", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading (…)lve/main/config.json: 0%| | 0.00/665 [00:00,\n", + "ignore_data_skip=False,\n", + "include_inputs_for_metrics=False,\n", + "jit_mode_eval=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=2e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=True,\n", + "local_rank=-1,\n", + "log_level=passive,\n", + "log_level_replica=passive,\n", + "log_on_each_node=True,\n", + "logging_dir=out/gpt2/runs/Feb16_15-22-36_DESKTOP-R7JO8BQ,\n", + "logging_first_step=False,\n", + "logging_nan_inf_filter=True,\n", + "logging_steps=100,\n", + "logging_strategy=steps,\n", + "lr_scheduler_type=linear,\n", + "max_grad_norm=1.0,\n", + "max_steps=2500,\n", + "metric_for_best_model=accuracy,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=1.0,\n", + "optim=adamw_hf,\n", + "optim_args=None,\n", + "output_dir=out/gpt2,\n", + "overwrite_output_dir=False,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + 
"push_to_hub_model_id=None,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=,\n", + "ray_scope=last,\n", + "remove_unused_columns=True,\n", + "report_to=[],\n", + "resume_from_checkpoint=None,\n", + "run_name=out/gpt2,\n", + "save_on_each_node=False,\n", + "save_steps=250,\n", + "save_strategy=steps,\n", + "save_total_limit=5,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tf32=None,\n", + "torch_compile=False,\n", + "torch_compile_backend=None,\n", + "torch_compile_mode=None,\n", + "torchdynamo=None,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_ipex=False,\n", + "use_legacy_prediction_loop=False,\n", + "use_mps_device=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + "xpu_backend=None,\n", + ")\n", + "02/16/2023 15:22:37 - INFO - __main__ - Checkpoint detected, resuming training at out/gpt2/checkpoint-2500. To avoid this behavior, change the `--output_dir` or add `--overwrite_output_dir` to train from scratch.\n", + "02/16/2023 15:22:37 - INFO - __main__ - load a local file for train: data/train.json\n", + "02/16/2023 15:22:37 - INFO - __main__ - load a local file for validation: data/valid.json\n", + "02/16/2023 15:22:37 - WARNING - datasets.builder - Using custom data configuration default-e10a382a423bbb9a\n", + "02/16/2023 15:22:37 - INFO - datasets.info - Loading Dataset Infos from /home/jacob/anaconda3/envs/ugp/lib/python3.10/site-packages/datasets/packaged_modules/json\n", + "02/16/2023 15:22:37 - INFO - datasets.builder - Generating dataset json (/home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-e10a382a423bbb9a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n", + "Downloading and preparing dataset json/default to 
/home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-e10a382a423bbb9a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n", + "Downloading data files: 100%|██████████████████| 2/2 [00:00<00:00, 14820.86it/s]\n", + "02/16/2023 15:22:37 - INFO - datasets.download.download_manager - Downloading took 0.0 min\n", + "02/16/2023 15:22:37 - INFO - datasets.download.download_manager - Checksum Computation took 0.0 min\n", + "Extracting data files: 100%|████████████████████| 2/2 [00:00<00:00, 2476.71it/s]\n", + "02/16/2023 15:22:37 - INFO - datasets.utils.info_utils - Unable to verify checksums.\n", + "02/16/2023 15:22:37 - INFO - datasets.builder - Generating train split\n", + "02/16/2023 15:22:37 - INFO - datasets.builder - Generating validation split\n", + "02/16/2023 15:22:37 - INFO - datasets.utils.info_utils - Unable to verify splits sizes.\n", + "Dataset json downloaded and prepared to /home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-e10a382a423bbb9a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. 
Subsequent calls will reuse this data.\n", + "100%|████████████████████████████████████████████| 2/2 [00:00<00:00, 642.61it/s]\n", + "[INFO|configuration_utils.py:660] 2023-02-16 15:22:38,465 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n", + "[INFO|configuration_utils.py:712] 2023-02-16 15:22:38,465 >> Model config GPT2Config {\n", + " \"_name_or_path\": \"gpt2\",\n", + " \"activation_function\": \"gelu_new\",\n", + " \"architectures\": [\n", + " \"GPT2LMHeadModel\"\n", + " ],\n", + " \"attn_pdrop\": 0.1,\n", + " \"bos_token_id\": 50256,\n", + " \"embd_pdrop\": 0.1,\n", + " \"eos_token_id\": 50256,\n", + " \"id2label\": {\n", + " \"0\": \"LABEL_0\",\n", + " \"1\": \"LABEL_1\",\n", + " \"2\": \"LABEL_2\",\n", + " \"3\": \"LABEL_3\"\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"label2id\": {\n", + " \"LABEL_0\": 0,\n", + " \"LABEL_1\": 1,\n", + " \"LABEL_2\": 2,\n", + " \"LABEL_3\": 3\n", + " },\n", + " \"layer_norm_epsilon\": 1e-05,\n", + " \"model_type\": \"gpt2\",\n", + " \"n_ctx\": 1024,\n", + " \"n_embd\": 768,\n", + " \"n_head\": 12,\n", + " \"n_inner\": null,\n", + " \"n_layer\": 12,\n", + " \"n_positions\": 1024,\n", + " \"reorder_and_upcast_attn\": false,\n", + " \"resid_pdrop\": 0.1,\n", + " \"scale_attn_by_inverse_layer_idx\": false,\n", + " \"scale_attn_weights\": true,\n", + " \"summary_activation\": null,\n", + " \"summary_first_dropout\": 0.1,\n", + " \"summary_proj_to_labels\": true,\n", + " \"summary_type\": \"cls_index\",\n", + " \"summary_use_proj\": true,\n", + " \"task_specific_params\": {\n", + " \"text-generation\": {\n", + " \"do_sample\": true,\n", + " \"max_length\": 50\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50257\n", + "}\n", + "\n", + "[INFO|tokenization_auto.py:458] 2023-02-16 15:22:38,945 >> Could not locate the tokenizer configuration file, 
will try to use the model config instead.\n", + "[INFO|configuration_utils.py:660] 2023-02-16 15:22:39,423 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n", + "[INFO|configuration_utils.py:712] 2023-02-16 15:22:39,424 >> Model config GPT2Config {\n", + " \"_name_or_path\": \"gpt2\",\n", + " \"activation_function\": \"gelu_new\",\n", + " \"architectures\": [\n", + " \"GPT2LMHeadModel\"\n", + " ],\n", + " \"attn_pdrop\": 0.1,\n", + " \"bos_token_id\": 50256,\n", + " \"embd_pdrop\": 0.1,\n", + " \"eos_token_id\": 50256,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_epsilon\": 1e-05,\n", + " \"model_type\": \"gpt2\",\n", + " \"n_ctx\": 1024,\n", + " \"n_embd\": 768,\n", + " \"n_head\": 12,\n", + " \"n_inner\": null,\n", + " \"n_layer\": 12,\n", + " \"n_positions\": 1024,\n", + " \"reorder_and_upcast_attn\": false,\n", + " \"resid_pdrop\": 0.1,\n", + " \"scale_attn_by_inverse_layer_idx\": false,\n", + " \"scale_attn_weights\": true,\n", + " \"summary_activation\": null,\n", + " \"summary_first_dropout\": 0.1,\n", + " \"summary_proj_to_labels\": true,\n", + " \"summary_type\": \"cls_index\",\n", + " \"summary_use_proj\": true,\n", + " \"task_specific_params\": {\n", + " \"text-generation\": {\n", + " \"do_sample\": true,\n", + " \"max_length\": 50\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50257\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:22:40,400 >> loading file vocab.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:22:40,400 >> loading file merges.txt from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:22:40,400 >> 
loading file tokenizer.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:22:40,400 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:22:40,400 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:22:40,400 >> loading file tokenizer_config.json from cache at None\n", + "[INFO|configuration_utils.py:660] 2023-02-16 15:22:40,400 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n", + "[INFO|configuration_utils.py:712] 2023-02-16 15:22:40,400 >> Model config GPT2Config {\n", + " \"_name_or_path\": \"gpt2\",\n", + " \"activation_function\": \"gelu_new\",\n", + " \"architectures\": [\n", + " \"GPT2LMHeadModel\"\n", + " ],\n", + " \"attn_pdrop\": 0.1,\n", + " \"bos_token_id\": 50256,\n", + " \"embd_pdrop\": 0.1,\n", + " \"eos_token_id\": 50256,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_epsilon\": 1e-05,\n", + " \"model_type\": \"gpt2\",\n", + " \"n_ctx\": 1024,\n", + " \"n_embd\": 768,\n", + " \"n_head\": 12,\n", + " \"n_inner\": null,\n", + " \"n_layer\": 12,\n", + " \"n_positions\": 1024,\n", + " \"reorder_and_upcast_attn\": false,\n", + " \"resid_pdrop\": 0.1,\n", + " \"scale_attn_by_inverse_layer_idx\": false,\n", + " \"scale_attn_weights\": true,\n", + " \"summary_activation\": null,\n", + " \"summary_first_dropout\": 0.1,\n", + " \"summary_proj_to_labels\": true,\n", + " \"summary_type\": \"cls_index\",\n", + " \"summary_use_proj\": true,\n", + " \"task_specific_params\": {\n", + " \"text-generation\": {\n", + " \"do_sample\": true,\n", + " \"max_length\": 50\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50257\n", + "}\n", + "\n", + 
"02/16/2023 15:22:40 - INFO - __main__ - Using hidden states in model: True\n", + "-------------------------------------------------------- Using hidden: True\n", + "02/16/2023 15:22:40 - INFO - __main__ - Using implementation from class: GPT2ForSequenceClassificationCustom\n", + "[INFO|modeling_utils.py:2275] 2023-02-16 15:22:40,458 >> loading weights file pytorch_model.bin from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n", + "[INFO|modeling_utils.py:2857] 2023-02-16 15:22:42,848 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n", + "\n", + "[WARNING|modeling_utils.py:2859] 2023-02-16 15:22:42,849 >> Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['h.11.attn.masked_bias', 'score.out_proj.weight', 'h.7.attn.masked_bias', 'h.6.attn.masked_bias', 'h.8.attn.masked_bias', 'h.5.attn.masked_bias', 'score.dense_2.weight', 'h.9.attn.masked_bias', 'score.dense_4.bias', 'score.dense_1_input.bias', 'score.dense_3.weight', 'score.dense_1_hidden.bias', 'score.dense_1_input.weight', 'h.1.attn.masked_bias', 'score.dense_3.bias', 'h.10.attn.masked_bias', 'h.2.attn.masked_bias', 'h.4.attn.masked_bias', 'score.dense_1_hidden.weight', 'score.dense_2.bias', 'score.dense_4.weight', 'h.0.attn.masked_bias', 'h.3.attn.masked_bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "GPT2ForSequenceClassificationCustom(\n", + " (transformer): GPT2Model(\n", + " (wte): Embedding(50257, 768)\n", + " (wpe): Embedding(1024, 768)\n", + " (drop): Dropout(p=0.1, inplace=False)\n", + " (h): ModuleList(\n", + " (0): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, 
inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (1): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (2): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (3): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (4): 
GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (5): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (6): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (7): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, 
elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (8): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (9): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (10): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (11): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): 
Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (score): GPT2ClassificationHeadCustom(\n", + " (dense_1_input): Linear(in_features=768, out_features=1536, bias=True)\n", + " (dense_1_hidden): Linear(in_features=768, out_features=1536, bias=True)\n", + " (dense_2): Linear(in_features=3072, out_features=3072, bias=True)\n", + " (dense_3): Linear(in_features=3072, out_features=3072, bias=True)\n", + " (dense_4): Linear(in_features=3072, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (out_proj): Linear(in_features=768, out_features=4, bias=False)\n", + " )\n", + ")\n", + "[ERROR|tokenization_utils_base.py:1042] 2023-02-16 15:22:42,852 >> Using pad_token, but it is not set yet.\n", + "02/16/2023 15:22:42 - INFO - __main__ - Set PAD token to EOS: <|endoftext|>\n", + "Running tokenizer on dataset: 0%| | 0/120 [00:00\n", + " main()\n", + " File \"/home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/run_glue.py\", line 533, in main\n", + " raise ValueError(\"--do_predict requires a test dataset\")\n", + "ValueError: --do_predict requires a test dataset\n" + ] + } + ], + "source": [ + "!python run_glue.py \\\n", + " --cache_dir .cache_training \\\n", + " --model_name_or_path gpt2 \\\n", + " --custom_model gpt2_hidden \\\n", + " --train_file data/train.json \\\n", + " --validation_file data/valid.json \\\n", + " --test_file data/test.json \\\n", + " --per_device_train_batch_size 8 \\\n", + " --per_device_eval_batch_size 8 \\\n", + " --do_train \\\n", + " --do_eval \\\n", + " 
--max_seq_length 128 \\\n", + " --learning_rate 2e-5 \\\n", + " --max_eval_samples 2000 \\\n", + " --max_steps 2500 \\\n", + " --num_train_epochs 1 \\\n", + " --save_strategy steps \\\n", + " --save_steps 250 \\\n", + " --save_total_limit 5 \\\n", + " --logging_strategy steps \\\n", + " --logging_steps 100 \\\n", + " --eval_steps 250 \\\n", + " --evaluation_strategy steps \\\n", + " --metric_for_best_model accuracy \\\n", + " --greater_is_better True \\\n", + " --load_best_model_at_end True \\\n", + " --output_dir out/gpt2 " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "02/16/2023 16:51:20 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "02/16/2023 16:51:20 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "auto_find_batch_size=False,\n", + "bf16=False,\n", + "bf16_full_eval=False,\n", + "data_seed=None,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_bucket_cap_mb=None,\n", + "ddp_find_unused_parameters=None,\n", + "ddp_timeout=1800,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=True,\n", + "do_train=False,\n", + "eval_accumulation_steps=None,\n", + "eval_delay=0,\n", + "eval_steps=250,\n", + "evaluation_strategy=steps,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "fsdp=[],\n", + "fsdp_min_num_params=0,\n", + "fsdp_transformer_layer_cls_to_wrap=None,\n", + "full_determinism=False,\n", + "gradient_accumulation_steps=1,\n", + 
"gradient_checkpointing=False,\n", + "greater_is_better=True,\n", + "group_by_length=False,\n", + "half_precision_backend=auto,\n", + "hub_model_id=None,\n", + "hub_private_repo=False,\n", + "hub_strategy=every_save,\n", + "hub_token=,\n", + "ignore_data_skip=False,\n", + "include_inputs_for_metrics=False,\n", + "jit_mode_eval=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=2e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=True,\n", + "local_rank=-1,\n", + "log_level=passive,\n", + "log_level_replica=passive,\n", + "log_on_each_node=True,\n", + "logging_dir=out/gpt2_results/runs/Feb16_16-51-19_DESKTOP-R7JO8BQ,\n", + "logging_first_step=False,\n", + "logging_nan_inf_filter=True,\n", + "logging_steps=100,\n", + "logging_strategy=steps,\n", + "lr_scheduler_type=linear,\n", + "max_grad_norm=1.0,\n", + "max_steps=2500,\n", + "metric_for_best_model=accuracy,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=1.0,\n", + "optim=adamw_hf,\n", + "optim_args=None,\n", + "output_dir=out/gpt2_results,\n", + "overwrite_output_dir=False,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=None,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=,\n", + "ray_scope=last,\n", + "remove_unused_columns=True,\n", + "report_to=[],\n", + "resume_from_checkpoint=None,\n", + "run_name=out/gpt2_results,\n", + "save_on_each_node=False,\n", + "save_steps=250,\n", + "save_strategy=steps,\n", + "save_total_limit=5,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tf32=None,\n", + "torch_compile=False,\n", + "torch_compile_backend=None,\n", + "torch_compile_mode=None,\n", + "torchdynamo=None,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_ipex=False,\n", + "use_legacy_prediction_loop=False,\n", + "use_mps_device=False,\n", 
+ "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + "xpu_backend=None,\n", + ")\n", + "02/16/2023 16:51:20 - INFO - __main__ - load a local file for train: data/train.json\n", + "02/16/2023 16:51:20 - INFO - __main__ - load a local file for validation: data/valid.json\n", + "02/16/2023 16:51:20 - INFO - __main__ - load a local file for test: data/test.json\n", + "02/16/2023 16:51:20 - WARNING - datasets.builder - Using custom data configuration default-f6e8039906850c57\n", + "02/16/2023 16:51:20 - INFO - datasets.info - Loading Dataset Infos from /home/jacob/anaconda3/envs/ugp/lib/python3.10/site-packages/datasets/packaged_modules/json\n", + "02/16/2023 16:51:20 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "02/16/2023 16:51:20 - INFO - datasets.info - Loading Dataset info from .cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\n", + "02/16/2023 16:51:20 - WARNING - datasets.builder - Found cached dataset json (/home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n", + "02/16/2023 16:51:20 - INFO - datasets.info - Loading Dataset info from /home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\n", + "100%|████████████████████████████████████████████| 3/3 [00:00<00:00, 591.33it/s]\n", + "[INFO|configuration_utils.py:658] 2023-02-16 16:51:20,920 >> loading configuration file out/gpt2/config.json\n", + "[INFO|configuration_utils.py:712] 2023-02-16 16:51:20,921 >> Model config GPT2Config {\n", + " \"_name_or_path\": \"out/gpt2\",\n", + " \"activation_function\": \"gelu_new\",\n", + " \"architectures\": [\n", + " \"GPT2ForSequenceClassificationCustom\"\n", + " ],\n", + " \"attn_pdrop\": 
0.1,\n", + " \"bos_token_id\": 50256,\n", + " \"embd_pdrop\": 0.1,\n", + " \"eos_token_id\": 50256,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3\n", + " },\n", + " \"layer_norm_epsilon\": 1e-05,\n", + " \"model_type\": \"gpt2\",\n", + " \"n_ctx\": 1024,\n", + " \"n_embd\": 768,\n", + " \"n_head\": 12,\n", + " \"n_inner\": null,\n", + " \"n_layer\": 12,\n", + " \"n_positions\": 1024,\n", + " \"pad_token_id\": 50256,\n", + " \"reorder_and_upcast_attn\": false,\n", + " \"resid_pdrop\": 0.1,\n", + " \"scale_attn_by_inverse_layer_idx\": false,\n", + " \"scale_attn_weights\": true,\n", + " \"summary_activation\": null,\n", + " \"summary_first_dropout\": 0.1,\n", + " \"summary_proj_to_labels\": true,\n", + " \"summary_type\": \"cls_index\",\n", + " \"summary_use_proj\": true,\n", + " \"task_specific_params\": {\n", + " \"text-generation\": {\n", + " \"do_sample\": true,\n", + " \"max_length\": 50\n", + " }\n", + " },\n", + " \"torch_dtype\": \"float32\",\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"use_cache\": true,\n", + " \"use_hidden_states\": true,\n", + " \"vocab_size\": 50257\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:51:20,929 >> loading file vocab.json\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:51:20,929 >> loading file merges.txt\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:51:20,929 >> loading file tokenizer.json\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:51:20,929 >> loading file added_tokens.json\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:51:20,929 >> loading file special_tokens_map.json\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:51:20,929 >> loading file tokenizer_config.json\n", + "02/16/2023 16:51:20 - INFO - __main__ - Using hidden states 
in model: True\n", + "-------------------------------------------------------- Using hidden: True\n", + "02/16/2023 16:51:20 - INFO - __main__ - Using implementation from class: GPT2ForSequenceClassificationCustom\n", + "[INFO|modeling_utils.py:2272] 2023-02-16 16:51:20,982 >> loading weights file out/gpt2/pytorch_model.bin\n", + "[INFO|modeling_utils.py:2857] 2023-02-16 16:51:23,451 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n", + "\n", + "[INFO|modeling_utils.py:2865] 2023-02-16 16:51:23,451 >> All the weights of GPT2ForSequenceClassificationCustom were initialized from the model checkpoint at out/gpt2.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2ForSequenceClassificationCustom for predictions without further training.\n", + "GPT2ForSequenceClassificationCustom(\n", + " (transformer): GPT2Model(\n", + " (wte): Embedding(50257, 768)\n", + " (wpe): Embedding(1024, 768)\n", + " (drop): Dropout(p=0.1, inplace=False)\n", + " (h): ModuleList(\n", + " (0): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (1): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " 
(mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (2): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (3): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (4): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (5): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", 
+ " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (6): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (7): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (8): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " 
)\n", + " )\n", + " (9): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (10): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (11): GPT2Block(\n", + " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (attn): GPT2Attention(\n", + " (c_attn): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (attn_dropout): Dropout(p=0.1, inplace=False)\n", + " (resid_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (mlp): GPT2MLP(\n", + " (c_fc): Conv1D()\n", + " (c_proj): Conv1D()\n", + " (act): NewGELUActivation()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (score): GPT2ClassificationHeadCustom(\n", + " (dense_1_input): Linear(in_features=768, out_features=1536, bias=True)\n", + " (dense_1_hidden): Linear(in_features=768, out_features=1536, bias=True)\n", + " (dense_2): Linear(in_features=3072, 
out_features=3072, bias=True)\n", + " (dense_3): Linear(in_features=3072, out_features=3072, bias=True)\n", + " (dense_4): Linear(in_features=3072, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (out_proj): Linear(in_features=768, out_features=4, bias=False)\n", + " )\n", + ")\n", + "Running tokenizer on dataset: 0%| | 0/120 [00:00> max_steps is given, it will override any value given in num_train_epochs\n", + "02/16/2023 16:51:35 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:710] 2023-02-16 16:51:35,120 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:2964] 2023-02-16 16:51:35,123 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2966] 2023-02-16 16:51:35,123 >> Num examples = 2000\n", + "[INFO|trainer.py:2969] 2023-02-16 16:51:35,123 >> Batch size = 8\n", + "100%|█████████████████████████████████████████| 250/250 [00:23<00:00, 10.65it/s]\n", + "***** eval metrics *****\n", + " eval_accuracy = 0.9195\n", + " eval_loss = 0.302\n", + " eval_runtime = 0:00:24.11\n", + " eval_samples = 2000\n", + " eval_samples_per_second = 82.94\n", + " eval_steps_per_second = 10.367\n", + "02/16/2023 16:51:59 - INFO - __main__ - *** Predict ***\n", + "[INFO|trainer.py:710] 2023-02-16 16:51:59,239 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. 
If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:2964] 2023-02-16 16:51:59,240 >> ***** Running Prediction *****\n", + "[INFO|trainer.py:2966] 2023-02-16 16:51:59,240 >> Num examples = 3800\n", + "[INFO|trainer.py:2969] 2023-02-16 16:51:59,240 >> Batch size = 8\n", + "100%|█████████████████████████████████████████| 475/475 [00:43<00:00, 10.84it/s]\n", + "02/16/2023 16:52:43 - INFO - __main__ - ***** Predict results None *****\n", + "[INFO|modelcard.py:449] 2023-02-16 16:52:43,692 >> Dropping the following result as it does not have all the necessary fields:\n", + "{'task': {'name': 'Text Classification', 'type': 'text-classification'}}\n" + ] + } + ], + "source": [ + "!python run_glue.py \\\n", + " --cache_dir .cache_training \\\n", + " --model_name_or_path out/gpt2 \\\n", + " --custom_model gpt2_hidden \\\n", + " --train_file data/train.json \\\n", + " --validation_file data/valid.json \\\n", + " --test_file data/test.json \\\n", + " --per_device_train_batch_size 8 \\\n", + " --per_device_eval_batch_size 8 \\\n", + " --do_eval \\\n", + " --do_predict \\\n", + " --max_seq_length 128 \\\n", + " --learning_rate 2e-5 \\\n", + " --max_eval_samples 2000 \\\n", + " --max_steps 2500 \\\n", + " --num_train_epochs 1 \\\n", + " --save_strategy steps \\\n", + " --save_steps 250 \\\n", + " --save_total_limit 5 \\\n", + " --logging_strategy steps \\\n", + " --logging_steps 100 \\\n", + " --eval_steps 250 \\\n", + " --evaluation_strategy steps \\\n", + " --metric_for_best_model accuracy \\\n", + " --greater_is_better True \\\n", + " --load_best_model_at_end True \\\n", + " --output_dir out/gpt2_results " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"\u001b[0;39m0.9194999933242798\u001b[0m\n" + ] + } + ], + "source": [ + "!cat out/gpt2_results/eval_results.json | jq .eval_accuracy" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# T5" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Modifications" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Custom classification head with 3 dense layers\n", + "- Encoder layers frozen\n", + "- Decoder layers frozen" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import copy\n", + "from torch import nn\n", + "from transformers import T5PreTrainedModel, T5Config\n", + "from transformers.models.t5.modeling_t5 import T5Stack\n", + "from transformers.modeling_outputs import SequenceClassifierOutput\n", + "\n", + "\n", + "class T5ClassificationHead(nn.Module):\n", + " def __init__(self, config: T5Config):\n", + " super().__init__()\n", + "\n", + " self.dense_in = nn.Linear(config.d_model, 768)\n", + " self.dense = nn.Linear(768, 768)\n", + " self.dense_out = nn.Linear(768, config.num_labels)\n", + " self.dropout = nn.Dropout(0.1)\n", + "\n", + " def forward(self, features, **kwargs):\n", + " x = features[:, 0, :]\n", + " x = self.dropout(x)\n", + " x = self.dense_in(x)\n", + " x = torch.relu(x)\n", + " x = self.dropout(x)\n", + " x = self.dense(x)\n", + " x = torch.relu(x)\n", + " x = self.dropout(x)\n", + " x = self.dense_out(x)\n", + "\n", + " return x\n", + "\n", + "\n", + "class T5ForClassification(T5PreTrainedModel):\n", + " def __init__(self, config: T5Config):\n", + " super().__init__(config)\n", + " self.model_dim = config.d_model\n", + "\n", + " self.shared = nn.Embedding(config.vocab_size, config.d_model)\n", + "\n", + " encoder_config = 
copy.deepcopy(config)\n", + " encoder_config.is_decoder = False\n", + " encoder_config.use_cache = False\n", + " encoder_config.is_encoder_decoder = False\n", + " self.encoder = T5Stack(encoder_config, self.shared)\n", + "\n", + " decoder_config = copy.deepcopy(config)\n", + " decoder_config.is_decoder = True\n", + " decoder_config.is_encoder_decoder = False\n", + " decoder_config.num_layers = config.num_decoder_layers\n", + " self.decoder = T5Stack(decoder_config, self.shared)\n", + "\n", + " modules_to_freeze = [self.encoder.block[i].layer[0] for i in range(len(self.encoder.block))]\n", + " modules_to_freeze.extend([self.decoder.block[i].layer[0] for i in range(len(self.decoder.block))])\n", + " modules_to_freeze.extend([self.decoder.block[i].layer[1] for i in range(len(self.decoder.block))])\n", + "\n", + " for module in modules_to_freeze:\n", + " for param in module.parameters():\n", + " param.requires_grad = False\n", + "\n", + " self.lm_head = T5ClassificationHead(config)\n", + "\n", + " # Initialize weights and apply final processing\n", + " self.post_init()\n", + "\n", + " # Model parallel\n", + " self.model_parallel = False\n", + " self.device_map = None\n", + "\n", + "\n", + " def forward(\n", + " self,\n", + " input_ids=None,\n", + " attention_mask=None,\n", + " head_mask=None,\n", + " cross_attn_head_mask=None,\n", + " past_key_values=None,\n", + " inputs_embeds=None,\n", + " decoder_inputs_embeds=None,\n", + " use_cache=None,\n", + " output_attentions=None,\n", + " output_hidden_states=None,\n", + " return_dict=None,\n", + " labels=None\n", + " ):\n", + " return_dict = return_dict if return_dict is not None else self.config.use_return_dict\n", + "\n", + " outputs = self.encoder(\n", + " input_ids,\n", + " attention_mask=attention_mask,\n", + " head_mask=head_mask,\n", + " cross_attn_head_mask=cross_attn_head_mask,\n", + " past_key_values=past_key_values,\n", + " inputs_embeds=inputs_embeds,\n", + " use_cache=use_cache,\n", + " 
output_attentions=output_attentions,\n", + " output_hidden_states=output_hidden_states,\n", + " return_dict=return_dict,\n", + " )\n", + "\n", + " outputs = self.decoder(\n", + " input_ids,\n", + " attention_mask=attention_mask,\n", + " head_mask=head_mask,\n", + " cross_attn_head_mask=cross_attn_head_mask,\n", + " past_key_values=past_key_values,\n", + " inputs_embeds=inputs_embeds,\n", + " use_cache=use_cache,\n", + " output_attentions=output_attentions,\n", + " output_hidden_states=output_hidden_states,\n", + " return_dict=return_dict,\n", + " )\n", + "\n", + "\n", + " logits = self.lm_head(outputs[0])\n", + "\n", + "\n", + " loss = None\n", + " if labels is not None:\n", + " loss_fct = nn.CrossEntropyLoss()\n", + " loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1))\n", + "\n", + "\n", + " return SequenceClassifierOutput(\n", + " loss=loss,\n", + " logits=logits,\n", + " )\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fda885ac92b1459ba9c0faf41a9d925f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading (…)lve/main/config.json: 0%| | 0.00/1.21k [00:00,\n", + "ignore_data_skip=False,\n", + "include_inputs_for_metrics=False,\n", + "jit_mode_eval=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=2e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=True,\n", + "local_rank=-1,\n", + "log_level=passive,\n", + "log_level_replica=passive,\n", + "log_on_each_node=True,\n", + "logging_dir=out/t5/runs/Feb16_15-24-12_DESKTOP-R7JO8BQ,\n", + "logging_first_step=False,\n", + "logging_nan_inf_filter=True,\n", + "logging_steps=100,\n", + "logging_strategy=steps,\n", + "lr_scheduler_type=linear,\n", + "max_grad_norm=1.0,\n", + 
"max_steps=2500,\n", + "metric_for_best_model=accuracy,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=1.0,\n", + "optim=adamw_hf,\n", + "optim_args=None,\n", + "output_dir=out/t5,\n", + "overwrite_output_dir=False,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=None,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=,\n", + "ray_scope=last,\n", + "remove_unused_columns=True,\n", + "report_to=[],\n", + "resume_from_checkpoint=None,\n", + "run_name=out/t5,\n", + "save_on_each_node=False,\n", + "save_steps=250,\n", + "save_strategy=steps,\n", + "save_total_limit=5,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tf32=None,\n", + "torch_compile=False,\n", + "torch_compile_backend=None,\n", + "torch_compile_mode=None,\n", + "torchdynamo=None,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_ipex=False,\n", + "use_legacy_prediction_loop=False,\n", + "use_mps_device=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + "xpu_backend=None,\n", + ")\n", + "02/16/2023 15:24:13 - INFO - __main__ - Checkpoint detected, resuming training at out/t5/checkpoint-2500. 
To avoid this behavior, change the `--output_dir` or add `--overwrite_output_dir` to train from scratch.\n", + "02/16/2023 15:24:13 - INFO - __main__ - load a local file for train: data/train.json\n", + "02/16/2023 15:24:13 - INFO - __main__ - load a local file for validation: data/valid.json\n", + "02/16/2023 15:24:13 - WARNING - datasets.builder - Using custom data configuration default-e10a382a423bbb9a\n", + "02/16/2023 15:24:13 - INFO - datasets.info - Loading Dataset Infos from /home/jacob/anaconda3/envs/ugp/lib/python3.10/site-packages/datasets/packaged_modules/json\n", + "02/16/2023 15:24:13 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "02/16/2023 15:24:13 - INFO - datasets.info - Loading Dataset info from .cache_training/json/default-e10a382a423bbb9a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\n", + "02/16/2023 15:24:13 - WARNING - datasets.builder - Found cached dataset json (/home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-e10a382a423bbb9a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n", + "02/16/2023 15:24:13 - INFO - datasets.info - Loading Dataset info from /home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-e10a382a423bbb9a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\n", + "100%|████████████████████████████████████████████| 2/2 [00:00<00:00, 426.97it/s]\n", + "[INFO|configuration_utils.py:660] 2023-02-16 15:24:14,422 >> loading configuration file config.json from cache at .cache_training/models--t5-base/snapshots/0db7e623bcaee2daf9b859a646637ea39bf016cd/config.json\n", + "[INFO|configuration_utils.py:712] 2023-02-16 15:24:14,423 >> Model config T5Config {\n", + " \"_name_or_path\": \"t5-base\",\n", + " \"architectures\": [\n", + " \"T5ForConditionalGeneration\"\n", + " ],\n", + " \"d_ff\": 3072,\n", + " \"d_kv\": 64,\n", + " \"d_model\": 
768,\n", + " \"decoder_start_token_id\": 0,\n", + " \"dense_act_fn\": \"relu\",\n", + " \"dropout_rate\": 0.1,\n", + " \"eos_token_id\": 1,\n", + " \"feed_forward_proj\": \"relu\",\n", + " \"id2label\": {\n", + " \"0\": \"LABEL_0\",\n", + " \"1\": \"LABEL_1\",\n", + " \"2\": \"LABEL_2\",\n", + " \"3\": \"LABEL_3\"\n", + " },\n", + " \"initializer_factor\": 1.0,\n", + " \"is_encoder_decoder\": true,\n", + " \"is_gated_act\": false,\n", + " \"label2id\": {\n", + " \"LABEL_0\": 0,\n", + " \"LABEL_1\": 1,\n", + " \"LABEL_2\": 2,\n", + " \"LABEL_3\": 3\n", + " },\n", + " \"layer_norm_epsilon\": 1e-06,\n", + " \"model_type\": \"t5\",\n", + " \"n_positions\": 512,\n", + " \"num_decoder_layers\": 12,\n", + " \"num_heads\": 12,\n", + " \"num_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 0,\n", + " \"relative_attention_max_distance\": 128,\n", + " \"relative_attention_num_buckets\": 32,\n", + " \"task_specific_params\": {\n", + " \"summarization\": {\n", + " \"early_stopping\": true,\n", + " \"length_penalty\": 2.0,\n", + " \"max_length\": 200,\n", + " \"min_length\": 30,\n", + " \"no_repeat_ngram_size\": 3,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"summarize: \"\n", + " },\n", + " \"translation_en_to_de\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to German: \"\n", + " },\n", + " \"translation_en_to_fr\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to French: \"\n", + " },\n", + " \"translation_en_to_ro\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to Romanian: \"\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 32128\n", + "}\n", + "\n", + "[INFO|tokenization_auto.py:458] 2023-02-16 15:24:14,918 >> Could not 
locate the tokenizer configuration file, will try to use the model config instead.\n", + "[INFO|configuration_utils.py:660] 2023-02-16 15:24:15,378 >> loading configuration file config.json from cache at .cache_training/models--t5-base/snapshots/0db7e623bcaee2daf9b859a646637ea39bf016cd/config.json\n", + "[INFO|configuration_utils.py:712] 2023-02-16 15:24:15,378 >> Model config T5Config {\n", + " \"_name_or_path\": \"t5-base\",\n", + " \"architectures\": [\n", + " \"T5ForConditionalGeneration\"\n", + " ],\n", + " \"d_ff\": 3072,\n", + " \"d_kv\": 64,\n", + " \"d_model\": 768,\n", + " \"decoder_start_token_id\": 0,\n", + " \"dense_act_fn\": \"relu\",\n", + " \"dropout_rate\": 0.1,\n", + " \"eos_token_id\": 1,\n", + " \"feed_forward_proj\": \"relu\",\n", + " \"initializer_factor\": 1.0,\n", + " \"is_encoder_decoder\": true,\n", + " \"is_gated_act\": false,\n", + " \"layer_norm_epsilon\": 1e-06,\n", + " \"model_type\": \"t5\",\n", + " \"n_positions\": 512,\n", + " \"num_decoder_layers\": 12,\n", + " \"num_heads\": 12,\n", + " \"num_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 0,\n", + " \"relative_attention_max_distance\": 128,\n", + " \"relative_attention_num_buckets\": 32,\n", + " \"task_specific_params\": {\n", + " \"summarization\": {\n", + " \"early_stopping\": true,\n", + " \"length_penalty\": 2.0,\n", + " \"max_length\": 200,\n", + " \"min_length\": 30,\n", + " \"no_repeat_ngram_size\": 3,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"summarize: \"\n", + " },\n", + " \"translation_en_to_de\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to German: \"\n", + " },\n", + " \"translation_en_to_fr\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to French: \"\n", + " },\n", + " \"translation_en_to_ro\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + 
" \"num_beams\": 4,\n", + " \"prefix\": \"translate English to Romanian: \"\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 32128\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:24:16,341 >> loading file spiece.model from cache at .cache_training/models--t5-base/snapshots/0db7e623bcaee2daf9b859a646637ea39bf016cd/spiece.model\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:24:16,341 >> loading file tokenizer.json from cache at .cache_training/models--t5-base/snapshots/0db7e623bcaee2daf9b859a646637ea39bf016cd/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:24:16,341 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:24:16,341 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1802] 2023-02-16 15:24:16,341 >> loading file tokenizer_config.json from cache at None\n", + "[INFO|configuration_utils.py:660] 2023-02-16 15:24:16,342 >> loading configuration file config.json from cache at .cache_training/models--t5-base/snapshots/0db7e623bcaee2daf9b859a646637ea39bf016cd/config.json\n", + "[INFO|configuration_utils.py:712] 2023-02-16 15:24:16,342 >> Model config T5Config {\n", + " \"_name_or_path\": \"t5-base\",\n", + " \"architectures\": [\n", + " \"T5ForConditionalGeneration\"\n", + " ],\n", + " \"d_ff\": 3072,\n", + " \"d_kv\": 64,\n", + " \"d_model\": 768,\n", + " \"decoder_start_token_id\": 0,\n", + " \"dense_act_fn\": \"relu\",\n", + " \"dropout_rate\": 0.1,\n", + " \"eos_token_id\": 1,\n", + " \"feed_forward_proj\": \"relu\",\n", + " \"initializer_factor\": 1.0,\n", + " \"is_encoder_decoder\": true,\n", + " \"is_gated_act\": false,\n", + " \"layer_norm_epsilon\": 1e-06,\n", + " \"model_type\": \"t5\",\n", + " \"n_positions\": 512,\n", + " \"num_decoder_layers\": 12,\n", + " \"num_heads\": 12,\n", + " \"num_layers\": 12,\n", 
+ " \"output_past\": true,\n", + " \"pad_token_id\": 0,\n", + " \"relative_attention_max_distance\": 128,\n", + " \"relative_attention_num_buckets\": 32,\n", + " \"task_specific_params\": {\n", + " \"summarization\": {\n", + " \"early_stopping\": true,\n", + " \"length_penalty\": 2.0,\n", + " \"max_length\": 200,\n", + " \"min_length\": 30,\n", + " \"no_repeat_ngram_size\": 3,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"summarize: \"\n", + " },\n", + " \"translation_en_to_de\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to German: \"\n", + " },\n", + " \"translation_en_to_fr\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to French: \"\n", + " },\n", + " \"translation_en_to_ro\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to Romanian: \"\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 32128\n", + "}\n", + "\n", + "/home/jacob/anaconda3/envs/ugp/lib/python3.10/site-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n", + "For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n", + "- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.\n", + "- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n", + "- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n", + " warnings.warn(\n", + "02/16/2023 
15:24:16 - INFO - __main__ - Using hidden states in model: False\n", + "-------------------------------------------------------- Using hidden: False\n", + "02/16/2023 15:24:16 - INFO - __main__ - Using implementation from class: T5ForClassification\n", + "[INFO|modeling_utils.py:2275] 2023-02-16 15:24:16,391 >> loading weights file pytorch_model.bin from cache at .cache_training/models--t5-base/snapshots/0db7e623bcaee2daf9b859a646637ea39bf016cd/pytorch_model.bin\n", + "[WARNING|modeling_utils.py:2847] 2023-02-16 15:24:19,101 >> Some weights of the model checkpoint at t5-base were not used when initializing T5ForClassification: ['decoder.block.0.layer.1.EncDecAttention.relative_attention_bias.weight']\n", + "- This IS expected if you are initializing T5ForClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing T5ForClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:2859] 2023-02-16 15:24:19,102 >> Some weights of T5ForClassification were not initialized from the model checkpoint at t5-base and are newly initialized: ['decoder.embed_tokens.weight', 'lm_head.dense.bias', 'lm_head.dense_out.bias', 'encoder.embed_tokens.weight', 'lm_head.dense_in.bias', 'lm_head.dense_in.weight', 'lm_head.dense.weight', 'lm_head.dense_out.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "T5ForClassification(\n", + " (shared): Embedding(32128, 768)\n", + " (encoder): T5Stack(\n", + " (embed_tokens): Embedding(32128, 768)\n", + " (block): ModuleList(\n", + " (0): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): 
T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " (relative_attention_bias): Embedding(32, 12)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (1): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (2): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", 
+ " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (3): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (4): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): 
Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (5): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (6): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): 
Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (7): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (8): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): 
Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (9): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (10): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (11): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): 
Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (final_layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (decoder): T5Stack(\n", + " (embed_tokens): Embedding(32128, 768)\n", + " (block): ModuleList(\n", + " (0): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " (relative_attention_bias): Embedding(32, 12)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): 
T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (1): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (2): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + 
" (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (3): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): 
Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (4): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (5): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): 
Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (6): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + 
" (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (7): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (8): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, 
out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (9): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, 
out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (10): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (11): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): 
T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (final_layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (lm_head): T5ClassificationHead(\n", + " (dense_in): Linear(in_features=768, out_features=768, bias=True)\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (dense_out): Linear(in_features=768, out_features=4, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + ")\n", + "Running tokenizer on dataset: 0%| | 0/120 [00:00\n", + " main()\n", + " File \"/home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/run_glue.py\", line 533, in main\n", + " raise ValueError(\"--do_predict requires a test dataset\")\n", + "ValueError: --do_predict requires a test dataset\n" + ] + } + ], + 
"source": [ + "!python run_glue.py \\\n", + " --cache_dir .cache_training \\\n", + " --model_name_or_path t5-base \\\n", + " --custom_model t5_custom \\\n", + " --train_file data/train.json \\\n", + " --validation_file data/valid.json \\\n", + " --test_file data/test.json \\\n", + " --per_device_train_batch_size 8 \\\n", + " --per_device_eval_batch_size 8 \\\n", + " --do_train \\\n", + " --do_eval \\\n", + " --max_seq_length 128 \\\n", + " --learning_rate 2e-5 \\\n", + " --max_eval_samples 2000 \\\n", + " --max_steps 2500 \\\n", + " --num_train_epochs 1 \\\n", + " --save_strategy steps \\\n", + " --save_steps 250 \\\n", + " --save_total_limit 5 \\\n", + " --logging_strategy steps \\\n", + " --logging_steps 100 \\\n", + " --eval_steps 250 \\\n", + " --evaluation_strategy steps \\\n", + " --metric_for_best_model accuracy \\\n", + " --greater_is_better True \\\n", + " --load_best_model_at_end True \\\n", + " --output_dir out/t5" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "02/16/2023 16:52:57 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "02/16/2023 16:52:57 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "auto_find_batch_size=False,\n", + "bf16=False,\n", + "bf16_full_eval=False,\n", + "data_seed=None,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_bucket_cap_mb=None,\n", + "ddp_find_unused_parameters=None,\n", + "ddp_timeout=1800,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=True,\n", + 
"do_train=False,\n", + "eval_accumulation_steps=None,\n", + "eval_delay=0,\n", + "eval_steps=250,\n", + "evaluation_strategy=steps,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "fsdp=[],\n", + "fsdp_min_num_params=0,\n", + "fsdp_transformer_layer_cls_to_wrap=None,\n", + "full_determinism=False,\n", + "gradient_accumulation_steps=1,\n", + "gradient_checkpointing=False,\n", + "greater_is_better=True,\n", + "group_by_length=False,\n", + "half_precision_backend=auto,\n", + "hub_model_id=None,\n", + "hub_private_repo=False,\n", + "hub_strategy=every_save,\n", + "hub_token=,\n", + "ignore_data_skip=False,\n", + "include_inputs_for_metrics=False,\n", + "jit_mode_eval=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=2e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=True,\n", + "local_rank=-1,\n", + "log_level=passive,\n", + "log_level_replica=passive,\n", + "log_on_each_node=True,\n", + "logging_dir=out/t5_results/runs/Feb16_16-52-56_DESKTOP-R7JO8BQ,\n", + "logging_first_step=False,\n", + "logging_nan_inf_filter=True,\n", + "logging_steps=100,\n", + "logging_strategy=steps,\n", + "lr_scheduler_type=linear,\n", + "max_grad_norm=1.0,\n", + "max_steps=2500,\n", + "metric_for_best_model=accuracy,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=1.0,\n", + "optim=adamw_hf,\n", + "optim_args=None,\n", + "output_dir=out/t5_results,\n", + "overwrite_output_dir=False,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=None,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=,\n", + "ray_scope=last,\n", + "remove_unused_columns=True,\n", + "report_to=[],\n", + "resume_from_checkpoint=None,\n", + "run_name=out/t5_results,\n", + "save_on_each_node=False,\n", + "save_steps=250,\n", + 
"save_strategy=steps,\n", + "save_total_limit=5,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tf32=None,\n", + "torch_compile=False,\n", + "torch_compile_backend=None,\n", + "torch_compile_mode=None,\n", + "torchdynamo=None,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_ipex=False,\n", + "use_legacy_prediction_loop=False,\n", + "use_mps_device=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + "xpu_backend=None,\n", + ")\n", + "02/16/2023 16:52:57 - INFO - __main__ - load a local file for train: data/train.json\n", + "02/16/2023 16:52:57 - INFO - __main__ - load a local file for validation: data/valid.json\n", + "02/16/2023 16:52:57 - INFO - __main__ - load a local file for test: data/test.json\n", + "02/16/2023 16:52:58 - WARNING - datasets.builder - Using custom data configuration default-f6e8039906850c57\n", + "02/16/2023 16:52:58 - INFO - datasets.info - Loading Dataset Infos from /home/jacob/anaconda3/envs/ugp/lib/python3.10/site-packages/datasets/packaged_modules/json\n", + "02/16/2023 16:52:58 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "02/16/2023 16:52:58 - INFO - datasets.info - Loading Dataset info from .cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\n", + "02/16/2023 16:52:58 - WARNING - datasets.builder - Found cached dataset json (/home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n", + "02/16/2023 16:52:58 - INFO - datasets.info - Loading Dataset info from /home/jacob/code/university/uczenie_glebokie_w_przetwarzaniu_tekstu/.cache_training/json/default-f6e8039906850c57/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\n", + "100%|████████████████████████████████████████████| 3/3 [00:00<00:00, 
769.41it/s]\n", + "[INFO|configuration_utils.py:658] 2023-02-16 16:52:58,326 >> loading configuration file out/t5/config.json\n", + "[INFO|configuration_utils.py:712] 2023-02-16 16:52:58,327 >> Model config T5Config {\n", + " \"_name_or_path\": \"out/t5\",\n", + " \"architectures\": [\n", + " \"T5ForClassification\"\n", + " ],\n", + " \"d_ff\": 3072,\n", + " \"d_kv\": 64,\n", + " \"d_model\": 768,\n", + " \"decoder_start_token_id\": 0,\n", + " \"dense_act_fn\": \"relu\",\n", + " \"dropout_rate\": 0.1,\n", + " \"eos_token_id\": 1,\n", + " \"feed_forward_proj\": \"relu\",\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3\n", + " },\n", + " \"initializer_factor\": 1.0,\n", + " \"is_encoder_decoder\": true,\n", + " \"is_gated_act\": false,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3\n", + " },\n", + " \"layer_norm_epsilon\": 1e-06,\n", + " \"model_type\": \"t5\",\n", + " \"n_positions\": 512,\n", + " \"num_decoder_layers\": 12,\n", + " \"num_heads\": 12,\n", + " \"num_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 0,\n", + " \"relative_attention_max_distance\": 128,\n", + " \"relative_attention_num_buckets\": 32,\n", + " \"task_specific_params\": {\n", + " \"summarization\": {\n", + " \"early_stopping\": true,\n", + " \"length_penalty\": 2.0,\n", + " \"max_length\": 200,\n", + " \"min_length\": 30,\n", + " \"no_repeat_ngram_size\": 3,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"summarize: \"\n", + " },\n", + " \"translation_en_to_de\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to German: \"\n", + " },\n", + " \"translation_en_to_fr\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to French: \"\n", + " },\n", + " \"translation_en_to_ro\": {\n", + " \"early_stopping\": true,\n", 
+ " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to Romanian: \"\n", + " }\n", + " },\n", + " \"torch_dtype\": \"float32\",\n", + " \"transformers_version\": \"4.26.1\",\n", + " \"use_cache\": true,\n", + " \"use_hidden_states\": false,\n", + " \"vocab_size\": 32128\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:52:58,328 >> loading file spiece.model\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:52:58,328 >> loading file tokenizer.json\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:52:58,328 >> loading file added_tokens.json\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:52:58,328 >> loading file special_tokens_map.json\n", + "[INFO|tokenization_utils_base.py:1800] 2023-02-16 16:52:58,328 >> loading file tokenizer_config.json\n", + "02/16/2023 16:52:58 - INFO - __main__ - Using hidden states in model: False\n", + "-------------------------------------------------------- Using hidden: False\n", + "02/16/2023 16:52:58 - INFO - __main__ - Using implementation from class: T5ForClassification\n", + "[INFO|modeling_utils.py:2272] 2023-02-16 16:52:58,375 >> loading weights file out/t5/pytorch_model.bin\n", + "[INFO|modeling_utils.py:2857] 2023-02-16 16:53:00,690 >> All model checkpoint weights were used when initializing T5ForClassification.\n", + "\n", + "[INFO|modeling_utils.py:2865] 2023-02-16 16:53:00,690 >> All the weights of T5ForClassification were initialized from the model checkpoint at out/t5.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForClassification for predictions without further training.\n", + "T5ForClassification(\n", + " (shared): Embedding(32128, 768)\n", + " (encoder): T5Stack(\n", + " (embed_tokens): Embedding(32128, 768)\n", + " (block): ModuleList(\n", + " (0): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", 
+ " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " (relative_attention_bias): Embedding(32, 12)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (1): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (2): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): 
Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (3): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (4): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, 
inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (5): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (6): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, 
out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (7): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (8): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " 
)\n", + " )\n", + " )\n", + " (9): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (10): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (11): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, 
bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (final_layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (decoder): T5Stack(\n", + " (embed_tokens): Embedding(32128, 768)\n", + " (block): ModuleList(\n", + " (0): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " (relative_attention_bias): Embedding(32, 12)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): 
T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (1): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (2): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, 
out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (3): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " 
)\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (4): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (5): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, 
bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (6): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " 
(dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (7): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (8): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " 
(k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (9): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " 
)\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (10): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (11): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): 
Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=768, out_features=768, bias=False)\n", + " (k): Linear(in_features=768, out_features=768, bias=False)\n", + " (v): Linear(in_features=768, out_features=768, bias=False)\n", + " (o): Linear(in_features=768, out_features=768, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseActDense(\n", + " (wi): Linear(in_features=768, out_features=3072, bias=False)\n", + " (wo): Linear(in_features=3072, out_features=768, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (act): ReLU()\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (final_layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (lm_head): T5ClassificationHead(\n", + " (dense_in): Linear(in_features=768, out_features=768, bias=True)\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (dense_out): Linear(in_features=768, out_features=4, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + ")\n", + "Running tokenizer on dataset: 0%| | 0/120 [00:00> max_steps is given, it will override any value given in num_train_epochs\n", + "02/16/2023 16:53:12 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:710] 2023-02-16 16:53:12,739 >> The following columns in the evaluation set don't have a corresponding argument in 
`T5ForClassification.forward` and have been ignored: text. If text are not expected by `T5ForClassification.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:2964] 2023-02-16 16:53:12,740 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2966] 2023-02-16 16:53:12,740 >> Num examples = 2000\n", + "[INFO|trainer.py:2969] 2023-02-16 16:53:12,740 >> Batch size = 8\n", + "100%|█████████████████████████████████████████| 250/250 [00:39<00:00, 6.26it/s]\n", + "***** eval metrics *****\n", + " eval_accuracy = 0.4675\n", + " eval_loss = 1.2139\n", + " eval_runtime = 0:00:40.56\n", + " eval_samples = 2000\n", + " eval_samples_per_second = 49.303\n", + " eval_steps_per_second = 6.163\n", + "02/16/2023 16:53:53 - INFO - __main__ - *** Predict ***\n", + "[INFO|trainer.py:710] 2023-02-16 16:53:53,307 >> The following columns in the test set don't have a corresponding argument in `T5ForClassification.forward` and have been ignored: text. If text are not expected by `T5ForClassification.forward`, you can safely ignore this message.\n", + "[INFO|trainer.py:2964] 2023-02-16 16:53:53,308 >> ***** Running Prediction *****\n", + "[INFO|trainer.py:2966] 2023-02-16 16:53:53,308 >> Num examples = 3800\n", + "[INFO|trainer.py:2969] 2023-02-16 16:53:53,308 >> Batch size = 8\n", + "100%|█████████████████████████████████████████| 475/475 [01:15<00:00, 6.32it/s]\n", + "02/16/2023 16:55:08 - INFO - __main__ - ***** Predict results None *****\n", + "[INFO|modelcard.py:449] 2023-02-16 16:55:09,179 >> Dropping the following result as it does not have all the necessary fields:\n", + "{'task': {'name': 'Text Classification', 'type': 'text-classification'}}\n" + ] + } + ], + "source": [ + "!python run_glue.py \\\n", + " --cache_dir .cache_training \\\n", + " --model_name_or_path out/t5 \\\n", + " --custom_model t5_custom \\\n", + " --train_file data/train.json \\\n", + " --validation_file data/valid.json \\\n", + " --test_file data/test.json \\\n", + " 
--per_device_train_batch_size 8 \\\n", + " --per_device_eval_batch_size 8 \\\n", + " --do_eval \\\n", + " --do_predict \\\n", + " --max_seq_length 128 \\\n", + " --learning_rate 2e-5 \\\n", + " --max_eval_samples 2000 \\\n", + " --max_steps 2500 \\\n", + " --num_train_epochs 1 \\\n", + " --save_strategy steps \\\n", + " --save_steps 250 \\\n", + " --save_total_limit 5 \\\n", + " --logging_strategy steps \\\n", + " --logging_steps 100 \\\n", + " --eval_steps 250 \\\n", + " --evaluation_strategy steps \\\n", + " --metric_for_best_model accuracy \\\n", + " --greater_is_better True \\\n", + " --load_best_model_at_end True \\\n", + " --output_dir out/t5_results" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Result" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0;39m0.4675000011920929\u001b[0m\n" + ] + } + ], + "source": [ + "!cat out/t5_results/eval_results.json | jq .eval_accuracy" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bart - Zero shot" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8de84b2cf8ed46488a6eb0bb4e0b11ef", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading (…)lve/main/config.json: 0%| | 0.00/1.40k [00:00 token (equiv. 
to [CLS]) + + x = self.dense_1(x) + x = self.activation(x) + x = self.dropout(x) + + x = self.dense_2(x) + x = self.activation(x) + x = self.dropout(x) + + x = self.out_proj(x) + return x + + +class RobertaForSequenceClassificationCustomSimple(RobertaForSequenceClassification): + _keys_to_ignore_on_load_missing = [r"position_ids"] + + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.config = config + + self.roberta = RobertaModel(config, add_pooling_layer=False) + self.classifier = RobertaClassificationHeadCustomSimple(config) + + # Initialize weights and apply final processing + self.post_init() diff --git a/run_glue.py b/run_glue.py new file mode 100644 index 0000000..f835943 --- /dev/null +++ b/run_glue.py @@ -0,0 +1,685 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2020 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Finetuning the library models for sequence classification on GLUE.""" +# You can also adapt this script on your own text classification task. Pointers for this are left as comments. 
+ +import logging +import os +import random +import sys +from collections import defaultdict +from dataclasses import dataclass, field +from typing import Optional + +import datasets +import numpy as np +from datasets import load_dataset + +import evaluate +import transformers +from transformers import ( + AutoConfig, + AutoModelForSequenceClassification, + AutoTokenizer, + DataCollatorWithPadding, + EvalPrediction, + HfArgumentParser, + PretrainedConfig, + Trainer, + TrainingArguments, + default_data_collator, + set_seed, +) +from transformers.trainer_utils import get_last_checkpoint +from transformers.utils import check_min_version, send_example_telemetry +from transformers.utils.versions import require_version + +from roberta import RobertaForSequenceClassificationCustomSimple +from gpt2 import GPT2ForSequenceClassificationCustom +from t5 import T5ForClassification +from transformers import BartForSequenceClassification + +MODEL_NAME_TO_CLASS = { + 'roberta_simple': RobertaForSequenceClassificationCustomSimple, + 'gpt2_hidden': GPT2ForSequenceClassificationCustom, + 't5_custom': T5ForClassification, + 'bart_base': BartForSequenceClassification, +} + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.23.0") + +require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt") + +task_to_keys = { + "cola": ("sentence", None), + "mnli": ("premise", "hypothesis"), + "mrpc": ("sentence1", "sentence2"), + "qnli": ("question", "sentence"), + "qqp": ("question1", "question2"), + "rte": ("sentence1", "sentence2"), + "sst2": ("sentence", None), + "stsb": ("sentence1", "sentence2"), + "wnli": ("sentence1", "sentence2"), +} + +logger = logging.getLogger(__name__) + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ + Using `HfArgumentParser` we can turn this class + into argparse arguments to be able to specify them on + the command line. + """ + + task_name: Optional[str] = field( + default=None, + metadata={"help": "The name of the task to train on: " + ", ".join(task_to_keys.keys())}, + ) + dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + max_seq_length: int = field( + default=128, + metadata={ + "help": ( + "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + ) + }, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."} + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": ( + "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch." + ) + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + ) + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": ( + "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + ) + }, + ) + max_predict_samples: Optional[int] = field( + default=None, + metadata={ + "help": ( + "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." 
+ ) + }, + ) + train_file: Optional[str] = field( + default=None, metadata={"help": "A csv or a json file containing the training data."} + ) + validation_file: Optional[str] = field( + default=None, metadata={"help": "A csv or a json file containing the validation data."} + ) + test_file: Optional[str] = field(default=None, metadata={"help": "A csv or a json file containing the test data."}) + + def __post_init__(self): + if self.task_name is not None: + self.task_name = self.task_name.lower() + if self.task_name not in task_to_keys.keys(): + raise ValueError("Unknown task, you should pick one in " + ",".join(task_to_keys.keys())) + elif self.dataset_name is not None: + pass + elif self.train_file is None or self.validation_file is None: + raise ValueError("Need either a GLUE task, a training/validation file or a dataset name.") + else: + train_extension = self.train_file.split(".")[-1] + assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file." + validation_extension = self.validation_file.split(".")[-1] + assert ( + validation_extension == train_extension + ), "`validation_file` should have the same extension (csv or json) as `train_file`." + + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": ( + "Will use the token generated when running `huggingface-cli login` (necessary to use this script " + "with private models)." + ) + }, + ) + ignore_mismatched_sizes: bool = field( + default=False, + metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, + ) + custom_model: str = field( + default=None, + metadata={ + "help": "Use custom implementation from available list", + "choices": list(MODEL_NAME_TO_CLASS.keys()), + }, + ) + + +def main(): + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. 
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_glue", model_args, data_args) + + if 'bart' in model_args.model_name_or_path: + model_args.ignore_mismatched_sizes = True + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + logger.info(f"Training/evaluation parameters {training_args}") + + # Detecting last checkpoint. + last_checkpoint = None + if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: + last_checkpoint = get_last_checkpoint(training_args.output_dir) + if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." 
+ ) + elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: + logger.info( + f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." + ) + + # Set seed before initializing model. + set_seed(training_args.seed) + + # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) + # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub). + # + # For CSV/JSON files, this script will use as labels the column called 'label' and as pair of sentences the + # sentences in columns called 'sentence1' and 'sentence2' if such column exists or the first two columns not named + # label if at least two columns are provided. + # + # If the CSVs/JSONs contain only one non-label column, the script does single sentence classification on this + # single column. You can easily tweak this behavior (see below) + # + # In distributed training, the load_dataset function guarantee that only one local process can concurrently + # download the dataset. + if data_args.task_name is not None: + # Downloading and loading a dataset from the hub. + raw_datasets = load_dataset( + "glue", + data_args.task_name, + cache_dir=model_args.cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + ) + elif data_args.dataset_name is not None: + # Downloading and loading a dataset from the hub. + raw_datasets = load_dataset( + data_args.dataset_name, + data_args.dataset_config_name, + cache_dir=model_args.cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + ) + else: + # Loading a dataset from your local files. + # CSV/JSON training and evaluation files are needed. 
+ data_files = {"train": data_args.train_file, "validation": data_args.validation_file} + + # Get the test dataset: you can provide your own CSV/JSON test file (see below) + # when you use `do_predict` without specifying a GLUE benchmark task. + if training_args.do_predict: + if data_args.test_file is not None: + train_extension = data_args.train_file.split(".")[-1] + test_extension = data_args.test_file.split(".")[-1] + assert ( + test_extension == train_extension + ), "`test_file` should have the same extension (csv or json) as `train_file`." + data_files["test"] = data_args.test_file + else: + raise ValueError("Need either a GLUE task or a test file for `do_predict`.") + + for key in data_files.keys(): + logger.info(f"load a local file for {key}: {data_files[key]}") + + if data_args.train_file.endswith(".csv"): + # Loading a dataset from local csv files + raw_datasets = load_dataset( + "csv", + data_files=data_files, + cache_dir=model_args.cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + ) + else: + # Loading a dataset from local json files + raw_datasets = load_dataset( + "json", + data_files=data_files, + cache_dir=model_args.cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + ) + # See more about loading any type of standard or custom dataset at + # https://huggingface.co/docs/datasets/loading_datasets.html. + + # Labels + if data_args.task_name is not None: + is_regression = data_args.task_name == "stsb" + if not is_regression: + label_list = raw_datasets["train"].features["label"].names + num_labels = len(label_list) + else: + num_labels = 1 + else: + # Trying to have good defaults here, don't hesitate to tweak to your needs. 
+ is_regression = raw_datasets["train"].features["label"].dtype in ["float32", "float64"] + if is_regression: + num_labels = 1 + else: + # A useful fast method: + # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique + label_list = raw_datasets["train"].unique("label") + label_list.sort() # Let's sort it for determinism + num_labels = len(label_list) + + # Load pretrained model and tokenizer + # + # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently + # download model & vocab. + config = AutoConfig.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + num_labels=num_labels, + finetuning_task=data_args.task_name, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + use_fast=model_args.use_fast_tokenizer, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + custom_model = model_args.custom_model + + if custom_model is not None: + # Check model and implementation is the same + if 'roberta' in custom_model and 'roberta' not in model_args.model_name_or_path: + raise RuntimeError('Model and custom implementation should be the same type: RoBERTa') + elif 'gpt2' in custom_model and 'gpt2' not in model_args.model_name_or_path: + raise RuntimeError('Model and custom implementation should be the same type: GPT-2') + + # Set custom configuration in model configuration + config.use_hidden_states = 'hidden' in custom_model + logger.info(f'Using hidden states in model: {config.use_hidden_states}') + + print(f'-------------------------------------------------------- Using hidden: {config.use_hidden_states}') + + # Get 
class to initialize model + model_cls = MODEL_NAME_TO_CLASS[custom_model] + else: + model_cls = AutoModelForSequenceClassification + logger.info(f'Using implementation from class: {model_cls.__name__}') + model = model_cls.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, + ) + + print(model) + + if 'gpt2' in tokenizer.name_or_path and tokenizer.pad_token is None: + logger.info(f'Set PAD token to EOS: {tokenizer.eos_token}') + tokenizer._pad_token = tokenizer.eos_token + model.config.pad_token_id = model.config.eos_token_id + + # Preprocessing the raw_datasets + if data_args.task_name is not None: + sentence1_key, sentence2_key = task_to_keys[data_args.task_name] + + # Preprocessing the raw_datasets + if data_args.task_name is not None: + sentence1_key, sentence2_key = task_to_keys[data_args.task_name] + else: + # Again, we try to have some nice defaults but don't hesitate to tweak to your use case. + non_label_column_names = [name for name in raw_datasets["train"].column_names if name != "label"] + if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names: + sentence1_key, sentence2_key = "sentence1", "sentence2" + else: + if len(non_label_column_names) >= 2: + sentence1_key, sentence2_key = non_label_column_names[:2] + else: + sentence1_key, sentence2_key = non_label_column_names[0], None + + # Padding strategy + if data_args.pad_to_max_length: + padding = "max_length" + else: + # We will pad later, dynamically at batch creation, to the max sequence length in each batch + padding = False + + # Some models have set the order of the labels to use, so let's make sure we do use it. 
+ label_to_id = None + if ( + model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id + and data_args.task_name is not None + and not is_regression + ): + # Some have all caps in their config, some don't. + label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} + if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): + label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} + else: + logger.warning( + "Your model seems to have been trained with labels, but they don't match the dataset: ", + f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." + "\nIgnoring the model labels as a result.", + ) + elif data_args.task_name is None and not is_regression: + label_to_id = {v: i for i, v in enumerate(label_list)} + + if label_to_id is not None: + model.config.label2id = label_to_id + model.config.id2label = {id: label for label, id in config.label2id.items()} + elif data_args.task_name is not None and not is_regression: + model.config.label2id = {l: i for i, l in enumerate(label_list)} + model.config.id2label = {id: label for label, id in config.label2id.items()} + + if data_args.max_seq_length > tokenizer.model_max_length: + logger.warning( + f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the" + f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}." 
+ ) + max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length) + + def preprocess_function(examples): + # Tokenize the texts + args = ( + (examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key]) + ) + result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True) + + # Map labels to IDs (not necessary for GLUE tasks) + if label_to_id is not None and "label" in examples: + result["label"] = [(label_to_id[l] if l != -1 else -1) for l in examples["label"]] + return result + + with training_args.main_process_first(desc="dataset map pre-processing"): + raw_datasets = raw_datasets.map( + preprocess_function, + batched=True, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on dataset", + ) + if training_args.do_train: + if "train" not in raw_datasets: + raise ValueError("--do_train requires a train dataset") + train_dataset = raw_datasets["train"] + if data_args.max_train_samples is not None: + max_train_samples = min(len(train_dataset), data_args.max_train_samples) + train_dataset = train_dataset.select(range(max_train_samples)) + + if training_args.do_eval: + if "validation" not in raw_datasets and "validation_matched" not in raw_datasets: + raise ValueError("--do_eval requires a validation dataset") + eval_dataset = raw_datasets["validation_matched" if data_args.task_name == "mnli" else "validation"] + if data_args.max_eval_samples is not None: + max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) + label_to_indexes = defaultdict(list) + for index, eval_sample in enumerate(eval_dataset): + label_to_indexes[eval_sample['label']].append(index) + max_samples_per_label = int(max_eval_samples / len(label_to_indexes)) + eval_sample_indexes = [] + for label, indexes in label_to_indexes.items(): + eval_sample_indexes.extend(indexes[:max_samples_per_label]) + logger.info(f"Set {max_samples_per_label} samples for {label}-class") + 
eval_sample_indexes.sort() + eval_dataset = eval_dataset.select(eval_sample_indexes) + + if training_args.do_predict or data_args.task_name is not None or data_args.test_file is not None: + if "test" not in raw_datasets and "test_matched" not in raw_datasets: + raise ValueError("--do_predict requires a test dataset") + predict_dataset = raw_datasets["test_matched" if data_args.task_name == "mnli" else "test"] + if data_args.max_predict_samples is not None: + max_predict_samples = min(len(predict_dataset), data_args.max_predict_samples) + predict_dataset = predict_dataset.select(range(max_predict_samples)) + + # Log a few random samples from the training set: + if training_args.do_train: + for index in random.sample(range(len(train_dataset)), 3): + logger.info(f"Sample {index} of the training set: {train_dataset[index]}.") + + # Get the metric function + if data_args.task_name is not None: + metric = evaluate.load("glue", data_args.task_name) + else: + metric = evaluate.load("accuracy") + + # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a + # predictions and label_ids field) and has to return a dictionary string to float. + def compute_metrics(p: EvalPrediction): + preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions + preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1) + if data_args.task_name is not None: + result = metric.compute(predictions=preds, references=p.label_ids) + if len(result) > 1: + result["combined_score"] = np.mean(list(result.values())).item() + return result + elif is_regression: + return {"mse": ((preds - p.label_ids) ** 2).mean().item()} + else: + return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()} + + # Data collator will default to DataCollatorWithPadding when the tokenizer is passed to Trainer, so we change it if + # we already did the padding. 
+ if data_args.pad_to_max_length: + data_collator = default_data_collator + elif training_args.fp16: + data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) + else: + data_collator = None + + # Initialize our Trainer + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_dataset if training_args.do_train else None, + eval_dataset=eval_dataset if training_args.do_eval else None, + compute_metrics=compute_metrics, + tokenizer=tokenizer, + data_collator=data_collator, + ) + + # Training + ignore_keys_for_eval = ['hidden_states', 'attentions', 'past_key_values'] + if training_args.do_train: + checkpoint = None + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + elif last_checkpoint is not None: + checkpoint = last_checkpoint + train_result = trainer.train(resume_from_checkpoint=checkpoint, ignore_keys_for_eval=ignore_keys_for_eval) + metrics = train_result.metrics + max_train_samples = ( + data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) + ) + metrics["train_samples"] = min(max_train_samples, len(train_dataset)) + + trainer.save_model() # Saves the tokenizer too for easy upload + + trainer.log_metrics("train", metrics) + trainer.save_metrics("train", metrics) + trainer.save_state() + + # Evaluation + if training_args.do_eval: + logger.info("*** Evaluate ***") + + # Loop to handle MNLI double evaluation (matched, mis-matched) + tasks = [data_args.task_name] + eval_datasets = [eval_dataset] + if data_args.task_name == "mnli": + tasks.append("mnli-mm") + valid_mm_dataset = raw_datasets["validation_mismatched"] + if data_args.max_eval_samples is not None: + max_eval_samples = min(len(valid_mm_dataset), data_args.max_eval_samples) + valid_mm_dataset = valid_mm_dataset.select(range(max_eval_samples)) + eval_datasets.append(valid_mm_dataset) + combined = {} + + for eval_dataset, task in zip(eval_datasets, tasks): + metrics = 
trainer.evaluate(eval_dataset=eval_dataset, ignore_keys=ignore_keys_for_eval) + + max_eval_samples = ( + data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) + ) + metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) + + if task == "mnli-mm": + metrics = {k + "_mm": v for k, v in metrics.items()} + if task is not None and "mnli" in task: + combined.update(metrics) + + trainer.log_metrics("eval", metrics) + trainer.save_metrics("eval", combined if task is not None and "mnli" in task else metrics) + + if training_args.do_predict: + logger.info("*** Predict ***") + + # Loop to handle MNLI double evaluation (matched, mis-matched) + tasks = [data_args.task_name] + predict_datasets = [predict_dataset] + if data_args.task_name == "mnli": + tasks.append("mnli-mm") + predict_datasets.append(raw_datasets["test_mismatched"]) + + for predict_dataset, task in zip(predict_datasets, tasks): + # Removing the `label` columns because it contains -1 and Trainer won't like that. 
class T5ClassificationHead(nn.Module):
    """Three-layer feed-forward head that classifies from one sequence position.

    The head keeps index 0 along the second axis of its input, then applies
    dropout -> linear -> ReLU twice, a final dropout, and an output projection
    to ``config.num_labels`` logits.

    Args:
        config: Model configuration. Only ``d_model`` (input width) and
            ``num_labels`` (output width) are read.
        hidden_size: Width of the two inner projections. Defaults to 768,
            the value that used to be hard-coded.
        dropout: Dropout probability shared by all three dropout
            applications. Defaults to 0.1, the value that used to be
            hard-coded.
    """

    def __init__(self, config: "T5Config", *, hidden_size: int = 768, dropout: float = 0.1):
        super().__init__()

        # Attribute names and creation order are kept so parameter names in
        # saved state dicts and default weight initialisation do not change.
        self.dense_in = nn.Linear(config.d_model, hidden_size)
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.dense_out = nn.Linear(hidden_size, config.num_labels)
        self.dropout = nn.Dropout(dropout)

    def forward(self, features, **kwargs):
        """Return logits computed from position 0 of ``features``.

        Args:
            features: 3-D tensor whose last axis has size ``config.d_model``
                (presumably ``(batch, sequence, d_model)`` -- confirm with
                the caller).
            **kwargs: Accepted for call-site compatibility and ignored.

        Returns:
            Tensor with ``config.num_labels`` entries on its last axis.
        """
        x = features[:, 0, :]
        x = self.dropout(x)
        x = torch.relu(self.dense_in(x))
        x = self.dropout(x)
        x = torch.relu(self.dense(x))
        x = self.dropout(x)
        return self.dense_out(x)
class T5ForClassification(T5PreTrainedModel):
    """T5 encoder/decoder stacks topped with a ``T5ClassificationHead``.

    An encoder ``T5Stack`` and a decoder ``T5Stack`` share one embedding
    table. ``layer[0]`` of every encoder block and ``layer[0]``/``layer[1]``
    of every decoder block are frozen; every other parameter stays trainable.
    The head reads position 0 of the decoder output.
    """

    def __init__(self, config: T5Config):
        super().__init__(config)
        self.model_dim = config.d_model

        self.shared = nn.Embedding(config.vocab_size, config.d_model)

        encoder_config = copy.deepcopy(config)
        encoder_config.is_decoder = False
        encoder_config.use_cache = False
        encoder_config.is_encoder_decoder = False
        self.encoder = T5Stack(encoder_config, self.shared)

        decoder_config = copy.deepcopy(config)
        decoder_config.is_decoder = True
        decoder_config.is_encoder_decoder = False
        decoder_config.num_layers = config.num_decoder_layers
        self.decoder = T5Stack(decoder_config, self.shared)

        # NOTE(review): in transformers' T5Block, layer[0] is presumably the
        # self-attention sub-layer and, for decoder blocks, layer[1] the
        # cross-attention sub-layer -- confirm against the installed version.
        modules_to_freeze = [block.layer[0] for block in self.encoder.block]
        modules_to_freeze.extend(block.layer[0] for block in self.decoder.block)
        modules_to_freeze.extend(block.layer[1] for block in self.decoder.block)

        for module in modules_to_freeze:
            for param in module.parameters():
                param.requires_grad = False

        # The attribute keeps the name ``lm_head`` so state-dict keys are stable.
        self.lm_head = T5ClassificationHead(config)

        # Initialize weights and apply final processing
        self.post_init()

        # Model parallel
        self.model_parallel = False
        self.device_map = None

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        head_mask=None,
        cross_attn_head_mask=None,
        past_key_values=None,
        inputs_embeds=None,
        decoder_inputs_embeds=None,
        use_cache=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        labels=None
    ):
        """Encode the input, decode against it, and classify.

        Args:
            input_ids: Token ids, fed to both the encoder and the decoder.
            attention_mask: Mask for ``input_ids``; used as the encoder mask,
                the decoder self-attention mask and the cross-attention mask.
            head_mask: Forwarded to both stacks.
            cross_attn_head_mask: Forwarded to the decoder only.
            past_key_values: Forwarded to the decoder only.
            inputs_embeds: Alternative to ``input_ids``; forwarded to both stacks.
            decoder_inputs_embeds: Accepted for signature compatibility; unused.
            use_cache: Forwarded to the decoder only.
            output_attentions: Forwarded to both stacks.
            output_hidden_states: Forwarded to both stacks.
            return_dict: When falsy, a plain tuple is returned. ``None`` falls
                back to ``self.config.use_return_dict``.
            labels: Optional class indices; when given, a cross-entropy loss
                is computed.

        Returns:
            ``SequenceClassifierOutput(loss, logits)``, or the tuple
            ``(loss, logits)`` / ``(logits,)`` when ``return_dict`` is falsy.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # Decoder-only arguments (cross-attention head mask, cache) are not
        # passed to the encoder stack.
        encoder_outputs = self.encoder(
            input_ids,
            attention_mask=attention_mask,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Previously the encoder result was overwritten and never reached the
        # decoder, so the encoder had no influence on the logits.
        # NOTE(review): the decoder stack is built with is_decoder=True; if its
        # self-attention is causal, position 0 only sees the first token unless
        # it can cross-attend to the encoder output -- confirm. Checkpoints
        # trained with the previous forward will yield different logits.
        decoder_outputs = self.decoder(
            input_ids,
            attention_mask=attention_mask,
            encoder_hidden_states=encoder_outputs[0],
            encoder_attention_mask=attention_mask,
            head_mask=head_mask,
            cross_attn_head_mask=cross_attn_head_mask,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        logits = self.lm_head(decoder_outputs[0])

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1))

        # Honour return_dict instead of always returning the output dataclass.
        if not return_dict:
            return (logits,) if loss is None else (loss, logits)

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
        )