import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.metrics import accuracy_score, classification_report
from datasets import load_dataset
dataset = load_dataset('dair-ai/emotion')
print(dataset)
FutureWarning (datasets/load.py): The repository for dair-ai/emotion contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/dair-ai/emotion. You can avoid this message by passing `trust_remote_code=True`; doing so will be mandatory from the next major release of `datasets`.
DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})
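As the FutureWarning above notes, this dataset ships a custom loading script; opting in to it explicitly silences the warning. A minimal sketch:

# Optional: opt in to the dataset's custom loading code explicitly, as the warning suggests.
dataset = load_dataset('dair-ai/emotion', trust_remote_code=True)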
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=6)
max_len = 128
train_data = dataset['train']
train_encodings = tokenizer(train_data['text'], truncation=True, padding='max_length', max_length=max_len, return_tensors='pt')
train_labels = torch.tensor(train_data['label'])
train_dataset = torch.utils.data.TensorDataset(train_encodings['input_ids'], train_encodings['attention_mask'], train_labels)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
optimizer = AdamW(model.parameters(), lr=2e-5)
criterion = torch.nn.CrossEntropyLoss()  # not used below: the model returns its own loss when labels are passed
num_epochs = 3
print(model)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
FutureWarning (transformers/optimization.py): This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning.
BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (pooler): BertPooler(
      (dense): Linear(in_features=768, out_features=768, bias=True)
      (activation): Tanh()
    )
  )
  (dropout): Dropout(p=0.1, inplace=False)
  (classifier): Linear(in_features=768, out_features=6, bias=True)
)
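As written, everything runs on the CPU, which is slow for 16,000 examples (the run below was in fact interrupted). A minimal, hedged sketch of moving the model and batches to a GPU when one is available; the `device` variable is my own addition:

# Use a GPU if one is available; otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
# Each batch inside the training loop would then need the same move, e.g.:
#   input_ids = input_ids.to(device)
#   attention_mask = attention_mask.to(device)
#   labels = labels.to(device)
# Without a GPU, a quick smoke test on a small subset is an option, e.g.:
#   small_train = dataset['train'].select(range(2000))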
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for input_ids, attention_mask, labels in train_loader:
        optimizer.zero_grad()
        # Passing labels makes the model return its cross-entropy loss directly.
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    average_loss = total_loss / len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs} - Average Loss: {average_loss}')
model.save_pretrained('emotion_model')
model.eval()
KeyboardInterrupt
Traceback (most recent call last): raised at Cell In[4], line 8, `outputs = model(input_ids, attention_mask=attention_mask, labels=labels)`, and propagated through BertForSequenceClassification.forward → BertModel.forward → BertEncoder → BertLayer → BertAttention → BertSelfAttention (softmax over the attention scores). In other words, the run was stopped manually mid-forward-pass, so this training cell did not complete.
all_labels = []
all_predictions = []
with torch.no_grad():
    # Note: this loop iterates over train_loader, so the accuracy below is measured
    # on the training data (a test-split sketch follows this block).
    for input_ids, attention_mask, labels in train_loader:
        outputs = model(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        all_labels.extend(labels.numpy())
        all_predictions.extend(predictions.numpy())
accuracy = accuracy_score(all_labels, all_predictions)
classification_report_str = classification_report(all_labels, all_predictions)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_report_str)
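The accuracy above is computed on the training data itself. The dataset also ships a held-out test split (2,000 rows, per the DatasetDict printed earlier); a sketch of evaluating on it instead, reusing only objects already defined above:

# Sketch: evaluate on the held-out test split instead of the training data.
test_data = dataset['test']
test_encodings = tokenizer(test_data['text'], truncation=True, padding='max_length',
                           max_length=max_len, return_tensors='pt')
test_labels = torch.tensor(test_data['label'])
test_loader = DataLoader(
    torch.utils.data.TensorDataset(test_encodings['input_ids'],
                                   test_encodings['attention_mask'],
                                   test_labels),
    batch_size=8, shuffle=False)

model.eval()
test_true, test_pred = [], []
with torch.no_grad():
    for input_ids, attention_mask, labels in test_loader:
        logits = model(input_ids, attention_mask=attention_mask).logits
        test_pred.extend(torch.argmax(logits, dim=1).tolist())
        test_true.extend(labels.tolist())
print(f'Test accuracy: {accuracy_score(test_true, test_pred)}')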
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification, AdamW
from sklearn.metrics import accuracy_score, classification_report
from datasets import load_dataset
dataset = load_dataset('dair-ai/emotion')
print(dataset)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default; reuse EOS so padding='max_length' works
model = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=6)  # 6 = number of classes (0-5)
model.config.pad_token_id = tokenizer.pad_token_id  # tell the classification head which token is padding
max_len = 128
train_data = dataset['train']
train_encodings = tokenizer(train_data['text'], truncation=True, padding='max_length', max_length=max_len, return_tensors='pt')
train_labels = torch.tensor(train_data['label'])
train_dataset = torch.utils.data.TensorDataset(train_encodings['input_ids'], train_encodings['attention_mask'], train_labels)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
optimizer = AdamW(model.parameters(), lr=2e-5)
criterion = torch.nn.CrossEntropyLoss()
num_epochs = 3
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for input_ids, attention_mask, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    average_loss = total_loss / len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs} - Average Loss: {average_loss}')
model.save_pretrained('emotion_gpt2_model')
model.eval()
all_labels = []
all_predictions = []
with torch.no_grad():
    # As with BERT above, this evaluates on the training data; the test-split sketch
    # shown earlier applies here as well.
    for input_ids, attention_mask, labels in train_loader:
        outputs = model(input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        all_labels.extend(labels.numpy())
        all_predictions.extend(predictions.numpy())
accuracy = accuracy_score(all_labels, all_predictions)
classification_report_str = classification_report(all_labels, all_predictions)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_report_str)
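A small sketch of using the checkpoint saved above for single-sentence inference; the example sentence and the `label_names` list are my own additions (the list assumes the dataset's usual label order):

# Sketch: classify one sentence with the fine-tuned GPT-2 checkpoint saved above.
clf = GPT2ForSequenceClassification.from_pretrained('emotion_gpt2_model')
clf.config.pad_token_id = tokenizer.pad_token_id
clf.eval()
enc = tokenizer('i feel wonderful today', truncation=True, padding='max_length',
                max_length=max_len, return_tensors='pt')
with torch.no_grad():
    pred = torch.argmax(clf(**enc).logits, dim=1).item()
label_names = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']  # assumed label order
print(label_names[pred])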
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW
from sklearn.metrics import accuracy_score, classification_report
from datasets import load_dataset
dataset = load_dataset('dair-ai/emotion')
print(dataset)
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small')
max_len = 128
train_data = dataset['train']
train_encodings = tokenizer(train_data['text'], truncation=True, padding='max_length', max_length=max_len, return_tensors='pt')
# Caveat: these are integer class ids; T5ForConditionalGeneration expects token-id sequences
# as `labels` (see the text-target sketch below).
train_labels = torch.tensor(train_data['label'])
train_dataset = torch.utils.data.TensorDataset(train_encodings['input_ids'], train_encodings['attention_mask'], train_labels)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
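Because T5 is a text-to-text model, its `labels` argument should be target token-id sequences rather than integer class indices, so the loader above will not train a usable classifier as-is. A hedged sketch of one common workaround, encoding the emotion names as short text targets; the `label_names` list and all `t5_`-prefixed names are my own additions, and the list assumes the dataset's documented label order:

# Sketch: encode class names as text targets for T5 instead of passing integer class ids.
label_names = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']  # assumed label order
target_texts = [label_names[i] for i in train_data['label']]
target_encodings = tokenizer(target_texts, padding='max_length', max_length=4, return_tensors='pt')
t5_labels = target_encodings['input_ids']
t5_labels[t5_labels == tokenizer.pad_token_id] = -100  # -100 is ignored by the loss
t5_train_dataset = torch.utils.data.TensorDataset(
    train_encodings['input_ids'], train_encodings['attention_mask'], t5_labels)
t5_train_loader = DataLoader(t5_train_dataset, batch_size=8, shuffle=True)

The same idea applies to the Flan-T5 setup further below.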
optimizer = AdamW(model.parameters(), lr=2e-5)
criterion = torch.nn.CrossEntropyLoss()
num_epochs = 3
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for input_ids, attention_mask, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    average_loss = total_loss / len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs} - Average Loss: {average_loss}')
model.save_pretrained('emotion_t5_model')
model.eval()
all_labels = []
all_predictions = []
with torch.no_grad():
    for input_ids, attention_mask, labels in train_loader:
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        # Note: outputs.logits are the language-modelling head's vocabulary logits,
        # not 6-way class scores, so this argmax does not yield class predictions.
        predictions = torch.argmax(outputs.logits, dim=1)
        all_labels.extend(labels.numpy())
        all_predictions.extend(predictions.numpy())
accuracy = accuracy_score(all_labels, all_predictions)
classification_report_str = classification_report(all_labels, all_predictions)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_report_str)
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW
from sklearn.metrics import accuracy_score, classification_report
from datasets import load_dataset
dataset = load_dataset('dair-ai/emotion')
print(dataset)
tokenizer = T5Tokenizer.from_pretrained('google/flan-t5-small')
model = T5ForConditionalGeneration.from_pretrained('google/flan-t5-small')
max_len = 128
train_data = dataset['train']
train_labels = train_data['label']
train_prompts = [f'emotion: {text}' for text in train_data['text']]
train_encodings = tokenizer(train_prompts, truncation=True, padding='max_length', max_length=max_len, return_tensors='pt')
# Caveat: as with the plain T5 setup above, these integer ids are not valid T5 `labels`;
# the text-target encoding sketched earlier would apply here as well.
train_labels = torch.tensor(train_labels)
train_dataset = torch.utils.data.TensorDataset(train_encodings['input_ids'], train_encodings['attention_mask'], train_labels)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
optimizer = AdamW(model.parameters(), lr=2e-5)
criterion = torch.nn.CrossEntropyLoss()
num_epochs = 3
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for input_ids, attention_mask, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    average_loss = total_loss / len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs} - Average Loss: {average_loss}')
# Save the model
model.save_pretrained('emotion_flant5_model')
# Evaluate the model
model.eval()
all_labels = []
all_predictions = []
with torch.no_grad():
    for input_ids, attention_mask, labels in train_loader:
        # Note: with max_length=1 generate() returns little more than the decoder start token,
        # and an argmax over the returned token ids is not a class prediction
        # (see the decoding-based evaluation sketch at the end of this section).
        outputs = model.generate(input_ids, attention_mask=attention_mask, max_length=1)
        predictions = torch.argmax(outputs, dim=1)
        all_labels.extend(labels.numpy())
        all_predictions.extend(predictions.numpy())
# Compute evaluation metrics
accuracy = accuracy_score(all_labels, all_predictions)
classification_report_str = classification_report(all_labels, all_predictions)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_report_str)
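For a text-to-text model, a more meaningful evaluation is to generate the label word and map it back to a class id. A sketch under the assumption that the model was trained to emit emotion names (as in the text-target sketch earlier); it iterates over the existing train_loader, whose third tensor still holds the integer class ids, and the `label_names` list is the same assumed ordering as before:

# Sketch: evaluate a text-to-text model by decoding generated label words back to class ids.
label_names = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']  # assumed label order
name_to_id = {name: i for i, name in enumerate(label_names)}

model.eval()
true_ids, pred_ids = [], []
with torch.no_grad():
    for input_ids, attention_mask, labels in train_loader:
        generated = model.generate(input_ids, attention_mask=attention_mask, max_new_tokens=4)
        texts = tokenizer.batch_decode(generated, skip_special_tokens=True)
        # Unrecognised outputs fall back to class 0 here; other policies are possible.
        pred_ids.extend(name_to_id.get(t.strip(), 0) for t in texts)
        true_ids.extend(labels.tolist())
print(f'Accuracy: {accuracy_score(true_ids, pred_ids)}')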