progress
parent dbadedfc1c
commit 023a4e4361
56 generate.py (new file)
@@ -0,0 +1,56 @@
import pandas as pd
from transformers import BertTokenizer, AdamW, AutoModelForSequenceClassification
import torch
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
import torch.nn as nn
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from model import BERT_Arch
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

# path to the fine-tuned weights saved by main.py
path = 'saved_weights.pt'
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
device = torch.device("cuda")

bert = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')

# rebuild the architecture and load the trained weights
model = BERT_Arch(bert)
model.load_state_dict(torch.load(path))
model.to(device)

# load the dev set and its labels
test_data = pd.read_csv("dev-0/in.tsv", sep="\t")
test_data.columns = ["text", "d"]

test_target = pd.read_csv("dev-0/expected.tsv", sep="\t")

# tokenize the dev texts the same way as the training data
tokens_train = tokenizer.batch_encode_plus(
    test_data["text"],
    max_length = 25,
    padding='max_length',
    truncation=True
)

test_seq = torch.tensor(tokens_train['input_ids'])
test_mask = torch.tensor(tokens_train['attention_mask'])

#define a batch size
batch_size = 32

# wrap tensors
test_data = TensorDataset(test_seq, test_mask)

# sampler for sampling the dev data
test_sampler = RandomSampler(test_data)

# dataloader for the dev set
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

# run the whole dev set through the model in one pass and report metrics
with torch.no_grad():
    preds = model(test_seq.to(device), test_mask.to(device))
    preds = preds.detach().cpu().numpy()
    preds = np.argmax(preds, axis = 1)

print(classification_report(test_target['0'], preds))
print(accuracy_score(test_target['0'], preds))
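generate.py builds test_dataloader but then scores the entire dev set in a single forward pass, and the RandomSampler it creates would shuffle batch order if the loader were actually used. For larger splits, a batched, order-preserving pass is safer. A minimal sketch (my addition, not part of the commit; it reuses test_seq, test_mask, batch_size, model and device from above):

# sketch: batched, order-preserving inference over the dev set
from torch.utils.data import SequentialSampler

test_dataset = TensorDataset(test_seq, test_mask)
test_dataloader = DataLoader(test_dataset,
                             sampler=SequentialSampler(test_dataset),
                             batch_size=batch_size)

model.eval()          # the committed script never disables dropout; eval() does that
all_preds = []
with torch.no_grad():
    for sent_id, mask in test_dataloader:
        out = model(sent_id.to(device), mask.to(device))   # log-probabilities from BERT_Arch
        all_preds.append(out.cpu().numpy())
preds = np.argmax(np.concatenate(all_preds, axis=0), axis=1)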
314 main.py
@@ -1,155 +1,215 @@
 import pandas as pd
-from transformers import BertTokenizer, BertForSequenceClassification
+from transformers import BertTokenizer, AdamW, AutoModelForSequenceClassification
 import torch
-# from torchtext.data import BucketIterator, Iterator
+import matplotlib.pyplot as plt
+from torch.utils.data import TensorDataset, DataLoader, RandomSampler
+import torch.nn as nn
+from sklearn.utils.class_weight import compute_class_weight
+import numpy as np
+from model import BERT_Arch

-train_input_path = "dev-0/in.tsv"
-train_target_path = "dev-0/expected.tsv"
+train_input_path = "train/in.tsv"
+train_target_path = "train/expected.tsv"

-train_input = pd.read_csv(train_input_path, sep="\t")[:100]
+train_input = pd.read_csv(train_input_path, sep="\t")
 train_input.columns=["text", "d"]
-train_target = pd.read_csv(train_target_path, sep="\t")[:100]
+train_target = pd.read_csv(train_target_path, sep="\t")

 tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
 device = torch.device("cuda")

-MAX_SEQ_LEN = 128
-PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
-UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)

-# label_field = Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.float)
-# text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=True, include_lengths=False, batch_first=True,
-#                    fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)
+# seq_len = [len(i.split()) for i in train_input["text"]]

-# fields = [('label', label_field), ('text', text_field),]
+# pd.Series(seq_len).hist(bins = 30)
+# plt.show()

-# valid_iter = BucketIterator(train_input["text"], batch_size=16, sort_key=lambda x: len(x.text),
-#                             device=device, train=True, sort=True, sort_within_batch=True)
+bert = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')

-class BERT(torch.nn.Module):

-    def __init__(self):
-        super(BERT, self).__init__()
+tokens_train = tokenizer.batch_encode_plus(
+    train_input["text"],
+    max_length = 25,
+    padding='max_length',
+    truncation=True
+)
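tokenizer.batch_encode_plus is documented to take a list of strings; the pandas Series passed above merely happens to be iterable. Converting with .tolist() keeps the call within the documented input types. A sketch with the same parameters (my addition, not part of the commit):

# sketch: identical call with an explicit list of strings
tokens_train = tokenizer.batch_encode_plus(
    train_input["text"].tolist(),
    max_length=25,
    padding='max_length',
    truncation=True
)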
-        options_name = "bert-base-uncased"
-        self.encoder = BertForSequenceClassification.from_pretrained(options_name)
+train_seq = torch.tensor(tokens_train['input_ids'])
+train_mask = torch.tensor(tokens_train['attention_mask'])
+train_y = torch.tensor(train_target.to_numpy())

-    def forward(self, text, label):
-        loss, text_fea = self.encoder(text, labels=label)[:2]
+#define a batch size
+batch_size = 32

-        return loss, text_fea
+# wrap tensors
+train_data = TensorDataset(train_seq, train_mask, train_y)

-def save_checkpoint(save_path, model, valid_loss):
+# sampler for sampling the data during training
+train_sampler = RandomSampler(train_data)

-    if save_path == None:
-        return
+# dataLoader for train set
+train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

+for param in bert.parameters():
+    param.requires_grad = False

+model = BERT_Arch(bert)
+model = model.to(device)
+# model.cuda(0)

+optimizer = AdamW(model.parameters(), lr = 1e-5)

+class_weights = compute_class_weight('balanced', np.unique(train_target.to_numpy()), train_target['1'])
+weights= torch.tensor(class_weights,dtype=torch.float)
+weights = weights.to(device)

+# define the loss function
+cross_entropy = nn.NLLLoss(weight=weights)

+# number of training epochs
+epochs = 10
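One note on the class-weight line above: scikit-learn 1.0 and later accept the classes and y arguments of compute_class_weight only as keywords, so the positional call in this commit raises a TypeError there. An equivalent keyword form (a sketch, assuming the labels live in the '1' column as the commit does):

# sketch only, not part of the commit: same semantics with keyword arguments
class_weights = compute_class_weight(class_weight='balanced',
                                     classes=np.unique(train_target['1']),
                                     y=train_target['1'])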
+def train():

+    model.train()

+    total_loss, total_accuracy = 0, 0

+    # empty list to save model predictions
+    total_preds=[]

+    # iterate over batches
+    for step,batch in enumerate(train_dataloader):

-    state_dict = {'model_state_dict': model.state_dict(),
-                  'valid_loss': valid_loss}
+        # progress update after every 50 batches.
+        if step % 50 == 0 and not step == 0:
+            print(' Batch {:>5,} of {:>5,}.'.format(step, len(train_dataloader)))

+        # push the batch to gpu
+        batch = [r.to(device) for r in batch]

+        sent_id, mask, labels = batch

+        # clear previously calculated gradients
+        model.zero_grad()

+        # get model predictions for the current batch
+        preds = model(sent_id, mask)

+        # compute the loss between actual and predicted values
+        labels = torch.tensor([x[0] for x in labels]).to(device)
+        loss = cross_entropy(preds, labels)

+        # add on to the total loss
+        total_loss = total_loss + loss.item()

+        # backward pass to calculate the gradients
+        loss.backward()

+        # clip the the gradients to 1.0. It helps in preventing the exploding gradient problem
+        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

+        # update parameters
+        optimizer.step()

+        # model predictions are stored on GPU. So, push it to CPU
+        preds=preds.detach().cpu().numpy()

+        # append the model predictions
+        total_preds.append(preds)

+    # compute the training loss of the epoch
+    avg_loss = total_loss / len(train_dataloader)

+    # predictions are in the form of (no. of batches, size of batch, no. of classes).
+    # reshape the predictions in form of (number of samples, no. of classes)
+    total_preds = np.concatenate(total_preds, axis=0)

+    #returns the loss and predictions
+    return avg_loss, total_preds
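In the loop above, labels arrives from the TensorDataset as a (batch_size, 1) tensor, because train_y was built from a single-column DataFrame, and the list comprehension then rebuilds it element by element on the CPU before moving it back to the GPU. A leaner equivalent (a sketch, not part of the commit) keeps the tensor where it already is:

        # sketch: same result as torch.tensor([x[0] for x in labels]).to(device)
        labels = labels.squeeze(1)   # shape (batch_size,), already on `device`
        loss = cross_entropy(preds, labels)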
+def evaluate():

+    print("\nEvaluating...")

+    # deactivate dropout layers
+    model.eval()

+    total_loss, total_accuracy = 0, 0

+    # empty list to save the model predictions
+    total_preds = []

+    # iterate over batches
+    for step,batch in enumerate(train_dataloader):

-    torch.save(state_dict, save_path)
-    print(f'Model saved to ==> {save_path}')
+        # Progress update every 50 batches.
+        if step % 50 == 0 and not step == 0:

+            # Calculate elapsed time in minutes.

+            # Report progress.
+            print(' Batch {:>5,} of {:>5,}.'.format(step, len(train_dataloader)))

-def load_checkpoint(load_path, model):
+        # push the batch to gpu
+        batch = [t.to(device) for t in batch]

+        sent_id, mask, labels = batch

+        # deactivate autograd
+        with torch.no_grad():

+            # model predictions
+            preds = model(sent_id, mask)

+            # compute the validation loss between actual and predicted values
+            labels = torch.tensor([x[0] for x in labels]).to(device)
+            loss = cross_entropy(preds,labels)

+            total_loss = total_loss + loss.item()

+            preds = preds.detach().cpu().numpy()

+            total_preds.append(preds)

+    # compute the validation loss of the epoch
+    avg_loss = total_loss / len(train_dataloader)

+    # reshape the predictions in form of (number of samples, no. of classes)
+    total_preds = np.concatenate(total_preds, axis=0)

+    return avg_loss, total_preds
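Both train() and evaluate() iterate over train_dataloader, so the "validation" loss that decides which weights get saved is measured on the same data the model was just fitted to. A held-out split would make the checkpointing meaningful; a sketch (my addition, names such as val_texts are hypothetical and not in the commit):

# sketch: carve a validation set out of the training data before tokenizing
from sklearn.model_selection import train_test_split

train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_input["text"], train_target['1'], test_size=0.1, random_state=42)
# tokenize val_texts exactly like the training texts, wrap the tensors in a
# TensorDataset with a SequentialSampler, and let evaluate() loop over that
# val_dataloader instead of train_dataloader.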
+# avg_loss, total_preds = train()
+# set initial loss to infinite
+best_valid_loss = float('inf')

+# empty lists to store training and validation loss of each epoch
+train_losses=[]
+valid_losses=[]

+print("Started training!")
+#for each epoch
+for epoch in range(epochs):

+    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))

-    if load_path==None:
-        return
+    #train model
+    train_loss, _ = train()

-    state_dict = torch.load(load_path, map_location=device)
-    print(f'Model loaded from <== {load_path}')
+    #evaluate model
+    valid_loss, _ = evaluate()

-    model.load_state_dict(state_dict['model_state_dict'])
-    return state_dict['valid_loss']
+    #save the best model
+    if valid_loss < best_valid_loss:
+        best_valid_loss = valid_loss
+        torch.save(model.state_dict(), 'saved_weights.pt')

-def save_metrics(save_path, train_loss_list, valid_loss_list, global_steps_list):

-    if save_path == None:
-        return

-    state_dict = {'train_loss_list': train_loss_list,
-                  'valid_loss_list': valid_loss_list,
-                  'global_steps_list': global_steps_list}
+    # append training and validation loss
+    train_losses.append(train_loss)
+    valid_losses.append(valid_loss)

-    torch.save(state_dict, save_path)
-    print(f'Model saved to ==> {save_path}')
+    print(f'\nTraining Loss: {train_loss:.3f}')
+    print(f'Validation Loss: {valid_loss:.3f}')

+print("Finished !!!")

-def load_metrics(load_path):

-    if load_path==None:
-        return

-    state_dict = torch.load(load_path, map_location=device)
-    print(f'Model loaded from <== {load_path}')

-    return state_dict['train_loss_list'], state_dict['valid_loss_list'], state_dict['global_steps_list']

-def train(model,
-          optimizer,
-          criterion = torch.nn.BCELoss(),
-          train_data = train_input['text'],
-          train_target = train_target,
-          num_epochs = 5,
-          eval_every = len(train_input) // 2,
-          file_path = "./",
-          best_valid_loss = float("Inf")):

-    # initialize running values
-    running_loss = 0.0
-    valid_running_loss = 0.0
-    global_step = 0
-    train_loss_list = []
-    valid_loss_list = []
-    global_steps_list = []

-    # training loop
-    model.train()
-    for epoch in range(num_epochs):
-        for text, label in zip(train_data, train_target):
-            output = model(text, label)
-            loss, _ = output

-            optimizer.zero_grad()
-            loss.backward()
-            optimizer.step()

-            # update running values
-            running_loss += loss.item()
-            global_step += 1

-            # evaluation step
-            if global_step % eval_every == 0:
-                model.eval()

-                # evaluation
-                average_train_loss = running_loss / eval_every
-                average_valid_loss = valid_running_loss / len(train_data)
-                train_loss_list.append(average_train_loss)
-                valid_loss_list.append(average_valid_loss)
-                global_steps_list.append(global_step)

-                # resetting running values
-                running_loss = 0.0
-                valid_running_loss = 0.0
-                model.train()

-                # print progress
-                print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'
-                      .format(epoch+1, num_epochs, global_step, num_epochs*len(train_data),
-                              average_train_loss, average_valid_loss))

-                # checkpoint
-                if best_valid_loss > average_valid_loss:
-                    best_valid_loss = average_valid_loss
-                    save_checkpoint(file_path + '/' + 'model.pt', model, best_valid_loss)
-                    save_metrics(file_path + '/' + 'metrics.pt', train_loss_list, valid_loss_list, global_steps_list)

-    save_metrics(file_path + '/' + 'metrics.pt', train_loss_list, valid_loss_list, global_steps_list)
-    print('Finished Training!')

-model = BERT().to(device)
-model.cuda()
-optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

-train(model=model, optimizer=optimizer)
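The best-epoch weights end up in saved_weights.pt, which is the file generate.py loads at the top of this commit. torch.load restores tensors to the device they were saved from, so loading the checkpoint on a machine without a GPU needs map_location, the same argument the removed load_checkpoint already used. A sketch (my addition, not part of the commit):

# sketch: load the checkpoint produced above on whatever device is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')
model = BERT_Arch(bert)
model.load_state_dict(torch.load('saved_weights.pt', map_location=device))
model.to(device)
model.eval()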
44 model.py (new file)
@@ -0,0 +1,44 @@
import torch.nn as nn

class BERT_Arch(nn.Module):

    def __init__(self, bert):

        super(BERT_Arch, self).__init__()

        self.bert = bert

        # dropout layer
        self.dropout = nn.Dropout(0.1)

        # relu activation function
        self.relu = nn.ReLU()

        # dense layer 1
        self.fc1 = nn.Linear(2,512)

        # dense layer 2 (Output layer)
        self.fc2 = nn.Linear(512,2)

        #softmax activation function
        self.softmax = nn.LogSoftmax(dim=1)

    #define the forward pass
    def forward(self, sent_id, mask):

        #pass the inputs to the model
        senence_classifier_output = self.bert(sent_id, attention_mask=mask)
        x = senence_classifier_output.logits.float()
        x = self.fc1(x)

        x = self.relu(x)

        x = self.dropout(x)

        # output layer
        x = self.fc2(x)

        # apply softmax activation
        x = self.softmax(x)

        return x
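BERT_Arch relies on AutoModelForSequenceClassification already carrying a 2-way classification head: fc1's input size of 2 matches the logits it receives, and the LogSoftmax output is what the nn.NLLLoss in main.py expects. A quick shape check (my sketch, not part of the commit):

# sketch: verify the shapes BERT_Arch expects and produces
import torch
from transformers import BertTokenizer, AutoModelForSequenceClassification
from model import BERT_Arch

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')
model = BERT_Arch(bert)

enc = tokenizer(["an example sentence"], max_length=25,
                padding='max_length', truncation=True, return_tensors='pt')
out = model(enc['input_ids'], enc['attention_mask'])
print(out.shape)        # torch.Size([1, 2]): log-probabilities for the two classes
print(out.exp().sum())  # ~1.0, since the forward pass ends in LogSoftmax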