progress
commit 023a4e4361 · parent dbadedfc1c
generate.py (new file, 56 lines)
@@ -0,0 +1,56 @@
import pandas as pd
from transformers import BertTokenizer, AdamW, AutoModelForSequenceClassification
import torch
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
import torch.nn as nn
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from model import BERT_Arch
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

path = 'saved_weights.pt'
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
device = torch.device("cuda")

bert = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')

model = BERT_Arch(bert)
model.load_state_dict(torch.load(path))
model.to(device)

test_data = pd.read_csv("dev-0/in.tsv", sep="\t")
test_data.columns = ["text", "d"]

test_target = pd.read_csv("dev-0/expected.tsv", sep="\t")

tokens_test = tokenizer.batch_encode_plus(
    test_data["text"],
    max_length=25,
    padding='max_length',
    truncation=True
)

test_seq = torch.tensor(tokens_test['input_ids'])
test_mask = torch.tensor(tokens_test['attention_mask'])

# define a batch size
batch_size = 32

# wrap tensors
test_data = TensorDataset(test_seq, test_mask)

# sampler for sampling the data during evaluation
test_sampler = RandomSampler(test_data)

# dataLoader for the test set
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

with torch.no_grad():
    preds = model(test_seq.to(device), test_mask.to(device))
    preds = preds.detach().cpu().numpy()
    preds = np.argmax(preds, axis=1)

print(classification_report(test_target['0'], preds))
print(accuracy_score(test_target['0'], preds))
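Note that generate.py builds test_dataloader but then feeds the whole tensor set through the model in one forward pass, and a RandomSampler would shuffle batch order anyway. A minimal sketch of batched inference, assuming the model and tensors defined above and swapping in torch's SequentialSampler so predictions stay aligned with test_target (not part of the committed code):

# sketch only, not part of the commit: batched inference in original order
from torch.utils.data import SequentialSampler

eval_dataloader = DataLoader(test_data, sampler=SequentialSampler(test_data), batch_size=batch_size)

model.eval()
all_preds = []
with torch.no_grad():
    for seq_batch, mask_batch in eval_dataloader:
        # forward pass one batch at a time to keep GPU memory bounded
        batch_logits = model(seq_batch.to(device), mask_batch.to(device))
        all_preds.append(batch_logits.detach().cpu().numpy())

preds = np.argmax(np.concatenate(all_preds, axis=0), axis=1)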
main.py (314 lines changed)
@@ -1,155 +1,215 @@
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import BertTokenizer, AdamW, AutoModelForSequenceClassification
import torch
# from torchtext.data import BucketIterator, Iterator
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
import torch.nn as nn
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from model import BERT_Arch

train_input_path = "dev-0/in.tsv"
train_target_path = "dev-0/expected.tsv"
train_input_path = "train/in.tsv"
train_target_path = "train/expected.tsv"

train_input = pd.read_csv(train_input_path, sep="\t")[:100]
train_input = pd.read_csv(train_input_path, sep="\t")
train_input.columns=["text", "d"]
train_target = pd.read_csv(train_target_path, sep="\t")[:100]
train_target = pd.read_csv(train_target_path, sep="\t")

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
device = torch.device("cuda")

MAX_SEQ_LEN = 128
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)

# label_field = Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.float)
# text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=True, include_lengths=False, batch_first=True,
#                    fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)
# seq_len = [len(i.split()) for i in train_input["text"]]

# fields = [('label', label_field), ('text', text_field),]
# pd.Series(seq_len).hist(bins = 30)
# plt.show()

# valid_iter = BucketIterator(train_input["text"], batch_size=16, sort_key=lambda x: len(x.text),
#                             device=device, train=True, sort=True, sort_within_batch=True)
bert = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')

class BERT(torch.nn.Module):

    def __init__(self):
        super(BERT, self).__init__()
tokens_train = tokenizer.batch_encode_plus(
    train_input["text"],
    max_length = 25,
    padding='max_length',
    truncation=True
)

        options_name = "bert-base-uncased"
        self.encoder = BertForSequenceClassification.from_pretrained(options_name)
train_seq = torch.tensor(tokens_train['input_ids'])
train_mask = torch.tensor(tokens_train['attention_mask'])
train_y = torch.tensor(train_target.to_numpy())

    def forward(self, text, label):
        loss, text_fea = self.encoder(text, labels=label)[:2]
#define a batch size
batch_size = 32

        return loss, text_fea
# wrap tensors
train_data = TensorDataset(train_seq, train_mask, train_y)

def save_checkpoint(save_path, model, valid_loss):
# sampler for sampling the data during training
train_sampler = RandomSampler(train_data)

    if save_path == None:
        return
# dataLoader for train set
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

for param in bert.parameters():
    param.requires_grad = False

model = BERT_Arch(bert)
model = model.to(device)
# model.cuda(0)

optimizer = AdamW(model.parameters(), lr = 1e-5)

class_weights = compute_class_weight('balanced', np.unique(train_target.to_numpy()), train_target['1'])
weights= torch.tensor(class_weights,dtype=torch.float)
weights = weights.to(device)

# define the loss function
cross_entropy = nn.NLLLoss(weight=weights)

# number of training epochs
epochs = 10

def train():

    model.train()

    total_loss, total_accuracy = 0, 0

    # empty list to save model predictions
    total_preds=[]

    # iterate over batches
    for step,batch in enumerate(train_dataloader):

        state_dict = {'model_state_dict': model.state_dict(),
                      'valid_loss': valid_loss}
        # progress update after every 50 batches.
        if step % 50 == 0 and not step == 0:
            print(' Batch {:>5,} of {:>5,}.'.format(step, len(train_dataloader)))

        # push the batch to gpu
        batch = [r.to(device) for r in batch]

        sent_id, mask, labels = batch

        # clear previously calculated gradients
        model.zero_grad()

        # get model predictions for the current batch
        preds = model(sent_id, mask)

        # compute the loss between actual and predicted values
        labels = torch.tensor([x[0] for x in labels]).to(device)
        loss = cross_entropy(preds, labels)

        # add on to the total loss
        total_loss = total_loss + loss.item()

        # backward pass to calculate the gradients
        loss.backward()

        # clip the gradients to 1.0. It helps in preventing the exploding gradient problem
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # update parameters
        optimizer.step()

        # model predictions are stored on GPU. So, push it to CPU
        preds=preds.detach().cpu().numpy()

        # append the model predictions
        total_preds.append(preds)

    # compute the training loss of the epoch
    avg_loss = total_loss / len(train_dataloader)

    # predictions are in the form of (no. of batches, size of batch, no. of classes).
    # reshape the predictions in form of (number of samples, no. of classes)
    total_preds = np.concatenate(total_preds, axis=0)

    #returns the loss and predictions
    return avg_loss, total_preds

def evaluate():

    print("\nEvaluating...")

    # deactivate dropout layers
    model.eval()

    total_loss, total_accuracy = 0, 0

    # empty list to save the model predictions
    total_preds = []

    # iterate over batches
    for step,batch in enumerate(train_dataloader):

    torch.save(state_dict, save_path)
    print(f'Model saved to ==> {save_path}')
        # Progress update every 50 batches.
        if step % 50 == 0 and not step == 0:

            # Calculate elapsed time in minutes.

            # Report progress.
            print(' Batch {:>5,} of {:>5,}.'.format(step, len(train_dataloader)))

def load_checkpoint(load_path, model):
        # push the batch to gpu
        batch = [t.to(device) for t in batch]

        sent_id, mask, labels = batch

        # deactivate autograd
        with torch.no_grad():

            # model predictions
            preds = model(sent_id, mask)

            # compute the validation loss between actual and predicted values
            labels = torch.tensor([x[0] for x in labels]).to(device)
            loss = cross_entropy(preds,labels)

            total_loss = total_loss + loss.item()

            preds = preds.detach().cpu().numpy()

            total_preds.append(preds)

    # compute the validation loss of the epoch
    avg_loss = total_loss / len(train_dataloader)

    # reshape the predictions in form of (number of samples, no. of classes)
    total_preds = np.concatenate(total_preds, axis=0)

    return avg_loss, total_preds

# avg_loss, total_preds = train()
# set initial loss to infinite
best_valid_loss = float('inf')

# empty lists to store training and validation loss of each epoch
train_losses=[]
valid_losses=[]

print("Started training!")
#for each epoch
for epoch in range(epochs):

    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))

    if load_path==None:
        return
    #train model
    train_loss, _ = train()

    state_dict = torch.load(load_path, map_location=device)
    print(f'Model loaded from <== {load_path}')
    #evaluate model
    valid_loss, _ = evaluate()

    model.load_state_dict(state_dict['model_state_dict'])
    return state_dict['valid_loss']


def save_metrics(save_path, train_loss_list, valid_loss_list, global_steps_list):

    if save_path == None:
        return
    #save the best model
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'saved_weights.pt')

    state_dict = {'train_loss_list': train_loss_list,
                  'valid_loss_list': valid_loss_list,
                  'global_steps_list': global_steps_list}
    # append training and validation loss
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    torch.save(state_dict, save_path)
    print(f'Model saved to ==> {save_path}')
    print(f'\nTraining Loss: {train_loss:.3f}')
    print(f'Validation Loss: {valid_loss:.3f}')


def load_metrics(load_path):

    if load_path==None:
        return

    state_dict = torch.load(load_path, map_location=device)
    print(f'Model loaded from <== {load_path}')

    return state_dict['train_loss_list'], state_dict['valid_loss_list'], state_dict['global_steps_list']

def train(model,
          optimizer,
          criterion = torch.nn.BCELoss(),
          train_data = train_input['text'],
          train_target = train_target,
          num_epochs = 5,
          eval_every = len(train_input) // 2,
          file_path = "./",
          best_valid_loss = float("Inf")):

    # initialize running values
    running_loss = 0.0
    valid_running_loss = 0.0
    global_step = 0
    train_loss_list = []
    valid_loss_list = []
    global_steps_list = []

    # training loop
    model.train()
    for epoch in range(num_epochs):
        for text, label in zip(train_data, train_target):
            output = model(text, label)
            loss, _ = output

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # update running values
            running_loss += loss.item()
            global_step += 1

            # evaluation step
            if global_step % eval_every == 0:
                model.eval()

                # evaluation
                average_train_loss = running_loss / eval_every
                average_valid_loss = valid_running_loss / len(train_data)
                train_loss_list.append(average_train_loss)
                valid_loss_list.append(average_valid_loss)
                global_steps_list.append(global_step)

                # resetting running values
                running_loss = 0.0
                valid_running_loss = 0.0
                model.train()

                # print progress
                print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'
                      .format(epoch+1, num_epochs, global_step, num_epochs*len(train_data),
                              average_train_loss, average_valid_loss))

                # checkpoint
                if best_valid_loss > average_valid_loss:
                    best_valid_loss = average_valid_loss
                    save_checkpoint(file_path + '/' + 'model.pt', model, best_valid_loss)
                    save_metrics(file_path + '/' + 'metrics.pt', train_loss_list, valid_loss_list, global_steps_list)

    save_metrics(file_path + '/' + 'metrics.pt', train_loss_list, valid_loss_list, global_steps_list)
    print('Finished Training!')

model = BERT().to(device)
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

train(model=model, optimizer=optimizer)
print("Finished !!!")
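A portability note on the class-weight line in main.py: newer scikit-learn releases make the classes and y arguments of compute_class_weight keyword-only (positional use was deprecated in 0.24 and later removed), so the positional call above eventually fails. A minimal sketch of the keyword form, assuming the label column is train_target['1'] as used above (not part of the committed code):

# sketch only, not part of the commit: keyword-only form of compute_class_weight
import numpy as np
import torch
from sklearn.utils.class_weight import compute_class_weight

labels = train_target['1'].to_numpy()
class_weights = compute_class_weight(class_weight='balanced',
                                     classes=np.unique(labels),
                                     y=labels)
weights = torch.tensor(class_weights, dtype=torch.float).to(device)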
model.py (new file, 44 lines)
@@ -0,0 +1,44 @@
import torch.nn as nn


class BERT_Arch(nn.Module):

    def __init__(self, bert):

        super(BERT_Arch, self).__init__()

        self.bert = bert

        # dropout layer
        self.dropout = nn.Dropout(0.1)

        # relu activation function
        self.relu = nn.ReLU()

        # dense layer 1
        self.fc1 = nn.Linear(2, 512)

        # dense layer 2 (output layer)
        self.fc2 = nn.Linear(512, 2)

        # softmax activation function
        self.softmax = nn.LogSoftmax(dim=1)

    # define the forward pass
    def forward(self, sent_id, mask):

        # pass the inputs to the model
        sentence_classifier_output = self.bert(sent_id, attention_mask=mask)
        x = sentence_classifier_output.logits.float()
        x = self.fc1(x)

        x = self.relu(x)

        x = self.dropout(x)

        # output layer
        x = self.fc2(x)

        # apply softmax activation
        x = self.softmax(x)

        return x
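Since the forward pass ends in LogSoftmax, BERT_Arch returns log-probabilities, which is why main.py pairs it with nn.NLLLoss. A small usage sketch with dummy tensors, shapes chosen to match the max_length of 25 and two classes used elsewhere in this commit (not part of the committed code):

# sketch only, not part of the commit: wiring BERT_Arch to NLLLoss and argmax
import torch
import torch.nn as nn
from transformers import AutoModelForSequenceClassification
from model import BERT_Arch

bert = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')
model = BERT_Arch(bert)

# dummy batch: 4 sequences of 25 token ids with a full attention mask
sent_id = torch.randint(0, 30000, (4, 25))
mask = torch.ones_like(sent_id)
labels = torch.tensor([0, 1, 0, 1])

log_probs = model(sent_id, mask)        # shape (4, 2), log-probabilities
loss = nn.NLLLoss()(log_probs, labels)  # NLLLoss expects log-probabilities
preds = log_probs.argmax(dim=1)         # predicted class per example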