hf roberta (linear top layer by hand instead of hf)

Jakub Pokrywka 2021-12-24 15:01:30 +01:00
parent fb6215f9c1
commit c85f1611e6
5 changed files with 297460 additions and 297434 deletions

dev-0/out.tsv (298228 lines): file diff suppressed because it is too large.


@@ -4,12 +4,22 @@ from datasets import load_dataset
 from transformers import AutoTokenizer, RobertaModel, RobertaTokenizer
 from torch.utils.data import DataLoader
 from transformers import AutoModelForSequenceClassification
 #from transformers import AdamW
 from torch.optim import Adam
 from transformers import get_scheduler
 import torch
 from tqdm.auto import tqdm
+import os
+import pickle
+from regressor_head import RegressorHead
+
+try:
+    os.mkdir('roberta_year_prediction')
+except Exception:
+    pass
+
+def pickle_model_save(name):
+    with open(f'roberta_year_prediction/{name}', 'wb') as f:
+        pickle.dump(model,f)
 
 if TEST:
     STEPS_EVAL = 10
@@ -29,9 +39,13 @@ train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE
 eval_dataloader_small = DataLoader(eval_dataset_small, batch_size=BATCH_SIZE)
 eval_dataloader_full = DataLoader(eval_dataset_full, batch_size=BATCH_SIZE)
 
-model = AutoModelForSequenceClassification.from_pretrained(MODEL, num_labels=1)
-optimizer = Adam(model.parameters(), lr=LR)
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+model = RobertaModel.from_pretrained('roberta-base')
+model.regressor_head = RegressorHead(768).to('cuda')
+model.to(device)
+
+optimizer = Adam(model.parameters(), lr=LR)
 
 num_training_steps = NUM_EPOCHS * len(train_dataloader)
 #lr_scheduler = get_scheduler(
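
A note on the new model wiring above: RobertaModel is a torch.nn.Module, so assigning RegressorHead(768) to the attribute model.regressor_head registers the head as a submodule. Adam(model.parameters(), lr=LR) therefore covers the head's weights as well, and model.to(device) moves them together with the encoder (768 being roberta-base's hidden size). A minimal sketch of that behaviour, assuming torch, transformers and the new regressor_head module are importable:

import torch
from transformers import RobertaModel
from regressor_head import RegressorHead

model = RobertaModel.from_pretrained('roberta-base')
model.regressor_head = RegressorHead(768)   # attribute assignment registers an nn.Module submodule

head_ids = {id(p) for p in model.regressor_head.parameters()}
all_ids = {id(p) for p in model.parameters()}
assert head_ids <= all_ids                  # the optimizer built from model.parameters() trains the head too
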
@@ -42,10 +56,6 @@ num_training_steps = NUM_EPOCHS * len(train_dataloader)
 #)
 
-device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-model.to(device)
-
 progress_bar = tqdm(range(num_training_steps))
 
 model.train()
@@ -55,15 +65,15 @@ model.to(device)
 
 def transform_batch(batch):
     batch['input_ids'] = torch.stack(batch['input_ids']).permute(1,0).to(device)
     batch['attention_mask'] = torch.stack(batch['attention_mask']).permute(1,0).to(device)
-    batch['labels'] = batch['year_scaled'].to(device).float()
-    batch['labels'].to(device)
+    labels = batch['year_scaled'].to(device).float()
     batch['input_ids'].to(device)
     batch['attention_mask'].to(device)
 
-    for c in set(batch.keys()) - {'input_ids', 'attention_mask', 'labels'}:
+    for c in set(batch.keys()) - {'input_ids', 'attention_mask'}:
         del batch[c]
 
-    return batch
+    return batch, labels
 
 def eval(full = False):
@@ -74,12 +84,12 @@ def eval(full = False):
     items_passed = 0
     for i, batch in enumerate(dataloader):
         items_passed += len(batch)
-        batch = transform_batch(batch)
-        labels = batch['labels']
-        del batch['labels']
-        outputs = model(**batch)
-        o = soft_clip(outputs['logits']).squeeze()
-        loss = criterion(o, labels)
+        batch, labels = transform_batch(batch)
+        outputs = model(**batch)[0]
+        outputs = model.regressor_head(outputs)
+        loss = criterion(outputs.squeeze(), labels)
         eval_loss += loss.item()
     eval_loss = (eval_loss / items_passed)
     print(f'eval loss full={full}: {eval_loss:.5f}', end = '\n')
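
Both loops change shape because a bare RobertaModel, unlike AutoModelForSequenceClassification, takes no labels argument and returns neither logits nor a loss: its first output is the last hidden state, which the hand-made head pools and projects, and the MSE loss is computed outside the model against the labels that transform_batch now returns separately. A small sketch of the tensors involved, using dummy inputs (50265 is roberta-base's vocabulary size):

import torch
from transformers import RobertaModel
from regressor_head import RegressorHead

model = RobertaModel.from_pretrained('roberta-base')
model.regressor_head = RegressorHead(768)
model.eval()

input_ids = torch.randint(0, 50265, (2, 16))        # dummy batch: 2 sequences of 16 token ids
attention_mask = torch.ones_like(input_ids)

with torch.no_grad():
    last_hidden = model(input_ids=input_ids, attention_mask=attention_mask)[0]   # [2, 16, 768]
    preds = model.regressor_head(last_hidden)        # [2, 1], squashed towards [0, 1]

loss = torch.nn.MSELoss(reduction='sum')(preds.squeeze(), torch.rand(2))   # loss lives outside the model
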
@@ -88,11 +98,6 @@ def eval(full = False):
 criterion = torch.nn.MSELoss(reduction='sum').to(device)
 
-lrelu = torch.nn.LeakyReLU(0.1)
-def soft_clip(t):
-    t = lrelu(t)
-    t = -lrelu(-t + 1 ) + 1
-    return t
 
 best_eval_loss = 9999
 epochs_without_progress = 0
@@ -101,12 +106,12 @@ for epoch in range(NUM_EPOCHS):
     items_passed = 0
     for i, batch in enumerate(train_dataloader):
         items_passed += len(batch)
-        batch = transform_batch(batch)
-        labels = batch['labels']
-        del batch['labels']
-        outputs = model(**batch)
-        o = soft_clip(outputs['logits']).squeeze()
-        loss = criterion(o, labels)
+        batch, labels = transform_batch(batch)
+        outputs = model(**batch)[0]
+        outputs = model.regressor_head(outputs)
+        loss = criterion(outputs.squeeze(), labels)
         loss.backward()
         train_loss += loss.item()
         progress_bar.update(1)
@@ -123,11 +128,13 @@ for epoch in range(NUM_EPOCHS):
             eval(full = False)
     eval_loss = eval(full=True)
 
-    model.save_pretrained(f'roberta_year_prediction/epoch_{epoch}_loss{eval_loss:.5f}')
-    model.save_pretrained(f'roberta_year_prediction/epoch_last')
+    pickle_model_save(f'epoch_{epoch}')
+    pickle_model_save(f'epoch_last')
 
     if eval_loss < best_eval_loss:
-        model.save_pretrained(f'roberta_year_prediction/epoch_best')
+        pickle_model_save(f'epoch_best')
         print('\nsaving best model')
         best_eval_loss = eval_loss
     else:
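
A consequence of swapping save_pretrained for pickle, worth spelling out: pickle serializes the whole module object but stores its class only by reference, so the regressor_head module has to be importable wherever epoch_best is unpickled (the prediction script below imports it explicitly), and since the model sits on the GPU when it is dumped, a CUDA device is also expected at load time. A sketch of the round trip under those assumptions:

import pickle
from regressor_head import RegressorHead   # makes the class importable for unpickling

with open('roberta_year_prediction/epoch_best', 'rb') as f:   # path written by pickle_model_save
    model = pickle.load(f)

model.eval()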


@@ -1,6 +1,7 @@
 import pickle
 import torch
-from transformers import AutoModelForSequenceClassification
+from transformers import AutoTokenizer, RobertaModel, RobertaTokenizer
+from regressor_head import RegressorHead
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
@@ -11,12 +12,14 @@ with open('test_dataset_A.pickle','rb') as f_p:
     test_dataset = pickle.load(f_p)
 
 device = 'cuda'
-model = AutoModelForSequenceClassification.from_pretrained('./roberta_year_prediction/epoch_best')
+with open('./roberta_year_prediction/epoch_best', 'rb') as f:
+    model = pickle.load(f)
+
 model.eval()
 model.to(device)
 
 lrelu = torch.nn.LeakyReLU(0.0)
-def soft_clip(t):
+def hard_clip(t):
     t = lrelu(t)
     t = -lrelu(-t + 1 ) + 1
     return t
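
With a negative slope of 0.0, LeakyReLU degenerates to a plain ReLU, so hard_clip(t) = 1 - relu(1 - relu(t)) is exactly a clamp of the predictions to [0, 1]; the training-time soft_clip (slope 0.1) was the soft version of the same squashing and now lives inside RegressorHead. A tiny check, torch only:

import torch

lrelu = torch.nn.LeakyReLU(0.0)

def hard_clip(t):
    t = lrelu(t)
    t = -lrelu(-t + 1) + 1
    return t

t = torch.linspace(-2.0, 3.0, steps=11)
assert torch.allclose(hard_clip(t), torch.clamp(t, 0.0, 1.0))   # identical to clamping into [0, 1]
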
@@ -25,7 +28,7 @@ with open('scalers.pickle', 'rb') as f_scaler:
     scalers = pickle.load(f_scaler)
 
 def predict(dataset, out_f):
-    eval_dataloader = DataLoader(dataset, batch_size=50)
+    eval_dataloader = DataLoader(dataset, batch_size=20)
     outputs = []
     progress_bar = tqdm(range(len(eval_dataloader)))
@@ -33,15 +36,18 @@ def predict(dataset, out_f):
     for batch in eval_dataloader:
         batch['input_ids'] = torch.stack(batch['input_ids']).permute(1,0).to(device)
         batch['attention_mask'] = torch.stack(batch['attention_mask']).permute(1,0).to(device)
-        batch['labels'] = batch['year_scaled'].to(device).float()
-        batch['labels'].to(device)
         batch['input_ids'].to(device)
         batch['attention_mask'].to(device)
 
         for c in set(batch.keys()) - {'input_ids', 'attention_mask', 'labels'}:
             del batch[c]
 
-        outputs.extend(soft_clip(model(**batch).logits).tolist())
+        o = model(**batch)[0]
+        o = model.regressor_head(o)
+        o = hard_clip(o)
+        outputs.extend(o.tolist())
 
         progress_bar.update(1)
 
     outputs_transformed = scalers['year'].inverse_transform(outputs)
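
At this point outputs is a list of one-element lists (o has shape [batch, 1], so o.tolist() yields rows like [0.42]), which is the 2-D layout a fitted scikit-learn scaler's inverse_transform accepts for mapping the clipped [0, 1] predictions back to years. A hedged sketch, assuming scalers['year'] is something like a fitted MinMaxScaler (the scalers.pickle contents are not part of this diff, and the year range below is made up for illustration):

from sklearn.preprocessing import MinMaxScaler   # assumption about the pickled scaler's type

scaler = MinMaxScaler()
scaler.fit([[1900.0], [2000.0]])                 # hypothetical year range, illustration only

outputs = [[0.0], [0.5], [1.0]]                  # clipped predictions, one-element rows
print(scaler.inverse_transform(outputs))         # -> [[1900.], [1950.], [2000.]]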


@@ -0,0 +1,13 @@
+import torch
+
+class RegressorHead(torch.nn.Module):
+    def __init__(self, in_dim):
+        super(RegressorHead, self).__init__()
+        self.linear = torch.nn.Linear(in_dim, 1)
+        self.m = torch.nn.LeakyReLU(0.1)
+    def forward(self, x):
+        x = x.mean(1)
+        x = self.linear(x)
+        x = self.m(x)
+        x = - self.m(-x + 1 ) +1
+        return x
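
The head mean-pools the token representations over the sequence dimension (a plain mean over all positions, padding included), projects them to a single value with one linear layer, and applies a LeakyReLU-based soft clip on both ends, so outputs stay close to the [0, 1] range of the scaled years while keeping a small gradient outside it. A short usage sketch with dummy data, assuming the file above is saved as regressor_head.py:

import torch
from regressor_head import RegressorHead

head = RegressorHead(768)
hidden_states = torch.randn(4, 128, 768)   # [batch, seq_len, hidden], as returned by RobertaModel
preds = head(hidden_states)                # mean over seq_len -> linear -> soft clip

assert preds.shape == (4, 1)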

File diff suppressed because it is too large.