hf roberta (linear top layer by hand instead of hf)

Jakub Pokrywka 2021-12-24 15:01:30 +01:00
parent fb6215f9c1
commit c85f1611e6
5 changed files with 297460 additions and 297434 deletions

298228 dev-0/out.tsv

File diff suppressed because it is too large

View File

@@ -4,12 +4,22 @@ from datasets import load_dataset
from transformers import AutoTokenizer, RobertaModel, RobertaTokenizer
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification
#from transformers import AdamW
from torch.optim import Adam
from transformers import get_scheduler
import torch
from tqdm.auto import tqdm
import os
import pickle
from regressor_head import RegressorHead

try:
    os.mkdir('roberta_year_prediction')
except Exception:
    pass

def pickle_model_save(name):
    with open(f'roberta_year_prediction/{name}', 'wb') as f:
        pickle.dump(model, f)

if TEST:
    STEPS_EVAL = 10
@@ -29,9 +39,13 @@ train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE
eval_dataloader_small = DataLoader(eval_dataset_small, batch_size=BATCH_SIZE)
eval_dataloader_full = DataLoader(eval_dataset_full, batch_size=BATCH_SIZE)
model = AutoModelForSequenceClassification.from_pretrained(MODEL, num_labels=1)
optimizer = Adam(model.parameters(), lr=LR)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = RobertaModel.from_pretrained('roberta-base')
model.regressor_head = RegressorHead(768).to('cuda')
model.to(device)
optimizer = Adam(model.parameters(), lr=LR)
num_training_steps = NUM_EPOCHS * len(train_dataloader)
#lr_scheduler = get_scheduler(
@@ -42,10 +56,6 @@ num_training_steps = NUM_EPOCHS * len(train_dataloader)
#)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
progress_bar = tqdm(range(num_training_steps))
model.train()
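The hunks above replace the stock AutoModelForSequenceClassification, which brings its own classification head, with a bare RobertaModel plus the hand-written RegressorHead. A minimal sketch of that wiring, assuming roberta-base's hidden size of 768 and a placeholder learning rate (the script's LR constant is defined outside these hunks):

# Sketch only, not the exact training script from this commit.
# Assigning the head as an attribute of the RobertaModel registers it as a
# submodule, so model.parameters() also yields the head's weights and
# model.to(device) moves encoder and head together.
import torch
from torch.optim import Adam
from transformers import RobertaModel
from regressor_head import RegressorHead

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = RobertaModel.from_pretrained('roberta-base')
model.regressor_head = RegressorHead(768)      # 768 = hidden size of roberta-base
model.to(device)
optimizer = Adam(model.parameters(), lr=1e-5)  # placeholder for the script's LR

Creating the optimizer after attaching the head matters here: otherwise the head's parameters would never be passed to Adam and would never be updated.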
@@ -55,15 +65,15 @@ model.to(device)
def transform_batch(batch):
    batch['input_ids'] = torch.stack(batch['input_ids']).permute(1,0).to(device)
    batch['attention_mask'] = torch.stack(batch['attention_mask']).permute(1,0).to(device)
    batch['labels'] = batch['year_scaled'].to(device).float()
    labels = batch['year_scaled'].to(device).float()
    batch['labels'].to(device)
    batch['input_ids'].to(device)
    batch['attention_mask'].to(device)
    for c in set(batch.keys()) - {'input_ids', 'attention_mask', 'labels'}:
    for c in set(batch.keys()) - {'input_ids', 'attention_mask'}:
        del batch[c]
    return batch
    return batch, labels

def eval(full = False):
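The stack-and-permute in transform_batch compensates for the default DataLoader collation: a per-example list of token ids comes back as a list of seq_len tensors, each holding one position for the whole batch. A toy shape check, assuming batch_size=2 and seq_len=4 (the id values are made up):

import torch

# what the default collate_fn hands transform_batch for 'input_ids'
collated_input_ids = [torch.tensor([0, 0]),      # position 0 of both examples
                      torch.tensor([713, 152]),  # position 1
                      torch.tensor([16, 21]),    # position 2
                      torch.tensor([2, 2])]      # position 3
ids = torch.stack(collated_input_ids)   # shape (seq_len, batch) = (4, 2)
ids = ids.permute(1, 0)                 # shape (batch, seq_len) = (2, 4), what RobertaModel expects
print(ids.shape)                        # torch.Size([2, 4])

The bare batch['input_ids'].to(device) and batch['attention_mask'].to(device) lines are effectively no-ops for tensors, since Tensor.to returns a new tensor that is then discarded; the assignments above them already move the data.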
@@ -74,12 +84,12 @@ def eval(full = False):
    items_passed = 0
    for i, batch in enumerate(dataloader):
        items_passed += len(batch)
        batch = transform_batch(batch)
        labels = batch['labels']
        del batch['labels']
        outputs = model(**batch)
        o = soft_clip(outputs['logits']).squeeze()
        loss = criterion(o, labels)
        batch, labels = transform_batch(batch)
        outputs = model(**batch)[0]
        outputs = model.regressor_head(outputs)
        loss = criterion(outputs.squeeze(), labels)
        eval_loss += loss.item()
    eval_loss = (eval_loss / items_passed)
    print(f'eval loss full={full}: {eval_loss:.5f}', end = '\n')
@@ -88,11 +98,6 @@ def eval(full = False):
criterion = torch.nn.MSELoss(reduction='sum').to(device)
lrelu = torch.nn.LeakyReLU(0.1)

def soft_clip(t):
    t = lrelu(t)
    t = -lrelu(-t + 1) + 1
    return t

best_eval_loss = 9999
epochs_without_progress = 0
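soft_clip, removed here because the same arithmetic now lives in RegressorHead.forward, squashes the regression output towards [0, 1]: inside the interval it is the identity, outside it the value is pulled back with slope 0.1 rather than cut off, so a gradient still flows. A quick numeric check (standalone, not part of the script):

import torch

lrelu = torch.nn.LeakyReLU(0.1)

def soft_clip(t):
    t = lrelu(t)
    t = -lrelu(-t + 1) + 1
    return t

print(soft_clip(torch.tensor([-0.5, 0.5, 1.5])))
# tensor([-0.0500, 0.5000, 1.0500]) -> unchanged inside [0, 1], damped outside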
@@ -101,12 +106,12 @@ for epoch in range(NUM_EPOCHS):
    items_passed = 0
    for i, batch in enumerate(train_dataloader):
        items_passed += len(batch)
        batch = transform_batch(batch)
        labels = batch['labels']
        del batch['labels']
        outputs = model(**batch)
        o = soft_clip(outputs['logits']).squeeze()
        loss = criterion(o, labels)
        batch, labels = transform_batch(batch)
        outputs = model(**batch)[0]
        outputs = model.regressor_head(outputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        train_loss += loss.item()
        progress_bar.update(1)
@@ -123,11 +128,13 @@ for epoch in range(NUM_EPOCHS):
eval(full = False)
eval_loss = eval(full=True)
model.save_pretrained(f'roberta_year_prediction/epoch_{epoch}_loss{eval_loss:.5f}')
model.save_pretrained(f'roberta_year_prediction/epoch_last')
pickle_model_save(f'epoch_{epoch}')
pickle_model_save(f'epoch_last')
if eval_loss < best_eval_loss:
model.save_pretrained(f'roberta_year_prediction/epoch_best')
pickle_model_save(f'epoch_best')
print('\nsaving best model')
best_eval_loss = eval_loss
else:
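The checkpointing also changes: save_pretrained is dropped in favour of pickling the whole module via pickle_model_save. The likely reason is that the hand-attached regressor_head is not part of the RobertaModel config, so a later from_pretrained would rebuild only the bare encoder; pickling the nn.Module keeps encoder and head together. A load-back sketch matching the prediction script further down:

import pickle

# restore the full model object, regressor_head attribute included
with open('roberta_year_prediction/epoch_best', 'rb') as f:
    model = pickle.load(f)
model.eval()

Saving model.state_dict() with torch.save and rebuilding the module before loading it back would be the more portable alternative, at the cost of a few extra lines.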

View File

@@ -1,6 +1,7 @@
import pickle
import torch
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer, RobertaModel, RobertaTokenizer
from regressor_head import RegressorHead
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
@@ -11,12 +12,14 @@ with open('test_dataset_A.pickle','rb') as f_p:
    test_dataset = pickle.load(f_p)
device = 'cuda'
model = AutoModelForSequenceClassification.from_pretrained('./roberta_year_prediction/epoch_best')
with open('./roberta_year_prediction/epoch_best', 'rb') as f:
    model = pickle.load(f)
model.eval()
model.to(device)
lrelu = torch.nn.LeakyReLU(0.0)

def soft_clip(t):
def hard_clip(t):
    t = lrelu(t)
    t = -lrelu(-t + 1) + 1
    return t
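The prediction script keeps the same two-pass construction but with LeakyReLU(0.0), which turns it into a hard clamp to [0, 1], hence the rename to hard_clip. A standalone check:

import torch

lrelu = torch.nn.LeakyReLU(0.0)

def hard_clip(t):
    t = lrelu(t)
    t = -lrelu(-t + 1) + 1
    return t

x = torch.tensor([-0.3, 0.4, 1.7])
print(hard_clip(x))              # tensor([0.0000, 0.4000, 1.0000])
print(torch.clamp(x, 0.0, 1.0))  # identical result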
@@ -25,7 +28,7 @@ with open('scalers.pickle', 'rb') as f_scaler:
    scalers = pickle.load(f_scaler)

def predict(dataset, out_f):
    eval_dataloader = DataLoader(dataset, batch_size=50)
    eval_dataloader = DataLoader(dataset, batch_size=20)
    outputs = []
    progress_bar = tqdm(range(len(eval_dataloader)))
@@ -33,15 +36,18 @@ def predict(dataset, out_f):
    for batch in eval_dataloader:
        batch['input_ids'] = torch.stack(batch['input_ids']).permute(1,0).to(device)
        batch['attention_mask'] = torch.stack(batch['attention_mask']).permute(1,0).to(device)
        batch['labels'] = batch['year_scaled'].to(device).float()
        batch['labels'].to(device)
        batch['input_ids'].to(device)
        batch['attention_mask'].to(device)
        for c in set(batch.keys()) - {'input_ids', 'attention_mask', 'labels'}:
            del batch[c]
        outputs.extend(soft_clip(model(**batch).logits).tolist())
        o = model(**batch)[0]
        o = model.regressor_head(o)
        o = hard_clip(o)
        outputs.extend(o.tolist())
        progress_bar.update(1)
    outputs_transformed = scalers['year'].inverse_transform(outputs)
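The network predicts in the scaled [0, 1] space, and scalers['year'] maps those values back to calendar years. The pickled object's type is not visible in this diff; assuming it is a fitted sklearn MinMaxScaler, and using a made-up year range, the round trip looks like this:

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit([[1900.0], [2000.0]])            # hypothetical min/max years
print(scaler.transform([[1950.0]]))         # [[0.5]]
print(scaler.inverse_transform([[0.5]]))    # [[1950.]]

Note that outputs in the loop above is a list of single-element lists, because o.tolist() is called on a (batch, 1) tensor, which is exactly the 2-D shape inverse_transform expects.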

View File

@@ -0,0 +1,13 @@
import torch

class RegressorHead(torch.nn.Module):
    def __init__(self, in_dim):
        super(RegressorHead, self).__init__()
        self.linear = torch.nn.Linear(in_dim, 1)
        self.m = torch.nn.LeakyReLU(0.1)

    def forward(self, x):
        x = x.mean(1)
        x = self.linear(x)
        x = self.m(x)
        x = -self.m(-x + 1) + 1
        return x
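The new head mean-pools RoBERTa's last hidden states over the sequence dimension, projects them to a single value, and applies the same soft clip towards [0, 1] that the training script previously did separately. A quick shape check with dummy tensors:

import torch
from regressor_head import RegressorHead

head = RegressorHead(768)
hidden_states = torch.randn(4, 128, 768)  # (batch, seq_len, hidden), as RobertaModel returns
out = head(hidden_states)
print(out.shape)                          # torch.Size([4, 1])

The mean in forward runs over every position, padding included, because the attention mask never reaches the head; masked mean pooling would be the usual refinement.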

File diff suppressed because it is too large