hf roberta (linear top layer by hand instead of hf)
This commit is contained in:
parent fb6215f9c1
commit c85f1611e6
298228 dev-0/out.tsv (file diff suppressed because it is too large)
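The gist of this commit, as a minimal sketch (an illustration, not the full training script; it assumes roberta-base and the RegressorHead defined in the new file later in this diff): the stock AutoModelForSequenceClassification head is dropped in favour of the bare RobertaModel with a hand-made linear head attached as model.regressor_head.

import torch
from transformers import RobertaModel
from regressor_head import RegressorHead

model = RobertaModel.from_pretrained('roberta-base')
model.regressor_head = RegressorHead(768)   # 768 = hidden size of roberta-base

# forward pass as used in the training/prediction loops below:
# hidden = model(**batch)[0]                # last hidden states, (batch, seq_len, 768)
# pred = model.regressor_head(hidden)       # mean-pool over tokens, Linear(768, 1), soft clip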
@@ -4,12 +4,22 @@ from datasets import load_dataset
from transformers import AutoTokenizer, RobertaModel, RobertaTokenizer
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification
#from transformers import AdamW
from torch.optim import Adam
from transformers import get_scheduler
import torch
from tqdm.auto import tqdm
import os
import pickle
from regressor_head import RegressorHead

try:
    os.mkdir('roberta_year_prediction')
except Exception:
    pass

def pickle_model_save(name):
    with open(f'roberta_year_prediction/{name}', 'wb') as f:
        pickle.dump(model, f)

if TEST:
    STEPS_EVAL = 10
@@ -29,9 +39,13 @@ train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE
eval_dataloader_small = DataLoader(eval_dataset_small, batch_size=BATCH_SIZE)
eval_dataloader_full = DataLoader(eval_dataset_full, batch_size=BATCH_SIZE)

model = AutoModelForSequenceClassification.from_pretrained(MODEL, num_labels=1)
optimizer = Adam(model.parameters(), lr=LR)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = RobertaModel.from_pretrained('roberta-base')
model.regressor_head = RegressorHead(768).to('cuda')
model.to(device)

optimizer = Adam(model.parameters(), lr=LR)

num_training_steps = NUM_EPOCHS * len(train_dataloader)
#lr_scheduler = get_scheduler(
@@ -42,10 +56,6 @@ num_training_steps = NUM_EPOCHS * len(train_dataloader)
#)


device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)


progress_bar = tqdm(range(num_training_steps))
model.train()
@@ -55,15 +65,15 @@ model.to(device)
def transform_batch(batch):
    batch['input_ids'] = torch.stack(batch['input_ids']).permute(1,0).to(device)
    batch['attention_mask'] = torch.stack(batch['attention_mask']).permute(1,0).to(device)
    batch['labels'] = batch['year_scaled'].to(device).float()
    labels = batch['year_scaled'].to(device).float()

    batch['labels'].to(device)
    batch['input_ids'].to(device)
    batch['attention_mask'].to(device)

    for c in set(batch.keys()) - {'input_ids', 'attention_mask', 'labels'}:
    for c in set(batch.keys()) - {'input_ids', 'attention_mask'}:
        del batch[c]
    return batch

    return batch, labels


def eval(full = False):
@@ -74,12 +84,12 @@ def eval(full = False):
    items_passed = 0
    for i, batch in enumerate(dataloader):
        items_passed += len(batch)
        batch = transform_batch(batch)
        labels = batch['labels']
        del batch['labels']
        outputs = model(**batch)
        o = soft_clip(outputs['logits']).squeeze()
        loss = criterion(o, labels)

        batch, labels = transform_batch(batch)
        outputs = model(**batch)[0]
        outputs = model.regressor_head(outputs)

        loss = criterion(outputs.squeeze(), labels)
        eval_loss += loss.item()
    eval_loss = (eval_loss / items_passed)
    print(f'eval loss full={full}: {eval_loss:.5f}', end = '\n')
@@ -88,11 +98,6 @@ def eval(full = False):

criterion = torch.nn.MSELoss(reduction='sum').to(device)

lrelu = torch.nn.LeakyReLU(0.1)
def soft_clip(t):
    t = lrelu(t)
    t = -lrelu(-t + 1 ) + 1
    return t

best_eval_loss = 9999
epochs_without_progress = 0
@@ -101,12 +106,12 @@ for epoch in range(NUM_EPOCHS):
    items_passed = 0
    for i, batch in enumerate(train_dataloader):
        items_passed += len(batch)
        batch = transform_batch(batch)
        labels = batch['labels']
        del batch['labels']
        outputs = model(**batch)
        o = soft_clip(outputs['logits']).squeeze()
        loss = criterion(o, labels)

        batch, labels = transform_batch(batch)
        outputs = model(**batch)[0]
        outputs = model.regressor_head(outputs)

        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        train_loss += loss.item()
        progress_bar.update(1)
@@ -123,11 +128,13 @@ for epoch in range(NUM_EPOCHS):
            eval(full = False)

    eval_loss = eval(full=True)
    model.save_pretrained(f'roberta_year_prediction/epoch_{epoch}_loss{eval_loss:.5f}')
    model.save_pretrained(f'roberta_year_prediction/epoch_last')

    pickle_model_save(f'epoch_{epoch}')
    pickle_model_save(f'epoch_last')

    if eval_loss < best_eval_loss:
        model.save_pretrained(f'roberta_year_prediction/epoch_best')
        pickle_model_save(f'epoch_best')
        print('\nsaving best model')
        best_eval_loss = eval_loss
    else:
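For reference, a small standalone sketch of what the LeakyReLU-based soft_clip helper kept in the training script above does (slope 0.1, as in the script): values already in [0, 1] pass through unchanged, while out-of-range values are compressed by a factor of 0.1 rather than cut off hard, so gradients still flow for out-of-range predictions. The prediction script below uses the same construction with slope 0.0 (hard_clip), i.e. a plain clamp to [0, 1].

import torch

lrelu = torch.nn.LeakyReLU(0.1)

def soft_clip(t):
    t = lrelu(t)               # below 0: x -> 0.1 * x
    t = -lrelu(-t + 1) + 1     # above 1: x -> 1 + 0.1 * (x - 1)
    return t

print(soft_clip(torch.tensor([-0.5, 0.3, 1.5])))   # tensor([-0.0500, 0.3000, 1.0500])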
@@ -1,6 +1,7 @@
import pickle
import torch
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer, RobertaModel, RobertaTokenizer
from regressor_head import RegressorHead
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
@@ -11,12 +12,14 @@ with open('test_dataset_A.pickle','rb') as f_p:
    test_dataset = pickle.load(f_p)

device = 'cuda'
model = AutoModelForSequenceClassification.from_pretrained('./roberta_year_prediction/epoch_best')
with open('./roberta_year_prediction/epoch_best', 'rb') as f:
    model = pickle.load(f)

model.eval()
model.to(device)

lrelu = torch.nn.LeakyReLU(0.0)
def soft_clip(t):
def hard_clip(t):
    t = lrelu(t)
    t = -lrelu(-t + 1 ) + 1
    return t
@@ -25,7 +28,7 @@ with open('scalers.pickle', 'rb') as f_scaler:
    scalers = pickle.load(f_scaler)

def predict(dataset, out_f):
    eval_dataloader = DataLoader(dataset, batch_size=50)
    eval_dataloader = DataLoader(dataset, batch_size=20)
    outputs = []

    progress_bar = tqdm(range(len(eval_dataloader)))
@@ -33,15 +36,18 @@ def predict(dataset, out_f):
    for batch in eval_dataloader:
        batch['input_ids'] = torch.stack(batch['input_ids']).permute(1,0).to(device)
        batch['attention_mask'] = torch.stack(batch['attention_mask']).permute(1,0).to(device)
        batch['labels'] = batch['year_scaled'].to(device).float()

        batch['labels'].to(device)
        batch['input_ids'].to(device)
        batch['attention_mask'].to(device)

        for c in set(batch.keys()) - {'input_ids', 'attention_mask', 'labels'}:
            del batch[c]
        outputs.extend(soft_clip(model(**batch).logits).tolist())

        o = model(**batch)[0]
        o = model.regressor_head(o)
        o = hard_clip(o)

        outputs.extend(o.tolist())
        progress_bar.update(1)
    outputs_transformed = scalers['year'].inverse_transform(outputs)
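The final inverse_transform step maps the clipped model outputs back to calendar years. A hypothetical sketch of how scalers['year'] could have been produced, assuming a scikit-learn MinMaxScaler (the actual scalers.pickle is not shown in this diff, so the scaler type and values are assumptions):

import numpy as np
from sklearn.preprocessing import MinMaxScaler

years = np.array([[1999.0], [2005.0], [2012.0]])    # hypothetical training targets
scaler = MinMaxScaler()                             # assumption: maps years into [0, 1]
year_scaled = scaler.fit_transform(years)           # what the 'year_scaled' labels would look like
years_back = scaler.inverse_transform(year_scaled)  # what predict() does with the model outputs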
13 hf_roberta_base/regressor_head.py (new file)
@@ -0,0 +1,13 @@
import torch

class RegressorHead(torch.nn.Module):
    def __init__(self, in_dim):
        super(RegressorHead, self).__init__()
        self.linear = torch.nn.Linear(in_dim, 1)
        self.m = torch.nn.LeakyReLU(0.1)
    def forward(self, x):
        x = x.mean(1)
        x = self.linear(x)
        x = self.m(x)
        x = - self.m(-x + 1 ) + 1
        return x
296564 test-A/out.tsv (file diff suppressed because it is too large)