hf roberta (linear top layer by hand instead of hf) with learning rate scheduler

This commit is contained in:
Jakub Pokrywka 2021-12-28 12:05:47 +01:00
parent c85f1611e6
commit 9ec36ba822
4 changed files with 297410 additions and 297409 deletions

298216
dev-0/out.tsv

File diff suppressed because it is too large Load Diff

View File

@ -42,18 +42,19 @@ eval_dataloader_full = DataLoader(eval_dataset_full, batch_size=BATCH_SIZE)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = RobertaModel.from_pretrained('roberta-base') model = RobertaModel.from_pretrained('roberta-base')
#model = RobertaModel(model.config)
model.regressor_head = RegressorHead(768).to('cuda') model.regressor_head = RegressorHead(768).to('cuda')
model.to(device) model.to(device)
optimizer = Adam(model.parameters(), lr=LR) optimizer = Adam(model.parameters(), lr=LR)
num_training_steps = NUM_EPOCHS * len(train_dataloader) num_training_steps = NUM_EPOCHS * len(train_dataloader)
#lr_scheduler = get_scheduler( lr_scheduler = get_scheduler(
# "linear", "linear",
# optimizer=optimizer, optimizer=optimizer,
# num_warmup_steps=WARMUP_STEPS, num_warmup_steps=WARMUP_STEPS,
# num_training_steps=num_training_steps num_training_steps=num_training_steps
#) )
progress_bar = tqdm(range(num_training_steps)) progress_bar = tqdm(range(num_training_steps))
@ -117,7 +118,7 @@ for epoch in range(NUM_EPOCHS):
progress_bar.update(1) progress_bar.update(1)
optimizer.step() optimizer.step()
#lr_scheduler.step() lr_scheduler.step()
optimizer.zero_grad() optimizer.zero_grad()
model.zero_grad() model.zero_grad()

View File

@ -1,8 +1,8 @@
#MODEL = '../MODELS/without_date/checkpoint-395000' #MODEL = '../MODELS/without_date/checkpoint-395000'
MODEL = 'roberta-base' MODEL = 'roberta-base'
BATCH_SIZE = 50 BATCH_SIZE = 90
EARLY_STOPPING = 3 EARLY_STOPPING = 3
WARMUP_STEPS = 10_000 WARMUP_STEPS = 5_000
LR=1e-6 LR=1e-6
NUM_EPOCHS = 20 NUM_EPOCHS = 20
STEPS_EVAL = 5_000 STEPS_EVAL = 5_000

File diff suppressed because it is too large Load Diff