roberta no year from scratch better finetuning

kubapok 2021-10-14 11:38:26 +02:00
parent 7c52ed0759
commit 0798d54e30
4 changed files with 66998 additions and 66986 deletions

File diff suppressed because it is too large


@@ -3,12 +3,12 @@ from config import LABELS_LIST, MODEL
 from transformers import AutoTokenizer
 from tqdm import tqdm
-device = 'cpu'
+device = 'cuda'
+model_path = './roberta-ireland'
 from transformers import AutoModelForSequenceClassification
-model = AutoModelForSequenceClassification.from_pretrained('test_trainer/checkpoint-620000/')
+model = AutoModelForSequenceClassification.from_pretrained(model_path).cuda()
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 for dataset in ('dev-0', 'test-A'):
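
The rest of this prediction script is truncated at the loop header above. Below is a minimal sketch of how the loop over dev-0 and test-A might continue, assuming the Gonito-style in.tsv/out.tsv layout suggested by the paths in this repository and that LABELS_LIST maps class indices to label strings; the file names and loop body are assumptions, not the committed code.

import torch

model.eval()
for dataset in ('dev-0', 'test-A'):
    # Assumed Gonito layout: one input text per line in in.tsv, one prediction per line in out.tsv
    with open(f'{dataset}/in.tsv') as f_in, open(f'{dataset}/out.tsv', 'w') as f_out:
        for line in tqdm(f_in):
            text = line.rstrip('\n')
            # Tokenize with the same maximum length used for fine-tuning (64 tokens, assumed)
            inputs = tokenizer(text, truncation=True, max_length=64, return_tensors='pt').to(device)
            with torch.no_grad():
                logits = model(**inputs).logits
            # Map the predicted class index back to its label string
            f_out.write(LABELS_LIST[logits.argmax(dim=-1).item()] + '\n')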


@@ -0,0 +1,12 @@
python run_glue.py --model_name_or_path roberta-base \
--train_file ../train/huggingface_format.csv \
--validation_file /media/kuba/ssdsam/gonito/ireland-news/dev-0/huggingface_format.csv \
--do_train \
--max_seq_length 64 \
--per_device_train_batch_size 32 \
--learning_rate 2e-5 \
--num_train_epochs 3 \
--output_dir ./roberta-ireland \
--save_steps=10000 \
--eval_steps=10000 \
--evaluation_strategy steps
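
When run_glue.py is given custom --train_file/--validation_file CSVs instead of a GLUE task name, it expects a "label" column plus one or two text columns. A hypothetical sketch of how a file like huggingface_format.csv could be produced from Gonito-style train/in.tsv and train/expected.tsv files is shown below; the source file names and the "sentence1" column name are assumptions, not part of this commit.

import csv

# Hypothetical conversion of Gonito-style train data to the CSV layout run_glue.py accepts
with open('train/in.tsv') as f_text, open('train/expected.tsv') as f_label, \
        open('train/huggingface_format.csv', 'w', newline='') as f_out:
    writer = csv.writer(f_out)
    writer.writerow(['sentence1', 'label'])  # one text column plus the required label column
    for text, label in zip(f_text, f_label):
        writer.writerow([text.strip(), label.strip()])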

File diff suppressed because it is too large