fix
This commit is contained in:
parent
543b6e4e0f
commit
f986c74861
@ -17,37 +17,37 @@ def tokenize_function(examples):
|
||||
test_tokenized_datasets_A = test_dataset_A.map(tokenize_function, batched=True)
|
||||
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
||||
|
||||
#train_dataset = tokenized_datasets["train"].shuffle(seed=42)
|
||||
train_dataset = tokenized_datasets["train"].shuffle(seed=42)
|
||||
eval_dataset_full = tokenized_datasets["test"]
|
||||
#eval_dataset_small = tokenized_datasets["test"].select(range(2000))
|
||||
#test_dataset_A = test_tokenized_datasets_A["train"]
|
||||
#
|
||||
#
|
||||
#scalers = dict()
|
||||
#scalers['year'] = MinMaxScaler().fit(np.array(train_dataset['year']).reshape(-1,1))
|
||||
#
|
||||
#def add_scaled(example):
|
||||
# for factor in ('year',):
|
||||
# example[factor + '_scaled'] = scalers[factor].transform(np.array(example[factor]).reshape(-1,1)).reshape(1,-1)[0].item()
|
||||
# return example
|
||||
#
|
||||
#train_dataset = train_dataset.map(add_scaled)
|
||||
#eval_dataset_full = eval_dataset_full.map(add_scaled)
|
||||
#eval_dataset_small = eval_dataset_small.map(add_scaled)
|
||||
##test_dataset_A = test_dataset_A.map(add_scaled)
|
||||
#
|
||||
#
|
||||
#with open('train_dataset.pickle','wb') as f_p:
|
||||
# pickle.dump(train_dataset, f_p)
|
||||
#
|
||||
#with open('eval_dataset_small.pickle','wb') as f_p:
|
||||
# pickle.dump(eval_dataset_small, f_p)
|
||||
#
|
||||
#with open('eval_dataset_full.pickle','wb') as f_p:
|
||||
# pickle.dump(eval_dataset_full, f_p)
|
||||
#
|
||||
#with open('test_dataset_A.pickle','wb') as f_p:
|
||||
# pickle.dump(test_dataset_A, f_p)
|
||||
#
|
||||
#with open('scalers.pickle','wb') as f_p:
|
||||
# pickle.dump(scalers, f_p)
|
||||
eval_dataset_small = tokenized_datasets["test"].select(range(2000))
|
||||
test_dataset_A = test_tokenized_datasets_A["train"]
|
||||
|
||||
|
||||
scalers = dict()
|
||||
scalers['year'] = MinMaxScaler().fit(np.array(train_dataset['year']).reshape(-1,1))
|
||||
|
||||
def add_scaled(example):
|
||||
for factor in ('year',):
|
||||
example[factor + '_scaled'] = scalers[factor].transform(np.array(example[factor]).reshape(-1,1)).reshape(1,-1)[0].item()
|
||||
return example
|
||||
|
||||
train_dataset = train_dataset.map(add_scaled)
|
||||
eval_dataset_full = eval_dataset_full.map(add_scaled)
|
||||
eval_dataset_small = eval_dataset_small.map(add_scaled)
|
||||
#test_dataset_A = test_dataset_A.map(add_scaled)
|
||||
|
||||
|
||||
with open('train_dataset.pickle','wb') as f_p:
|
||||
pickle.dump(train_dataset, f_p)
|
||||
|
||||
with open('eval_dataset_small.pickle','wb') as f_p:
|
||||
pickle.dump(eval_dataset_small, f_p)
|
||||
|
||||
with open('eval_dataset_full.pickle','wb') as f_p:
|
||||
pickle.dump(eval_dataset_full, f_p)
|
||||
|
||||
with open('test_dataset_A.pickle','wb') as f_p:
|
||||
pickle.dump(test_dataset_A, f_p)
|
||||
|
||||
with open('scalers.pickle','wb') as f_p:
|
||||
pickle.dump(scalers, f_p)
|
||||
|
@ -29,7 +29,7 @@ model = AutoModelForSequenceClassification.from_pretrained('roberta-base', num_l
|
||||
optimizer = AdamW(model.parameters(), lr=1e-6)
|
||||
|
||||
|
||||
num_epochs = 1
|
||||
num_epochs = 3
|
||||
num_training_steps = num_epochs * len(train_dataloader)
|
||||
lr_scheduler = get_scheduler(
|
||||
"linear",
|
||||
@ -95,4 +95,4 @@ for epoch in range(num_epochs):
|
||||
train_loss = 0.0
|
||||
eval()
|
||||
|
||||
model.save_pretrained('roberta_year_prediction')
|
||||
model.save_pretrained(f'roberta_year_prediction/epoch_{epoch}')
|
||||
|
@ -23,7 +23,7 @@ with open('test-A_huggingface_format.csv','r') as f_p:
|
||||
test_dataset = f_p.readlines()
|
||||
|
||||
device = 'cuda'
|
||||
model = AutoModelForSequenceClassification.from_pretrained('./roberta_year_prediction')
|
||||
model = AutoModelForSequenceClassification.from_pretrained('./roberta_year_prediction/epoch_0')
|
||||
tokenizer = AutoTokenizer.from_pretrained('roberta-base')
|
||||
model.eval()
|
||||
model.to(device)
|
||||
|
Loading…
Reference in New Issue
Block a user