fix
This commit is contained in:
parent
543b6e4e0f
commit
f986c74861
@ -17,37 +17,37 @@ def tokenize_function(examples):
|
|||||||
test_tokenized_datasets_A = test_dataset_A.map(tokenize_function, batched=True)
|
test_tokenized_datasets_A = test_dataset_A.map(tokenize_function, batched=True)
|
||||||
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
||||||
|
|
||||||
#train_dataset = tokenized_datasets["train"].shuffle(seed=42)
|
train_dataset = tokenized_datasets["train"].shuffle(seed=42)
|
||||||
eval_dataset_full = tokenized_datasets["test"]
|
eval_dataset_full = tokenized_datasets["test"]
|
||||||
#eval_dataset_small = tokenized_datasets["test"].select(range(2000))
|
eval_dataset_small = tokenized_datasets["test"].select(range(2000))
|
||||||
#test_dataset_A = test_tokenized_datasets_A["train"]
|
test_dataset_A = test_tokenized_datasets_A["train"]
|
||||||
#
|
|
||||||
#
|
|
||||||
#scalers = dict()
|
scalers = dict()
|
||||||
#scalers['year'] = MinMaxScaler().fit(np.array(train_dataset['year']).reshape(-1,1))
|
scalers['year'] = MinMaxScaler().fit(np.array(train_dataset['year']).reshape(-1,1))
|
||||||
#
|
|
||||||
#def add_scaled(example):
|
def add_scaled(example):
|
||||||
# for factor in ('year',):
|
for factor in ('year',):
|
||||||
# example[factor + '_scaled'] = scalers[factor].transform(np.array(example[factor]).reshape(-1,1)).reshape(1,-1)[0].item()
|
example[factor + '_scaled'] = scalers[factor].transform(np.array(example[factor]).reshape(-1,1)).reshape(1,-1)[0].item()
|
||||||
# return example
|
return example
|
||||||
#
|
|
||||||
#train_dataset = train_dataset.map(add_scaled)
|
train_dataset = train_dataset.map(add_scaled)
|
||||||
#eval_dataset_full = eval_dataset_full.map(add_scaled)
|
eval_dataset_full = eval_dataset_full.map(add_scaled)
|
||||||
#eval_dataset_small = eval_dataset_small.map(add_scaled)
|
eval_dataset_small = eval_dataset_small.map(add_scaled)
|
||||||
##test_dataset_A = test_dataset_A.map(add_scaled)
|
#test_dataset_A = test_dataset_A.map(add_scaled)
|
||||||
#
|
|
||||||
#
|
|
||||||
#with open('train_dataset.pickle','wb') as f_p:
|
with open('train_dataset.pickle','wb') as f_p:
|
||||||
# pickle.dump(train_dataset, f_p)
|
pickle.dump(train_dataset, f_p)
|
||||||
#
|
|
||||||
#with open('eval_dataset_small.pickle','wb') as f_p:
|
with open('eval_dataset_small.pickle','wb') as f_p:
|
||||||
# pickle.dump(eval_dataset_small, f_p)
|
pickle.dump(eval_dataset_small, f_p)
|
||||||
#
|
|
||||||
#with open('eval_dataset_full.pickle','wb') as f_p:
|
with open('eval_dataset_full.pickle','wb') as f_p:
|
||||||
# pickle.dump(eval_dataset_full, f_p)
|
pickle.dump(eval_dataset_full, f_p)
|
||||||
#
|
|
||||||
#with open('test_dataset_A.pickle','wb') as f_p:
|
with open('test_dataset_A.pickle','wb') as f_p:
|
||||||
# pickle.dump(test_dataset_A, f_p)
|
pickle.dump(test_dataset_A, f_p)
|
||||||
#
|
|
||||||
#with open('scalers.pickle','wb') as f_p:
|
with open('scalers.pickle','wb') as f_p:
|
||||||
# pickle.dump(scalers, f_p)
|
pickle.dump(scalers, f_p)
|
||||||
|
@ -29,7 +29,7 @@ model = AutoModelForSequenceClassification.from_pretrained('roberta-base', num_l
|
|||||||
optimizer = AdamW(model.parameters(), lr=1e-6)
|
optimizer = AdamW(model.parameters(), lr=1e-6)
|
||||||
|
|
||||||
|
|
||||||
num_epochs = 1
|
num_epochs = 3
|
||||||
num_training_steps = num_epochs * len(train_dataloader)
|
num_training_steps = num_epochs * len(train_dataloader)
|
||||||
lr_scheduler = get_scheduler(
|
lr_scheduler = get_scheduler(
|
||||||
"linear",
|
"linear",
|
||||||
@ -95,4 +95,4 @@ for epoch in range(num_epochs):
|
|||||||
train_loss = 0.0
|
train_loss = 0.0
|
||||||
eval()
|
eval()
|
||||||
|
|
||||||
model.save_pretrained('roberta_year_prediction')
|
model.save_pretrained(f'roberta_year_prediction/epoch_{epoch}')
|
||||||
|
@ -23,7 +23,7 @@ with open('test-A_huggingface_format.csv','r') as f_p:
|
|||||||
test_dataset = f_p.readlines()
|
test_dataset = f_p.readlines()
|
||||||
|
|
||||||
device = 'cuda'
|
device = 'cuda'
|
||||||
model = AutoModelForSequenceClassification.from_pretrained('./roberta_year_prediction')
|
model = AutoModelForSequenceClassification.from_pretrained('./roberta_year_prediction/epoch_0')
|
||||||
tokenizer = AutoTokenizer.from_pretrained('roberta-base')
|
tokenizer = AutoTokenizer.from_pretrained('roberta-base')
|
||||||
model.eval()
|
model.eval()
|
||||||
model.to(device)
|
model.to(device)
|
||||||
|
Loading…
Reference in New Issue
Block a user