31 lines
904 B
Python
31 lines
904 B
Python
import pdb
import sys

import nltk
import transformers
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
model_name = "pytorch_model.bin"
|
|
model_dir = f"model/checkpoint-2672/"
|
|
|
|
tokenizer_name = "google/flan-t5-base"
|
|
|
|
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
|
|
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir).to('cuda:1')
|
|
|
|
max_input_length = 512
|
|
import sys
|
|
text = ['it is too cold in here']
|
|
for line in sys.stdin:
|
|
|
|
inputs = line.rstrip().split('\t')[-1]
|
|
|
|
inputs = tokenizer(inputs, max_length=max_input_length, truncation=True, return_tensors="pt").to('cuda:1')
|
|
output = model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)
|
|
decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
|
|
predicted_title = nltk.sent_tokenize(decoded_output.strip())[0]
|
|
|
|
print(predicted_title)
|
|
|