This commit is contained in:
Mikolaj 2023-06-08 13:01:08 +02:00
parent c0894d950a
commit eb10e5db4a
3 changed files with 17936 additions and 7585 deletions

File diff suppressed because it is too large Load Diff

8
hf.py
View File

@@ -6,12 +6,12 @@ import regex as re
import sys
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = AutoModelForCausalLM.from_pretrained('gpt2')
model = AutoModelForCausalLM.from_pretrained('gpt2').to('cuda')
for line in sys.stdin:
input_text = line.split('\t')[-2].rstrip()
input_ids = tokenizer.encode(input_text, return_tensors='pt')
input_ids = tokenizer.encode(input_text, return_tensors='pt').to('cuda')
with torch.no_grad():
outputs = model(input_ids)
@@ -39,5 +39,5 @@ for line in sys.stdin:
continue
unknow_prob = 1 - sum_probs
string_to_print += f":{unknow_prob}"
print(string_to_print)
string_to_print = re.sub(' +', ' ', string_to_print)
print(string_to_print.rstrip().strip())

File diff suppressed because it is too large Load Diff