From f9a0b053084a02c97086035a975fb118f4466804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Strza=C5=82ko?= Date: Mon, 9 May 2022 10:51:57 +0200 Subject: [PATCH] nn Bigram --- tri_nn.py => bi_nn.py | 4 ++-- nn.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename tri_nn.py => bi_nn.py (95%) diff --git a/tri_nn.py b/bi_nn.py similarity index 95% rename from tri_nn.py rename to bi_nn.py index c311699..4c35901 100644 --- a/tri_nn.py +++ b/bi_nn.py @@ -6,7 +6,7 @@ import pandas as pd from os.path import exists from utils import read_csv, clean_text, get_words_from_line -from nn import Trigrams, Model +from nn import Bigrams, Model data = read_csv("train/in.tsv.xz") train_words = read_csv("train/expected.tsv") @@ -19,7 +19,7 @@ train_data = train_data.apply(clean_text) vocab_size = 30000 embed_size = 150 -train_dataset = Trigrams(train_data, vocab_size) +train_dataset = Bigrams(train_data, vocab_size) ################################################################################## diff --git a/nn.py b/nn.py index 5039528..a09291f 100644 --- a/nn.py +++ b/nn.py @@ -4,7 +4,7 @@ from torchtext.vocab import build_vocab_from_iterator import itertools -class Trigrams(torch.utils.data.IterableDataset): +class Bigrams(torch.utils.data.IterableDataset): def __init__(self, data, vocabulary_size): self.vocab = build_vocab_from_iterator( get_word_lines_from_data(data),