Final Version
This commit is contained in:
parent
f9172f10a0
commit
48a3c4eace
35
main.py
35
main.py
@ -1,16 +1,36 @@
|
|||||||
|
'''
|
||||||
|
Autor: Dominik Strzałko
|
||||||
|
Data: 05.08.2021
|
||||||
|
Zadanie: naiwny bayes2 gotowa biblioteka (Skeptic vs paranormal subreddits)
|
||||||
|
'''
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.preprocessing import LabelEncoder
|
from sklearn.preprocessing import LabelEncoder
|
||||||
from sklearn.naive_bayes import MultinomialNB
|
from sklearn.naive_bayes import MultinomialNB
|
||||||
from sklearn.pipeline import make_pipeline
|
from sklearn.pipeline import make_pipeline
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
|
||||||
|
def open_tsv(tsv, encoding="utf-8"):
    '''
    Read a .tsv file and return its content as a list of text lines.

    The file is decoded with an explicit encoding (UTF-8 by default) instead
    of the platform default, so the same file yields the same lines on every
    operating system and locale.

    Args:
        tsv: Path to the .tsv file.
        encoding: Text encoding used to decode the file.

    Returns:
        The list produced by ``readlines()``: one string per line, with the
        line terminators kept. An empty file gives an empty list.

    Example:
        X = open_tsv("train/expected.tsv")
    '''
    with open(tsv, encoding=encoding) as f:
        return f.readlines()
|
||||||
|
|
||||||
def Create_model(X_tsv, Y_tsv):
|
def Create_model(X_tsv, Y_tsv):
|
||||||
|
'''
|
||||||
|
Funkcja przeznaczona do tworzenia modelu uczenia maszynowego.
|
||||||
|
|
||||||
|
Na wejście trzeba podać zbiór X_train oraz Y_train w formie plików tsv.
|
||||||
|
|
||||||
with open(X_tsv) as f:
|
np. model = Create_model("train/in.tsv", "train/expected.tsv")
|
||||||
X = f.readlines()
|
'''
|
||||||
|
|
||||||
with open(Y_tsv) as f:
|
X = open_tsv(X_tsv)
|
||||||
Y = f.readlines()
|
Y = open_tsv(Y_tsv)
|
||||||
|
|
||||||
Y = LabelEncoder().fit_transform(Y)
|
Y = LabelEncoder().fit_transform(Y)
|
||||||
pipeline = make_pipeline(TfidfVectorizer(),MultinomialNB())
|
pipeline = make_pipeline(TfidfVectorizer(),MultinomialNB())
|
||||||
@ -19,9 +39,12 @@ def Create_model(X_tsv, Y_tsv):
|
|||||||
|
|
||||||
|
|
||||||
def predict(model, X_tsv, file_name):
    '''
    Run a model on the samples stored in a file and save the predictions.

    The lines of ``X_tsv`` are loaded with ``open_tsv`` and passed to
    ``model.predict``. The result is written to ``file_name`` with
    ``np.savetxt`` using the ``%d`` format. Nothing is returned.

    Example:
        predict(model, "dev-0/in.tsv", "dev-0/out.tsv")
    '''
    samples = open_tsv(X_tsv)
    np.savetxt(file_name, model.predict(samples), fmt='%d')
|
||||||
|
Loading…
Reference in New Issue
Block a user