Added Naive Bayes

This commit is contained in:
Maciej Sobkowiak 2021-05-12 20:37:58 +02:00
parent d4b396f3be
commit 7203bb3594
3 changed files with 10912 additions and 3 deletions

View File

@@ -4,10 +4,13 @@ import gzip
from sklearn.pipeline import make_pipeline from sklearn.pipeline import make_pipeline
from sklearn.naive_bayes import MultinomialNB from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import metrics
# Read data # Read data
dev = pd.read_table('dev-0/in.tsv', error_bad_lines=False, header=None) dev = pd.read_table('dev-0/in.tsv', error_bad_lines=False, header=None)
test = pd.read_table('test-A/in.tsv', error_bad_lines=False, header=None) test = pd.read_table('test-A/in.tsv', error_bad_lines=False, header=None)
test_expected = pd.read_table(
'dev-0/expected.tsv', error_bad_lines=False, header=None)
X_train = [] X_train = []
y_train = [] y_train = []
@@ -25,9 +28,18 @@ y_train = np.asarray(y_train)
X_dev = dev[0].values X_dev = dev[0].values
X_test = test[0].values X_test = test[0].values
print(type(y_train[0]))
print(X_train[0])
# Create model # Create model
model = make_pipeline(TfidfVectorizer(), MultinomialNB()) model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X_train, y_train) model.fit(X_train, y_train)
# Predict
dev_p = model.predict(X_dev)
test_p = model.predict(X_test)
# Accuracy
score = metrics.accuracy_score(test_expected, dev_p)
print("Accuracy: %0.3f" % score)
# Save to files
dev_p.tofile('./dev-0/out.tsv', sep='\n')
test_p.tofile('./test-A/out.tsv', sep='\n')

5452
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

5445
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff