25 lines
826 B
Python
25 lines
826 B
Python
import pandas as pd
|
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
from sklearn.naive_bayes import MultinomialNB
|
|
from sklearn.pipeline import make_pipeline
|
|
|
|
# Train a TF-IDF + multinomial naive Bayes text classifier on train/train.tsv
# and write one predicted label per input row for the dev-0 and test-A sets.

# --- Load data -------------------------------------------------------------
# All inputs are header-less, tab-separated files.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines="warn"` is its replacement: malformed rows (too many fields)
# are skipped and a warning is emitted for each, instead of aborting the read.
df = pd.read_csv("train/train.tsv", on_bad_lines="warn", header=None, sep="\t")
dev0 = pd.read_csv("dev-0/in.tsv", header=None, sep="\t")
testA = pd.read_csv("test-A/in.tsv", header=None, sep="\t")
# NOTE(review): these reads keep pandas' default quote handling and blank-line
# skipping; if the inputs can contain stray '"' characters or empty lines, the
# number of predictions may differ from the number of input lines -- confirm
# against the data.

# --- Select columns --------------------------------------------------------
# Evaluation inputs: the first column holds the documents to classify.
dev0X = dev0.iloc[:, 0].tolist()
testAX = testA.iloc[:, 0].tolist()

# Training set: first column is the label, second column is the document.
Y = df.iloc[:, 0].tolist()
X = df.iloc[:, 1].tolist()

# --- Fit -------------------------------------------------------------------
# The pipeline vectorizes raw text with TF-IDF and feeds it to naive Bayes,
# so the same vectorizer fitted on X is reused when predicting below.
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X, Y)

# --- Predict ---------------------------------------------------------------
dev0_predicted = model.predict(dev0X)
testA_predicted = model.predict(testAX)

# --- Write predictions -----------------------------------------------------
# One prediction per line, with no index column and no header row.
pd.Series(dev0_predicted).to_csv("dev-0/out.tsv", sep="\t", index=False, header=False)
pd.Series(testA_predicted).to_csv("test-A/out.tsv", sep="\t", index=False, header=False)
|