s444415
This commit is contained in:
parent
d043e30286
commit
d3d2656597
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"python.formatting.provider": "black"
|
||||||
|
}
|
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
42
run.py
Normal file
42
run.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from sklearn.naive_bayes import GaussianNB
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def read_train_data(path="train/train.tsv", limit=10000):
    """Load the training set and return it as ``(texts, labels)``.

    The file is parsed as a header-less, tab-separated table. Column 1 is
    returned first and column 0 second; the caller feeds the first to the
    vectorizer and the second to the classifier as targets.

    Args:
        path: Location of the TSV file. Defaults to the path the script
            has always used.
        limit: Maximum number of rows to load from the top of the file.
            ``None`` loads every row. Defaults to the original cap of 10000.

    Returns:
        A ``(column 1, column 0)`` tuple of pandas Series.
    """
    print('Load train data')
    # ``nrows`` stops parsing after ``limit`` rows instead of reading the
    # whole file and discarding everything past the cap afterwards.
    train_data = pd.read_csv(path, sep='\t', header=None, nrows=limit)
    return train_data[1], train_data[0]
|
||||||
|
|
||||||
|
def read_pred_data(path="dev-0/in.tsv"):
    """Read the documents to classify, one per line of the input file.

    Args:
        path: Location of the UTF-8 encoded input file. Defaults to the
            path the script has always used.

    Returns:
        A list with one string per line, in file order. Lines are returned
        exactly as read, so each keeps its trailing newline.
    """
    print('Load pred data')
    with open(path, encoding='utf-8') as f:
        # Iterating the file yields its lines; ``list`` collects them
        # without a hand-written append loop.
        return list(f)
|
||||||
|
|
||||||
|
def vectorize(x, x_p):
    """Convert training and prediction texts into TF-IDF feature matrices.

    One ``TfidfVectorizer`` is fitted on ``x`` only and then reused to
    transform ``x_p``, so both outputs are expressed over the vocabulary
    learned from ``x``.

    Returns:
        A ``(train_features, pred_features)`` tuple.
    """
    print('Vectorize')
    tfidf = TfidfVectorizer()
    train_features = tfidf.fit_transform(x)
    pred_features = tfidf.transform(x_p)
    return train_features, pred_features
|
||||||
|
|
||||||
|
def calc_score(x, y, x_p):
    """Fit a Gaussian naive Bayes model on ``(x, y)`` and predict for ``x_p``.

    Despite the name, the return value is the model's predictions for
    ``x_p``, not an evaluation score. Both feature matrices are converted
    with ``.toarray()`` before being handed to the model.
    """
    print('Calculate score')
    classifier = GaussianNB().fit(x.toarray(), y)
    return classifier.predict(x_p.toarray())
|
||||||
|
|
||||||
|
def get_result():
    """Run the whole pipeline and return the predictions.

    Steps, in order: load the training data, load the data to predict on,
    vectorize both, then fit the model and predict.
    """
    train_texts, train_labels = read_train_data()
    pred_texts = read_pred_data()
    train_features, pred_features = vectorize(train_texts, pred_texts)
    return calc_score(train_features, train_labels, pred_features)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the pipeline only when executed as a script, so importing this
    # module does not train a model or overwrite the output file.
    predictions = get_result()
    # One prediction per line: no header row and no index column.
    pd.DataFrame(predictions).to_csv("dev-0/out.tsv", header=False, index=False)
|
98132
train/train.tsv
Normal file
98132
train/train.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user