s444415
This commit is contained in:
parent
d043e30286
commit
d3d2656597
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
"python.formatting.provider": "black"
|
||||
}
|
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
42
run.py
Normal file
42
run.py
Normal file
@ -0,0 +1,42 @@
|
||||
import pandas as pd
|
||||
from sklearn.naive_bayes import GaussianNB
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
|
||||
|
||||
|
||||
def read_train_data(path="train/train.tsv", limit=10000):
    """Load the training set from a headerless, tab-separated file.

    Args:
        path: Location of the TSV file. Defaults to the path the script
            has always used.
        limit: Number of leading rows to keep. ``None`` keeps every row.
            NOTE(review): the default cap presumably exists because the
            classifier step densifies the TF-IDF matrix -- confirm before
            raising it.

    Returns:
        A ``(column 1, column 0)`` pair of pandas Series, i.e. the values
        used as model input followed by the values used as targets.
    """
    print('Load train data')
    train_data = pd.read_csv(path, sep='\t', header=None)
    # Slice after parsing (rather than using ``nrows``) so column dtypes are
    # still inferred from the whole file, exactly as before.
    train_data = train_data[:limit]
    return train_data[1], train_data[0]
|
||||
|
||||
def read_pred_data(path="dev-0/in.tsv"):
    """Load the documents to classify, one per line of a UTF-8 text file.

    Args:
        path: File to read. Defaults to the path the script has always used.

    Returns:
        A list with one string per line of the file. Each string is the raw
        line, including its trailing newline when the file has one.
    """
    print('Load pred data')
    with open(path, encoding='utf-8') as f:
        # readlines() yields the same list the manual append loop built.
        return f.readlines()
|
||||
|
||||
def vectorize(x, x_p):
    """Turn raw documents into TF-IDF feature matrices.

    The vectorizer is fitted on ``x`` only; ``x_p`` is projected onto the
    vocabulary and weights learned from ``x``.

    Args:
        x: Iterable of training documents.
        x_p: Iterable of documents to predict on.

    Returns:
        A ``(train_matrix, pred_matrix)`` tuple of TF-IDF matrices.
    """
    print('Vectorize')
    tfidf = TfidfVectorizer()
    train_matrix = tfidf.fit_transform(x)
    pred_matrix = tfidf.transform(x_p)
    return train_matrix, pred_matrix
|
||||
|
||||
def calc_score(x, y, x_p):
    """Fit a Gaussian naive Bayes model on ``(x, y)`` and predict for ``x_p``.

    Despite its name, this function returns predictions, not a metric.

    Args:
        x: Training feature matrix exposing ``toarray()``.
        y: Training targets aligned with the rows of ``x``.
        x_p: Feature matrix to predict on, also exposing ``toarray()``.

    Returns:
        Whatever ``GaussianNB.predict`` returns for the densified ``x_p``.
    """
    print('Calculate score')
    # Both matrices are converted with toarray() before being passed to the
    # model; the conversions stay inline so neither dense copy outlives the
    # call that uses it.
    classifier = GaussianNB().fit(x.toarray(), y)
    return classifier.predict(x_p.toarray())
|
||||
|
||||
def get_result():
    """Run the whole pipeline: load data, vectorize, fit, and predict.

    Returns:
        The predictions produced by ``calc_score`` for the prediction set.
    """
    train_texts, train_labels = read_train_data()
    pred_texts = read_pred_data()
    train_matrix, pred_matrix = vectorize(train_texts, pred_texts)
    return calc_score(train_matrix, train_labels, pred_matrix)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Guarded so that importing this module does not retrain the model and
    # overwrite the output file; running it as a script behaves as before.
    # One prediction per line, with no header row and no index column.
    pd.DataFrame(get_result()).to_csv("dev-0/out.tsv", header=False, index=False)
|
98132
train/train.tsv
Normal file
98132
train/train.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user