37 lines
864 B
Python
37 lines
864 B
Python
|
#!/usr/bin/env python
|
||
|
import numpy as np
|
||
|
import sys
|
||
|
import csv
|
||
|
from sklearn.feature_extraction.text import CountVectorizer
|
||
|
from numpy import loadtxt
|
||
|
from xgboost import XGBClassifier
|
||
|
from sklearn.model_selection import train_test_split
|
||
|
from sklearn.metrics import accuracy_score
|
||
|
def read_first_column(path):
    """Return the first tab-separated field of every line in the file at *path*.

    The file is parsed as TSV with quoting disabled, so commas and double
    quotes inside the text are kept verbatim. A blank line yields an empty
    string rather than being skipped, so the result has exactly one entry
    per input line.
    """
    # newline="" lets the csv module do its own line-ending handling.
    with open(path, newline="") as tsv_file:
        reader = csv.reader(tsv_file, delimiter="\t", quoting=csv.QUOTE_NONE)
        return [row[0] if row else "" for row in reader]


def main():
    """Train an XGBoost classifier on stdin and score ``test-A/in.tsv``.

    Training documents are read from standard input (one per line) and their
    labels from ``train/expected.tsv``. Both are turned into bag-of-words
    counts with a single ``CountVectorizer``; one value from the predicted
    probabilities is printed per test document, in input order.
    """
    test_texts = read_first_column("test-A/in.tsv")
    train_texts = list(sys.stdin)

    vectorizer = CountVectorizer()
    train_features = vectorizer.fit_transform(train_texts)
    train_labels = loadtxt("train/expected.tsv")
    # Reuse the vocabulary learned from the training data: re-fitting on the
    # test set would produce columns that do not line up with what the model
    # was trained on.
    test_features = vectorizer.transform(test_texts)

    model = XGBClassifier()
    model.fit(train_features, train_labels)

    # NOTE(review): column 0 is kept from the original script; with 0/1
    # labels it is presumably the probability of the first class -- confirm
    # whether the consumer expects column 1 instead.
    for class_probabilities in model.predict_proba(test_features):
        print(class_probabilities[0])


if __name__ == "__main__":
    main()
|