42 lines
1.1 KiB
Python
Executable File
42 lines
1.1 KiB
Python
Executable File
#!/usr/bin/env python
|
|
import numpy as np
|
|
import sys
|
|
import csv
|
|
from sklearn.feature_extraction.text import CountVectorizer
|
|
from numpy import loadtxt
|
|
from xgboost import XGBClassifier
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.metrics import accuracy_score
|
|
# Input locations, relative to the current working directory.
TRAIN_TEXT_PATH = "train/in.tsv"
TRAIN_LABEL_PATH = "train/expected.tsv"
DEV_TEXT_PATH = "dev-0/in.tsv"

# Parameters of the train/hold-out split.
SEED = 7
HOLDOUT_FRACTION = 0.33


def load_documents(path):
    """Return the lines of the text file at *path*, one document per line.

    Line terminators are kept, exactly as when iterating over the file
    object directly.
    """
    with open(path, encoding="utf-8") as handle:
        return handle.readlines()


def load_labels(path):
    """Read the label file at *path* and return integer-encoded labels.

    The file is parsed with ``csv.reader`` (default dialect) and all cells
    are flattened into a single 1-D sequence.  The distinct label strings
    are then mapped to ``0..k-1`` in sorted order, so a file containing
    "0" and "1" yields the integers 0 and 1.

    Returns:
        A 1-D NumPy integer array with one entry per label cell.
    """
    with open(path, encoding="utf-8", newline="") as handle:
        raw_labels = np.ravel(list(csv.reader(handle)))
    # NOTE(review): recent XGBoost releases appear to require numeric class
    # labels starting at 0 and reject raw strings -- confirm against the
    # installed version.  Sorted-order encoding keeps the class order a
    # label encoder would have produced.
    _, encoded = np.unique(raw_labels, return_inverse=True)
    return encoded


def main():
    """Train a bag-of-words XGBoost classifier and score the dev set.

    Fits a ``CountVectorizer`` on the training documents, trains an
    ``XGBClassifier`` on the training part of a seeded split, and prints
    one value per dev document to stdout.
    """
    train_documents = load_documents(TRAIN_TEXT_PATH)
    dev_documents = load_documents(DEV_TEXT_PATH)
    labels = load_labels(TRAIN_LABEL_PATH)

    vectorizer = CountVectorizer()
    features = vectorizer.fit_transform(train_documents)

    # train_test_split returns (X_train, X_test, y_train, y_test), in that
    # order.  The held-out part is not used for anything, but the split is
    # kept so the model is trained on the same subset as before.
    X_train, _X_holdout, y_train, _y_holdout = train_test_split(
        features, labels, test_size=HOLDOUT_FRACTION, random_state=SEED
    )

    model = XGBClassifier()
    model.fit(X_train, y_train)

    dev_features = vectorizer.transform(dev_documents)
    for class_probabilities in model.predict_proba(dev_features):
        # One minus the first class column; presumably the positive-class
        # probability for a two-class problem.
        print(1 - class_probabilities[0])


if __name__ == "__main__":
    main()