#!/usr/bin/env python
"""Train an XGBoost text classifier and print probabilities for test-A.

Inputs:
    * standard input          -- training documents, one per line
    * ``train/expected.tsv``  -- training labels, one numeric value per line
    * ``test-A/in.tsv``       -- documents to score (first tab-separated column)

Output:
    One probability per test document, written to standard output in the
    same order as the rows of ``test-A/in.tsv``.
"""
import numpy as np
import sys
import csv
from sklearn.feature_extraction.text import CountVectorizer
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

TEST_INPUT_PATH = "test-A/in.tsv"
TRAIN_LABELS_PATH = "train/expected.tsv"


def read_first_column(path):
    """Return the first tab-separated field of every row in *path*.

    Blank rows yield an empty string instead of being skipped, so the result
    has exactly one entry per input row and the predictions stay aligned
    with the input file.
    """
    # The file is TSV, so split on tabs (the csv default of "," would cut
    # every document at its first comma).  QUOTE_NONE keeps literal quote
    # characters in free text from being interpreted as CSV quoting.
    with open(path, newline="") as tsv_file:
        reader = csv.reader(tsv_file, delimiter="\t", quoting=csv.QUOTE_NONE)
        return [row[0] if row else "" for row in reader]


def main():
    """Fit the model on the training data and print test-set probabilities."""
    test_docs = read_first_column(TEST_INPUT_PATH)
    train_docs = list(sys.stdin)

    vectorizer = CountVectorizer()
    train_features = vectorizer.fit_transform(train_docs)
    train_labels = loadtxt(TRAIN_LABELS_PATH)

    # Reuse the vocabulary learned from the training documents: re-fitting on
    # the test set would produce feature columns that do not match the ones
    # the model is trained on.
    test_features = vectorizer.transform(test_docs)

    model = XGBClassifier()
    model.fit(train_features, train_labels)

    # Score the test documents (not the training ones) so that one line is
    # emitted per row of the test file.
    # NOTE(review): column 0 of predict_proba is kept from the original
    # script; presumably it is the probability of the first class (label 0).
    # Confirm whether the positive-class column (index 1) is what the
    # consumer of this output expects.
    for class_probabilities in model.predict_proba(test_features):
        print(class_probabilities[0])


if __name__ == "__main__":
    main()