# Patch to skrypt.py (index e86f6d5..0e878de), restored to one diff record
# per line. Lines before the "diff --git" header are commentary only.
#
# Summary of the change shown in the hunk:
#   - Training text is now read from train/in.tsv and vectorized as X
#     (previously X was built from sys.stdin, and test-A/in.tsv was read
#     into listatesting).
#   - Labels are read from train/expected.tsv with csv.reader and flattened
#     with np.ravel, replacing loadtxt("train/expected.tsv").
#   - The data is split with train_test_split(test_size=0.33, random_state=7).
#   - dev-0/in.tsv is vectorized with the already-fitted vectorizer
#     (transform, not fit_transform); each predict_proba row `a` for it is
#     printed as 1-a[0] instead of a[0].
#
# NOTE(review):
#   - scikit-learn's train_test_split returns
#     (X_train, X_test, y_train, y_test). The unpacking below binds that to
#     X_train, y_train, x_test, y_test, so `y_train` holds the held-out
#     features and `x_test` holds the training labels. The fit/predict_proba
#     calls use the names consistently with that binding, so the names are
#     misleading rather than the calls being wrong.
#   - `listatesting = []` and `listatesting3 = []` are overwritten right away;
#     `seed = 7` is assigned twice; `y_pred`, `read_tsv`, `lista` and `param`
#     are not used anywhere within this hunk.
#   - None of the three opened files is closed within this hunk.
#   - `np` is used by np.ravel; its import is outside the lines shown here.
#     TODO confirm it exists above line 7 of skrypt.py.
#
# Restoration caveats:
#   - Taking "from numpy import loadtxt" as the hunk's section heading, the
#     hunk header declares 30 old / 35 new lines but only 27 / 32 could be
#     recovered. The three missing lines are presumably blank context lines;
#     their positions are unknown, so they are NOT re-inserted and the header
#     is left as recorded. Regenerate the patch from the repository before
#     applying it.
#   - Loop-body indentation (shown as four spaces) and any whitespace on the
#     lone "+" line could not be recovered from the collapsed text.
diff --git a/skrypt.py b/skrypt.py
index e86f6d5..0e878de 100755
--- a/skrypt.py
+++ b/skrypt.py
@@ -7,30 +7,35 @@ from numpy import loadtxt
 from xgboost import XGBClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
-tsv_file = open("test-A/in.tsv")
+tsv_file = open("train/in.tsv")
+tsv_file3 = open("dev-0/in.tsv")
+tsv_file2 = open("train/expected.tsv")
 read_tsv = csv.reader(tsv_file)
+read_tsv2 = csv.reader(tsv_file2)
 listatesting = []
-for line in read_tsv:
-    listatesting.append(line[0])
+listatesting2 = []
+listatesting = list(tsv_file)
+listatesting3 = []
+listatesting3 = list(tsv_file3)
+for line2 in read_tsv2:
+    listatesting2.append(line2)
 lista = []
-for line in sys.stdin:
-    lista.append(line)
-
+
 vectorizer = CountVectorizer()
-X = vectorizer.fit_transform(lista)
-Y = loadtxt("train/expected.tsv")
-seed = 1
+seed = 7
+X = vectorizer.fit_transform(listatesting)
+Y = np.ravel(listatesting2)
+X_train, y_train, x_test, y_test = train_test_split(X,Y, test_size=0.33,random_state=seed)
+seed = 7
 param = { 'objective':'binary:logistic'}
-
-X_train = X
-X_test = Y
-Y_train = vectorizer.fit_transform(listatesting)
 model = XGBClassifier()
-model.fit(X_train, X_test)
-y_pred = model.predict_proba(X_train)
-predictions = [value for value in y_pred]
+model.fit(X_train, x_test)
+y_pred = model.predict_proba(y_train)
+Z_train = vectorizer.transform(listatesting3)
+y_pred2 = model.predict_proba(Z_train)
+predictions = [value for value in y_pred2]
 for a in predictions:
-    print(a[0])
+    print(1-a[0])