sport-text-classification-b.../skrypt-dev-0.py

import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import preprocessing
from sklearn.pipeline import make_pipeline
import csv

# Train a TF-IDF + multinomial naive Bayes classifier on train/train.tsv and
# write one predicted label per input line of dev-0/in.tsv to dev-0/out.tsv.

prep = preprocessing.LabelEncoder()

# Training data: column 0 of each row is the label, column 1 is the text.
X = []
Y = []
# newline="" is what the csv module asks for; the explicit encoding keeps the
# result independent of the machine's locale (assumes the data is UTF-8).
with open("train/train.tsv", encoding="utf-8", newline="") as file_train:
    # QUOTE_NONE: the text column is free-form, so a stray '"' has to be read
    # as an ordinary character. With default quoting it would open a quoted
    # field that swallows the following lines and silently drops examples.
    csv_input = csv.reader(file_train, delimiter='\t', quoting=csv.QUOTE_NONE)
    for line in csv_input:
        if len(line) < 2:
            # Blank or truncated row: no (label, text) pair to learn from.
            continue
        Y.append(line[0])
        X.append(line[1])

# The classifier is trained on integer class indices; `prep` remembers the
# mapping so predictions can be turned back into the original labels below.
Y = prep.fit_transform(Y)

# Each raw line of the input file is classified as one document.
with open("dev-0/in.tsv", encoding="utf-8") as file_in:
    work_file_lines = file_in.readlines()

MNB = make_pipeline(TfidfVectorizer(use_idf=True), MultinomialNB())
model = MNB.fit(X, Y)

y_predict = model.predict(work_file_lines)

# Map class indices back to the labels as they appear in train.tsv, so the
# output does not depend on the training labels already being 0..n-1.
predicted_labels = prep.inverse_transform(y_predict)

# One label per line, no trailing newline. Writing the text directly (instead
# of formatting the array with NumPy and stripping brackets/padding in a
# second pass) avoids re-reading a file that was never closed or flushed.
with open("dev-0/out.tsv", 'w', encoding="utf-8") as file_out:
    file_out.write("\n".join(str(label) for label in predicted_labels))
fin 2021-05-02 23:16:33 +02:00			`import numpy as np`
			`from sklearn.naive_bayes import MultinomialNB`
			`from sklearn.feature_extraction.text import TfidfVectorizer`
			`from sklearn import preprocessing`
			`from sklearn.pipeline import make_pipeline`
			`import csv`

			`prep = preprocessing.LabelEncoder()`

			`with open("train/train.tsv") as file_train:`
			`csv_input = csv.reader(file_train, delimiter='\t')`
			`X = []`
			`Y = []`
			`for line in csv_input:`
			`Y.append(line[0])`
			`X.append(line[1])`

			`Y = prep.fit_transform(Y)`
			`with open("dev-0/in.tsv") as file_in:`
			`work_file_lines = file_in.readlines()`
			`MNB = make_pipeline(TfidfVectorizer(use_idf = True), MultinomialNB())`

			`model = MNB.fit(X,Y)`

			`y_predict = model.predict(work_file_lines)`
			`y_predict = np.array(y_predict)`

			`np.set_printoptions(threshold=np.inf)`
			`labels = np.array2string(y_predict.flatten(), separator='\n', suppress_small=True)`

			`file_out = open("dev-0/out.tsv", 'w')`
			`file_out.write(labels[1:-1])`

			`with open("dev-0/out.tsv", 'r') as fix_space:`
			`lines = fix_space.readlines()`

			`lines = [line.replace(' ', '') for line in lines]`
			`with open("dev-0/out.tsv", 'w') as fix_space:`
			`fix_space.writelines(lines)`