Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

7 changed files with 0 additions and 280773 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,48 +0,0 @@
from re import L
import pandas as pd
import numpy as np
import csv
from sklearn.linear_model import LinearRegression
from stop_words import get_stop_words
from sklearn.feature_extraction.text import TfidfVectorizer
def linear_regression():
    """Fit a TF-IDF + linear-regression model and write its predictions.

    Reads ``train/train.tsv``, ``dev-0/in.tsv`` and ``test-A/in.tsv``
    (tab-separated, no header row), regresses the midpoint of each training
    row's ``start_date``/``end_date`` on the TF-IDF vector of its ``data``
    column, and writes the predictions for the dev-0 and test-A texts to
    ``dev-0/out2.tsv`` and ``test-A/out2.tsv`` (one value per parsed row,
    no header, no index).

    Returns:
        None. The results are the two output files.
    """
    # Read the input files.
    colnames_train = ['start_date', 'end_date', 'title', 'sort_title', 'data']
    colnames_test = ['data']
    train = pd.read_csv("train/train.tsv", names=colnames_train, sep="\t")
    # `on_bad_lines="skip"` replaces `error_bad_lines=False`, which was
    # deprecated in pandas 1.3 and removed in pandas 2.0.
    # NOTE(review): skipped lines produce no prediction, so the output can
    # have fewer rows than the input file -- confirm this is acceptable.
    dev_0 = pd.read_csv(
        "dev-0/in.tsv",
        on_bad_lines="skip",
        header=None,
        sep="\t",
        quoting=csv.QUOTE_NONE,
    )
    test = pd.read_csv("test-A/in.tsv", names=colnames_test, sep="\t")

    # Create the TF-IDF vectorizer and the linear regression model.
    tf = TfidfVectorizer(stop_words=get_stop_words('polish'))
    lin_reg = LinearRegression()

    # Regression target: midpoint between the start and end dates.
    date = (train['start_date'] + train['end_date']) / 2

    # Fit the vocabulary and the regression on the training texts.
    train_vec = tf.fit_transform(train['data'])
    lin_reg.fit(train_vec, date)

    # Predict for dev-0. The frame is read with header=None, so its columns
    # are labelled by position; the text lives in column 0 (a 'data' label
    # does not exist there and would raise KeyError).
    evaluate_dev = tf.transform(dev_0[0])
    prediction_dev = lin_reg.predict(evaluate_dev)
    pd.DataFrame(prediction_dev).to_csv(
        'dev-0/out2.tsv', sep="\t", index=False, header=False
    )

    # Predict for test-A.
    evaluate_test = tf.transform(test['data'])
    prediction_test = lin_reg.predict(evaluate_test)
    pd.DataFrame(prediction_test).to_csv(
        'test-A/out2.tsv', sep="\t", index=False, header=False
    )
    return None
# Run the train/predict pipeline only when this file is executed as a script.
if __name__ == "__main__":
    linear_regression()
# geval: 21.80  (presumably the recorded evaluation score -- confirm)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long