Zrobione zadanie 07.

This commit is contained in:
Jan Nowak 2021-05-17 23:13:18 +02:00
parent 5c4bb10ddf
commit e41aa574d8
3 changed files with 2109 additions and 0 deletions

1000
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

109
rozwiazanie.py Normal file
View File

@ -0,0 +1,109 @@
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
# Load the training set. The file is tab-separated with no header row, so
# columns are addressed by integer position: column 0 is used as the
# regression target and columns 1-4 as the features (see X_train / y_train).
data_train = pd.read_csv("train/train.tsv", header=None, sep='\t')

# Columns 3 and 4 are cast to pandas categoricals so that every distinct
# label gets a stable integer position in `.cat.categories`.
data_train[3] = data_train[3].astype('category')
data_train[4] = data_train[4].astype('category')

# Lookup tables for column 3: `brand_codest` maps code -> label and
# `brand_codes` maps label -> code. The label -> code table is reused further
# down to encode the dev-0 and test-A inputs with the same numbering.
brand_codest = dict(enumerate(data_train[3].cat.categories))
brand_codes = {label: code for code, label in brand_codest.items()}

# Same pair of lookup tables for column 4.
fuel_codest = dict(enumerate(data_train[4].cat.categories))
fuel_codes = {label: code for code, label in fuel_codest.items()}

# Relabel the categories with their integer codes. `rename_categories` is
# used instead of `DataFrame.replace` because calling `replace` on a
# categorical column is deprecated in recent pandas releases; the result
# (a categorical column whose categories are the integer codes) is the same.
data_train[3] = data_train[3].cat.rename_categories(brand_codes)
data_train[4] = data_train[4].cat.rename_categories(fuel_codes)

# NOTE: a commented-out MaxAbsScaler normalisation of columns 0-2 and a
# commented-out pd.get_dummies one-hot encoding used to sit here; neither is
# applied to the data.

X_train = data_train[[1, 2, 3, 4]]
y_train = data_train[data_train.columns[0]]
# Load the dev-0 split: the features and the expected targets are stored in
# two separate header-less files.
data_val_X = pd.read_csv("dev-0/in.tsv", header=None, sep='\t')
data_val_y = pd.read_csv("dev-0/expected.tsv", header=None)

# Encode the label columns with the tables built from the training data.
# Here the label columns are 2 and 3 (they are 3 and 4 in train.tsv) --
# presumably because in.tsv does not carry the target column; verify against
# the data files.
data_val_X.replace({2: brand_codes}, inplace=True)
data_val_X.replace({3: fuel_codes}, inplace=True)
X_val = data_val_X[[0, 1, 2, 3]]

# Fit an ordinary least-squares model on the training features.
reg = LinearRegression().fit(X_train, y_train)

# R^2 on the training data, then on the dev-0 data.
print(reg.score(X_train, y_train))
print(reg.score(X_val, data_val_y))

# RMSE on dev-0. The square root is taken explicitly because the
# `squared=False` argument of mean_squared_error is deprecated and removed
# in newer scikit-learn releases.
print(np.sqrt(mean_squared_error(data_val_y, reg.predict(X_val))))
# Write the dev-0 predictions, one value per line.
# The model is queried once for the whole frame instead of once per row, and
# the file is opened in a `with` block so the handle is closed even if
# writing fails part-way through.
dev_predictions = reg.predict(X_val)
with open('dev-0/out.tsv', "w") as out_file:
    out_file.writelines("{}\n".format(value) for value in dev_predictions)
# Load the test-A inputs (tab-separated, no header row) and encode the label
# columns 2 and 3 with the tables built from the training data.
data_test_X = pd.read_csv("test-A/in.tsv", header=None, sep='\t')
data_test_X.replace({2: brand_codes}, inplace=True)
data_test_X.replace({3: fuel_codes}, inplace=True)

# Manual patch: any "Fabrycznie" value still left in column 2 after the
# mapping above is assigned the code 90.
# NOTE(review): this looks like a label that does not occur in the training
# data; the choice of 90 is not explained by the code -- confirm.
data_test_X.replace({2: {"Fabrycznie": 90}}, inplace=True)

X_test = data_test_X[[0, 1, 2, 3]]

# Write the test-A predictions, one value per line. A single batched
# predict() call replaces the per-row calls, and the `with` block guarantees
# the output file is closed.
test_predictions = reg.predict(X_test)
with open('test-A/out.tsv', "w") as out_file:
    out_file.writelines("{}\n".format(value) for value in test_predictions)
# Disabled experiment (kept for reference, not executed):
# data normalisation of column 4 of the dev-0 features.
# flcols = data_val_X[[4]].columns
# x = data_val_X[[4]].values
# # min_max_scaler = preprocessing.MinMaxScaler()
# max_abs_scaler = preprocessing.MaxAbsScaler()
# # x_scaled = min_max_scaler.fit_transform(x)
# x_scaled = max_abs_scaler.fit_transform(x)
# normcols = pd.DataFrame(x_scaled, columns=flcols)
# for col in flcols:
#     data_val_X[col] = normcols[col]
# Disabled experiment (kept for reference, not executed):
# data normalisation of column 0 of the dev-0 expected values.
# flcols = data_val_y[[0]].columns
# x = data_val_y[[0]].values
# # min_max_scaler = preprocessing.MinMaxScaler()
# max_abs_scaler = preprocessing.MaxAbsScaler()
# # x_scaled = min_max_scaler.fit_transform(x)
# x_scaled = max_abs_scaler.fit_transform(x)
# normcols = pd.DataFrame(x_scaled, columns=flcols)
# for col in flcols:
#     data_val_y[col] = normcols[col]

1000
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff