Task 07 done.
This commit is contained in:
parent 5c4bb10ddf
commit e41aa574d8
1000  dev-0/out.tsv  Normal file
File diff suppressed because it is too large
109  rozwiazanie.py  Normal file
@@ -0,0 +1,109 @@
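# Task 07: fit a linear regression on train/train.tsv and write one prediction
# per line to dev-0/out.tsv and test-A/out.tsv.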
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error

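# Columns 3 (brand) and 4 (fuel) of the training data are text; convert them to
# pandas categories and replace the strings with their integer category codes.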
data_train = pd.read_csv("train/train.tsv", header=None, sep='\t')
#print(data_train[[1,2,5]])
data_train[3] = data_train[3].astype('category')
data_train[4] = data_train[4].astype('category')
brand_codest = dict(enumerate(data_train[3].cat.categories))
brand_codes = {y: x for x, y in brand_codest.items()}
#data_train[3].map(data_train_codes)
data_train.replace({3: brand_codes}, inplace=True)
#print(brand_codes)
#print(data_train[3])
fuel_codest = dict(enumerate(data_train[4].cat.categories))
fuel_codes = {y: x for x, y in fuel_codest.items()}
#data_train[3].map(data_train_codes)
data_train.replace({4: fuel_codes}, inplace=True)
#print(fuel_codes)
#print(data_train[4])
# # Data normalization
# flcols = data_train[[0, 1, 2]].columns
# x = data_train[[0, 1, 2]].values
# # min_max_scaler = preprocessing.MinMaxScaler()
# max_abs_scaler = preprocessing.MaxAbsScaler()
# # x_scaled = min_max_scaler.fit_transform(x)
# x_scaled = max_abs_scaler.fit_transform(x)
# normcols = pd.DataFrame(x_scaled, columns=flcols)
# for col in flcols:
#     data_train[col] = normcols[col]

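# Column 0 of train.tsv is the target; columns 1-4 are used as features.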
X_train = data_train[[1,2,3,4]]
y_train = data_train[data_train.columns[0]]
# print(X_train[3].value_counts())
# print(X_train[4].value_counts())
#print(X_train)

# train_columns = data_train.columns[5]
# text_columns = [3,4]
# data_train_dummy = pd.get_dummies(data_train[columns])
# print(len(data_train_dummy.columns))
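# dev-0/in.tsv has no target column, so the feature columns are shifted by one
# relative to train.tsv: brand is column 2 and fuel is column 3. The mappings
# built from the training data are reused here.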
data_val_X = pd.read_csv("dev-0/in.tsv", header=None, sep='\t')
data_val_y = pd.read_csv("dev-0/expected.tsv", header=None)

data_val_X.replace({2: brand_codes}, inplace=True)
data_val_X.replace({3: fuel_codes}, inplace=True)
#print(data_val_X[2].value_counts())
#print(data_val_X[3].value_counts())
X_val = data_val_X[[0,1,2,3]]
#print(data_val_y)
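# Fit the regression, print R^2 on train and dev, and print the dev RMSE
# (squared=False makes mean_squared_error return the root; in newer
# scikit-learn releases this parameter is deprecated in favour of
# root_mean_squared_error).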
reg = LinearRegression().fit(X_train, y_train)
print(reg.score(X_train, y_train))
print(reg.score(X_val, data_val_y))
print(mean_squared_error(data_val_y, reg.predict(X_val), squared=False))

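# Write one prediction per input row to dev-0/out.tsv. Predicting row by row is
# equivalent to a single reg.predict(X_val) call, just slower.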
file = open('dev-0/out.tsv', "w")
for index, row in X_val.iterrows():
    #print(np.reshape(row.to_numpy(),(-1,1)))
    y_pred = reg.predict(np.reshape(row.to_numpy(), (1, -1)))
    # print(y_pred)
    file.write("{}\n".format(y_pred[0]))
    # if index==10:
    #     break
file.close()

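# Repeat the same preprocessing for the test set.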
data_test_X = pd.read_csv("test-A/in.tsv", header=None, sep='\t')
data_test_X.replace({2: brand_codes}, inplace=True)
data_test_X.replace({3: fuel_codes}, inplace=True)
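# "Fabrycznie" is a brand value that brand_codes does not cover (presumably it
# does not occur in the training data), so it is mapped to code 90 by hand.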
data_test_X.replace({2: {"Fabrycznie": 90}}, inplace=True)
|
||||||
|
# print(data_test_X[2].value_counts())
|
||||||
|
# print(data_test_X[3].value_counts())
|
||||||
|
X_test = data_test_X[[0,1,2,3]]
|
||||||
|
#print(X_test)
|
||||||
|
|
||||||
|
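# Same per-row prediction loop as for dev-0, writing to test-A/out.tsv.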
file = open('test-A/out.tsv', "w")
for index, row in X_test.iterrows():
    #print(np.reshape(row.to_numpy(),(-1,1)))
    y_pred = reg.predict(np.reshape(row.to_numpy(), (1, -1)))
    # print(y_pred)
    file.write("{}\n".format(y_pred[0]))
    # if index==10:
    #     break
file.close()
# # Data normalization
# flcols = data_val_X[[4]].columns
# x = data_val_X[[4]].values
# # min_max_scaler = preprocessing.MinMaxScaler()
# max_abs_scaler = preprocessing.MaxAbsScaler()
# # x_scaled = min_max_scaler.fit_transform(x)
# x_scaled = max_abs_scaler.fit_transform(x)
# normcols = pd.DataFrame(x_scaled, columns=flcols)
# for col in flcols:
#     data_val_X[col] = normcols[col]
# # Data normalization
# flcols = data_val_y[[0]].columns
# x = data_val_y[[0]].values
# # min_max_scaler = preprocessing.MinMaxScaler()
# max_abs_scaler = preprocessing.MaxAbsScaler()
# # x_scaled = min_max_scaler.fit_transform(x)
# x_scaled = max_abs_scaler.fit_transform(x)
# normcols = pd.DataFrame(x_scaled, columns=flcols)
# for col in flcols:
#     data_val_y[col] = normcols[col]
1000  test-A/out.tsv  Normal file
File diff suppressed because it is too large