forked from kubapok/auta-public
Compare commits
No commits in common. "master" and "master" have entirely different histories.
1000
dev-0/out.tsv
1000
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
109
rozwiazanie.py
109
rozwiazanie.py
@ -1,109 +0,0 @@
|
|||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from sklearn.linear_model import LinearRegression
|
|
||||||
from sklearn import preprocessing
|
|
||||||
from sklearn.metrics import mean_squared_error
|
|
||||||
|
|
||||||
# Load the training set. The file is tab-separated with no header row, so
# columns are addressed by integer position.
data_train = pd.read_csv("train/train.tsv", header=None, sep='\t')

# Columns 3 and 4 hold string labels (presumably car brand and fuel type,
# going by the variable names -- confirm against the dataset description).
data_train[3] = data_train[3].astype('category')
data_train[4] = data_train[4].astype('category')

# Build code -> label tables, then invert them to label -> code. The inverted
# tables are reused further down to encode the dev and test inputs.
brand_codest = dict(enumerate(data_train[3].cat.categories))
brand_codes = {label: code for code, label in brand_codest.items()}
fuel_codest = dict(enumerate(data_train[4].cat.categories))
fuel_codes = {label: code for code, label in fuel_codest.items()}

# Swap the labels for their integer codes. `.cat.codes` is the position of
# each value in `.cat.categories`, so the numbers agree with the tables above.
# DataFrame.replace() is deliberately not used here: calling it on categorical
# columns has been deprecated since pandas 2.2.
# NOTE: a missing label is encoded as -1 by `.cat.codes`.
data_train[3] = data_train[3].cat.codes
data_train[4] = data_train[4].cat.codes
|
|
||||||
#print(fuel_codes)
|
|
||||||
#print(data_train[4])
|
|
||||||
# # Data normalization
|
|
||||||
# flcols = data_train[[0, 1, 2]].columns
|
|
||||||
# x = data_train[[0, 1, 2]].values
|
|
||||||
# # min_max_scaler = preprocessing.MinMaxScaler()
|
|
||||||
# max_abs_scaler = preprocessing.MaxAbsScaler()
|
|
||||||
# # x_scaled = min_max_scaler.fit_transform(x)
|
|
||||||
# x_scaled = max_abs_scaler.fit_transform(x)
|
|
||||||
# normcols = pd.DataFrame(x_scaled, columns=flcols)
|
|
||||||
# for col in flcols:
|
|
||||||
# data_train[col] = normcols[col]
|
|
||||||
|
|
||||||
# Column 0 is the regression target; columns 1-4 are the model inputs.
y_train = data_train.iloc[:, 0]
X_train = data_train.loc[:, [1, 2, 3, 4]]
|
|
||||||
# print(X_train[3].value_counts())
|
|
||||||
# print(X_train[4].value_counts())
|
|
||||||
#print(X_train)
|
|
||||||
|
|
||||||
# train_columns = data_train.columns[5]
|
|
||||||
# text_columns = [3,4]
|
|
||||||
# data_train_dummy = pd.get_dummies(data_train[columns])
|
|
||||||
# print(len(data_train_dummy.columns))
|
|
||||||
# dev-0 split: inputs and expected targets live in separate header-less files.
data_val_y = pd.read_csv("dev-0/expected.tsv", header=None)
data_val_X = pd.read_csv("dev-0/in.tsv", header=None, sep='\t')

# The input file has no target column, so every feature sits one position to
# the left of where it is in the training file: the labels encoded with
# brand_codes are in column 2 and those encoded with fuel_codes in column 3.
# Labels missing from the training tables are left unchanged.
data_val_X = data_val_X.replace({2: brand_codes, 3: fuel_codes})

X_val = data_val_X.loc[:, [0, 1, 2, 3]]
|
|
||||||
#print(data_val_y)
|
|
||||||
# Fit a linear regression model on the training features.
reg = LinearRegression().fit(X_train, y_train)

# Model score (R^2) on the training data and on the dev-0 split.
print(reg.score(X_train, y_train))
print(reg.score(X_val, data_val_y))

# RMSE on dev-0. The square root is taken explicitly because the
# `squared=False` argument of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6.
print(np.sqrt(mean_squared_error(data_val_y, reg.predict(X_val))))
|
|
||||||
|
|
||||||
# Predict the whole dev-0 split in one call instead of once per row, then
# write one prediction per line. Predicting before opening the file means a
# failed prediction does not leave a truncated output file behind, and the
# `with` block closes the file even if writing raises.
dev_predictions = np.ravel(reg.predict(X_val))
with open('dev-0/out.tsv', "w") as out_file:
    out_file.writelines("{}\n".format(y_pred) for y_pred in dev_predictions)
|
|
||||||
|
|
||||||
|
|
||||||
# test-A split: same header-less, tab-separated layout as the dev-0 input.
data_test_X = pd.read_csv("test-A/in.tsv", header=None, sep='\t')

# Encode columns 2 and 3 with the label -> code tables built from the
# training data.
data_test_X = data_test_X.replace({2: brand_codes, 3: fuel_codes})

# "Fabrycznie" is hard-coded to code 90 in column 2.
# NOTE(review): presumably this label does not occur in the training data and
# 90 was picked by hand -- confirm which category code 90 stands for.
data_test_X = data_test_X.replace({2: {"Fabrycznie": 90}})

X_test = data_test_X.loc[:, [0, 1, 2, 3]]
|
|
||||||
#print(X_test)
|
|
||||||
|
|
||||||
# Predict the whole test-A split in one call instead of once per row, then
# write one prediction per line. Predicting before opening the file means a
# failed prediction does not leave a truncated output file behind, and the
# `with` block closes the file even if writing raises.
test_predictions = np.ravel(reg.predict(X_test))
with open('test-A/out.tsv', "w") as out_file:
    out_file.writelines("{}\n".format(y_pred) for y_pred in test_predictions)
|
|
||||||
# # Data normalization
|
|
||||||
# flcols = data_val_X[[4]].columns
|
|
||||||
# x = data_val_X[[4]].values
|
|
||||||
# # min_max_scaler = preprocessing.MinMaxScaler()
|
|
||||||
# max_abs_scaler = preprocessing.MaxAbsScaler()
|
|
||||||
# # x_scaled = min_max_scaler.fit_transform(x)
|
|
||||||
# x_scaled = max_abs_scaler.fit_transform(x)
|
|
||||||
# normcols = pd.DataFrame(x_scaled, columns=flcols)
|
|
||||||
# for col in flcols:
|
|
||||||
# data_val_X[col] = normcols[col]
|
|
||||||
# # Data normalization
|
|
||||||
# flcols = data_val_y[[0]].columns
|
|
||||||
# x = data_val_y[[0]].values
|
|
||||||
# # min_max_scaler = preprocessing.MinMaxScaler()
|
|
||||||
# max_abs_scaler = preprocessing.MaxAbsScaler()
|
|
||||||
# # x_scaled = min_max_scaler.fit_transform(x)
|
|
||||||
# x_scaled = max_abs_scaler.fit_transform(x)
|
|
||||||
# normcols = pd.DataFrame(x_scaled, columns=flcols)
|
|
||||||
# for col in flcols:
|
|
||||||
# data_val_y[col] = normcols[col]
|
|
||||||
|
|
||||||
|
|
1000
test-A/out.tsv
1000
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user