upload files
This commit is contained in:
commit
83ba22cb18
438558
.ipynb_checkpoints/application_record-checkpoint.csv
Normal file
438558
.ipynb_checkpoints/application_record-checkpoint.csv
Normal file
File diff suppressed because it is too large
Load Diff
647
.ipynb_checkpoints/run-checkpoint.ipynb
Normal file
647
.ipynb_checkpoints/run-checkpoint.ipynb
Normal file
@ -0,0 +1,647 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eec59090",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Import potrzebnych bibliotek"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ed6c43d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import sklearn\n",
|
||||
"\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"\n",
|
||||
"from sklearn.linear_model import SGDClassifier\n",
|
||||
"\n",
|
||||
"from sklearn.naive_bayes import GaussianNB\n",
|
||||
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
||||
"\n",
|
||||
"from sklearn.metrics import precision_recall_fscore_support\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"\n",
|
||||
"import torch\n",
|
||||
"from torch import Tensor"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fcd702cb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Preprocessing danych"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "46133040",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def preprocess(data):\n",
|
||||
" #rename columns\n",
|
||||
" data.rename(columns = {'CODE_GENDER':'if_man', 'FLAG_OWN_CAR':'if_own_car', 'FLAG_OWN_REALTY':'if_own_realty', 'CNT_CHILDREN':'cnt_children', \n",
|
||||
" 'AMT_INCOME_TOTAL':'amt_income', 'NAME_EDUCATION_TYPE':'name_edu_type', 'CNT_FAM_MEMBERS':'cnt_fam_members',\n",
|
||||
" 'NAME_INCOME_TYPE':'name_income_type', 'NAME_FAMILY_STATUS':'name_fam_status'}, inplace = True)\n",
|
||||
" \n",
|
||||
" #replace data\n",
|
||||
" data['if_man'] = data['if_man'].apply(lambda x: 1 if x=='M' else 0)\n",
|
||||
" data['if_own_car'] = data['if_own_car'].apply(lambda x: 1 if x=='Y' else 0)\n",
|
||||
" data['if_own_realty'] = data['if_own_realty'].apply(lambda x: 1 if x=='Y' else 0)\n",
|
||||
" data['cnt_children'] = data['cnt_children'].apply(pd.to_numeric, errors='coerce')\n",
|
||||
" data['cnt_fam_members'] = data['cnt_fam_members'].apply(pd.to_numeric, errors='coerce')\n",
|
||||
" data['cnt_children'] = data['cnt_children'].apply(lambda x: np.NaN if x > 5 else x)\n",
|
||||
" data['cnt_fam_members'] = data['cnt_fam_members'].apply(lambda x: np.NaN if x > 8 else x)\n",
|
||||
" \n",
|
||||
" #get dummies\n",
|
||||
" data = pd.get_dummies(data, columns=['name_income_type'])\n",
|
||||
" data = pd.get_dummies(data, columns=['name_fam_status'])\n",
|
||||
" \n",
|
||||
" #dropna\n",
|
||||
" print(\"Length of dataset before dropna: \" + str(len(data)))\n",
|
||||
" data = data.dropna()\n",
|
||||
" print(\"Length of dataset after dropna: \" + str(len(data)))\n",
|
||||
" \n",
|
||||
" return data\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "db8f4662",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Podział na zbiór trenujący i testowy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "e13ef021",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def split(data):\n",
|
||||
" split_point = int(0.8 * len(data))\n",
|
||||
" data_train = data[:split_point]\n",
|
||||
" data_test = data[split_point:]\n",
|
||||
" print(\"Length of whole dataset: \" + str(len(data)))\n",
|
||||
" print(\"Length of train dataset: \" + str(len(data_train)))\n",
|
||||
" print(\"Length of test dataset: \" + str(len(data_test)))\n",
|
||||
" return data_train, data_test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c65e3f22",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Ewaluacja"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "374bc36c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def evaluation(y_expected, y_predicted):\n",
|
||||
" precision, recall, fscore, support = precision_recall_fscore_support(y_expected, y_predicted, average=\"weighted\")\n",
|
||||
" accuracy = accuracy_score(y_expected, y_predicted)\n",
|
||||
" print(f\"Accuracy: {accuracy}\")\n",
|
||||
" print(f\"Precision: {precision}\")\n",
|
||||
" print(f\"Recall: {recall}\")\n",
|
||||
" print(f\"F-score: {fscore}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6a671fd4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Wczytanie danych z pliku"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "4a42cd28",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"alldata = pd.read_csv('application_record.csv', header=0, sep=',',\n",
|
||||
" usecols=['CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'CNT_CHILDREN', 'AMT_INCOME_TOTAL', 'NAME_EDUCATION_TYPE', 'CNT_FAM_MEMBERS', 'NAME_INCOME_TYPE', 'NAME_FAMILY_STATUS'])\n",
|
||||
"# print(alldata[:5])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6a384b9f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Wybór cech do trenowania"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "c69232b2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"FEATURES = [\n",
|
||||
" 'if_man', \n",
|
||||
" 'if_own_car', \n",
|
||||
" 'if_own_realty', \n",
|
||||
" 'cnt_children', \n",
|
||||
" 'amt_income', \n",
|
||||
" 'cnt_fam_members', \n",
|
||||
" 'name_income_type_Commercial associate', \n",
|
||||
" 'name_income_type_Pensioner', \n",
|
||||
" 'name_income_type_State servant', \n",
|
||||
" 'name_income_type_Student', \n",
|
||||
" 'name_income_type_Working',\n",
|
||||
" 'name_fam_status_Civil marriage',\n",
|
||||
" 'name_fam_status_Married',\n",
|
||||
" 'name_fam_status_Separated',\n",
|
||||
" 'name_fam_status_Single / not married',\n",
|
||||
" 'name_fam_status_Widow'\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d146c21a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(pd.unique(alldata['if_man']))\n",
|
||||
"print(pd.unique(alldata['if_own_car']))\n",
|
||||
"print(pd.unique(alldata['if_own_realty']))\n",
|
||||
"print(pd.unique(alldata['cnt_children']))\n",
|
||||
"# print(pd.unique(alldata['name_income_type']))\n",
|
||||
"print(pd.unique(alldata['name_edu_type']))\n",
|
||||
"# print(pd.unique(alldata['name_fam_status']))\n",
|
||||
"print(pd.unique(alldata['cnt_fam_members']))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "144b9e70",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Przygotowanie danych"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2ca32941",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Length of dataset before dropna: 438557\n",
|
||||
"Length of dataset after dropna: 438531\n",
|
||||
"Length of whole dataset: 438531\n",
|
||||
"Length of train dataset: 350824\n",
|
||||
"Length of test dataset: 87707\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"alldata = preprocess(alldata)\n",
|
||||
"data_train, data_test = split(alldata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "f292f593",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_train = pd.DataFrame(data_train['name_edu_type'])\n",
|
||||
"x_train = pd.DataFrame(data_train[FEATURES])\n",
|
||||
"scaler = StandardScaler().fit(x_train)\n",
|
||||
"x_train = scaler.transform(x_train)\n",
|
||||
"x_test = pd.DataFrame(data_test[FEATURES])\n",
|
||||
"x_test = scaler.transform(x_test)\n",
|
||||
"y_expected = pd.DataFrame(data_test['name_edu_type'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3af424e7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Regresja logistyczna"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "a77aa29f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Programy\\anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
|
||||
" return f(*args, **kwargs)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Accuracy: 0.7031023749529685\n",
|
||||
"Precision: 0.6408866393822401\n",
|
||||
"Recall: 0.7031023749529685\n",
|
||||
"F-score: 0.6268976358430636\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Programy\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||||
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_logreg = LogisticRegression(max_iter=1000) \n",
|
||||
"model_logreg.fit(x_train, y_train)\n",
|
||||
"\n",
|
||||
"y_predicted_logreg = model_logreg.predict(x_test) \n",
|
||||
"\n",
|
||||
"evaluation(y_expected, y_predicted_logreg)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e05aa676",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## SGD"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "31910f4a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Programy\\anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
|
||||
" return f(*args, **kwargs)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Accuracy: 0.6921340371920143\n",
|
||||
"Precision: 0.7333126153525842\n",
|
||||
"Recall: 0.6921340371920143\n",
|
||||
"F-score: 0.5666044690008586\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Programy\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||||
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_sgd = SGDClassifier() \n",
|
||||
"model_sgd.fit(x_train, y_train)\n",
|
||||
"\n",
|
||||
"y_predicted_sgd = model_sgd.predict(x_test) \n",
|
||||
"\n",
|
||||
"evaluation(y_expected, y_predicted_sgd)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "037c6132",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Gaussian Naive Bayes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b71d276a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# model_gnb = GaussianNB() \n",
|
||||
"# model_gnb.fit(x_train, y_train)\n",
|
||||
"\n",
|
||||
"# y_predicted_sgd = model_gnb.predict(x_test) \n",
|
||||
"\n",
|
||||
"# evaluation(y_expected, y_predicted_sgd)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2f41b9f3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## PyTorch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a4b83c7c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Przygotowanie danych"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "a578ed9d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'Higher education': 0,\n",
|
||||
" 'Secondary / secondary special': 1,\n",
|
||||
" 'Incomplete higher': 2,\n",
|
||||
" 'Lower secondary': 3,\n",
|
||||
" 'Academic degree': 4}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_numpy = alldata.drop(\"name_edu_type\", axis=1).values\n",
|
||||
"X_numpy = scaler.transform(X_numpy)\n",
|
||||
"target_map = {\n",
|
||||
" val: index for index, val in enumerate(alldata.name_edu_type.unique())\n",
|
||||
"}\n",
|
||||
"y_numpy = alldata.name_edu_type.map(target_map).values\n",
|
||||
"X = torch.tensor(X_numpy, dtype=torch.float32)\n",
|
||||
"y = torch.tensor(y_numpy)\n",
|
||||
"\n",
|
||||
"target_map"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "72d58f4d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### One hot vectors"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "8521b29d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def one_hot_encode(vector):\n",
|
||||
" n_classes = len(vector.unique())\n",
|
||||
" one_hot = torch.zeros((vector.shape[0], n_classes))\\\n",
|
||||
" .type(torch.LongTensor)\n",
|
||||
" return one_hot\\\n",
|
||||
" .scatter(1, vector.type(torch.LongTensor).unsqueeze(1), 1)\n",
|
||||
"\n",
|
||||
"y_one_hot = one_hot_encode(y)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "2adc5f1d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"438531\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"random_indices = torch.randperm(X.shape[0])\n",
|
||||
"print(X.shape[0])\n",
|
||||
"n_train = int(0.8 * X.shape[0])\n",
|
||||
"X_train = X[random_indices[:n_train]]\n",
|
||||
"y_train = y[random_indices[:n_train]]\n",
|
||||
"y_train_one_hot = y_one_hot[random_indices[:n_train]]\n",
|
||||
"\n",
|
||||
"X_test = X[random_indices[n_train:]]\n",
|
||||
"y_test = y[random_indices[n_train:]]\n",
|
||||
"y_test_one_hot = y_one_hot[random_indices[n_train:]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2992b275",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "a0820cfa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_pytorch = torch.nn.Sequential(\n",
|
||||
" torch.nn.Linear(16, 5)\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7d177d27",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Optymalizator"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "1858b5d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"learning_rate = 0.1\n",
|
||||
"lambda_param = 0.01\n",
|
||||
"optimizer = torch.optim.SGD(\n",
|
||||
" model_pytorch.parameters(), \n",
|
||||
" lr=learning_rate, \n",
|
||||
" weight_decay=lambda_param\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ca95791b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Funkcja straty"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "16f5c2d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loss_function = torch.nn.CrossEntropyLoss()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ebe3feff",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Trenowanie"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "9b2115e9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loss at iteration 1: 1.6335363388061523\n",
|
||||
"Loss at iteration 100: 0.8125142455101013\n",
|
||||
"Loss at iteration 200: 0.7701064944267273\n",
|
||||
"Loss at iteration 300: 0.75752854347229\n",
|
||||
"Loss at iteration 400: 0.7520564198493958\n",
|
||||
"Loss at iteration 500: 0.7492005228996277\n",
|
||||
"Loss at iteration 600: 0.7475774884223938\n",
|
||||
"Loss at iteration 700: 0.7465949058532715\n",
|
||||
"Loss at iteration 800: 0.7459684014320374\n",
|
||||
"Loss at iteration 900: 0.7455567717552185\n",
|
||||
"Loss at iteration 1000: 0.7452787160873413\n",
|
||||
"\n",
|
||||
"Final Test Accuracy: 0.7014605447683765\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"n_iterations = 1000\n",
|
||||
"for i in range(1, n_iterations + 1):\n",
|
||||
" Z = model_pytorch(X_train) # 1\n",
|
||||
" loss = loss_function(Z, y_train) # 2\n",
|
||||
" optimizer.zero_grad() # 3\n",
|
||||
" loss.backward() # 4\n",
|
||||
" optimizer.step() # 5\n",
|
||||
" \n",
|
||||
" if i == 1 or i % 100 == 0:\n",
|
||||
" print(\"Loss at iteration {}: {}\".format(i, loss))\n",
|
||||
"\n",
|
||||
"test_predictions = torch.argmax(\n",
|
||||
" torch.softmax(model_pytorch(X_test), 1), axis=1 # 6\n",
|
||||
")\n",
|
||||
"test_accuracy = float(sum(test_predictions == y_test)) / y_test.shape[0]\n",
|
||||
"print(\"\\nFinal Test Accuracy: {}\".format(test_accuracy))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c91a0446",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Ewaluacja"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "d394a74e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Accuracy: 0.7014605447683765\n",
|
||||
"Precision: 0.6413920853293256\n",
|
||||
"Recall: 0.7014605447683765\n",
|
||||
"F-score: 0.6264027678306182\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Programy\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||||
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"evaluation(y_test, test_predictions)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
BIN
Raport_478839.docx
Normal file
BIN
Raport_478839.docx
Normal file
Binary file not shown.
BIN
Raport_478839.pdf
Normal file
BIN
Raport_478839.pdf
Normal file
Binary file not shown.
438558
application_record.csv
Normal file
438558
application_record.csv
Normal file
File diff suppressed because it is too large
Load Diff
598
run.ipynb
Normal file
598
run.ipynb
Normal file
@ -0,0 +1,598 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "96b802c2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Import potrzebnych bibliotek"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "b583839d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import sklearn\n",
|
||||
"\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"\n",
|
||||
"from sklearn.linear_model import SGDClassifier\n",
|
||||
"\n",
|
||||
"from sklearn.metrics import precision_recall_fscore_support\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"\n",
|
||||
"import torch\n",
|
||||
"from torch import Tensor"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "13c9a468",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Preprocessing danych"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "c0fe042f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def preprocess(data):\n",
|
||||
" #rename columns\n",
|
||||
" data.rename(columns = {'CODE_GENDER':'if_man', 'FLAG_OWN_CAR':'if_own_car', 'FLAG_OWN_REALTY':'if_own_realty', 'CNT_CHILDREN':'cnt_children', \n",
|
||||
" 'AMT_INCOME_TOTAL':'amt_income', 'NAME_EDUCATION_TYPE':'name_edu_type', 'CNT_FAM_MEMBERS':'cnt_fam_members',\n",
|
||||
" 'NAME_INCOME_TYPE':'name_income_type', 'NAME_FAMILY_STATUS':'name_fam_status'}, inplace = True)\n",
|
||||
" \n",
|
||||
" #replace data\n",
|
||||
" data['if_man'] = data['if_man'].apply(lambda x: 1 if x=='M' else 0)\n",
|
||||
" data['if_own_car'] = data['if_own_car'].apply(lambda x: 1 if x=='Y' else 0)\n",
|
||||
" data['if_own_realty'] = data['if_own_realty'].apply(lambda x: 1 if x=='Y' else 0)\n",
|
||||
" data['cnt_children'] = data['cnt_children'].apply(pd.to_numeric, errors='coerce')\n",
|
||||
" data['cnt_fam_members'] = data['cnt_fam_members'].apply(pd.to_numeric, errors='coerce')\n",
|
||||
" data['cnt_children'] = data['cnt_children'].apply(lambda x: np.NaN if x > 4 else x)\n",
|
||||
" data['cnt_fam_members'] = data['cnt_fam_members'].apply(lambda x: np.NaN if x > 7 else x)\n",
|
||||
" \n",
|
||||
" #get dummies\n",
|
||||
" data = pd.get_dummies(data, columns=['name_income_type'])\n",
|
||||
" data = pd.get_dummies(data, columns=['name_fam_status'])\n",
|
||||
" \n",
|
||||
" #dropna\n",
|
||||
" print(\"Length of dataset before dropna: \" + str(len(data)))\n",
|
||||
" data = data.dropna()\n",
|
||||
" print(\"Length of dataset after dropna: \" + str(len(data)))\n",
|
||||
" \n",
|
||||
" return data\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "327f6fd1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Podział na zbiór trenujący i testowy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "bfbc1c3d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def split(data):\n",
|
||||
" split_point = int(0.8 * len(data))\n",
|
||||
" data_train = data[:split_point]\n",
|
||||
" data_test = data[split_point:]\n",
|
||||
" print(\"Length of whole dataset: \" + str(len(data)))\n",
|
||||
" print(\"Length of train dataset: \" + str(len(data_train)))\n",
|
||||
" print(\"Length of test dataset: \" + str(len(data_test)))\n",
|
||||
" return data_train, data_test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a121894e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Ewaluacja"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "8ae290e7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def evaluation(y_expected, y_predicted):\n",
|
||||
" precision, recall, fscore, support = precision_recall_fscore_support(y_expected, y_predicted, average=\"weighted\")\n",
|
||||
" accuracy = accuracy_score(y_expected, y_predicted)\n",
|
||||
" print(f\"Accuracy: {accuracy}\")\n",
|
||||
" print(f\"Precision: {precision}\")\n",
|
||||
" print(f\"Recall: {recall}\")\n",
|
||||
" print(f\"F-score: {fscore}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a22769b7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Wczytanie danych z pliku"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "01dc11ba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"alldata = pd.read_csv('application_record.csv', header=0, sep=',',\n",
|
||||
" usecols=['CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'CNT_CHILDREN', 'AMT_INCOME_TOTAL', 'NAME_EDUCATION_TYPE', 'CNT_FAM_MEMBERS', 'NAME_INCOME_TYPE', 'NAME_FAMILY_STATUS'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1172b205",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Wybór cech do trenowania"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "c7c81076",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"FEATURES = [\n",
|
||||
" 'if_man', \n",
|
||||
" 'if_own_car', \n",
|
||||
" 'if_own_realty', \n",
|
||||
" 'cnt_children', \n",
|
||||
" 'amt_income', \n",
|
||||
" 'cnt_fam_members', \n",
|
||||
" 'name_income_type_Commercial associate', \n",
|
||||
" 'name_income_type_Pensioner', \n",
|
||||
" 'name_income_type_State servant', \n",
|
||||
" 'name_income_type_Student', \n",
|
||||
" 'name_income_type_Working',\n",
|
||||
" 'name_fam_status_Civil marriage',\n",
|
||||
" 'name_fam_status_Married',\n",
|
||||
" 'name_fam_status_Separated',\n",
|
||||
" 'name_fam_status_Single / not married',\n",
|
||||
" 'name_fam_status_Widow'\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e076ccd3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Przygotowanie danych"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "755eb6cc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Length of dataset before dropna: 438557\n",
|
||||
"Length of dataset after dropna: 438398\n",
|
||||
"Length of whole dataset: 438398\n",
|
||||
"Length of train dataset: 350718\n",
|
||||
"Length of test dataset: 87680\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"alldata = preprocess(alldata)\n",
|
||||
"data_train, data_test = split(alldata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "4a8e48d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_train = pd.DataFrame(data_train['name_edu_type'])\n",
|
||||
"x_train = pd.DataFrame(data_train[FEATURES])\n",
|
||||
"scaler = StandardScaler().fit(x_train)\n",
|
||||
"x_train = scaler.transform(x_train)\n",
|
||||
"x_test = pd.DataFrame(data_test[FEATURES])\n",
|
||||
"x_test = scaler.transform(x_test)\n",
|
||||
"y_expected = pd.DataFrame(data_test['name_edu_type'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f47c7fb3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Regresja logistyczna"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "144c5dad",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Programy\\anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
|
||||
" return f(*args, **kwargs)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Accuracy: 0.7029995437956205\n",
|
||||
"Precision: 0.640600346050133\n",
|
||||
"Recall: 0.7029995437956205\n",
|
||||
"F-score: 0.6268503647241781\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Programy\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||||
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_logreg = LogisticRegression(max_iter=1000, penalty = 'l2') \n",
|
||||
"model_logreg.fit(x_train, y_train)\n",
|
||||
"\n",
|
||||
"y_predicted_logreg = model_logreg.predict(x_test) \n",
|
||||
"\n",
|
||||
"evaluation(y_expected, y_predicted_logreg)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "07b5fe4e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## SGD"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "4ac92414",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Programy\\anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
|
||||
" return f(*args, **kwargs)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Accuracy: 0.6918681569343066\n",
|
||||
"Precision: 0.7408811892540971\n",
|
||||
"Recall: 0.6918681569343066\n",
|
||||
"F-score: 0.565943959233223\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Programy\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||||
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_sgd = SGDClassifier() \n",
|
||||
"model_sgd.fit(x_train, y_train)\n",
|
||||
"\n",
|
||||
"y_predicted_sgd = model_sgd.predict(x_test) \n",
|
||||
"\n",
|
||||
"evaluation(y_expected, y_predicted_sgd)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "21b47e65",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## PyTorch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "372f817b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Przygotowanie danych"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "2931ff2c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'Higher education': 0,\n",
|
||||
" 'Secondary / secondary special': 1,\n",
|
||||
" 'Incomplete higher': 2,\n",
|
||||
" 'Lower secondary': 3,\n",
|
||||
" 'Academic degree': 4}"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_numpy = alldata.drop(\"name_edu_type\", axis=1).values\n",
|
||||
"X_numpy = scaler.transform(X_numpy)\n",
|
||||
"target_map = {\n",
|
||||
" val: index for index, val in enumerate(alldata.name_edu_type.unique())\n",
|
||||
"}\n",
|
||||
"y_numpy = alldata.name_edu_type.map(target_map).values\n",
|
||||
"X = torch.tensor(X_numpy, dtype=torch.float32)\n",
|
||||
"y = torch.tensor(y_numpy)\n",
|
||||
"\n",
|
||||
"target_map"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c01de74e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### One hot vectors"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "59509f52",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def one_hot_encode(vector):\n",
|
||||
" n_classes = len(vector.unique())\n",
|
||||
" one_hot = torch.zeros((vector.shape[0], n_classes))\\\n",
|
||||
" .type(torch.LongTensor)\n",
|
||||
" return one_hot\\\n",
|
||||
" .scatter(1, vector.type(torch.LongTensor).unsqueeze(1), 1)\n",
|
||||
"\n",
|
||||
"y_one_hot = one_hot_encode(y)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "35f7166f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"438398\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"random_indices = torch.randperm(X.shape[0])\n",
|
||||
"print(X.shape[0])\n",
|
||||
"n_train = int(0.8 * X.shape[0])\n",
|
||||
"X_train = X[random_indices[:n_train]]\n",
|
||||
"y_train = y[random_indices[:n_train]]\n",
|
||||
"y_train_one_hot = y_one_hot[random_indices[:n_train]]\n",
|
||||
"\n",
|
||||
"X_test = X[random_indices[n_train:]]\n",
|
||||
"y_test = y[random_indices[n_train:]]\n",
|
||||
"y_test_one_hot = y_one_hot[random_indices[n_train:]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a52ed69f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "af6977f6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_pytorch = torch.nn.Sequential(\n",
|
||||
" torch.nn.Linear(16, 5)\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "33e22bf7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Optymalizator"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "7d9ad96d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"learning_rate = 0.1\n",
|
||||
"lambda_param = 0.01\n",
|
||||
"optimizer = torch.optim.Adam(\n",
|
||||
" model_pytorch.parameters(), \n",
|
||||
" lr=learning_rate, \n",
|
||||
" weight_decay=lambda_param\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b170f06f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Funkcja straty"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "e971efdf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loss_function = torch.nn.CrossEntropyLoss()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5a30335a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Trenowanie"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "dc3864a1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loss at iteration 1: 1.6760406494140625\n",
|
||||
"Loss at iteration 100: 0.7446303963661194\n",
|
||||
"Loss at iteration 200: 0.7443369030952454\n",
|
||||
"Loss at iteration 300: 0.7443307638168335\n",
|
||||
"Loss at iteration 400: 0.7443307638168335\n",
|
||||
"Loss at iteration 500: 0.7443307638168335\n",
|
||||
"\n",
|
||||
"Final Test Accuracy: 0.699771897810219\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"n_iterations = 500\n",
|
||||
"for i in range(1, n_iterations + 1):\n",
|
||||
" Z = model_pytorch(X_train) \n",
|
||||
" loss = loss_function(Z, y_train) \n",
|
||||
" optimizer.zero_grad() \n",
|
||||
" loss.backward()\n",
|
||||
" optimizer.step() \n",
|
||||
" \n",
|
||||
" if i == 1 or i % 100 == 0:\n",
|
||||
" print(\"Loss at iteration {}: {}\".format(i, loss))\n",
|
||||
"\n",
|
||||
"test_predictions = torch.argmax(\n",
|
||||
" torch.softmax(model_pytorch(X_test), 1), axis=1 # 6\n",
|
||||
")\n",
|
||||
"test_accuracy = float(sum(test_predictions == y_test)) / y_test.shape[0]\n",
|
||||
"print(\"\\nFinal Test Accuracy: {}\".format(test_accuracy))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "839a125b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Ewaluacja"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "1f11907e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Accuracy: 0.699771897810219\n",
|
||||
"Precision: 0.6369786163546386\n",
|
||||
"Recall: 0.699771897810219\n",
|
||||
"F-score: 0.6249925465608228\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Programy\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1248: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||||
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"evaluation(y_test, test_predictions)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Reference in New Issue
Block a user