DVC
This commit is contained in:
parent
3adee1f9a9
commit
ec2a3d1c51
@ -1,4 +1,6 @@
|
||||
[core]
|
||||
remote = my_local_remote
|
||||
remote = ium_ssh_remote
|
||||
['remote "my_local_remote"']
|
||||
url = /dvcstore
|
||||
['remote "ium_ssh_remote"']
|
||||
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -13,3 +13,5 @@ ipython_config.py
|
||||
# git rm -r .ipynb_checkpoints/
|
||||
|
||||
/adult-income-dataset.csv
|
||||
/evaluate_data.csv
|
||||
/result_pytorch.txt
|
||||
|
430
IUM_main.ipynb
430
IUM_main.ipynb
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -10,32 +10,32 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: kaggle in c:\\users\\user\\anaconda3\\lib\\site-packages (1.5.12)\n",
|
||||
"Requirement already satisfied: python-dateutil in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (2.8.2)\n",
|
||||
"Requirement already satisfied: python-slugify in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (5.0.2)\n",
|
||||
"Requirement already satisfied: urllib3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (1.26.7)\n",
|
||||
"Requirement already satisfied: certifi in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (2021.10.8)\n",
|
||||
"Requirement already satisfied: tqdm in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (4.62.3)\n",
|
||||
"Requirement already satisfied: tqdm in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (4.64.0)\n",
|
||||
"Requirement already satisfied: python-slugify in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (5.0.2)\n",
|
||||
"Requirement already satisfied: requests in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (2.26.0)\n",
|
||||
"Requirement already satisfied: six>=1.10 in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (1.16.0)\n",
|
||||
"Requirement already satisfied: certifi in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (2022.5.18.1)\n",
|
||||
"Requirement already satisfied: python-dateutil in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (2.8.2)\n",
|
||||
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
|
||||
"Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.0.4)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.2)\n",
|
||||
"Requirement already satisfied: colorama in c:\\users\\user\\anaconda3\\lib\\site-packages (from tqdm->kaggle) (0.4.4)\n",
|
||||
"Requirement already satisfied: pandas in c:\\users\\user\\anaconda3\\lib\\site-packages (1.3.4)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from pandas) (2.8.2)\n",
|
||||
"Requirement already satisfied: pytz>=2017.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from pandas) (2021.3)\n",
|
||||
"Requirement already satisfied: numpy>=1.17.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from pandas) (1.20.3)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from pandas) (2.8.2)\n",
|
||||
"Requirement already satisfied: six>=1.5 in c:\\users\\user\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n",
|
||||
"Requirement already satisfied: seaborn in c:\\users\\user\\anaconda3\\lib\\site-packages (0.11.2)\n",
|
||||
"Requirement already satisfied: scipy>=1.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (1.7.1)\n",
|
||||
"Requirement already satisfied: pandas>=0.23 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (1.3.4)\n",
|
||||
"Requirement already satisfied: numpy>=1.15 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (1.20.3)\n",
|
||||
"Requirement already satisfied: matplotlib>=2.2 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (3.4.3)\n",
|
||||
"Requirement already satisfied: pandas>=0.23 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (1.3.4)\n",
|
||||
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.1)\n",
|
||||
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n",
|
||||
"Requirement already satisfied: pyparsing>=2.2.1 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (3.0.4)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n",
|
||||
"Requirement already satisfied: scipy>=1.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (1.7.1)\n",
|
||||
"Requirement already satisfied: pyparsing>=2.2.1 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
|
||||
"Requirement already satisfied: cycler>=0.10 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
|
||||
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.1)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n",
|
||||
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n",
|
||||
"Requirement already satisfied: six in c:\\users\\user\\anaconda3\\lib\\site-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.16.0)\n",
|
||||
"Requirement already satisfied: pytz>=2017.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from pandas>=0.23->seaborn) (2021.3)\n"
|
||||
]
|
||||
@ -49,7 +49,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -68,7 +68,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -86,7 +86,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -374,7 +374,7 @@
|
||||
"[48842 rows x 15 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 56,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -387,7 +387,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -399,18 +399,354 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#usunięcie nie pełnych danych \n",
|
||||
"df = df[df.workclass != '?']\n",
|
||||
"df = df.reset_index()"
|
||||
"df = df.reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>age</th>\n",
|
||||
" <th>workclass</th>\n",
|
||||
" <th>fnlwgt</th>\n",
|
||||
" <th>education</th>\n",
|
||||
" <th>educational-num</th>\n",
|
||||
" <th>marital-status</th>\n",
|
||||
" <th>occupation</th>\n",
|
||||
" <th>relationship</th>\n",
|
||||
" <th>race</th>\n",
|
||||
" <th>gender</th>\n",
|
||||
" <th>capital-gain</th>\n",
|
||||
" <th>capital-loss</th>\n",
|
||||
" <th>hours-per-week</th>\n",
|
||||
" <th>native-country</th>\n",
|
||||
" <th>income</th>\n",
|
||||
" <th>income_if_<=50k</th>\n",
|
||||
" <th>if_male</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>25</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>226802</td>\n",
|
||||
" <td>11th</td>\n",
|
||||
" <td>7</td>\n",
|
||||
" <td>Never-married</td>\n",
|
||||
" <td>Machine-op-inspct</td>\n",
|
||||
" <td>Own-child</td>\n",
|
||||
" <td>Black</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" <td><=50K</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>38</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>89814</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Farming-fishing</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>50</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" <td><=50K</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>28</td>\n",
|
||||
" <td>Local-gov</td>\n",
|
||||
" <td>336951</td>\n",
|
||||
" <td>Assoc-acdm</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Protective-serv</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" <td>>50K</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>44</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>160323</td>\n",
|
||||
" <td>Some-college</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Machine-op-inspct</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>Black</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>7688</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" <td>>50K</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>34</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>198693</td>\n",
|
||||
" <td>10th</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>Never-married</td>\n",
|
||||
" <td>Other-service</td>\n",
|
||||
" <td>Not-in-family</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>30</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" <td><=50K</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46038</th>\n",
|
||||
" <td>27</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>257302</td>\n",
|
||||
" <td>Assoc-acdm</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Tech-support</td>\n",
|
||||
" <td>Wife</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>38</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" <td><=50K</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46039</th>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>154374</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Machine-op-inspct</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" <td>>50K</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46040</th>\n",
|
||||
" <td>58</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>151910</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Widowed</td>\n",
|
||||
" <td>Adm-clerical</td>\n",
|
||||
" <td>Unmarried</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" <td><=50K</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46041</th>\n",
|
||||
" <td>22</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>201490</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Never-married</td>\n",
|
||||
" <td>Adm-clerical</td>\n",
|
||||
" <td>Own-child</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" <td><=50K</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46042</th>\n",
|
||||
" <td>52</td>\n",
|
||||
" <td>Self-emp-inc</td>\n",
|
||||
" <td>287927</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Exec-managerial</td>\n",
|
||||
" <td>Wife</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>15024</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" <td>>50K</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>46043 rows × 17 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" age workclass fnlwgt education educational-num \\\n",
|
||||
"0 25 Private 226802 11th 7 \n",
|
||||
"1 38 Private 89814 HS-grad 9 \n",
|
||||
"2 28 Local-gov 336951 Assoc-acdm 12 \n",
|
||||
"3 44 Private 160323 Some-college 10 \n",
|
||||
"4 34 Private 198693 10th 6 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"46038 27 Private 257302 Assoc-acdm 12 \n",
|
||||
"46039 40 Private 154374 HS-grad 9 \n",
|
||||
"46040 58 Private 151910 HS-grad 9 \n",
|
||||
"46041 22 Private 201490 HS-grad 9 \n",
|
||||
"46042 52 Self-emp-inc 287927 HS-grad 9 \n",
|
||||
"\n",
|
||||
" marital-status occupation relationship race gender \\\n",
|
||||
"0 Never-married Machine-op-inspct Own-child Black Male \n",
|
||||
"1 Married-civ-spouse Farming-fishing Husband White Male \n",
|
||||
"2 Married-civ-spouse Protective-serv Husband White Male \n",
|
||||
"3 Married-civ-spouse Machine-op-inspct Husband Black Male \n",
|
||||
"4 Never-married Other-service Not-in-family White Male \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"46038 Married-civ-spouse Tech-support Wife White Female \n",
|
||||
"46039 Married-civ-spouse Machine-op-inspct Husband White Male \n",
|
||||
"46040 Widowed Adm-clerical Unmarried White Female \n",
|
||||
"46041 Never-married Adm-clerical Own-child White Male \n",
|
||||
"46042 Married-civ-spouse Exec-managerial Wife White Female \n",
|
||||
"\n",
|
||||
" capital-gain capital-loss hours-per-week native-country income \\\n",
|
||||
"0 0 0 40 United-States <=50K \n",
|
||||
"1 0 0 50 United-States <=50K \n",
|
||||
"2 0 0 40 United-States >50K \n",
|
||||
"3 7688 0 40 United-States >50K \n",
|
||||
"4 0 0 30 United-States <=50K \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"46038 0 0 38 United-States <=50K \n",
|
||||
"46039 0 0 40 United-States >50K \n",
|
||||
"46040 0 0 40 United-States <=50K \n",
|
||||
"46041 0 0 20 United-States <=50K \n",
|
||||
"46042 15024 0 40 United-States >50K \n",
|
||||
"\n",
|
||||
" income_if_<=50k if_male \n",
|
||||
"0 1 1 \n",
|
||||
"1 1 1 \n",
|
||||
"2 0 1 \n",
|
||||
"3 0 1 \n",
|
||||
"4 1 1 \n",
|
||||
"... ... ... \n",
|
||||
"46038 1 0 \n",
|
||||
"46039 0 1 \n",
|
||||
"46040 1 0 \n",
|
||||
"46041 1 1 \n",
|
||||
"46042 0 0 \n",
|
||||
"\n",
|
||||
"[46043 rows x 17 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -424,7 +760,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -436,7 +772,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -448,7 +784,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -465,7 +801,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -484,7 +820,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -494,28 +830,28 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 65,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"epoch:1,loss = 1.0032\n",
|
||||
"epoch:101,loss = 0.8295\n",
|
||||
"epoch:201,loss = 0.7194\n",
|
||||
"epoch:301,loss = 0.6511\n",
|
||||
"epoch:401,loss = 0.6088\n",
|
||||
"epoch:501,loss = 0.5823\n",
|
||||
"epoch:601,loss = 0.5656\n",
|
||||
"epoch:701,loss = 0.5548\n",
|
||||
"epoch:801,loss = 0.5478\n",
|
||||
"epoch:901,loss = 0.5431\n",
|
||||
"epoch:1001,loss = 0.5400\n",
|
||||
"epoch:1101,loss = 0.5378\n",
|
||||
"epoch:1201,loss = 0.5363\n",
|
||||
"epoch:1301,loss = 0.5353\n",
|
||||
"epoch:1401,loss = 0.5346\n"
|
||||
"epoch:1,loss = 0.5776\n",
|
||||
"epoch:101,loss = 0.5632\n",
|
||||
"epoch:201,loss = 0.5538\n",
|
||||
"epoch:301,loss = 0.5475\n",
|
||||
"epoch:401,loss = 0.5432\n",
|
||||
"epoch:501,loss = 0.5402\n",
|
||||
"epoch:601,loss = 0.5381\n",
|
||||
"epoch:701,loss = 0.5366\n",
|
||||
"epoch:801,loss = 0.5356\n",
|
||||
"epoch:901,loss = 0.5348\n",
|
||||
"epoch:1001,loss = 0.5342\n",
|
||||
"epoch:1101,loss = 0.5338\n",
|
||||
"epoch:1201,loss = 0.5335\n",
|
||||
"epoch:1301,loss = 0.5333\n",
|
||||
"epoch:1401,loss = 0.5332\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -534,14 +870,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.7395\n"
|
||||
"0.7412\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -555,7 +891,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -564,7 +900,7 @@
|
||||
"10"
|
||||
]
|
||||
},
|
||||
"execution_count": 72,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -577,10 +913,10 @@
|
||||
],
|
||||
"metadata": {
|
||||
"interpreter": {
|
||||
"hash": "2647ea34e536f865ab67ff9ddee7fd78773d956cec0cab53c79b32cd10da5d83"
|
||||
"hash": "f08154012ddadd8e950e6e9e035c7a7b32c136e7647e9b7c77e02eb723a8bedb"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.9.11 64-bit",
|
||||
"display_name": "Python 3.9.7 ('base')",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
25
dvc.lock
Normal file
25
dvc.lock
Normal file
@ -0,0 +1,25 @@
|
||||
schema: '2.0'
|
||||
stages:
|
||||
prepare:
|
||||
cmd: python evaluate_data.py
|
||||
deps:
|
||||
- path: adult-income-dataset.csv
|
||||
md5: 2e37fb9b8d375d82c7d424e934e0f074
|
||||
size: 5326368
|
||||
- path: evaluate_data.py
|
||||
md5: f32f596e84f28d6bdfd51778f51a871c
|
||||
size: 486
|
||||
outs:
|
||||
- path: evaluate_data_dvc.csv
|
||||
md5: 7950c91e22072d9086f2a9cba2fb1a51
|
||||
size: 5256976
|
||||
train:
|
||||
cmd: python train.py
|
||||
deps:
|
||||
- path: evaluate_data_dvc.csv
|
||||
md5: 7950c91e22072d9086f2a9cba2fb1a51
|
||||
size: 5256976
|
||||
outs:
|
||||
- path: result_pytorch.txt
|
||||
md5: 02aa16df8ab860cafc4946e1f5491937
|
||||
size: 10
|
14
dvc.yaml
Normal file
14
dvc.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
stages:
|
||||
prepare:
|
||||
cmd: python evaluate_data.py
|
||||
deps:
|
||||
- evaluate_data.py
|
||||
- adult-income-dataset.csv
|
||||
outs:
|
||||
- evaluate_data_dvc.csv
|
||||
train:
|
||||
cmd: python train.py
|
||||
deps:
|
||||
- evaluate_data_dvc.csv
|
||||
outs:
|
||||
- result_pytorch.txt
|
4
evaluate_data.csv.dvc
Normal file
4
evaluate_data.csv.dvc
Normal file
@ -0,0 +1,4 @@
|
||||
outs:
|
||||
- md5: 7950c91e22072d9086f2a9cba2fb1a51
|
||||
size: 5256976
|
||||
path: evaluate_data.csv
|
11
evaluate_data.py
Normal file
11
evaluate_data.py
Normal file
@ -0,0 +1,11 @@
|
||||
import pandas as pd
|
||||
# Load the raw Adult Income dataset from the current working directory.
df = pd.read_csv('adult-income-dataset.csv')

# Derive 1/0 indicator columns with vectorized comparisons rather than a
# per-row ``apply`` with a lambda followed by a separate cast.
df['income_if_<=50k'] = (df['income'] == '<=50K').astype(int)
df['if_male'] = (df['gender'] == 'Male').astype(int)

# Remove rows whose workclass is the '?' marker (incomplete records) and
# renumber the index so no gaps remain after filtering.
df = df[df.workclass != '?'].reset_index(drop=True)

# The same frame is written under two names; both files are kept because
# other parts of the project refer to each of them.
df.to_csv('evaluate_data.csv', index=False)
df.to_csv('evaluate_data_dvc.csv', index=False)
|
46044
evaluate_data_dvc.csv
Normal file
46044
evaluate_data_dvc.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -1 +1 @@
|
||||
acc:0.7395
|
||||
acc:0.7434
|
53
train.py
Normal file
53
train.py
Normal file
@ -0,0 +1,53 @@
|
||||
import torch.nn as nn
|
||||
import torch
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.model_selection import train_test_split
|
||||
# Load the preprocessed dataset from the current working directory.
df = pd.read_csv('evaluate_data_dvc.csv')

# Single input feature (age); the target is the binary "income <= 50K" flag.
X, y = df[['age']], df['income_if_<=50k']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=37
)
n_samples, n_features = X.shape

# Convert every split to an (n, 1) column array.
X_train = np.array(X_train).reshape(-1, 1)
X_test = np.array(X_test).reshape(-1, 1)
y_train = np.array(y_train).reshape(-1, 1)
y_test = np.array(y_test).reshape(-1, 1)

# Fit the scaler on the training split only and reuse its statistics for the
# test split. Calling fit_transform on the test data would rescale it with
# its own mean/variance, so train and test inputs would not be comparable.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# float32 tensors, as expected by nn.Linear / nn.BCELoss. The targets are
# already (n, 1) after the reshape above, so no further view() is needed.
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))
|
||||
class LogisticRegresion(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    The class and parameter names keep their original spelling so existing
    callers continue to work.
    """

    def __init__(self, n_input_featuers: int) -> None:
        """Create the linear layer mapping ``n_input_featuers`` inputs to 1 output."""
        super().__init__()
        self.linear = nn.Linear(n_input_featuers, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``sigmoid(linear(x))`` for the input batch ``x``."""
        return torch.sigmoid(self.linear(x))
|
||||
|
||||
model = LogisticRegresion(n_features)

# Binary cross-entropy on the sigmoid outputs, optimized with plain SGD.
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Each epoch runs one forward/backward pass over the whole training tensor.
num_epochs = 1500
for epoch in range(num_epochs):
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)
    loss.backward()
    optimizer.step()
    # Clear accumulated gradients before the next iteration.
    optimizer.zero_grad()

# Evaluate on the test tensors without tracking gradients.
with torch.no_grad():
    y_predicted = model(X_test)
    # Rounding the sigmoid output yields the predicted class (0 or 1).
    y_predicted_cls = y_predicted.round()
    acc = y_predicted_cls.eq(y_test).sum() / float(y_test.shape[0])

# Use a context manager so the result file is flushed and closed reliably.
with open("result_pytorch.txt", 'w') as result:
    result.write(f'acc:{acc:.4f}')
|
Loading…
Reference in New Issue
Block a user