DVC

2022-06-05 14:57:08 +02:00 · 2022-06-05 14:57:08 +02:00 · ec2a3d1c51
commit ec2a3d1c51
parent 3adee1f9a9
11 changed files with 46540 additions and 50 deletions
--- a/.dvc/config
+++ b/.dvc/config
@ -1,4 +1,6 @@
 [core]
-    remote = my_local_remote
+    remote = ium_ssh_remote
 ['remote "my_local_remote"']
    url = /dvcstore
+['remote "ium_ssh_remote"']
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
--- a/.gitignore
+++ b/.gitignore
@ -13,3 +13,5 @@ ipython_config.py
 #   git rm -r .ipynb_checkpoints/

 /adult-income-dataset.csv
+/evaluate_data.csv
+/result_pytorch.txt
--- a/IUM_main.ipynb
+++ b/IUM_main.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
@ -10,32 +10,32 @@
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: kaggle in c:\\users\\user\\anaconda3\\lib\\site-packages (1.5.12)\n",
-      "Requirement already satisfied: python-dateutil in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (2.8.2)\n",
-      "Requirement already satisfied: python-slugify in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (5.0.2)\n",
      "Requirement already satisfied: urllib3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (1.26.7)\n",
-      "Requirement already satisfied: certifi in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (2021.10.8)\n",
-      "Requirement already satisfied: tqdm in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (4.62.3)\n",
+      "Requirement already satisfied: tqdm in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (4.64.0)\n",
+      "Requirement already satisfied: python-slugify in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (5.0.2)\n",
      "Requirement already satisfied: requests in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (2.26.0)\n",
      "Requirement already satisfied: six>=1.10 in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (1.16.0)\n",
+      "Requirement already satisfied: certifi in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (2022.5.18.1)\n",
+      "Requirement already satisfied: python-dateutil in c:\\users\\user\\anaconda3\\lib\\site-packages (from kaggle) (2.8.2)\n",
      "Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
      "Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.0.4)\n",
      "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\user\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.2)\n",
      "Requirement already satisfied: colorama in c:\\users\\user\\anaconda3\\lib\\site-packages (from tqdm->kaggle) (0.4.4)\n",
      "Requirement already satisfied: pandas in c:\\users\\user\\anaconda3\\lib\\site-packages (1.3.4)\n",
+      "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from pandas) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2017.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from pandas) (2021.3)\n",
      "Requirement already satisfied: numpy>=1.17.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from pandas) (1.20.3)\n",
-      "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from pandas) (2.8.2)\n",
      "Requirement already satisfied: six>=1.5 in c:\\users\\user\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n",
      "Requirement already satisfied: seaborn in c:\\users\\user\\anaconda3\\lib\\site-packages (0.11.2)\n",
-      "Requirement already satisfied: scipy>=1.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (1.7.1)\n",
+      "Requirement already satisfied: pandas>=0.23 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (1.3.4)\n",
      "Requirement already satisfied: numpy>=1.15 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (1.20.3)\n",
      "Requirement already satisfied: matplotlib>=2.2 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (3.4.3)\n",
-      "Requirement already satisfied: pandas>=0.23 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (1.3.4)\n",
-      "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.1)\n",
-      "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n",
-      "Requirement already satisfied: pyparsing>=2.2.1 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (3.0.4)\n",
-      "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n",
+      "Requirement already satisfied: scipy>=1.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from seaborn) (1.7.1)\n",
+      "Requirement already satisfied: pyparsing>=2.2.1 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
      "Requirement already satisfied: cycler>=0.10 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
+      "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.1)\n",
+      "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n",
+      "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\user\\anaconda3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n",
      "Requirement already satisfied: six in c:\\users\\user\\anaconda3\\lib\\site-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.16.0)\n",
      "Requirement already satisfied: pytz>=2017.3 in c:\\users\\user\\anaconda3\\lib\\site-packages (from pandas>=0.23->seaborn) (2021.3)\n"
     ]
@ -49,7 +49,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
@ -68,7 +68,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
@ -86,7 +86,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
@ -374,7 +374,7 @@
       "[48842 rows x 15 columns]"
      ]
     },
-     "execution_count": 56,
+     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -387,7 +387,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@ -399,18 +399,354 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#usunięcie nie pełnych danych \n",
    "df = df[df.workclass != '?']\n",
-    "df = df.reset_index()"
+    "df = df.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>age</th>\n",
+       "      <th>workclass</th>\n",
+       "      <th>fnlwgt</th>\n",
+       "      <th>education</th>\n",
+       "      <th>educational-num</th>\n",
+       "      <th>marital-status</th>\n",
+       "      <th>occupation</th>\n",
+       "      <th>relationship</th>\n",
+       "      <th>race</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>capital-gain</th>\n",
+       "      <th>capital-loss</th>\n",
+       "      <th>hours-per-week</th>\n",
+       "      <th>native-country</th>\n",
+       "      <th>income</th>\n",
+       "      <th>income_if_&lt;=50k</th>\n",
+       "      <th>if_male</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>25</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>226802</td>\n",
+       "      <td>11th</td>\n",
+       "      <td>7</td>\n",
+       "      <td>Never-married</td>\n",
+       "      <td>Machine-op-inspct</td>\n",
+       "      <td>Own-child</td>\n",
+       "      <td>Black</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>40</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&lt;=50K</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>38</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>89814</td>\n",
+       "      <td>HS-grad</td>\n",
+       "      <td>9</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Farming-fishing</td>\n",
+       "      <td>Husband</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>50</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&lt;=50K</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>28</td>\n",
+       "      <td>Local-gov</td>\n",
+       "      <td>336951</td>\n",
+       "      <td>Assoc-acdm</td>\n",
+       "      <td>12</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Protective-serv</td>\n",
+       "      <td>Husband</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>40</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&gt;50K</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>44</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>160323</td>\n",
+       "      <td>Some-college</td>\n",
+       "      <td>10</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Machine-op-inspct</td>\n",
+       "      <td>Husband</td>\n",
+       "      <td>Black</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>7688</td>\n",
+       "      <td>0</td>\n",
+       "      <td>40</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&gt;50K</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>34</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>198693</td>\n",
+       "      <td>10th</td>\n",
+       "      <td>6</td>\n",
+       "      <td>Never-married</td>\n",
+       "      <td>Other-service</td>\n",
+       "      <td>Not-in-family</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>30</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&lt;=50K</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>46038</th>\n",
+       "      <td>27</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>257302</td>\n",
+       "      <td>Assoc-acdm</td>\n",
+       "      <td>12</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Tech-support</td>\n",
+       "      <td>Wife</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Female</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>38</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&lt;=50K</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>46039</th>\n",
+       "      <td>40</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>154374</td>\n",
+       "      <td>HS-grad</td>\n",
+       "      <td>9</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Machine-op-inspct</td>\n",
+       "      <td>Husband</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>40</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&gt;50K</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>46040</th>\n",
+       "      <td>58</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>151910</td>\n",
+       "      <td>HS-grad</td>\n",
+       "      <td>9</td>\n",
+       "      <td>Widowed</td>\n",
+       "      <td>Adm-clerical</td>\n",
+       "      <td>Unmarried</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Female</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>40</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&lt;=50K</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>46041</th>\n",
+       "      <td>22</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>201490</td>\n",
+       "      <td>HS-grad</td>\n",
+       "      <td>9</td>\n",
+       "      <td>Never-married</td>\n",
+       "      <td>Adm-clerical</td>\n",
+       "      <td>Own-child</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>20</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&lt;=50K</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>46042</th>\n",
+       "      <td>52</td>\n",
+       "      <td>Self-emp-inc</td>\n",
+       "      <td>287927</td>\n",
+       "      <td>HS-grad</td>\n",
+       "      <td>9</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Exec-managerial</td>\n",
+       "      <td>Wife</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Female</td>\n",
+       "      <td>15024</td>\n",
+       "      <td>0</td>\n",
+       "      <td>40</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&gt;50K</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>46043 rows × 17 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       age     workclass  fnlwgt     education  educational-num  \\\n",
+       "0       25       Private  226802          11th                7   \n",
+       "1       38       Private   89814       HS-grad                9   \n",
+       "2       28     Local-gov  336951    Assoc-acdm               12   \n",
+       "3       44       Private  160323  Some-college               10   \n",
+       "4       34       Private  198693          10th                6   \n",
+       "...    ...           ...     ...           ...              ...   \n",
+       "46038   27       Private  257302    Assoc-acdm               12   \n",
+       "46039   40       Private  154374       HS-grad                9   \n",
+       "46040   58       Private  151910       HS-grad                9   \n",
+       "46041   22       Private  201490       HS-grad                9   \n",
+       "46042   52  Self-emp-inc  287927       HS-grad                9   \n",
+       "\n",
+       "           marital-status         occupation   relationship   race  gender  \\\n",
+       "0           Never-married  Machine-op-inspct      Own-child  Black    Male   \n",
+       "1      Married-civ-spouse    Farming-fishing        Husband  White    Male   \n",
+       "2      Married-civ-spouse    Protective-serv        Husband  White    Male   \n",
+       "3      Married-civ-spouse  Machine-op-inspct        Husband  Black    Male   \n",
+       "4           Never-married      Other-service  Not-in-family  White    Male   \n",
+       "...                   ...                ...            ...    ...     ...   \n",
+       "46038  Married-civ-spouse       Tech-support           Wife  White  Female   \n",
+       "46039  Married-civ-spouse  Machine-op-inspct        Husband  White    Male   \n",
+       "46040             Widowed       Adm-clerical      Unmarried  White  Female   \n",
+       "46041       Never-married       Adm-clerical      Own-child  White    Male   \n",
+       "46042  Married-civ-spouse    Exec-managerial           Wife  White  Female   \n",
+       "\n",
+       "       capital-gain  capital-loss  hours-per-week native-country income  \\\n",
+       "0                 0             0              40  United-States  <=50K   \n",
+       "1                 0             0              50  United-States  <=50K   \n",
+       "2                 0             0              40  United-States   >50K   \n",
+       "3              7688             0              40  United-States   >50K   \n",
+       "4                 0             0              30  United-States  <=50K   \n",
+       "...             ...           ...             ...            ...    ...   \n",
+       "46038             0             0              38  United-States  <=50K   \n",
+       "46039             0             0              40  United-States   >50K   \n",
+       "46040             0             0              40  United-States  <=50K   \n",
+       "46041             0             0              20  United-States  <=50K   \n",
+       "46042         15024             0              40  United-States   >50K   \n",
+       "\n",
+       "       income_if_<=50k  if_male  \n",
+       "0                    1        1  \n",
+       "1                    1        1  \n",
+       "2                    0        1  \n",
+       "3                    0        1  \n",
+       "4                    1        1  \n",
+       "...                ...      ...  \n",
+       "46038                1        0  \n",
+       "46039                0        1  \n",
+       "46040                1        0  \n",
+       "46041                1        1  \n",
+       "46042                0        0  \n",
+       "\n",
+       "[46043 rows x 17 columns]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@ -424,7 +760,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
@ -436,7 +772,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 61,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
@ -448,7 +784,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 62,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
@ -465,7 +801,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 63,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
@ -484,7 +820,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
@ -494,28 +830,28 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 65,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "epoch:1,loss = 1.0032\n",
-      "epoch:101,loss = 0.8295\n",
-      "epoch:201,loss = 0.7194\n",
-      "epoch:301,loss = 0.6511\n",
-      "epoch:401,loss = 0.6088\n",
-      "epoch:501,loss = 0.5823\n",
-      "epoch:601,loss = 0.5656\n",
-      "epoch:701,loss = 0.5548\n",
-      "epoch:801,loss = 0.5478\n",
-      "epoch:901,loss = 0.5431\n",
-      "epoch:1001,loss = 0.5400\n",
-      "epoch:1101,loss = 0.5378\n",
-      "epoch:1201,loss = 0.5363\n",
-      "epoch:1301,loss = 0.5353\n",
-      "epoch:1401,loss = 0.5346\n"
+      "epoch:1,loss = 0.5776\n",
+      "epoch:101,loss = 0.5632\n",
+      "epoch:201,loss = 0.5538\n",
+      "epoch:301,loss = 0.5475\n",
+      "epoch:401,loss = 0.5432\n",
+      "epoch:501,loss = 0.5402\n",
+      "epoch:601,loss = 0.5381\n",
+      "epoch:701,loss = 0.5366\n",
+      "epoch:801,loss = 0.5356\n",
+      "epoch:901,loss = 0.5348\n",
+      "epoch:1001,loss = 0.5342\n",
+      "epoch:1101,loss = 0.5338\n",
+      "epoch:1201,loss = 0.5335\n",
+      "epoch:1301,loss = 0.5333\n",
+      "epoch:1401,loss = 0.5332\n"
     ]
    }
   ],
@ -534,14 +870,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.7395\n"
+      "0.7412\n"
     ]
    }
   ],
@ -555,7 +891,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 72,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
@ -564,7 +900,7 @@
       "10"
      ]
     },
-     "execution_count": 72,
+     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -577,10 +913,10 @@
 ],
 "metadata": {
  "interpreter": {
-   "hash": "2647ea34e536f865ab67ff9ddee7fd78773d956cec0cab53c79b32cd10da5d83"
+   "hash": "f08154012ddadd8e950e6e9e035c7a7b32c136e7647e9b7c77e02eb723a8bedb"
  },
  "kernelspec": {
-   "display_name": "Python 3.9.11 64-bit",
+   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
--- a/dvc.lock
+++ b/dvc.lock
@ -0,0 +1,25 @@
+schema: '2.0'
+stages:
+  prepare:
+    cmd: python evaluate_data.py
+    deps:
+    - path: adult-income-dataset.csv
+      md5: 2e37fb9b8d375d82c7d424e934e0f074
+      size: 5326368
+    - path: evaluate_data.py
+      md5: f32f596e84f28d6bdfd51778f51a871c
+      size: 486
+    outs:
+    - path: evaluate_data_dvc.csv
+      md5: 7950c91e22072d9086f2a9cba2fb1a51
+      size: 5256976
+  train:
+    cmd: python train.py
+    deps:
+    - path: evaluate_data_dvc.csv
+      md5: 7950c91e22072d9086f2a9cba2fb1a51
+      size: 5256976
+    outs:
+    - path: result_pytorch.txt
+      md5: 02aa16df8ab860cafc4946e1f5491937
+      size: 10
--- a/dvc.yaml
+++ b/dvc.yaml
@ -0,0 +1,14 @@
+stages:
+  prepare:
+    cmd: python evaluate_data.py
+    deps:
+    - evaluate_data.py
+    - adult-income-dataset.csv
+    outs:
+    - evaluate_data_dvc.csv
+  train:
+    cmd: python train.py
+    deps:
+    - evaluate_data_dvc.csv
+    outs:
+    - result_pytorch.txt
--- a/evaluate_data.csv.dvc
+++ b/evaluate_data.csv.dvc
@ -0,0 +1,4 @@
+outs:
+- md5: 7950c91e22072d9086f2a9cba2fb1a51
+  size: 5256976
+  path: evaluate_data.csv
--- a/evaluate_data.py
+++ b/evaluate_data.py
@ -0,0 +1,11 @@
+# Prepare the adult-income data: add 0/1 indicator columns and drop incomplete rows.
+import pandas as pd
+
+data = pd.read_csv('adult-income-dataset.csv')
+# Binary (0/1) indicators derived from the categorical income and gender columns.
+data['income_if_<=50k'] = (data['income'] == '<=50K').astype(int)
+data['if_male'] = (data['gender'] == 'Male').astype(int)
+# Rows whose workclass is '?' are removed; the index is renumbered from 0 afterwards.
+data = data.loc[data['workclass'] != '?'].reset_index(drop=True)
+for target in ('evaluate_data.csv', 'evaluate_data_dvc.csv'):
+    data.to_csv(target, index=False)
--- a/evaluate_data_dvc.csv
+++ b/evaluate_data_dvc.csv
--- a/mian.py
+++ b/mian.py
@ -1 +0,0 @@
-print(2)
--- a/2
+++ b/2
@ -1 +1 @@
-acc:0.7395
+acc:0.7434
--- a/train.py
+++ b/train.py
@ -0,0 +1,53 @@
+"""Train a one-feature logistic regression (age -> income_if_<=50k) and save its test accuracy."""
+import torch.nn as nn
+import torch
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import train_test_split
+
+df = pd.read_csv('evaluate_data_dvc.csv')
+
+X, y = df[['age']], df['income_if_<=50k']
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=37)
+n_features = X.shape[1]
+
+# Fit the scaler on the training split only and reuse its statistics for the
+# test split, so no information from the test data leaks into preprocessing.
+sc = StandardScaler()
+X_train = sc.fit_transform(np.array(X_train).reshape(-1, n_features))
+X_test = sc.transform(np.array(X_test).reshape(-1, n_features))
+
+X_train = torch.from_numpy(X_train.astype(np.float32))
+X_test = torch.from_numpy(X_test.astype(np.float32))
+# Targets are float column vectors (n, 1): nn.BCELoss needs target and output shapes to match.
+y_train = torch.from_numpy(np.array(y_train).reshape(-1, 1).astype(np.float32))
+y_test = torch.from_numpy(np.array(y_test).reshape(-1, 1).astype(np.float32))
+
+class LogisticRegresion(nn.Module):
+    """Single linear layer followed by a sigmoid, producing a probability per row."""
+
+    def __init__(self, n_input_featuers):
+        super().__init__()
+        self.linear = nn.Linear(n_input_featuers, 1)
+
+    def forward(self, x):
+        """Return the sigmoid of the linear layer's output for each row of ``x``."""
+        return torch.sigmoid(self.linear(x))
+
+model = LogisticRegresion(n_features)
+criterion = nn.BCELoss()
+optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
+
+# Full-batch gradient descent: every epoch runs on the whole training split.
+num_epochs = 1500
+for epoch in range(num_epochs):
+    optimizer.zero_grad()
+    loss = criterion(model(X_train), y_train)
+    loss.backward()
+    optimizer.step()
+with torch.no_grad():
+    y_predicted_cls = model(X_test).round()
+    acc = y_predicted_cls.eq(y_test).sum().item() / y_test.shape[0]
+with open("result_pytorch.txt", 'w') as result:  # closed (and flushed) on exit
+    result.write(f'acc:{acc:.4f}')