Added task 1 (zadanie1) to repository

2023-03-21 20:42:13 +01:00 · 2023-03-21 20:42:13 +01:00 · bd84848bf6
commit bd84848bf6
parent 1d0b0ebd9c
4 changed files with 2191 additions and 4 deletions
--- a/Customers.csv
+++ b/Customers.csv
--- a/2
+++ b/2
@ -1 +1 @@
-"THE BEER-WARE LICENSE" (Revision 42):  <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you  can do whatever you want with this stuff. If we meet some day, and you think  this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp
+"THE BEER-WARE LICENSE" (Revision 42):  <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you  can do whatever you want with this stuff. If we meet some day, and you think  this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp
--- a/README.md
+++ b/README.md
@ -1,3 +1,3 @@
-# ium_452627
-
-Rozwiązania zadań do przedmiotu Inżynieria Uczenia Maszynowego
+# ium_452627
+
+Rozwiązania zadań do przedmiotu: Inżynieria Uczenia Maszynowego
--- a/zadanie1.ipynb
+++ b/zadanie1.ipynb
@ -0,0 +1,186 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   CustomerID  Gender  Age  Annual Income ($)  Spending Score (1-100)  \\\n",
+      "0           1    Male   19              15000                      39   \n",
+      "1           2    Male   21              35000                      81   \n",
+      "2           3  Female   20              86000                       6   \n",
+      "3           4  Female   23              59000                      77   \n",
+      "4           5  Female   31              38000                      40   \n",
+      "5           6  Female   22              58000                      76   \n",
+      "6           7  Female   35              31000                       6   \n",
+      "7           8  Female   23              84000                      94   \n",
+      "8           9    Male   64              97000                       3   \n",
+      "9          10  Female   30              98000                      72   \n",
+      "\n",
+      "      Profession  Work Experience  Family Size  \n",
+      "0     Healthcare                1            4  \n",
+      "1       Engineer                3            3  \n",
+      "2       Engineer                1            1  \n",
+      "3         Lawyer                0            2  \n",
+      "4  Entertainment                2            6  \n",
+      "5         Artist                0            2  \n",
+      "6     Healthcare                1            3  \n",
+      "7     Healthcare                1            3  \n",
+      "8       Engineer                0            3  \n",
+      "9         Artist                1            4  \n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "# Load the raw customer table from the CSV file stored next to the notebook.\n",
+    "data = pd.read_csv('Customers.csv')\n",
+    "\n",
+    "# Preview the first ten rows of the raw table.\n",
+    "print(data.head(10))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   Gender       Age  Annual Income ($)  Spending Score (1-100)  Profession  \\\n",
+      "0     0.0  0.191919           0.078958                    0.39    0.111111   \n",
+      "1     0.0  0.212121           0.184236                    0.81    0.222222   \n",
+      "2     1.0  0.202020           0.452694                    0.06    0.222222   \n",
+      "3     1.0  0.232323           0.310569                    0.77    0.333333   \n",
+      "4     1.0  0.313131           0.200027                    0.40    0.444444   \n",
+      "5     1.0  0.222222           0.305305                    0.76    0.555556   \n",
+      "6     1.0  0.353535           0.163180                    0.06    0.111111   \n",
+      "7     1.0  0.232323           0.442166                    0.94    0.111111   \n",
+      "8     0.0  0.646465           0.510596                    0.03    0.222222   \n",
+      "9     1.0  0.303030           0.515860                    0.72    0.555556   \n",
+      "\n",
+      "   Work Experience  Family Size  \n",
+      "0         0.058824        0.375  \n",
+      "1         0.176471        0.250  \n",
+      "2         0.058824        0.000  \n",
+      "3         0.000000        0.125  \n",
+      "4         0.117647        0.625  \n",
+      "5         0.000000        0.125  \n",
+      "6         0.058824        0.250  \n",
+      "7         0.058824        0.250  \n",
+      "8         0.000000        0.250  \n",
+      "9         0.058824        0.375  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Changing words to numbers\n",
+    "\n",
+    "# Integer codes for the categorical columns. Code 0 is reserved for a missing\n",
+    "# profession: a missing cell is a float NaN, which a string key such as 'NaN'\n",
+    "# never matches, so it is filled explicitly after the mapping instead.\n",
+    "mapping = {'Healthcare' : 1, 'Engineer' : 2, 'Lawyer' : 3, 'Entertainment' : 4, 'Artist' : 5, 'Executive' : 6,\n",
+    " 'Doctor' : 7, 'Homemaker' : 8, 'Marketing' : 9}\n",
+    "\n",
+    "mapping2 = {'Male' : 0, 'Female' : 1}\n",
+    "\n",
+    "# Fail loudly on a category without a code; otherwise Series.map would turn it\n",
+    "# into NaN and it would be silently treated as a missing value.\n",
+    "for column, codes in (('Profession', mapping), ('Gender', mapping2)):\n",
+    "    unexpected = set(data[column].dropna()) - set(codes)\n",
+    "    if unexpected:\n",
+    "        raise ValueError(f'Unmapped values in {column}: {sorted(map(str, unexpected))}')\n",
+    "\n",
+    "# Build the encoded frame from the raw one without modifying `data`.\n",
+    "# Series.map returns numeric columns directly, so nothing depends on\n",
+    "# DataFrame.replace downcasting object columns behind the scenes.\n",
+    "dataF = data.drop(columns=['CustomerID']).assign(\n",
+    "    Gender=lambda frame: frame['Gender'].map(mapping2),\n",
+    "    Profession=lambda frame: frame['Profession'].map(mapping).fillna(0),\n",
+    ")\n",
+    "\n",
+    "# Normalization\n",
+    "\n",
+    "# Min-max scaling of every column to the [0, 1] range.\n",
+    "# NOTE(review): the minima and maxima are taken over all rows, i.e. before the\n",
+    "# train/dev/test split made in a later cell - confirm that this is intended.\n",
+    "column_min = dataF.min()\n",
+    "column_range = dataF.max() - column_min\n",
+    "normalized_dataF = (dataF - column_min) / column_range\n",
+    "\n",
+    "print(normalized_dataF[:10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Shuffle the rows once with a fixed seed before slicing, so that the split is\n",
+    "# reproducible and does not depend on the order of the rows in the CSV file\n",
+    "# (the preview above suggests the file is ordered by CustomerID).\n",
+    "SPLIT_SEED = 42\n",
+    "shuffled_dataF = normalized_dataF.sample(frac=1, random_state=SPLIT_SEED)\n",
+    "\n",
+    "# 80% / 10% / 10% boundaries, computed with integer arithmetic\n",
+    "# (1600 and 1800 for the 2000-row table).\n",
+    "train_end = len(shuffled_dataF) * 8 // 10\n",
+    "dev_end = len(shuffled_dataF) * 9 // 10\n",
+    "\n",
+    "# Positional slices: the index labels are permuted after shuffling.\n",
+    "train_data = shuffled_dataF.iloc[:train_end]\n",
+    "dev_data = shuffled_dataF.iloc[train_end:dev_end]\n",
+    "test_data = shuffled_dataF.iloc[dev_end:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Wielkość zbioru Customers: 2000 elementów\n",
+      "Wielkość zbioru trenującego: 1600 elementów\n",
+      "Wielkość zbioru walidującego: 200 elementów\n",
+      "Wielkość zbioru testującego: 200 elementów\n",
+      " \n",
+      "Dane i wartości na temat zbioru: \n",
+      " \n",
+      "             Gender          Age  Annual Income ($)  Spending Score (1-100)  \\\n",
+      "count  2000.000000  2000.000000        2000.000000             2000.000000   \n",
+      "mean      0.593000     0.494545           0.582879                0.509625   \n",
+      "std       0.491398     0.287169           0.240767                0.279347   \n",
+      "min       0.000000     0.000000           0.000000                0.000000   \n",
+      "25%       0.000000     0.252525           0.392538                0.280000   \n",
+      "50%       1.000000     0.484848           0.579263                0.500000   \n",
+      "75%       1.000000     0.737374           0.784806                0.750000   \n",
+      "max       1.000000     1.000000           1.000000                1.000000   \n",
+      "\n",
+      "        Profession  Work Experience  Family Size  \n",
+      "count  2000.000000      2000.000000  2000.000000  \n",
+      "mean      0.467167         0.241324     0.346062  \n",
+      "std       0.250289         0.230718     0.246344  \n",
+      "min       0.000000         0.000000     0.000000  \n",
+      "25%       0.222222         0.058824     0.125000  \n",
+      "50%       0.555556         0.176471     0.375000  \n",
+      "75%       0.555556         0.411765     0.500000  \n",
+      "max       1.000000         1.000000     1.000000  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Report the size of the full table and of each of the three splits.\n",
+    "print(f\"Wielkość zbioru Customers: {len(data)} elementów\")\n",
+    "print(f\"Wielkość zbioru trenującego: {len(train_data)} elementów\")\n",
+    "print(f\"Wielkość zbioru walidującego: {len(dev_data)} elementów\")\n",
+    "print(f\"Wielkość zbioru testującego: {len(test_data)} elementów\")\n",
+    "\n",
+    "# Summary statistics of the normalized table. The line breaks in the message are\n",
+    "# Python escape sequences, so they have to stay escaped in the notebook JSON;\n",
+    "# a raw line break inside this single-line f-string would be a syntax error.\n",
+    "print(f\" \\nDane i wartości na temat zbioru: \\n \\n {normalized_dataF.describe()}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}