Added task 1 (zadanie1) to repository
This commit is contained in:
parent
1d0b0ebd9c
commit
bd84848bf6
2001
Customers.csv
Normal file
2001
Customers.csv
Normal file
File diff suppressed because it is too large
Load Diff
2
LICENSE
2
LICENSE
@ -1 +1 @@
|
||||
"THE BEER-WARE LICENSE" (Revision 42): <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp
|
||||
"THE BEER-WARE LICENSE" (Revision 42): <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp
|
||||
|
@ -1,3 +1,3 @@
|
||||
# ium_452627
|
||||
|
||||
Rozwiązania zadań do przedmiotu Inżynieria Uczenia Maszynowego
|
||||
# ium_452627
|
||||
|
||||
Rozwiązania zadań do przedmiotu: Inżynieria Uczenia Maszynowego
|
186
zadanie1.ipynb
Normal file
186
zadanie1.ipynb
Normal file
@ -0,0 +1,186 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" CustomerID Gender Age Annual Income ($) Spending Score (1-100) \\\n",
|
||||
"0 1 Male 19 15000 39 \n",
|
||||
"1 2 Male 21 35000 81 \n",
|
||||
"2 3 Female 20 86000 6 \n",
|
||||
"3 4 Female 23 59000 77 \n",
|
||||
"4 5 Female 31 38000 40 \n",
|
||||
"5 6 Female 22 58000 76 \n",
|
||||
"6 7 Female 35 31000 6 \n",
|
||||
"7 8 Female 23 84000 94 \n",
|
||||
"8 9 Male 64 97000 3 \n",
|
||||
"9 10 Female 30 98000 72 \n",
|
||||
"\n",
|
||||
" Profession Work Experience Family Size \n",
|
||||
"0 Healthcare 1 4 \n",
|
||||
"1 Engineer 3 3 \n",
|
||||
"2 Engineer 1 1 \n",
|
||||
"3 Lawyer 0 2 \n",
|
||||
"4 Entertainment 2 6 \n",
|
||||
"5 Artist 0 2 \n",
|
||||
"6 Healthcare 1 3 \n",
|
||||
"7 Healthcare 1 3 \n",
|
||||
"8 Engineer 0 3 \n",
|
||||
"9 Artist 1 4 \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"data = pd.read_csv(\"Customers.csv\")\n",
|
||||
"print(data[:10])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Gender Age Annual Income ($) Spending Score (1-100) Profession \\\n",
|
||||
"0 0.0 0.191919 0.078958 0.39 0.111111 \n",
|
||||
"1 0.0 0.212121 0.184236 0.81 0.222222 \n",
|
||||
"2 1.0 0.202020 0.452694 0.06 0.222222 \n",
|
||||
"3 1.0 0.232323 0.310569 0.77 0.333333 \n",
|
||||
"4 1.0 0.313131 0.200027 0.40 0.444444 \n",
|
||||
"5 1.0 0.222222 0.305305 0.76 0.555556 \n",
|
||||
"6 1.0 0.353535 0.163180 0.06 0.111111 \n",
|
||||
"7 1.0 0.232323 0.442166 0.94 0.111111 \n",
|
||||
"8 0.0 0.646465 0.510596 0.03 0.222222 \n",
|
||||
"9 1.0 0.303030 0.515860 0.72 0.555556 \n",
|
||||
"\n",
|
||||
" Work Experience Family Size \n",
|
||||
"0 0.058824 0.375 \n",
|
||||
"1 0.176471 0.250 \n",
|
||||
"2 0.058824 0.000 \n",
|
||||
"3 0.000000 0.125 \n",
|
||||
"4 0.117647 0.625 \n",
|
||||
"5 0.000000 0.125 \n",
|
||||
"6 0.058824 0.250 \n",
|
||||
"7 0.058824 0.250 \n",
|
||||
"8 0.000000 0.250 \n",
|
||||
"9 0.058824 0.375 \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataF = data\n",
|
||||
"\n",
|
||||
"# Changing words to numbers\n",
|
||||
"\n",
|
||||
"mapping = {'NaN' : 0, 'Healthcare' : 1, 'Engineer' : 2, 'Lawyer' : 3, 'Entertainment' : 4, 'Artist' : 5, 'Executive' : 6,\n",
|
||||
" 'Doctor' : 7, 'Homemaker' : 8, 'Marketing' : 9}\n",
|
||||
"\n",
|
||||
"mapping2 = {'Male' : 0, 'Female' : 1}\n",
|
||||
"\n",
|
||||
"dataF = dataF.replace({'Profession': mapping})\n",
|
||||
"dataF = dataF.replace({'Gender': mapping2})\n",
|
||||
"\n",
|
||||
"dataF = dataF.drop(columns=['CustomerID'])\n",
|
||||
"\n",
|
||||
"# Normalization\n",
|
||||
"\n",
|
||||
"dataF['Profession'] = dataF['Profession'].fillna(0)\n",
|
||||
"\n",
|
||||
"normalized_dataF = (dataF - dataF.min())/(dataF.max() - dataF.min())\n",
|
||||
"\n",
|
||||
"print(normalized_dataF[:10])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train_data = normalized_dataF[0:1600]\n",
|
||||
"dev_data = normalized_dataF[1600:1800]\n",
|
||||
"test_data = normalized_dataF[1800:]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Wielkość zbioru Customers: 2000 elementów\n",
|
||||
"Wielkość zbioru trenującego: 1600 elementów\n",
|
||||
"Wielkość zbioru walidującego: 200 elementów\n",
|
||||
"Wielkość zbioru testującego: 200 elementów\n",
|
||||
" \n",
|
||||
"Dane i wartości na temat zbioru: \n",
|
||||
" \n",
|
||||
" Gender Age Annual Income ($) Spending Score (1-100) \\\n",
|
||||
"count 2000.000000 2000.000000 2000.000000 2000.000000 \n",
|
||||
"mean 0.593000 0.494545 0.582879 0.509625 \n",
|
||||
"std 0.491398 0.287169 0.240767 0.279347 \n",
|
||||
"min 0.000000 0.000000 0.000000 0.000000 \n",
|
||||
"25% 0.000000 0.252525 0.392538 0.280000 \n",
|
||||
"50% 1.000000 0.484848 0.579263 0.500000 \n",
|
||||
"75% 1.000000 0.737374 0.784806 0.750000 \n",
|
||||
"max 1.000000 1.000000 1.000000 1.000000 \n",
|
||||
"\n",
|
||||
" Profession Work Experience Family Size \n",
|
||||
"count 2000.000000 2000.000000 2000.000000 \n",
|
||||
"mean 0.467167 0.241324 0.346062 \n",
|
||||
"std 0.250289 0.230718 0.246344 \n",
|
||||
"min 0.000000 0.000000 0.000000 \n",
|
||||
"25% 0.222222 0.058824 0.125000 \n",
|
||||
"50% 0.555556 0.176471 0.375000 \n",
|
||||
"75% 0.555556 0.411765 0.500000 \n",
|
||||
"max 1.000000 1.000000 1.000000 \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(f\"Wielkość zbioru Customers: {len(data)} elementów\")\n",
|
||||
"print(f\"Wielkość zbioru trenującego: {len(train_data)} elementów\")\n",
|
||||
"print(f\"Wielkość zbioru walidującego: {len(dev_data)} elementów\")\n",
|
||||
"print(f\"Wielkość zbioru testującego: {len(test_data)} elementów\")\n",
|
||||
"\n",
|
||||
"print(f\" \\nDane i wartości na temat zbioru: \\n \\n {normalized_dataF.describe()}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Loading…
Reference in New Issue
Block a user