sztuczna-empatia-kaczuszka/lab7/main.ipynb

416 lines
19 KiB
Plaintext
Raw Normal View History

2023-04-19 15:39:31 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: numpy in c:\\software\\python3\\lib\\site-packages (1.24.2)\n",
"Requirement already satisfied: pandas in c:\\software\\python3\\lib\\site-packages (1.5.3)\n",
"Requirement already satisfied: sklearn in \\\\files\\students\\s478831\\.appdata\\python\\python310\\site-packages (0.0.post4)\n",
"Requirement already satisfied: xgboost in \\\\files\\students\\s478831\\.appdata\\python\\python310\\site-packages (1.7.5)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2022.7.1)\n",
"Requirement already satisfied: scipy in c:\\software\\python3\\lib\\site-packages (from xgboost) (1.10.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\software\\python3\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n"
]
}
],
"source": [
"!pip install numpy pandas sklearn xgboost"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import os, sys\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from xgboost import XGBClassifier\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>MDVP:Fo(Hz)</th>\n",
" <th>MDVP:Fhi(Hz)</th>\n",
" <th>MDVP:Flo(Hz)</th>\n",
" <th>MDVP:Jitter(%)</th>\n",
" <th>MDVP:Jitter(Abs)</th>\n",
" <th>MDVP:RAP</th>\n",
" <th>MDVP:PPQ</th>\n",
" <th>Jitter:DDP</th>\n",
" <th>MDVP:Shimmer</th>\n",
" <th>...</th>\n",
" <th>Shimmer:DDA</th>\n",
" <th>NHR</th>\n",
" <th>HNR</th>\n",
" <th>status</th>\n",
" <th>RPDE</th>\n",
" <th>DFA</th>\n",
" <th>spread1</th>\n",
" <th>spread2</th>\n",
" <th>D2</th>\n",
" <th>PPE</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>phon_R01_S01_1</td>\n",
" <td>119.992</td>\n",
" <td>157.302</td>\n",
" <td>74.997</td>\n",
" <td>0.00784</td>\n",
" <td>0.00007</td>\n",
" <td>0.00370</td>\n",
" <td>0.00554</td>\n",
" <td>0.01109</td>\n",
" <td>0.04374</td>\n",
" <td>...</td>\n",
" <td>0.06545</td>\n",
" <td>0.02211</td>\n",
" <td>21.033</td>\n",
" <td>1</td>\n",
" <td>0.414783</td>\n",
" <td>0.815285</td>\n",
" <td>-4.813031</td>\n",
" <td>0.266482</td>\n",
" <td>2.301442</td>\n",
" <td>0.284654</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>phon_R01_S01_2</td>\n",
" <td>122.400</td>\n",
" <td>148.650</td>\n",
" <td>113.819</td>\n",
" <td>0.00968</td>\n",
" <td>0.00008</td>\n",
" <td>0.00465</td>\n",
" <td>0.00696</td>\n",
" <td>0.01394</td>\n",
" <td>0.06134</td>\n",
" <td>...</td>\n",
" <td>0.09403</td>\n",
" <td>0.01929</td>\n",
" <td>19.085</td>\n",
" <td>1</td>\n",
" <td>0.458359</td>\n",
" <td>0.819521</td>\n",
" <td>-4.075192</td>\n",
" <td>0.335590</td>\n",
" <td>2.486855</td>\n",
" <td>0.368674</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>phon_R01_S01_3</td>\n",
" <td>116.682</td>\n",
" <td>131.111</td>\n",
" <td>111.555</td>\n",
" <td>0.01050</td>\n",
" <td>0.00009</td>\n",
" <td>0.00544</td>\n",
" <td>0.00781</td>\n",
" <td>0.01633</td>\n",
" <td>0.05233</td>\n",
" <td>...</td>\n",
" <td>0.08270</td>\n",
" <td>0.01309</td>\n",
" <td>20.651</td>\n",
" <td>1</td>\n",
" <td>0.429895</td>\n",
" <td>0.825288</td>\n",
" <td>-4.443179</td>\n",
" <td>0.311173</td>\n",
" <td>2.342259</td>\n",
" <td>0.332634</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>phon_R01_S01_4</td>\n",
" <td>116.676</td>\n",
" <td>137.871</td>\n",
" <td>111.366</td>\n",
" <td>0.00997</td>\n",
" <td>0.00009</td>\n",
" <td>0.00502</td>\n",
" <td>0.00698</td>\n",
" <td>0.01505</td>\n",
" <td>0.05492</td>\n",
" <td>...</td>\n",
" <td>0.08771</td>\n",
" <td>0.01353</td>\n",
" <td>20.644</td>\n",
" <td>1</td>\n",
" <td>0.434969</td>\n",
" <td>0.819235</td>\n",
" <td>-4.117501</td>\n",
" <td>0.334147</td>\n",
" <td>2.405554</td>\n",
" <td>0.368975</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>phon_R01_S01_5</td>\n",
" <td>116.014</td>\n",
" <td>141.781</td>\n",
" <td>110.655</td>\n",
" <td>0.01284</td>\n",
" <td>0.00011</td>\n",
" <td>0.00655</td>\n",
" <td>0.00908</td>\n",
" <td>0.01966</td>\n",
" <td>0.06425</td>\n",
" <td>...</td>\n",
" <td>0.10470</td>\n",
" <td>0.01767</td>\n",
" <td>19.649</td>\n",
" <td>1</td>\n",
" <td>0.417356</td>\n",
" <td>0.823484</td>\n",
" <td>-3.747787</td>\n",
" <td>0.234513</td>\n",
" <td>2.332180</td>\n",
" <td>0.410335</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 24 columns</p>\n",
"</div>"
],
"text/plain": [
" name MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) \\\n",
"0 phon_R01_S01_1 119.992 157.302 74.997 0.00784 \n",
"1 phon_R01_S01_2 122.400 148.650 113.819 0.00968 \n",
"2 phon_R01_S01_3 116.682 131.111 111.555 0.01050 \n",
"3 phon_R01_S01_4 116.676 137.871 111.366 0.00997 \n",
"4 phon_R01_S01_5 116.014 141.781 110.655 0.01284 \n",
"\n",
" MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer ... \\\n",
"0 0.00007 0.00370 0.00554 0.01109 0.04374 ... \n",
"1 0.00008 0.00465 0.00696 0.01394 0.06134 ... \n",
"2 0.00009 0.00544 0.00781 0.01633 0.05233 ... \n",
"3 0.00009 0.00502 0.00698 0.01505 0.05492 ... \n",
"4 0.00011 0.00655 0.00908 0.01966 0.06425 ... \n",
"\n",
" Shimmer:DDA NHR HNR status RPDE DFA spread1 \\\n",
"0 0.06545 0.02211 21.033 1 0.414783 0.815285 -4.813031 \n",
"1 0.09403 0.01929 19.085 1 0.458359 0.819521 -4.075192 \n",
"2 0.08270 0.01309 20.651 1 0.429895 0.825288 -4.443179 \n",
"3 0.08771 0.01353 20.644 1 0.434969 0.819235 -4.117501 \n",
"4 0.10470 0.01767 19.649 1 0.417356 0.823484 -3.747787 \n",
"\n",
" spread2 D2 PPE \n",
"0 0.266482 2.301442 0.284654 \n",
"1 0.335590 2.486855 0.368674 \n",
"2 0.311173 2.342259 0.332634 \n",
"3 0.334147 2.405554 0.368975 \n",
"4 0.234513 2.332180 0.410335 \n",
"\n",
"[5 rows x 24 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df=pd.read_csv('./parkinsons.data')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#DataFlair - Get the features and labels\n",
"features=df.loc[:,df.columns!='status'].values[:,1:]\n",
"labels=df.loc[:,'status'].values"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"147 48\n"
]
}
],
"source": [
"#DataFlair - Get the count of each label (0 and 1) in labels\n",
"print(labels[labels==1].shape[0], labels[labels==0].shape[0])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"#DataFlair - Scale the features to between -1 and 1\n",
"scaler=MinMaxScaler((-1,1))\n",
"x=scaler.fit_transform(features)\n",
"y=labels"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"#DataFlair - Split the dataset\n",
"x_train,x_test,y_train,y_test=train_test_split(x, y, test_size=0.2, random_state=7)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\
" colsample_bylevel=None, colsample_bynode=None,\n",
" colsample_bytree=None, early_stopping_rounds=None,\n",
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
" gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
" interaction_constraints=None, learning_rate=None, max_bin=None,\n",
" max_cat_threshold=None, max_cat_to_onehot=None,\n",
" max_delta_step=None, max_depth=None, max_leaves=None,\n",
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
" n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
" predictor=None, random_state=None, ...)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">XGBClassifier</label><div class=\"sk-toggleable__content\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
" colsample_bylevel=None, colsample_bynode=None,\n",
" colsample_bytree=None, early_stopping_rounds=None,\n",
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
" gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
" interaction_constraints=None, learning_rate=None, max_bin=None,\n",
" max_cat_threshold=None, max_cat_to_onehot=None,\n",
" max_delta_step=None, max_depth=None, max_leaves=None,\n",
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
" n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
" predictor=None, random_state=None, ...)</pre></div></div></div></div></div>"
],
"text/plain": [
"XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
" colsample_bylevel=None, colsample_bynode=None,\n",
" colsample_bytree=None, early_stopping_rounds=None,\n",
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
" gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
" interaction_constraints=None, learning_rate=None, max_bin=None,\n",
" max_cat_threshold=None, max_cat_to_onehot=None,\n",
" max_delta_step=None, max_depth=None, max_leaves=None,\n",
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
" n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
" predictor=None, random_state=None, ...)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#DataFlair - Train the model\n",
"model=XGBClassifier()\n",
"model.fit(x_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"94.87179487179486\n"
]
}
],
"source": [
"# DataFlair - Calculate the accuracy\n",
"y_pred=model.predict(x_test)\n",
"print(accuracy_score(y_test, y_pred)*100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}