{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Defaulting to user installation because normal site-packages is not writeable\n",
      "Requirement already satisfied: numpy in c:\\software\\python3\\lib\\site-packages (1.24.2)\n",
      "Requirement already satisfied: pandas in c:\\software\\python3\\lib\\site-packages (1.5.3)\n",
      "Requirement already satisfied: sklearn in \\\\files\\students\\s478831\\.appdata\\python\\python310\\site-packages (0.0.post4)\n",
      "Requirement already satisfied: xgboost in \\\\files\\students\\s478831\\.appdata\\python\\python310\\site-packages (1.7.5)\n",
      "Requirement already satisfied: python-dateutil>=2.8.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2022.7.1)\n",
      "Requirement already satisfied: scipy in c:\\software\\python3\\lib\\site-packages (from xgboost) (1.10.1)\n",
      "Requirement already satisfied: six>=1.5 in c:\\software\\python3\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install numpy pandas sklearn xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os, sys\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from xgboost import XGBClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>MDVP:Fo(Hz)</th>\n",
       "      <th>MDVP:Fhi(Hz)</th>\n",
       "      <th>MDVP:Flo(Hz)</th>\n",
       "      <th>MDVP:Jitter(%)</th>\n",
       "      <th>MDVP:Jitter(Abs)</th>\n",
       "      <th>MDVP:RAP</th>\n",
       "      <th>MDVP:PPQ</th>\n",
       "      <th>Jitter:DDP</th>\n",
       "      <th>MDVP:Shimmer</th>\n",
       "      <th>...</th>\n",
       "      <th>Shimmer:DDA</th>\n",
       "      <th>NHR</th>\n",
       "      <th>HNR</th>\n",
       "      <th>status</th>\n",
       "      <th>RPDE</th>\n",
       "      <th>DFA</th>\n",
       "      <th>spread1</th>\n",
       "      <th>spread2</th>\n",
       "      <th>D2</th>\n",
       "      <th>PPE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>phon_R01_S01_1</td>\n",
       "      <td>119.992</td>\n",
       "      <td>157.302</td>\n",
       "      <td>74.997</td>\n",
       "      <td>0.00784</td>\n",
       "      <td>0.00007</td>\n",
       "      <td>0.00370</td>\n",
       "      <td>0.00554</td>\n",
       "      <td>0.01109</td>\n",
       "      <td>0.04374</td>\n",
       "      <td>...</td>\n",
       "      <td>0.06545</td>\n",
       "      <td>0.02211</td>\n",
       "      <td>21.033</td>\n",
       "      <td>1</td>\n",
       "      <td>0.414783</td>\n",
       "      <td>0.815285</td>\n",
       "      <td>-4.813031</td>\n",
       "      <td>0.266482</td>\n",
       "      <td>2.301442</td>\n",
       "      <td>0.284654</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>phon_R01_S01_2</td>\n",
       "      <td>122.400</td>\n",
       "      <td>148.650</td>\n",
       "      <td>113.819</td>\n",
       "      <td>0.00968</td>\n",
       "      <td>0.00008</td>\n",
       "      <td>0.00465</td>\n",
       "      <td>0.00696</td>\n",
       "      <td>0.01394</td>\n",
       "      <td>0.06134</td>\n",
       "      <td>...</td>\n",
       "      <td>0.09403</td>\n",
       "      <td>0.01929</td>\n",
       "      <td>19.085</td>\n",
       "      <td>1</td>\n",
       "      <td>0.458359</td>\n",
       "      <td>0.819521</td>\n",
       "      <td>-4.075192</td>\n",
       "      <td>0.335590</td>\n",
       "      <td>2.486855</td>\n",
       "      <td>0.368674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>phon_R01_S01_3</td>\n",
       "      <td>116.682</td>\n",
       "      <td>131.111</td>\n",
       "      <td>111.555</td>\n",
       "      <td>0.01050</td>\n",
       "      <td>0.00009</td>\n",
       "      <td>0.00544</td>\n",
       "      <td>0.00781</td>\n",
       "      <td>0.01633</td>\n",
       "      <td>0.05233</td>\n",
       "      <td>...</td>\n",
       "      <td>0.08270</td>\n",
       "      <td>0.01309</td>\n",
       "      <td>20.651</td>\n",
       "      <td>1</td>\n",
       "      <td>0.429895</td>\n",
       "      <td>0.825288</td>\n",
       "      <td>-4.443179</td>\n",
       "      <td>0.311173</td>\n",
       "      <td>2.342259</td>\n",
       "      <td>0.332634</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>phon_R01_S01_4</td>\n",
       "      <td>116.676</td>\n",
       "      <td>137.871</td>\n",
       "      <td>111.366</td>\n",
       "      <td>0.00997</td>\n",
       "      <td>0.00009</td>\n",
       "      <td>0.00502</td>\n",
       "      <td>0.00698</td>\n",
       "      <td>0.01505</td>\n",
       "      <td>0.05492</td>\n",
       "      <td>...</td>\n",
       "      <td>0.08771</td>\n",
       "      <td>0.01353</td>\n",
       "      <td>20.644</td>\n",
       "      <td>1</td>\n",
       "      <td>0.434969</td>\n",
       "      <td>0.819235</td>\n",
       "      <td>-4.117501</td>\n",
       "      <td>0.334147</td>\n",
       "      <td>2.405554</td>\n",
       "      <td>0.368975</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>phon_R01_S01_5</td>\n",
       "      <td>116.014</td>\n",
       "      <td>141.781</td>\n",
       "      <td>110.655</td>\n",
       "      <td>0.01284</td>\n",
       "      <td>0.00011</td>\n",
       "      <td>0.00655</td>\n",
       "      <td>0.00908</td>\n",
       "      <td>0.01966</td>\n",
       "      <td>0.06425</td>\n",
       "      <td>...</td>\n",
       "      <td>0.10470</td>\n",
       "      <td>0.01767</td>\n",
       "      <td>19.649</td>\n",
       "      <td>1</td>\n",
       "      <td>0.417356</td>\n",
       "      <td>0.823484</td>\n",
       "      <td>-3.747787</td>\n",
       "      <td>0.234513</td>\n",
       "      <td>2.332180</td>\n",
       "      <td>0.410335</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             name  MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \\\n",
       "0  phon_R01_S01_1      119.992       157.302        74.997         0.00784   \n",
       "1  phon_R01_S01_2      122.400       148.650       113.819         0.00968   \n",
       "2  phon_R01_S01_3      116.682       131.111       111.555         0.01050   \n",
       "3  phon_R01_S01_4      116.676       137.871       111.366         0.00997   \n",
       "4  phon_R01_S01_5      116.014       141.781       110.655         0.01284   \n",
       "\n",
       "   MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  ...  \\\n",
       "0           0.00007   0.00370   0.00554     0.01109       0.04374  ...   \n",
       "1           0.00008   0.00465   0.00696     0.01394       0.06134  ...   \n",
       "2           0.00009   0.00544   0.00781     0.01633       0.05233  ...   \n",
       "3           0.00009   0.00502   0.00698     0.01505       0.05492  ...   \n",
       "4           0.00011   0.00655   0.00908     0.01966       0.06425  ...   \n",
       "\n",
       "   Shimmer:DDA      NHR     HNR  status      RPDE       DFA   spread1  \\\n",
       "0      0.06545  0.02211  21.033       1  0.414783  0.815285 -4.813031   \n",
       "1      0.09403  0.01929  19.085       1  0.458359  0.819521 -4.075192   \n",
       "2      0.08270  0.01309  20.651       1  0.429895  0.825288 -4.443179   \n",
       "3      0.08771  0.01353  20.644       1  0.434969  0.819235 -4.117501   \n",
       "4      0.10470  0.01767  19.649       1  0.417356  0.823484 -3.747787   \n",
       "\n",
       "    spread2        D2       PPE  \n",
       "0  0.266482  2.301442  0.284654  \n",
       "1  0.335590  2.486855  0.368674  \n",
       "2  0.311173  2.342259  0.332634  \n",
       "3  0.334147  2.405554  0.368975  \n",
       "4  0.234513  2.332180  0.410335  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df=pd.read_csv('./parkinsons.data')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#DataFlair - Get the features and labels\n",
    "features=df.loc[:,df.columns!='status'].values[:,1:]\n",
    "labels=df.loc[:,'status'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "147 48\n"
     ]
    }
   ],
   "source": [
    "#DataFlair - Get the count of each label (0 and 1) in labels\n",
    "print(labels[labels==1].shape[0], labels[labels==0].shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "#DataFlair - Scale the features to between -1 and 1\n",
    "scaler=MinMaxScaler((-1,1))\n",
    "x=scaler.fit_transform(features)\n",
    "y=labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "#DataFlair - Split the dataset\n",
    "x_train,x_test,y_train,y_test=train_test_split(x, y, test_size=0.2, random_state=7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
       "              colsample_bylevel=None, colsample_bynode=None,\n",
       "              colsample_bytree=None, early_stopping_rounds=None,\n",
       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
       "              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
       "              interaction_constraints=None, learning_rate=None, max_bin=None,\n",
       "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
       "              max_delta_step=None, max_depth=None, max_leaves=None,\n",
       "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
       "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
       "              predictor=None, random_state=None, ...)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">XGBClassifier</label><div class=\"sk-toggleable__content\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
       "              colsample_bylevel=None, colsample_bynode=None,\n",
       "              colsample_bytree=None, early_stopping_rounds=None,\n",
       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
       "              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
       "              interaction_constraints=None, learning_rate=None, max_bin=None,\n",
       "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
       "              max_delta_step=None, max_depth=None, max_leaves=None,\n",
       "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
       "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
       "              predictor=None, random_state=None, ...)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
       "              colsample_bylevel=None, colsample_bynode=None,\n",
       "              colsample_bytree=None, early_stopping_rounds=None,\n",
       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
       "              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
       "              interaction_constraints=None, learning_rate=None, max_bin=None,\n",
       "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
       "              max_delta_step=None, max_depth=None, max_leaves=None,\n",
       "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
       "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
       "              predictor=None, random_state=None, ...)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#DataFlair - Train the model\n",
    "model=XGBClassifier()\n",
    "model.fit(x_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "94.87179487179486\n"
     ]
    }
   ],
   "source": [
    "# DataFlair - Calculate the accuracy\n",
    "y_pred=model.predict(x_test)\n",
    "print(accuracy_score(y_test, y_pred)*100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}