{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Requirement already satisfied: numpy in c:\\software\\python3\\lib\\site-packages (1.24.2)\n", "Requirement already satisfied: pandas in c:\\software\\python3\\lib\\site-packages (1.5.3)\n", "Requirement already satisfied: sklearn in \\\\files\\students\\s478831\\.appdata\\python\\python310\\site-packages (0.0.post4)\n", "Requirement already satisfied: xgboost in \\\\files\\students\\s478831\\.appdata\\python\\python310\\site-packages (1.7.5)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2022.7.1)\n", "Requirement already satisfied: scipy in c:\\software\\python3\\lib\\site-packages (from xgboost) (1.10.1)\n", "Requirement already satisfied: six>=1.5 in c:\\software\\python3\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n" ] } ], "source": [ "# Install into the running kernel's environment (%pip, not !pip).\n", "# The PyPI distribution is 'scikit-learn'; 'sklearn' is only a deprecated placeholder package.\n", "%pip install numpy pandas scikit-learn xgboost" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import os, sys\n", "from sklearn.preprocessing import MinMaxScaler\n", "from xgboost import XGBClassifier\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameMDVP:Fo(Hz)MDVP:Fhi(Hz)MDVP:Flo(Hz)MDVP:Jitter(%)MDVP:Jitter(Abs)MDVP:RAPMDVP:PPQJitter:DDPMDVP:Shimmer...Shimmer:DDANHRHNRstatusRPDEDFAspread1spread2D2PPE
0phon_R01_S01_1119.992157.30274.9970.007840.000070.003700.005540.011090.04374...0.065450.0221121.03310.4147830.815285-4.8130310.2664822.3014420.284654
1phon_R01_S01_2122.400148.650113.8190.009680.000080.004650.006960.013940.06134...0.094030.0192919.08510.4583590.819521-4.0751920.3355902.4868550.368674
2phon_R01_S01_3116.682131.111111.5550.010500.000090.005440.007810.016330.05233...0.082700.0130920.65110.4298950.825288-4.4431790.3111732.3422590.332634
3phon_R01_S01_4116.676137.871111.3660.009970.000090.005020.006980.015050.05492...0.087710.0135320.64410.4349690.819235-4.1175010.3341472.4055540.368975
4phon_R01_S01_5116.014141.781110.6550.012840.000110.006550.009080.019660.06425...0.104700.0176719.64910.4173560.823484-3.7477870.2345132.3321800.410335
\n", "

5 rows × 24 columns

\n", "
" ], "text/plain": [ " name MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) \\\n", "0 phon_R01_S01_1 119.992 157.302 74.997 0.00784 \n", "1 phon_R01_S01_2 122.400 148.650 113.819 0.00968 \n", "2 phon_R01_S01_3 116.682 131.111 111.555 0.01050 \n", "3 phon_R01_S01_4 116.676 137.871 111.366 0.00997 \n", "4 phon_R01_S01_5 116.014 141.781 110.655 0.01284 \n", "\n", " MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer ... \\\n", "0 0.00007 0.00370 0.00554 0.01109 0.04374 ... \n", "1 0.00008 0.00465 0.00696 0.01394 0.06134 ... \n", "2 0.00009 0.00544 0.00781 0.01633 0.05233 ... \n", "3 0.00009 0.00502 0.00698 0.01505 0.05492 ... \n", "4 0.00011 0.00655 0.00908 0.01966 0.06425 ... \n", "\n", " Shimmer:DDA NHR HNR status RPDE DFA spread1 \\\n", "0 0.06545 0.02211 21.033 1 0.414783 0.815285 -4.813031 \n", "1 0.09403 0.01929 19.085 1 0.458359 0.819521 -4.075192 \n", "2 0.08270 0.01309 20.651 1 0.429895 0.825288 -4.443179 \n", "3 0.08771 0.01353 20.644 1 0.434969 0.819235 -4.117501 \n", "4 0.10470 0.01767 19.649 1 0.417356 0.823484 -3.747787 \n", "\n", " spread2 D2 PPE \n", "0 0.266482 2.301442 0.284654 \n", "1 0.335590 2.486855 0.368674 \n", "2 0.311173 2.342259 0.332634 \n", "3 0.334147 2.405554 0.368975 \n", "4 0.234513 2.332180 0.410335 \n", "\n", "[5 rows x 24 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df=pd.read_csv('./parkinsons.data')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "#DataFlair - Get the features and labels\n", "#Drop the identifier ('name') and the target ('status') by label rather than by position,\n", "#so the feature matrix is numeric instead of an object array and does not depend on column order\n", "features=df.drop(columns=['name','status']).values\n", "labels=df.loc[:,'status'].values" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "147 48\n" ] } ], "source": [ "#DataFlair - Get the count of each label (0 and 1) in labels\n", "print(labels[labels==1].shape[0], labels[labels==0].shape[0])" ] }, { "cell_type": "code", 
"execution_count": 7, "metadata": {}, "outputs": [], "source": [ "#DataFlair - Split the dataset before scaling so the held-out rows cannot influence the scaler\n", "x_train_raw,x_test_raw,y_train,y_test=train_test_split(features, labels, test_size=0.2, random_state=7)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "#DataFlair - Scale the features to between -1 and 1, fitting the scaler on the training split only\n", "#The test split reuses the training min/max, so its values may fall slightly outside [-1, 1]\n", "scaler=MinMaxScaler((-1,1))\n", "x_train=scaler.fit_transform(x_train_raw)\n", "x_test=scaler.transform(x_test_raw)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
       "              colsample_bylevel=None, colsample_bynode=None,\n",
       "              colsample_bytree=None, early_stopping_rounds=None,\n",
       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
       "              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
       "              interaction_constraints=None, learning_rate=None, max_bin=None,\n",
       "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
       "              max_delta_step=None, max_depth=None, max_leaves=None,\n",
       "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
       "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
       "              predictor=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "XGBClassifier(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=None, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n", " interaction_constraints=None, learning_rate=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None, max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", " predictor=None, random_state=None, ...)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#DataFlair - Fit an XGBoost classifier (constructor defaults) on the training split\n", "model = XGBClassifier()\n", "model.fit(x_train, y_train)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "94.87179487179486\n" ] } ], "source": [ "# DataFlair - Predict on the held-out split and print the accuracy as a percentage\n", "y_pred = model.predict(x_test)\n", "accuracy_pct = accuracy_score(y_test, y_pred) * 100\n", "print(accuracy_pct)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.10" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }