3338 lines
126 KiB
Plaintext
3338 lines
126 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## IUM 2"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Installation of packages"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 86,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Requirement already satisfied: kaggle in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.6.6)\n",
|
|
"Requirement already satisfied: six>=1.10 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from kaggle) (1.16.0)\n",
|
|
"Requirement already satisfied: certifi in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2024.2.2)\n",
|
|
"Requirement already satisfied: python-dateutil in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from kaggle) (2.9.0.post0)\n",
|
|
"Requirement already satisfied: requests in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2.31.0)\n",
|
|
"Requirement already satisfied: tqdm in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (4.66.2)\n",
|
|
"Requirement already satisfied: python-slugify in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (8.0.4)\n",
|
|
"Requirement already satisfied: urllib3 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2.2.1)\n",
|
|
"Requirement already satisfied: bleach in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (6.1.0)\n",
|
|
"Requirement already satisfied: webencodings in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from bleach->kaggle) (0.5.1)\n",
|
|
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
|
|
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from requests->kaggle) (3.3.2)\n",
|
|
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from requests->kaggle) (3.6)\n",
|
|
"Requirement already satisfied: colorama in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from tqdm->kaggle) (0.4.6)\n",
|
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
"Requirement already satisfied: pandas in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (2.2.1)\n",
|
|
"Requirement already satisfied: numpy<2,>=1.26.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (1.26.3)\n",
|
|
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from pandas) (2.9.0.post0)\n",
|
|
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n",
|
|
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n",
|
|
"Requirement already satisfied: six>=1.5 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
|
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
"Requirement already satisfied: numpy in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.26.3)\n",
|
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
"Requirement already satisfied: scikit-learn in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.4.1.post1)\n",
|
|
"Requirement already satisfied: numpy<2.0,>=1.19.5 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.26.3)\n",
|
|
"Requirement already satisfied: scipy>=1.6.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.12.0)\n",
|
|
"Requirement already satisfied: joblib>=1.2.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.3.2)\n",
|
|
"Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (3.3.0)\n",
|
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Versions are pinned to the ones recorded in this notebook's original run\n",
|
|
"# so that a fresh environment reproduces the same results.\n",
|
|
"%pip install -q kaggle==1.6.6 pandas==2.2.1 numpy==1.26.3 scikit-learn==1.4.1.post1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Importing libraries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 87,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"# To preprocess the data\n",
|
|
"from sklearn.preprocessing import StandardScaler\n",
|
|
"\n",
|
|
"# To split the data\n",
|
|
"from sklearn.model_selection import train_test_split"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Downloading a dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 88,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"creditcardfraud.zip: Skipping, found more recently modified local copy (use --force to force download)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"!kaggle datasets download -d mlg-ulb/creditcardfraud"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Uncompress a file"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 89,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Archive: creditcardfraud.zip\n",
|
|
" inflating: creditcard.csv \n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"!unzip -o creditcardfraud.zip"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Load the data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 90,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df = pd.read_csv('creditcard.csv')\n",
|
|
"pd.set_option('display.max_columns', None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Check missing values"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 91,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Time 0\n",
|
|
"V1 0\n",
|
|
"V2 0\n",
|
|
"V3 0\n",
|
|
"V4 0\n",
|
|
"V5 0\n",
|
|
"V6 0\n",
|
|
"V7 0\n",
|
|
"V8 0\n",
|
|
"V9 0\n",
|
|
"V10 0\n",
|
|
"V11 0\n",
|
|
"V12 0\n",
|
|
"V13 0\n",
|
|
"V14 0\n",
|
|
"V15 0\n",
|
|
"V16 0\n",
|
|
"V17 0\n",
|
|
"V18 0\n",
|
|
"V19 0\n",
|
|
"V20 0\n",
|
|
"V21 0\n",
|
|
"V22 0\n",
|
|
"V23 0\n",
|
|
"V24 0\n",
|
|
"V25 0\n",
|
|
"V26 0\n",
|
|
"V27 0\n",
|
|
"V28 0\n",
|
|
"Amount 0\n",
|
|
"Class 0\n",
|
|
"dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 91,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"df.isnull().sum()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Size of the dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 92,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
"RangeIndex: 284807 entries, 0 to 284806\n",
|
|
"Data columns (total 31 columns):\n",
|
|
" # Column Non-Null Count Dtype \n",
|
|
"--- ------ -------------- ----- \n",
|
|
" 0 Time 284807 non-null float64\n",
|
|
" 1 V1 284807 non-null float64\n",
|
|
" 2 V2 284807 non-null float64\n",
|
|
" 3 V3 284807 non-null float64\n",
|
|
" 4 V4 284807 non-null float64\n",
|
|
" 5 V5 284807 non-null float64\n",
|
|
" 6 V6 284807 non-null float64\n",
|
|
" 7 V7 284807 non-null float64\n",
|
|
" 8 V8 284807 non-null float64\n",
|
|
" 9 V9 284807 non-null float64\n",
|
|
" 10 V10 284807 non-null float64\n",
|
|
" 11 V11 284807 non-null float64\n",
|
|
" 12 V12 284807 non-null float64\n",
|
|
" 13 V13 284807 non-null float64\n",
|
|
" 14 V14 284807 non-null float64\n",
|
|
" 15 V15 284807 non-null float64\n",
|
|
" 16 V16 284807 non-null float64\n",
|
|
" 17 V17 284807 non-null float64\n",
|
|
" 18 V18 284807 non-null float64\n",
|
|
" 19 V19 284807 non-null float64\n",
|
|
" 20 V20 284807 non-null float64\n",
|
|
" 21 V21 284807 non-null float64\n",
|
|
" 22 V22 284807 non-null float64\n",
|
|
" 23 V23 284807 non-null float64\n",
|
|
" 24 V24 284807 non-null float64\n",
|
|
" 25 V25 284807 non-null float64\n",
|
|
" 26 V26 284807 non-null float64\n",
|
|
" 27 V27 284807 non-null float64\n",
|
|
" 28 V28 284807 non-null float64\n",
|
|
" 29 Amount 284807 non-null float64\n",
|
|
" 30 Class 284807 non-null int64 \n",
|
|
"dtypes: float64(30), int64(1)\n",
|
|
"memory usage: 67.4 MB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"df.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Standardising the `Amount` column"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 93,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Standardise 'Amount' in place to zero mean and unit variance.\n",
|
|
"# NOTE(review): the scaler is fitted on the full dataset here; if a train/test\n",
|
|
"# split follows, confirm that fitting before the split is intended.\n",
|
|
"scaler = StandardScaler()\n",
|
|
"\n",
|
|
"# fit_transform expects a 2-D (n_samples, n_features) array, hence the reshape.\n",
|
|
"df['Amount'] = scaler.fit_transform(df['Amount'].to_numpy().reshape(-1, 1))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Summary statistics"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 94,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Time</th>\n",
|
|
" <th>V1</th>\n",
|
|
" <th>V2</th>\n",
|
|
" <th>V3</th>\n",
|
|
" <th>V4</th>\n",
|
|
" <th>V5</th>\n",
|
|
" <th>V6</th>\n",
|
|
" <th>V7</th>\n",
|
|
" <th>V8</th>\n",
|
|
" <th>V9</th>\n",
|
|
" <th>V10</th>\n",
|
|
" <th>V11</th>\n",
|
|
" <th>V12</th>\n",
|
|
" <th>V13</th>\n",
|
|
" <th>V14</th>\n",
|
|
" <th>V15</th>\n",
|
|
" <th>V16</th>\n",
|
|
" <th>V17</th>\n",
|
|
" <th>V18</th>\n",
|
|
" <th>V19</th>\n",
|
|
" <th>V20</th>\n",
|
|
" <th>V21</th>\n",
|
|
" <th>V22</th>\n",
|
|
" <th>V23</th>\n",
|
|
" <th>V24</th>\n",
|
|
" <th>V25</th>\n",
|
|
" <th>V26</th>\n",
|
|
" <th>V27</th>\n",
|
|
" <th>V28</th>\n",
|
|
" <th>Amount</th>\n",
|
|
" <th>Class</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>count</th>\n",
|
|
" <td>284807.000000</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>2.848070e+05</td>\n",
|
|
" <td>284807.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>mean</th>\n",
|
|
" <td>94813.859575</td>\n",
|
|
" <td>1.168375e-15</td>\n",
|
|
" <td>3.416908e-16</td>\n",
|
|
" <td>-1.379537e-15</td>\n",
|
|
" <td>2.074095e-15</td>\n",
|
|
" <td>9.604066e-16</td>\n",
|
|
" <td>1.487313e-15</td>\n",
|
|
" <td>-5.556467e-16</td>\n",
|
|
" <td>1.213481e-16</td>\n",
|
|
" <td>-2.406331e-15</td>\n",
|
|
" <td>2.239053e-15</td>\n",
|
|
" <td>1.673327e-15</td>\n",
|
|
" <td>-1.247012e-15</td>\n",
|
|
" <td>8.190001e-16</td>\n",
|
|
" <td>1.207294e-15</td>\n",
|
|
" <td>4.887456e-15</td>\n",
|
|
" <td>1.437716e-15</td>\n",
|
|
" <td>-3.772171e-16</td>\n",
|
|
" <td>9.564149e-16</td>\n",
|
|
" <td>1.039917e-15</td>\n",
|
|
" <td>6.406204e-16</td>\n",
|
|
" <td>1.654067e-16</td>\n",
|
|
" <td>-3.568593e-16</td>\n",
|
|
" <td>2.578648e-16</td>\n",
|
|
" <td>4.473266e-15</td>\n",
|
|
" <td>5.340915e-16</td>\n",
|
|
" <td>1.683437e-15</td>\n",
|
|
" <td>-3.660091e-16</td>\n",
|
|
" <td>-1.227390e-16</td>\n",
|
|
" <td>2.913952e-17</td>\n",
|
|
" <td>0.001727</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>std</th>\n",
|
|
" <td>47488.145955</td>\n",
|
|
" <td>1.958696e+00</td>\n",
|
|
" <td>1.651309e+00</td>\n",
|
|
" <td>1.516255e+00</td>\n",
|
|
" <td>1.415869e+00</td>\n",
|
|
" <td>1.380247e+00</td>\n",
|
|
" <td>1.332271e+00</td>\n",
|
|
" <td>1.237094e+00</td>\n",
|
|
" <td>1.194353e+00</td>\n",
|
|
" <td>1.098632e+00</td>\n",
|
|
" <td>1.088850e+00</td>\n",
|
|
" <td>1.020713e+00</td>\n",
|
|
" <td>9.992014e-01</td>\n",
|
|
" <td>9.952742e-01</td>\n",
|
|
" <td>9.585956e-01</td>\n",
|
|
" <td>9.153160e-01</td>\n",
|
|
" <td>8.762529e-01</td>\n",
|
|
" <td>8.493371e-01</td>\n",
|
|
" <td>8.381762e-01</td>\n",
|
|
" <td>8.140405e-01</td>\n",
|
|
" <td>7.709250e-01</td>\n",
|
|
" <td>7.345240e-01</td>\n",
|
|
" <td>7.257016e-01</td>\n",
|
|
" <td>6.244603e-01</td>\n",
|
|
" <td>6.056471e-01</td>\n",
|
|
" <td>5.212781e-01</td>\n",
|
|
" <td>4.822270e-01</td>\n",
|
|
" <td>4.036325e-01</td>\n",
|
|
" <td>3.300833e-01</td>\n",
|
|
" <td>1.000002e+00</td>\n",
|
|
" <td>0.041527</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>min</th>\n",
|
|
" <td>0.000000</td>\n",
|
|
" <td>-5.640751e+01</td>\n",
|
|
" <td>-7.271573e+01</td>\n",
|
|
" <td>-4.832559e+01</td>\n",
|
|
" <td>-5.683171e+00</td>\n",
|
|
" <td>-1.137433e+02</td>\n",
|
|
" <td>-2.616051e+01</td>\n",
|
|
" <td>-4.355724e+01</td>\n",
|
|
" <td>-7.321672e+01</td>\n",
|
|
" <td>-1.343407e+01</td>\n",
|
|
" <td>-2.458826e+01</td>\n",
|
|
" <td>-4.797473e+00</td>\n",
|
|
" <td>-1.868371e+01</td>\n",
|
|
" <td>-5.791881e+00</td>\n",
|
|
" <td>-1.921433e+01</td>\n",
|
|
" <td>-4.498945e+00</td>\n",
|
|
" <td>-1.412985e+01</td>\n",
|
|
" <td>-2.516280e+01</td>\n",
|
|
" <td>-9.498746e+00</td>\n",
|
|
" <td>-7.213527e+00</td>\n",
|
|
" <td>-5.449772e+01</td>\n",
|
|
" <td>-3.483038e+01</td>\n",
|
|
" <td>-1.093314e+01</td>\n",
|
|
" <td>-4.480774e+01</td>\n",
|
|
" <td>-2.836627e+00</td>\n",
|
|
" <td>-1.029540e+01</td>\n",
|
|
" <td>-2.604551e+00</td>\n",
|
|
" <td>-2.256568e+01</td>\n",
|
|
" <td>-1.543008e+01</td>\n",
|
|
" <td>-3.532294e-01</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25%</th>\n",
|
|
" <td>54201.500000</td>\n",
|
|
" <td>-9.203734e-01</td>\n",
|
|
" <td>-5.985499e-01</td>\n",
|
|
" <td>-8.903648e-01</td>\n",
|
|
" <td>-8.486401e-01</td>\n",
|
|
" <td>-6.915971e-01</td>\n",
|
|
" <td>-7.682956e-01</td>\n",
|
|
" <td>-5.540759e-01</td>\n",
|
|
" <td>-2.086297e-01</td>\n",
|
|
" <td>-6.430976e-01</td>\n",
|
|
" <td>-5.354257e-01</td>\n",
|
|
" <td>-7.624942e-01</td>\n",
|
|
" <td>-4.055715e-01</td>\n",
|
|
" <td>-6.485393e-01</td>\n",
|
|
" <td>-4.255740e-01</td>\n",
|
|
" <td>-5.828843e-01</td>\n",
|
|
" <td>-4.680368e-01</td>\n",
|
|
" <td>-4.837483e-01</td>\n",
|
|
" <td>-4.988498e-01</td>\n",
|
|
" <td>-4.562989e-01</td>\n",
|
|
" <td>-2.117214e-01</td>\n",
|
|
" <td>-2.283949e-01</td>\n",
|
|
" <td>-5.423504e-01</td>\n",
|
|
" <td>-1.618463e-01</td>\n",
|
|
" <td>-3.545861e-01</td>\n",
|
|
" <td>-3.171451e-01</td>\n",
|
|
" <td>-3.269839e-01</td>\n",
|
|
" <td>-7.083953e-02</td>\n",
|
|
" <td>-5.295979e-02</td>\n",
|
|
" <td>-3.308401e-01</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>50%</th>\n",
|
|
" <td>84692.000000</td>\n",
|
|
" <td>1.810880e-02</td>\n",
|
|
" <td>6.548556e-02</td>\n",
|
|
" <td>1.798463e-01</td>\n",
|
|
" <td>-1.984653e-02</td>\n",
|
|
" <td>-5.433583e-02</td>\n",
|
|
" <td>-2.741871e-01</td>\n",
|
|
" <td>4.010308e-02</td>\n",
|
|
" <td>2.235804e-02</td>\n",
|
|
" <td>-5.142873e-02</td>\n",
|
|
" <td>-9.291738e-02</td>\n",
|
|
" <td>-3.275735e-02</td>\n",
|
|
" <td>1.400326e-01</td>\n",
|
|
" <td>-1.356806e-02</td>\n",
|
|
" <td>5.060132e-02</td>\n",
|
|
" <td>4.807155e-02</td>\n",
|
|
" <td>6.641332e-02</td>\n",
|
|
" <td>-6.567575e-02</td>\n",
|
|
" <td>-3.636312e-03</td>\n",
|
|
" <td>3.734823e-03</td>\n",
|
|
" <td>-6.248109e-02</td>\n",
|
|
" <td>-2.945017e-02</td>\n",
|
|
" <td>6.781943e-03</td>\n",
|
|
" <td>-1.119293e-02</td>\n",
|
|
" <td>4.097606e-02</td>\n",
|
|
" <td>1.659350e-02</td>\n",
|
|
" <td>-5.213911e-02</td>\n",
|
|
" <td>1.342146e-03</td>\n",
|
|
" <td>1.124383e-02</td>\n",
|
|
" <td>-2.652715e-01</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>75%</th>\n",
|
|
" <td>139320.500000</td>\n",
|
|
" <td>1.315642e+00</td>\n",
|
|
" <td>8.037239e-01</td>\n",
|
|
" <td>1.027196e+00</td>\n",
|
|
" <td>7.433413e-01</td>\n",
|
|
" <td>6.119264e-01</td>\n",
|
|
" <td>3.985649e-01</td>\n",
|
|
" <td>5.704361e-01</td>\n",
|
|
" <td>3.273459e-01</td>\n",
|
|
" <td>5.971390e-01</td>\n",
|
|
" <td>4.539234e-01</td>\n",
|
|
" <td>7.395934e-01</td>\n",
|
|
" <td>6.182380e-01</td>\n",
|
|
" <td>6.625050e-01</td>\n",
|
|
" <td>4.931498e-01</td>\n",
|
|
" <td>6.488208e-01</td>\n",
|
|
" <td>5.232963e-01</td>\n",
|
|
" <td>3.996750e-01</td>\n",
|
|
" <td>5.008067e-01</td>\n",
|
|
" <td>4.589494e-01</td>\n",
|
|
" <td>1.330408e-01</td>\n",
|
|
" <td>1.863772e-01</td>\n",
|
|
" <td>5.285536e-01</td>\n",
|
|
" <td>1.476421e-01</td>\n",
|
|
" <td>4.395266e-01</td>\n",
|
|
" <td>3.507156e-01</td>\n",
|
|
" <td>2.409522e-01</td>\n",
|
|
" <td>9.104512e-02</td>\n",
|
|
" <td>7.827995e-02</td>\n",
|
|
" <td>-4.471707e-02</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>max</th>\n",
|
|
" <td>172792.000000</td>\n",
|
|
" <td>2.454930e+00</td>\n",
|
|
" <td>2.205773e+01</td>\n",
|
|
" <td>9.382558e+00</td>\n",
|
|
" <td>1.687534e+01</td>\n",
|
|
" <td>3.480167e+01</td>\n",
|
|
" <td>7.330163e+01</td>\n",
|
|
" <td>1.205895e+02</td>\n",
|
|
" <td>2.000721e+01</td>\n",
|
|
" <td>1.559499e+01</td>\n",
|
|
" <td>2.374514e+01</td>\n",
|
|
" <td>1.201891e+01</td>\n",
|
|
" <td>7.848392e+00</td>\n",
|
|
" <td>7.126883e+00</td>\n",
|
|
" <td>1.052677e+01</td>\n",
|
|
" <td>8.877742e+00</td>\n",
|
|
" <td>1.731511e+01</td>\n",
|
|
" <td>9.253526e+00</td>\n",
|
|
" <td>5.041069e+00</td>\n",
|
|
" <td>5.591971e+00</td>\n",
|
|
" <td>3.942090e+01</td>\n",
|
|
" <td>2.720284e+01</td>\n",
|
|
" <td>1.050309e+01</td>\n",
|
|
" <td>2.252841e+01</td>\n",
|
|
" <td>4.584549e+00</td>\n",
|
|
" <td>7.519589e+00</td>\n",
|
|
" <td>3.517346e+00</td>\n",
|
|
" <td>3.161220e+01</td>\n",
|
|
" <td>3.384781e+01</td>\n",
|
|
" <td>1.023622e+02</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Time V1 V2 V3 V4 \\\n",
|
|
"count 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
"mean 94813.859575 1.168375e-15 3.416908e-16 -1.379537e-15 2.074095e-15 \n",
|
|
"std 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00 1.415869e+00 \n",
|
|
"min 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00 \n",
|
|
"25% 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01 \n",
|
|
"50% 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01 -1.984653e-02 \n",
|
|
"75% 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00 7.433413e-01 \n",
|
|
"max 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00 1.687534e+01 \n",
|
|
"\n",
|
|
" V5 V6 V7 V8 V9 \\\n",
|
|
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
"mean 9.604066e-16 1.487313e-15 -5.556467e-16 1.213481e-16 -2.406331e-15 \n",
|
|
"std 1.380247e+00 1.332271e+00 1.237094e+00 1.194353e+00 1.098632e+00 \n",
|
|
"min -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01 -1.343407e+01 \n",
|
|
"25% -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01 -6.430976e-01 \n",
|
|
"50% -5.433583e-02 -2.741871e-01 4.010308e-02 2.235804e-02 -5.142873e-02 \n",
|
|
"75% 6.119264e-01 3.985649e-01 5.704361e-01 3.273459e-01 5.971390e-01 \n",
|
|
"max 3.480167e+01 7.330163e+01 1.205895e+02 2.000721e+01 1.559499e+01 \n",
|
|
"\n",
|
|
" V10 V11 V12 V13 V14 \\\n",
|
|
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
"mean 2.239053e-15 1.673327e-15 -1.247012e-15 8.190001e-16 1.207294e-15 \n",
|
|
"std 1.088850e+00 1.020713e+00 9.992014e-01 9.952742e-01 9.585956e-01 \n",
|
|
"min -2.458826e+01 -4.797473e+00 -1.868371e+01 -5.791881e+00 -1.921433e+01 \n",
|
|
"25% -5.354257e-01 -7.624942e-01 -4.055715e-01 -6.485393e-01 -4.255740e-01 \n",
|
|
"50% -9.291738e-02 -3.275735e-02 1.400326e-01 -1.356806e-02 5.060132e-02 \n",
|
|
"75% 4.539234e-01 7.395934e-01 6.182380e-01 6.625050e-01 4.931498e-01 \n",
|
|
"max 2.374514e+01 1.201891e+01 7.848392e+00 7.126883e+00 1.052677e+01 \n",
|
|
"\n",
|
|
" V15 V16 V17 V18 V19 \\\n",
|
|
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
"mean 4.887456e-15 1.437716e-15 -3.772171e-16 9.564149e-16 1.039917e-15 \n",
|
|
"std 9.153160e-01 8.762529e-01 8.493371e-01 8.381762e-01 8.140405e-01 \n",
|
|
"min -4.498945e+00 -1.412985e+01 -2.516280e+01 -9.498746e+00 -7.213527e+00 \n",
|
|
"25% -5.828843e-01 -4.680368e-01 -4.837483e-01 -4.988498e-01 -4.562989e-01 \n",
|
|
"50% 4.807155e-02 6.641332e-02 -6.567575e-02 -3.636312e-03 3.734823e-03 \n",
|
|
"75% 6.488208e-01 5.232963e-01 3.996750e-01 5.008067e-01 4.589494e-01 \n",
|
|
"max 8.877742e+00 1.731511e+01 9.253526e+00 5.041069e+00 5.591971e+00 \n",
|
|
"\n",
|
|
" V20 V21 V22 V23 V24 \\\n",
|
|
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
"mean 6.406204e-16 1.654067e-16 -3.568593e-16 2.578648e-16 4.473266e-15 \n",
|
|
"std 7.709250e-01 7.345240e-01 7.257016e-01 6.244603e-01 6.056471e-01 \n",
|
|
"min -5.449772e+01 -3.483038e+01 -1.093314e+01 -4.480774e+01 -2.836627e+00 \n",
|
|
"25% -2.117214e-01 -2.283949e-01 -5.423504e-01 -1.618463e-01 -3.545861e-01 \n",
|
|
"50% -6.248109e-02 -2.945017e-02 6.781943e-03 -1.119293e-02 4.097606e-02 \n",
|
|
"75% 1.330408e-01 1.863772e-01 5.285536e-01 1.476421e-01 4.395266e-01 \n",
|
|
"max 3.942090e+01 2.720284e+01 1.050309e+01 2.252841e+01 4.584549e+00 \n",
|
|
"\n",
|
|
" V25 V26 V27 V28 Amount \\\n",
|
|
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
"mean 5.340915e-16 1.683437e-15 -3.660091e-16 -1.227390e-16 2.913952e-17 \n",
|
|
"std 5.212781e-01 4.822270e-01 4.036325e-01 3.300833e-01 1.000002e+00 \n",
|
|
"min -1.029540e+01 -2.604551e+00 -2.256568e+01 -1.543008e+01 -3.532294e-01 \n",
|
|
"25% -3.171451e-01 -3.269839e-01 -7.083953e-02 -5.295979e-02 -3.308401e-01 \n",
|
|
"50% 1.659350e-02 -5.213911e-02 1.342146e-03 1.124383e-02 -2.652715e-01 \n",
|
|
"75% 3.507156e-01 2.409522e-01 9.104512e-02 7.827995e-02 -4.471707e-02 \n",
|
|
"max 7.519589e+00 3.517346e+00 3.161220e+01 3.384781e+01 1.023622e+02 \n",
|
|
"\n",
|
|
" Class \n",
|
|
"count 284807.000000 \n",
|
|
"mean 0.001727 \n",
|
|
"std 0.041527 \n",
|
|
"min 0.000000 \n",
|
|
"25% 0.000000 \n",
|
|
"50% 0.000000 \n",
|
|
"75% 0.000000 \n",
|
|
"max 1.000000 "
|
|
]
|
|
},
|
|
"execution_count": 94,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"df.describe()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Distribution of legitimate and fraudulent transactions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 95,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Class\n",
|
|
"0 284315\n",
|
|
"1 492\n",
|
|
"Name: count, dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 95,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"df['Class'].value_counts()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Undersampling the data\n",
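|
|
"Legitimate transactions (284,315) vastly outnumber fraudulent ones (492), so we undersample the majority class: every fraudulent transaction is kept and an equal number of legitimate transactions is drawn at random."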
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 96,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Seeded generator so the undersampled subset is identical on every\n",
|
|
"# Restart Kernel -> Run All (an unseeded draw changes all downstream results).\n",
|
|
"rng = np.random.default_rng(42)\n",
|
|
"\n",
|
|
"# Row labels and size of the minority (fraud) class\n",
|
|
"fraud_indices = df.index[df['Class'] == 1].to_numpy()\n",
|
|
"fraud_count = len(fraud_indices)\n",
|
|
"\n",
|
|
"# Row labels of the majority (legitimate) class\n",
|
|
"normal_indices = df.index[df['Class'] == 0]\n",
|
|
"\n",
|
|
"# Draw, without replacement, as many majority rows as there are minority rows\n",
|
|
"random_normal_indices = rng.choice(normal_indices.to_numpy(), size=fraud_count, replace=False)\n",
|
|
"\n",
|
|
"# Combine the labels of both classes\n",
|
|
"undersample_indice = np.concatenate([fraud_indices, random_normal_indices])\n",
|
|
"\n",
|
|
"# Select by label with .loc: these are index labels, not positions, so .iloc\n",
|
|
"# would only be correct while df keeps its default RangeIndex.\n",
|
|
"undersample_data = df.loc[undersample_indice, :]\n",
|
|
"\n",
|
|
"# Features: every column except the target; target kept as a one-column DataFrame\n",
|
|
"X_undersample = undersample_data.drop(columns='Class')\n",
|
|
"y_undersample = undersample_data[['Class']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Size of undersampled dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 97,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
"Index: 984 entries, 541 to 141412\n",
|
|
"Data columns (total 31 columns):\n",
|
|
" # Column Non-Null Count Dtype \n",
|
|
"--- ------ -------------- ----- \n",
|
|
" 0 Time 984 non-null float64\n",
|
|
" 1 V1 984 non-null float64\n",
|
|
" 2 V2 984 non-null float64\n",
|
|
" 3 V3 984 non-null float64\n",
|
|
" 4 V4 984 non-null float64\n",
|
|
" 5 V5 984 non-null float64\n",
|
|
" 6 V6 984 non-null float64\n",
|
|
" 7 V7 984 non-null float64\n",
|
|
" 8 V8 984 non-null float64\n",
|
|
" 9 V9 984 non-null float64\n",
|
|
" 10 V10 984 non-null float64\n",
|
|
" 11 V11 984 non-null float64\n",
|
|
" 12 V12 984 non-null float64\n",
|
|
" 13 V13 984 non-null float64\n",
|
|
" 14 V14 984 non-null float64\n",
|
|
" 15 V15 984 non-null float64\n",
|
|
" 16 V16 984 non-null float64\n",
|
|
" 17 V17 984 non-null float64\n",
|
|
" 18 V18 984 non-null float64\n",
|
|
" 19 V19 984 non-null float64\n",
|
|
" 20 V20 984 non-null float64\n",
|
|
" 21 V21 984 non-null float64\n",
|
|
" 22 V22 984 non-null float64\n",
|
|
" 23 V23 984 non-null float64\n",
|
|
" 24 V24 984 non-null float64\n",
|
|
" 25 V25 984 non-null float64\n",
|
|
" 26 V26 984 non-null float64\n",
|
|
" 27 V27 984 non-null float64\n",
|
|
" 28 V28 984 non-null float64\n",
|
|
" 29 Amount 984 non-null float64\n",
|
|
" 30 Class 984 non-null int64 \n",
|
|
"dtypes: float64(30), int64(1)\n",
|
|
"memory usage: 246.0 KB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"undersample_data.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Summary statistics of the undersampled dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 98,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Time</th>\n",
|
|
" <th>V1</th>\n",
|
|
" <th>V2</th>\n",
|
|
" <th>V3</th>\n",
|
|
" <th>V4</th>\n",
|
|
" <th>V5</th>\n",
|
|
" <th>V6</th>\n",
|
|
" <th>V7</th>\n",
|
|
" <th>V8</th>\n",
|
|
" <th>V9</th>\n",
|
|
" <th>V10</th>\n",
|
|
" <th>V11</th>\n",
|
|
" <th>V12</th>\n",
|
|
" <th>V13</th>\n",
|
|
" <th>V14</th>\n",
|
|
" <th>V15</th>\n",
|
|
" <th>V16</th>\n",
|
|
" <th>V17</th>\n",
|
|
" <th>V18</th>\n",
|
|
" <th>V19</th>\n",
|
|
" <th>V20</th>\n",
|
|
" <th>V21</th>\n",
|
|
" <th>V22</th>\n",
|
|
" <th>V23</th>\n",
|
|
" <th>V24</th>\n",
|
|
" <th>V25</th>\n",
|
|
" <th>V26</th>\n",
|
|
" <th>V27</th>\n",
|
|
" <th>V28</th>\n",
|
|
" <th>Amount</th>\n",
|
|
" <th>Class</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>count</th>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" <td>984.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>mean</th>\n",
|
|
" <td>88501.498984</td>\n",
|
|
" <td>-2.445079</td>\n",
|
|
" <td>1.781022</td>\n",
|
|
" <td>-3.509406</td>\n",
|
|
" <td>2.214004</td>\n",
|
|
" <td>-1.477993</td>\n",
|
|
" <td>-0.713150</td>\n",
|
|
" <td>-2.787427</td>\n",
|
|
" <td>0.279073</td>\n",
|
|
" <td>-1.253108</td>\n",
|
|
" <td>-2.841500</td>\n",
|
|
" <td>1.930697</td>\n",
|
|
" <td>-3.124120</td>\n",
|
|
" <td>-0.026229</td>\n",
|
|
" <td>-3.502384</td>\n",
|
|
" <td>-0.039494</td>\n",
|
|
" <td>-2.097294</td>\n",
|
|
" <td>-3.304208</td>\n",
|
|
" <td>-1.128950</td>\n",
|
|
" <td>0.343668</td>\n",
|
|
" <td>0.175905</td>\n",
|
|
" <td>0.331911</td>\n",
|
|
" <td>0.049631</td>\n",
|
|
" <td>-0.031264</td>\n",
|
|
" <td>-0.037389</td>\n",
|
|
" <td>0.022812</td>\n",
|
|
" <td>0.027632</td>\n",
|
|
" <td>0.086286</td>\n",
|
|
" <td>0.046738</td>\n",
|
|
" <td>0.039676</td>\n",
|
|
" <td>0.500000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>std</th>\n",
|
|
" <td>48996.269445</td>\n",
|
|
" <td>5.512352</td>\n",
|
|
" <td>3.713232</td>\n",
|
|
" <td>6.223001</td>\n",
|
|
" <td>3.231076</td>\n",
|
|
" <td>4.274632</td>\n",
|
|
" <td>1.789350</td>\n",
|
|
" <td>5.856197</td>\n",
|
|
" <td>4.857643</td>\n",
|
|
" <td>2.371055</td>\n",
|
|
" <td>4.563067</td>\n",
|
|
" <td>2.764745</td>\n",
|
|
" <td>4.595103</td>\n",
|
|
" <td>1.054377</td>\n",
|
|
" <td>4.653202</td>\n",
|
|
" <td>1.002911</td>\n",
|
|
" <td>3.465619</td>\n",
|
|
" <td>5.990033</td>\n",
|
|
" <td>2.412032</td>\n",
|
|
" <td>1.290973</td>\n",
|
|
" <td>1.126258</td>\n",
|
|
" <td>2.787884</td>\n",
|
|
" <td>1.167097</td>\n",
|
|
" <td>1.177562</td>\n",
|
|
" <td>0.551518</td>\n",
|
|
" <td>0.677541</td>\n",
|
|
" <td>0.476480</td>\n",
|
|
" <td>1.023332</td>\n",
|
|
" <td>0.479168</td>\n",
|
|
" <td>0.851800</td>\n",
|
|
" <td>0.500254</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>min</th>\n",
|
|
" <td>60.000000</td>\n",
|
|
" <td>-30.552380</td>\n",
|
|
" <td>-15.799625</td>\n",
|
|
" <td>-31.103685</td>\n",
|
|
" <td>-3.863126</td>\n",
|
|
" <td>-22.105532</td>\n",
|
|
" <td>-10.261990</td>\n",
|
|
" <td>-43.557242</td>\n",
|
|
" <td>-41.044261</td>\n",
|
|
" <td>-13.434066</td>\n",
|
|
" <td>-24.588262</td>\n",
|
|
" <td>-2.613374</td>\n",
|
|
" <td>-18.683715</td>\n",
|
|
" <td>-3.223045</td>\n",
|
|
" <td>-19.214325</td>\n",
|
|
" <td>-4.498945</td>\n",
|
|
" <td>-14.129855</td>\n",
|
|
" <td>-25.162799</td>\n",
|
|
" <td>-9.498746</td>\n",
|
|
" <td>-3.681904</td>\n",
|
|
" <td>-7.242879</td>\n",
|
|
" <td>-22.797604</td>\n",
|
|
" <td>-8.887017</td>\n",
|
|
" <td>-19.254328</td>\n",
|
|
" <td>-2.028024</td>\n",
|
|
" <td>-4.781606</td>\n",
|
|
" <td>-1.214960</td>\n",
|
|
" <td>-7.263482</td>\n",
|
|
" <td>-2.735623</td>\n",
|
|
" <td>-0.353229</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25%</th>\n",
|
|
" <td>45531.000000</td>\n",
|
|
" <td>-2.867222</td>\n",
|
|
" <td>-0.155438</td>\n",
|
|
" <td>-5.084967</td>\n",
|
|
" <td>-0.172018</td>\n",
|
|
" <td>-1.700260</td>\n",
|
|
" <td>-1.619179</td>\n",
|
|
" <td>-3.066415</td>\n",
|
|
" <td>-0.204192</td>\n",
|
|
" <td>-2.279453</td>\n",
|
|
" <td>-4.572043</td>\n",
|
|
" <td>-0.187147</td>\n",
|
|
" <td>-5.495221</td>\n",
|
|
" <td>-0.784589</td>\n",
|
|
" <td>-6.721799</td>\n",
|
|
" <td>-0.627097</td>\n",
|
|
" <td>-3.543426</td>\n",
|
|
" <td>-5.302111</td>\n",
|
|
" <td>-1.809496</td>\n",
|
|
" <td>-0.412430</td>\n",
|
|
" <td>-0.187708</td>\n",
|
|
" <td>-0.157259</td>\n",
|
|
" <td>-0.509376</td>\n",
|
|
" <td>-0.240064</td>\n",
|
|
" <td>-0.379825</td>\n",
|
|
" <td>-0.321251</td>\n",
|
|
" <td>-0.281187</td>\n",
|
|
" <td>-0.061809</td>\n",
|
|
" <td>-0.050194</td>\n",
|
|
" <td>-0.347302</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>50%</th>\n",
|
|
" <td>83076.500000</td>\n",
|
|
" <td>-0.823244</td>\n",
|
|
" <td>0.957399</td>\n",
|
|
" <td>-1.381998</td>\n",
|
|
" <td>1.287041</td>\n",
|
|
" <td>-0.394605</td>\n",
|
|
" <td>-0.689473</td>\n",
|
|
" <td>-0.668321</td>\n",
|
|
" <td>0.147397</td>\n",
|
|
" <td>-0.694910</td>\n",
|
|
" <td>-0.948441</td>\n",
|
|
" <td>1.170286</td>\n",
|
|
" <td>-0.858094</td>\n",
|
|
" <td>-0.000686</td>\n",
|
|
" <td>-1.110717</td>\n",
|
|
" <td>-0.006070</td>\n",
|
|
" <td>-0.677801</td>\n",
|
|
" <td>-0.513640</td>\n",
|
|
" <td>-0.383038</td>\n",
|
|
" <td>0.221049</td>\n",
|
|
" <td>0.040630</td>\n",
|
|
" <td>0.155404</td>\n",
|
|
" <td>0.080270</td>\n",
|
|
" <td>-0.030318</td>\n",
|
|
" <td>0.009379</td>\n",
|
|
" <td>0.049923</td>\n",
|
|
" <td>-0.007475</td>\n",
|
|
" <td>0.063100</td>\n",
|
|
" <td>0.039464</td>\n",
|
|
" <td>-0.280984</td>\n",
|
|
" <td>0.500000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>75%</th>\n",
|
|
" <td>135051.500000</td>\n",
|
|
" <td>0.919444</td>\n",
|
|
" <td>2.791569</td>\n",
|
|
" <td>0.356911</td>\n",
|
|
" <td>4.175332</td>\n",
|
|
" <td>0.616305</td>\n",
|
|
" <td>0.069620</td>\n",
|
|
" <td>0.265089</td>\n",
|
|
" <td>0.877002</td>\n",
|
|
" <td>0.134399</td>\n",
|
|
" <td>-0.016047</td>\n",
|
|
" <td>3.586502</td>\n",
|
|
" <td>0.190356</td>\n",
|
|
" <td>0.683977</td>\n",
|
|
" <td>0.110541</td>\n",
|
|
" <td>0.672903</td>\n",
|
|
" <td>0.250353</td>\n",
|
|
" <td>0.313841</td>\n",
|
|
" <td>0.334927</td>\n",
|
|
" <td>0.978754</td>\n",
|
|
" <td>0.445616</td>\n",
|
|
" <td>0.642724</td>\n",
|
|
" <td>0.624948</td>\n",
|
|
" <td>0.180735</td>\n",
|
|
" <td>0.365624</td>\n",
|
|
" <td>0.395001</td>\n",
|
|
" <td>0.324059</td>\n",
|
|
" <td>0.457194</td>\n",
|
|
" <td>0.226492</td>\n",
|
|
" <td>0.046539</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>max</th>\n",
|
|
" <td>172733.000000</td>\n",
|
|
" <td>2.335833</td>\n",
|
|
" <td>22.057729</td>\n",
|
|
" <td>3.476268</td>\n",
|
|
" <td>12.114672</td>\n",
|
|
" <td>14.103918</td>\n",
|
|
" <td>6.474115</td>\n",
|
|
" <td>5.802537</td>\n",
|
|
" <td>20.007208</td>\n",
|
|
" <td>6.816732</td>\n",
|
|
" <td>11.732926</td>\n",
|
|
" <td>12.018913</td>\n",
|
|
" <td>2.534876</td>\n",
|
|
" <td>3.091328</td>\n",
|
|
" <td>3.442422</td>\n",
|
|
" <td>2.471358</td>\n",
|
|
" <td>3.139656</td>\n",
|
|
" <td>6.739384</td>\n",
|
|
" <td>3.790316</td>\n",
|
|
" <td>5.228342</td>\n",
|
|
" <td>11.059004</td>\n",
|
|
" <td>27.202839</td>\n",
|
|
" <td>8.361985</td>\n",
|
|
" <td>5.466230</td>\n",
|
|
" <td>1.208141</td>\n",
|
|
" <td>2.208209</td>\n",
|
|
" <td>2.745261</td>\n",
|
|
" <td>3.052358</td>\n",
|
|
" <td>4.975792</td>\n",
|
|
" <td>8.146182</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Time V1 V2 V3 V4 \\\n",
|
|
"count 984.000000 984.000000 984.000000 984.000000 984.000000 \n",
|
|
"mean 88501.498984 -2.445079 1.781022 -3.509406 2.214004 \n",
|
|
"std 48996.269445 5.512352 3.713232 6.223001 3.231076 \n",
|
|
"min 60.000000 -30.552380 -15.799625 -31.103685 -3.863126 \n",
|
|
"25% 45531.000000 -2.867222 -0.155438 -5.084967 -0.172018 \n",
|
|
"50% 83076.500000 -0.823244 0.957399 -1.381998 1.287041 \n",
|
|
"75% 135051.500000 0.919444 2.791569 0.356911 4.175332 \n",
|
|
"max 172733.000000 2.335833 22.057729 3.476268 12.114672 \n",
|
|
"\n",
|
|
" V5 V6 V7 V8 V9 V10 \\\n",
|
|
"count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n",
|
|
"mean -1.477993 -0.713150 -2.787427 0.279073 -1.253108 -2.841500 \n",
|
|
"std 4.274632 1.789350 5.856197 4.857643 2.371055 4.563067 \n",
|
|
"min -22.105532 -10.261990 -43.557242 -41.044261 -13.434066 -24.588262 \n",
|
|
"25% -1.700260 -1.619179 -3.066415 -0.204192 -2.279453 -4.572043 \n",
|
|
"50% -0.394605 -0.689473 -0.668321 0.147397 -0.694910 -0.948441 \n",
|
|
"75% 0.616305 0.069620 0.265089 0.877002 0.134399 -0.016047 \n",
|
|
"max 14.103918 6.474115 5.802537 20.007208 6.816732 11.732926 \n",
|
|
"\n",
|
|
" V11 V12 V13 V14 V15 V16 \\\n",
|
|
"count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n",
|
|
"mean 1.930697 -3.124120 -0.026229 -3.502384 -0.039494 -2.097294 \n",
|
|
"std 2.764745 4.595103 1.054377 4.653202 1.002911 3.465619 \n",
|
|
"min -2.613374 -18.683715 -3.223045 -19.214325 -4.498945 -14.129855 \n",
|
|
"25% -0.187147 -5.495221 -0.784589 -6.721799 -0.627097 -3.543426 \n",
|
|
"50% 1.170286 -0.858094 -0.000686 -1.110717 -0.006070 -0.677801 \n",
|
|
"75% 3.586502 0.190356 0.683977 0.110541 0.672903 0.250353 \n",
|
|
"max 12.018913 2.534876 3.091328 3.442422 2.471358 3.139656 \n",
|
|
"\n",
|
|
" V17 V18 V19 V20 V21 V22 \\\n",
|
|
"count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n",
|
|
"mean -3.304208 -1.128950 0.343668 0.175905 0.331911 0.049631 \n",
|
|
"std 5.990033 2.412032 1.290973 1.126258 2.787884 1.167097 \n",
|
|
"min -25.162799 -9.498746 -3.681904 -7.242879 -22.797604 -8.887017 \n",
|
|
"25% -5.302111 -1.809496 -0.412430 -0.187708 -0.157259 -0.509376 \n",
|
|
"50% -0.513640 -0.383038 0.221049 0.040630 0.155404 0.080270 \n",
|
|
"75% 0.313841 0.334927 0.978754 0.445616 0.642724 0.624948 \n",
|
|
"max 6.739384 3.790316 5.228342 11.059004 27.202839 8.361985 \n",
|
|
"\n",
|
|
" V23 V24 V25 V26 V27 V28 \\\n",
|
|
"count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n",
|
|
"mean -0.031264 -0.037389 0.022812 0.027632 0.086286 0.046738 \n",
|
|
"std 1.177562 0.551518 0.677541 0.476480 1.023332 0.479168 \n",
|
|
"min -19.254328 -2.028024 -4.781606 -1.214960 -7.263482 -2.735623 \n",
|
|
"25% -0.240064 -0.379825 -0.321251 -0.281187 -0.061809 -0.050194 \n",
|
|
"50% -0.030318 0.009379 0.049923 -0.007475 0.063100 0.039464 \n",
|
|
"75% 0.180735 0.365624 0.395001 0.324059 0.457194 0.226492 \n",
|
|
"max 5.466230 1.208141 2.208209 2.745261 3.052358 4.975792 \n",
|
|
"\n",
|
|
" Amount Class \n",
|
|
"count 984.000000 984.000000 \n",
|
|
"mean 0.039676 0.500000 \n",
|
|
"std 0.851800 0.500254 \n",
|
|
"min -0.353229 0.000000 \n",
|
|
"25% -0.347302 0.000000 \n",
|
|
"50% -0.280984 0.500000 \n",
|
|
"75% 0.046539 1.000000 \n",
|
|
"max 8.146182 1.000000 "
|
|
]
|
|
},
|
|
"execution_count": 98,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"undersample_data.describe()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Distribution of legitimate and fraudulent transactions in the undersampled dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 99,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Class\n",
|
|
"1 492\n",
|
|
"0 492\n",
|
|
"Name: count, dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 99,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"undersample_data['Class'].value_counts()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Splitting the whole dataset into training and test sets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 100,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X = df.iloc[:, df.columns != 'Class']\n",
|
|
"y = df.iloc[:, df.columns == 'Class']\n",
|
|
"\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Statistical measures of the training set (whole dataset)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 101,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
"Index: 199364 entries, 161145 to 117952\n",
|
|
"Data columns (total 31 columns):\n",
|
|
" # Column Non-Null Count Dtype \n",
|
|
"--- ------ -------------- ----- \n",
|
|
" 0 Time 199364 non-null float64\n",
|
|
" 1 V1 199364 non-null float64\n",
|
|
" 2 V2 199364 non-null float64\n",
|
|
" 3 V3 199364 non-null float64\n",
|
|
" 4 V4 199364 non-null float64\n",
|
|
" 5 V5 199364 non-null float64\n",
|
|
" 6 V6 199364 non-null float64\n",
|
|
" 7 V7 199364 non-null float64\n",
|
|
" 8 V8 199364 non-null float64\n",
|
|
" 9 V9 199364 non-null float64\n",
|
|
" 10 V10 199364 non-null float64\n",
|
|
" 11 V11 199364 non-null float64\n",
|
|
" 12 V12 199364 non-null float64\n",
|
|
" 13 V13 199364 non-null float64\n",
|
|
" 14 V14 199364 non-null float64\n",
|
|
" 15 V15 199364 non-null float64\n",
|
|
" 16 V16 199364 non-null float64\n",
|
|
" 17 V17 199364 non-null float64\n",
|
|
" 18 V18 199364 non-null float64\n",
|
|
" 19 V19 199364 non-null float64\n",
|
|
" 20 V20 199364 non-null float64\n",
|
|
" 21 V21 199364 non-null float64\n",
|
|
" 22 V22 199364 non-null float64\n",
|
|
" 23 V23 199364 non-null float64\n",
|
|
" 24 V24 199364 non-null float64\n",
|
|
" 25 V25 199364 non-null float64\n",
|
|
" 26 V26 199364 non-null float64\n",
|
|
" 27 V27 199364 non-null float64\n",
|
|
" 28 V28 199364 non-null float64\n",
|
|
" 29 Amount 199364 non-null float64\n",
|
|
" 30 Class 199364 non-null int64 \n",
|
|
"dtypes: float64(30), int64(1)\n",
|
|
"memory usage: 48.7 MB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.concat([X_train, y_train], axis=1).info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 102,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Time</th>\n",
|
|
" <th>V1</th>\n",
|
|
" <th>V2</th>\n",
|
|
" <th>V3</th>\n",
|
|
" <th>V4</th>\n",
|
|
" <th>V5</th>\n",
|
|
" <th>V6</th>\n",
|
|
" <th>V7</th>\n",
|
|
" <th>V8</th>\n",
|
|
" <th>V9</th>\n",
|
|
" <th>V10</th>\n",
|
|
" <th>V11</th>\n",
|
|
" <th>V12</th>\n",
|
|
" <th>V13</th>\n",
|
|
" <th>V14</th>\n",
|
|
" <th>V15</th>\n",
|
|
" <th>V16</th>\n",
|
|
" <th>V17</th>\n",
|
|
" <th>V18</th>\n",
|
|
" <th>V19</th>\n",
|
|
" <th>V20</th>\n",
|
|
" <th>V21</th>\n",
|
|
" <th>V22</th>\n",
|
|
" <th>V23</th>\n",
|
|
" <th>V24</th>\n",
|
|
" <th>V25</th>\n",
|
|
" <th>V26</th>\n",
|
|
" <th>V27</th>\n",
|
|
" <th>V28</th>\n",
|
|
" <th>Amount</th>\n",
|
|
" <th>Class</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>count</th>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" <td>199364.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>mean</th>\n",
|
|
" <td>94799.493936</td>\n",
|
|
" <td>0.000315</td>\n",
|
|
" <td>-0.002690</td>\n",
|
|
" <td>-0.001532</td>\n",
|
|
" <td>0.000721</td>\n",
|
|
" <td>-0.001494</td>\n",
|
|
" <td>-0.000210</td>\n",
|
|
" <td>-0.000870</td>\n",
|
|
" <td>-0.001980</td>\n",
|
|
" <td>0.000212</td>\n",
|
|
" <td>0.001357</td>\n",
|
|
" <td>-0.001039</td>\n",
|
|
" <td>-0.001565</td>\n",
|
|
" <td>0.000693</td>\n",
|
|
" <td>0.000137</td>\n",
|
|
" <td>0.000322</td>\n",
|
|
" <td>0.000084</td>\n",
|
|
" <td>0.000292</td>\n",
|
|
" <td>-0.000134</td>\n",
|
|
" <td>0.000490</td>\n",
|
|
" <td>0.000430</td>\n",
|
|
" <td>-0.000014</td>\n",
|
|
" <td>-0.000022</td>\n",
|
|
" <td>-0.000258</td>\n",
|
|
" <td>0.000362</td>\n",
|
|
" <td>0.000395</td>\n",
|
|
" <td>-0.000094</td>\n",
|
|
" <td>-0.000027</td>\n",
|
|
" <td>0.000015</td>\n",
|
|
" <td>0.001271</td>\n",
|
|
" <td>0.001731</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>std</th>\n",
|
|
" <td>47499.835491</td>\n",
|
|
" <td>1.963554</td>\n",
|
|
" <td>1.657379</td>\n",
|
|
" <td>1.516716</td>\n",
|
|
" <td>1.417138</td>\n",
|
|
" <td>1.368744</td>\n",
|
|
" <td>1.328673</td>\n",
|
|
" <td>1.226018</td>\n",
|
|
" <td>1.212338</td>\n",
|
|
" <td>1.102021</td>\n",
|
|
" <td>1.092801</td>\n",
|
|
" <td>1.020027</td>\n",
|
|
" <td>0.996526</td>\n",
|
|
" <td>0.997718</td>\n",
|
|
" <td>0.956938</td>\n",
|
|
" <td>0.916143</td>\n",
|
|
" <td>0.876131</td>\n",
|
|
" <td>0.852181</td>\n",
|
|
" <td>0.837556</td>\n",
|
|
" <td>0.814506</td>\n",
|
|
" <td>0.770257</td>\n",
|
|
" <td>0.743450</td>\n",
|
|
" <td>0.727625</td>\n",
|
|
" <td>0.629145</td>\n",
|
|
" <td>0.605298</td>\n",
|
|
" <td>0.521175</td>\n",
|
|
" <td>0.481842</td>\n",
|
|
" <td>0.401042</td>\n",
|
|
" <td>0.324849</td>\n",
|
|
" <td>0.983948</td>\n",
|
|
" <td>0.041563</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>min</th>\n",
|
|
" <td>0.000000</td>\n",
|
|
" <td>-46.855047</td>\n",
|
|
" <td>-63.344698</td>\n",
|
|
" <td>-33.680984</td>\n",
|
|
" <td>-5.560118</td>\n",
|
|
" <td>-42.147898</td>\n",
|
|
" <td>-23.496714</td>\n",
|
|
" <td>-43.557242</td>\n",
|
|
" <td>-73.216718</td>\n",
|
|
" <td>-13.434066</td>\n",
|
|
" <td>-24.588262</td>\n",
|
|
" <td>-4.797473</td>\n",
|
|
" <td>-17.769143</td>\n",
|
|
" <td>-5.791881</td>\n",
|
|
" <td>-19.214325</td>\n",
|
|
" <td>-4.498945</td>\n",
|
|
" <td>-14.129855</td>\n",
|
|
" <td>-25.162799</td>\n",
|
|
" <td>-9.498746</td>\n",
|
|
" <td>-7.213527</td>\n",
|
|
" <td>-23.646890</td>\n",
|
|
" <td>-34.830382</td>\n",
|
|
" <td>-10.933144</td>\n",
|
|
" <td>-44.807735</td>\n",
|
|
" <td>-2.822684</td>\n",
|
|
" <td>-10.295397</td>\n",
|
|
" <td>-2.534330</td>\n",
|
|
" <td>-22.565679</td>\n",
|
|
" <td>-11.710896</td>\n",
|
|
" <td>-0.353229</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25%</th>\n",
|
|
" <td>54126.000000</td>\n",
|
|
" <td>-0.921539</td>\n",
|
|
" <td>-0.601213</td>\n",
|
|
" <td>-0.892838</td>\n",
|
|
" <td>-0.848835</td>\n",
|
|
" <td>-0.692874</td>\n",
|
|
" <td>-0.769177</td>\n",
|
|
" <td>-0.554220</td>\n",
|
|
" <td>-0.209086</td>\n",
|
|
" <td>-0.644753</td>\n",
|
|
" <td>-0.535493</td>\n",
|
|
" <td>-0.762852</td>\n",
|
|
" <td>-0.407660</td>\n",
|
|
" <td>-0.648456</td>\n",
|
|
" <td>-0.425122</td>\n",
|
|
" <td>-0.583616</td>\n",
|
|
" <td>-0.467945</td>\n",
|
|
" <td>-0.484055</td>\n",
|
|
" <td>-0.498850</td>\n",
|
|
" <td>-0.456800</td>\n",
|
|
" <td>-0.211662</td>\n",
|
|
" <td>-0.229272</td>\n",
|
|
" <td>-0.544345</td>\n",
|
|
" <td>-0.162021</td>\n",
|
|
" <td>-0.354179</td>\n",
|
|
" <td>-0.316088</td>\n",
|
|
" <td>-0.327327</td>\n",
|
|
" <td>-0.070864</td>\n",
|
|
" <td>-0.052907</td>\n",
|
|
" <td>-0.330640</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>50%</th>\n",
|
|
" <td>84633.500000</td>\n",
|
|
" <td>0.019705</td>\n",
|
|
" <td>0.063784</td>\n",
|
|
" <td>0.177888</td>\n",
|
|
" <td>-0.017852</td>\n",
|
|
" <td>-0.055832</td>\n",
|
|
" <td>-0.274397</td>\n",
|
|
" <td>0.039228</td>\n",
|
|
" <td>0.021803</td>\n",
|
|
" <td>-0.049633</td>\n",
|
|
" <td>-0.092069</td>\n",
|
|
" <td>-0.034135</td>\n",
|
|
" <td>0.137912</td>\n",
|
|
" <td>-0.013416</td>\n",
|
|
" <td>0.051179</td>\n",
|
|
" <td>0.049289</td>\n",
|
|
" <td>0.067772</td>\n",
|
|
" <td>-0.065113</td>\n",
|
|
" <td>-0.003217</td>\n",
|
|
" <td>0.004422</td>\n",
|
|
" <td>-0.062889</td>\n",
|
|
" <td>-0.029045</td>\n",
|
|
" <td>0.006744</td>\n",
|
|
" <td>-0.010915</td>\n",
|
|
" <td>0.040974</td>\n",
|
|
" <td>0.018014</td>\n",
|
|
" <td>-0.052287</td>\n",
|
|
" <td>0.001064</td>\n",
|
|
" <td>0.011119</td>\n",
|
|
" <td>-0.265271</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>75%</th>\n",
|
|
" <td>139334.250000</td>\n",
|
|
" <td>1.316707</td>\n",
|
|
" <td>0.802437</td>\n",
|
|
" <td>1.025529</td>\n",
|
|
" <td>0.745566</td>\n",
|
|
" <td>0.609349</td>\n",
|
|
" <td>0.397928</td>\n",
|
|
" <td>0.569638</td>\n",
|
|
" <td>0.327023</td>\n",
|
|
" <td>0.597096</td>\n",
|
|
" <td>0.458129</td>\n",
|
|
" <td>0.738143</td>\n",
|
|
" <td>0.617393</td>\n",
|
|
" <td>0.664148</td>\n",
|
|
" <td>0.493925</td>\n",
|
|
" <td>0.649589</td>\n",
|
|
" <td>0.523095</td>\n",
|
|
" <td>0.401034</td>\n",
|
|
" <td>0.500436</td>\n",
|
|
" <td>0.460367</td>\n",
|
|
" <td>0.132834</td>\n",
|
|
" <td>0.187095</td>\n",
|
|
" <td>0.531017</td>\n",
|
|
" <td>0.147503</td>\n",
|
|
" <td>0.438953</td>\n",
|
|
" <td>0.350802</td>\n",
|
|
" <td>0.241082</td>\n",
|
|
" <td>0.090491</td>\n",
|
|
" <td>0.077989</td>\n",
|
|
" <td>-0.043058</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>max</th>\n",
|
|
" <td>172792.000000</td>\n",
|
|
" <td>2.451888</td>\n",
|
|
" <td>22.057729</td>\n",
|
|
" <td>9.382558</td>\n",
|
|
" <td>16.715537</td>\n",
|
|
" <td>34.099309</td>\n",
|
|
" <td>23.917837</td>\n",
|
|
" <td>44.054461</td>\n",
|
|
" <td>20.007208</td>\n",
|
|
" <td>15.594995</td>\n",
|
|
" <td>23.745136</td>\n",
|
|
" <td>12.018913</td>\n",
|
|
" <td>7.848392</td>\n",
|
|
" <td>4.569009</td>\n",
|
|
" <td>10.526766</td>\n",
|
|
" <td>5.825654</td>\n",
|
|
" <td>7.059132</td>\n",
|
|
" <td>9.207059</td>\n",
|
|
" <td>5.041069</td>\n",
|
|
" <td>5.572113</td>\n",
|
|
" <td>39.420904</td>\n",
|
|
" <td>27.202839</td>\n",
|
|
" <td>10.503090</td>\n",
|
|
" <td>22.528412</td>\n",
|
|
" <td>4.022866</td>\n",
|
|
" <td>7.519589</td>\n",
|
|
" <td>3.463246</td>\n",
|
|
" <td>12.152401</td>\n",
|
|
" <td>22.620072</td>\n",
|
|
" <td>78.235272</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Time V1 V2 V3 \\\n",
|
|
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
|
|
"mean 94799.493936 0.000315 -0.002690 -0.001532 \n",
|
|
"std 47499.835491 1.963554 1.657379 1.516716 \n",
|
|
"min 0.000000 -46.855047 -63.344698 -33.680984 \n",
|
|
"25% 54126.000000 -0.921539 -0.601213 -0.892838 \n",
|
|
"50% 84633.500000 0.019705 0.063784 0.177888 \n",
|
|
"75% 139334.250000 1.316707 0.802437 1.025529 \n",
|
|
"max 172792.000000 2.451888 22.057729 9.382558 \n",
|
|
"\n",
|
|
" V4 V5 V6 V7 \\\n",
|
|
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
|
|
"mean 0.000721 -0.001494 -0.000210 -0.000870 \n",
|
|
"std 1.417138 1.368744 1.328673 1.226018 \n",
|
|
"min -5.560118 -42.147898 -23.496714 -43.557242 \n",
|
|
"25% -0.848835 -0.692874 -0.769177 -0.554220 \n",
|
|
"50% -0.017852 -0.055832 -0.274397 0.039228 \n",
|
|
"75% 0.745566 0.609349 0.397928 0.569638 \n",
|
|
"max 16.715537 34.099309 23.917837 44.054461 \n",
|
|
"\n",
|
|
" V8 V9 V10 V11 \\\n",
|
|
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
|
|
"mean -0.001980 0.000212 0.001357 -0.001039 \n",
|
|
"std 1.212338 1.102021 1.092801 1.020027 \n",
|
|
"min -73.216718 -13.434066 -24.588262 -4.797473 \n",
|
|
"25% -0.209086 -0.644753 -0.535493 -0.762852 \n",
|
|
"50% 0.021803 -0.049633 -0.092069 -0.034135 \n",
|
|
"75% 0.327023 0.597096 0.458129 0.738143 \n",
|
|
"max 20.007208 15.594995 23.745136 12.018913 \n",
|
|
"\n",
|
|
" V12 V13 V14 V15 \\\n",
|
|
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
|
|
"mean -0.001565 0.000693 0.000137 0.000322 \n",
|
|
"std 0.996526 0.997718 0.956938 0.916143 \n",
|
|
"min -17.769143 -5.791881 -19.214325 -4.498945 \n",
|
|
"25% -0.407660 -0.648456 -0.425122 -0.583616 \n",
|
|
"50% 0.137912 -0.013416 0.051179 0.049289 \n",
|
|
"75% 0.617393 0.664148 0.493925 0.649589 \n",
|
|
"max 7.848392 4.569009 10.526766 5.825654 \n",
|
|
"\n",
|
|
" V16 V17 V18 V19 \\\n",
|
|
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
|
|
"mean 0.000084 0.000292 -0.000134 0.000490 \n",
|
|
"std 0.876131 0.852181 0.837556 0.814506 \n",
|
|
"min -14.129855 -25.162799 -9.498746 -7.213527 \n",
|
|
"25% -0.467945 -0.484055 -0.498850 -0.456800 \n",
|
|
"50% 0.067772 -0.065113 -0.003217 0.004422 \n",
|
|
"75% 0.523095 0.401034 0.500436 0.460367 \n",
|
|
"max 7.059132 9.207059 5.041069 5.572113 \n",
|
|
"\n",
|
|
" V20 V21 V22 V23 \\\n",
|
|
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
|
|
"mean 0.000430 -0.000014 -0.000022 -0.000258 \n",
|
|
"std 0.770257 0.743450 0.727625 0.629145 \n",
|
|
"min -23.646890 -34.830382 -10.933144 -44.807735 \n",
|
|
"25% -0.211662 -0.229272 -0.544345 -0.162021 \n",
|
|
"50% -0.062889 -0.029045 0.006744 -0.010915 \n",
|
|
"75% 0.132834 0.187095 0.531017 0.147503 \n",
|
|
"max 39.420904 27.202839 10.503090 22.528412 \n",
|
|
"\n",
|
|
" V24 V25 V26 V27 \\\n",
|
|
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
|
|
"mean 0.000362 0.000395 -0.000094 -0.000027 \n",
|
|
"std 0.605298 0.521175 0.481842 0.401042 \n",
|
|
"min -2.822684 -10.295397 -2.534330 -22.565679 \n",
|
|
"25% -0.354179 -0.316088 -0.327327 -0.070864 \n",
|
|
"50% 0.040974 0.018014 -0.052287 0.001064 \n",
|
|
"75% 0.438953 0.350802 0.241082 0.090491 \n",
|
|
"max 4.022866 7.519589 3.463246 12.152401 \n",
|
|
"\n",
|
|
" V28 Amount Class \n",
|
|
"count 199364.000000 199364.000000 199364.000000 \n",
|
|
"mean 0.000015 0.001271 0.001731 \n",
|
|
"std 0.324849 0.983948 0.041563 \n",
|
|
"min -11.710896 -0.353229 0.000000 \n",
|
|
"25% -0.052907 -0.330640 0.000000 \n",
|
|
"50% 0.011119 -0.265271 0.000000 \n",
|
|
"75% 0.077989 -0.043058 0.000000 \n",
|
|
"max 22.620072 78.235272 1.000000 "
|
|
]
|
|
},
|
|
"execution_count": 102,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.concat([X_train, y_train], axis=1).describe()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 103,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Class\n",
|
|
"0 199019\n",
|
|
"1 345\n",
|
|
"Name: count, dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 103,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.concat([X_train, y_train], axis=1)['Class'].value_counts()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Statistical measures of the test set (whole dataset)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 104,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
"Index: 85443 entries, 183484 to 240913\n",
|
|
"Data columns (total 31 columns):\n",
|
|
" # Column Non-Null Count Dtype \n",
|
|
"--- ------ -------------- ----- \n",
|
|
" 0 Time 85443 non-null float64\n",
|
|
" 1 V1 85443 non-null float64\n",
|
|
" 2 V2 85443 non-null float64\n",
|
|
" 3 V3 85443 non-null float64\n",
|
|
" 4 V4 85443 non-null float64\n",
|
|
" 5 V5 85443 non-null float64\n",
|
|
" 6 V6 85443 non-null float64\n",
|
|
" 7 V7 85443 non-null float64\n",
|
|
" 8 V8 85443 non-null float64\n",
|
|
" 9 V9 85443 non-null float64\n",
|
|
" 10 V10 85443 non-null float64\n",
|
|
" 11 V11 85443 non-null float64\n",
|
|
" 12 V12 85443 non-null float64\n",
|
|
" 13 V13 85443 non-null float64\n",
|
|
" 14 V14 85443 non-null float64\n",
|
|
" 15 V15 85443 non-null float64\n",
|
|
" 16 V16 85443 non-null float64\n",
|
|
" 17 V17 85443 non-null float64\n",
|
|
" 18 V18 85443 non-null float64\n",
|
|
" 19 V19 85443 non-null float64\n",
|
|
" 20 V20 85443 non-null float64\n",
|
|
" 21 V21 85443 non-null float64\n",
|
|
" 22 V22 85443 non-null float64\n",
|
|
" 23 V23 85443 non-null float64\n",
|
|
" 24 V24 85443 non-null float64\n",
|
|
" 25 V25 85443 non-null float64\n",
|
|
" 26 V26 85443 non-null float64\n",
|
|
" 27 V27 85443 non-null float64\n",
|
|
" 28 V28 85443 non-null float64\n",
|
|
" 29 Amount 85443 non-null float64\n",
|
|
" 30 Class 85443 non-null int64 \n",
|
|
"dtypes: float64(30), int64(1)\n",
|
|
"memory usage: 20.9 MB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.concat([X_test, y_test], axis=1).info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 105,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Time</th>\n",
|
|
" <th>V1</th>\n",
|
|
" <th>V2</th>\n",
|
|
" <th>V3</th>\n",
|
|
" <th>V4</th>\n",
|
|
" <th>V5</th>\n",
|
|
" <th>V6</th>\n",
|
|
" <th>V7</th>\n",
|
|
" <th>V8</th>\n",
|
|
" <th>V9</th>\n",
|
|
" <th>V10</th>\n",
|
|
" <th>V11</th>\n",
|
|
" <th>V12</th>\n",
|
|
" <th>V13</th>\n",
|
|
" <th>V14</th>\n",
|
|
" <th>V15</th>\n",
|
|
" <th>V16</th>\n",
|
|
" <th>V17</th>\n",
|
|
" <th>V18</th>\n",
|
|
" <th>V19</th>\n",
|
|
" <th>V20</th>\n",
|
|
" <th>V21</th>\n",
|
|
" <th>V22</th>\n",
|
|
" <th>V23</th>\n",
|
|
" <th>V24</th>\n",
|
|
" <th>V25</th>\n",
|
|
" <th>V26</th>\n",
|
|
" <th>V27</th>\n",
|
|
" <th>V28</th>\n",
|
|
" <th>Amount</th>\n",
|
|
" <th>Class</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>count</th>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" <td>85443.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>mean</th>\n",
|
|
" <td>94847.378896</td>\n",
|
|
" <td>-0.000734</td>\n",
|
|
" <td>0.006277</td>\n",
|
|
" <td>0.003574</td>\n",
|
|
" <td>-0.001682</td>\n",
|
|
" <td>0.003486</td>\n",
|
|
" <td>0.000489</td>\n",
|
|
" <td>0.002030</td>\n",
|
|
" <td>0.004620</td>\n",
|
|
" <td>-0.000495</td>\n",
|
|
" <td>-0.003167</td>\n",
|
|
" <td>0.002424</td>\n",
|
|
" <td>0.003652</td>\n",
|
|
" <td>-0.001616</td>\n",
|
|
" <td>-0.000319</td>\n",
|
|
" <td>-0.000751</td>\n",
|
|
" <td>-0.000195</td>\n",
|
|
" <td>-0.000682</td>\n",
|
|
" <td>0.000312</td>\n",
|
|
" <td>-0.001144</td>\n",
|
|
" <td>-0.001004</td>\n",
|
|
" <td>0.000033</td>\n",
|
|
" <td>0.000052</td>\n",
|
|
" <td>0.000602</td>\n",
|
|
" <td>-0.000845</td>\n",
|
|
" <td>-0.000922</td>\n",
|
|
" <td>0.000220</td>\n",
|
|
" <td>0.000062</td>\n",
|
|
" <td>-0.000036</td>\n",
|
|
" <td>-0.002966</td>\n",
|
|
" <td>0.001720</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>std</th>\n",
|
|
" <td>47461.120548</td>\n",
|
|
" <td>1.947325</td>\n",
|
|
" <td>1.637050</td>\n",
|
|
" <td>1.515182</td>\n",
|
|
" <td>1.412908</td>\n",
|
|
" <td>1.406722</td>\n",
|
|
" <td>1.340636</td>\n",
|
|
" <td>1.262562</td>\n",
|
|
" <td>1.151291</td>\n",
|
|
" <td>1.090691</td>\n",
|
|
" <td>1.079574</td>\n",
|
|
" <td>1.022315</td>\n",
|
|
" <td>1.005413</td>\n",
|
|
" <td>0.989553</td>\n",
|
|
" <td>0.962457</td>\n",
|
|
" <td>0.913388</td>\n",
|
|
" <td>0.876542</td>\n",
|
|
" <td>0.842669</td>\n",
|
|
" <td>0.839626</td>\n",
|
|
" <td>0.812957</td>\n",
|
|
" <td>0.772484</td>\n",
|
|
" <td>0.713266</td>\n",
|
|
" <td>0.721198</td>\n",
|
|
" <td>0.613394</td>\n",
|
|
" <td>0.606464</td>\n",
|
|
" <td>0.521520</td>\n",
|
|
" <td>0.483126</td>\n",
|
|
" <td>0.409616</td>\n",
|
|
" <td>0.341987</td>\n",
|
|
" <td>1.036492</td>\n",
|
|
" <td>0.041443</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>min</th>\n",
|
|
" <td>0.000000</td>\n",
|
|
" <td>-56.407510</td>\n",
|
|
" <td>-72.715728</td>\n",
|
|
" <td>-48.325589</td>\n",
|
|
" <td>-5.683171</td>\n",
|
|
" <td>-113.743307</td>\n",
|
|
" <td>-26.160506</td>\n",
|
|
" <td>-28.215112</td>\n",
|
|
" <td>-50.943369</td>\n",
|
|
" <td>-9.481456</td>\n",
|
|
" <td>-20.949192</td>\n",
|
|
" <td>-4.568390</td>\n",
|
|
" <td>-18.683715</td>\n",
|
|
" <td>-3.888606</td>\n",
|
|
" <td>-18.493773</td>\n",
|
|
" <td>-4.391307</td>\n",
|
|
" <td>-13.303888</td>\n",
|
|
" <td>-22.883999</td>\n",
|
|
" <td>-9.287832</td>\n",
|
|
" <td>-6.938297</td>\n",
|
|
" <td>-54.497720</td>\n",
|
|
" <td>-22.665685</td>\n",
|
|
" <td>-9.499423</td>\n",
|
|
" <td>-32.828995</td>\n",
|
|
" <td>-2.836627</td>\n",
|
|
" <td>-8.696627</td>\n",
|
|
" <td>-2.604551</td>\n",
|
|
" <td>-9.793568</td>\n",
|
|
" <td>-15.430084</td>\n",
|
|
" <td>-0.353229</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25%</th>\n",
|
|
" <td>54354.000000</td>\n",
|
|
" <td>-0.916858</td>\n",
|
|
" <td>-0.591858</td>\n",
|
|
" <td>-0.883828</td>\n",
|
|
" <td>-0.848202</td>\n",
|
|
" <td>-0.688280</td>\n",
|
|
" <td>-0.766664</td>\n",
|
|
" <td>-0.553479</td>\n",
|
|
" <td>-0.207216</td>\n",
|
|
" <td>-0.638926</td>\n",
|
|
" <td>-0.535400</td>\n",
|
|
" <td>-0.761716</td>\n",
|
|
" <td>-0.400087</td>\n",
|
|
" <td>-0.648761</td>\n",
|
|
" <td>-0.426516</td>\n",
|
|
" <td>-0.581015</td>\n",
|
|
" <td>-0.468312</td>\n",
|
|
" <td>-0.483139</td>\n",
|
|
" <td>-0.498660</td>\n",
|
|
" <td>-0.455027</td>\n",
|
|
" <td>-0.211881</td>\n",
|
|
" <td>-0.226184</td>\n",
|
|
" <td>-0.537704</td>\n",
|
|
" <td>-0.161490</td>\n",
|
|
" <td>-0.355671</td>\n",
|
|
" <td>-0.319736</td>\n",
|
|
" <td>-0.326068</td>\n",
|
|
" <td>-0.070797</td>\n",
|
|
" <td>-0.053129</td>\n",
|
|
" <td>-0.331280</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>50%</th>\n",
|
|
" <td>84850.000000</td>\n",
|
|
" <td>0.013238</td>\n",
|
|
" <td>0.070185</td>\n",
|
|
" <td>0.185047</td>\n",
|
|
" <td>-0.024109</td>\n",
|
|
" <td>-0.051627</td>\n",
|
|
" <td>-0.273686</td>\n",
|
|
" <td>0.042343</td>\n",
|
|
" <td>0.023782</td>\n",
|
|
" <td>-0.053821</td>\n",
|
|
" <td>-0.094949</td>\n",
|
|
" <td>-0.029129</td>\n",
|
|
" <td>0.144948</td>\n",
|
|
" <td>-0.013803</td>\n",
|
|
" <td>0.049248</td>\n",
|
|
" <td>0.045291</td>\n",
|
|
" <td>0.062957</td>\n",
|
|
" <td>-0.066955</td>\n",
|
|
" <td>-0.004245</td>\n",
|
|
" <td>0.002229</td>\n",
|
|
" <td>-0.061529</td>\n",
|
|
" <td>-0.030687</td>\n",
|
|
" <td>0.006971</td>\n",
|
|
" <td>-0.011789</td>\n",
|
|
" <td>0.040976</td>\n",
|
|
" <td>0.013508</td>\n",
|
|
" <td>-0.051695</td>\n",
|
|
" <td>0.001984</td>\n",
|
|
" <td>0.011561</td>\n",
|
|
" <td>-0.265271</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>75%</th>\n",
|
|
" <td>139277.500000</td>\n",
|
|
" <td>1.313257</td>\n",
|
|
" <td>0.806615</td>\n",
|
|
" <td>1.031155</td>\n",
|
|
" <td>0.737784</td>\n",
|
|
" <td>0.618067</td>\n",
|
|
" <td>0.399864</td>\n",
|
|
" <td>0.572423</td>\n",
|
|
" <td>0.328337</td>\n",
|
|
" <td>0.597388</td>\n",
|
|
" <td>0.443126</td>\n",
|
|
" <td>0.743511</td>\n",
|
|
" <td>0.620694</td>\n",
|
|
" <td>0.657826</td>\n",
|
|
" <td>0.491916</td>\n",
|
|
" <td>0.647117</td>\n",
|
|
" <td>0.523608</td>\n",
|
|
" <td>0.396799</td>\n",
|
|
" <td>0.501455</td>\n",
|
|
" <td>0.455249</td>\n",
|
|
" <td>0.133608</td>\n",
|
|
" <td>0.184846</td>\n",
|
|
" <td>0.523689</td>\n",
|
|
" <td>0.147923</td>\n",
|
|
" <td>0.441093</td>\n",
|
|
" <td>0.350617</td>\n",
|
|
" <td>0.240657</td>\n",
|
|
" <td>0.092224</td>\n",
|
|
" <td>0.078900</td>\n",
|
|
" <td>-0.047356</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>max</th>\n",
|
|
" <td>172788.000000</td>\n",
|
|
" <td>2.454930</td>\n",
|
|
" <td>15.876923</td>\n",
|
|
" <td>4.079168</td>\n",
|
|
" <td>16.875344</td>\n",
|
|
" <td>34.801666</td>\n",
|
|
" <td>73.301626</td>\n",
|
|
" <td>120.589494</td>\n",
|
|
" <td>18.748872</td>\n",
|
|
" <td>9.272376</td>\n",
|
|
" <td>15.331742</td>\n",
|
|
" <td>11.669205</td>\n",
|
|
" <td>4.406338</td>\n",
|
|
" <td>7.126883</td>\n",
|
|
" <td>7.439566</td>\n",
|
|
" <td>8.877742</td>\n",
|
|
" <td>17.315112</td>\n",
|
|
" <td>9.253526</td>\n",
|
|
" <td>4.712398</td>\n",
|
|
" <td>5.591971</td>\n",
|
|
" <td>38.117209</td>\n",
|
|
" <td>22.579714</td>\n",
|
|
" <td>7.220158</td>\n",
|
|
" <td>20.803344</td>\n",
|
|
" <td>4.584549</td>\n",
|
|
" <td>5.826159</td>\n",
|
|
" <td>3.517346</td>\n",
|
|
" <td>31.612198</td>\n",
|
|
" <td>33.847808</td>\n",
|
|
" <td>102.362243</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Time V1 V2 V3 V4 \\\n",
|
|
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
|
|
"mean 94847.378896 -0.000734 0.006277 0.003574 -0.001682 \n",
|
|
"std 47461.120548 1.947325 1.637050 1.515182 1.412908 \n",
|
|
"min 0.000000 -56.407510 -72.715728 -48.325589 -5.683171 \n",
|
|
"25% 54354.000000 -0.916858 -0.591858 -0.883828 -0.848202 \n",
|
|
"50% 84850.000000 0.013238 0.070185 0.185047 -0.024109 \n",
|
|
"75% 139277.500000 1.313257 0.806615 1.031155 0.737784 \n",
|
|
"max 172788.000000 2.454930 15.876923 4.079168 16.875344 \n",
|
|
"\n",
|
|
" V5 V6 V7 V8 V9 \\\n",
|
|
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
|
|
"mean 0.003486 0.000489 0.002030 0.004620 -0.000495 \n",
|
|
"std 1.406722 1.340636 1.262562 1.151291 1.090691 \n",
|
|
"min -113.743307 -26.160506 -28.215112 -50.943369 -9.481456 \n",
|
|
"25% -0.688280 -0.766664 -0.553479 -0.207216 -0.638926 \n",
|
|
"50% -0.051627 -0.273686 0.042343 0.023782 -0.053821 \n",
|
|
"75% 0.618067 0.399864 0.572423 0.328337 0.597388 \n",
|
|
"max 34.801666 73.301626 120.589494 18.748872 9.272376 \n",
|
|
"\n",
|
|
" V10 V11 V12 V13 V14 \\\n",
|
|
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
|
|
"mean -0.003167 0.002424 0.003652 -0.001616 -0.000319 \n",
|
|
"std 1.079574 1.022315 1.005413 0.989553 0.962457 \n",
|
|
"min -20.949192 -4.568390 -18.683715 -3.888606 -18.493773 \n",
|
|
"25% -0.535400 -0.761716 -0.400087 -0.648761 -0.426516 \n",
|
|
"50% -0.094949 -0.029129 0.144948 -0.013803 0.049248 \n",
|
|
"75% 0.443126 0.743511 0.620694 0.657826 0.491916 \n",
|
|
"max 15.331742 11.669205 4.406338 7.126883 7.439566 \n",
|
|
"\n",
|
|
" V15 V16 V17 V18 V19 \\\n",
|
|
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
|
|
"mean -0.000751 -0.000195 -0.000682 0.000312 -0.001144 \n",
|
|
"std 0.913388 0.876542 0.842669 0.839626 0.812957 \n",
|
|
"min -4.391307 -13.303888 -22.883999 -9.287832 -6.938297 \n",
|
|
"25% -0.581015 -0.468312 -0.483139 -0.498660 -0.455027 \n",
|
|
"50% 0.045291 0.062957 -0.066955 -0.004245 0.002229 \n",
|
|
"75% 0.647117 0.523608 0.396799 0.501455 0.455249 \n",
|
|
"max 8.877742 17.315112 9.253526 4.712398 5.591971 \n",
|
|
"\n",
|
|
" V20 V21 V22 V23 V24 \\\n",
|
|
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
|
|
"mean -0.001004 0.000033 0.000052 0.000602 -0.000845 \n",
|
|
"std 0.772484 0.713266 0.721198 0.613394 0.606464 \n",
|
|
"min -54.497720 -22.665685 -9.499423 -32.828995 -2.836627 \n",
|
|
"25% -0.211881 -0.226184 -0.537704 -0.161490 -0.355671 \n",
|
|
"50% -0.061529 -0.030687 0.006971 -0.011789 0.040976 \n",
|
|
"75% 0.133608 0.184846 0.523689 0.147923 0.441093 \n",
|
|
"max 38.117209 22.579714 7.220158 20.803344 4.584549 \n",
|
|
"\n",
|
|
" V25 V26 V27 V28 Amount \\\n",
|
|
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
|
|
"mean -0.000922 0.000220 0.000062 -0.000036 -0.002966 \n",
|
|
"std 0.521520 0.483126 0.409616 0.341987 1.036492 \n",
|
|
"min -8.696627 -2.604551 -9.793568 -15.430084 -0.353229 \n",
|
|
"25% -0.319736 -0.326068 -0.070797 -0.053129 -0.331280 \n",
|
|
"50% 0.013508 -0.051695 0.001984 0.011561 -0.265271 \n",
|
|
"75% 0.350617 0.240657 0.092224 0.078900 -0.047356 \n",
|
|
"max 5.826159 3.517346 31.612198 33.847808 102.362243 \n",
|
|
"\n",
|
|
" Class \n",
|
|
"count 85443.000000 \n",
|
|
"mean 0.001720 \n",
|
|
"std 0.041443 \n",
|
|
"min 0.000000 \n",
|
|
"25% 0.000000 \n",
|
|
"50% 0.000000 \n",
|
|
"75% 0.000000 \n",
|
|
"max 1.000000 "
|
|
]
|
|
},
|
|
"execution_count": 105,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.concat([X_test, y_test], axis=1).describe()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 106,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Class\n",
|
|
"0 85296\n",
|
|
"1 147\n",
|
|
"Name: count, dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 106,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.concat([X_test, y_test], axis=1)['Class'].value_counts()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Splitting undersampled data into training and test datasets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 107,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = train_test_split(X_undersample, y_undersample, test_size = 0.3, random_state = 0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Statistical measures of the training dataset of undersampled data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 108,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
"Index: 688 entries, 6870 to 208266\n",
|
|
"Data columns (total 31 columns):\n",
|
|
" # Column Non-Null Count Dtype \n",
|
|
"--- ------ -------------- ----- \n",
|
|
" 0 Time 688 non-null float64\n",
|
|
" 1 V1 688 non-null float64\n",
|
|
" 2 V2 688 non-null float64\n",
|
|
" 3 V3 688 non-null float64\n",
|
|
" 4 V4 688 non-null float64\n",
|
|
" 5 V5 688 non-null float64\n",
|
|
" 6 V6 688 non-null float64\n",
|
|
" 7 V7 688 non-null float64\n",
|
|
" 8 V8 688 non-null float64\n",
|
|
" 9 V9 688 non-null float64\n",
|
|
" 10 V10 688 non-null float64\n",
|
|
" 11 V11 688 non-null float64\n",
|
|
" 12 V12 688 non-null float64\n",
|
|
" 13 V13 688 non-null float64\n",
|
|
" 14 V14 688 non-null float64\n",
|
|
" 15 V15 688 non-null float64\n",
|
|
" 16 V16 688 non-null float64\n",
|
|
" 17 V17 688 non-null float64\n",
|
|
" 18 V18 688 non-null float64\n",
|
|
" 19 V19 688 non-null float64\n",
|
|
" 20 V20 688 non-null float64\n",
|
|
" 21 V21 688 non-null float64\n",
|
|
" 22 V22 688 non-null float64\n",
|
|
" 23 V23 688 non-null float64\n",
|
|
" 24 V24 688 non-null float64\n",
|
|
" 25 V25 688 non-null float64\n",
|
|
" 26 V26 688 non-null float64\n",
|
|
" 27 V27 688 non-null float64\n",
|
|
" 28 V28 688 non-null float64\n",
|
|
" 29 Amount 688 non-null float64\n",
|
|
" 30 Class 688 non-null int64 \n",
|
|
"dtypes: float64(30), int64(1)\n",
|
|
"memory usage: 172.0 KB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.concat([X_train_undersample, y_train_undersample], axis=1).info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 109,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Time</th>\n",
|
|
" <th>V1</th>\n",
|
|
" <th>V2</th>\n",
|
|
" <th>V3</th>\n",
|
|
" <th>V4</th>\n",
|
|
" <th>V5</th>\n",
|
|
" <th>V6</th>\n",
|
|
" <th>V7</th>\n",
|
|
" <th>V8</th>\n",
|
|
" <th>V9</th>\n",
|
|
" <th>V10</th>\n",
|
|
" <th>V11</th>\n",
|
|
" <th>V12</th>\n",
|
|
" <th>V13</th>\n",
|
|
" <th>V14</th>\n",
|
|
" <th>V15</th>\n",
|
|
" <th>V16</th>\n",
|
|
" <th>V17</th>\n",
|
|
" <th>V18</th>\n",
|
|
" <th>V19</th>\n",
|
|
" <th>V20</th>\n",
|
|
" <th>V21</th>\n",
|
|
" <th>V22</th>\n",
|
|
" <th>V23</th>\n",
|
|
" <th>V24</th>\n",
|
|
" <th>V25</th>\n",
|
|
" <th>V26</th>\n",
|
|
" <th>V27</th>\n",
|
|
" <th>V28</th>\n",
|
|
" <th>Amount</th>\n",
|
|
" <th>Class</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>count</th>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" <td>688.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>mean</th>\n",
|
|
" <td>88546.635174</td>\n",
|
|
" <td>-2.443642</td>\n",
|
|
" <td>1.748210</td>\n",
|
|
" <td>-3.490693</td>\n",
|
|
" <td>2.161294</td>\n",
|
|
" <td>-1.466909</td>\n",
|
|
" <td>-0.737723</td>\n",
|
|
" <td>-2.759190</td>\n",
|
|
" <td>0.361773</td>\n",
|
|
" <td>-1.222417</td>\n",
|
|
" <td>-2.808144</td>\n",
|
|
" <td>1.937783</td>\n",
|
|
" <td>-3.131850</td>\n",
|
|
" <td>-0.001132</td>\n",
|
|
" <td>-3.568854</td>\n",
|
|
" <td>-0.022936</td>\n",
|
|
" <td>-2.145811</td>\n",
|
|
" <td>-3.365430</td>\n",
|
|
" <td>-1.137238</td>\n",
|
|
" <td>0.377690</td>\n",
|
|
" <td>0.127157</td>\n",
|
|
" <td>0.446495</td>\n",
|
|
" <td>0.012945</td>\n",
|
|
" <td>-0.069031</td>\n",
|
|
" <td>-0.020203</td>\n",
|
|
" <td>0.031782</td>\n",
|
|
" <td>0.022154</td>\n",
|
|
" <td>0.114684</td>\n",
|
|
" <td>0.041557</td>\n",
|
|
" <td>0.036592</td>\n",
|
|
" <td>0.501453</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>std</th>\n",
|
|
" <td>48529.661753</td>\n",
|
|
" <td>5.382638</td>\n",
|
|
" <td>3.616426</td>\n",
|
|
" <td>6.020391</td>\n",
|
|
" <td>3.198221</td>\n",
|
|
" <td>4.227553</td>\n",
|
|
" <td>1.829535</td>\n",
|
|
" <td>5.498995</td>\n",
|
|
" <td>4.741154</td>\n",
|
|
" <td>2.336555</td>\n",
|
|
" <td>4.417548</td>\n",
|
|
" <td>2.771137</td>\n",
|
|
" <td>4.560753</td>\n",
|
|
" <td>1.081826</td>\n",
|
|
" <td>4.641960</td>\n",
|
|
" <td>0.981683</td>\n",
|
|
" <td>3.458663</td>\n",
|
|
" <td>6.062216</td>\n",
|
|
" <td>2.462689</td>\n",
|
|
" <td>1.287256</td>\n",
|
|
" <td>1.072960</td>\n",
|
|
" <td>2.749354</td>\n",
|
|
" <td>1.143940</td>\n",
|
|
" <td>1.283882</td>\n",
|
|
" <td>0.549485</td>\n",
|
|
" <td>0.689015</td>\n",
|
|
" <td>0.474411</td>\n",
|
|
" <td>0.923161</td>\n",
|
|
" <td>0.487077</td>\n",
|
|
" <td>0.834360</td>\n",
|
|
" <td>0.500362</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>min</th>\n",
|
|
" <td>117.000000</td>\n",
|
|
" <td>-30.552380</td>\n",
|
|
" <td>-15.799625</td>\n",
|
|
" <td>-31.103685</td>\n",
|
|
" <td>-3.863126</td>\n",
|
|
" <td>-22.105532</td>\n",
|
|
" <td>-10.261990</td>\n",
|
|
" <td>-37.060311</td>\n",
|
|
" <td>-37.353443</td>\n",
|
|
" <td>-11.126624</td>\n",
|
|
" <td>-23.228255</td>\n",
|
|
" <td>-2.613374</td>\n",
|
|
" <td>-18.431131</td>\n",
|
|
" <td>-3.223045</td>\n",
|
|
" <td>-19.214325</td>\n",
|
|
" <td>-4.498945</td>\n",
|
|
" <td>-13.563273</td>\n",
|
|
" <td>-25.162799</td>\n",
|
|
" <td>-9.498746</td>\n",
|
|
" <td>-3.602657</td>\n",
|
|
" <td>-7.242879</td>\n",
|
|
" <td>-16.922016</td>\n",
|
|
" <td>-8.887017</td>\n",
|
|
" <td>-19.254328</td>\n",
|
|
" <td>-2.028024</td>\n",
|
|
" <td>-4.781606</td>\n",
|
|
" <td>-1.214960</td>\n",
|
|
" <td>-7.263482</td>\n",
|
|
" <td>-2.735623</td>\n",
|
|
" <td>-0.353229</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25%</th>\n",
|
|
" <td>45531.000000</td>\n",
|
|
" <td>-2.867222</td>\n",
|
|
" <td>-0.164478</td>\n",
|
|
" <td>-5.049001</td>\n",
|
|
" <td>-0.212543</td>\n",
|
|
" <td>-1.703845</td>\n",
|
|
" <td>-1.691031</td>\n",
|
|
" <td>-3.105154</td>\n",
|
|
" <td>-0.220868</td>\n",
|
|
" <td>-2.205996</td>\n",
|
|
" <td>-4.731895</td>\n",
|
|
" <td>-0.194163</td>\n",
|
|
" <td>-5.643631</td>\n",
|
|
" <td>-0.767631</td>\n",
|
|
" <td>-6.767749</td>\n",
|
|
" <td>-0.562582</td>\n",
|
|
" <td>-3.612856</td>\n",
|
|
" <td>-5.277726</td>\n",
|
|
" <td>-1.816368</td>\n",
|
|
" <td>-0.373523</td>\n",
|
|
" <td>-0.197730</td>\n",
|
|
" <td>-0.142520</td>\n",
|
|
" <td>-0.510247</td>\n",
|
|
" <td>-0.246005</td>\n",
|
|
" <td>-0.373302</td>\n",
|
|
" <td>-0.320463</td>\n",
|
|
" <td>-0.281449</td>\n",
|
|
" <td>-0.061809</td>\n",
|
|
" <td>-0.050983</td>\n",
|
|
" <td>-0.346113</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>50%</th>\n",
|
|
" <td>82526.500000</td>\n",
|
|
" <td>-0.874057</td>\n",
|
|
" <td>0.984845</td>\n",
|
|
" <td>-1.482880</td>\n",
|
|
" <td>1.285768</td>\n",
|
|
" <td>-0.400360</td>\n",
|
|
" <td>-0.741307</td>\n",
|
|
" <td>-0.740952</td>\n",
|
|
" <td>0.141389</td>\n",
|
|
" <td>-0.694910</td>\n",
|
|
" <td>-0.981569</td>\n",
|
|
" <td>1.154879</td>\n",
|
|
" <td>-0.845463</td>\n",
|
|
" <td>0.008049</td>\n",
|
|
" <td>-1.132761</td>\n",
|
|
" <td>0.001558</td>\n",
|
|
" <td>-0.750918</td>\n",
|
|
" <td>-0.495063</td>\n",
|
|
" <td>-0.392743</td>\n",
|
|
" <td>0.246478</td>\n",
|
|
" <td>0.030556</td>\n",
|
|
" <td>0.163323</td>\n",
|
|
" <td>0.076684</td>\n",
|
|
" <td>-0.027143</td>\n",
|
|
" <td>0.014360</td>\n",
|
|
" <td>0.046511</td>\n",
|
|
" <td>-0.026232</td>\n",
|
|
" <td>0.059798</td>\n",
|
|
" <td>0.036635</td>\n",
|
|
" <td>-0.273188</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>75%</th>\n",
|
|
" <td>135096.750000</td>\n",
|
|
" <td>0.945582</td>\n",
|
|
" <td>2.850947</td>\n",
|
|
" <td>0.348579</td>\n",
|
|
" <td>4.166857</td>\n",
|
|
" <td>0.599892</td>\n",
|
|
" <td>0.033569</td>\n",
|
|
" <td>0.240843</td>\n",
|
|
" <td>0.919999</td>\n",
|
|
" <td>0.196633</td>\n",
|
|
" <td>-0.001047</td>\n",
|
|
" <td>3.625262</td>\n",
|
|
" <td>0.163104</td>\n",
|
|
" <td>0.744021</td>\n",
|
|
" <td>0.086669</td>\n",
|
|
" <td>0.665736</td>\n",
|
|
" <td>0.219809</td>\n",
|
|
" <td>0.314206</td>\n",
|
|
" <td>0.371481</td>\n",
|
|
" <td>0.978754</td>\n",
|
|
" <td>0.443495</td>\n",
|
|
" <td>0.680597</td>\n",
|
|
" <td>0.629109</td>\n",
|
|
" <td>0.174862</td>\n",
|
|
" <td>0.382076</td>\n",
|
|
" <td>0.406056</td>\n",
|
|
" <td>0.306403</td>\n",
|
|
" <td>0.482488</td>\n",
|
|
" <td>0.235549</td>\n",
|
|
" <td>0.046539</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>max</th>\n",
|
|
" <td>172573.000000</td>\n",
|
|
" <td>2.335833</td>\n",
|
|
" <td>19.167239</td>\n",
|
|
" <td>3.228978</td>\n",
|
|
" <td>11.927512</td>\n",
|
|
" <td>14.103918</td>\n",
|
|
" <td>6.355986</td>\n",
|
|
" <td>5.802537</td>\n",
|
|
" <td>20.007208</td>\n",
|
|
" <td>6.816732</td>\n",
|
|
" <td>11.732926</td>\n",
|
|
" <td>12.018913</td>\n",
|
|
" <td>2.534876</td>\n",
|
|
" <td>3.091328</td>\n",
|
|
" <td>3.442422</td>\n",
|
|
" <td>2.364199</td>\n",
|
|
" <td>3.139656</td>\n",
|
|
" <td>6.739384</td>\n",
|
|
" <td>3.790316</td>\n",
|
|
" <td>5.228342</td>\n",
|
|
" <td>7.907378</td>\n",
|
|
" <td>27.202839</td>\n",
|
|
" <td>5.774087</td>\n",
|
|
" <td>5.303607</td>\n",
|
|
" <td>1.208141</td>\n",
|
|
" <td>2.208209</td>\n",
|
|
" <td>2.745261</td>\n",
|
|
" <td>3.052358</td>\n",
|
|
" <td>4.975792</td>\n",
|
|
" <td>8.146182</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Time V1 V2 V3 V4 \\\n",
|
|
"count 688.000000 688.000000 688.000000 688.000000 688.000000 \n",
|
|
"mean 88546.635174 -2.443642 1.748210 -3.490693 2.161294 \n",
|
|
"std 48529.661753 5.382638 3.616426 6.020391 3.198221 \n",
|
|
"min 117.000000 -30.552380 -15.799625 -31.103685 -3.863126 \n",
|
|
"25% 45531.000000 -2.867222 -0.164478 -5.049001 -0.212543 \n",
|
|
"50% 82526.500000 -0.874057 0.984845 -1.482880 1.285768 \n",
|
|
"75% 135096.750000 0.945582 2.850947 0.348579 4.166857 \n",
|
|
"max 172573.000000 2.335833 19.167239 3.228978 11.927512 \n",
|
|
"\n",
|
|
" V5 V6 V7 V8 V9 V10 \\\n",
|
|
"count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n",
|
|
"mean -1.466909 -0.737723 -2.759190 0.361773 -1.222417 -2.808144 \n",
|
|
"std 4.227553 1.829535 5.498995 4.741154 2.336555 4.417548 \n",
|
|
"min -22.105532 -10.261990 -37.060311 -37.353443 -11.126624 -23.228255 \n",
|
|
"25% -1.703845 -1.691031 -3.105154 -0.220868 -2.205996 -4.731895 \n",
|
|
"50% -0.400360 -0.741307 -0.740952 0.141389 -0.694910 -0.981569 \n",
|
|
"75% 0.599892 0.033569 0.240843 0.919999 0.196633 -0.001047 \n",
|
|
"max 14.103918 6.355986 5.802537 20.007208 6.816732 11.732926 \n",
|
|
"\n",
|
|
" V11 V12 V13 V14 V15 V16 \\\n",
|
|
"count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n",
|
|
"mean 1.937783 -3.131850 -0.001132 -3.568854 -0.022936 -2.145811 \n",
|
|
"std 2.771137 4.560753 1.081826 4.641960 0.981683 3.458663 \n",
|
|
"min -2.613374 -18.431131 -3.223045 -19.214325 -4.498945 -13.563273 \n",
|
|
"25% -0.194163 -5.643631 -0.767631 -6.767749 -0.562582 -3.612856 \n",
|
|
"50% 1.154879 -0.845463 0.008049 -1.132761 0.001558 -0.750918 \n",
|
|
"75% 3.625262 0.163104 0.744021 0.086669 0.665736 0.219809 \n",
|
|
"max 12.018913 2.534876 3.091328 3.442422 2.364199 3.139656 \n",
|
|
"\n",
|
|
" V17 V18 V19 V20 V21 V22 \\\n",
|
|
"count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n",
|
|
"mean -3.365430 -1.137238 0.377690 0.127157 0.446495 0.012945 \n",
|
|
"std 6.062216 2.462689 1.287256 1.072960 2.749354 1.143940 \n",
|
|
"min -25.162799 -9.498746 -3.602657 -7.242879 -16.922016 -8.887017 \n",
|
|
"25% -5.277726 -1.816368 -0.373523 -0.197730 -0.142520 -0.510247 \n",
|
|
"50% -0.495063 -0.392743 0.246478 0.030556 0.163323 0.076684 \n",
|
|
"75% 0.314206 0.371481 0.978754 0.443495 0.680597 0.629109 \n",
|
|
"max 6.739384 3.790316 5.228342 7.907378 27.202839 5.774087 \n",
|
|
"\n",
|
|
" V23 V24 V25 V26 V27 V28 \\\n",
|
|
"count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n",
|
|
"mean -0.069031 -0.020203 0.031782 0.022154 0.114684 0.041557 \n",
|
|
"std 1.283882 0.549485 0.689015 0.474411 0.923161 0.487077 \n",
|
|
"min -19.254328 -2.028024 -4.781606 -1.214960 -7.263482 -2.735623 \n",
|
|
"25% -0.246005 -0.373302 -0.320463 -0.281449 -0.061809 -0.050983 \n",
|
|
"50% -0.027143 0.014360 0.046511 -0.026232 0.059798 0.036635 \n",
|
|
"75% 0.174862 0.382076 0.406056 0.306403 0.482488 0.235549 \n",
|
|
"max 5.303607 1.208141 2.208209 2.745261 3.052358 4.975792 \n",
|
|
"\n",
|
|
" Amount Class \n",
|
|
"count 688.000000 688.000000 \n",
|
|
"mean 0.036592 0.501453 \n",
|
|
"std 0.834360 0.500362 \n",
|
|
"min -0.353229 0.000000 \n",
|
|
"25% -0.346113 0.000000 \n",
|
|
"50% -0.273188 1.000000 \n",
|
|
"75% 0.046539 1.000000 \n",
|
|
"max 8.146182 1.000000 "
|
|
]
|
|
},
|
|
"execution_count": 109,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.concat([X_train_undersample, y_train_undersample], axis=1).describe()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 110,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Class\n",
|
|
"1 345\n",
|
|
"0 343\n",
|
|
"Name: count, dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 110,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.concat([X_train_undersample, y_train_undersample], axis=1)['Class'].value_counts()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Statistical measures of the test dataset of undersampled data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 111,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
"Index: 296 entries, 102782 to 57921\n",
|
|
"Data columns (total 31 columns):\n",
|
|
" # Column Non-Null Count Dtype \n",
|
|
"--- ------ -------------- ----- \n",
|
|
" 0 Time 296 non-null float64\n",
|
|
" 1 V1 296 non-null float64\n",
|
|
" 2 V2 296 non-null float64\n",
|
|
" 3 V3 296 non-null float64\n",
|
|
" 4 V4 296 non-null float64\n",
|
|
" 5 V5 296 non-null float64\n",
|
|
" 6 V6 296 non-null float64\n",
|
|
" 7 V7 296 non-null float64\n",
|
|
" 8 V8 296 non-null float64\n",
|
|
" 9 V9 296 non-null float64\n",
|
|
" 10 V10 296 non-null float64\n",
|
|
" 11 V11 296 non-null float64\n",
|
|
" 12 V12 296 non-null float64\n",
|
|
" 13 V13 296 non-null float64\n",
|
|
" 14 V14 296 non-null float64\n",
|
|
" 15 V15 296 non-null float64\n",
|
|
" 16 V16 296 non-null float64\n",
|
|
" 17 V17 296 non-null float64\n",
|
|
" 18 V18 296 non-null float64\n",
|
|
" 19 V19 296 non-null float64\n",
|
|
" 20 V20 296 non-null float64\n",
|
|
" 21 V21 296 non-null float64\n",
|
|
" 22 V22 296 non-null float64\n",
|
|
" 23 V23 296 non-null float64\n",
|
|
" 24 V24 296 non-null float64\n",
|
|
" 25 V25 296 non-null float64\n",
|
|
" 26 V26 296 non-null float64\n",
|
|
" 27 V27 296 non-null float64\n",
|
|
" 28 V28 296 non-null float64\n",
|
|
" 29 Amount 296 non-null float64\n",
|
|
" 30 Class 296 non-null int64 \n",
|
|
"dtypes: float64(30), int64(1)\n",
|
|
"memory usage: 74.0 KB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.concat([X_test_undersample, y_test_undersample], axis=1).info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 112,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Time</th>\n",
|
|
" <th>V1</th>\n",
|
|
" <th>V2</th>\n",
|
|
" <th>V3</th>\n",
|
|
" <th>V4</th>\n",
|
|
" <th>V5</th>\n",
|
|
" <th>V6</th>\n",
|
|
" <th>V7</th>\n",
|
|
" <th>V8</th>\n",
|
|
" <th>V9</th>\n",
|
|
" <th>V10</th>\n",
|
|
" <th>V11</th>\n",
|
|
" <th>V12</th>\n",
|
|
" <th>V13</th>\n",
|
|
" <th>V14</th>\n",
|
|
" <th>V15</th>\n",
|
|
" <th>V16</th>\n",
|
|
" <th>V17</th>\n",
|
|
" <th>V18</th>\n",
|
|
" <th>V19</th>\n",
|
|
" <th>V20</th>\n",
|
|
" <th>V21</th>\n",
|
|
" <th>V22</th>\n",
|
|
" <th>V23</th>\n",
|
|
" <th>V24</th>\n",
|
|
" <th>V25</th>\n",
|
|
" <th>V26</th>\n",
|
|
" <th>V27</th>\n",
|
|
" <th>V28</th>\n",
|
|
" <th>Amount</th>\n",
|
|
" <th>Class</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>count</th>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" <td>296.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>mean</th>\n",
|
|
" <td>88396.587838</td>\n",
|
|
" <td>-2.448419</td>\n",
|
|
" <td>1.857288</td>\n",
|
|
" <td>-3.552900</td>\n",
|
|
" <td>2.336519</td>\n",
|
|
" <td>-1.503755</td>\n",
|
|
" <td>-0.656035</td>\n",
|
|
" <td>-2.853058</td>\n",
|
|
" <td>0.086851</td>\n",
|
|
" <td>-1.324446</td>\n",
|
|
" <td>-2.919028</td>\n",
|
|
" <td>1.914227</td>\n",
|
|
" <td>-3.106154</td>\n",
|
|
" <td>-0.084562</td>\n",
|
|
" <td>-3.347887</td>\n",
|
|
" <td>-0.077981</td>\n",
|
|
" <td>-1.984526</td>\n",
|
|
" <td>-3.161909</td>\n",
|
|
" <td>-1.109686</td>\n",
|
|
" <td>0.264590</td>\n",
|
|
" <td>0.289212</td>\n",
|
|
" <td>0.065582</td>\n",
|
|
" <td>0.134902</td>\n",
|
|
" <td>0.056521</td>\n",
|
|
" <td>-0.077336</td>\n",
|
|
" <td>0.001963</td>\n",
|
|
" <td>0.040364</td>\n",
|
|
" <td>0.020281</td>\n",
|
|
" <td>0.058781</td>\n",
|
|
" <td>0.046845</td>\n",
|
|
" <td>0.496622</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>std</th>\n",
|
|
" <td>50147.105326</td>\n",
|
|
" <td>5.812072</td>\n",
|
|
" <td>3.934323</td>\n",
|
|
" <td>6.680660</td>\n",
|
|
" <td>3.308417</td>\n",
|
|
" <td>4.389263</td>\n",
|
|
" <td>1.693893</td>\n",
|
|
" <td>6.622008</td>\n",
|
|
" <td>5.121293</td>\n",
|
|
" <td>2.451914</td>\n",
|
|
" <td>4.891517</td>\n",
|
|
" <td>2.754439</td>\n",
|
|
" <td>4.681722</td>\n",
|
|
" <td>0.986937</td>\n",
|
|
" <td>4.683458</td>\n",
|
|
" <td>1.051296</td>\n",
|
|
" <td>3.484989</td>\n",
|
|
" <td>5.826410</td>\n",
|
|
" <td>2.293910</td>\n",
|
|
" <td>1.298310</td>\n",
|
|
" <td>1.235841</td>\n",
|
|
" <td>2.862463</td>\n",
|
|
" <td>1.216935</td>\n",
|
|
" <td>0.877975</td>\n",
|
|
" <td>0.555090</td>\n",
|
|
" <td>0.650752</td>\n",
|
|
" <td>0.481822</td>\n",
|
|
" <td>1.224166</td>\n",
|
|
" <td>0.460841</td>\n",
|
|
" <td>0.892432</td>\n",
|
|
" <td>0.500835</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>min</th>\n",
|
|
" <td>60.000000</td>\n",
|
|
" <td>-29.876366</td>\n",
|
|
" <td>-8.402154</td>\n",
|
|
" <td>-30.558697</td>\n",
|
|
" <td>-2.956827</td>\n",
|
|
" <td>-21.665654</td>\n",
|
|
" <td>-5.773192</td>\n",
|
|
" <td>-43.557242</td>\n",
|
|
" <td>-41.044261</td>\n",
|
|
" <td>-13.434066</td>\n",
|
|
" <td>-24.588262</td>\n",
|
|
" <td>-2.383066</td>\n",
|
|
" <td>-18.683715</td>\n",
|
|
" <td>-3.076318</td>\n",
|
|
" <td>-17.620634</td>\n",
|
|
" <td>-3.092108</td>\n",
|
|
" <td>-14.129855</td>\n",
|
|
" <td>-22.541652</td>\n",
|
|
" <td>-9.090892</td>\n",
|
|
" <td>-3.681904</td>\n",
|
|
" <td>-5.225849</td>\n",
|
|
" <td>-22.797604</td>\n",
|
|
" <td>-8.887017</td>\n",
|
|
" <td>-5.988806</td>\n",
|
|
" <td>-1.742803</td>\n",
|
|
" <td>-2.079928</td>\n",
|
|
" <td>-1.170476</td>\n",
|
|
" <td>-7.263482</td>\n",
|
|
" <td>-1.931920</td>\n",
|
|
" <td>-0.353229</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25%</th>\n",
|
|
" <td>45977.500000</td>\n",
|
|
" <td>-2.867766</td>\n",
|
|
" <td>-0.130600</td>\n",
|
|
" <td>-5.417818</td>\n",
|
|
" <td>-0.118496</td>\n",
|
|
" <td>-1.667035</td>\n",
|
|
" <td>-1.477544</td>\n",
|
|
" <td>-2.835885</td>\n",
|
|
" <td>-0.168935</td>\n",
|
|
" <td>-2.345829</td>\n",
|
|
" <td>-4.445615</td>\n",
|
|
" <td>-0.144802</td>\n",
|
|
" <td>-5.340188</td>\n",
|
|
" <td>-0.815218</td>\n",
|
|
" <td>-6.363108</td>\n",
|
|
" <td>-0.729637</td>\n",
|
|
" <td>-3.303237</td>\n",
|
|
" <td>-5.358990</td>\n",
|
|
" <td>-1.747789</td>\n",
|
|
" <td>-0.563676</td>\n",
|
|
" <td>-0.165023</td>\n",
|
|
" <td>-0.178103</td>\n",
|
|
" <td>-0.483530</td>\n",
|
|
" <td>-0.212828</td>\n",
|
|
" <td>-0.405811</td>\n",
|
|
" <td>-0.324214</td>\n",
|
|
" <td>-0.270853</td>\n",
|
|
" <td>-0.056831</td>\n",
|
|
" <td>-0.042639</td>\n",
|
|
" <td>-0.349231</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>50%</th>\n",
|
|
" <td>84069.000000</td>\n",
|
|
" <td>-0.740915</td>\n",
|
|
" <td>0.941852</td>\n",
|
|
" <td>-1.139964</td>\n",
|
|
" <td>1.340723</td>\n",
|
|
" <td>-0.369227</td>\n",
|
|
" <td>-0.596589</td>\n",
|
|
" <td>-0.501864</td>\n",
|
|
" <td>0.169642</td>\n",
|
|
" <td>-0.696902</td>\n",
|
|
" <td>-0.875521</td>\n",
|
|
" <td>1.267304</td>\n",
|
|
" <td>-0.938658</td>\n",
|
|
" <td>-0.060414</td>\n",
|
|
" <td>-1.059352</td>\n",
|
|
" <td>-0.012904</td>\n",
|
|
" <td>-0.547678</td>\n",
|
|
" <td>-0.527389</td>\n",
|
|
" <td>-0.318904</td>\n",
|
|
" <td>0.169827</td>\n",
|
|
" <td>0.056998</td>\n",
|
|
" <td>0.130060</td>\n",
|
|
" <td>0.081904</td>\n",
|
|
" <td>-0.035614</td>\n",
|
|
" <td>-0.010232</td>\n",
|
|
" <td>0.068890</td>\n",
|
|
" <td>0.031911</td>\n",
|
|
" <td>0.073702</td>\n",
|
|
" <td>0.046030</td>\n",
|
|
" <td>-0.300834</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>75%</th>\n",
|
|
" <td>135023.500000</td>\n",
|
|
" <td>0.879511</td>\n",
|
|
" <td>2.700371</td>\n",
|
|
" <td>0.394765</td>\n",
|
|
" <td>4.305361</td>\n",
|
|
" <td>0.624459</td>\n",
|
|
" <td>0.139244</td>\n",
|
|
" <td>0.306788</td>\n",
|
|
" <td>0.833392</td>\n",
|
|
" <td>0.011527</td>\n",
|
|
" <td>-0.051012</td>\n",
|
|
" <td>3.542336</td>\n",
|
|
" <td>0.234752</td>\n",
|
|
" <td>0.609629</td>\n",
|
|
" <td>0.173916</td>\n",
|
|
" <td>0.685300</td>\n",
|
|
" <td>0.351119</td>\n",
|
|
" <td>0.309636</td>\n",
|
|
" <td>0.237358</td>\n",
|
|
" <td>0.948371</td>\n",
|
|
" <td>0.461180</td>\n",
|
|
" <td>0.568611</td>\n",
|
|
" <td>0.617588</td>\n",
|
|
" <td>0.200328</td>\n",
|
|
" <td>0.317653</td>\n",
|
|
" <td>0.386804</td>\n",
|
|
" <td>0.355382</td>\n",
|
|
" <td>0.395412</td>\n",
|
|
" <td>0.192766</td>\n",
|
|
" <td>0.028048</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>max</th>\n",
|
|
" <td>172733.000000</td>\n",
|
|
" <td>2.306769</td>\n",
|
|
" <td>22.057729</td>\n",
|
|
" <td>3.476268</td>\n",
|
|
" <td>12.114672</td>\n",
|
|
" <td>9.880564</td>\n",
|
|
" <td>6.474115</td>\n",
|
|
" <td>3.791907</td>\n",
|
|
" <td>19.587773</td>\n",
|
|
" <td>4.866316</td>\n",
|
|
" <td>6.367661</td>\n",
|
|
" <td>11.152491</td>\n",
|
|
" <td>1.725185</td>\n",
|
|
" <td>2.897044</td>\n",
|
|
" <td>2.654275</td>\n",
|
|
" <td>2.471358</td>\n",
|
|
" <td>2.696475</td>\n",
|
|
" <td>6.443649</td>\n",
|
|
" <td>2.591846</td>\n",
|
|
" <td>4.851255</td>\n",
|
|
" <td>11.059004</td>\n",
|
|
" <td>27.202839</td>\n",
|
|
" <td>8.361985</td>\n",
|
|
" <td>5.466230</td>\n",
|
|
" <td>1.077407</td>\n",
|
|
" <td>2.156042</td>\n",
|
|
" <td>1.458828</td>\n",
|
|
" <td>2.706566</td>\n",
|
|
" <td>3.042406</td>\n",
|
|
" <td>5.663610</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Time V1 V2 V3 V4 \\\n",
|
|
"count 296.000000 296.000000 296.000000 296.000000 296.000000 \n",
|
|
"mean 88396.587838 -2.448419 1.857288 -3.552900 2.336519 \n",
|
|
"std 50147.105326 5.812072 3.934323 6.680660 3.308417 \n",
|
|
"min 60.000000 -29.876366 -8.402154 -30.558697 -2.956827 \n",
|
|
"25% 45977.500000 -2.867766 -0.130600 -5.417818 -0.118496 \n",
|
|
"50% 84069.000000 -0.740915 0.941852 -1.139964 1.340723 \n",
|
|
"75% 135023.500000 0.879511 2.700371 0.394765 4.305361 \n",
|
|
"max 172733.000000 2.306769 22.057729 3.476268 12.114672 \n",
|
|
"\n",
|
|
" V5 V6 V7 V8 V9 V10 \\\n",
|
|
"count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n",
|
|
"mean -1.503755 -0.656035 -2.853058 0.086851 -1.324446 -2.919028 \n",
|
|
"std 4.389263 1.693893 6.622008 5.121293 2.451914 4.891517 \n",
|
|
"min -21.665654 -5.773192 -43.557242 -41.044261 -13.434066 -24.588262 \n",
|
|
"25% -1.667035 -1.477544 -2.835885 -0.168935 -2.345829 -4.445615 \n",
|
|
"50% -0.369227 -0.596589 -0.501864 0.169642 -0.696902 -0.875521 \n",
|
|
"75% 0.624459 0.139244 0.306788 0.833392 0.011527 -0.051012 \n",
|
|
"max 9.880564 6.474115 3.791907 19.587773 4.866316 6.367661 \n",
|
|
"\n",
|
|
" V11 V12 V13 V14 V15 V16 \\\n",
|
|
"count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n",
|
|
"mean 1.914227 -3.106154 -0.084562 -3.347887 -0.077981 -1.984526 \n",
|
|
"std 2.754439 4.681722 0.986937 4.683458 1.051296 3.484989 \n",
|
|
"min -2.383066 -18.683715 -3.076318 -17.620634 -3.092108 -14.129855 \n",
|
|
"25% -0.144802 -5.340188 -0.815218 -6.363108 -0.729637 -3.303237 \n",
|
|
"50% 1.267304 -0.938658 -0.060414 -1.059352 -0.012904 -0.547678 \n",
|
|
"75% 3.542336 0.234752 0.609629 0.173916 0.685300 0.351119 \n",
|
|
"max 11.152491 1.725185 2.897044 2.654275 2.471358 2.696475 \n",
|
|
"\n",
|
|
" V17 V18 V19 V20 V21 V22 \\\n",
|
|
"count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n",
|
|
"mean -3.161909 -1.109686 0.264590 0.289212 0.065582 0.134902 \n",
|
|
"std 5.826410 2.293910 1.298310 1.235841 2.862463 1.216935 \n",
|
|
"min -22.541652 -9.090892 -3.681904 -5.225849 -22.797604 -8.887017 \n",
|
|
"25% -5.358990 -1.747789 -0.563676 -0.165023 -0.178103 -0.483530 \n",
|
|
"50% -0.527389 -0.318904 0.169827 0.056998 0.130060 0.081904 \n",
|
|
"75% 0.309636 0.237358 0.948371 0.461180 0.568611 0.617588 \n",
|
|
"max 6.443649 2.591846 4.851255 11.059004 27.202839 8.361985 \n",
|
|
"\n",
|
|
" V23 V24 V25 V26 V27 V28 \\\n",
|
|
"count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n",
|
|
"mean 0.056521 -0.077336 0.001963 0.040364 0.020281 0.058781 \n",
|
|
"std 0.877975 0.555090 0.650752 0.481822 1.224166 0.460841 \n",
|
|
"min -5.988806 -1.742803 -2.079928 -1.170476 -7.263482 -1.931920 \n",
|
|
"25% -0.212828 -0.405811 -0.324214 -0.270853 -0.056831 -0.042639 \n",
|
|
"50% -0.035614 -0.010232 0.068890 0.031911 0.073702 0.046030 \n",
|
|
"75% 0.200328 0.317653 0.386804 0.355382 0.395412 0.192766 \n",
|
|
"max 5.466230 1.077407 2.156042 1.458828 2.706566 3.042406 \n",
|
|
"\n",
|
|
" Amount Class \n",
|
|
"count 296.000000 296.000000 \n",
|
|
"mean 0.046845 0.496622 \n",
|
|
"std 0.892432 0.500835 \n",
|
|
"min -0.353229 0.000000 \n",
|
|
"25% -0.349231 0.000000 \n",
|
|
"50% -0.300834 0.000000 \n",
|
|
"75% 0.028048 1.000000 \n",
|
|
"max 5.663610 1.000000 "
|
|
]
|
|
},
|
|
"execution_count": 112,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Summary statistics of X_test_undersample and y_test_undersample placed side by side (column-wise).\n",
"pd.concat(objs=[X_test_undersample, y_test_undersample], axis='columns').describe()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 113,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Class\n",
|
|
"0 149\n",
|
|
"1 147\n",
|
|
"Name: count, dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 113,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Number of rows per 'Class' value in the column-wise combination of X_test_undersample and y_test_undersample.\n",
"pd.concat(objs=[X_test_undersample, y_test_undersample], axis='columns').loc[:, 'Class'].value_counts()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|