ium_464913/IUM_2.ipynb

3280 lines
124 KiB
Plaintext
Raw Normal View History

2024-03-16 14:35:44 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## IUM 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Installation of packages"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: kaggle in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.6.6)\n",
"Requirement already satisfied: six>=1.10 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from kaggle) (1.16.0)\n",
"Requirement already satisfied: certifi in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2024.2.2)\n",
"Requirement already satisfied: python-dateutil in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from kaggle) (2.9.0.post0)\n",
"Requirement already satisfied: requests in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2.31.0)\n",
"Requirement already satisfied: tqdm in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (4.66.2)\n",
"Requirement already satisfied: python-slugify in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (8.0.4)\n",
"Requirement already satisfied: urllib3 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2.2.1)\n",
"Requirement already satisfied: bleach in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (6.1.0)\n",
"Requirement already satisfied: webencodings in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from bleach->kaggle) (0.5.1)\n",
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from requests->kaggle) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from requests->kaggle) (3.6)\n",
"Requirement already satisfied: colorama in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from tqdm->kaggle) (0.4.6)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: pandas in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (2.2.1)\n",
"Requirement already satisfied: numpy<2,>=1.26.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (1.26.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: numpy in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.26.3)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: scikit-learn in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.4.1.post1)\n",
"Requirement already satisfied: numpy<2.0,>=1.19.5 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.26.3)\n",
"Requirement already satisfied: scipy>=1.6.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.12.0)\n",
"Requirement already satisfied: joblib>=1.2.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.3.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (3.3.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install kaggle\n",
"%pip install pandas\n",
"%pip install numpy\n",
"%pip install scikit-learn"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Importing libraries"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# To preprocess the data\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# To split the data\n",
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Downloading a dataset"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"creditcardfraud.zip: Skipping, found more recently modified local copy (use --force to force download)\n"
]
}
],
"source": [
"!kaggle datasets download -d mlg-ulb/creditcardfraud"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Uncompress a file"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: creditcardfraud.zip\n",
" inflating: creditcard.csv \n"
]
}
],
"source": [
"!unzip -o creditcardfraud.zip"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load the data"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('creditcard.csv')\n",
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Size of the dataset"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 284807 entries, 0 to 284806\n",
"Data columns (total 31 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Time 284807 non-null float64\n",
" 1 V1 284807 non-null float64\n",
" 2 V2 284807 non-null float64\n",
" 3 V3 284807 non-null float64\n",
" 4 V4 284807 non-null float64\n",
" 5 V5 284807 non-null float64\n",
" 6 V6 284807 non-null float64\n",
" 7 V7 284807 non-null float64\n",
" 8 V8 284807 non-null float64\n",
" 9 V9 284807 non-null float64\n",
" 10 V10 284807 non-null float64\n",
" 11 V11 284807 non-null float64\n",
" 12 V12 284807 non-null float64\n",
" 13 V13 284807 non-null float64\n",
" 14 V14 284807 non-null float64\n",
" 15 V15 284807 non-null float64\n",
" 16 V16 284807 non-null float64\n",
" 17 V17 284807 non-null float64\n",
" 18 V18 284807 non-null float64\n",
" 19 V19 284807 non-null float64\n",
" 20 V20 284807 non-null float64\n",
" 21 V21 284807 non-null float64\n",
" 22 V22 284807 non-null float64\n",
" 23 V23 284807 non-null float64\n",
" 24 V24 284807 non-null float64\n",
" 25 V25 284807 non-null float64\n",
" 26 V26 284807 non-null float64\n",
" 27 V27 284807 non-null float64\n",
" 28 V28 284807 non-null float64\n",
" 29 Amount 284807 non-null float64\n",
" 30 Class 284807 non-null int64 \n",
"dtypes: float64(30), int64(1)\n",
"memory usage: 67.4 MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Normalising the data"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"scaler = StandardScaler()\n",
"\n",
"df['Amount'] = scaler.fit_transform(df['Amount'].values.reshape(-1, 1))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Summary statistics"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Time</th>\n",
" <th>V1</th>\n",
" <th>V2</th>\n",
" <th>V3</th>\n",
" <th>V4</th>\n",
" <th>V5</th>\n",
" <th>V6</th>\n",
" <th>V7</th>\n",
" <th>V8</th>\n",
" <th>V9</th>\n",
" <th>V10</th>\n",
" <th>V11</th>\n",
" <th>V12</th>\n",
" <th>V13</th>\n",
" <th>V14</th>\n",
" <th>V15</th>\n",
" <th>V16</th>\n",
" <th>V17</th>\n",
" <th>V18</th>\n",
" <th>V19</th>\n",
" <th>V20</th>\n",
" <th>V21</th>\n",
" <th>V22</th>\n",
" <th>V23</th>\n",
" <th>V24</th>\n",
" <th>V25</th>\n",
" <th>V26</th>\n",
" <th>V27</th>\n",
" <th>V28</th>\n",
" <th>Amount</th>\n",
" <th>Class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>284807.000000</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>2.848070e+05</td>\n",
" <td>284807.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>94813.859575</td>\n",
" <td>1.168375e-15</td>\n",
" <td>3.416908e-16</td>\n",
" <td>-1.379537e-15</td>\n",
" <td>2.074095e-15</td>\n",
" <td>9.604066e-16</td>\n",
" <td>1.487313e-15</td>\n",
" <td>-5.556467e-16</td>\n",
" <td>1.213481e-16</td>\n",
" <td>-2.406331e-15</td>\n",
" <td>2.239053e-15</td>\n",
" <td>1.673327e-15</td>\n",
" <td>-1.247012e-15</td>\n",
" <td>8.190001e-16</td>\n",
" <td>1.207294e-15</td>\n",
" <td>4.887456e-15</td>\n",
" <td>1.437716e-15</td>\n",
" <td>-3.772171e-16</td>\n",
" <td>9.564149e-16</td>\n",
" <td>1.039917e-15</td>\n",
" <td>6.406204e-16</td>\n",
" <td>1.654067e-16</td>\n",
" <td>-3.568593e-16</td>\n",
" <td>2.578648e-16</td>\n",
" <td>4.473266e-15</td>\n",
" <td>5.340915e-16</td>\n",
" <td>1.683437e-15</td>\n",
" <td>-3.660091e-16</td>\n",
" <td>-1.227390e-16</td>\n",
" <td>2.913952e-17</td>\n",
" <td>0.001727</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>47488.145955</td>\n",
" <td>1.958696e+00</td>\n",
" <td>1.651309e+00</td>\n",
" <td>1.516255e+00</td>\n",
" <td>1.415869e+00</td>\n",
" <td>1.380247e+00</td>\n",
" <td>1.332271e+00</td>\n",
" <td>1.237094e+00</td>\n",
" <td>1.194353e+00</td>\n",
" <td>1.098632e+00</td>\n",
" <td>1.088850e+00</td>\n",
" <td>1.020713e+00</td>\n",
" <td>9.992014e-01</td>\n",
" <td>9.952742e-01</td>\n",
" <td>9.585956e-01</td>\n",
" <td>9.153160e-01</td>\n",
" <td>8.762529e-01</td>\n",
" <td>8.493371e-01</td>\n",
" <td>8.381762e-01</td>\n",
" <td>8.140405e-01</td>\n",
" <td>7.709250e-01</td>\n",
" <td>7.345240e-01</td>\n",
" <td>7.257016e-01</td>\n",
" <td>6.244603e-01</td>\n",
" <td>6.056471e-01</td>\n",
" <td>5.212781e-01</td>\n",
" <td>4.822270e-01</td>\n",
" <td>4.036325e-01</td>\n",
" <td>3.300833e-01</td>\n",
" <td>1.000002e+00</td>\n",
" <td>0.041527</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>-5.640751e+01</td>\n",
" <td>-7.271573e+01</td>\n",
" <td>-4.832559e+01</td>\n",
" <td>-5.683171e+00</td>\n",
" <td>-1.137433e+02</td>\n",
" <td>-2.616051e+01</td>\n",
" <td>-4.355724e+01</td>\n",
" <td>-7.321672e+01</td>\n",
" <td>-1.343407e+01</td>\n",
" <td>-2.458826e+01</td>\n",
" <td>-4.797473e+00</td>\n",
" <td>-1.868371e+01</td>\n",
" <td>-5.791881e+00</td>\n",
" <td>-1.921433e+01</td>\n",
" <td>-4.498945e+00</td>\n",
" <td>-1.412985e+01</td>\n",
" <td>-2.516280e+01</td>\n",
" <td>-9.498746e+00</td>\n",
" <td>-7.213527e+00</td>\n",
" <td>-5.449772e+01</td>\n",
" <td>-3.483038e+01</td>\n",
" <td>-1.093314e+01</td>\n",
" <td>-4.480774e+01</td>\n",
" <td>-2.836627e+00</td>\n",
" <td>-1.029540e+01</td>\n",
" <td>-2.604551e+00</td>\n",
" <td>-2.256568e+01</td>\n",
" <td>-1.543008e+01</td>\n",
" <td>-3.532294e-01</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>54201.500000</td>\n",
" <td>-9.203734e-01</td>\n",
" <td>-5.985499e-01</td>\n",
" <td>-8.903648e-01</td>\n",
" <td>-8.486401e-01</td>\n",
" <td>-6.915971e-01</td>\n",
" <td>-7.682956e-01</td>\n",
" <td>-5.540759e-01</td>\n",
" <td>-2.086297e-01</td>\n",
" <td>-6.430976e-01</td>\n",
" <td>-5.354257e-01</td>\n",
" <td>-7.624942e-01</td>\n",
" <td>-4.055715e-01</td>\n",
" <td>-6.485393e-01</td>\n",
" <td>-4.255740e-01</td>\n",
" <td>-5.828843e-01</td>\n",
" <td>-4.680368e-01</td>\n",
" <td>-4.837483e-01</td>\n",
" <td>-4.988498e-01</td>\n",
" <td>-4.562989e-01</td>\n",
" <td>-2.117214e-01</td>\n",
" <td>-2.283949e-01</td>\n",
" <td>-5.423504e-01</td>\n",
" <td>-1.618463e-01</td>\n",
" <td>-3.545861e-01</td>\n",
" <td>-3.171451e-01</td>\n",
" <td>-3.269839e-01</td>\n",
" <td>-7.083953e-02</td>\n",
" <td>-5.295979e-02</td>\n",
" <td>-3.308401e-01</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>84692.000000</td>\n",
" <td>1.810880e-02</td>\n",
" <td>6.548556e-02</td>\n",
" <td>1.798463e-01</td>\n",
" <td>-1.984653e-02</td>\n",
" <td>-5.433583e-02</td>\n",
" <td>-2.741871e-01</td>\n",
" <td>4.010308e-02</td>\n",
" <td>2.235804e-02</td>\n",
" <td>-5.142873e-02</td>\n",
" <td>-9.291738e-02</td>\n",
" <td>-3.275735e-02</td>\n",
" <td>1.400326e-01</td>\n",
" <td>-1.356806e-02</td>\n",
" <td>5.060132e-02</td>\n",
" <td>4.807155e-02</td>\n",
" <td>6.641332e-02</td>\n",
" <td>-6.567575e-02</td>\n",
" <td>-3.636312e-03</td>\n",
" <td>3.734823e-03</td>\n",
" <td>-6.248109e-02</td>\n",
" <td>-2.945017e-02</td>\n",
" <td>6.781943e-03</td>\n",
" <td>-1.119293e-02</td>\n",
" <td>4.097606e-02</td>\n",
" <td>1.659350e-02</td>\n",
" <td>-5.213911e-02</td>\n",
" <td>1.342146e-03</td>\n",
" <td>1.124383e-02</td>\n",
" <td>-2.652715e-01</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>139320.500000</td>\n",
" <td>1.315642e+00</td>\n",
" <td>8.037239e-01</td>\n",
" <td>1.027196e+00</td>\n",
" <td>7.433413e-01</td>\n",
" <td>6.119264e-01</td>\n",
" <td>3.985649e-01</td>\n",
" <td>5.704361e-01</td>\n",
" <td>3.273459e-01</td>\n",
" <td>5.971390e-01</td>\n",
" <td>4.539234e-01</td>\n",
" <td>7.395934e-01</td>\n",
" <td>6.182380e-01</td>\n",
" <td>6.625050e-01</td>\n",
" <td>4.931498e-01</td>\n",
" <td>6.488208e-01</td>\n",
" <td>5.232963e-01</td>\n",
" <td>3.996750e-01</td>\n",
" <td>5.008067e-01</td>\n",
" <td>4.589494e-01</td>\n",
" <td>1.330408e-01</td>\n",
" <td>1.863772e-01</td>\n",
" <td>5.285536e-01</td>\n",
" <td>1.476421e-01</td>\n",
" <td>4.395266e-01</td>\n",
" <td>3.507156e-01</td>\n",
" <td>2.409522e-01</td>\n",
" <td>9.104512e-02</td>\n",
" <td>7.827995e-02</td>\n",
" <td>-4.471707e-02</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>172792.000000</td>\n",
" <td>2.454930e+00</td>\n",
" <td>2.205773e+01</td>\n",
" <td>9.382558e+00</td>\n",
" <td>1.687534e+01</td>\n",
" <td>3.480167e+01</td>\n",
" <td>7.330163e+01</td>\n",
" <td>1.205895e+02</td>\n",
" <td>2.000721e+01</td>\n",
" <td>1.559499e+01</td>\n",
" <td>2.374514e+01</td>\n",
" <td>1.201891e+01</td>\n",
" <td>7.848392e+00</td>\n",
" <td>7.126883e+00</td>\n",
" <td>1.052677e+01</td>\n",
" <td>8.877742e+00</td>\n",
" <td>1.731511e+01</td>\n",
" <td>9.253526e+00</td>\n",
" <td>5.041069e+00</td>\n",
" <td>5.591971e+00</td>\n",
" <td>3.942090e+01</td>\n",
" <td>2.720284e+01</td>\n",
" <td>1.050309e+01</td>\n",
" <td>2.252841e+01</td>\n",
" <td>4.584549e+00</td>\n",
" <td>7.519589e+00</td>\n",
" <td>3.517346e+00</td>\n",
" <td>3.161220e+01</td>\n",
" <td>3.384781e+01</td>\n",
" <td>1.023622e+02</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Time V1 V2 V3 V4 \\\n",
"count 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
"mean 94813.859575 1.168375e-15 3.416908e-16 -1.379537e-15 2.074095e-15 \n",
"std 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00 1.415869e+00 \n",
"min 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00 \n",
"25% 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01 \n",
"50% 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01 -1.984653e-02 \n",
"75% 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00 7.433413e-01 \n",
"max 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00 1.687534e+01 \n",
"\n",
" V5 V6 V7 V8 V9 \\\n",
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
"mean 9.604066e-16 1.487313e-15 -5.556467e-16 1.213481e-16 -2.406331e-15 \n",
"std 1.380247e+00 1.332271e+00 1.237094e+00 1.194353e+00 1.098632e+00 \n",
"min -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01 -1.343407e+01 \n",
"25% -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01 -6.430976e-01 \n",
"50% -5.433583e-02 -2.741871e-01 4.010308e-02 2.235804e-02 -5.142873e-02 \n",
"75% 6.119264e-01 3.985649e-01 5.704361e-01 3.273459e-01 5.971390e-01 \n",
"max 3.480167e+01 7.330163e+01 1.205895e+02 2.000721e+01 1.559499e+01 \n",
"\n",
" V10 V11 V12 V13 V14 \\\n",
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
"mean 2.239053e-15 1.673327e-15 -1.247012e-15 8.190001e-16 1.207294e-15 \n",
"std 1.088850e+00 1.020713e+00 9.992014e-01 9.952742e-01 9.585956e-01 \n",
"min -2.458826e+01 -4.797473e+00 -1.868371e+01 -5.791881e+00 -1.921433e+01 \n",
"25% -5.354257e-01 -7.624942e-01 -4.055715e-01 -6.485393e-01 -4.255740e-01 \n",
"50% -9.291738e-02 -3.275735e-02 1.400326e-01 -1.356806e-02 5.060132e-02 \n",
"75% 4.539234e-01 7.395934e-01 6.182380e-01 6.625050e-01 4.931498e-01 \n",
"max 2.374514e+01 1.201891e+01 7.848392e+00 7.126883e+00 1.052677e+01 \n",
"\n",
" V15 V16 V17 V18 V19 \\\n",
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
"mean 4.887456e-15 1.437716e-15 -3.772171e-16 9.564149e-16 1.039917e-15 \n",
"std 9.153160e-01 8.762529e-01 8.493371e-01 8.381762e-01 8.140405e-01 \n",
"min -4.498945e+00 -1.412985e+01 -2.516280e+01 -9.498746e+00 -7.213527e+00 \n",
"25% -5.828843e-01 -4.680368e-01 -4.837483e-01 -4.988498e-01 -4.562989e-01 \n",
"50% 4.807155e-02 6.641332e-02 -6.567575e-02 -3.636312e-03 3.734823e-03 \n",
"75% 6.488208e-01 5.232963e-01 3.996750e-01 5.008067e-01 4.589494e-01 \n",
"max 8.877742e+00 1.731511e+01 9.253526e+00 5.041069e+00 5.591971e+00 \n",
"\n",
" V20 V21 V22 V23 V24 \\\n",
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
"mean 6.406204e-16 1.654067e-16 -3.568593e-16 2.578648e-16 4.473266e-15 \n",
"std 7.709250e-01 7.345240e-01 7.257016e-01 6.244603e-01 6.056471e-01 \n",
"min -5.449772e+01 -3.483038e+01 -1.093314e+01 -4.480774e+01 -2.836627e+00 \n",
"25% -2.117214e-01 -2.283949e-01 -5.423504e-01 -1.618463e-01 -3.545861e-01 \n",
"50% -6.248109e-02 -2.945017e-02 6.781943e-03 -1.119293e-02 4.097606e-02 \n",
"75% 1.330408e-01 1.863772e-01 5.285536e-01 1.476421e-01 4.395266e-01 \n",
"max 3.942090e+01 2.720284e+01 1.050309e+01 2.252841e+01 4.584549e+00 \n",
"\n",
" V25 V26 V27 V28 Amount \\\n",
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
"mean 5.340915e-16 1.683437e-15 -3.660091e-16 -1.227390e-16 2.913952e-17 \n",
"std 5.212781e-01 4.822270e-01 4.036325e-01 3.300833e-01 1.000002e+00 \n",
"min -1.029540e+01 -2.604551e+00 -2.256568e+01 -1.543008e+01 -3.532294e-01 \n",
"25% -3.171451e-01 -3.269839e-01 -7.083953e-02 -5.295979e-02 -3.308401e-01 \n",
"50% 1.659350e-02 -5.213911e-02 1.342146e-03 1.124383e-02 -2.652715e-01 \n",
"75% 3.507156e-01 2.409522e-01 9.104512e-02 7.827995e-02 -4.471707e-02 \n",
"max 7.519589e+00 3.517346e+00 3.161220e+01 3.384781e+01 1.023622e+02 \n",
"\n",
" Class \n",
"count 284807.000000 \n",
"mean 0.001727 \n",
"std 0.041527 \n",
"min 0.000000 \n",
"25% 0.000000 \n",
"50% 0.000000 \n",
"75% 0.000000 \n",
"max 1.000000 "
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Distribution of legitimate and fraudulent transactions"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Class\n",
"0 284315\n",
"1 492\n",
"Name: count, dtype: int64"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Class'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Undersampling the data\n",
"We will employ undersampling as one class significantly dominates the other."
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"# Determine the number of instances in the minority class\n",
"fraud_count = len(df[df.Class == 1])\n",
"fraud_indices = np.array(df[df.Class == 1].index)\n",
"\n",
"# Select indices corresponding to majority class instances\n",
"normal_indices = df[df.Class == 0].index\n",
"\n",
"# Randomly sample the same number of instances from the majority class\n",
"random_normal_indices = np.random.choice(normal_indices, fraud_count, replace=False)\n",
"random_normal_indices = np.array(random_normal_indices)\n",
"\n",
"# Combine indices of both classes\n",
"undersample_indice = np.concatenate([fraud_indices, random_normal_indices])\n",
"\n",
"# Undersample dataset\n",
"undersample_data = df.iloc[undersample_indice, :]\n",
"\n",
"X_undersample = undersample_data.iloc[:, undersample_data.columns != 'Class']\n",
"y_undersample = undersample_data.iloc[:, undersample_data.columns == 'Class']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Size of undersampled dataset"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 984 entries, 541 to 216408\n",
"Data columns (total 31 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Time 984 non-null float64\n",
" 1 V1 984 non-null float64\n",
" 2 V2 984 non-null float64\n",
" 3 V3 984 non-null float64\n",
" 4 V4 984 non-null float64\n",
" 5 V5 984 non-null float64\n",
" 6 V6 984 non-null float64\n",
" 7 V7 984 non-null float64\n",
" 8 V8 984 non-null float64\n",
" 9 V9 984 non-null float64\n",
" 10 V10 984 non-null float64\n",
" 11 V11 984 non-null float64\n",
" 12 V12 984 non-null float64\n",
" 13 V13 984 non-null float64\n",
" 14 V14 984 non-null float64\n",
" 15 V15 984 non-null float64\n",
" 16 V16 984 non-null float64\n",
" 17 V17 984 non-null float64\n",
" 18 V18 984 non-null float64\n",
" 19 V19 984 non-null float64\n",
" 20 V20 984 non-null float64\n",
" 21 V21 984 non-null float64\n",
" 22 V22 984 non-null float64\n",
" 23 V23 984 non-null float64\n",
" 24 V24 984 non-null float64\n",
" 25 V25 984 non-null float64\n",
" 26 V26 984 non-null float64\n",
" 27 V27 984 non-null float64\n",
" 28 V28 984 non-null float64\n",
" 29 Amount 984 non-null float64\n",
" 30 Class 984 non-null int64 \n",
"dtypes: float64(30), int64(1)\n",
"memory usage: 246.0 KB\n"
]
}
],
"source": [
"undersample_data.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Summary statistics of the undersampled dataset"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Time</th>\n",
" <th>V1</th>\n",
" <th>V2</th>\n",
" <th>V3</th>\n",
" <th>V4</th>\n",
" <th>V5</th>\n",
" <th>V6</th>\n",
" <th>V7</th>\n",
" <th>V8</th>\n",
" <th>V9</th>\n",
" <th>V10</th>\n",
" <th>V11</th>\n",
" <th>V12</th>\n",
" <th>V13</th>\n",
" <th>V14</th>\n",
" <th>V15</th>\n",
" <th>V16</th>\n",
" <th>V17</th>\n",
" <th>V18</th>\n",
" <th>V19</th>\n",
" <th>V20</th>\n",
" <th>V21</th>\n",
" <th>V22</th>\n",
" <th>V23</th>\n",
" <th>V24</th>\n",
" <th>V25</th>\n",
" <th>V26</th>\n",
" <th>V27</th>\n",
" <th>V28</th>\n",
" <th>Amount</th>\n",
" <th>Class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" <td>984.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>88776.640244</td>\n",
" <td>-2.405835</td>\n",
" <td>1.819622</td>\n",
" <td>-3.511875</td>\n",
" <td>2.253022</td>\n",
" <td>-1.598199</td>\n",
" <td>-0.703490</td>\n",
" <td>-2.771834</td>\n",
" <td>0.290248</td>\n",
" <td>-1.271753</td>\n",
" <td>-2.866398</td>\n",
" <td>1.950912</td>\n",
" <td>-3.085801</td>\n",
" <td>-0.038172</td>\n",
" <td>-3.480604</td>\n",
" <td>-0.047034</td>\n",
" <td>-2.048191</td>\n",
" <td>-3.320723</td>\n",
" <td>-1.121329</td>\n",
" <td>0.338180</td>\n",
" <td>0.193890</td>\n",
" <td>0.363782</td>\n",
" <td>0.002554</td>\n",
" <td>0.005715</td>\n",
" <td>-0.068002</td>\n",
" <td>0.007024</td>\n",
" <td>0.018569</td>\n",
" <td>0.062892</td>\n",
" <td>0.027783</td>\n",
" <td>0.077270</td>\n",
" <td>0.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>48243.944271</td>\n",
" <td>5.526010</td>\n",
" <td>3.753287</td>\n",
" <td>6.215601</td>\n",
" <td>3.188581</td>\n",
" <td>4.284063</td>\n",
" <td>1.787638</td>\n",
" <td>5.918129</td>\n",
" <td>4.878508</td>\n",
" <td>2.321658</td>\n",
" <td>4.526228</td>\n",
" <td>2.737584</td>\n",
" <td>4.620098</td>\n",
" <td>1.046915</td>\n",
" <td>4.656872</td>\n",
" <td>0.957319</td>\n",
" <td>3.494192</td>\n",
" <td>5.975039</td>\n",
" <td>2.410427</td>\n",
" <td>1.275460</td>\n",
" <td>1.197462</td>\n",
" <td>2.801160</td>\n",
" <td>1.171430</td>\n",
" <td>1.286473</td>\n",
" <td>0.572075</td>\n",
" <td>0.671570</td>\n",
" <td>0.488011</td>\n",
" <td>1.023450</td>\n",
" <td>0.425708</td>\n",
" <td>1.322416</td>\n",
" <td>0.500254</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>406.000000</td>\n",
" <td>-30.552380</td>\n",
" <td>-20.984898</td>\n",
" <td>-31.103685</td>\n",
" <td>-3.421874</td>\n",
" <td>-22.105532</td>\n",
" <td>-14.425011</td>\n",
" <td>-43.557242</td>\n",
" <td>-41.044261</td>\n",
" <td>-13.434066</td>\n",
" <td>-24.588262</td>\n",
" <td>-2.279476</td>\n",
" <td>-18.683715</td>\n",
" <td>-3.184202</td>\n",
" <td>-19.214325</td>\n",
" <td>-4.498945</td>\n",
" <td>-14.129855</td>\n",
" <td>-25.162799</td>\n",
" <td>-9.498746</td>\n",
" <td>-3.681904</td>\n",
" <td>-5.244333</td>\n",
" <td>-22.797604</td>\n",
" <td>-8.887017</td>\n",
" <td>-19.254328</td>\n",
" <td>-2.082546</td>\n",
" <td>-4.781606</td>\n",
" <td>-1.407558</td>\n",
" <td>-7.263482</td>\n",
" <td>-1.869290</td>\n",
" <td>-0.353229</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>47898.750000</td>\n",
" <td>-2.896794</td>\n",
" <td>-0.156682</td>\n",
" <td>-5.084967</td>\n",
" <td>-0.100812</td>\n",
" <td>-1.758911</td>\n",
" <td>-1.509571</td>\n",
" <td>-3.060742</td>\n",
" <td>-0.179491</td>\n",
" <td>-2.279453</td>\n",
" <td>-4.593030</td>\n",
" <td>-0.047452</td>\n",
" <td>-5.495221</td>\n",
" <td>-0.761168</td>\n",
" <td>-6.721799</td>\n",
" <td>-0.639308</td>\n",
" <td>-3.543426</td>\n",
" <td>-5.302111</td>\n",
" <td>-1.809496</td>\n",
" <td>-0.441326</td>\n",
" <td>-0.203323</td>\n",
" <td>-0.159503</td>\n",
" <td>-0.523196</td>\n",
" <td>-0.233359</td>\n",
" <td>-0.409463</td>\n",
" <td>-0.331296</td>\n",
" <td>-0.316920</td>\n",
" <td>-0.061141</td>\n",
" <td>-0.053338</td>\n",
" <td>-0.347042</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>80545.000000</td>\n",
" <td>-0.803813</td>\n",
" <td>0.993854</td>\n",
" <td>-1.370281</td>\n",
" <td>1.291576</td>\n",
" <td>-0.425110</td>\n",
" <td>-0.640524</td>\n",
" <td>-0.640804</td>\n",
" <td>0.180271</td>\n",
" <td>-0.666598</td>\n",
" <td>-0.885646</td>\n",
" <td>1.171937</td>\n",
" <td>-0.734549</td>\n",
" <td>0.002739</td>\n",
" <td>-0.986762</td>\n",
" <td>-0.012609</td>\n",
" <td>-0.599078</td>\n",
" <td>-0.466877</td>\n",
" <td>-0.322246</td>\n",
" <td>0.228471</td>\n",
" <td>0.017882</td>\n",
" <td>0.140922</td>\n",
" <td>0.013228</td>\n",
" <td>-0.012981</td>\n",
" <td>-0.001297</td>\n",
" <td>0.035358</td>\n",
" <td>-0.027331</td>\n",
" <td>0.044271</td>\n",
" <td>0.032226</td>\n",
" <td>-0.278285</td>\n",
" <td>0.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>135096.750000</td>\n",
" <td>1.018644</td>\n",
" <td>2.798885</td>\n",
" <td>0.345737</td>\n",
" <td>4.235631</td>\n",
" <td>0.442399</td>\n",
" <td>0.088865</td>\n",
" <td>0.245823</td>\n",
" <td>0.879226</td>\n",
" <td>0.205275</td>\n",
" <td>-0.027386</td>\n",
" <td>3.586130</td>\n",
" <td>0.285771</td>\n",
" <td>0.646090</td>\n",
" <td>0.083101</td>\n",
" <td>0.608586</td>\n",
" <td>0.302161</td>\n",
" <td>0.269949</td>\n",
" <td>0.333976</td>\n",
" <td>0.955802</td>\n",
" <td>0.399500</td>\n",
" <td>0.654990</td>\n",
" <td>0.582570</td>\n",
" <td>0.196764</td>\n",
" <td>0.368293</td>\n",
" <td>0.371463</td>\n",
" <td>0.328812</td>\n",
" <td>0.424067</td>\n",
" <td>0.201361</td>\n",
" <td>0.046029</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>172743.000000</td>\n",
" <td>2.308878</td>\n",
" <td>22.057729</td>\n",
" <td>3.940337</td>\n",
" <td>12.114672</td>\n",
" <td>20.277728</td>\n",
" <td>12.128950</td>\n",
" <td>26.237722</td>\n",
" <td>20.007208</td>\n",
" <td>7.168878</td>\n",
" <td>10.423505</td>\n",
" <td>12.018913</td>\n",
" <td>1.948126</td>\n",
" <td>3.685570</td>\n",
" <td>3.442422</td>\n",
" <td>2.471358</td>\n",
" <td>3.139656</td>\n",
" <td>6.739384</td>\n",
" <td>3.790316</td>\n",
" <td>5.228342</td>\n",
" <td>16.436920</td>\n",
" <td>27.202839</td>\n",
" <td>8.361985</td>\n",
" <td>17.606637</td>\n",
" <td>1.102636</td>\n",
" <td>3.410742</td>\n",
" <td>2.745261</td>\n",
" <td>3.052358</td>\n",
" <td>1.779364</td>\n",
" <td>29.799137</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Time V1 V2 V3 V4 \\\n",
"count 984.000000 984.000000 984.000000 984.000000 984.000000 \n",
"mean 88776.640244 -2.405835 1.819622 -3.511875 2.253022 \n",
"std 48243.944271 5.526010 3.753287 6.215601 3.188581 \n",
"min 406.000000 -30.552380 -20.984898 -31.103685 -3.421874 \n",
"25% 47898.750000 -2.896794 -0.156682 -5.084967 -0.100812 \n",
"50% 80545.000000 -0.803813 0.993854 -1.370281 1.291576 \n",
"75% 135096.750000 1.018644 2.798885 0.345737 4.235631 \n",
"max 172743.000000 2.308878 22.057729 3.940337 12.114672 \n",
"\n",
" V5 V6 V7 V8 V9 V10 \\\n",
"count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n",
"mean -1.598199 -0.703490 -2.771834 0.290248 -1.271753 -2.866398 \n",
"std 4.284063 1.787638 5.918129 4.878508 2.321658 4.526228 \n",
"min -22.105532 -14.425011 -43.557242 -41.044261 -13.434066 -24.588262 \n",
"25% -1.758911 -1.509571 -3.060742 -0.179491 -2.279453 -4.593030 \n",
"50% -0.425110 -0.640524 -0.640804 0.180271 -0.666598 -0.885646 \n",
"75% 0.442399 0.088865 0.245823 0.879226 0.205275 -0.027386 \n",
"max 20.277728 12.128950 26.237722 20.007208 7.168878 10.423505 \n",
"\n",
" V11 V12 V13 V14 V15 V16 \\\n",
"count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n",
"mean 1.950912 -3.085801 -0.038172 -3.480604 -0.047034 -2.048191 \n",
"std 2.737584 4.620098 1.046915 4.656872 0.957319 3.494192 \n",
"min -2.279476 -18.683715 -3.184202 -19.214325 -4.498945 -14.129855 \n",
"25% -0.047452 -5.495221 -0.761168 -6.721799 -0.639308 -3.543426 \n",
"50% 1.171937 -0.734549 0.002739 -0.986762 -0.012609 -0.599078 \n",
"75% 3.586130 0.285771 0.646090 0.083101 0.608586 0.302161 \n",
"max 12.018913 1.948126 3.685570 3.442422 2.471358 3.139656 \n",
"\n",
" V17 V18 V19 V20 V21 V22 \\\n",
"count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n",
"mean -3.320723 -1.121329 0.338180 0.193890 0.363782 0.002554 \n",
"std 5.975039 2.410427 1.275460 1.197462 2.801160 1.171430 \n",
"min -25.162799 -9.498746 -3.681904 -5.244333 -22.797604 -8.887017 \n",
"25% -5.302111 -1.809496 -0.441326 -0.203323 -0.159503 -0.523196 \n",
"50% -0.466877 -0.322246 0.228471 0.017882 0.140922 0.013228 \n",
"75% 0.269949 0.333976 0.955802 0.399500 0.654990 0.582570 \n",
"max 6.739384 3.790316 5.228342 16.436920 27.202839 8.361985 \n",
"\n",
" V23 V24 V25 V26 V27 V28 \\\n",
"count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n",
"mean 0.005715 -0.068002 0.007024 0.018569 0.062892 0.027783 \n",
"std 1.286473 0.572075 0.671570 0.488011 1.023450 0.425708 \n",
"min -19.254328 -2.082546 -4.781606 -1.407558 -7.263482 -1.869290 \n",
"25% -0.233359 -0.409463 -0.331296 -0.316920 -0.061141 -0.053338 \n",
"50% -0.012981 -0.001297 0.035358 -0.027331 0.044271 0.032226 \n",
"75% 0.196764 0.368293 0.371463 0.328812 0.424067 0.201361 \n",
"max 17.606637 1.102636 3.410742 2.745261 3.052358 1.779364 \n",
"\n",
" Amount Class \n",
"count 984.000000 984.000000 \n",
"mean 0.077270 0.500000 \n",
"std 1.322416 0.500254 \n",
"min -0.353229 0.000000 \n",
"25% -0.347042 0.000000 \n",
"50% -0.278285 0.500000 \n",
"75% 0.046029 1.000000 \n",
"max 29.799137 1.000000 "
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"undersample_data.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Distribution of legitimate and fraudulent transactions in an undersampled dataset"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Class\n",
"1 492\n",
"0 492\n",
"Name: count, dtype: int64"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"undersample_data['Class'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Splitting the whole dataset into training and test sets"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# Features are every column except the label; the label stays a one-column DataFrame.\n",
"X = df.drop(columns='Class')\n",
"y = df[['Class']]\n",
"\n",
"# Fraud rows are only ~0.17% of the data, so stratify on the label to keep\n",
"# the class ratio identical in the training and test partitions.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.3, random_state=0, stratify=y\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Statistical measures of the training set of the whole dataset"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 199364 entries, 161145 to 117952\n",
"Data columns (total 31 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Time 199364 non-null float64\n",
" 1 V1 199364 non-null float64\n",
" 2 V2 199364 non-null float64\n",
" 3 V3 199364 non-null float64\n",
" 4 V4 199364 non-null float64\n",
" 5 V5 199364 non-null float64\n",
" 6 V6 199364 non-null float64\n",
" 7 V7 199364 non-null float64\n",
" 8 V8 199364 non-null float64\n",
" 9 V9 199364 non-null float64\n",
" 10 V10 199364 non-null float64\n",
" 11 V11 199364 non-null float64\n",
" 12 V12 199364 non-null float64\n",
" 13 V13 199364 non-null float64\n",
" 14 V14 199364 non-null float64\n",
" 15 V15 199364 non-null float64\n",
" 16 V16 199364 non-null float64\n",
" 17 V17 199364 non-null float64\n",
" 18 V18 199364 non-null float64\n",
" 19 V19 199364 non-null float64\n",
" 20 V20 199364 non-null float64\n",
" 21 V21 199364 non-null float64\n",
" 22 V22 199364 non-null float64\n",
" 23 V23 199364 non-null float64\n",
" 24 V24 199364 non-null float64\n",
" 25 V25 199364 non-null float64\n",
" 26 V26 199364 non-null float64\n",
" 27 V27 199364 non-null float64\n",
" 28 V28 199364 non-null float64\n",
" 29 Amount 199364 non-null float64\n",
" 30 Class 199364 non-null int64 \n",
"dtypes: float64(30), int64(1)\n",
"memory usage: 48.7 MB\n"
]
}
],
"source": [
"pd.concat([X_train, y_train], axis=1).info()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Time</th>\n",
" <th>V1</th>\n",
" <th>V2</th>\n",
" <th>V3</th>\n",
" <th>V4</th>\n",
" <th>V5</th>\n",
" <th>V6</th>\n",
" <th>V7</th>\n",
" <th>V8</th>\n",
" <th>V9</th>\n",
" <th>V10</th>\n",
" <th>V11</th>\n",
" <th>V12</th>\n",
" <th>V13</th>\n",
" <th>V14</th>\n",
" <th>V15</th>\n",
" <th>V16</th>\n",
" <th>V17</th>\n",
" <th>V18</th>\n",
" <th>V19</th>\n",
" <th>V20</th>\n",
" <th>V21</th>\n",
" <th>V22</th>\n",
" <th>V23</th>\n",
" <th>V24</th>\n",
" <th>V25</th>\n",
" <th>V26</th>\n",
" <th>V27</th>\n",
" <th>V28</th>\n",
" <th>Amount</th>\n",
" <th>Class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" <td>199364.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>94799.493936</td>\n",
" <td>0.000315</td>\n",
" <td>-0.002690</td>\n",
" <td>-0.001532</td>\n",
" <td>0.000721</td>\n",
" <td>-0.001494</td>\n",
" <td>-0.000210</td>\n",
" <td>-0.000870</td>\n",
" <td>-0.001980</td>\n",
" <td>0.000212</td>\n",
" <td>0.001357</td>\n",
" <td>-0.001039</td>\n",
" <td>-0.001565</td>\n",
" <td>0.000693</td>\n",
" <td>0.000137</td>\n",
" <td>0.000322</td>\n",
" <td>0.000084</td>\n",
" <td>0.000292</td>\n",
" <td>-0.000134</td>\n",
" <td>0.000490</td>\n",
" <td>0.000430</td>\n",
" <td>-0.000014</td>\n",
" <td>-0.000022</td>\n",
" <td>-0.000258</td>\n",
" <td>0.000362</td>\n",
" <td>0.000395</td>\n",
" <td>-0.000094</td>\n",
" <td>-0.000027</td>\n",
" <td>0.000015</td>\n",
" <td>0.001271</td>\n",
" <td>0.001731</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>47499.835491</td>\n",
" <td>1.963554</td>\n",
" <td>1.657379</td>\n",
" <td>1.516716</td>\n",
" <td>1.417138</td>\n",
" <td>1.368744</td>\n",
" <td>1.328673</td>\n",
" <td>1.226018</td>\n",
" <td>1.212338</td>\n",
" <td>1.102021</td>\n",
" <td>1.092801</td>\n",
" <td>1.020027</td>\n",
" <td>0.996526</td>\n",
" <td>0.997718</td>\n",
" <td>0.956938</td>\n",
" <td>0.916143</td>\n",
" <td>0.876131</td>\n",
" <td>0.852181</td>\n",
" <td>0.837556</td>\n",
" <td>0.814506</td>\n",
" <td>0.770257</td>\n",
" <td>0.743450</td>\n",
" <td>0.727625</td>\n",
" <td>0.629145</td>\n",
" <td>0.605298</td>\n",
" <td>0.521175</td>\n",
" <td>0.481842</td>\n",
" <td>0.401042</td>\n",
" <td>0.324849</td>\n",
" <td>0.983948</td>\n",
" <td>0.041563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>-46.855047</td>\n",
" <td>-63.344698</td>\n",
" <td>-33.680984</td>\n",
" <td>-5.560118</td>\n",
" <td>-42.147898</td>\n",
" <td>-23.496714</td>\n",
" <td>-43.557242</td>\n",
" <td>-73.216718</td>\n",
" <td>-13.434066</td>\n",
" <td>-24.588262</td>\n",
" <td>-4.797473</td>\n",
" <td>-17.769143</td>\n",
" <td>-5.791881</td>\n",
" <td>-19.214325</td>\n",
" <td>-4.498945</td>\n",
" <td>-14.129855</td>\n",
" <td>-25.162799</td>\n",
" <td>-9.498746</td>\n",
" <td>-7.213527</td>\n",
" <td>-23.646890</td>\n",
" <td>-34.830382</td>\n",
" <td>-10.933144</td>\n",
" <td>-44.807735</td>\n",
" <td>-2.822684</td>\n",
" <td>-10.295397</td>\n",
" <td>-2.534330</td>\n",
" <td>-22.565679</td>\n",
" <td>-11.710896</td>\n",
" <td>-0.353229</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>54126.000000</td>\n",
" <td>-0.921539</td>\n",
" <td>-0.601213</td>\n",
" <td>-0.892838</td>\n",
" <td>-0.848835</td>\n",
" <td>-0.692874</td>\n",
" <td>-0.769177</td>\n",
" <td>-0.554220</td>\n",
" <td>-0.209086</td>\n",
" <td>-0.644753</td>\n",
" <td>-0.535493</td>\n",
" <td>-0.762852</td>\n",
" <td>-0.407660</td>\n",
" <td>-0.648456</td>\n",
" <td>-0.425122</td>\n",
" <td>-0.583616</td>\n",
" <td>-0.467945</td>\n",
" <td>-0.484055</td>\n",
" <td>-0.498850</td>\n",
" <td>-0.456800</td>\n",
" <td>-0.211662</td>\n",
" <td>-0.229272</td>\n",
" <td>-0.544345</td>\n",
" <td>-0.162021</td>\n",
" <td>-0.354179</td>\n",
" <td>-0.316088</td>\n",
" <td>-0.327327</td>\n",
" <td>-0.070864</td>\n",
" <td>-0.052907</td>\n",
" <td>-0.330640</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>84633.500000</td>\n",
" <td>0.019705</td>\n",
" <td>0.063784</td>\n",
" <td>0.177888</td>\n",
" <td>-0.017852</td>\n",
" <td>-0.055832</td>\n",
" <td>-0.274397</td>\n",
" <td>0.039228</td>\n",
" <td>0.021803</td>\n",
" <td>-0.049633</td>\n",
" <td>-0.092069</td>\n",
" <td>-0.034135</td>\n",
" <td>0.137912</td>\n",
" <td>-0.013416</td>\n",
" <td>0.051179</td>\n",
" <td>0.049289</td>\n",
" <td>0.067772</td>\n",
" <td>-0.065113</td>\n",
" <td>-0.003217</td>\n",
" <td>0.004422</td>\n",
" <td>-0.062889</td>\n",
" <td>-0.029045</td>\n",
" <td>0.006744</td>\n",
" <td>-0.010915</td>\n",
" <td>0.040974</td>\n",
" <td>0.018014</td>\n",
" <td>-0.052287</td>\n",
" <td>0.001064</td>\n",
" <td>0.011119</td>\n",
" <td>-0.265271</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>139334.250000</td>\n",
" <td>1.316707</td>\n",
" <td>0.802437</td>\n",
" <td>1.025529</td>\n",
" <td>0.745566</td>\n",
" <td>0.609349</td>\n",
" <td>0.397928</td>\n",
" <td>0.569638</td>\n",
" <td>0.327023</td>\n",
" <td>0.597096</td>\n",
" <td>0.458129</td>\n",
" <td>0.738143</td>\n",
" <td>0.617393</td>\n",
" <td>0.664148</td>\n",
" <td>0.493925</td>\n",
" <td>0.649589</td>\n",
" <td>0.523095</td>\n",
" <td>0.401034</td>\n",
" <td>0.500436</td>\n",
" <td>0.460367</td>\n",
" <td>0.132834</td>\n",
" <td>0.187095</td>\n",
" <td>0.531017</td>\n",
" <td>0.147503</td>\n",
" <td>0.438953</td>\n",
" <td>0.350802</td>\n",
" <td>0.241082</td>\n",
" <td>0.090491</td>\n",
" <td>0.077989</td>\n",
" <td>-0.043058</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>172792.000000</td>\n",
" <td>2.451888</td>\n",
" <td>22.057729</td>\n",
" <td>9.382558</td>\n",
" <td>16.715537</td>\n",
" <td>34.099309</td>\n",
" <td>23.917837</td>\n",
" <td>44.054461</td>\n",
" <td>20.007208</td>\n",
" <td>15.594995</td>\n",
" <td>23.745136</td>\n",
" <td>12.018913</td>\n",
" <td>7.848392</td>\n",
" <td>4.569009</td>\n",
" <td>10.526766</td>\n",
" <td>5.825654</td>\n",
" <td>7.059132</td>\n",
" <td>9.207059</td>\n",
" <td>5.041069</td>\n",
" <td>5.572113</td>\n",
" <td>39.420904</td>\n",
" <td>27.202839</td>\n",
" <td>10.503090</td>\n",
" <td>22.528412</td>\n",
" <td>4.022866</td>\n",
" <td>7.519589</td>\n",
" <td>3.463246</td>\n",
" <td>12.152401</td>\n",
" <td>22.620072</td>\n",
" <td>78.235272</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Time V1 V2 V3 \\\n",
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
"mean 94799.493936 0.000315 -0.002690 -0.001532 \n",
"std 47499.835491 1.963554 1.657379 1.516716 \n",
"min 0.000000 -46.855047 -63.344698 -33.680984 \n",
"25% 54126.000000 -0.921539 -0.601213 -0.892838 \n",
"50% 84633.500000 0.019705 0.063784 0.177888 \n",
"75% 139334.250000 1.316707 0.802437 1.025529 \n",
"max 172792.000000 2.451888 22.057729 9.382558 \n",
"\n",
" V4 V5 V6 V7 \\\n",
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
"mean 0.000721 -0.001494 -0.000210 -0.000870 \n",
"std 1.417138 1.368744 1.328673 1.226018 \n",
"min -5.560118 -42.147898 -23.496714 -43.557242 \n",
"25% -0.848835 -0.692874 -0.769177 -0.554220 \n",
"50% -0.017852 -0.055832 -0.274397 0.039228 \n",
"75% 0.745566 0.609349 0.397928 0.569638 \n",
"max 16.715537 34.099309 23.917837 44.054461 \n",
"\n",
" V8 V9 V10 V11 \\\n",
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
"mean -0.001980 0.000212 0.001357 -0.001039 \n",
"std 1.212338 1.102021 1.092801 1.020027 \n",
"min -73.216718 -13.434066 -24.588262 -4.797473 \n",
"25% -0.209086 -0.644753 -0.535493 -0.762852 \n",
"50% 0.021803 -0.049633 -0.092069 -0.034135 \n",
"75% 0.327023 0.597096 0.458129 0.738143 \n",
"max 20.007208 15.594995 23.745136 12.018913 \n",
"\n",
" V12 V13 V14 V15 \\\n",
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
"mean -0.001565 0.000693 0.000137 0.000322 \n",
"std 0.996526 0.997718 0.956938 0.916143 \n",
"min -17.769143 -5.791881 -19.214325 -4.498945 \n",
"25% -0.407660 -0.648456 -0.425122 -0.583616 \n",
"50% 0.137912 -0.013416 0.051179 0.049289 \n",
"75% 0.617393 0.664148 0.493925 0.649589 \n",
"max 7.848392 4.569009 10.526766 5.825654 \n",
"\n",
" V16 V17 V18 V19 \\\n",
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
"mean 0.000084 0.000292 -0.000134 0.000490 \n",
"std 0.876131 0.852181 0.837556 0.814506 \n",
"min -14.129855 -25.162799 -9.498746 -7.213527 \n",
"25% -0.467945 -0.484055 -0.498850 -0.456800 \n",
"50% 0.067772 -0.065113 -0.003217 0.004422 \n",
"75% 0.523095 0.401034 0.500436 0.460367 \n",
"max 7.059132 9.207059 5.041069 5.572113 \n",
"\n",
" V20 V21 V22 V23 \\\n",
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
"mean 0.000430 -0.000014 -0.000022 -0.000258 \n",
"std 0.770257 0.743450 0.727625 0.629145 \n",
"min -23.646890 -34.830382 -10.933144 -44.807735 \n",
"25% -0.211662 -0.229272 -0.544345 -0.162021 \n",
"50% -0.062889 -0.029045 0.006744 -0.010915 \n",
"75% 0.132834 0.187095 0.531017 0.147503 \n",
"max 39.420904 27.202839 10.503090 22.528412 \n",
"\n",
" V24 V25 V26 V27 \\\n",
"count 199364.000000 199364.000000 199364.000000 199364.000000 \n",
"mean 0.000362 0.000395 -0.000094 -0.000027 \n",
"std 0.605298 0.521175 0.481842 0.401042 \n",
"min -2.822684 -10.295397 -2.534330 -22.565679 \n",
"25% -0.354179 -0.316088 -0.327327 -0.070864 \n",
"50% 0.040974 0.018014 -0.052287 0.001064 \n",
"75% 0.438953 0.350802 0.241082 0.090491 \n",
"max 4.022866 7.519589 3.463246 12.152401 \n",
"\n",
" V28 Amount Class \n",
"count 199364.000000 199364.000000 199364.000000 \n",
"mean 0.000015 0.001271 0.001731 \n",
"std 0.324849 0.983948 0.041563 \n",
"min -11.710896 -0.353229 0.000000 \n",
"25% -0.052907 -0.330640 0.000000 \n",
"50% 0.011119 -0.265271 0.000000 \n",
"75% 0.077989 -0.043058 0.000000 \n",
"max 22.620072 78.235272 1.000000 "
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat([X_train, y_train], axis=1).describe()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Class\n",
"0 199019\n",
"1 345\n",
"Name: count, dtype: int64"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat([X_train, y_train], axis=1)['Class'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Statistical measures of the test set of the whole dataset"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 85443 entries, 183484 to 240913\n",
"Data columns (total 31 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Time 85443 non-null float64\n",
" 1 V1 85443 non-null float64\n",
" 2 V2 85443 non-null float64\n",
" 3 V3 85443 non-null float64\n",
" 4 V4 85443 non-null float64\n",
" 5 V5 85443 non-null float64\n",
" 6 V6 85443 non-null float64\n",
" 7 V7 85443 non-null float64\n",
" 8 V8 85443 non-null float64\n",
" 9 V9 85443 non-null float64\n",
" 10 V10 85443 non-null float64\n",
" 11 V11 85443 non-null float64\n",
" 12 V12 85443 non-null float64\n",
" 13 V13 85443 non-null float64\n",
" 14 V14 85443 non-null float64\n",
" 15 V15 85443 non-null float64\n",
" 16 V16 85443 non-null float64\n",
" 17 V17 85443 non-null float64\n",
" 18 V18 85443 non-null float64\n",
" 19 V19 85443 non-null float64\n",
" 20 V20 85443 non-null float64\n",
" 21 V21 85443 non-null float64\n",
" 22 V22 85443 non-null float64\n",
" 23 V23 85443 non-null float64\n",
" 24 V24 85443 non-null float64\n",
" 25 V25 85443 non-null float64\n",
" 26 V26 85443 non-null float64\n",
" 27 V27 85443 non-null float64\n",
" 28 V28 85443 non-null float64\n",
" 29 Amount 85443 non-null float64\n",
" 30 Class 85443 non-null int64 \n",
"dtypes: float64(30), int64(1)\n",
"memory usage: 20.9 MB\n"
]
}
],
"source": [
"pd.concat([X_test, y_test], axis=1).info()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Time</th>\n",
" <th>V1</th>\n",
" <th>V2</th>\n",
" <th>V3</th>\n",
" <th>V4</th>\n",
" <th>V5</th>\n",
" <th>V6</th>\n",
" <th>V7</th>\n",
" <th>V8</th>\n",
" <th>V9</th>\n",
" <th>V10</th>\n",
" <th>V11</th>\n",
" <th>V12</th>\n",
" <th>V13</th>\n",
" <th>V14</th>\n",
" <th>V15</th>\n",
" <th>V16</th>\n",
" <th>V17</th>\n",
" <th>V18</th>\n",
" <th>V19</th>\n",
" <th>V20</th>\n",
" <th>V21</th>\n",
" <th>V22</th>\n",
" <th>V23</th>\n",
" <th>V24</th>\n",
" <th>V25</th>\n",
" <th>V26</th>\n",
" <th>V27</th>\n",
" <th>V28</th>\n",
" <th>Amount</th>\n",
" <th>Class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" <td>85443.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>94847.378896</td>\n",
" <td>-0.000734</td>\n",
" <td>0.006277</td>\n",
" <td>0.003574</td>\n",
" <td>-0.001682</td>\n",
" <td>0.003486</td>\n",
" <td>0.000489</td>\n",
" <td>0.002030</td>\n",
" <td>0.004620</td>\n",
" <td>-0.000495</td>\n",
" <td>-0.003167</td>\n",
" <td>0.002424</td>\n",
" <td>0.003652</td>\n",
" <td>-0.001616</td>\n",
" <td>-0.000319</td>\n",
" <td>-0.000751</td>\n",
" <td>-0.000195</td>\n",
" <td>-0.000682</td>\n",
" <td>0.000312</td>\n",
" <td>-0.001144</td>\n",
" <td>-0.001004</td>\n",
" <td>0.000033</td>\n",
" <td>0.000052</td>\n",
" <td>0.000602</td>\n",
" <td>-0.000845</td>\n",
" <td>-0.000922</td>\n",
" <td>0.000220</td>\n",
" <td>0.000062</td>\n",
" <td>-0.000036</td>\n",
" <td>-0.002966</td>\n",
" <td>0.001720</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>47461.120548</td>\n",
" <td>1.947325</td>\n",
" <td>1.637050</td>\n",
" <td>1.515182</td>\n",
" <td>1.412908</td>\n",
" <td>1.406722</td>\n",
" <td>1.340636</td>\n",
" <td>1.262562</td>\n",
" <td>1.151291</td>\n",
" <td>1.090691</td>\n",
" <td>1.079574</td>\n",
" <td>1.022315</td>\n",
" <td>1.005413</td>\n",
" <td>0.989553</td>\n",
" <td>0.962457</td>\n",
" <td>0.913388</td>\n",
" <td>0.876542</td>\n",
" <td>0.842669</td>\n",
" <td>0.839626</td>\n",
" <td>0.812957</td>\n",
" <td>0.772484</td>\n",
" <td>0.713266</td>\n",
" <td>0.721198</td>\n",
" <td>0.613394</td>\n",
" <td>0.606464</td>\n",
" <td>0.521520</td>\n",
" <td>0.483126</td>\n",
" <td>0.409616</td>\n",
" <td>0.341987</td>\n",
" <td>1.036492</td>\n",
" <td>0.041443</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>-56.407510</td>\n",
" <td>-72.715728</td>\n",
" <td>-48.325589</td>\n",
" <td>-5.683171</td>\n",
" <td>-113.743307</td>\n",
" <td>-26.160506</td>\n",
" <td>-28.215112</td>\n",
" <td>-50.943369</td>\n",
" <td>-9.481456</td>\n",
" <td>-20.949192</td>\n",
" <td>-4.568390</td>\n",
" <td>-18.683715</td>\n",
" <td>-3.888606</td>\n",
" <td>-18.493773</td>\n",
" <td>-4.391307</td>\n",
" <td>-13.303888</td>\n",
" <td>-22.883999</td>\n",
" <td>-9.287832</td>\n",
" <td>-6.938297</td>\n",
" <td>-54.497720</td>\n",
" <td>-22.665685</td>\n",
" <td>-9.499423</td>\n",
" <td>-32.828995</td>\n",
" <td>-2.836627</td>\n",
" <td>-8.696627</td>\n",
" <td>-2.604551</td>\n",
" <td>-9.793568</td>\n",
" <td>-15.430084</td>\n",
" <td>-0.353229</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>54354.000000</td>\n",
" <td>-0.916858</td>\n",
" <td>-0.591858</td>\n",
" <td>-0.883828</td>\n",
" <td>-0.848202</td>\n",
" <td>-0.688280</td>\n",
" <td>-0.766664</td>\n",
" <td>-0.553479</td>\n",
" <td>-0.207216</td>\n",
" <td>-0.638926</td>\n",
" <td>-0.535400</td>\n",
" <td>-0.761716</td>\n",
" <td>-0.400087</td>\n",
" <td>-0.648761</td>\n",
" <td>-0.426516</td>\n",
" <td>-0.581015</td>\n",
" <td>-0.468312</td>\n",
" <td>-0.483139</td>\n",
" <td>-0.498660</td>\n",
" <td>-0.455027</td>\n",
" <td>-0.211881</td>\n",
" <td>-0.226184</td>\n",
" <td>-0.537704</td>\n",
" <td>-0.161490</td>\n",
" <td>-0.355671</td>\n",
" <td>-0.319736</td>\n",
" <td>-0.326068</td>\n",
" <td>-0.070797</td>\n",
" <td>-0.053129</td>\n",
" <td>-0.331280</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>84850.000000</td>\n",
" <td>0.013238</td>\n",
" <td>0.070185</td>\n",
" <td>0.185047</td>\n",
" <td>-0.024109</td>\n",
" <td>-0.051627</td>\n",
" <td>-0.273686</td>\n",
" <td>0.042343</td>\n",
" <td>0.023782</td>\n",
" <td>-0.053821</td>\n",
" <td>-0.094949</td>\n",
" <td>-0.029129</td>\n",
" <td>0.144948</td>\n",
" <td>-0.013803</td>\n",
" <td>0.049248</td>\n",
" <td>0.045291</td>\n",
" <td>0.062957</td>\n",
" <td>-0.066955</td>\n",
" <td>-0.004245</td>\n",
" <td>0.002229</td>\n",
" <td>-0.061529</td>\n",
" <td>-0.030687</td>\n",
" <td>0.006971</td>\n",
" <td>-0.011789</td>\n",
" <td>0.040976</td>\n",
" <td>0.013508</td>\n",
" <td>-0.051695</td>\n",
" <td>0.001984</td>\n",
" <td>0.011561</td>\n",
" <td>-0.265271</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>139277.500000</td>\n",
" <td>1.313257</td>\n",
" <td>0.806615</td>\n",
" <td>1.031155</td>\n",
" <td>0.737784</td>\n",
" <td>0.618067</td>\n",
" <td>0.399864</td>\n",
" <td>0.572423</td>\n",
" <td>0.328337</td>\n",
" <td>0.597388</td>\n",
" <td>0.443126</td>\n",
" <td>0.743511</td>\n",
" <td>0.620694</td>\n",
" <td>0.657826</td>\n",
" <td>0.491916</td>\n",
" <td>0.647117</td>\n",
" <td>0.523608</td>\n",
" <td>0.396799</td>\n",
" <td>0.501455</td>\n",
" <td>0.455249</td>\n",
" <td>0.133608</td>\n",
" <td>0.184846</td>\n",
" <td>0.523689</td>\n",
" <td>0.147923</td>\n",
" <td>0.441093</td>\n",
" <td>0.350617</td>\n",
" <td>0.240657</td>\n",
" <td>0.092224</td>\n",
" <td>0.078900</td>\n",
" <td>-0.047356</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>172788.000000</td>\n",
" <td>2.454930</td>\n",
" <td>15.876923</td>\n",
" <td>4.079168</td>\n",
" <td>16.875344</td>\n",
" <td>34.801666</td>\n",
" <td>73.301626</td>\n",
" <td>120.589494</td>\n",
" <td>18.748872</td>\n",
" <td>9.272376</td>\n",
" <td>15.331742</td>\n",
" <td>11.669205</td>\n",
" <td>4.406338</td>\n",
" <td>7.126883</td>\n",
" <td>7.439566</td>\n",
" <td>8.877742</td>\n",
" <td>17.315112</td>\n",
" <td>9.253526</td>\n",
" <td>4.712398</td>\n",
" <td>5.591971</td>\n",
" <td>38.117209</td>\n",
" <td>22.579714</td>\n",
" <td>7.220158</td>\n",
" <td>20.803344</td>\n",
" <td>4.584549</td>\n",
" <td>5.826159</td>\n",
" <td>3.517346</td>\n",
" <td>31.612198</td>\n",
" <td>33.847808</td>\n",
" <td>102.362243</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Time V1 V2 V3 V4 \\\n",
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
"mean 94847.378896 -0.000734 0.006277 0.003574 -0.001682 \n",
"std 47461.120548 1.947325 1.637050 1.515182 1.412908 \n",
"min 0.000000 -56.407510 -72.715728 -48.325589 -5.683171 \n",
"25% 54354.000000 -0.916858 -0.591858 -0.883828 -0.848202 \n",
"50% 84850.000000 0.013238 0.070185 0.185047 -0.024109 \n",
"75% 139277.500000 1.313257 0.806615 1.031155 0.737784 \n",
"max 172788.000000 2.454930 15.876923 4.079168 16.875344 \n",
"\n",
" V5 V6 V7 V8 V9 \\\n",
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
"mean 0.003486 0.000489 0.002030 0.004620 -0.000495 \n",
"std 1.406722 1.340636 1.262562 1.151291 1.090691 \n",
"min -113.743307 -26.160506 -28.215112 -50.943369 -9.481456 \n",
"25% -0.688280 -0.766664 -0.553479 -0.207216 -0.638926 \n",
"50% -0.051627 -0.273686 0.042343 0.023782 -0.053821 \n",
"75% 0.618067 0.399864 0.572423 0.328337 0.597388 \n",
"max 34.801666 73.301626 120.589494 18.748872 9.272376 \n",
"\n",
" V10 V11 V12 V13 V14 \\\n",
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
"mean -0.003167 0.002424 0.003652 -0.001616 -0.000319 \n",
"std 1.079574 1.022315 1.005413 0.989553 0.962457 \n",
"min -20.949192 -4.568390 -18.683715 -3.888606 -18.493773 \n",
"25% -0.535400 -0.761716 -0.400087 -0.648761 -0.426516 \n",
"50% -0.094949 -0.029129 0.144948 -0.013803 0.049248 \n",
"75% 0.443126 0.743511 0.620694 0.657826 0.491916 \n",
"max 15.331742 11.669205 4.406338 7.126883 7.439566 \n",
"\n",
" V15 V16 V17 V18 V19 \\\n",
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
"mean -0.000751 -0.000195 -0.000682 0.000312 -0.001144 \n",
"std 0.913388 0.876542 0.842669 0.839626 0.812957 \n",
"min -4.391307 -13.303888 -22.883999 -9.287832 -6.938297 \n",
"25% -0.581015 -0.468312 -0.483139 -0.498660 -0.455027 \n",
"50% 0.045291 0.062957 -0.066955 -0.004245 0.002229 \n",
"75% 0.647117 0.523608 0.396799 0.501455 0.455249 \n",
"max 8.877742 17.315112 9.253526 4.712398 5.591971 \n",
"\n",
" V20 V21 V22 V23 V24 \\\n",
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
"mean -0.001004 0.000033 0.000052 0.000602 -0.000845 \n",
"std 0.772484 0.713266 0.721198 0.613394 0.606464 \n",
"min -54.497720 -22.665685 -9.499423 -32.828995 -2.836627 \n",
"25% -0.211881 -0.226184 -0.537704 -0.161490 -0.355671 \n",
"50% -0.061529 -0.030687 0.006971 -0.011789 0.040976 \n",
"75% 0.133608 0.184846 0.523689 0.147923 0.441093 \n",
"max 38.117209 22.579714 7.220158 20.803344 4.584549 \n",
"\n",
" V25 V26 V27 V28 Amount \\\n",
"count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n",
"mean -0.000922 0.000220 0.000062 -0.000036 -0.002966 \n",
"std 0.521520 0.483126 0.409616 0.341987 1.036492 \n",
"min -8.696627 -2.604551 -9.793568 -15.430084 -0.353229 \n",
"25% -0.319736 -0.326068 -0.070797 -0.053129 -0.331280 \n",
"50% 0.013508 -0.051695 0.001984 0.011561 -0.265271 \n",
"75% 0.350617 0.240657 0.092224 0.078900 -0.047356 \n",
"max 5.826159 3.517346 31.612198 33.847808 102.362243 \n",
"\n",
" Class \n",
"count 85443.000000 \n",
"mean 0.001720 \n",
"std 0.041443 \n",
"min 0.000000 \n",
"25% 0.000000 \n",
"50% 0.000000 \n",
"75% 0.000000 \n",
"max 1.000000 "
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat([X_test, y_test], axis=1).describe()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Class\n",
"0 85296\n",
"1 147\n",
"Name: count, dtype: int64"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat([X_test, y_test], axis=1)['Class'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Splitting the undersampled dataset into training and test sets"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"# 70/30 split of the undersampled features and labels with a fixed seed.\n",
"(\n",
"    X_train_undersample,\n",
"    X_test_undersample,\n",
"    y_train_undersample,\n",
"    y_test_undersample,\n",
") = train_test_split(\n",
"    X_undersample,\n",
"    y_undersample,\n",
"    test_size=0.3,\n",
"    random_state=0,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Statistical measures of the training set of the undersampled dataset"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 688 entries, 6870 to 106127\n",
"Data columns (total 31 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Time 688 non-null float64\n",
" 1 V1 688 non-null float64\n",
" 2 V2 688 non-null float64\n",
" 3 V3 688 non-null float64\n",
" 4 V4 688 non-null float64\n",
" 5 V5 688 non-null float64\n",
" 6 V6 688 non-null float64\n",
" 7 V7 688 non-null float64\n",
" 8 V8 688 non-null float64\n",
" 9 V9 688 non-null float64\n",
" 10 V10 688 non-null float64\n",
" 11 V11 688 non-null float64\n",
" 12 V12 688 non-null float64\n",
" 13 V13 688 non-null float64\n",
" 14 V14 688 non-null float64\n",
" 15 V15 688 non-null float64\n",
" 16 V16 688 non-null float64\n",
" 17 V17 688 non-null float64\n",
" 18 V18 688 non-null float64\n",
" 19 V19 688 non-null float64\n",
" 20 V20 688 non-null float64\n",
" 21 V21 688 non-null float64\n",
" 22 V22 688 non-null float64\n",
" 23 V23 688 non-null float64\n",
" 24 V24 688 non-null float64\n",
" 25 V25 688 non-null float64\n",
" 26 V26 688 non-null float64\n",
" 27 V27 688 non-null float64\n",
" 28 V28 688 non-null float64\n",
" 29 Amount 688 non-null float64\n",
" 30 Class 688 non-null int64 \n",
"dtypes: float64(30), int64(1)\n",
"memory usage: 172.0 KB\n"
]
}
],
"source": [
"# Column dtypes and non-null counts for the undersampled training split.\n",
"pd.concat(\n",
"    [X_train_undersample, y_train_undersample],\n",
"    axis='columns',\n",
").info()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Time</th>\n",
" <th>V1</th>\n",
" <th>V2</th>\n",
" <th>V3</th>\n",
" <th>V4</th>\n",
" <th>V5</th>\n",
" <th>V6</th>\n",
" <th>V7</th>\n",
" <th>V8</th>\n",
" <th>V9</th>\n",
" <th>V10</th>\n",
" <th>V11</th>\n",
" <th>V12</th>\n",
" <th>V13</th>\n",
" <th>V14</th>\n",
" <th>V15</th>\n",
" <th>V16</th>\n",
" <th>V17</th>\n",
" <th>V18</th>\n",
" <th>V19</th>\n",
" <th>V20</th>\n",
" <th>V21</th>\n",
" <th>V22</th>\n",
" <th>V23</th>\n",
" <th>V24</th>\n",
" <th>V25</th>\n",
" <th>V26</th>\n",
" <th>V27</th>\n",
" <th>V28</th>\n",
" <th>Amount</th>\n",
" <th>Class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" <td>688.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>88957.739826</td>\n",
" <td>-2.453850</td>\n",
" <td>1.764205</td>\n",
" <td>-3.499349</td>\n",
" <td>2.201040</td>\n",
" <td>-1.614046</td>\n",
" <td>-0.742028</td>\n",
" <td>-2.720380</td>\n",
" <td>0.399640</td>\n",
" <td>-1.253509</td>\n",
" <td>-2.849864</td>\n",
" <td>1.963640</td>\n",
" <td>-3.099043</td>\n",
" <td>-0.029291</td>\n",
" <td>-3.531500</td>\n",
" <td>-0.042676</td>\n",
" <td>-2.076032</td>\n",
" <td>-3.378747</td>\n",
" <td>-1.150437</td>\n",
" <td>0.328887</td>\n",
" <td>0.168520</td>\n",
" <td>0.467831</td>\n",
" <td>-0.034725</td>\n",
" <td>-0.020933</td>\n",
" <td>-0.071299</td>\n",
" <td>0.007821</td>\n",
" <td>0.019710</td>\n",
" <td>0.084212</td>\n",
" <td>0.022576</td>\n",
" <td>0.108357</td>\n",
" <td>0.501453</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>48118.243616</td>\n",
" <td>5.389205</td>\n",
" <td>3.718764</td>\n",
" <td>6.010612</td>\n",
" <td>3.168326</td>\n",
" <td>4.284352</td>\n",
" <td>1.814252</td>\n",
" <td>5.608259</td>\n",
" <td>4.753225</td>\n",
" <td>2.282000</td>\n",
" <td>4.370873</td>\n",
" <td>2.737854</td>\n",
" <td>4.582021</td>\n",
" <td>1.068595</td>\n",
" <td>4.657990</td>\n",
" <td>0.932128</td>\n",
" <td>3.498859</td>\n",
" <td>6.050258</td>\n",
" <td>2.453462</td>\n",
" <td>1.280609</td>\n",
" <td>1.196703</td>\n",
" <td>2.745063</td>\n",
" <td>1.150116</td>\n",
" <td>1.428199</td>\n",
" <td>0.580309</td>\n",
" <td>0.688226</td>\n",
" <td>0.498609</td>\n",
" <td>0.941491</td>\n",
" <td>0.426584</td>\n",
" <td>1.489427</td>\n",
" <td>0.500362</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>406.000000</td>\n",
" <td>-30.552380</td>\n",
" <td>-20.984898</td>\n",
" <td>-31.103685</td>\n",
" <td>-3.421874</td>\n",
" <td>-22.105532</td>\n",
" <td>-14.425011</td>\n",
" <td>-37.060311</td>\n",
" <td>-37.353443</td>\n",
" <td>-11.126624</td>\n",
" <td>-23.228255</td>\n",
" <td>-2.279476</td>\n",
" <td>-18.431131</td>\n",
" <td>-3.184202</td>\n",
" <td>-19.214325</td>\n",
" <td>-4.498945</td>\n",
" <td>-13.563273</td>\n",
" <td>-25.162799</td>\n",
" <td>-9.498746</td>\n",
" <td>-3.602657</td>\n",
" <td>-5.244333</td>\n",
" <td>-16.922016</td>\n",
" <td>-8.887017</td>\n",
" <td>-19.254328</td>\n",
" <td>-2.082546</td>\n",
" <td>-4.781606</td>\n",
" <td>-1.407558</td>\n",
" <td>-7.263482</td>\n",
" <td>-1.869290</td>\n",
" <td>-0.353229</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>47336.750000</td>\n",
" <td>-3.004735</td>\n",
" <td>-0.166482</td>\n",
" <td>-5.049001</td>\n",
" <td>-0.148394</td>\n",
" <td>-1.859243</td>\n",
" <td>-1.582428</td>\n",
" <td>-3.103817</td>\n",
" <td>-0.202836</td>\n",
" <td>-2.205996</td>\n",
" <td>-4.758711</td>\n",
" <td>-0.020195</td>\n",
" <td>-5.643631</td>\n",
" <td>-0.763081</td>\n",
" <td>-6.767749</td>\n",
" <td>-0.616305</td>\n",
" <td>-3.612856</td>\n",
" <td>-5.277726</td>\n",
" <td>-1.829426</td>\n",
" <td>-0.447025</td>\n",
" <td>-0.206394</td>\n",
" <td>-0.148616</td>\n",
" <td>-0.542923</td>\n",
" <td>-0.244385</td>\n",
" <td>-0.409463</td>\n",
" <td>-0.331382</td>\n",
" <td>-0.317043</td>\n",
" <td>-0.067511</td>\n",
" <td>-0.057033</td>\n",
" <td>-0.346073</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>79364.500000</td>\n",
" <td>-0.868711</td>\n",
" <td>1.030021</td>\n",
" <td>-1.441205</td>\n",
" <td>1.287384</td>\n",
" <td>-0.448367</td>\n",
" <td>-0.652911</td>\n",
" <td>-0.695771</td>\n",
" <td>0.189872</td>\n",
" <td>-0.678169</td>\n",
" <td>-0.938824</td>\n",
" <td>1.160274</td>\n",
" <td>-0.785118</td>\n",
" <td>-0.019448</td>\n",
" <td>-1.051034</td>\n",
" <td>-0.022344</td>\n",
" <td>-0.632427</td>\n",
" <td>-0.488137</td>\n",
" <td>-0.368900</td>\n",
" <td>0.213812</td>\n",
" <td>0.013168</td>\n",
" <td>0.163742</td>\n",
" <td>-0.017305</td>\n",
" <td>-0.004737</td>\n",
" <td>-0.003431</td>\n",
" <td>0.024723</td>\n",
" <td>-0.029254</td>\n",
" <td>0.042375</td>\n",
" <td>0.031353</td>\n",
" <td>-0.266371</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>135949.500000</td>\n",
" <td>0.949687</td>\n",
" <td>2.873792</td>\n",
" <td>0.311361</td>\n",
" <td>4.184137</td>\n",
" <td>0.439156</td>\n",
" <td>0.078117</td>\n",
" <td>0.222234</td>\n",
" <td>0.943207</td>\n",
" <td>0.189021</td>\n",
" <td>-0.023676</td>\n",
" <td>3.614383</td>\n",
" <td>0.283724</td>\n",
" <td>0.702811</td>\n",
" <td>0.083101</td>\n",
" <td>0.612832</td>\n",
" <td>0.291246</td>\n",
" <td>0.267112</td>\n",
" <td>0.334927</td>\n",
" <td>0.927348</td>\n",
" <td>0.420088</td>\n",
" <td>0.712372</td>\n",
" <td>0.582570</td>\n",
" <td>0.192766</td>\n",
" <td>0.368230</td>\n",
" <td>0.375865</td>\n",
" <td>0.322036</td>\n",
" <td>0.453371</td>\n",
" <td>0.212253</td>\n",
" <td>0.046539</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>172743.000000</td>\n",
" <td>2.308878</td>\n",
" <td>19.167239</td>\n",
" <td>2.927645</td>\n",
" <td>11.927512</td>\n",
" <td>20.277728</td>\n",
" <td>12.128950</td>\n",
" <td>26.237722</td>\n",
" <td>20.007208</td>\n",
" <td>7.168878</td>\n",
" <td>10.423505</td>\n",
" <td>12.018913</td>\n",
" <td>1.948126</td>\n",
" <td>2.899203</td>\n",
" <td>3.442422</td>\n",
" <td>2.310710</td>\n",
" <td>3.139656</td>\n",
" <td>6.739384</td>\n",
" <td>3.790316</td>\n",
" <td>5.228342</td>\n",
" <td>16.436920</td>\n",
" <td>27.202839</td>\n",
" <td>5.774087</td>\n",
" <td>17.606637</td>\n",
" <td>1.102636</td>\n",
" <td>3.410742</td>\n",
" <td>2.745261</td>\n",
" <td>3.052358</td>\n",
" <td>1.471988</td>\n",
" <td>29.799137</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Time V1 V2 V3 V4 \\\n",
"count 688.000000 688.000000 688.000000 688.000000 688.000000 \n",
"mean 88957.739826 -2.453850 1.764205 -3.499349 2.201040 \n",
"std 48118.243616 5.389205 3.718764 6.010612 3.168326 \n",
"min 406.000000 -30.552380 -20.984898 -31.103685 -3.421874 \n",
"25% 47336.750000 -3.004735 -0.166482 -5.049001 -0.148394 \n",
"50% 79364.500000 -0.868711 1.030021 -1.441205 1.287384 \n",
"75% 135949.500000 0.949687 2.873792 0.311361 4.184137 \n",
"max 172743.000000 2.308878 19.167239 2.927645 11.927512 \n",
"\n",
" V5 V6 V7 V8 V9 V10 \\\n",
"count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n",
"mean -1.614046 -0.742028 -2.720380 0.399640 -1.253509 -2.849864 \n",
"std 4.284352 1.814252 5.608259 4.753225 2.282000 4.370873 \n",
"min -22.105532 -14.425011 -37.060311 -37.353443 -11.126624 -23.228255 \n",
"25% -1.859243 -1.582428 -3.103817 -0.202836 -2.205996 -4.758711 \n",
"50% -0.448367 -0.652911 -0.695771 0.189872 -0.678169 -0.938824 \n",
"75% 0.439156 0.078117 0.222234 0.943207 0.189021 -0.023676 \n",
"max 20.277728 12.128950 26.237722 20.007208 7.168878 10.423505 \n",
"\n",
" V11 V12 V13 V14 V15 V16 \\\n",
"count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n",
"mean 1.963640 -3.099043 -0.029291 -3.531500 -0.042676 -2.076032 \n",
"std 2.737854 4.582021 1.068595 4.657990 0.932128 3.498859 \n",
"min -2.279476 -18.431131 -3.184202 -19.214325 -4.498945 -13.563273 \n",
"25% -0.020195 -5.643631 -0.763081 -6.767749 -0.616305 -3.612856 \n",
"50% 1.160274 -0.785118 -0.019448 -1.051034 -0.022344 -0.632427 \n",
"75% 3.614383 0.283724 0.702811 0.083101 0.612832 0.291246 \n",
"max 12.018913 1.948126 2.899203 3.442422 2.310710 3.139656 \n",
"\n",
" V17 V18 V19 V20 V21 V22 \\\n",
"count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n",
"mean -3.378747 -1.150437 0.328887 0.168520 0.467831 -0.034725 \n",
"std 6.050258 2.453462 1.280609 1.196703 2.745063 1.150116 \n",
"min -25.162799 -9.498746 -3.602657 -5.244333 -16.922016 -8.887017 \n",
"25% -5.277726 -1.829426 -0.447025 -0.206394 -0.148616 -0.542923 \n",
"50% -0.488137 -0.368900 0.213812 0.013168 0.163742 -0.017305 \n",
"75% 0.267112 0.334927 0.927348 0.420088 0.712372 0.582570 \n",
"max 6.739384 3.790316 5.228342 16.436920 27.202839 5.774087 \n",
"\n",
" V23 V24 V25 V26 V27 V28 \\\n",
"count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n",
"mean -0.020933 -0.071299 0.007821 0.019710 0.084212 0.022576 \n",
"std 1.428199 0.580309 0.688226 0.498609 0.941491 0.426584 \n",
"min -19.254328 -2.082546 -4.781606 -1.407558 -7.263482 -1.869290 \n",
"25% -0.244385 -0.409463 -0.331382 -0.317043 -0.067511 -0.057033 \n",
"50% -0.004737 -0.003431 0.024723 -0.029254 0.042375 0.031353 \n",
"75% 0.192766 0.368230 0.375865 0.322036 0.453371 0.212253 \n",
"max 17.606637 1.102636 3.410742 2.745261 3.052358 1.471988 \n",
"\n",
" Amount Class \n",
"count 688.000000 688.000000 \n",
"mean 0.108357 0.501453 \n",
"std 1.489427 0.500362 \n",
"min -0.353229 0.000000 \n",
"25% -0.346073 0.000000 \n",
"50% -0.266371 1.000000 \n",
"75% 0.046539 1.000000 \n",
"max 29.799137 1.000000 "
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics for the undersampled training split.\n",
"pd.concat(\n",
"    [X_train_undersample, y_train_undersample],\n",
"    axis='columns',\n",
").describe()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Class\n",
"1 345\n",
"0 343\n",
"Name: count, dtype: int64"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows per Class value in the undersampled training split.\n",
"pd.concat(\n",
"    [X_train_undersample, y_train_undersample],\n",
"    axis='columns',\n",
").loc[:, 'Class'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Statistical measures of the test dataset of undersampled data"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 296 entries, 102782 to 26982\n",
"Data columns (total 31 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Time 296 non-null float64\n",
" 1 V1 296 non-null float64\n",
" 2 V2 296 non-null float64\n",
" 3 V3 296 non-null float64\n",
" 4 V4 296 non-null float64\n",
" 5 V5 296 non-null float64\n",
" 6 V6 296 non-null float64\n",
" 7 V7 296 non-null float64\n",
" 8 V8 296 non-null float64\n",
" 9 V9 296 non-null float64\n",
" 10 V10 296 non-null float64\n",
" 11 V11 296 non-null float64\n",
" 12 V12 296 non-null float64\n",
" 13 V13 296 non-null float64\n",
" 14 V14 296 non-null float64\n",
" 15 V15 296 non-null float64\n",
" 16 V16 296 non-null float64\n",
" 17 V17 296 non-null float64\n",
" 18 V18 296 non-null float64\n",
" 19 V19 296 non-null float64\n",
" 20 V20 296 non-null float64\n",
" 21 V21 296 non-null float64\n",
" 22 V22 296 non-null float64\n",
" 23 V23 296 non-null float64\n",
" 24 V24 296 non-null float64\n",
" 25 V25 296 non-null float64\n",
" 26 V26 296 non-null float64\n",
" 27 V27 296 non-null float64\n",
" 28 V28 296 non-null float64\n",
" 29 Amount 296 non-null float64\n",
" 30 Class 296 non-null int64 \n",
"dtypes: float64(30), int64(1)\n",
"memory usage: 74.0 KB\n"
]
}
],
"source": [
"# Column dtypes and non-null counts for the undersampled test split.\n",
"pd.concat(\n",
"    [X_test_undersample, y_test_undersample],\n",
"    axis='columns',\n",
").info()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Time</th>\n",
" <th>V1</th>\n",
" <th>V2</th>\n",
" <th>V3</th>\n",
" <th>V4</th>\n",
" <th>V5</th>\n",
" <th>V6</th>\n",
" <th>V7</th>\n",
" <th>V8</th>\n",
" <th>V9</th>\n",
" <th>V10</th>\n",
" <th>V11</th>\n",
" <th>V12</th>\n",
" <th>V13</th>\n",
" <th>V14</th>\n",
" <th>V15</th>\n",
" <th>V16</th>\n",
" <th>V17</th>\n",
" <th>V18</th>\n",
" <th>V19</th>\n",
" <th>V20</th>\n",
" <th>V21</th>\n",
" <th>V22</th>\n",
" <th>V23</th>\n",
" <th>V24</th>\n",
" <th>V25</th>\n",
" <th>V26</th>\n",
" <th>V27</th>\n",
" <th>V28</th>\n",
" <th>Amount</th>\n",
" <th>Class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" <td>296.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>88355.706081</td>\n",
" <td>-2.294234</td>\n",
" <td>1.948429</td>\n",
" <td>-3.540990</td>\n",
" <td>2.373844</td>\n",
" <td>-1.561363</td>\n",
" <td>-0.613913</td>\n",
" <td>-2.891430</td>\n",
" <td>0.035986</td>\n",
" <td>-1.314160</td>\n",
" <td>-2.904830</td>\n",
" <td>1.921330</td>\n",
" <td>-3.055022</td>\n",
" <td>-0.058815</td>\n",
" <td>-3.362305</td>\n",
" <td>-0.057163</td>\n",
" <td>-1.983480</td>\n",
" <td>-3.185857</td>\n",
" <td>-1.053673</td>\n",
" <td>0.359781</td>\n",
" <td>0.252860</td>\n",
" <td>0.121940</td>\n",
" <td>0.089200</td>\n",
" <td>0.067653</td>\n",
" <td>-0.060339</td>\n",
" <td>0.005171</td>\n",
" <td>0.015917</td>\n",
" <td>0.013339</td>\n",
" <td>0.039887</td>\n",
" <td>0.005012</td>\n",
" <td>0.496622</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>48614.011043</td>\n",
" <td>5.839514</td>\n",
" <td>3.835604</td>\n",
" <td>6.678324</td>\n",
" <td>3.237348</td>\n",
" <td>4.290418</td>\n",
" <td>1.723870</td>\n",
" <td>6.590923</td>\n",
" <td>5.157315</td>\n",
" <td>2.414740</td>\n",
" <td>4.875752</td>\n",
" <td>2.741364</td>\n",
" <td>4.715112</td>\n",
" <td>0.996157</td>\n",
" <td>4.660005</td>\n",
" <td>1.015019</td>\n",
" <td>3.488374</td>\n",
" <td>5.804268</td>\n",
" <td>2.309940</td>\n",
" <td>1.265303</td>\n",
" <td>1.199174</td>\n",
" <td>2.917828</td>\n",
" <td>1.217111</td>\n",
" <td>0.871280</td>\n",
" <td>0.553352</td>\n",
" <td>0.632284</td>\n",
" <td>0.463252</td>\n",
" <td>1.192693</td>\n",
" <td>0.424138</td>\n",
" <td>0.808447</td>\n",
" <td>0.500835</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>472.000000</td>\n",
" <td>-29.876366</td>\n",
" <td>-8.402154</td>\n",
" <td>-30.558697</td>\n",
" <td>-3.085355</td>\n",
" <td>-21.665654</td>\n",
" <td>-5.773192</td>\n",
" <td>-43.557242</td>\n",
" <td>-41.044261</td>\n",
" <td>-13.434066</td>\n",
" <td>-24.588262</td>\n",
" <td>-1.937109</td>\n",
" <td>-18.683715</td>\n",
" <td>-3.076318</td>\n",
" <td>-17.620634</td>\n",
" <td>-2.992430</td>\n",
" <td>-14.129855</td>\n",
" <td>-22.541652</td>\n",
" <td>-9.090892</td>\n",
" <td>-3.681904</td>\n",
" <td>-3.493050</td>\n",
" <td>-22.797604</td>\n",
" <td>-8.887017</td>\n",
" <td>-5.988806</td>\n",
" <td>-1.690377</td>\n",
" <td>-2.079928</td>\n",
" <td>-1.104535</td>\n",
" <td>-7.263482</td>\n",
" <td>-1.429517</td>\n",
" <td>-0.353229</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>48520.000000</td>\n",
" <td>-2.736113</td>\n",
" <td>-0.105197</td>\n",
" <td>-5.417818</td>\n",
" <td>0.057290</td>\n",
" <td>-1.559190</td>\n",
" <td>-1.431466</td>\n",
" <td>-2.835885</td>\n",
" <td>-0.147139</td>\n",
" <td>-2.345829</td>\n",
" <td>-4.445615</td>\n",
" <td>-0.072236</td>\n",
" <td>-5.340188</td>\n",
" <td>-0.752422</td>\n",
" <td>-6.363108</td>\n",
" <td>-0.694497</td>\n",
" <td>-3.304713</td>\n",
" <td>-5.358990</td>\n",
" <td>-1.719376</td>\n",
" <td>-0.397503</td>\n",
" <td>-0.188506</td>\n",
" <td>-0.166957</td>\n",
" <td>-0.491502</td>\n",
" <td>-0.202399</td>\n",
" <td>-0.407468</td>\n",
" <td>-0.330734</td>\n",
" <td>-0.315317</td>\n",
" <td>-0.054283</td>\n",
" <td>-0.047520</td>\n",
" <td>-0.348692</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>83038.000000</td>\n",
" <td>-0.679669</td>\n",
" <td>0.947084</td>\n",
" <td>-1.255930</td>\n",
" <td>1.295388</td>\n",
" <td>-0.355709</td>\n",
" <td>-0.594254</td>\n",
" <td>-0.568848</td>\n",
" <td>0.146018</td>\n",
" <td>-0.661503</td>\n",
" <td>-0.718549</td>\n",
" <td>1.218066</td>\n",
" <td>-0.649834</td>\n",
" <td>0.012057</td>\n",
" <td>-0.869158</td>\n",
" <td>0.006640</td>\n",
" <td>-0.575661</td>\n",
" <td>-0.423941</td>\n",
" <td>-0.203209</td>\n",
" <td>0.241634</td>\n",
" <td>0.030365</td>\n",
" <td>0.085900</td>\n",
" <td>0.015825</td>\n",
" <td>-0.034589</td>\n",
" <td>0.007165</td>\n",
" <td>0.059396</td>\n",
" <td>-0.011840</td>\n",
" <td>0.061374</td>\n",
" <td>0.033726</td>\n",
" <td>-0.303633</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>130068.500000</td>\n",
" <td>1.084018</td>\n",
" <td>2.721612</td>\n",
" <td>0.469584</td>\n",
" <td>4.318982</td>\n",
" <td>0.448566</td>\n",
" <td>0.147038</td>\n",
" <td>0.296190</td>\n",
" <td>0.759335</td>\n",
" <td>0.224591</td>\n",
" <td>-0.030622</td>\n",
" <td>3.542336</td>\n",
" <td>0.288974</td>\n",
" <td>0.575899</td>\n",
" <td>0.074100</td>\n",
" <td>0.565682</td>\n",
" <td>0.346687</td>\n",
" <td>0.274371</td>\n",
" <td>0.316774</td>\n",
" <td>1.010884</td>\n",
" <td>0.363387</td>\n",
" <td>0.577355</td>\n",
" <td>0.571807</td>\n",
" <td>0.207959</td>\n",
" <td>0.381890</td>\n",
" <td>0.356160</td>\n",
" <td>0.354294</td>\n",
" <td>0.391969</td>\n",
" <td>0.175744</td>\n",
" <td>0.022531</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>171578.000000</td>\n",
" <td>2.255274</td>\n",
" <td>22.057729</td>\n",
" <td>3.940337</td>\n",
" <td>12.114672</td>\n",
" <td>9.880564</td>\n",
" <td>6.474115</td>\n",
" <td>3.791907</td>\n",
" <td>19.587773</td>\n",
" <td>2.690959</td>\n",
" <td>3.245086</td>\n",
" <td>11.152491</td>\n",
" <td>1.879038</td>\n",
" <td>3.685570</td>\n",
" <td>2.704376</td>\n",
" <td>2.471358</td>\n",
" <td>2.594266</td>\n",
" <td>6.443649</td>\n",
" <td>2.591846</td>\n",
" <td>4.851255</td>\n",
" <td>11.059004</td>\n",
" <td>27.202839</td>\n",
" <td>8.361985</td>\n",
" <td>5.466230</td>\n",
" <td>1.099509</td>\n",
" <td>2.156042</td>\n",
" <td>1.207731</td>\n",
" <td>2.706566</td>\n",
" <td>1.779364</td>\n",
" <td>5.663610</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Time V1 V2 V3 V4 \\\n",
"count 296.000000 296.000000 296.000000 296.000000 296.000000 \n",
"mean 88355.706081 -2.294234 1.948429 -3.540990 2.373844 \n",
"std 48614.011043 5.839514 3.835604 6.678324 3.237348 \n",
"min 472.000000 -29.876366 -8.402154 -30.558697 -3.085355 \n",
"25% 48520.000000 -2.736113 -0.105197 -5.417818 0.057290 \n",
"50% 83038.000000 -0.679669 0.947084 -1.255930 1.295388 \n",
"75% 130068.500000 1.084018 2.721612 0.469584 4.318982 \n",
"max 171578.000000 2.255274 22.057729 3.940337 12.114672 \n",
"\n",
" V5 V6 V7 V8 V9 V10 \\\n",
"count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n",
"mean -1.561363 -0.613913 -2.891430 0.035986 -1.314160 -2.904830 \n",
"std 4.290418 1.723870 6.590923 5.157315 2.414740 4.875752 \n",
"min -21.665654 -5.773192 -43.557242 -41.044261 -13.434066 -24.588262 \n",
"25% -1.559190 -1.431466 -2.835885 -0.147139 -2.345829 -4.445615 \n",
"50% -0.355709 -0.594254 -0.568848 0.146018 -0.661503 -0.718549 \n",
"75% 0.448566 0.147038 0.296190 0.759335 0.224591 -0.030622 \n",
"max 9.880564 6.474115 3.791907 19.587773 2.690959 3.245086 \n",
"\n",
" V11 V12 V13 V14 V15 V16 \\\n",
"count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n",
"mean 1.921330 -3.055022 -0.058815 -3.362305 -0.057163 -1.983480 \n",
"std 2.741364 4.715112 0.996157 4.660005 1.015019 3.488374 \n",
"min -1.937109 -18.683715 -3.076318 -17.620634 -2.992430 -14.129855 \n",
"25% -0.072236 -5.340188 -0.752422 -6.363108 -0.694497 -3.304713 \n",
"50% 1.218066 -0.649834 0.012057 -0.869158 0.006640 -0.575661 \n",
"75% 3.542336 0.288974 0.575899 0.074100 0.565682 0.346687 \n",
"max 11.152491 1.879038 3.685570 2.704376 2.471358 2.594266 \n",
"\n",
" V17 V18 V19 V20 V21 V22 \\\n",
"count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n",
"mean -3.185857 -1.053673 0.359781 0.252860 0.121940 0.089200 \n",
"std 5.804268 2.309940 1.265303 1.199174 2.917828 1.217111 \n",
"min -22.541652 -9.090892 -3.681904 -3.493050 -22.797604 -8.887017 \n",
"25% -5.358990 -1.719376 -0.397503 -0.188506 -0.166957 -0.491502 \n",
"50% -0.423941 -0.203209 0.241634 0.030365 0.085900 0.015825 \n",
"75% 0.274371 0.316774 1.010884 0.363387 0.577355 0.571807 \n",
"max 6.443649 2.591846 4.851255 11.059004 27.202839 8.361985 \n",
"\n",
" V23 V24 V25 V26 V27 V28 \\\n",
"count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n",
"mean 0.067653 -0.060339 0.005171 0.015917 0.013339 0.039887 \n",
"std 0.871280 0.553352 0.632284 0.463252 1.192693 0.424138 \n",
"min -5.988806 -1.690377 -2.079928 -1.104535 -7.263482 -1.429517 \n",
"25% -0.202399 -0.407468 -0.330734 -0.315317 -0.054283 -0.047520 \n",
"50% -0.034589 0.007165 0.059396 -0.011840 0.061374 0.033726 \n",
"75% 0.207959 0.381890 0.356160 0.354294 0.391969 0.175744 \n",
"max 5.466230 1.099509 2.156042 1.207731 2.706566 1.779364 \n",
"\n",
" Amount Class \n",
"count 296.000000 296.000000 \n",
"mean 0.005012 0.496622 \n",
"std 0.808447 0.500835 \n",
"min -0.353229 0.000000 \n",
"25% -0.348692 0.000000 \n",
"50% -0.303633 0.000000 \n",
"75% 0.022531 1.000000 \n",
"max 5.663610 1.000000 "
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics for the undersampled test split.\n",
"pd.concat(\n",
"    [X_test_undersample, y_test_undersample],\n",
"    axis='columns',\n",
").describe()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Class\n",
"0 149\n",
"1 147\n",
"Name: count, dtype: int64"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows per Class value in the undersampled test split.\n",
"pd.concat(\n",
"    [X_test_undersample, y_test_undersample],\n",
"    axis='columns',\n",
").loc[:, 'Class'].value_counts()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}