2024-03-16 14:35:44 +01:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"## IUM 2"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Installation of packages"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 86,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"Requirement already satisfied: kaggle in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.6.6)\n",
|
|
|
|
"Requirement already satisfied: six>=1.10 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from kaggle) (1.16.0)\n",
|
|
|
|
"Requirement already satisfied: certifi in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2024.2.2)\n",
|
|
|
|
"Requirement already satisfied: python-dateutil in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from kaggle) (2.9.0.post0)\n",
|
|
|
|
"Requirement already satisfied: requests in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2.31.0)\n",
|
|
|
|
"Requirement already satisfied: tqdm in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (4.66.2)\n",
|
|
|
|
"Requirement already satisfied: python-slugify in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (8.0.4)\n",
|
|
|
|
"Requirement already satisfied: urllib3 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2.2.1)\n",
|
|
|
|
"Requirement already satisfied: bleach in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (6.1.0)\n",
|
|
|
|
"Requirement already satisfied: webencodings in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from bleach->kaggle) (0.5.1)\n",
|
|
|
|
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
|
|
|
|
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from requests->kaggle) (3.3.2)\n",
|
|
|
|
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from requests->kaggle) (3.6)\n",
|
|
|
|
"Requirement already satisfied: colorama in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from tqdm->kaggle) (0.4.6)\n",
|
|
|
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
|
|
"Requirement already satisfied: pandas in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (2.2.1)\n",
|
|
|
|
"Requirement already satisfied: numpy<2,>=1.26.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (1.26.3)\n",
|
|
|
|
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from pandas) (2.9.0.post0)\n",
|
|
|
|
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n",
|
|
|
|
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n",
|
|
|
|
"Requirement already satisfied: six>=1.5 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
|
|
|
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
|
|
"Requirement already satisfied: numpy in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.26.3)\n",
|
|
|
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
|
|
"Requirement already satisfied: scikit-learn in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.4.1.post1)\n",
|
|
|
|
"Requirement already satisfied: numpy<2.0,>=1.19.5 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.26.3)\n",
|
|
|
|
"Requirement already satisfied: scipy>=1.6.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.12.0)\n",
|
|
|
|
"Requirement already satisfied: joblib>=1.2.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.3.2)\n",
|
|
|
|
"Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (3.3.0)\n",
|
|
|
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"%pip install kaggle\n",
|
|
|
|
"%pip install pandas\n",
|
|
|
|
"%pip install numpy\n",
|
|
|
|
"%pip install scikit-learn"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Importing libraries"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 87,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"import numpy as np\n",
|
|
|
|
"\n",
|
|
|
|
"# To preprocess the data\n",
|
|
|
|
"from sklearn.preprocessing import StandardScaler\n",
|
|
|
|
"\n",
|
|
|
|
"# To split the data\n",
|
|
|
|
"from sklearn.model_selection import train_test_split"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Downloading a dataset"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 88,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"creditcardfraud.zip: Skipping, found more recently modified local copy (use --force to force download)\n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"!kaggle datasets download -d mlg-ulb/creditcardfraud"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Uncompress a file"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 89,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"Archive: creditcardfraud.zip\n",
|
|
|
|
" inflating: creditcard.csv \n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"!unzip -o creditcardfraud.zip"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Load the data"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 90,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"df = pd.read_csv('creditcard.csv')\n",
|
|
|
|
"pd.set_option('display.max_columns', None)"
|
|
|
|
]
|
|
|
|
},
|
2024-03-16 14:45:16 +01:00
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Check missing values"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 91,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"Time 0\n",
|
|
|
|
"V1 0\n",
|
|
|
|
"V2 0\n",
|
|
|
|
"V3 0\n",
|
|
|
|
"V4 0\n",
|
|
|
|
"V5 0\n",
|
|
|
|
"V6 0\n",
|
|
|
|
"V7 0\n",
|
|
|
|
"V8 0\n",
|
|
|
|
"V9 0\n",
|
|
|
|
"V10 0\n",
|
|
|
|
"V11 0\n",
|
|
|
|
"V12 0\n",
|
|
|
|
"V13 0\n",
|
|
|
|
"V14 0\n",
|
|
|
|
"V15 0\n",
|
|
|
|
"V16 0\n",
|
|
|
|
"V17 0\n",
|
|
|
|
"V18 0\n",
|
|
|
|
"V19 0\n",
|
|
|
|
"V20 0\n",
|
|
|
|
"V21 0\n",
|
|
|
|
"V22 0\n",
|
|
|
|
"V23 0\n",
|
|
|
|
"V24 0\n",
|
|
|
|
"V25 0\n",
|
|
|
|
"V26 0\n",
|
|
|
|
"V27 0\n",
|
|
|
|
"V28 0\n",
|
|
|
|
"Amount 0\n",
|
|
|
|
"Class 0\n",
|
|
|
|
"dtype: int64"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 91,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"df.isnull().sum()"
|
|
|
|
]
|
|
|
|
},
|
2024-03-16 14:35:44 +01:00
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Size of the dataset"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 92,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
"RangeIndex: 284807 entries, 0 to 284806\n",
|
|
|
|
"Data columns (total 31 columns):\n",
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
" 0 Time 284807 non-null float64\n",
|
|
|
|
" 1 V1 284807 non-null float64\n",
|
|
|
|
" 2 V2 284807 non-null float64\n",
|
|
|
|
" 3 V3 284807 non-null float64\n",
|
|
|
|
" 4 V4 284807 non-null float64\n",
|
|
|
|
" 5 V5 284807 non-null float64\n",
|
|
|
|
" 6 V6 284807 non-null float64\n",
|
|
|
|
" 7 V7 284807 non-null float64\n",
|
|
|
|
" 8 V8 284807 non-null float64\n",
|
|
|
|
" 9 V9 284807 non-null float64\n",
|
|
|
|
" 10 V10 284807 non-null float64\n",
|
|
|
|
" 11 V11 284807 non-null float64\n",
|
|
|
|
" 12 V12 284807 non-null float64\n",
|
|
|
|
" 13 V13 284807 non-null float64\n",
|
|
|
|
" 14 V14 284807 non-null float64\n",
|
|
|
|
" 15 V15 284807 non-null float64\n",
|
|
|
|
" 16 V16 284807 non-null float64\n",
|
|
|
|
" 17 V17 284807 non-null float64\n",
|
|
|
|
" 18 V18 284807 non-null float64\n",
|
|
|
|
" 19 V19 284807 non-null float64\n",
|
|
|
|
" 20 V20 284807 non-null float64\n",
|
|
|
|
" 21 V21 284807 non-null float64\n",
|
|
|
|
" 22 V22 284807 non-null float64\n",
|
|
|
|
" 23 V23 284807 non-null float64\n",
|
|
|
|
" 24 V24 284807 non-null float64\n",
|
|
|
|
" 25 V25 284807 non-null float64\n",
|
|
|
|
" 26 V26 284807 non-null float64\n",
|
|
|
|
" 27 V27 284807 non-null float64\n",
|
|
|
|
" 28 V28 284807 non-null float64\n",
|
|
|
|
" 29 Amount 284807 non-null float64\n",
|
|
|
|
" 30 Class 284807 non-null int64 \n",
|
|
|
|
"dtypes: float64(30), int64(1)\n",
|
|
|
|
"memory usage: 67.4 MB\n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"df.info()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Standardising the Amount column"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 93,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"scaler = StandardScaler()\n",
|
|
|
|
"\n",
|
|
|
|
"df['Amount'] = scaler.fit_transform(df['Amount'].values.reshape(-1, 1))"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Summary statistics"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 94,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>Time</th>\n",
|
|
|
|
" <th>V1</th>\n",
|
|
|
|
" <th>V2</th>\n",
|
|
|
|
" <th>V3</th>\n",
|
|
|
|
" <th>V4</th>\n",
|
|
|
|
" <th>V5</th>\n",
|
|
|
|
" <th>V6</th>\n",
|
|
|
|
" <th>V7</th>\n",
|
|
|
|
" <th>V8</th>\n",
|
|
|
|
" <th>V9</th>\n",
|
|
|
|
" <th>V10</th>\n",
|
|
|
|
" <th>V11</th>\n",
|
|
|
|
" <th>V12</th>\n",
|
|
|
|
" <th>V13</th>\n",
|
|
|
|
" <th>V14</th>\n",
|
|
|
|
" <th>V15</th>\n",
|
|
|
|
" <th>V16</th>\n",
|
|
|
|
" <th>V17</th>\n",
|
|
|
|
" <th>V18</th>\n",
|
|
|
|
" <th>V19</th>\n",
|
|
|
|
" <th>V20</th>\n",
|
|
|
|
" <th>V21</th>\n",
|
|
|
|
" <th>V22</th>\n",
|
|
|
|
" <th>V23</th>\n",
|
|
|
|
" <th>V24</th>\n",
|
|
|
|
" <th>V25</th>\n",
|
|
|
|
" <th>V26</th>\n",
|
|
|
|
" <th>V27</th>\n",
|
|
|
|
" <th>V28</th>\n",
|
|
|
|
" <th>Amount</th>\n",
|
|
|
|
" <th>Class</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>count</th>\n",
|
|
|
|
" <td>284807.000000</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>2.848070e+05</td>\n",
|
|
|
|
" <td>284807.000000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>mean</th>\n",
|
|
|
|
" <td>94813.859575</td>\n",
|
|
|
|
" <td>1.168375e-15</td>\n",
|
|
|
|
" <td>3.416908e-16</td>\n",
|
|
|
|
" <td>-1.379537e-15</td>\n",
|
|
|
|
" <td>2.074095e-15</td>\n",
|
|
|
|
" <td>9.604066e-16</td>\n",
|
|
|
|
" <td>1.487313e-15</td>\n",
|
|
|
|
" <td>-5.556467e-16</td>\n",
|
|
|
|
" <td>1.213481e-16</td>\n",
|
|
|
|
" <td>-2.406331e-15</td>\n",
|
|
|
|
" <td>2.239053e-15</td>\n",
|
|
|
|
" <td>1.673327e-15</td>\n",
|
|
|
|
" <td>-1.247012e-15</td>\n",
|
|
|
|
" <td>8.190001e-16</td>\n",
|
|
|
|
" <td>1.207294e-15</td>\n",
|
|
|
|
" <td>4.887456e-15</td>\n",
|
|
|
|
" <td>1.437716e-15</td>\n",
|
|
|
|
" <td>-3.772171e-16</td>\n",
|
|
|
|
" <td>9.564149e-16</td>\n",
|
|
|
|
" <td>1.039917e-15</td>\n",
|
|
|
|
" <td>6.406204e-16</td>\n",
|
|
|
|
" <td>1.654067e-16</td>\n",
|
|
|
|
" <td>-3.568593e-16</td>\n",
|
|
|
|
" <td>2.578648e-16</td>\n",
|
|
|
|
" <td>4.473266e-15</td>\n",
|
|
|
|
" <td>5.340915e-16</td>\n",
|
|
|
|
" <td>1.683437e-15</td>\n",
|
|
|
|
" <td>-3.660091e-16</td>\n",
|
|
|
|
" <td>-1.227390e-16</td>\n",
|
|
|
|
" <td>2.913952e-17</td>\n",
|
|
|
|
" <td>0.001727</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>std</th>\n",
|
|
|
|
" <td>47488.145955</td>\n",
|
|
|
|
" <td>1.958696e+00</td>\n",
|
|
|
|
" <td>1.651309e+00</td>\n",
|
|
|
|
" <td>1.516255e+00</td>\n",
|
|
|
|
" <td>1.415869e+00</td>\n",
|
|
|
|
" <td>1.380247e+00</td>\n",
|
|
|
|
" <td>1.332271e+00</td>\n",
|
|
|
|
" <td>1.237094e+00</td>\n",
|
|
|
|
" <td>1.194353e+00</td>\n",
|
|
|
|
" <td>1.098632e+00</td>\n",
|
|
|
|
" <td>1.088850e+00</td>\n",
|
|
|
|
" <td>1.020713e+00</td>\n",
|
|
|
|
" <td>9.992014e-01</td>\n",
|
|
|
|
" <td>9.952742e-01</td>\n",
|
|
|
|
" <td>9.585956e-01</td>\n",
|
|
|
|
" <td>9.153160e-01</td>\n",
|
|
|
|
" <td>8.762529e-01</td>\n",
|
|
|
|
" <td>8.493371e-01</td>\n",
|
|
|
|
" <td>8.381762e-01</td>\n",
|
|
|
|
" <td>8.140405e-01</td>\n",
|
|
|
|
" <td>7.709250e-01</td>\n",
|
|
|
|
" <td>7.345240e-01</td>\n",
|
|
|
|
" <td>7.257016e-01</td>\n",
|
|
|
|
" <td>6.244603e-01</td>\n",
|
|
|
|
" <td>6.056471e-01</td>\n",
|
|
|
|
" <td>5.212781e-01</td>\n",
|
|
|
|
" <td>4.822270e-01</td>\n",
|
|
|
|
" <td>4.036325e-01</td>\n",
|
|
|
|
" <td>3.300833e-01</td>\n",
|
|
|
|
" <td>1.000002e+00</td>\n",
|
|
|
|
" <td>0.041527</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>min</th>\n",
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
" <td>-5.640751e+01</td>\n",
|
|
|
|
" <td>-7.271573e+01</td>\n",
|
|
|
|
" <td>-4.832559e+01</td>\n",
|
|
|
|
" <td>-5.683171e+00</td>\n",
|
|
|
|
" <td>-1.137433e+02</td>\n",
|
|
|
|
" <td>-2.616051e+01</td>\n",
|
|
|
|
" <td>-4.355724e+01</td>\n",
|
|
|
|
" <td>-7.321672e+01</td>\n",
|
|
|
|
" <td>-1.343407e+01</td>\n",
|
|
|
|
" <td>-2.458826e+01</td>\n",
|
|
|
|
" <td>-4.797473e+00</td>\n",
|
|
|
|
" <td>-1.868371e+01</td>\n",
|
|
|
|
" <td>-5.791881e+00</td>\n",
|
|
|
|
" <td>-1.921433e+01</td>\n",
|
|
|
|
" <td>-4.498945e+00</td>\n",
|
|
|
|
" <td>-1.412985e+01</td>\n",
|
|
|
|
" <td>-2.516280e+01</td>\n",
|
|
|
|
" <td>-9.498746e+00</td>\n",
|
|
|
|
" <td>-7.213527e+00</td>\n",
|
|
|
|
" <td>-5.449772e+01</td>\n",
|
|
|
|
" <td>-3.483038e+01</td>\n",
|
|
|
|
" <td>-1.093314e+01</td>\n",
|
|
|
|
" <td>-4.480774e+01</td>\n",
|
|
|
|
" <td>-2.836627e+00</td>\n",
|
|
|
|
" <td>-1.029540e+01</td>\n",
|
|
|
|
" <td>-2.604551e+00</td>\n",
|
|
|
|
" <td>-2.256568e+01</td>\n",
|
|
|
|
" <td>-1.543008e+01</td>\n",
|
|
|
|
" <td>-3.532294e-01</td>\n",
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>25%</th>\n",
|
|
|
|
" <td>54201.500000</td>\n",
|
|
|
|
" <td>-9.203734e-01</td>\n",
|
|
|
|
" <td>-5.985499e-01</td>\n",
|
|
|
|
" <td>-8.903648e-01</td>\n",
|
|
|
|
" <td>-8.486401e-01</td>\n",
|
|
|
|
" <td>-6.915971e-01</td>\n",
|
|
|
|
" <td>-7.682956e-01</td>\n",
|
|
|
|
" <td>-5.540759e-01</td>\n",
|
|
|
|
" <td>-2.086297e-01</td>\n",
|
|
|
|
" <td>-6.430976e-01</td>\n",
|
|
|
|
" <td>-5.354257e-01</td>\n",
|
|
|
|
" <td>-7.624942e-01</td>\n",
|
|
|
|
" <td>-4.055715e-01</td>\n",
|
|
|
|
" <td>-6.485393e-01</td>\n",
|
|
|
|
" <td>-4.255740e-01</td>\n",
|
|
|
|
" <td>-5.828843e-01</td>\n",
|
|
|
|
" <td>-4.680368e-01</td>\n",
|
|
|
|
" <td>-4.837483e-01</td>\n",
|
|
|
|
" <td>-4.988498e-01</td>\n",
|
|
|
|
" <td>-4.562989e-01</td>\n",
|
|
|
|
" <td>-2.117214e-01</td>\n",
|
|
|
|
" <td>-2.283949e-01</td>\n",
|
|
|
|
" <td>-5.423504e-01</td>\n",
|
|
|
|
" <td>-1.618463e-01</td>\n",
|
|
|
|
" <td>-3.545861e-01</td>\n",
|
|
|
|
" <td>-3.171451e-01</td>\n",
|
|
|
|
" <td>-3.269839e-01</td>\n",
|
|
|
|
" <td>-7.083953e-02</td>\n",
|
|
|
|
" <td>-5.295979e-02</td>\n",
|
|
|
|
" <td>-3.308401e-01</td>\n",
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>50%</th>\n",
|
|
|
|
" <td>84692.000000</td>\n",
|
|
|
|
" <td>1.810880e-02</td>\n",
|
|
|
|
" <td>6.548556e-02</td>\n",
|
|
|
|
" <td>1.798463e-01</td>\n",
|
|
|
|
" <td>-1.984653e-02</td>\n",
|
|
|
|
" <td>-5.433583e-02</td>\n",
|
|
|
|
" <td>-2.741871e-01</td>\n",
|
|
|
|
" <td>4.010308e-02</td>\n",
|
|
|
|
" <td>2.235804e-02</td>\n",
|
|
|
|
" <td>-5.142873e-02</td>\n",
|
|
|
|
" <td>-9.291738e-02</td>\n",
|
|
|
|
" <td>-3.275735e-02</td>\n",
|
|
|
|
" <td>1.400326e-01</td>\n",
|
|
|
|
" <td>-1.356806e-02</td>\n",
|
|
|
|
" <td>5.060132e-02</td>\n",
|
|
|
|
" <td>4.807155e-02</td>\n",
|
|
|
|
" <td>6.641332e-02</td>\n",
|
|
|
|
" <td>-6.567575e-02</td>\n",
|
|
|
|
" <td>-3.636312e-03</td>\n",
|
|
|
|
" <td>3.734823e-03</td>\n",
|
|
|
|
" <td>-6.248109e-02</td>\n",
|
|
|
|
" <td>-2.945017e-02</td>\n",
|
|
|
|
" <td>6.781943e-03</td>\n",
|
|
|
|
" <td>-1.119293e-02</td>\n",
|
|
|
|
" <td>4.097606e-02</td>\n",
|
|
|
|
" <td>1.659350e-02</td>\n",
|
|
|
|
" <td>-5.213911e-02</td>\n",
|
|
|
|
" <td>1.342146e-03</td>\n",
|
|
|
|
" <td>1.124383e-02</td>\n",
|
|
|
|
" <td>-2.652715e-01</td>\n",
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>75%</th>\n",
|
|
|
|
" <td>139320.500000</td>\n",
|
|
|
|
" <td>1.315642e+00</td>\n",
|
|
|
|
" <td>8.037239e-01</td>\n",
|
|
|
|
" <td>1.027196e+00</td>\n",
|
|
|
|
" <td>7.433413e-01</td>\n",
|
|
|
|
" <td>6.119264e-01</td>\n",
|
|
|
|
" <td>3.985649e-01</td>\n",
|
|
|
|
" <td>5.704361e-01</td>\n",
|
|
|
|
" <td>3.273459e-01</td>\n",
|
|
|
|
" <td>5.971390e-01</td>\n",
|
|
|
|
" <td>4.539234e-01</td>\n",
|
|
|
|
" <td>7.395934e-01</td>\n",
|
|
|
|
" <td>6.182380e-01</td>\n",
|
|
|
|
" <td>6.625050e-01</td>\n",
|
|
|
|
" <td>4.931498e-01</td>\n",
|
|
|
|
" <td>6.488208e-01</td>\n",
|
|
|
|
" <td>5.232963e-01</td>\n",
|
|
|
|
" <td>3.996750e-01</td>\n",
|
|
|
|
" <td>5.008067e-01</td>\n",
|
|
|
|
" <td>4.589494e-01</td>\n",
|
|
|
|
" <td>1.330408e-01</td>\n",
|
|
|
|
" <td>1.863772e-01</td>\n",
|
|
|
|
" <td>5.285536e-01</td>\n",
|
|
|
|
" <td>1.476421e-01</td>\n",
|
|
|
|
" <td>4.395266e-01</td>\n",
|
|
|
|
" <td>3.507156e-01</td>\n",
|
|
|
|
" <td>2.409522e-01</td>\n",
|
|
|
|
" <td>9.104512e-02</td>\n",
|
|
|
|
" <td>7.827995e-02</td>\n",
|
|
|
|
" <td>-4.471707e-02</td>\n",
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>max</th>\n",
|
|
|
|
" <td>172792.000000</td>\n",
|
|
|
|
" <td>2.454930e+00</td>\n",
|
|
|
|
" <td>2.205773e+01</td>\n",
|
|
|
|
" <td>9.382558e+00</td>\n",
|
|
|
|
" <td>1.687534e+01</td>\n",
|
|
|
|
" <td>3.480167e+01</td>\n",
|
|
|
|
" <td>7.330163e+01</td>\n",
|
|
|
|
" <td>1.205895e+02</td>\n",
|
|
|
|
" <td>2.000721e+01</td>\n",
|
|
|
|
" <td>1.559499e+01</td>\n",
|
|
|
|
" <td>2.374514e+01</td>\n",
|
|
|
|
" <td>1.201891e+01</td>\n",
|
|
|
|
" <td>7.848392e+00</td>\n",
|
|
|
|
" <td>7.126883e+00</td>\n",
|
|
|
|
" <td>1.052677e+01</td>\n",
|
|
|
|
" <td>8.877742e+00</td>\n",
|
|
|
|
" <td>1.731511e+01</td>\n",
|
|
|
|
" <td>9.253526e+00</td>\n",
|
|
|
|
" <td>5.041069e+00</td>\n",
|
|
|
|
" <td>5.591971e+00</td>\n",
|
|
|
|
" <td>3.942090e+01</td>\n",
|
|
|
|
" <td>2.720284e+01</td>\n",
|
|
|
|
" <td>1.050309e+01</td>\n",
|
|
|
|
" <td>2.252841e+01</td>\n",
|
|
|
|
" <td>4.584549e+00</td>\n",
|
|
|
|
" <td>7.519589e+00</td>\n",
|
|
|
|
" <td>3.517346e+00</td>\n",
|
|
|
|
" <td>3.161220e+01</td>\n",
|
|
|
|
" <td>3.384781e+01</td>\n",
|
|
|
|
" <td>1.023622e+02</td>\n",
|
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
|
|
|
" Time V1 V2 V3 V4 \\\n",
|
|
|
|
"count 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
|
|
"mean 94813.859575 1.168375e-15 3.416908e-16 -1.379537e-15 2.074095e-15 \n",
|
|
|
|
"std 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00 1.415869e+00 \n",
|
|
|
|
"min 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00 \n",
|
|
|
|
"25% 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01 \n",
|
|
|
|
"50% 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01 -1.984653e-02 \n",
|
|
|
|
"75% 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00 7.433413e-01 \n",
|
|
|
|
"max 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00 1.687534e+01 \n",
|
|
|
|
"\n",
|
|
|
|
" V5 V6 V7 V8 V9 \\\n",
|
|
|
|
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
|
|
"mean 9.604066e-16 1.487313e-15 -5.556467e-16 1.213481e-16 -2.406331e-15 \n",
|
|
|
|
"std 1.380247e+00 1.332271e+00 1.237094e+00 1.194353e+00 1.098632e+00 \n",
|
|
|
|
"min -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01 -1.343407e+01 \n",
|
|
|
|
"25% -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01 -6.430976e-01 \n",
|
|
|
|
"50% -5.433583e-02 -2.741871e-01 4.010308e-02 2.235804e-02 -5.142873e-02 \n",
|
|
|
|
"75% 6.119264e-01 3.985649e-01 5.704361e-01 3.273459e-01 5.971390e-01 \n",
|
|
|
|
"max 3.480167e+01 7.330163e+01 1.205895e+02 2.000721e+01 1.559499e+01 \n",
|
|
|
|
"\n",
|
|
|
|
" V10 V11 V12 V13 V14 \\\n",
|
|
|
|
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
|
|
"mean 2.239053e-15 1.673327e-15 -1.247012e-15 8.190001e-16 1.207294e-15 \n",
|
|
|
|
"std 1.088850e+00 1.020713e+00 9.992014e-01 9.952742e-01 9.585956e-01 \n",
|
|
|
|
"min -2.458826e+01 -4.797473e+00 -1.868371e+01 -5.791881e+00 -1.921433e+01 \n",
|
|
|
|
"25% -5.354257e-01 -7.624942e-01 -4.055715e-01 -6.485393e-01 -4.255740e-01 \n",
|
|
|
|
"50% -9.291738e-02 -3.275735e-02 1.400326e-01 -1.356806e-02 5.060132e-02 \n",
|
|
|
|
"75% 4.539234e-01 7.395934e-01 6.182380e-01 6.625050e-01 4.931498e-01 \n",
|
|
|
|
"max 2.374514e+01 1.201891e+01 7.848392e+00 7.126883e+00 1.052677e+01 \n",
|
|
|
|
"\n",
|
|
|
|
" V15 V16 V17 V18 V19 \\\n",
|
|
|
|
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
|
|
"mean 4.887456e-15 1.437716e-15 -3.772171e-16 9.564149e-16 1.039917e-15 \n",
|
|
|
|
"std 9.153160e-01 8.762529e-01 8.493371e-01 8.381762e-01 8.140405e-01 \n",
|
|
|
|
"min -4.498945e+00 -1.412985e+01 -2.516280e+01 -9.498746e+00 -7.213527e+00 \n",
|
|
|
|
"25% -5.828843e-01 -4.680368e-01 -4.837483e-01 -4.988498e-01 -4.562989e-01 \n",
|
|
|
|
"50% 4.807155e-02 6.641332e-02 -6.567575e-02 -3.636312e-03 3.734823e-03 \n",
|
|
|
|
"75% 6.488208e-01 5.232963e-01 3.996750e-01 5.008067e-01 4.589494e-01 \n",
|
|
|
|
"max 8.877742e+00 1.731511e+01 9.253526e+00 5.041069e+00 5.591971e+00 \n",
|
|
|
|
"\n",
|
|
|
|
" V20 V21 V22 V23 V24 \\\n",
|
|
|
|
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
|
|
"mean 6.406204e-16 1.654067e-16 -3.568593e-16 2.578648e-16 4.473266e-15 \n",
|
|
|
|
"std 7.709250e-01 7.345240e-01 7.257016e-01 6.244603e-01 6.056471e-01 \n",
|
|
|
|
"min -5.449772e+01 -3.483038e+01 -1.093314e+01 -4.480774e+01 -2.836627e+00 \n",
|
|
|
|
"25% -2.117214e-01 -2.283949e-01 -5.423504e-01 -1.618463e-01 -3.545861e-01 \n",
|
|
|
|
"50% -6.248109e-02 -2.945017e-02 6.781943e-03 -1.119293e-02 4.097606e-02 \n",
|
|
|
|
"75% 1.330408e-01 1.863772e-01 5.285536e-01 1.476421e-01 4.395266e-01 \n",
|
|
|
|
"max 3.942090e+01 2.720284e+01 1.050309e+01 2.252841e+01 4.584549e+00 \n",
|
|
|
|
"\n",
|
|
|
|
" V25 V26 V27 V28 Amount \\\n",
|
|
|
|
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n",
|
|
|
|
"mean 5.340915e-16 1.683437e-15 -3.660091e-16 -1.227390e-16 2.913952e-17 \n",
|
|
|
|
"std 5.212781e-01 4.822270e-01 4.036325e-01 3.300833e-01 1.000002e+00 \n",
|
|
|
|
"min -1.029540e+01 -2.604551e+00 -2.256568e+01 -1.543008e+01 -3.532294e-01 \n",
|
|
|
|
"25% -3.171451e-01 -3.269839e-01 -7.083953e-02 -5.295979e-02 -3.308401e-01 \n",
|
|
|
|
"50% 1.659350e-02 -5.213911e-02 1.342146e-03 1.124383e-02 -2.652715e-01 \n",
|
|
|
|
"75% 3.507156e-01 2.409522e-01 9.104512e-02 7.827995e-02 -4.471707e-02 \n",
|
|
|
|
"max 7.519589e+00 3.517346e+00 3.161220e+01 3.384781e+01 1.023622e+02 \n",
|
|
|
|
"\n",
|
|
|
|
" Class \n",
|
|
|
|
"count 284807.000000 \n",
|
|
|
|
"mean 0.001727 \n",
|
|
|
|
"std 0.041527 \n",
|
|
|
|
"min 0.000000 \n",
|
|
|
|
"25% 0.000000 \n",
|
|
|
|
"50% 0.000000 \n",
|
|
|
|
"75% 0.000000 \n",
|
|
|
|
"max 1.000000 "
|
|
|
|
]
|
|
|
|
},
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 94,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"df.describe()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Distribution of legitimate and fraudulent transactions"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 95,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"Class\n",
|
|
|
|
"0 284315\n",
|
|
|
|
"1 492\n",
|
|
|
|
"Name: count, dtype: int64"
|
|
|
|
]
|
|
|
|
},
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 95,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"df['Class'].value_counts()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Undersampling the data\n",
|
|
|
|
"We undersample the majority class because legitimate transactions (284,315) vastly outnumber fraudulent ones (492)."
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 96,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Determine the number of instances in the minority class\n",
|
|
|
|
"fraud_count = len(df[df.Class == 1])\n",
|
|
|
|
"fraud_indices = np.array(df[df.Class == 1].index)\n",
|
|
|
|
"\n",
|
|
|
|
"# Select indices corresponding to majority class instances\n",
|
|
|
|
"normal_indices = df[df.Class == 0].index\n",
|
|
|
|
"\n",
|
|
|
|
"# Randomly sample the same number of instances from the majority class\n",
|
|
|
|
"random_normal_indices = np.random.default_rng(42).choice(normal_indices, fraud_count, replace=False)  # fixed seed: the undersample is reproducible on re-run\n",
|
|
|
|
"random_normal_indices = np.array(random_normal_indices)\n",
|
|
|
|
"\n",
|
|
|
|
"# Combine indices of both classes\n",
|
|
|
|
"undersample_indice = np.concatenate([fraud_indices, random_normal_indices])\n",
|
|
|
|
"\n",
|
|
|
|
"# Undersample dataset\n",
|
|
|
|
"undersample_data = df.loc[undersample_indice, :]  # label-based lookup: these values come from df.index, not positions\n",
|
|
|
|
"\n",
|
|
|
|
"X_undersample = undersample_data.iloc[:, undersample_data.columns != 'Class']\n",
|
|
|
|
"y_undersample = undersample_data.iloc[:, undersample_data.columns == 'Class']"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Size of undersampled dataset"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 97,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
"Index: 984 entries, 541 to 141412\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
"Data columns (total 31 columns):\n",
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
" 0 Time 984 non-null float64\n",
|
|
|
|
" 1 V1 984 non-null float64\n",
|
|
|
|
" 2 V2 984 non-null float64\n",
|
|
|
|
" 3 V3 984 non-null float64\n",
|
|
|
|
" 4 V4 984 non-null float64\n",
|
|
|
|
" 5 V5 984 non-null float64\n",
|
|
|
|
" 6 V6 984 non-null float64\n",
|
|
|
|
" 7 V7 984 non-null float64\n",
|
|
|
|
" 8 V8 984 non-null float64\n",
|
|
|
|
" 9 V9 984 non-null float64\n",
|
|
|
|
" 10 V10 984 non-null float64\n",
|
|
|
|
" 11 V11 984 non-null float64\n",
|
|
|
|
" 12 V12 984 non-null float64\n",
|
|
|
|
" 13 V13 984 non-null float64\n",
|
|
|
|
" 14 V14 984 non-null float64\n",
|
|
|
|
" 15 V15 984 non-null float64\n",
|
|
|
|
" 16 V16 984 non-null float64\n",
|
|
|
|
" 17 V17 984 non-null float64\n",
|
|
|
|
" 18 V18 984 non-null float64\n",
|
|
|
|
" 19 V19 984 non-null float64\n",
|
|
|
|
" 20 V20 984 non-null float64\n",
|
|
|
|
" 21 V21 984 non-null float64\n",
|
|
|
|
" 22 V22 984 non-null float64\n",
|
|
|
|
" 23 V23 984 non-null float64\n",
|
|
|
|
" 24 V24 984 non-null float64\n",
|
|
|
|
" 25 V25 984 non-null float64\n",
|
|
|
|
" 26 V26 984 non-null float64\n",
|
|
|
|
" 27 V27 984 non-null float64\n",
|
|
|
|
" 28 V28 984 non-null float64\n",
|
|
|
|
" 29 Amount 984 non-null float64\n",
|
|
|
|
" 30 Class 984 non-null int64 \n",
|
|
|
|
"dtypes: float64(30), int64(1)\n",
|
|
|
|
"memory usage: 246.0 KB\n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"undersample_data.info()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"### Summary statistics of the undersampled dataset"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-03-16 14:45:16 +01:00
|
|
|
"execution_count": 98,
|
2024-03-16 14:35:44 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>Time</th>\n",
|
|
|
|
" <th>V1</th>\n",
|
|
|
|
" <th>V2</th>\n",
|
|
|
|
" <th>V3</th>\n",
|
|
|
|
" <th>V4</th>\n",
|
|
|
|
" <th>V5</th>\n",
|
|
|
|
" <th>V6</th>\n",
|
|
|
|
" <th>V7</th>\n",
|
|
|
|
" <th>V8</th>\n",
|
|
|
|
" <th>V9</th>\n",
|
|
|
|
" <th>V10</th>\n",
|
|
|
|
" <th>V11</th>\n",
|
|
|
|
" <th>V12</th>\n",
|
|
|
|
" <th>V13</th>\n",
|
|
|
|
" <th>V14</th>\n",
|
|
|
|
" <th>V15</th>\n",
|
|
|
|
" <th>V16</th>\n",
|
|
|
|
" <th>V17</th>\n",
|
|
|
|
" <th>V18</th>\n",
|
|
|
|
" <th>V19</th>\n",
|
|
|
|
" <th>V20</th>\n",
|
|
|
|
" <th>V21</th>\n",
|
|
|
|
" <th>V22</th>\n",
|
|
|
|
" <th>V23</th>\n",
|
|
|
|
" <th>V24</th>\n",
|
|
|
|
" <th>V25</th>\n",
|
|
|
|
" <th>V26</th>\n",
|
|
|
|
" <th>V27</th>\n",
|
|
|
|
" <th>V28</th>\n",
|
|
|
|
" <th>Amount</th>\n",
|
|
|
|
" <th>Class</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>count</th>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" <td>984.000000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>mean</th>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>88501.498984</td>\n",
|
|
|
|
" <td>-2.445079</td>\n",
|
|
|
|
" <td>1.781022</td>\n",
|
|
|
|
" <td>-3.509406</td>\n",
|
|
|
|
" <td>2.214004</td>\n",
|
|
|
|
" <td>-1.477993</td>\n",
|
|
|
|
" <td>-0.713150</td>\n",
|
|
|
|
" <td>-2.787427</td>\n",
|
|
|
|
" <td>0.279073</td>\n",
|
|
|
|
" <td>-1.253108</td>\n",
|
|
|
|
" <td>-2.841500</td>\n",
|
|
|
|
" <td>1.930697</td>\n",
|
|
|
|
" <td>-3.124120</td>\n",
|
|
|
|
" <td>-0.026229</td>\n",
|
|
|
|
" <td>-3.502384</td>\n",
|
|
|
|
" <td>-0.039494</td>\n",
|
|
|
|
" <td>-2.097294</td>\n",
|
|
|
|
" <td>-3.304208</td>\n",
|
|
|
|
" <td>-1.128950</td>\n",
|
|
|
|
" <td>0.343668</td>\n",
|
|
|
|
" <td>0.175905</td>\n",
|
|
|
|
" <td>0.331911</td>\n",
|
|
|
|
" <td>0.049631</td>\n",
|
|
|
|
" <td>-0.031264</td>\n",
|
|
|
|
" <td>-0.037389</td>\n",
|
|
|
|
" <td>0.022812</td>\n",
|
|
|
|
" <td>0.027632</td>\n",
|
|
|
|
" <td>0.086286</td>\n",
|
|
|
|
" <td>0.046738</td>\n",
|
|
|
|
" <td>0.039676</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>0.500000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>std</th>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>48996.269445</td>\n",
|
|
|
|
" <td>5.512352</td>\n",
|
|
|
|
" <td>3.713232</td>\n",
|
|
|
|
" <td>6.223001</td>\n",
|
|
|
|
" <td>3.231076</td>\n",
|
|
|
|
" <td>4.274632</td>\n",
|
|
|
|
" <td>1.789350</td>\n",
|
|
|
|
" <td>5.856197</td>\n",
|
|
|
|
" <td>4.857643</td>\n",
|
|
|
|
" <td>2.371055</td>\n",
|
|
|
|
" <td>4.563067</td>\n",
|
|
|
|
" <td>2.764745</td>\n",
|
|
|
|
" <td>4.595103</td>\n",
|
|
|
|
" <td>1.054377</td>\n",
|
|
|
|
" <td>4.653202</td>\n",
|
|
|
|
" <td>1.002911</td>\n",
|
|
|
|
" <td>3.465619</td>\n",
|
|
|
|
" <td>5.990033</td>\n",
|
|
|
|
" <td>2.412032</td>\n",
|
|
|
|
" <td>1.290973</td>\n",
|
|
|
|
" <td>1.126258</td>\n",
|
|
|
|
" <td>2.787884</td>\n",
|
|
|
|
" <td>1.167097</td>\n",
|
|
|
|
" <td>1.177562</td>\n",
|
|
|
|
" <td>0.551518</td>\n",
|
|
|
|
" <td>0.677541</td>\n",
|
|
|
|
" <td>0.476480</td>\n",
|
|
|
|
" <td>1.023332</td>\n",
|
|
|
|
" <td>0.479168</td>\n",
|
|
|
|
" <td>0.851800</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>0.500254</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>min</th>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>60.000000</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-30.552380</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-15.799625</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-31.103685</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-3.863126</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-22.105532</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-10.261990</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-43.557242</td>\n",
|
|
|
|
" <td>-41.044261</td>\n",
|
|
|
|
" <td>-13.434066</td>\n",
|
|
|
|
" <td>-24.588262</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-2.613374</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-18.683715</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-3.223045</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-19.214325</td>\n",
|
|
|
|
" <td>-4.498945</td>\n",
|
|
|
|
" <td>-14.129855</td>\n",
|
|
|
|
" <td>-25.162799</td>\n",
|
|
|
|
" <td>-9.498746</td>\n",
|
|
|
|
" <td>-3.681904</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-7.242879</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-22.797604</td>\n",
|
|
|
|
" <td>-8.887017</td>\n",
|
|
|
|
" <td>-19.254328</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-2.028024</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-4.781606</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-1.214960</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-7.263482</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-2.735623</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-0.353229</td>\n",
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>25%</th>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>45531.000000</td>\n",
|
|
|
|
" <td>-2.867222</td>\n",
|
|
|
|
" <td>-0.155438</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-5.084967</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-0.172018</td>\n",
|
|
|
|
" <td>-1.700260</td>\n",
|
|
|
|
" <td>-1.619179</td>\n",
|
|
|
|
" <td>-3.066415</td>\n",
|
|
|
|
" <td>-0.204192</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-2.279453</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-4.572043</td>\n",
|
|
|
|
" <td>-0.187147</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-5.495221</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-0.784589</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-6.721799</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-0.627097</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>-3.543426</td>\n",
|
|
|
|
" <td>-5.302111</td>\n",
|
|
|
|
" <td>-1.809496</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>-0.412430</td>\n",
|
|
|
|
" <td>-0.187708</td>\n",
|
|
|
|
" <td>-0.157259</td>\n",
|
|
|
|
" <td>-0.509376</td>\n",
|
|
|
|
" <td>-0.240064</td>\n",
|
|
|
|
" <td>-0.379825</td>\n",
|
|
|
|
" <td>-0.321251</td>\n",
|
|
|
|
" <td>-0.281187</td>\n",
|
|
|
|
" <td>-0.061809</td>\n",
|
|
|
|
" <td>-0.050194</td>\n",
|
|
|
|
" <td>-0.347302</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>50%</th>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>83076.500000</td>\n",
|
|
|
|
" <td>-0.823244</td>\n",
|
|
|
|
" <td>0.957399</td>\n",
|
|
|
|
" <td>-1.381998</td>\n",
|
|
|
|
" <td>1.287041</td>\n",
|
|
|
|
" <td>-0.394605</td>\n",
|
|
|
|
" <td>-0.689473</td>\n",
|
|
|
|
" <td>-0.668321</td>\n",
|
|
|
|
" <td>0.147397</td>\n",
|
|
|
|
" <td>-0.694910</td>\n",
|
|
|
|
" <td>-0.948441</td>\n",
|
|
|
|
" <td>1.170286</td>\n",
|
|
|
|
" <td>-0.858094</td>\n",
|
|
|
|
" <td>-0.000686</td>\n",
|
|
|
|
" <td>-1.110717</td>\n",
|
|
|
|
" <td>-0.006070</td>\n",
|
|
|
|
" <td>-0.677801</td>\n",
|
|
|
|
" <td>-0.513640</td>\n",
|
|
|
|
" <td>-0.383038</td>\n",
|
|
|
|
" <td>0.221049</td>\n",
|
|
|
|
" <td>0.040630</td>\n",
|
|
|
|
" <td>0.155404</td>\n",
|
|
|
|
" <td>0.080270</td>\n",
|
|
|
|
" <td>-0.030318</td>\n",
|
|
|
|
" <td>0.009379</td>\n",
|
|
|
|
" <td>0.049923</td>\n",
|
|
|
|
" <td>-0.007475</td>\n",
|
|
|
|
" <td>0.063100</td>\n",
|
|
|
|
" <td>0.039464</td>\n",
|
|
|
|
" <td>-0.280984</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>0.500000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>75%</th>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>135051.500000</td>\n",
|
|
|
|
" <td>0.919444</td>\n",
|
|
|
|
" <td>2.791569</td>\n",
|
|
|
|
" <td>0.356911</td>\n",
|
|
|
|
" <td>4.175332</td>\n",
|
|
|
|
" <td>0.616305</td>\n",
|
|
|
|
" <td>0.069620</td>\n",
|
|
|
|
" <td>0.265089</td>\n",
|
|
|
|
" <td>0.877002</td>\n",
|
|
|
|
" <td>0.134399</td>\n",
|
|
|
|
" <td>-0.016047</td>\n",
|
|
|
|
" <td>3.586502</td>\n",
|
|
|
|
" <td>0.190356</td>\n",
|
|
|
|
" <td>0.683977</td>\n",
|
|
|
|
" <td>0.110541</td>\n",
|
|
|
|
" <td>0.672903</td>\n",
|
|
|
|
" <td>0.250353</td>\n",
|
|
|
|
" <td>0.313841</td>\n",
|
|
|
|
" <td>0.334927</td>\n",
|
|
|
|
" <td>0.978754</td>\n",
|
|
|
|
" <td>0.445616</td>\n",
|
|
|
|
" <td>0.642724</td>\n",
|
|
|
|
" <td>0.624948</td>\n",
|
|
|
|
" <td>0.180735</td>\n",
|
|
|
|
" <td>0.365624</td>\n",
|
|
|
|
" <td>0.395001</td>\n",
|
|
|
|
" <td>0.324059</td>\n",
|
|
|
|
" <td>0.457194</td>\n",
|
|
|
|
" <td>0.226492</td>\n",
|
|
|
|
" <td>0.046539</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>max</th>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>172733.000000</td>\n",
|
|
|
|
" <td>2.335833</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>22.057729</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>3.476268</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>12.114672</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>14.103918</td>\n",
|
|
|
|
" <td>6.474115</td>\n",
|
|
|
|
" <td>5.802537</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>20.007208</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>6.816732</td>\n",
|
|
|
|
" <td>11.732926</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>12.018913</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>2.534876</td>\n",
|
|
|
|
" <td>3.091328</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>3.442422</td>\n",
|
|
|
|
" <td>2.471358</td>\n",
|
|
|
|
" <td>3.139656</td>\n",
|
|
|
|
" <td>6.739384</td>\n",
|
|
|
|
" <td>3.790316</td>\n",
|
|
|
|
" <td>5.228342</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>11.059004</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>27.202839</td>\n",
|
|
|
|
" <td>8.361985</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>5.466230</td>\n",
|
|
|
|
" <td>1.208141</td>\n",
|
|
|
|
" <td>2.208209</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>2.745261</td>\n",
|
|
|
|
" <td>3.052358</td>\n",
|
2024-03-16 14:45:16 +01:00
|
|
|
" <td>4.975792</td>\n",
|
|
|
|
" <td>8.146182</td>\n",
|
2024-03-16 14:35:44 +01:00
|
|
|
" <td>1.000000</td>\n",
|
|