{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## IUM 2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Installation of packages" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: kaggle in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.6.6)\n", "Requirement already satisfied: six>=1.10 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from kaggle) (1.16.0)\n", "Requirement already satisfied: certifi in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2024.2.2)\n", "Requirement already satisfied: python-dateutil in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from kaggle) (2.9.0.post0)\n", "Requirement already satisfied: requests in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2.31.0)\n", "Requirement already satisfied: tqdm in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (4.66.2)\n", "Requirement already satisfied: python-slugify in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (8.0.4)\n", "Requirement already satisfied: urllib3 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (2.2.1)\n", "Requirement already satisfied: bleach in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from kaggle) (6.1.0)\n", "Requirement already satisfied: webencodings in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from bleach->kaggle) (0.5.1)\n", "Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n", 
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from requests->kaggle) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from requests->kaggle) (3.6)\n", "Requirement already satisfied: colorama in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from tqdm->kaggle) (0.4.6)\n", "Note: you may need to restart the kernel to use updated packages.\n", "Requirement already satisfied: pandas in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (2.2.1)\n", "Requirement already satisfied: numpy<2,>=1.26.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (1.26.3)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from pandas) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n", "Requirement already satisfied: tzdata>=2022.7 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\skype\\appdata\\roaming\\python\\python312\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", "Note: you may need to restart the kernel to use updated packages.\n", "Requirement already satisfied: numpy in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.26.3)\n", "Note: you may need to restart the kernel to use updated packages.\n", "Requirement already satisfied: scikit-learn in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (1.4.1.post1)\n", "Requirement already satisfied: numpy<2.0,>=1.19.5 in 
c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.26.3)\n", "Requirement already satisfied: scipy>=1.6.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.12.0)\n", "Requirement already satisfied: joblib>=1.2.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (1.3.2)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\skype\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from scikit-learn) (3.3.0)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "# Install every dependency of this notebook into the kernel's environment.\n", "# The recorded run resolved kaggle 1.6.6, pandas 2.2.1, numpy 1.26.3 and scikit-learn 1.4.1.post1.\n", "%pip install kaggle pandas numpy scikit-learn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Importing libraries" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split  # train/test partitioning\n", "from sklearn.preprocessing import StandardScaler  # feature standardisation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Downloading a dataset" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "creditcardfraud.zip: Skipping, found more recently modified local copy (use --force to force download)\n" ] } ], "source": [ "# Fetch the mlg-ulb/creditcardfraud archive with the Kaggle CLI (an up-to-date local copy is skipped)\n", "!kaggle datasets download -d mlg-ulb/creditcardfraud" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Uncompress a file" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Archive: creditcardfraud.zip\n", " inflating: creditcard.csv \n" ] } ], "source": [ "# Extract creditcard.csv from the archive; -o overwrites an existing copy without prompting\n", "!unzip -o creditcardfraud.zip" ] }, { "cell_type": "markdown", "metadata": {},
"source": [ "### Load the data" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# Never truncate columns when a wide frame is displayed\n", "pd.set_option('display.max_columns', None)\n", "\n", "# Read the extracted CSV into memory\n", "df = pd.read_csv('creditcard.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Size of the dataset" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 284807 entries, 0 to 284806\n", "Data columns (total 31 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Time 284807 non-null float64\n", " 1 V1 284807 non-null float64\n", " 2 V2 284807 non-null float64\n", " 3 V3 284807 non-null float64\n", " 4 V4 284807 non-null float64\n", " 5 V5 284807 non-null float64\n", " 6 V6 284807 non-null float64\n", " 7 V7 284807 non-null float64\n", " 8 V8 284807 non-null float64\n", " 9 V9 284807 non-null float64\n", " 10 V10 284807 non-null float64\n", " 11 V11 284807 non-null float64\n", " 12 V12 284807 non-null float64\n", " 13 V13 284807 non-null float64\n", " 14 V14 284807 non-null float64\n", " 15 V15 284807 non-null float64\n", " 16 V16 284807 non-null float64\n", " 17 V17 284807 non-null float64\n", " 18 V18 284807 non-null float64\n", " 19 V19 284807 non-null float64\n", " 20 V20 284807 non-null float64\n", " 21 V21 284807 non-null float64\n", " 22 V22 284807 non-null float64\n", " 23 V23 284807 non-null float64\n", " 24 V24 284807 non-null float64\n", " 25 V25 284807 non-null float64\n", " 26 V26 284807 non-null float64\n", " 27 V27 284807 non-null float64\n", " 28 V28 284807 non-null float64\n", " 29 Amount 284807 non-null float64\n", " 30 Class 284807 non-null int64 \n", "dtypes: float64(30), int64(1)\n", "memory usage: 67.4 MB\n" ] } ], "source": [ "# Column dtypes, non-null counts and memory footprint of the full dataset\n", "df.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Normalising the data" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source":
[ "# Standardise 'Amount' to zero mean and unit variance; no other column is rescaled here.\n", "# NOTE(review): the scaler is fitted on the full dataset, i.e. before the train/test split\n", "# further down, so test rows influence the scaling - consider fitting on the training part only.\n", "scaler = StandardScaler()\n", "\n", "# StandardScaler expects a 2-D array, hence the single-column reshape\n", "amount_2d = df['Amount'].to_numpy().reshape(-1, 1)\n", "df['Amount'] = scaler.fit_transform(amount_2d)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Summary statistics" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
count284807.0000002.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+05284807.000000
mean94813.8595751.168375e-153.416908e-16-1.379537e-152.074095e-159.604066e-161.487313e-15-5.556467e-161.213481e-16-2.406331e-152.239053e-151.673327e-15-1.247012e-158.190001e-161.207294e-154.887456e-151.437716e-15-3.772171e-169.564149e-161.039917e-156.406204e-161.654067e-16-3.568593e-162.578648e-164.473266e-155.340915e-161.683437e-15-3.660091e-16-1.227390e-162.913952e-170.001727
std47488.1459551.958696e+001.651309e+001.516255e+001.415869e+001.380247e+001.332271e+001.237094e+001.194353e+001.098632e+001.088850e+001.020713e+009.992014e-019.952742e-019.585956e-019.153160e-018.762529e-018.493371e-018.381762e-018.140405e-017.709250e-017.345240e-017.257016e-016.244603e-016.056471e-015.212781e-014.822270e-014.036325e-013.300833e-011.000002e+000.041527
min0.000000-5.640751e+01-7.271573e+01-4.832559e+01-5.683171e+00-1.137433e+02-2.616051e+01-4.355724e+01-7.321672e+01-1.343407e+01-2.458826e+01-4.797473e+00-1.868371e+01-5.791881e+00-1.921433e+01-4.498945e+00-1.412985e+01-2.516280e+01-9.498746e+00-7.213527e+00-5.449772e+01-3.483038e+01-1.093314e+01-4.480774e+01-2.836627e+00-1.029540e+01-2.604551e+00-2.256568e+01-1.543008e+01-3.532294e-010.000000
25%54201.500000-9.203734e-01-5.985499e-01-8.903648e-01-8.486401e-01-6.915971e-01-7.682956e-01-5.540759e-01-2.086297e-01-6.430976e-01-5.354257e-01-7.624942e-01-4.055715e-01-6.485393e-01-4.255740e-01-5.828843e-01-4.680368e-01-4.837483e-01-4.988498e-01-4.562989e-01-2.117214e-01-2.283949e-01-5.423504e-01-1.618463e-01-3.545861e-01-3.171451e-01-3.269839e-01-7.083953e-02-5.295979e-02-3.308401e-010.000000
50%84692.0000001.810880e-026.548556e-021.798463e-01-1.984653e-02-5.433583e-02-2.741871e-014.010308e-022.235804e-02-5.142873e-02-9.291738e-02-3.275735e-021.400326e-01-1.356806e-025.060132e-024.807155e-026.641332e-02-6.567575e-02-3.636312e-033.734823e-03-6.248109e-02-2.945017e-026.781943e-03-1.119293e-024.097606e-021.659350e-02-5.213911e-021.342146e-031.124383e-02-2.652715e-010.000000
75%139320.5000001.315642e+008.037239e-011.027196e+007.433413e-016.119264e-013.985649e-015.704361e-013.273459e-015.971390e-014.539234e-017.395934e-016.182380e-016.625050e-014.931498e-016.488208e-015.232963e-013.996750e-015.008067e-014.589494e-011.330408e-011.863772e-015.285536e-011.476421e-014.395266e-013.507156e-012.409522e-019.104512e-027.827995e-02-4.471707e-020.000000
max172792.0000002.454930e+002.205773e+019.382558e+001.687534e+013.480167e+017.330163e+011.205895e+022.000721e+011.559499e+012.374514e+011.201891e+017.848392e+007.126883e+001.052677e+018.877742e+001.731511e+019.253526e+005.041069e+005.591971e+003.942090e+012.720284e+011.050309e+012.252841e+014.584549e+007.519589e+003.517346e+003.161220e+013.384781e+011.023622e+021.000000
\n", "
" ], "text/plain": [ " Time V1 V2 V3 V4 \\\n", "count 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", "mean 94813.859575 1.168375e-15 3.416908e-16 -1.379537e-15 2.074095e-15 \n", "std 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00 1.415869e+00 \n", "min 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00 \n", "25% 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01 \n", "50% 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01 -1.984653e-02 \n", "75% 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00 7.433413e-01 \n", "max 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00 1.687534e+01 \n", "\n", " V5 V6 V7 V8 V9 \\\n", "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", "mean 9.604066e-16 1.487313e-15 -5.556467e-16 1.213481e-16 -2.406331e-15 \n", "std 1.380247e+00 1.332271e+00 1.237094e+00 1.194353e+00 1.098632e+00 \n", "min -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01 -1.343407e+01 \n", "25% -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01 -6.430976e-01 \n", "50% -5.433583e-02 -2.741871e-01 4.010308e-02 2.235804e-02 -5.142873e-02 \n", "75% 6.119264e-01 3.985649e-01 5.704361e-01 3.273459e-01 5.971390e-01 \n", "max 3.480167e+01 7.330163e+01 1.205895e+02 2.000721e+01 1.559499e+01 \n", "\n", " V10 V11 V12 V13 V14 \\\n", "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", "mean 2.239053e-15 1.673327e-15 -1.247012e-15 8.190001e-16 1.207294e-15 \n", "std 1.088850e+00 1.020713e+00 9.992014e-01 9.952742e-01 9.585956e-01 \n", "min -2.458826e+01 -4.797473e+00 -1.868371e+01 -5.791881e+00 -1.921433e+01 \n", "25% -5.354257e-01 -7.624942e-01 -4.055715e-01 -6.485393e-01 -4.255740e-01 \n", "50% -9.291738e-02 -3.275735e-02 1.400326e-01 -1.356806e-02 5.060132e-02 \n", "75% 4.539234e-01 7.395934e-01 6.182380e-01 6.625050e-01 4.931498e-01 \n", "max 2.374514e+01 1.201891e+01 7.848392e+00 7.126883e+00 1.052677e+01 \n", "\n", " V15 V16 V17 V18 V19 \\\n", 
"count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", "mean 4.887456e-15 1.437716e-15 -3.772171e-16 9.564149e-16 1.039917e-15 \n", "std 9.153160e-01 8.762529e-01 8.493371e-01 8.381762e-01 8.140405e-01 \n", "min -4.498945e+00 -1.412985e+01 -2.516280e+01 -9.498746e+00 -7.213527e+00 \n", "25% -5.828843e-01 -4.680368e-01 -4.837483e-01 -4.988498e-01 -4.562989e-01 \n", "50% 4.807155e-02 6.641332e-02 -6.567575e-02 -3.636312e-03 3.734823e-03 \n", "75% 6.488208e-01 5.232963e-01 3.996750e-01 5.008067e-01 4.589494e-01 \n", "max 8.877742e+00 1.731511e+01 9.253526e+00 5.041069e+00 5.591971e+00 \n", "\n", " V20 V21 V22 V23 V24 \\\n", "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", "mean 6.406204e-16 1.654067e-16 -3.568593e-16 2.578648e-16 4.473266e-15 \n", "std 7.709250e-01 7.345240e-01 7.257016e-01 6.244603e-01 6.056471e-01 \n", "min -5.449772e+01 -3.483038e+01 -1.093314e+01 -4.480774e+01 -2.836627e+00 \n", "25% -2.117214e-01 -2.283949e-01 -5.423504e-01 -1.618463e-01 -3.545861e-01 \n", "50% -6.248109e-02 -2.945017e-02 6.781943e-03 -1.119293e-02 4.097606e-02 \n", "75% 1.330408e-01 1.863772e-01 5.285536e-01 1.476421e-01 4.395266e-01 \n", "max 3.942090e+01 2.720284e+01 1.050309e+01 2.252841e+01 4.584549e+00 \n", "\n", " V25 V26 V27 V28 Amount \\\n", "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", "mean 5.340915e-16 1.683437e-15 -3.660091e-16 -1.227390e-16 2.913952e-17 \n", "std 5.212781e-01 4.822270e-01 4.036325e-01 3.300833e-01 1.000002e+00 \n", "min -1.029540e+01 -2.604551e+00 -2.256568e+01 -1.543008e+01 -3.532294e-01 \n", "25% -3.171451e-01 -3.269839e-01 -7.083953e-02 -5.295979e-02 -3.308401e-01 \n", "50% 1.659350e-02 -5.213911e-02 1.342146e-03 1.124383e-02 -2.652715e-01 \n", "75% 3.507156e-01 2.409522e-01 9.104512e-02 7.827995e-02 -4.471707e-02 \n", "max 7.519589e+00 3.517346e+00 3.161220e+01 3.384781e+01 1.023622e+02 \n", "\n", " Class \n", "count 284807.000000 \n", "mean 0.001727 \n", "std 
0.041527 \n", "min 0.000000 \n", "25% 0.000000 \n", "50% 0.000000 \n", "75% 0.000000 \n", "max 1.000000 " ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Distribution of legitimate and fraudulent transactions" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Class\n", "0 284315\n", "1 492\n", "Name: count, dtype: int64" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Class'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Undersampling the data\n", "We will employ undersampling as one class significantly dominates the other." ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# Build a balanced dataset: keep every fraud row and an equally sized random\n", "# sample of legitimate rows.\n", "\n", "# Seeded generator so the sample is identical on every run\n", "# (0 mirrors the random_state used for train_test_split in this notebook).\n", "rng = np.random.default_rng(0)\n", "\n", "# Index labels and size of the minority (fraud) class\n", "fraud_indices = df.index[df['Class'] == 1].to_numpy()\n", "fraud_count = len(fraud_indices)\n", "\n", "# Index labels of the majority (legitimate) class\n", "normal_indices = df.index[df['Class'] == 0]\n", "\n", "# Draw, without replacement, as many majority labels as there are fraud rows\n", "random_normal_indices = rng.choice(normal_indices.to_numpy(), size=fraud_count, replace=False)\n", "\n", "# Combine the labels of both classes (fraud rows first)\n", "undersample_indice = np.concatenate([fraud_indices, random_normal_indices])\n", "\n", "# The collected values are index labels, so select by label with .loc;\n", "# positional .iloc is only equivalent while df keeps its default RangeIndex.\n", "undersample_data = df.loc[undersample_indice, :]\n", "\n", "# Features = every column except the label; the target stays a one-column DataFrame\n", "X_undersample = undersample_data.drop(columns='Class')\n", "y_undersample = undersample_data[['Class']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Size of undersampled dataset" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type":
"stream", "text": [ "\n", "Index: 984 entries, 541 to 216408\n", "Data columns (total 31 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Time 984 non-null float64\n", " 1 V1 984 non-null float64\n", " 2 V2 984 non-null float64\n", " 3 V3 984 non-null float64\n", " 4 V4 984 non-null float64\n", " 5 V5 984 non-null float64\n", " 6 V6 984 non-null float64\n", " 7 V7 984 non-null float64\n", " 8 V8 984 non-null float64\n", " 9 V9 984 non-null float64\n", " 10 V10 984 non-null float64\n", " 11 V11 984 non-null float64\n", " 12 V12 984 non-null float64\n", " 13 V13 984 non-null float64\n", " 14 V14 984 non-null float64\n", " 15 V15 984 non-null float64\n", " 16 V16 984 non-null float64\n", " 17 V17 984 non-null float64\n", " 18 V18 984 non-null float64\n", " 19 V19 984 non-null float64\n", " 20 V20 984 non-null float64\n", " 21 V21 984 non-null float64\n", " 22 V22 984 non-null float64\n", " 23 V23 984 non-null float64\n", " 24 V24 984 non-null float64\n", " 25 V25 984 non-null float64\n", " 26 V26 984 non-null float64\n", " 27 V27 984 non-null float64\n", " 28 V28 984 non-null float64\n", " 29 Amount 984 non-null float64\n", " 30 Class 984 non-null int64 \n", "dtypes: float64(30), int64(1)\n", "memory usage: 246.0 KB\n" ] } ], "source": [ "undersample_data.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Summary statistics of the undersampled dataset" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
count984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000984.000000
mean88776.640244-2.4058351.819622-3.5118752.253022-1.598199-0.703490-2.7718340.290248-1.271753-2.8663981.950912-3.085801-0.038172-3.480604-0.047034-2.048191-3.320723-1.1213290.3381800.1938900.3637820.0025540.005715-0.0680020.0070240.0185690.0628920.0277830.0772700.500000
std48243.9442715.5260103.7532876.2156013.1885814.2840631.7876385.9181294.8785082.3216584.5262282.7375844.6200981.0469154.6568720.9573193.4941925.9750392.4104271.2754601.1974622.8011601.1714301.2864730.5720750.6715700.4880111.0234500.4257081.3224160.500254
min406.000000-30.552380-20.984898-31.103685-3.421874-22.105532-14.425011-43.557242-41.044261-13.434066-24.588262-2.279476-18.683715-3.184202-19.214325-4.498945-14.129855-25.162799-9.498746-3.681904-5.244333-22.797604-8.887017-19.254328-2.082546-4.781606-1.407558-7.263482-1.869290-0.3532290.000000
25%47898.750000-2.896794-0.156682-5.084967-0.100812-1.758911-1.509571-3.060742-0.179491-2.279453-4.593030-0.047452-5.495221-0.761168-6.721799-0.639308-3.543426-5.302111-1.809496-0.441326-0.203323-0.159503-0.523196-0.233359-0.409463-0.331296-0.316920-0.061141-0.053338-0.3470420.000000
50%80545.000000-0.8038130.993854-1.3702811.291576-0.425110-0.640524-0.6408040.180271-0.666598-0.8856461.171937-0.7345490.002739-0.986762-0.012609-0.599078-0.466877-0.3222460.2284710.0178820.1409220.013228-0.012981-0.0012970.035358-0.0273310.0442710.032226-0.2782850.500000
75%135096.7500001.0186442.7988850.3457374.2356310.4423990.0888650.2458230.8792260.205275-0.0273863.5861300.2857710.6460900.0831010.6085860.3021610.2699490.3339760.9558020.3995000.6549900.5825700.1967640.3682930.3714630.3288120.4240670.2013610.0460291.000000
max172743.0000002.30887822.0577293.94033712.11467220.27772812.12895026.23772220.0072087.16887810.42350512.0189131.9481263.6855703.4424222.4713583.1396566.7393843.7903165.22834216.43692027.2028398.36198517.6066371.1026363.4107422.7452613.0523581.77936429.7991371.000000
\n", "
" ], "text/plain": [ " Time V1 V2 V3 V4 \\\n", "count 984.000000 984.000000 984.000000 984.000000 984.000000 \n", "mean 88776.640244 -2.405835 1.819622 -3.511875 2.253022 \n", "std 48243.944271 5.526010 3.753287 6.215601 3.188581 \n", "min 406.000000 -30.552380 -20.984898 -31.103685 -3.421874 \n", "25% 47898.750000 -2.896794 -0.156682 -5.084967 -0.100812 \n", "50% 80545.000000 -0.803813 0.993854 -1.370281 1.291576 \n", "75% 135096.750000 1.018644 2.798885 0.345737 4.235631 \n", "max 172743.000000 2.308878 22.057729 3.940337 12.114672 \n", "\n", " V5 V6 V7 V8 V9 V10 \\\n", "count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n", "mean -1.598199 -0.703490 -2.771834 0.290248 -1.271753 -2.866398 \n", "std 4.284063 1.787638 5.918129 4.878508 2.321658 4.526228 \n", "min -22.105532 -14.425011 -43.557242 -41.044261 -13.434066 -24.588262 \n", "25% -1.758911 -1.509571 -3.060742 -0.179491 -2.279453 -4.593030 \n", "50% -0.425110 -0.640524 -0.640804 0.180271 -0.666598 -0.885646 \n", "75% 0.442399 0.088865 0.245823 0.879226 0.205275 -0.027386 \n", "max 20.277728 12.128950 26.237722 20.007208 7.168878 10.423505 \n", "\n", " V11 V12 V13 V14 V15 V16 \\\n", "count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n", "mean 1.950912 -3.085801 -0.038172 -3.480604 -0.047034 -2.048191 \n", "std 2.737584 4.620098 1.046915 4.656872 0.957319 3.494192 \n", "min -2.279476 -18.683715 -3.184202 -19.214325 -4.498945 -14.129855 \n", "25% -0.047452 -5.495221 -0.761168 -6.721799 -0.639308 -3.543426 \n", "50% 1.171937 -0.734549 0.002739 -0.986762 -0.012609 -0.599078 \n", "75% 3.586130 0.285771 0.646090 0.083101 0.608586 0.302161 \n", "max 12.018913 1.948126 3.685570 3.442422 2.471358 3.139656 \n", "\n", " V17 V18 V19 V20 V21 V22 \\\n", "count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n", "mean -3.320723 -1.121329 0.338180 0.193890 0.363782 0.002554 \n", "std 5.975039 2.410427 1.275460 1.197462 2.801160 1.171430 \n", "min -25.162799 
-9.498746 -3.681904 -5.244333 -22.797604 -8.887017 \n", "25% -5.302111 -1.809496 -0.441326 -0.203323 -0.159503 -0.523196 \n", "50% -0.466877 -0.322246 0.228471 0.017882 0.140922 0.013228 \n", "75% 0.269949 0.333976 0.955802 0.399500 0.654990 0.582570 \n", "max 6.739384 3.790316 5.228342 16.436920 27.202839 8.361985 \n", "\n", " V23 V24 V25 V26 V27 V28 \\\n", "count 984.000000 984.000000 984.000000 984.000000 984.000000 984.000000 \n", "mean 0.005715 -0.068002 0.007024 0.018569 0.062892 0.027783 \n", "std 1.286473 0.572075 0.671570 0.488011 1.023450 0.425708 \n", "min -19.254328 -2.082546 -4.781606 -1.407558 -7.263482 -1.869290 \n", "25% -0.233359 -0.409463 -0.331296 -0.316920 -0.061141 -0.053338 \n", "50% -0.012981 -0.001297 0.035358 -0.027331 0.044271 0.032226 \n", "75% 0.196764 0.368293 0.371463 0.328812 0.424067 0.201361 \n", "max 17.606637 1.102636 3.410742 2.745261 3.052358 1.779364 \n", "\n", " Amount Class \n", "count 984.000000 984.000000 \n", "mean 0.077270 0.500000 \n", "std 1.322416 0.500254 \n", "min -0.353229 0.000000 \n", "25% -0.347042 0.000000 \n", "50% -0.278285 0.500000 \n", "75% 0.046029 1.000000 \n", "max 29.799137 1.000000 " ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "undersample_data.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Distribution of legitimate and fraudulent transactions in an undersampled dataset" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Class\n", "1 492\n", "0 492\n", "Name: count, dtype: int64" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "undersample_data['Class'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Splitting whole data into training and test datasets" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [
"# Features = every column except the label; the target stays a one-column DataFrame\n", "X = df.drop(columns='Class')\n", "y = df[['Class']]\n", "\n", "# Hold out 30% of the rows for testing. Fraud is only ~0.17% of the data, so the split is\n", "# stratified on the label to keep the class ratio the same in both parts;\n", "# random_state fixes the shuffle so the split is reproducible.\n", "X_train, X_test, y_train, y_test = train_test_split(\n", "    X, y, test_size=0.3, random_state=0, stratify=y['Class']\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Statistical measures of the training dataset of whole data" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 199364 entries, 161145 to 117952\n", "Data columns (total 31 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Time 199364 non-null float64\n", " 1 V1 199364 non-null float64\n", " 2 V2 199364 non-null float64\n", " 3 V3 199364 non-null float64\n", " 4 V4 199364 non-null float64\n", " 5 V5 199364 non-null float64\n", " 6 V6 199364 non-null float64\n", " 7 V7 199364 non-null float64\n", " 8 V8 199364 non-null float64\n", " 9 V9 199364 non-null float64\n", " 10 V10 199364 non-null float64\n", " 11 V11 199364 non-null float64\n", " 12 V12 199364 non-null float64\n", " 13 V13 199364 non-null float64\n", " 14 V14 199364 non-null float64\n", " 15 V15 199364 non-null float64\n", " 16 V16 199364 non-null float64\n", " 17 V17 199364 non-null float64\n", " 18 V18 199364 non-null float64\n", " 19 V19 199364 non-null float64\n", " 20 V20 199364 non-null float64\n", " 21 V21 199364 non-null float64\n", " 22 V22 199364 non-null float64\n", " 23 V23 199364 non-null float64\n", " 24 V24 199364 non-null float64\n", " 25 V25 199364 non-null float64\n", " 26 V26 199364 non-null float64\n", " 27 V27 199364 non-null float64\n", " 28 V28 199364 non-null float64\n", " 29 Amount 199364 non-null float64\n", " 30 Class 199364 non-null int64 \n", "dtypes: float64(30), int64(1)\n", "memory usage: 48.7 MB\n" ] } ], "source": [ "pd.concat([X_train, y_train], axis=1).info()" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
count199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000199364.000000
mean94799.4939360.000315-0.002690-0.0015320.000721-0.001494-0.000210-0.000870-0.0019800.0002120.001357-0.001039-0.0015650.0006930.0001370.0003220.0000840.000292-0.0001340.0004900.000430-0.000014-0.000022-0.0002580.0003620.000395-0.000094-0.0000270.0000150.0012710.001731
std47499.8354911.9635541.6573791.5167161.4171381.3687441.3286731.2260181.2123381.1020211.0928011.0200270.9965260.9977180.9569380.9161430.8761310.8521810.8375560.8145060.7702570.7434500.7276250.6291450.6052980.5211750.4818420.4010420.3248490.9839480.041563
min0.000000-46.855047-63.344698-33.680984-5.560118-42.147898-23.496714-43.557242-73.216718-13.434066-24.588262-4.797473-17.769143-5.791881-19.214325-4.498945-14.129855-25.162799-9.498746-7.213527-23.646890-34.830382-10.933144-44.807735-2.822684-10.295397-2.534330-22.565679-11.710896-0.3532290.000000
25%54126.000000-0.921539-0.601213-0.892838-0.848835-0.692874-0.769177-0.554220-0.209086-0.644753-0.535493-0.762852-0.407660-0.648456-0.425122-0.583616-0.467945-0.484055-0.498850-0.456800-0.211662-0.229272-0.544345-0.162021-0.354179-0.316088-0.327327-0.070864-0.052907-0.3306400.000000
50%84633.5000000.0197050.0637840.177888-0.017852-0.055832-0.2743970.0392280.021803-0.049633-0.092069-0.0341350.137912-0.0134160.0511790.0492890.067772-0.065113-0.0032170.004422-0.062889-0.0290450.006744-0.0109150.0409740.018014-0.0522870.0010640.011119-0.2652710.000000
75%139334.2500001.3167070.8024371.0255290.7455660.6093490.3979280.5696380.3270230.5970960.4581290.7381430.6173930.6641480.4939250.6495890.5230950.4010340.5004360.4603670.1328340.1870950.5310170.1475030.4389530.3508020.2410820.0904910.077989-0.0430580.000000
max172792.0000002.45188822.0577299.38255816.71553734.09930923.91783744.05446120.00720815.59499523.74513612.0189137.8483924.56900910.5267665.8256547.0591329.2070595.0410695.57211339.42090427.20283910.50309022.5284124.0228667.5195893.46324612.15240122.62007278.2352721.000000
\n", "
" ], "text/plain": [ " Time V1 V2 V3 \\\n", "count 199364.000000 199364.000000 199364.000000 199364.000000 \n", "mean 94799.493936 0.000315 -0.002690 -0.001532 \n", "std 47499.835491 1.963554 1.657379 1.516716 \n", "min 0.000000 -46.855047 -63.344698 -33.680984 \n", "25% 54126.000000 -0.921539 -0.601213 -0.892838 \n", "50% 84633.500000 0.019705 0.063784 0.177888 \n", "75% 139334.250000 1.316707 0.802437 1.025529 \n", "max 172792.000000 2.451888 22.057729 9.382558 \n", "\n", " V4 V5 V6 V7 \\\n", "count 199364.000000 199364.000000 199364.000000 199364.000000 \n", "mean 0.000721 -0.001494 -0.000210 -0.000870 \n", "std 1.417138 1.368744 1.328673 1.226018 \n", "min -5.560118 -42.147898 -23.496714 -43.557242 \n", "25% -0.848835 -0.692874 -0.769177 -0.554220 \n", "50% -0.017852 -0.055832 -0.274397 0.039228 \n", "75% 0.745566 0.609349 0.397928 0.569638 \n", "max 16.715537 34.099309 23.917837 44.054461 \n", "\n", " V8 V9 V10 V11 \\\n", "count 199364.000000 199364.000000 199364.000000 199364.000000 \n", "mean -0.001980 0.000212 0.001357 -0.001039 \n", "std 1.212338 1.102021 1.092801 1.020027 \n", "min -73.216718 -13.434066 -24.588262 -4.797473 \n", "25% -0.209086 -0.644753 -0.535493 -0.762852 \n", "50% 0.021803 -0.049633 -0.092069 -0.034135 \n", "75% 0.327023 0.597096 0.458129 0.738143 \n", "max 20.007208 15.594995 23.745136 12.018913 \n", "\n", " V12 V13 V14 V15 \\\n", "count 199364.000000 199364.000000 199364.000000 199364.000000 \n", "mean -0.001565 0.000693 0.000137 0.000322 \n", "std 0.996526 0.997718 0.956938 0.916143 \n", "min -17.769143 -5.791881 -19.214325 -4.498945 \n", "25% -0.407660 -0.648456 -0.425122 -0.583616 \n", "50% 0.137912 -0.013416 0.051179 0.049289 \n", "75% 0.617393 0.664148 0.493925 0.649589 \n", "max 7.848392 4.569009 10.526766 5.825654 \n", "\n", " V16 V17 V18 V19 \\\n", "count 199364.000000 199364.000000 199364.000000 199364.000000 \n", "mean 0.000084 0.000292 -0.000134 0.000490 \n", "std 0.876131 0.852181 0.837556 0.814506 \n", "min -14.129855 
-25.162799 -9.498746 -7.213527 \n", "25% -0.467945 -0.484055 -0.498850 -0.456800 \n", "50% 0.067772 -0.065113 -0.003217 0.004422 \n", "75% 0.523095 0.401034 0.500436 0.460367 \n", "max 7.059132 9.207059 5.041069 5.572113 \n", "\n", " V20 V21 V22 V23 \\\n", "count 199364.000000 199364.000000 199364.000000 199364.000000 \n", "mean 0.000430 -0.000014 -0.000022 -0.000258 \n", "std 0.770257 0.743450 0.727625 0.629145 \n", "min -23.646890 -34.830382 -10.933144 -44.807735 \n", "25% -0.211662 -0.229272 -0.544345 -0.162021 \n", "50% -0.062889 -0.029045 0.006744 -0.010915 \n", "75% 0.132834 0.187095 0.531017 0.147503 \n", "max 39.420904 27.202839 10.503090 22.528412 \n", "\n", " V24 V25 V26 V27 \\\n", "count 199364.000000 199364.000000 199364.000000 199364.000000 \n", "mean 0.000362 0.000395 -0.000094 -0.000027 \n", "std 0.605298 0.521175 0.481842 0.401042 \n", "min -2.822684 -10.295397 -2.534330 -22.565679 \n", "25% -0.354179 -0.316088 -0.327327 -0.070864 \n", "50% 0.040974 0.018014 -0.052287 0.001064 \n", "75% 0.438953 0.350802 0.241082 0.090491 \n", "max 4.022866 7.519589 3.463246 12.152401 \n", "\n", " V28 Amount Class \n", "count 199364.000000 199364.000000 199364.000000 \n", "mean 0.000015 0.001271 0.001731 \n", "std 0.324849 0.983948 0.041563 \n", "min -11.710896 -0.353229 0.000000 \n", "25% -0.052907 -0.330640 0.000000 \n", "50% 0.011119 -0.265271 0.000000 \n", "75% 0.077989 -0.043058 0.000000 \n", "max 22.620072 78.235272 1.000000 " ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([X_train, y_train], axis=1).describe()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Class\n", "0 199019\n", "1 345\n", "Name: count, dtype: int64" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([X_train, y_train], axis=1)['Class'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 
Statistical measures of the test dataset of whole data" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 85443 entries, 183484 to 240913\n", "Data columns (total 31 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Time 85443 non-null float64\n", " 1 V1 85443 non-null float64\n", " 2 V2 85443 non-null float64\n", " 3 V3 85443 non-null float64\n", " 4 V4 85443 non-null float64\n", " 5 V5 85443 non-null float64\n", " 6 V6 85443 non-null float64\n", " 7 V7 85443 non-null float64\n", " 8 V8 85443 non-null float64\n", " 9 V9 85443 non-null float64\n", " 10 V10 85443 non-null float64\n", " 11 V11 85443 non-null float64\n", " 12 V12 85443 non-null float64\n", " 13 V13 85443 non-null float64\n", " 14 V14 85443 non-null float64\n", " 15 V15 85443 non-null float64\n", " 16 V16 85443 non-null float64\n", " 17 V17 85443 non-null float64\n", " 18 V18 85443 non-null float64\n", " 19 V19 85443 non-null float64\n", " 20 V20 85443 non-null float64\n", " 21 V21 85443 non-null float64\n", " 22 V22 85443 non-null float64\n", " 23 V23 85443 non-null float64\n", " 24 V24 85443 non-null float64\n", " 25 V25 85443 non-null float64\n", " 26 V26 85443 non-null float64\n", " 27 V27 85443 non-null float64\n", " 28 V28 85443 non-null float64\n", " 29 Amount 85443 non-null float64\n", " 30 Class 85443 non-null int64 \n", "dtypes: float64(30), int64(1)\n", "memory usage: 20.9 MB\n" ] } ], "source": [ "pd.concat([X_test, y_test], axis=1).info()" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
count85443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.00000085443.000000
mean94847.378896-0.0007340.0062770.003574-0.0016820.0034860.0004890.0020300.004620-0.000495-0.0031670.0024240.003652-0.001616-0.000319-0.000751-0.000195-0.0006820.000312-0.001144-0.0010040.0000330.0000520.000602-0.000845-0.0009220.0002200.000062-0.000036-0.0029660.001720
std47461.1205481.9473251.6370501.5151821.4129081.4067221.3406361.2625621.1512911.0906911.0795741.0223151.0054130.9895530.9624570.9133880.8765420.8426690.8396260.8129570.7724840.7132660.7211980.6133940.6064640.5215200.4831260.4096160.3419871.0364920.041443
min0.000000-56.407510-72.715728-48.325589-5.683171-113.743307-26.160506-28.215112-50.943369-9.481456-20.949192-4.568390-18.683715-3.888606-18.493773-4.391307-13.303888-22.883999-9.287832-6.938297-54.497720-22.665685-9.499423-32.828995-2.836627-8.696627-2.604551-9.793568-15.430084-0.3532290.000000
25%54354.000000-0.916858-0.591858-0.883828-0.848202-0.688280-0.766664-0.553479-0.207216-0.638926-0.535400-0.761716-0.400087-0.648761-0.426516-0.581015-0.468312-0.483139-0.498660-0.455027-0.211881-0.226184-0.537704-0.161490-0.355671-0.319736-0.326068-0.070797-0.053129-0.3312800.000000
50%84850.0000000.0132380.0701850.185047-0.024109-0.051627-0.2736860.0423430.023782-0.053821-0.094949-0.0291290.144948-0.0138030.0492480.0452910.062957-0.066955-0.0042450.002229-0.061529-0.0306870.006971-0.0117890.0409760.013508-0.0516950.0019840.011561-0.2652710.000000
75%139277.5000001.3132570.8066151.0311550.7377840.6180670.3998640.5724230.3283370.5973880.4431260.7435110.6206940.6578260.4919160.6471170.5236080.3967990.5014550.4552490.1336080.1848460.5236890.1479230.4410930.3506170.2406570.0922240.078900-0.0473560.000000
max172788.0000002.45493015.8769234.07916816.87534434.80166673.301626120.58949418.7488729.27237615.33174211.6692054.4063387.1268837.4395668.87774217.3151129.2535264.7123985.59197138.11720922.5797147.22015820.8033444.5845495.8261593.51734631.61219833.847808102.3622431.000000
\n", "
" ], "text/plain": [ " Time V1 V2 V3 V4 \\\n", "count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n", "mean 94847.378896 -0.000734 0.006277 0.003574 -0.001682 \n", "std 47461.120548 1.947325 1.637050 1.515182 1.412908 \n", "min 0.000000 -56.407510 -72.715728 -48.325589 -5.683171 \n", "25% 54354.000000 -0.916858 -0.591858 -0.883828 -0.848202 \n", "50% 84850.000000 0.013238 0.070185 0.185047 -0.024109 \n", "75% 139277.500000 1.313257 0.806615 1.031155 0.737784 \n", "max 172788.000000 2.454930 15.876923 4.079168 16.875344 \n", "\n", " V5 V6 V7 V8 V9 \\\n", "count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n", "mean 0.003486 0.000489 0.002030 0.004620 -0.000495 \n", "std 1.406722 1.340636 1.262562 1.151291 1.090691 \n", "min -113.743307 -26.160506 -28.215112 -50.943369 -9.481456 \n", "25% -0.688280 -0.766664 -0.553479 -0.207216 -0.638926 \n", "50% -0.051627 -0.273686 0.042343 0.023782 -0.053821 \n", "75% 0.618067 0.399864 0.572423 0.328337 0.597388 \n", "max 34.801666 73.301626 120.589494 18.748872 9.272376 \n", "\n", " V10 V11 V12 V13 V14 \\\n", "count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n", "mean -0.003167 0.002424 0.003652 -0.001616 -0.000319 \n", "std 1.079574 1.022315 1.005413 0.989553 0.962457 \n", "min -20.949192 -4.568390 -18.683715 -3.888606 -18.493773 \n", "25% -0.535400 -0.761716 -0.400087 -0.648761 -0.426516 \n", "50% -0.094949 -0.029129 0.144948 -0.013803 0.049248 \n", "75% 0.443126 0.743511 0.620694 0.657826 0.491916 \n", "max 15.331742 11.669205 4.406338 7.126883 7.439566 \n", "\n", " V15 V16 V17 V18 V19 \\\n", "count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n", "mean -0.000751 -0.000195 -0.000682 0.000312 -0.001144 \n", "std 0.913388 0.876542 0.842669 0.839626 0.812957 \n", "min -4.391307 -13.303888 -22.883999 -9.287832 -6.938297 \n", "25% -0.581015 -0.468312 -0.483139 -0.498660 -0.455027 \n", "50% 0.045291 0.062957 -0.066955 -0.004245 0.002229 \n", "75% 
0.647117 0.523608 0.396799 0.501455 0.455249 \n", "max 8.877742 17.315112 9.253526 4.712398 5.591971 \n", "\n", " V20 V21 V22 V23 V24 \\\n", "count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n", "mean -0.001004 0.000033 0.000052 0.000602 -0.000845 \n", "std 0.772484 0.713266 0.721198 0.613394 0.606464 \n", "min -54.497720 -22.665685 -9.499423 -32.828995 -2.836627 \n", "25% -0.211881 -0.226184 -0.537704 -0.161490 -0.355671 \n", "50% -0.061529 -0.030687 0.006971 -0.011789 0.040976 \n", "75% 0.133608 0.184846 0.523689 0.147923 0.441093 \n", "max 38.117209 22.579714 7.220158 20.803344 4.584549 \n", "\n", " V25 V26 V27 V28 Amount \\\n", "count 85443.000000 85443.000000 85443.000000 85443.000000 85443.000000 \n", "mean -0.000922 0.000220 0.000062 -0.000036 -0.002966 \n", "std 0.521520 0.483126 0.409616 0.341987 1.036492 \n", "min -8.696627 -2.604551 -9.793568 -15.430084 -0.353229 \n", "25% -0.319736 -0.326068 -0.070797 -0.053129 -0.331280 \n", "50% 0.013508 -0.051695 0.001984 0.011561 -0.265271 \n", "75% 0.350617 0.240657 0.092224 0.078900 -0.047356 \n", "max 5.826159 3.517346 31.612198 33.847808 102.362243 \n", "\n", " Class \n", "count 85443.000000 \n", "mean 0.001720 \n", "std 0.041443 \n", "min 0.000000 \n", "25% 0.000000 \n", "50% 0.000000 \n", "75% 0.000000 \n", "max 1.000000 " ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([X_test, y_test], axis=1).describe()" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Class\n", "0 85296\n", "1 147\n", "Name: count, dtype: int64" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([X_test, y_test], axis=1)['Class'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Splitting undersampled data into training and test datasets" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], 
"source": [ "X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = train_test_split(X_undersample, y_undersample, test_size = 0.3, random_state = 0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Statistical measures of the training dataset of undersampled data" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 688 entries, 6870 to 106127\n", "Data columns (total 31 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Time 688 non-null float64\n", " 1 V1 688 non-null float64\n", " 2 V2 688 non-null float64\n", " 3 V3 688 non-null float64\n", " 4 V4 688 non-null float64\n", " 5 V5 688 non-null float64\n", " 6 V6 688 non-null float64\n", " 7 V7 688 non-null float64\n", " 8 V8 688 non-null float64\n", " 9 V9 688 non-null float64\n", " 10 V10 688 non-null float64\n", " 11 V11 688 non-null float64\n", " 12 V12 688 non-null float64\n", " 13 V13 688 non-null float64\n", " 14 V14 688 non-null float64\n", " 15 V15 688 non-null float64\n", " 16 V16 688 non-null float64\n", " 17 V17 688 non-null float64\n", " 18 V18 688 non-null float64\n", " 19 V19 688 non-null float64\n", " 20 V20 688 non-null float64\n", " 21 V21 688 non-null float64\n", " 22 V22 688 non-null float64\n", " 23 V23 688 non-null float64\n", " 24 V24 688 non-null float64\n", " 25 V25 688 non-null float64\n", " 26 V26 688 non-null float64\n", " 27 V27 688 non-null float64\n", " 28 V28 688 non-null float64\n", " 29 Amount 688 non-null float64\n", " 30 Class 688 non-null int64 \n", "dtypes: float64(30), int64(1)\n", "memory usage: 172.0 KB\n" ] } ], "source": [ "pd.concat([X_train_undersample, y_train_undersample], axis=1).info()" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
count688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000688.000000
mean88957.739826-2.4538501.764205-3.4993492.201040-1.614046-0.742028-2.7203800.399640-1.253509-2.8498641.963640-3.099043-0.029291-3.531500-0.042676-2.076032-3.378747-1.1504370.3288870.1685200.467831-0.034725-0.020933-0.0712990.0078210.0197100.0842120.0225760.1083570.501453
std48118.2436165.3892053.7187646.0106123.1683264.2843521.8142525.6082594.7532252.2820004.3708732.7378544.5820211.0685954.6579900.9321283.4988596.0502582.4534621.2806091.1967032.7450631.1501161.4281990.5803090.6882260.4986090.9414910.4265841.4894270.500362
min406.000000-30.552380-20.984898-31.103685-3.421874-22.105532-14.425011-37.060311-37.353443-11.126624-23.228255-2.279476-18.431131-3.184202-19.214325-4.498945-13.563273-25.162799-9.498746-3.602657-5.244333-16.922016-8.887017-19.254328-2.082546-4.781606-1.407558-7.263482-1.869290-0.3532290.000000
25%47336.750000-3.004735-0.166482-5.049001-0.148394-1.859243-1.582428-3.103817-0.202836-2.205996-4.758711-0.020195-5.643631-0.763081-6.767749-0.616305-3.612856-5.277726-1.829426-0.447025-0.206394-0.148616-0.542923-0.244385-0.409463-0.331382-0.317043-0.067511-0.057033-0.3460730.000000
50%79364.500000-0.8687111.030021-1.4412051.287384-0.448367-0.652911-0.6957710.189872-0.678169-0.9388241.160274-0.785118-0.019448-1.051034-0.022344-0.632427-0.488137-0.3689000.2138120.0131680.163742-0.017305-0.004737-0.0034310.024723-0.0292540.0423750.031353-0.2663711.000000
75%135949.5000000.9496872.8737920.3113614.1841370.4391560.0781170.2222340.9432070.189021-0.0236763.6143830.2837240.7028110.0831010.6128320.2912460.2671120.3349270.9273480.4200880.7123720.5825700.1927660.3682300.3758650.3220360.4533710.2122530.0465391.000000
max172743.0000002.30887819.1672392.92764511.92751220.27772812.12895026.23772220.0072087.16887810.42350512.0189131.9481262.8992033.4424222.3107103.1396566.7393843.7903165.22834216.43692027.2028395.77408717.6066371.1026363.4107422.7452613.0523581.47198829.7991371.000000
\n", "
" ], "text/plain": [ " Time V1 V2 V3 V4 \\\n", "count 688.000000 688.000000 688.000000 688.000000 688.000000 \n", "mean 88957.739826 -2.453850 1.764205 -3.499349 2.201040 \n", "std 48118.243616 5.389205 3.718764 6.010612 3.168326 \n", "min 406.000000 -30.552380 -20.984898 -31.103685 -3.421874 \n", "25% 47336.750000 -3.004735 -0.166482 -5.049001 -0.148394 \n", "50% 79364.500000 -0.868711 1.030021 -1.441205 1.287384 \n", "75% 135949.500000 0.949687 2.873792 0.311361 4.184137 \n", "max 172743.000000 2.308878 19.167239 2.927645 11.927512 \n", "\n", " V5 V6 V7 V8 V9 V10 \\\n", "count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n", "mean -1.614046 -0.742028 -2.720380 0.399640 -1.253509 -2.849864 \n", "std 4.284352 1.814252 5.608259 4.753225 2.282000 4.370873 \n", "min -22.105532 -14.425011 -37.060311 -37.353443 -11.126624 -23.228255 \n", "25% -1.859243 -1.582428 -3.103817 -0.202836 -2.205996 -4.758711 \n", "50% -0.448367 -0.652911 -0.695771 0.189872 -0.678169 -0.938824 \n", "75% 0.439156 0.078117 0.222234 0.943207 0.189021 -0.023676 \n", "max 20.277728 12.128950 26.237722 20.007208 7.168878 10.423505 \n", "\n", " V11 V12 V13 V14 V15 V16 \\\n", "count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n", "mean 1.963640 -3.099043 -0.029291 -3.531500 -0.042676 -2.076032 \n", "std 2.737854 4.582021 1.068595 4.657990 0.932128 3.498859 \n", "min -2.279476 -18.431131 -3.184202 -19.214325 -4.498945 -13.563273 \n", "25% -0.020195 -5.643631 -0.763081 -6.767749 -0.616305 -3.612856 \n", "50% 1.160274 -0.785118 -0.019448 -1.051034 -0.022344 -0.632427 \n", "75% 3.614383 0.283724 0.702811 0.083101 0.612832 0.291246 \n", "max 12.018913 1.948126 2.899203 3.442422 2.310710 3.139656 \n", "\n", " V17 V18 V19 V20 V21 V22 \\\n", "count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n", "mean -3.378747 -1.150437 0.328887 0.168520 0.467831 -0.034725 \n", "std 6.050258 2.453462 1.280609 1.196703 2.745063 1.150116 \n", "min -25.162799 
-9.498746 -3.602657 -5.244333 -16.922016 -8.887017 \n", "25% -5.277726 -1.829426 -0.447025 -0.206394 -0.148616 -0.542923 \n", "50% -0.488137 -0.368900 0.213812 0.013168 0.163742 -0.017305 \n", "75% 0.267112 0.334927 0.927348 0.420088 0.712372 0.582570 \n", "max 6.739384 3.790316 5.228342 16.436920 27.202839 5.774087 \n", "\n", " V23 V24 V25 V26 V27 V28 \\\n", "count 688.000000 688.000000 688.000000 688.000000 688.000000 688.000000 \n", "mean -0.020933 -0.071299 0.007821 0.019710 0.084212 0.022576 \n", "std 1.428199 0.580309 0.688226 0.498609 0.941491 0.426584 \n", "min -19.254328 -2.082546 -4.781606 -1.407558 -7.263482 -1.869290 \n", "25% -0.244385 -0.409463 -0.331382 -0.317043 -0.067511 -0.057033 \n", "50% -0.004737 -0.003431 0.024723 -0.029254 0.042375 0.031353 \n", "75% 0.192766 0.368230 0.375865 0.322036 0.453371 0.212253 \n", "max 17.606637 1.102636 3.410742 2.745261 3.052358 1.471988 \n", "\n", " Amount Class \n", "count 688.000000 688.000000 \n", "mean 0.108357 0.501453 \n", "std 1.489427 0.500362 \n", "min -0.353229 0.000000 \n", "25% -0.346073 0.000000 \n", "50% -0.266371 1.000000 \n", "75% 0.046539 1.000000 \n", "max 29.799137 1.000000 " ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([X_train_undersample, y_train_undersample], axis=1).describe()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Class\n", "1 345\n", "0 343\n", "Name: count, dtype: int64" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([X_train_undersample, y_train_undersample], axis=1)['Class'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Statistical measures of the test dataset of undersampled data" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 296 entries, 102782 to 26982\n", "Data 
columns (total 31 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Time 296 non-null float64\n", " 1 V1 296 non-null float64\n", " 2 V2 296 non-null float64\n", " 3 V3 296 non-null float64\n", " 4 V4 296 non-null float64\n", " 5 V5 296 non-null float64\n", " 6 V6 296 non-null float64\n", " 7 V7 296 non-null float64\n", " 8 V8 296 non-null float64\n", " 9 V9 296 non-null float64\n", " 10 V10 296 non-null float64\n", " 11 V11 296 non-null float64\n", " 12 V12 296 non-null float64\n", " 13 V13 296 non-null float64\n", " 14 V14 296 non-null float64\n", " 15 V15 296 non-null float64\n", " 16 V16 296 non-null float64\n", " 17 V17 296 non-null float64\n", " 18 V18 296 non-null float64\n", " 19 V19 296 non-null float64\n", " 20 V20 296 non-null float64\n", " 21 V21 296 non-null float64\n", " 22 V22 296 non-null float64\n", " 23 V23 296 non-null float64\n", " 24 V24 296 non-null float64\n", " 25 V25 296 non-null float64\n", " 26 V26 296 non-null float64\n", " 27 V27 296 non-null float64\n", " 28 V28 296 non-null float64\n", " 29 Amount 296 non-null float64\n", " 30 Class 296 non-null int64 \n", "dtypes: float64(30), int64(1)\n", "memory usage: 74.0 KB\n" ] } ], "source": [ "pd.concat([X_test_undersample, y_test_undersample], axis=1).info()" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
count296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000296.000000
mean88355.706081-2.2942341.948429-3.5409902.373844-1.561363-0.613913-2.8914300.035986-1.314160-2.9048301.921330-3.055022-0.058815-3.362305-0.057163-1.983480-3.185857-1.0536730.3597810.2528600.1219400.0892000.067653-0.0603390.0051710.0159170.0133390.0398870.0050120.496622
std48614.0110435.8395143.8356046.6783243.2373484.2904181.7238706.5909235.1573152.4147404.8757522.7413644.7151120.9961574.6600051.0150193.4883745.8042682.3099401.2653031.1991742.9178281.2171110.8712800.5533520.6322840.4632521.1926930.4241380.8084470.500835
min472.000000-29.876366-8.402154-30.558697-3.085355-21.665654-5.773192-43.557242-41.044261-13.434066-24.588262-1.937109-18.683715-3.076318-17.620634-2.992430-14.129855-22.541652-9.090892-3.681904-3.493050-22.797604-8.887017-5.988806-1.690377-2.079928-1.104535-7.263482-1.429517-0.3532290.000000
25%48520.000000-2.736113-0.105197-5.4178180.057290-1.559190-1.431466-2.835885-0.147139-2.345829-4.445615-0.072236-5.340188-0.752422-6.363108-0.694497-3.304713-5.358990-1.719376-0.397503-0.188506-0.166957-0.491502-0.202399-0.407468-0.330734-0.315317-0.054283-0.047520-0.3486920.000000
50%83038.000000-0.6796690.947084-1.2559301.295388-0.355709-0.594254-0.5688480.146018-0.661503-0.7185491.218066-0.6498340.012057-0.8691580.006640-0.575661-0.423941-0.2032090.2416340.0303650.0859000.015825-0.0345890.0071650.059396-0.0118400.0613740.033726-0.3036330.000000
75%130068.5000001.0840182.7216120.4695844.3189820.4485660.1470380.2961900.7593350.224591-0.0306223.5423360.2889740.5758990.0741000.5656820.3466870.2743710.3167741.0108840.3633870.5773550.5718070.2079590.3818900.3561600.3542940.3919690.1757440.0225311.000000
max171578.0000002.25527422.0577293.94033712.1146729.8805646.4741153.79190719.5877732.6909593.24508611.1524911.8790383.6855702.7043762.4713582.5942666.4436492.5918464.85125511.05900427.2028398.3619855.4662301.0995092.1560421.2077312.7065661.7793645.6636101.000000
\n", "
" ], "text/plain": [ " Time V1 V2 V3 V4 \\\n", "count 296.000000 296.000000 296.000000 296.000000 296.000000 \n", "mean 88355.706081 -2.294234 1.948429 -3.540990 2.373844 \n", "std 48614.011043 5.839514 3.835604 6.678324 3.237348 \n", "min 472.000000 -29.876366 -8.402154 -30.558697 -3.085355 \n", "25% 48520.000000 -2.736113 -0.105197 -5.417818 0.057290 \n", "50% 83038.000000 -0.679669 0.947084 -1.255930 1.295388 \n", "75% 130068.500000 1.084018 2.721612 0.469584 4.318982 \n", "max 171578.000000 2.255274 22.057729 3.940337 12.114672 \n", "\n", " V5 V6 V7 V8 V9 V10 \\\n", "count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n", "mean -1.561363 -0.613913 -2.891430 0.035986 -1.314160 -2.904830 \n", "std 4.290418 1.723870 6.590923 5.157315 2.414740 4.875752 \n", "min -21.665654 -5.773192 -43.557242 -41.044261 -13.434066 -24.588262 \n", "25% -1.559190 -1.431466 -2.835885 -0.147139 -2.345829 -4.445615 \n", "50% -0.355709 -0.594254 -0.568848 0.146018 -0.661503 -0.718549 \n", "75% 0.448566 0.147038 0.296190 0.759335 0.224591 -0.030622 \n", "max 9.880564 6.474115 3.791907 19.587773 2.690959 3.245086 \n", "\n", " V11 V12 V13 V14 V15 V16 \\\n", "count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n", "mean 1.921330 -3.055022 -0.058815 -3.362305 -0.057163 -1.983480 \n", "std 2.741364 4.715112 0.996157 4.660005 1.015019 3.488374 \n", "min -1.937109 -18.683715 -3.076318 -17.620634 -2.992430 -14.129855 \n", "25% -0.072236 -5.340188 -0.752422 -6.363108 -0.694497 -3.304713 \n", "50% 1.218066 -0.649834 0.012057 -0.869158 0.006640 -0.575661 \n", "75% 3.542336 0.288974 0.575899 0.074100 0.565682 0.346687 \n", "max 11.152491 1.879038 3.685570 2.704376 2.471358 2.594266 \n", "\n", " V17 V18 V19 V20 V21 V22 \\\n", "count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n", "mean -3.185857 -1.053673 0.359781 0.252860 0.121940 0.089200 \n", "std 5.804268 2.309940 1.265303 1.199174 2.917828 1.217111 \n", "min -22.541652 -9.090892 
-3.681904 -3.493050 -22.797604 -8.887017 \n", "25% -5.358990 -1.719376 -0.397503 -0.188506 -0.166957 -0.491502 \n", "50% -0.423941 -0.203209 0.241634 0.030365 0.085900 0.015825 \n", "75% 0.274371 0.316774 1.010884 0.363387 0.577355 0.571807 \n", "max 6.443649 2.591846 4.851255 11.059004 27.202839 8.361985 \n", "\n", " V23 V24 V25 V26 V27 V28 \\\n", "count 296.000000 296.000000 296.000000 296.000000 296.000000 296.000000 \n", "mean 0.067653 -0.060339 0.005171 0.015917 0.013339 0.039887 \n", "std 0.871280 0.553352 0.632284 0.463252 1.192693 0.424138 \n", "min -5.988806 -1.690377 -2.079928 -1.104535 -7.263482 -1.429517 \n", "25% -0.202399 -0.407468 -0.330734 -0.315317 -0.054283 -0.047520 \n", "50% -0.034589 0.007165 0.059396 -0.011840 0.061374 0.033726 \n", "75% 0.207959 0.381890 0.356160 0.354294 0.391969 0.175744 \n", "max 5.466230 1.099509 2.156042 1.207731 2.706566 1.779364 \n", "\n", " Amount Class \n", "count 296.000000 296.000000 \n", "mean 0.005012 0.496622 \n", "std 0.808447 0.500835 \n", "min -0.353229 0.000000 \n", "25% -0.348692 0.000000 \n", "50% -0.303633 0.000000 \n", "75% 0.022531 1.000000 \n", "max 5.663610 1.000000 " ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([X_test_undersample, y_test_undersample], axis=1).describe()" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Class\n", "0 149\n", "1 147\n", "Name: count, dtype: int64" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([X_test_undersample, y_test_undersample], axis=1)['Class'].value_counts()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": 
"3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }