PhishGuardian/backend/.ipynb_checkpoints/ML-checkpoint.ipynb

{
 "cells": [
  {
   "metadata": {
    "jupyter": {
     "is_executing": true
    },
    "ExecuteTime": {
     "start_time": "2024-06-05T20:03:23.481431Z"
    }
   },
   "cell_type": "code",
   "source": [
    "%pip install pandas\n",
    "%pip install matplotlib\n",
    "%pip install nltk\n",
    "%pip install wordcloud\n",
    "%pip install scikit-learn==1.3.2\n",
    "%pip install scikit-fuzzy==0.4.2\n",
    "# Import pakietów\n",
    "import nltk\n",
    "nltk.download('punkt')\n",
    "nltk.download('stopwords')\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import re\n",
    "import string\n",
    "from wordcloud import WordCloud\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.stem import PorterStemmer\n",
    "from nltk.tokenize import word_tokenize\n",
    "import joblib\n",
    "import pickle"
   ],
   "id": "b313cab7d5cc49c0",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: pandas in c:\\users\\alicj\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (2.2.2)\n",
      "Requirement already satisfied: numpy>=1.26.0 in c:\\users\\alicj\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (1.26.4)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\alicj\\appdata\\roaming\\python\\python312\\site-packages (from pandas) (2.9.0.post0)\n",
      "Requirement already satisfied: pytz>=2020.1 in c:\\users\\alicj\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n",
      "Requirement already satisfied: tzdata>=2022.7 in c:\\users\\alicj\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n",
      "Requirement already satisfied: six>=1.5 in c:\\users\\alicj\\appdata\\roaming\\python\\python312\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Załaduj dane\n",
    "data_path = \"joined_data.csv\"\n",
    "data = pd.read_csv(data_path)"
   ],
   "id": "768266dbb79c5e9d"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "print(data.head())",
   "id": "ee08266d5c30627b"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "print(data.info())",
   "id": "1798f605e33fe5e5"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data",
   "id": "b4f43d913b92485b"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Usuwamy NaN",
   "id": "e3bf0f04a2be4e1a"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data.dropna(inplace=True)",
   "id": "71a6bbebdb0dccd4"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Usuwamy puste wiadomości i wiadomości zawierające jedynie \"\\n\"",
   "id": "b7fca25d67381cdd"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data = data[data['Body'] != '\\n']",
   "id": "72d84bf6c1e7023a"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data = data[data['Body'] != 'empty']",
   "id": "7c94c4dca6c4cdae"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data.reset_index(drop=True, inplace=True)",
   "id": "7e6fd3f8014498f3"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data",
   "id": "a0c33f82a936c59"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Sprawdźmy rozkład targetów\n",
    "print(data['Label'].value_counts())"
   ],
   "id": "19af5936d0cfeba2"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Analiza długości wiadomości",
   "id": "96c861e2655312cb"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "def get_len(row):\n",
    "    try:\n",
    "        return len(row)\n",
    "    except:\n",
    "        return row"
   ],
   "id": "e1ec1ed8aa7c856d"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data['message_length'] = data['Body'].apply(get_len)",
   "id": "63c023f34d234f3e"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data.sort_values(by='message_length')",
   "id": "d4fd0e2dcc2bfee9"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Jedna wiadomość jest bardzo długa: 17085626 znaków",
   "id": "e62112260ebc17f0"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data['message_length'].value_counts()",
   "id": "7c369131e3c91ce3"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Histogram długości wiadomości dla każdej kategorii - ograniczamy do 200.000 znaków celem wyświetlenia histogramów\n",
    "hist_data = data[data['message_length'] < 200000]\n",
    "plt.figure(figsize=(10, 6))\n",
    "hist_data[hist_data['Label'] == 0]['message_length'].hist(bins=100, alpha=0.6, label='Not Spam')\n",
    "hist_data[hist_data['Label'] == 1]['message_length'].hist(bins=100, alpha=0.6, label='Spam')\n",
    "plt.legend()\n",
    "plt.xlabel('Długość wiadomości')\n",
    "plt.ylabel('Liczba wiadomości')\n",
    "plt.title('Rozkład długości wiadomości')\n",
    "plt.show()"
   ],
   "id": "b6b509692fd7c541"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Ograniczamy jeszcze bardziej ",
   "id": "7182d6a1d6600c2"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Histogram długości wiadomości dla każdej kategorii - ograniczamy do 10000 znaków celem wyświetlenia histogramów\n",
    "hist_data = data[data['message_length'] < 10000]\n",
    "plt.figure(figsize=(10, 6))\n",
    "hist_data[hist_data['Label'] == 0]['message_length'].hist(bins=100, alpha=0.6, label='Not Spam')\n",
    "hist_data[hist_data['Label'] == 1]['message_length'].hist(bins=100, alpha=0.6, label='Spam')\n",
    "plt.legend()\n",
    "plt.xlabel('Długość wiadomości')\n",
    "plt.ylabel('Liczba wiadomości')\n",
    "plt.title('Rozkład długości wiadomości')\n",
    "plt.show()"
   ],
   "id": "962efe0bd652ecdb"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Można zauważyć, że trudno odróżnić wiadomości po samej długości. W tym celu należy skorzystać z bardziej zaawansowanych metod.",
   "id": "eaa483deb9c81942"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Przetwarzanie tekstu",
   "id": "6e0ee5fccf308cd1"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data",
   "id": "50c0131db25859cb"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "stop_words = set(stopwords.words('english'))\n",
    "ps = PorterStemmer()\n",
    "\n",
    "def preprocess_text(text):\n",
    "    # Usuwanie znaków specjalnych i tokenizacja\n",
    "    text = re.sub(r'\\d+', '', text)\n",
    "    text = text.translate(str.maketrans('', '', string.punctuation))\n",
    "    words = word_tokenize(text)\n",
    "    # Usuwanie stopwords i stemming\n",
    "    words = [ps.stem(word) for word in words if word.lower() not in stop_words]\n",
    "    return \" \".join(words)"
   ],
   "id": "c32c52a7b2575a3b"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Ten proces jest czasochłonny",
   "id": "5953cb974349cb33"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data['processed_message'] = data['Body'].apply(preprocess_text)",
   "id": "89b8cdeaa9da5c2d"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data.head()",
   "id": "ccce395ac94c39a1"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "data['processed_message']",
   "id": "7ce382be7bcdff2c"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Analiza słów za pomocą WordCloud\n",
    "spam_words = ' '.join(list(data[data['Label'] == 1]['processed_message']))\n",
    "not_spam_words = ' '.join(list(data[data['Label'] == 0]['processed_message']))"
   ],
   "id": "dc456d793b576f7"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "plt.figure(figsize=(10, 6))\n",
    "wordcloud_spam = WordCloud(width=800, height=400).generate(spam_words)\n",
    "plt.imshow(wordcloud_spam, interpolation='bilinear')\n",
    "plt.axis('off')\n",
    "plt.title('Word Cloud dla Spam')\n",
    "plt.show()"
   ],
   "id": "c9d7d9c9f4ae91ed"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "plt.figure(figsize=(10, 6))\n",
    "wordcloud_not_spam = WordCloud(width=800, height=400).generate(not_spam_words)\n",
    "plt.imshow(wordcloud_not_spam, interpolation='bilinear')\n",
    "plt.axis('off')\n",
    "plt.title('Word Cloud dla Not Spam')\n",
    "plt.show()"
   ],
   "id": "d954e01a1d0b3a97"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Budowa modelu klasyfikacyjnego",
   "id": "743000c7d99b8a85"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Zamiana tekstu na wektory\n",
    "vectorizer = CountVectorizer()\n",
    "X = vectorizer.fit_transform(data['processed_message'])\n",
    "y = data['Label']"
   ],
   "id": "7b3ba8e5b035cdc0"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Podział na zbiór treningowy i testowy\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
   ],
   "id": "5d66dcf506f4f399"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Trenowanie modelu Naiwnego Bayesa\n",
    "model_NB = MultinomialNB()\n",
    "model_NB.fit(X_train, y_train)"
   ],
   "id": "b3c2a6673c718301"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Predykcja i ocena Naiwny Bayes\n",
    "y_pred_NB = model_NB.predict(X_test)\n",
    "accuracy_NB = accuracy_score(y_test, y_pred_NB)\n",
    "classification_rep_NB = classification_report(y_test, y_pred_NB)\n",
    "confusion_matrix_NB = confusion_matrix(y_test, y_pred_NB)"
   ],
   "id": "82f18edc9161422a"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "accuracy_NB",
   "id": "a629b6b89d5cdf34"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "print(classification_rep_NB)",
   "id": "53c0cf3dc8aa02bc"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "print(confusion_matrix_NB)",
   "id": "9b915d02828de60"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Trening Drzewa Decyzyjnego (DT)",
   "id": "160da18f95c142a0"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Parametry domyślne\n",
    "model_DT = DecisionTreeClassifier(criterion= 'gini',\n",
    "                                  max_depth= None,\n",
    "                                  min_samples_leaf= 1,\n",
    "                                  min_samples_split= 2,\n",
    "                                  splitter= 'best')\n",
    "model_DT.fit(X_train, y_train)"
   ],
   "id": "8720ed4fd0ed5c72"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Predykcja i ocena DT\n",
    "y_pred_DT = model_DT.predict(X_test)\n",
    "accuracy_DT = accuracy_score(y_test, y_pred_DT)\n",
    "classification_rep_DT = classification_report(y_test, y_pred_DT)\n",
    "confusion_matrix_DT = confusion_matrix(y_test, y_pred_DT)"
   ],
   "id": "7aee079d59bdd4eb"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "accuracy_DT",
   "id": "57ac5a3ffe724fd5"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "print(classification_rep_DT)",
   "id": "ed8955dc5d5cdeaf"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "print(confusion_matrix_DT)",
   "id": "3ebfee20eb06e8cc"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Las losowy",
   "id": "85d3dc4e44a2a4b3"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "model_RF = RandomForestClassifier(n_estimators= 100,\n",
    "                                  bootstrap= True,\n",
    "                                  ccp_alpha= 0.0,\n",
    "                                  criterion= 'gini',\n",
    "                                  max_depth= None,\n",
    "                                  min_samples_leaf= 1,\n",
    "                                  min_samples_split= 2,\n",
    "                                  random_state=123)\n",
    "model_RF.fit(X_train, y_train)"
   ],
   "id": "6f454235f54aa9cc"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Predykcja i ocena RF\n",
    "y_pred_RF = model_RF.predict(X_test)\n",
    "accuracy_RF = accuracy_score(y_test, y_pred_RF)\n",
    "classification_rep_RF = classification_report(y_test, y_pred_RF)\n",
    "confusion_matrix_RF = confusion_matrix(y_test, y_pred_RF)"
   ],
   "id": "23d68d066dc47f9"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "accuracy_RF",
   "id": "55789560bb43f9b8"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "print(classification_rep_RF)",
   "id": "d15d57c467b94bad"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "print(confusion_matrix_RF)",
   "id": "477ea9a19dbe7389"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Najlepszym modelem okazał się Las losowy - lepiej sklasyfikować spam jako wiadomość niebędącą spamem niż odwrotnie. \n",
    "# Dlatego wybieramy RF, a nie NB."
   ],
   "id": "9c3308c811b9d014"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Teraz dokonamy treningu na pełnych danych i zapiszemy model celem wykorzystania na danych rzeczywistych w późniejszej \n",
    "# aplikacji."
   ],
   "id": "81f08fa14ba4daf5"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "model_RF_full = RandomForestClassifier(n_estimators= 100,\n",
    "                                  bootstrap= True,\n",
    "                                  ccp_alpha= 0.0,\n",
    "                                  criterion= 'gini',\n",
    "                                  max_depth= None,\n",
    "                                  min_samples_leaf= 1,\n",
    "                                  min_samples_split= 2,\n",
    "                                  random_state=123)"
   ],
   "id": "7f580653f470d7af"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "model_RF_full.fit(X, y)",
   "id": "f75fc9a4d4746e5a"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Predykcja i ocena RF\n",
    "y_pred_RF_full = model_RF_full.predict(X)\n",
    "accuracy_RF_full = accuracy_score(y, y_pred_RF_full)\n",
    "classification_rep_RF_full = classification_report(y, y_pred_RF_full)\n",
    "confusion_matrix_RF_full = confusion_matrix(y, y_pred_RF_full)"
   ],
   "id": "3d77bed327ac2fa1"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "accuracy_RF_full",
   "id": "a76a53da77128562"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "print(classification_rep_RF_full)",
   "id": "9a66104fd13572f8"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "print(confusion_matrix_RF_full)",
   "id": "823635f2315ecf05"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "model_RF_full",
   "id": "d0136f7b9f6344c4"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Zapisz model i vectorizer\n",
    "joblib.dump(model_RF_full, 'spam_classifier_model.pkl')\n",
    "joblib.dump(vectorizer, 'vectorizer.pkl')"
   ],
   "id": "e02e9031d10617f6"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Uwaga, ważna jest zgodność wersji scikita i joblib tutaj i w środowisku aplikacji",
   "id": "2ac5943e18571301"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "pip freeze | findstr scikit",
   "id": "a238743e07978f4"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Jak instalować?",
   "id": "a64099b8c61a884"
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "id": "d99c1dbe",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T16:57:22.800834Z",
     "start_time": "2024-06-05T16:57:22.798725Z"
    }
   },
   "outputs": [],
   "source": [
    "# Np. tak\n",
    "# pip install scikit-learn==1.3.2"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
Adding machine learning to analyze email content. Updating documentation 2024-06-08 11:04:41 +02:00			`{`
			`"cells": [`
			`{`
			`"metadata": {`
			`"jupyter": {`
			`"is_executing": true`
			`},`
			`"ExecuteTime": {`
			`"start_time": "2024-06-05T20:03:23.481431Z"`
			`}`
			`},`
			`"cell_type": "code",`
			`"source": [`
			`"%pip install pandas\n",`
			`"%pip install matplotlib\n",`
			`"%pip install nltk\n",`
			`"%pip install wordcloud\n",`
			`"%pip install scikit-learn==1.3.2\n",`
			`"%pip install scikit-fuzzy==0.4.2\n",`
			`"# Import pakietów\n",`
			`"import nltk\n",`
			`"nltk.download('punkt')\n",`
			`"nltk.download('stopwords')\n",`
			`"import pandas as pd\n",`
			`"import matplotlib.pyplot as plt\n",`
			`"import re\n",`
			`"import string\n",`
			`"from wordcloud import WordCloud\n",`
			`"from sklearn.feature_extraction.text import CountVectorizer\n",`
			`"from sklearn.model_selection import train_test_split\n",`
			`"from sklearn.naive_bayes import MultinomialNB\n",`
			`"from sklearn.ensemble import RandomForestClassifier\n",`
			`"from sklearn.tree import DecisionTreeClassifier\n",`
			`"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n",`
			`"from nltk.corpus import stopwords\n",`
			`"from nltk.stem import PorterStemmer\n",`
			`"from nltk.tokenize import word_tokenize\n",`
			`"import joblib\n",`
			`"import pickle"`
			`],`
			`"id": "b313cab7d5cc49c0",`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"Requirement already satisfied: pandas in c:\\users\\alicj\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (2.2.2)\n",`
			`"Requirement already satisfied: numpy>=1.26.0 in c:\\users\\alicj\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (1.26.4)\n",`
			`"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\alicj\\appdata\\roaming\\python\\python312\\site-packages (from pandas) (2.9.0.post0)\n",`
			`"Requirement already satisfied: pytz>=2020.1 in c:\\users\\alicj\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n",`
			`"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\alicj\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas) (2024.1)\n",`
			`"Requirement already satisfied: six>=1.5 in c:\\users\\alicj\\appdata\\roaming\\python\\python312\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",`
			`"Note: you may need to restart the kernel to use updated packages.\n"`
			`]`
			`}`
			`],`
			`"execution_count": null`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Załaduj dane\n",`
			`"data_path = \"joined_data.csv\"\n",`
			`"data = pd.read_csv(data_path)"`
			`],`
			`"id": "768266dbb79c5e9d"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "print(data.head())",`
			`"id": "ee08266d5c30627b"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "print(data.info())",`
			`"id": "1798f605e33fe5e5"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data",`
			`"id": "b4f43d913b92485b"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Usuwamy NaN",`
			`"id": "e3bf0f04a2be4e1a"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data.dropna(inplace=True)",`
			`"id": "71a6bbebdb0dccd4"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Usuwamy puste wiadomości i wiadomości zawierające jedynie \"\\n\"",`
			`"id": "b7fca25d67381cdd"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data = data[data['Body'] != '\\n']",`
			`"id": "72d84bf6c1e7023a"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data = data[data['Body'] != 'empty']",`
			`"id": "7c94c4dca6c4cdae"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data.reset_index(drop=True, inplace=True)",`
			`"id": "7e6fd3f8014498f3"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data",`
			`"id": "a0c33f82a936c59"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Sprawdźmy rozkład targetów\n",`
			`"print(data['Label'].value_counts())"`
			`],`
			`"id": "19af5936d0cfeba2"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Analiza długości wiadomości",`
			`"id": "96c861e2655312cb"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"def get_len(row):\n",`
			`" try:\n",`
			`" return len(row)\n",`
			`" except:\n",`
			`" return row"`
			`],`
			`"id": "e1ec1ed8aa7c856d"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data['message_length'] = data['Body'].apply(get_len)",`
			`"id": "63c023f34d234f3e"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data.sort_values(by='message_length')",`
			`"id": "d4fd0e2dcc2bfee9"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Jedna wiadomość jest bardzo długa: 17085626 znaków",`
			`"id": "e62112260ebc17f0"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data['message_length'].value_counts()",`
			`"id": "7c369131e3c91ce3"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Histogram długości wiadomości dla każdej kategorii - ograniczamy do 200.000 znaków celem wyświetlenia histogramów\n",`
			`"hist_data = data[data['message_length'] < 200000]\n",`
			`"plt.figure(figsize=(10, 6))\n",`
			`"hist_data[hist_data['Label'] == 0]['message_length'].hist(bins=100, alpha=0.6, label='Not Spam')\n",`
			`"hist_data[hist_data['Label'] == 1]['message_length'].hist(bins=100, alpha=0.6, label='Spam')\n",`
			`"plt.legend()\n",`
			`"plt.xlabel('Długość wiadomości')\n",`
			`"plt.ylabel('Liczba wiadomości')\n",`
			`"plt.title('Rozkład długości wiadomości')\n",`
			`"plt.show()"`
			`],`
			`"id": "b6b509692fd7c541"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Ograniczamy jeszcze bardziej ",`
			`"id": "7182d6a1d6600c2"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Histogram długości wiadomości dla każdej kategorii - ograniczamy do 10000 znaków celem wyświetlenia histogramów\n",`
			`"hist_data = data[data['message_length'] < 10000]\n",`
			`"plt.figure(figsize=(10, 6))\n",`
			`"hist_data[hist_data['Label'] == 0]['message_length'].hist(bins=100, alpha=0.6, label='Not Spam')\n",`
			`"hist_data[hist_data['Label'] == 1]['message_length'].hist(bins=100, alpha=0.6, label='Spam')\n",`
			`"plt.legend()\n",`
			`"plt.xlabel('Długość wiadomości')\n",`
			`"plt.ylabel('Liczba wiadomości')\n",`
			`"plt.title('Rozkład długości wiadomości')\n",`
			`"plt.show()"`
			`],`
			`"id": "962efe0bd652ecdb"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Można zauważyć, że trudno odróżnić wiadomości po samej długości. W tym celu należy skorzystać z bardziej zaawansowanych metod.",`
			`"id": "eaa483deb9c81942"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Przetwarzanie tekstu",`
			`"id": "6e0ee5fccf308cd1"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data",`
			`"id": "50c0131db25859cb"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"stop_words = set(stopwords.words('english'))\n",`
			`"ps = PorterStemmer()\n",`
			`"\n",`
			`"def preprocess_text(text):\n",`
			`" # Usuwanie znaków specjalnych i tokenizacja\n",`
			`" text = re.sub(r'\\d+', '', text)\n",`
			`" text = text.translate(str.maketrans('', '', string.punctuation))\n",`
			`" words = word_tokenize(text)\n",`
			`" # Usuwanie stopwords i stemming\n",`
			`" words = [ps.stem(word) for word in words if word.lower() not in stop_words]\n",`
			`" return \" \".join(words)"`
			`],`
			`"id": "c32c52a7b2575a3b"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Ten proces jest czasochłonny",`
			`"id": "5953cb974349cb33"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data['processed_message'] = data['Body'].apply(preprocess_text)",`
			`"id": "89b8cdeaa9da5c2d"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data.head()",`
			`"id": "ccce395ac94c39a1"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "data['processed_message']",`
			`"id": "7ce382be7bcdff2c"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Analiza słów za pomocą WordCloud\n",`
			`"spam_words = ' '.join(list(data[data['Label'] == 1]['processed_message']))\n",`
			`"not_spam_words = ' '.join(list(data[data['Label'] == 0]['processed_message']))"`
			`],`
			`"id": "dc456d793b576f7"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"plt.figure(figsize=(10, 6))\n",`
			`"wordcloud_spam = WordCloud(width=800, height=400).generate(spam_words)\n",`
			`"plt.imshow(wordcloud_spam, interpolation='bilinear')\n",`
			`"plt.axis('off')\n",`
			`"plt.title('Word Cloud dla Spam')\n",`
			`"plt.show()"`
			`],`
			`"id": "c9d7d9c9f4ae91ed"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"plt.figure(figsize=(10, 6))\n",`
			`"wordcloud_not_spam = WordCloud(width=800, height=400).generate(not_spam_words)\n",`
			`"plt.imshow(wordcloud_not_spam, interpolation='bilinear')\n",`
			`"plt.axis('off')\n",`
			`"plt.title('Word Cloud dla Not Spam')\n",`
			`"plt.show()"`
			`],`
			`"id": "d954e01a1d0b3a97"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Budowa modelu klasyfikacyjnego",`
			`"id": "743000c7d99b8a85"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Turn the preprocessed messages into bag-of-words count vectors\n",`
			`"y = data['Label']\n",`
			`"vectorizer = CountVectorizer()\n",`
			`"X = vectorizer.fit_transform(raw_documents=data['processed_message'])"`
			`],`
			`"id": "7b3ba8e5b035cdc0"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Split the raw text first and fit a separate vectorizer on the training messages only,\n",`
			`"# so the vocabulary used for model comparison never sees the test messages (no leakage).\n",`
			`"# The split depends only on the number of rows, test_size and random_state, so the same\n",`
			`"# rows land in each part as when splitting the pre-vectorized matrix X.\n",`
			`"# The full-corpus vectorizer and X stay untouched for the final model trained on all data.\n",`
			`"text_train, text_test, y_train, y_test = train_test_split(\n",`
			`"    data['processed_message'], y, test_size=0.2, random_state=42\n",`
			`")\n",`
			`"eval_vectorizer = CountVectorizer()\n",`
			`"X_train = eval_vectorizer.fit_transform(text_train)\n",`
			`"X_test = eval_vectorizer.transform(text_test)"`
			`],`
			`"id": "5d66dcf506f4f399"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Fit a multinomial Naive Bayes classifier on the training split\n",`
			`"model_NB = MultinomialNB()\n",`
			`"model_NB.fit(X=X_train, y=y_train)"`
			`],`
			`"id": "b3c2a6673c718301"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Predict on the held-out split and score the Naive Bayes model\n",`
			`"y_pred_NB = model_NB.predict(X=X_test)\n",`
			`"accuracy_NB = accuracy_score(y_true=y_test, y_pred=y_pred_NB)\n",`
			`"classification_rep_NB = classification_report(y_true=y_test, y_pred=y_pred_NB)\n",`
			`"confusion_matrix_NB = confusion_matrix(y_true=y_test, y_pred=y_pred_NB)"`
			`],`
			`"id": "82f18edc9161422a"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "accuracy_NB",`
			`"id": "a629b6b89d5cdf34"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "print(classification_rep_NB)",`
			`"id": "53c0cf3dc8aa02bc"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "print(confusion_matrix_NB)",`
			`"id": "9b915d02828de60"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Training the Decision Tree (DT)",`
			`"id": "160da18f95c142a0"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Library-default hyperparameters, spelled out explicitly.\n",`
			`"# random_state is pinned because the tree permutes candidate features at every split,\n",`
			`"# so without a seed the fitted tree (and its metrics) can differ between runs.\n",`
			`"model_DT = DecisionTreeClassifier(\n",`
			`"    criterion='gini',\n",`
			`"    splitter='best',\n",`
			`"    max_depth=None,\n",`
			`"    min_samples_split=2,\n",`
			`"    min_samples_leaf=1,\n",`
			`"    random_state=123,\n",`
			`")\n",`
			`"model_DT.fit(X_train, y_train)"`
			`],`
			`"id": "8720ed4fd0ed5c72"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Predict on the held-out split and score the Decision Tree\n",`
			`"y_pred_DT = model_DT.predict(X=X_test)\n",`
			`"accuracy_DT = accuracy_score(y_true=y_test, y_pred=y_pred_DT)\n",`
			`"classification_rep_DT = classification_report(y_true=y_test, y_pred=y_pred_DT)\n",`
			`"confusion_matrix_DT = confusion_matrix(y_true=y_test, y_pred=y_pred_DT)"`
			`],`
			`"id": "7aee079d59bdd4eb"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "accuracy_DT",`
			`"id": "57ac5a3ffe724fd5"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "print(classification_rep_DT)",`
			`"id": "ed8955dc5d5cdeaf"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "print(confusion_matrix_DT)",`
			`"id": "3ebfee20eb06e8cc"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Random Forest (RF)",`
			`"id": "85d3dc4e44a2a4b3"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Random Forest with explicitly listed hyperparameters and a fixed seed, fitted on the training split\n",`
			`"model_RF = RandomForestClassifier(\n",`
			`"    n_estimators=100,\n",`
			`"    criterion='gini',\n",`
			`"    max_depth=None,\n",`
			`"    min_samples_split=2,\n",`
			`"    min_samples_leaf=1,\n",`
			`"    bootstrap=True,\n",`
			`"    ccp_alpha=0.0,\n",`
			`"    random_state=123,\n",`
			`")\n",`
			`"model_RF.fit(X=X_train, y=y_train)"`
			`],`
			`"id": "6f454235f54aa9cc"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Predict on the held-out split and score the Random Forest\n",`
			`"y_pred_RF = model_RF.predict(X=X_test)\n",`
			`"accuracy_RF = accuracy_score(y_true=y_test, y_pred=y_pred_RF)\n",`
			`"classification_rep_RF = classification_report(y_true=y_test, y_pred=y_pred_RF)\n",`
			`"confusion_matrix_RF = confusion_matrix(y_true=y_test, y_pred=y_pred_RF)"`
			`],`
			`"id": "23d68d066dc47f9"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "accuracy_RF",`
			`"id": "55789560bb43f9b8"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "print(classification_rep_RF)",`
			`"id": "d15d57c467b94bad"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "print(confusion_matrix_RF)",`
			`"id": "477ea9a19dbe7389"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# The Random Forest turned out to be the best model: letting a spam message through as legitimate is less harmful \n",`
			`"# than flagging a legitimate message as spam, so we choose RF over NB."`
			`],`
			`"id": "9c3308c811b9d014"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Next we retrain on the full dataset and save the model so that it can be applied to real data later, \n",`
			`"# in the application."`
			`],`
			`"id": "81f08fa14ba4daf5"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Unfitted Random Forest for the final model; same hyperparameters and seed as the evaluated forest\n",`
			`"model_RF_full = RandomForestClassifier(\n",`
			`"    n_estimators=100,\n",`
			`"    criterion='gini',\n",`
			`"    max_depth=None,\n",`
			`"    min_samples_split=2,\n",`
			`"    min_samples_leaf=1,\n",`
			`"    bootstrap=True,\n",`
			`"    ccp_alpha=0.0,\n",`
			`"    random_state=123,\n",`
			`")"`
			`],`
			`"id": "7f580653f470d7af"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "model_RF_full.fit(X, y)",`
			`"id": "f75fc9a4d4746e5a"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Sanity check only: these scores are computed on the same rows the model was fitted on,\n",`
			`"# so they describe the fit to the training data, not performance on unseen messages\n",`
			`"y_pred_RF_full = model_RF_full.predict(X=X)\n",`
			`"accuracy_RF_full = accuracy_score(y_true=y, y_pred=y_pred_RF_full)\n",`
			`"classification_rep_RF_full = classification_report(y_true=y, y_pred=y_pred_RF_full)\n",`
			`"confusion_matrix_RF_full = confusion_matrix(y_true=y, y_pred=y_pred_RF_full)"`
			`],`
			`"id": "3d77bed327ac2fa1"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "accuracy_RF_full",`
			`"id": "a76a53da77128562"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "print(classification_rep_RF_full)",`
			`"id": "9a66104fd13572f8"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "print(confusion_matrix_RF_full)",`
			`"id": "823635f2315ecf05"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "model_RF_full",`
			`"id": "d0136f7b9f6344c4"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Persist the final model together with the vectorizer that produced its feature space\n",`
			`"joblib.dump(value=model_RF_full, filename='spam_classifier_model.pkl')\n",`
			`"joblib.dump(value=vectorizer, filename='vectorizer.pkl')"`
			`],`
			`"id": "e02e9031d10617f6"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# Note: the scikit-learn and joblib versions must match between this notebook and the application environment",`
			`"id": "2ac5943e18571301"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": [`
			`"# Report the installed versions that have to match the application environment.\n",`
			`"# Queried from the kernel's own environment, so it works on any operating system\n",`
			`"# (the previous shell pipeline relied on the Windows-only findstr command).\n",`
			`"from importlib.metadata import version\n",`
			`"\n",`
			`"{pkg: version(pkg) for pkg in ('scikit-learn', 'scikit-fuzzy', 'joblib')}"`
			`],`
			`"id": "a238743e07978f4"`
			`},`
			`{`
			`"metadata": {},`
			`"cell_type": "code",`
			`"outputs": [],`
			`"execution_count": null,`
			`"source": "# How to install a matching version?",`
			`"id": "a64099b8c61a884"`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 140,`
			`"id": "d99c1dbe",`
			`"metadata": {`
			`"ExecuteTime": {`
			`"end_time": "2024-06-05T16:57:22.800834Z",`
			`"start_time": "2024-06-05T16:57:22.798725Z"`
			`}`
			`},`
			`"outputs": [],`
			`"source": [`
			`"# For example:\n",`
			`"# pip install scikit-learn==1.3.2"`
			`]`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3 (ipykernel)",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.12.3"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 5`
			`}`