887 KiB
887 KiB
%pip install pandas
%pip install matplotlib
%pip install nltk
%pip install wordcloud
%pip install scikit-learn==1.3.2
%pip install scikit-fuzzy==0.4.2
# Import pakietów
import nltk
nltk.download('punkt')
nltk.download('stopwords')
import pandas as pd
import matplotlib.pyplot as plt
import re
import string
from wordcloud import WordCloud
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import joblib
import pickle
Requirement already satisfied: pandas in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (2.2.2)Note: you may need to restart the kernel to use updated packages. Requirement already satisfied: numpy>=1.26.0 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from pandas) (1.26.4) Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\alicj\appdata\roaming\python\python312\site-packages (from pandas) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from pandas) (2024.1) Requirement already satisfied: tzdata>=2022.7 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from pandas) (2024.1) Requirement already satisfied: six>=1.5 in c:\users\alicj\appdata\roaming\python\python312\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0) Requirement already satisfied: matplotlib in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (3.9.0) Requirement already satisfied: contourpy>=1.0.1 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib) (1.2.1) Requirement already satisfied: cycler>=0.10 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib) (0.12.1) Requirement already satisfied: fonttools>=4.22.0 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib) (4.53.0) Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib) (1.4.5) Requirement already satisfied: numpy>=1.23 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib) (1.26.4) Requirement already satisfied: packaging>=20.0 in c:\users\alicj\appdata\roaming\python\python312\site-packages (from matplotlib) (24.0) Requirement already satisfied: pillow>=8 in 
c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib) (10.3.0) Requirement already satisfied: pyparsing>=2.3.1 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib) (3.1.2) Requirement already satisfied: python-dateutil>=2.7 in c:\users\alicj\appdata\roaming\python\python312\site-packages (from matplotlib) (2.9.0.post0) Requirement already satisfied: six>=1.5 in c:\users\alicj\appdata\roaming\python\python312\site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0) Note: you may need to restart the kernel to use updated packages. Requirement already satisfied: nltk in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (3.8.1) Requirement already satisfied: click in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from nltk) (8.1.7) Requirement already satisfied: joblib in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from nltk) (1.4.2) Requirement already satisfied: regex>=2021.8.3 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from nltk) (2024.5.15) Requirement already satisfied: tqdm in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from nltk) (4.66.4) Requirement already satisfied: colorama in c:\users\alicj\appdata\roaming\python\python312\site-packages (from click->nltk) (0.4.6) Note: you may need to restart the kernel to use updated packages. 
Requirement already satisfied: wordcloud in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (1.9.3) Requirement already satisfied: numpy>=1.6.1 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from wordcloud) (1.26.4) Requirement already satisfied: pillow in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from wordcloud) (10.3.0) Requirement already satisfied: matplotlib in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from wordcloud) (3.9.0) Requirement already satisfied: contourpy>=1.0.1 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib->wordcloud) (1.2.1) Requirement already satisfied: cycler>=0.10 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib->wordcloud) (0.12.1) Requirement already satisfied: fonttools>=4.22.0 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib->wordcloud) (4.53.0) Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib->wordcloud) (1.4.5) Requirement already satisfied: packaging>=20.0 in c:\users\alicj\appdata\roaming\python\python312\site-packages (from matplotlib->wordcloud) (24.0) Requirement already satisfied: pyparsing>=2.3.1 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from matplotlib->wordcloud) (3.1.2) Requirement already satisfied: python-dateutil>=2.7 in c:\users\alicj\appdata\roaming\python\python312\site-packages (from matplotlib->wordcloud) (2.9.0.post0) Requirement already satisfied: six>=1.5 in c:\users\alicj\appdata\roaming\python\python312\site-packages (from python-dateutil>=2.7->matplotlib->wordcloud) (1.16.0) Note: you may need to restart the kernel to use updated packages. 
Requirement already satisfied: scikit-learn==1.3.2 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (1.3.2) Requirement already satisfied: numpy<2.0,>=1.17.3 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from scikit-learn==1.3.2) (1.26.4) Requirement already satisfied: scipy>=1.5.0 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from scikit-learn==1.3.2) (1.13.1) Requirement already satisfied: joblib>=1.1.1 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from scikit-learn==1.3.2) (1.4.2) Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from scikit-learn==1.3.2) (3.5.0) Note: you may need to restart the kernel to use updated packages. Requirement already satisfied: scikit-fuzzy==0.4.2 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (0.4.2) Requirement already satisfied: numpy>=1.6.0 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from scikit-fuzzy==0.4.2) (1.26.4) Requirement already satisfied: scipy>=0.9.0 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from scikit-fuzzy==0.4.2) (1.13.1) Requirement already satisfied: networkx>=1.9.0 in c:\users\alicj\appdata\local\programs\python\python312\lib\site-packages (from scikit-fuzzy==0.4.2) (3.3) Note: you may need to restart the kernel to use updated packages.
[nltk_data] Downloading package punkt to [nltk_data] C:\Users\alicj\AppData\Roaming\nltk_data... [nltk_data] Package punkt is already up-to-date! [nltk_data] Downloading package stopwords to [nltk_data] C:\Users\alicj\AppData\Roaming\nltk_data... [nltk_data] Package stopwords is already up-to-date!
# Load the dataset from the CSV file and preview its first five rows.
data_path = "joined_data.csv"
data = pd.read_csv(filepath_or_buffer=data_path)
print(data.head(n=5))
Unnamed: 0 Body Label 0 0 Subject: congratulations\n vince ,\n congratul... 0 1 1 \nhttp://news.bbc.co.uk/1/hi/scotland/2515231.... 0 2 2 Big and big\nMAIN PAGE\nHuge big titties @ big... 1 3 3 Subject: re : enron visit - - thanks\n larry ,... 0 4 4 On Fri, Aug 09, 2002 at 09:30:29AM +0100, Ryan... 0
# DataFrame.info() writes the summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() emits a stray "None".
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 18651 entries, 0 to 18650 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 18651 non-null int64 1 Body 18650 non-null object 2 Label 18651 non-null int64 dtypes: int64(2), object(1) memory usage: 437.3+ KB None
data
Unnamed: 0 | Body | Label | |
---|---|---|---|
0 | 0 | Subject: congratulations\n vince ,\n congratul... | 0 |
1 | 1 | \nhttp://news.bbc.co.uk/1/hi/scotland/2515231.... | 0 |
2 | 2 | Big and big\nMAIN PAGE\nHuge big titties @ big... | 1 |
3 | 3 | Subject: re : enron visit - - thanks\n larry ,... | 0 |
4 | 4 | On Fri, Aug 09, 2002 at 09:30:29AM +0100, Ryan... | 0 |
... | ... | ... | ... |
18646 | 18646 | Subject: fluid analysis\n our customer speak v... | 1 |
18647 | 18647 | Subject: guadalupe\n i rolled 740208 , 740209 ... | 0 |
18648 | 18648 | 100% Free Porn!\nWhat more can you ask for?\nC... | 1 |
18649 | 18649 | Subject: revised nominations\n daren ,\n we ha... | 0 |
18650 | 18650 | Hello,\nI've got a small problem but still ann... | 0 |
18651 rows × 3 columns
# Drop rows that contain a missing value.
data = data.dropna()
# Drop placeholder messages: bodies that are exactly "\n" or the literal
# string "empty", then renumber the rows from zero.
is_placeholder = data['Body'].isin(['\n', 'empty'])
data = data.loc[~is_placeholder].reset_index(drop=True)
data
Unnamed: 0 | Body | Label | |
---|---|---|---|
0 | 0 | Subject: congratulations\n vince ,\n congratul... | 0 |
1 | 1 | \nhttp://news.bbc.co.uk/1/hi/scotland/2515231.... | 0 |
2 | 2 | Big and big\nMAIN PAGE\nHuge big titties @ big... | 1 |
3 | 3 | Subject: re : enron visit - - thanks\n larry ,... | 0 |
4 | 4 | On Fri, Aug 09, 2002 at 09:30:29AM +0100, Ryan... | 0 |
... | ... | ... | ... |
18109 | 18646 | Subject: fluid analysis\n our customer speak v... | 1 |
18110 | 18647 | Subject: guadalupe\n i rolled 740208 , 740209 ... | 0 |
18111 | 18648 | 100% Free Porn!\nWhat more can you ask for?\nC... | 1 |
18112 | 18649 | Subject: revised nominations\n daren ,\n we ha... | 0 |
18113 | 18650 | Hello,\nI've got a small problem but still ann... | 0 |
18114 rows × 3 columns
# Check the distribution of the target labels.
label_counts = data['Label'].value_counts()
print(label_counts)
Label 0 11124 1 6990 Name: count, dtype: int64
# Analiza długości wiadomości
def get_len(row):
    """Return ``len(row)``, or ``row`` itself when it has no length.

    Args:
        row: Any value; despite the name it need not be a DataFrame row.

    Returns:
        The length of ``row`` if it is sized, otherwise ``row`` unchanged.
    """
    try:
        return len(row)
    except TypeError:
        # len() raises TypeError for unsized values (e.g. numbers, None);
        # those are passed through. Any other exception is a real error and
        # is no longer silently swallowed as the former bare ``except`` did.
        return row
# Store the length of every message body, then show the rows ordered from
# the shortest message to the longest.
message_lengths = data['Body'].apply(get_len)
data['message_length'] = message_lengths
data.sort_values(by='message_length', ascending=True)
Unnamed: 0 | Body | Label | message_length | |
---|---|---|---|---|
16293 | 16774 | \n4623\n | 1 | 6 |
6071 | 6254 | Subject: \n | 1 | 10 |
3683 | 3792 | Subject: \n | 1 | 10 |
12843 | 13228 | Subject: \n | 1 | 10 |
17867 | 18399 | Subject: \n | 1 | 10 |
... | ... | ... | ... | ... |
6927 | 7128 | ------------------------ Yahoo! Groups Sponsor... | 0 | 107989 |
6887 | 7088 | Subject: enron mentions\n enron discusses cred... | 0 | 121502 |
2422 | 2488 | =?GB2312?B?yNW12squ0ru97NbQufq5+rzKtefX08nosb... | 1 | 129635 |
1522 | 1569 | change your settings: http://blo.gs/settings.p... | 0 | 194978 |
4844 | 4987 | ,Body,Label\n 0,"Subject: great part-time or s... | 0 | 17085626 |
18114 rows × 4 columns
# One message is extremely long (17085626 characters).
length_counts = data['message_length'].value_counts()
length_counts
message_length 293 68 295 53 291 52 539 44 446 40 .. 2394 1 4856 1 6192 1 2597 1 4004 1 Name: count, Length: 4903, dtype: int64
# Histogram of message length for each class; only messages shorter than
# 200,000 characters are plotted so the histograms remain readable.
hist_data = data[data['message_length'] < 200000]
plt.figure(figsize=(10, 6))
for label_value, label_name in ((0, 'Not Spam'), (1, 'Spam')):
    lengths = hist_data.loc[hist_data['Label'] == label_value, 'message_length']
    lengths.hist(bins=100, alpha=0.6, label=label_name)
plt.legend()
plt.xlabel('Długość wiadomości')
plt.ylabel('Liczba wiadomości')
plt.title('Rozkład długości wiadomości')
plt.show()
# Narrow the range further: histogram of message length for each class,
# restricted to messages shorter than 10,000 characters.
hist_data = data[data['message_length'] < 10000]
plt.figure(figsize=(10, 6))
for label_value, label_name in ((0, 'Not Spam'), (1, 'Spam')):
    lengths = hist_data.loc[hist_data['Label'] == label_value, 'message_length']
    lengths.hist(bins=100, alpha=0.6, label=label_name)
plt.legend()
plt.xlabel('Długość wiadomości')
plt.ylabel('Liczba wiadomości')
plt.title('Rozkład długości wiadomości')
plt.show()
# As the histograms show, it is hard to tell the message classes apart by length alone, so more advanced methods are needed.
# Text processing
data
Unnamed: 0 | Body | Label | message_length | |
---|---|---|---|---|
0 | 0 | Subject: congratulations\n vince ,\n congratul... | 0 | 129 |
1 | 1 | \nhttp://news.bbc.co.uk/1/hi/scotland/2515231.... | 0 | 435 |
2 | 2 | Big and big\nMAIN PAGE\nHuge big titties @ big... | 1 | 231 |
3 | 3 | Subject: re : enron visit - - thanks\n larry ,... | 0 | 1180 |
4 | 4 | On Fri, Aug 09, 2002 at 09:30:29AM +0100, Ryan... | 0 | 574 |
... | ... | ... | ... | ... |
18109 | 18646 | Subject: fluid analysis\n our customer speak v... | 1 | 927 |
18110 | 18647 | Subject: guadalupe\n i rolled 740208 , 740209 ... | 0 | 337 |
18111 | 18648 | 100% Free Porn!\nWhat more can you ask for?\nC... | 1 | 345 |
18112 | 18649 | Subject: revised nominations\n daren ,\n we ha... | 0 | 346 |
18113 | 18650 | Hello,\nI've got a small problem but still ann... | 0 | 744 |
18114 rows × 4 columns
# Shared text-processing resources: a Porter stemmer instance and the
# English stop-word list held in a set.
ps = PorterStemmer()
stop_words = set(stopwords.words('english'))
def preprocess_text(text):
    """Normalize one message into a space-separated string of stemmed tokens.

    Runs of digits and all characters in ``string.punctuation`` are deleted,
    the remainder is tokenized with ``word_tokenize``, tokens whose lowercase
    form is in ``stop_words`` are dropped, and the rest are stemmed with
    ``ps``.

    Args:
        text: The raw message text.

    Returns:
        The stemmed tokens joined by single spaces.
    """
    # Strip special characters, then tokenize.
    without_digits = re.sub(r'\d+', '', text)
    punctuation_table = str.maketrans('', '', string.punctuation)
    cleaned = without_digits.translate(punctuation_table)

    # Skip stop words and stem everything else.
    stemmed = []
    for token in word_tokenize(cleaned):
        if token.lower() in stop_words:
            continue
        stemmed.append(ps.stem(token))
    return " ".join(stemmed)
# This step is time-consuming.
processed_messages = data['Body'].apply(preprocess_text)
data['processed_message'] = processed_messages
data.head()
Unnamed: 0 | Body | Label | message_length | processed_message | |
---|---|---|---|---|---|
0 | 0 | Subject: congratulations\n vince ,\n congratul... | 0 | 129 | subject congratul vinc congratul wish best luc... |
1 | 1 | \nhttp://news.bbc.co.uk/1/hi/scotland/2515231.... | 0 | 435 | httpnewsbbccoukhiscotlandstm yahoo group spons... |
2 | 2 | Big and big\nMAIN PAGE\nHuge big titties @ big... | 1 | 231 | big big main page huge big titti bigbigscom sa... |
3 | 3 | Subject: re : enron visit - - thanks\n larry ,... | 0 | 1180 | subject enron visit thank larri think potenti ... |
4 | 4 | On Fri, Aug 09, 2002 at 09:30:29AM +0100, Ryan... | 0 | 574 | fri aug ryan shane mention imho stop spammer g... |
data['processed_message']
0 subject congratul vinc congratul wish best luc... 1 httpnewsbbccoukhiscotlandstm yahoo group spons... 2 big big main page huge big titti bigbigscom sa... 3 subject enron visit thank larri think potenti ... 4 fri aug ryan shane mention imho stop spammer g... ... 18109 subject fluid analysi custom speak volum spur ... 18110 subject guadalup roll june ena deal guadalup d... 18111 free porn ask click â â â remov instruct striv... 18112 subject revis nomin daren receiv revis nomin p... 18113 hello ive got small problem still annoy upgrad... Name: processed_message, Length: 18114, dtype: object
# Word analysis with WordCloud: one cloud per class, each built from the
# concatenation of that class's preprocessed messages.
spam_words = ' '.join(data.loc[data['Label'] == 1, 'processed_message'])
not_spam_words = ' '.join(data.loc[data['Label'] == 0, 'processed_message'])

# Spam cloud.
plt.figure(figsize=(10, 6))
wordcloud_spam = WordCloud(width=800, height=400)
wordcloud_spam = wordcloud_spam.generate(spam_words)
plt.imshow(wordcloud_spam, interpolation='bilinear')
plt.axis('off')
plt.title('Word Cloud dla Spam')
plt.show()

# Not-spam cloud.
plt.figure(figsize=(10, 6))
wordcloud_not_spam = WordCloud(width=800, height=400)
wordcloud_not_spam = wordcloud_not_spam.generate(not_spam_words)
plt.imshow(wordcloud_not_spam, interpolation='bilinear')
plt.axis('off')
plt.title('Word Cloud dla Not Spam')
plt.show()
# Build the classification model.
# Turn the preprocessed text into token-count vectors.
# NOTE(review): the vocabulary is fitted on the whole corpus before the
# train/test split, so tokens from the test rows shape the feature space.
corpus = data['processed_message']
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)
y = data['Label']

# Split into a training set and a test set (20% held out for testing).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train the Naive Bayes model; fit() returns the estimator itself.
model_NB = MultinomialNB().fit(X_train, y_train)
model_NB
MultinomialNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MultinomialNB()
# Predict on the test split and evaluate the Naive Bayes model.
y_pred_NB = model_NB.predict(X_test)
confusion_matrix_NB = confusion_matrix(y_test, y_pred_NB)
classification_rep_NB = classification_report(y_test, y_pred_NB)
accuracy_NB = accuracy_score(y_test, y_pred_NB)
accuracy_NB
0.9536295887386144
print(classification_rep_NB)
precision recall f1-score support 0 0.98 0.95 0.96 2229 1 0.92 0.96 0.94 1394 accuracy 0.95 3623 macro avg 0.95 0.96 0.95 3623 weighted avg 0.95 0.95 0.95 3623
print(confusion_matrix_NB)
[[2110 119] [ 49 1345]]
# Train the Decision Tree (DT).
# The hyperparameters below are the library defaults, spelled out
# explicitly. random_state is fixed (same seed as the random forest in this
# file) so that rerunning the cell yields the same tree and the same scores;
# without it the fitted tree may differ from run to run.
model_DT = DecisionTreeClassifier(
    criterion='gini',
    max_depth=None,
    min_samples_leaf=1,
    min_samples_split=2,
    splitter='best',
    random_state=123,
)
model_DT.fit(X_train, y_train)
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
# Predict on the test split and evaluate the Decision Tree.
y_pred_DT = model_DT.predict(X_test)
confusion_matrix_DT = confusion_matrix(y_test, y_pred_DT)
classification_rep_DT = classification_report(y_test, y_pred_DT)
accuracy_DT = accuracy_score(y_test, y_pred_DT)
accuracy_DT
0.9354126414573558
print(classification_rep_DT)
precision recall f1-score support 0 0.95 0.94 0.95 2229 1 0.91 0.93 0.92 1394 accuracy 0.94 3623 macro avg 0.93 0.93 0.93 3623 weighted avg 0.94 0.94 0.94 3623
print(confusion_matrix_DT)
[[2098 131] [ 103 1291]]
# Random forest, trained on the training split.
rf_params = dict(
    n_estimators=100,
    bootstrap=True,
    ccp_alpha=0.0,
    criterion='gini',
    max_depth=None,
    min_samples_leaf=1,
    min_samples_split=2,
    random_state=123,
)
model_RF = RandomForestClassifier(**rf_params)
model_RF.fit(X_train, y_train)
RandomForestClassifier(random_state=123)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(random_state=123)
# Predict on the test split and evaluate the random forest.
y_pred_RF = model_RF.predict(X_test)
confusion_matrix_RF = confusion_matrix(y_test, y_pred_RF)
classification_rep_RF = classification_report(y_test, y_pred_RF)
accuracy_RF = accuracy_score(y_test, y_pred_RF)
accuracy_RF
0.9770908087220536
print(classification_rep_RF)
precision recall f1-score support 0 0.98 0.99 0.98 2229 1 0.98 0.96 0.97 1394 accuracy 0.98 3623 macro avg 0.98 0.97 0.98 3623 weighted avg 0.98 0.98 0.98 3623
print(confusion_matrix_RF)
[[2201 28] [ 55 1339]]
# The random forest turned out to be the best model: it is better to let a
# spam message through as non-spam than to flag a legitimate message as spam.
# That is why RF is chosen rather than NB.
# Now the model is trained on the full data set so it can be saved and used
# on real data in the later application.
rf_full_params = dict(
    n_estimators=100,
    bootstrap=True,
    ccp_alpha=0.0,
    criterion='gini',
    max_depth=None,
    min_samples_leaf=1,
    min_samples_split=2,
    random_state=123,
)
model_RF_full = RandomForestClassifier(**rf_full_params)
model_RF_full.fit(X, y)
RandomForestClassifier(random_state=123)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(random_state=123)
# Predict and evaluate the full-data random forest.
# NOTE(review): the predictions are made on the same X, y that
# model_RF_full was fitted on, so these are training-set scores and not an
# estimate of performance on unseen messages.
y_pred_RF_full = model_RF_full.predict(X)
confusion_matrix_RF_full = confusion_matrix(y, y_pred_RF_full)
classification_rep_RF_full = classification_report(y, y_pred_RF_full)
accuracy_RF_full = accuracy_score(y, y_pred_RF_full)
accuracy_RF_full
1.0
print(classification_rep_RF_full)
precision recall f1-score support 0 1.00 1.00 1.00 11124 1 1.00 1.00 1.00 6990 accuracy 1.00 18114 macro avg 1.00 1.00 1.00 18114 weighted avg 1.00 1.00 1.00 18114
print(confusion_matrix_RF_full)
[[11124 0] [ 0 6990]]
model_RF_full
RandomForestClassifier(random_state=123)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(random_state=123)
# Persist the trained model and the fitted vectorizer to disk.
joblib.dump(value=model_RF_full, filename='spam_classifier_model.pkl')
joblib.dump(value=vectorizer, filename='vectorizer.pkl')
['vectorizer.pkl']
# NOTE: the scikit-learn and joblib versions used here must match the ones
# installed in the application environment that loads the saved files.
# The versions are read from Python itself so the check is valid Python,
# works on any operating system (no IPython automagic, no Windows-only
# `findstr`), and also reports joblib.
from importlib.metadata import version

for package_name in ("scikit-fuzzy", "scikit-learn", "joblib"):
    print(f"{package_name}=={version(package_name)}")
scikit-fuzzy==0.4.2 scikit-learn==1.3.2 Note: you may need to restart the kernel to use updated packages.
# Jak instalować?
# Np. tak
# pip install scikit-learn==1.3.2