"""Train a Gaussian Naive Bayes classifier on whitespace-delimited data files.

Every regular file in ``TRAIN_DATA_DIR`` is parsed into a DataFrame, the
frames are concatenated and shuffled with a fixed seed, and a ``GaussianNB``
model is fitted on the feature columns.  The fitted model and the label
encoder used for the target column are written to the current working
directory as ``trained_model.pkl`` and ``label_encoder.pkl``.
"""

import os

import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn import preprocessing
import joblib

TRAIN_DATA_DIR = "datasets_train_raw"

# Column names applied to every input file.  The first column ("tbid") is
# excluded from the features below and the last one ("collapsed") is the
# classification target.
COLUMN_NAMES = [
    "tbid", "tphys", "r", "vr", "vt", "ik1", "ik2",
    "sm1", "sm2", "a", "e", "collapsed",
]

train_df_list = []
# os.listdir() returns entries in arbitrary order; sorting keeps the
# concatenated row order (and therefore the seeded shuffle) reproducible.
for file in sorted(os.listdir(TRAIN_DATA_DIR)):
    file_path = os.path.join(TRAIN_DATA_DIR, file)
    if not os.path.isfile(file_path):
        # Sub-directories cannot be parsed by read_csv; skip them.
        continue
    df = pd.read_csv(
        file_path,
        # Equivalent to the deprecated ``delim_whitespace=True``.
        sep=r"\s+",
        # The first line of each file is discarded (presumably a header
        # row -- confirm against the data files).
        skiprows=1,
        names=COLUMN_NAMES,
    )
    train_df_list.append(df)

if not train_df_list:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate" error.
    raise ValueError(f"No training files found in {TRAIN_DATA_DIR!r}")

# Merge all files and shuffle the rows deterministically.
data_train = pd.concat(train_df_list, ignore_index=True).sample(
    frac=1, random_state=42
)

# Features: every column except the first ("tbid") and the last ("collapsed").
X_train = data_train.iloc[:, 1:-1].values
# Target: the last column ("collapsed").
y_train = data_train.iloc[:, -1].values

# Encode the target values as integer class indices.
lab = preprocessing.LabelEncoder()
y_train_transformed = lab.fit_transform(y_train)

clf = GaussianNB(var_smoothing=1e-07)
clf.fit(X_train, y_train_transformed)

# The model was trained on encoded labels, so the encoder is persisted next
# to it to allow predictions to be mapped back to the original label values.
joblib.dump(clf, 'trained_model.pkl')
joblib.dump(lab, 'label_encoder.pkl')