"""Prepare the student-performance dataset for model training.

Reads ``Student_Performance.csv``, drops duplicate rows, encodes the
``Extracurricular Activities`` column as 1/0, standardizes every column except
the last one, and writes three CSV files: the full processed table
(``dataset.csv``) plus an 80/20 train/test split (``df_train.csv``,
``df_test.csv``).
"""
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

data = pd.read_csv("Student_Performance.csv")
print(data.head())

data.drop_duplicates(inplace=True)

# Encode Yes/No explicitly. ``Series.replace`` with a dict relies on silent
# object -> int downcasting, which newer pandas versions deprecate; ``map``
# yields a numeric column directly.
activities = data["Extracurricular Activities"]
encoded = activities.map({"Yes": 1, "No": 0})
# ``map`` turns unknown labels into NaN; fail loudly instead of letting them
# slip through as missing values (genuinely missing entries are left as NaN).
unexpected = activities[encoded.isna() & activities.notna()].unique()
if unexpected.size:
    raise ValueError(
        f"Unexpected 'Extracurricular Activities' values: {list(unexpected)}"
    )
data["Extracurricular Activities"] = encoded

# Every column except the last is standardized; the last column is written
# out unscaled (presumably the prediction target -- confirm against the CSV).
feature_columns = data.columns[:-1]

# Split BEFORE scaling so the scaler's mean/std come from training rows only.
# Fitting on the whole table would leak test-set statistics into training.
# Same arguments as before, so the row partition is unchanged.
df_train, df_test = train_test_split(
    data, test_size=0.2, random_state=21, shuffle=True
)
# Independent copies: the frames are modified below and must not alias `data`.
df_train = df_train.copy()
df_test = df_test.copy()

scaler = preprocessing.StandardScaler().fit(df_train[feature_columns])
# Apply the single train-fitted scaler everywhere so all three output files
# share one consistent feature scale.
for frame in (data, df_train, df_test):
    frame[feature_columns] = scaler.transform(frame[feature_columns])
print(data.head())

data.to_csv("dataset.csv", index=False)
df_train.to_csv("df_train.csv", index=False)
df_test.to_csv("df_test.csv", index=False)