17 lines
643 B
Python
17 lines
643 B
Python
import pandas as pd
|
|
from sklearn import preprocessing
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
# Preprocessing script: load the raw table, clean it, standardise the feature
# columns and write the full / train / test tables as CSV files.

# Load the raw table from the current working directory and show a preview.
data = pd.read_csv("Student_Performance.csv")
print(data.head())

# Drop exact duplicate rows before any statistics are computed.
data.drop_duplicates(inplace=True)

# Encode the Yes/No flag as 1/0 so the column can be scaled numerically.
data["Extracurricular Activities"] = data["Extracurricular Activities"].replace({'Yes': 1, 'No': 0})

# Every column except the last one is standardised; the last column is left
# untouched (presumably it is the prediction target -- confirm against the CSV).
feature_cols = data.columns[:-1]

# Split BEFORE fitting the scaler. Fitting on the full table would let the
# test rows influence the mean / standard deviation applied to the training
# rows (data leakage). The split itself depends only on the number of rows and
# random_state, so the same rows land in each partition as before.
df_train, df_test = train_test_split(data, test_size=0.2, random_state=21, shuffle=True)

# Learn the scaling statistics from the training rows only, then apply that
# single transform to the whole table so all three outputs share one scale.
scaler = preprocessing.StandardScaler().fit(df_train[feature_cols])
data[feature_cols] = scaler.transform(data[feature_cols])
print(data.head())

# Re-select the partitions from the scaled frame. The index labels come from
# read_csv's default index and survive drop_duplicates unchanged, so they are
# unique and .loc returns the rows in the split's shuffled order.
df_train = data.loc[df_train.index]
df_test = data.loc[df_test.index]

# Persist the scaled full table and both partitions without the index column.
data.to_csv("dataset.csv", index=False)
df_train.to_csv("df_train.csv", index=False)
df_test.to_csv("df_test.csv", index=False)
|
|
|