2023-05-23 21:53:08 +02:00
|
|
|
import pandas as pd
|
|
|
|
from sklearn.preprocessing import MinMaxScaler
|
|
|
|
|
2023-05-23 23:24:45 +02:00
|
|
|
# Path of the raw input CSV that this script loads with pandas.
data_path = 'data.csv'
|
|
|
|
# Path of the CSV file that receives the encoded and scaled output.
processed_data_path = 'processed_data.csv'
|
2023-05-23 21:53:08 +02:00
|
|
|
|
2023-05-23 23:24:45 +02:00
|
|
|
# Load the raw dataset; the input file separates fields with semicolons
# rather than the comma that read_csv expects by default.
data = pd.read_csv(data_path, sep=';')
|
2023-05-23 21:53:08 +02:00
|
|
|
|
2023-05-23 23:24:45 +02:00
|
|
|
# One-hot encode the categorical columns: each listed column is replaced by
# one indicator column per distinct value (named "<column>_<value>").
# NOTE(review): with get_dummies' defaults, missing values get no indicator
# column of their own, so such rows end up with all indicators unset.
categorical_columns = ['Sex', 'Medal']
data = pd.get_dummies(data, columns=categorical_columns)
|
2023-05-23 21:53:08 +02:00
|
|
|
|
2023-05-23 23:24:45 +02:00
|
|
|
# Remove the columns that are not carried over into the processed output.
# drop() raises KeyError if any of these labels is absent from the frame.
columns_to_drop = [
    'Name',
    'Team',
    'NOC',
    'Games',
    'Year',
    'Season',
    'City',
    'Sport',
    'Event',
]
data = data.drop(columns=columns_to_drop)
|
2023-05-23 21:53:08 +02:00
|
|
|
|
2023-05-23 23:24:45 +02:00
|
|
|
# Min-max scaler built with default settings (scikit-learn's default
# feature_range is (0, 1)); it is fitted on the full frame below.
scaler = MinMaxScaler()
|
|
|
|
# fit_transform returns a plain NumPy array, so the scaled values are wrapped
# back into a DataFrame and the column labels of the unscaled frame are
# reattached. The new frame gets a fresh default row index.
# NOTE(review): assumes every remaining column is numeric -- confirm against
# the input schema, since the scaler cannot handle string columns.
scaled_values = scaler.fit_transform(data)
data = pd.DataFrame(scaled_values, columns=data.columns)
|
2023-05-23 21:53:08 +02:00
|
|
|
|
2023-05-23 23:24:45 +02:00
|
|
|
# Persist the processed frame; index=False keeps the row index out of the
# written CSV so the file contains only the feature columns.
data.to_csv(processed_data_path, index=False)
|