DSIC-Bayes-continuous/datapreparator.py
2021-05-26 21:08:58 +02:00

39 lines
1.2 KiB
Python

from sklearn.model_selection import train_test_split
from copy import deepcopy
import pandas as pd
import typing
class DataPreparator:
    """Stateless helpers that prepare a genre-labelled feature table.

    Every method is a static method, so it can be called on the class
    (``DataPreparator.prepare_data(df)``) as well as on an instance.
    """

    # Integer code assigned to each genre name found in the ``label`` column.
    genre_dict = {
        "blues": 1,
        "classical": 2,
        "country": 3,
        "disco": 4,
        "hiphop": 5,
        "jazz": 6,
        "metal": 7,
        "pop": 8,
        "reggae": 9,
        "rock": 10,
    }

    @staticmethod
    def prepare_data(df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of *df* with an integer ``genre`` column in front.

        The ``label`` column is translated through ``genre_dict`` and the
        ``filename``, ``label`` and ``length`` columns are dropped. The input
        frame is not modified.

        Raises:
            KeyError: if ``label`` (or one of the dropped columns) is missing,
                or if a label has no entry in ``genre_dict``.
            ValueError: if *df* already contains a ``genre`` column.
        """
        labels = df["label"]
        codes = labels.map(DataPreparator.genre_dict)

        # ``map`` yields NaN for labels missing from the dictionary; surface
        # them as KeyError, the same exception a direct dict lookup raises.
        unknown = codes.isna().to_numpy()
        if unknown.any():
            bad_labels = labels[unknown].unique().tolist()
            raise KeyError(f"unknown genre label(s): {bad_labels}")

        data = df.copy()
        # The fourth positional parameter of ``DataFrame.insert`` is
        # ``allow_duplicates``, not a dtype, so the integer cast is done
        # explicitly on the values instead.
        data.insert(0, "genre", codes.astype(int))
        return data.drop(columns=["filename", "label", "length"])

    @staticmethod
    def train_test_split(df: pd.DataFrame) -> typing.Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
        """Split *df* into train and test parts (80 % / 20 %).

        Only ``chroma_stft_mean``, ``chroma_stft_var`` and ``rms_mean`` are
        used as features; ``genre`` is the target.

        Returns:
            The four pieces in the order ``X_train, X_test, y_train, y_test``.
        """
        # Deliberately restricted to three features rather than every column
        # except ``genre``.
        X = df[["chroma_stft_mean", "chroma_stft_var", "rms_mean"]]
        Y = df["genre"]
        # Inside a method body the bare name ``train_test_split`` is looked up
        # in module scope, i.e. it is the function imported from
        # sklearn.model_selection, not this method.
        # ``random_state=0`` is the integer value of the former ``False`` seed,
        # so the resulting split is unchanged.
        return train_test_split(X, Y, test_size=0.20, random_state=0)

    @staticmethod
    def print_df_info(df: pd.DataFrame) -> None:
        """Print, for every genre in ``genre_dict``, how many rows have its code."""
        genre_codes = df["genre"]
        for key, code in DataPreparator.genre_dict.items():
            count = int((genre_codes == code).sum())
            print(f"Key: {key}\tCount: {count}")