This commit is contained in:
Yevhenii Poliakov 2023-05-14 20:39:55 +02:00
parent d8e55bb489
commit ca4fe6932b

View File

@@ -17,9 +17,9 @@ y = data['Rating']
# Preprocess the data # Preprocess the data
# Convert the categorical columns into numerical representations # Convert the categorical columns into numerical representations
mlb = MultiLabelBinarizer() mlb = MultiLabelBinarizer()
X['Generes'] = mlb.fit_transform(X['Generes']) X['Generes'] = mlb.fit_transform(X['Generes']).tolist()
X['Plot Kyeword'] = mlb.fit_transform(X['Plot Kyeword']) X['Plot Kyeword'] = mlb.fit_transform(X['Plot Kyeword']).tolist()
X['Top 5 Casts'] = mlb.fit_transform(X['Top 5 Casts'].astype(str)) X['Top 5 Casts'] = mlb.fit_transform(X['Top 5 Casts'].astype(str)).tolist()
# Split the data into training and testing sets # Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)