This commit is contained in:
Yevhenii Poliakov 2023-05-14 20:39:55 +02:00
parent d8e55bb489
commit ca4fe6932b

View File

@@ -17,9 +17,9 @@ y = data['Rating']
# Preprocess the data
# Convert the categorical columns into numerical representations.
# One MultiLabelBinarizer instance is reused for every column; each
# fit_transform call refits it, so after this section it only reflects the
# last column it was fitted on.
mlb = MultiLabelBinarizer()
# NOTE(review): the three assignments directly below and the three
# `.tolist()` assignments after them target the same columns. This looks like
# the removed and added sides of a diff hunk shown without +/- markers --
# confirm which set is actually live in the file.
X['Generes'] = mlb.fit_transform(X['Generes'])
X['Plot Kyeword'] = mlb.fit_transform(X['Plot Kyeword'])
X['Top 5 Casts'] = mlb.fit_transform(X['Top 5 Casts'].astype(str))
# Same transforms, but the result is converted with .tolist() before being
# assigned -- presumably so each cell of the column holds one row of the
# indicator matrix as a plain list; verify against the pandas version in use.
X['Generes'] = mlb.fit_transform(X['Generes']).tolist()
X['Plot Kyeword'] = mlb.fit_transform(X['Plot Kyeword']).tolist()
# NOTE(review): .astype(str) turns every entry into a string, and
# MultiLabelBinarizer treats each sample as an iterable of labels, so the
# labels here would be individual characters -- confirm this is intended.
X['Top 5 Casts'] = mlb.fit_transform(X['Top 5 Casts'].astype(str)).tolist()
# Split the data into training and testing sets: 20% is held out for testing,
# and random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)