2023-05-14 22:31:23 +02:00
|
|
|
import pandas as pd
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
from sklearn.preprocessing import LabelEncoder
|
|
|
|
from keras.models import Sequential
|
|
|
|
from keras.layers import Dense
|
2023-06-06 19:52:20 +02:00
|
|
|
import pickle
|
2023-05-14 22:31:23 +02:00
|
|
|
|
|
|
|
# Load the dataset from the CSV file in the working directory.
df = pd.read_csv('data.csv')

# Keep only the 'Rating' and 'Writer' columns and drop rows where either
# value is missing. The explicit .copy() makes `data` an independent frame,
# so the column assignment below writes to it directly instead of to a
# derived slice (which would trigger pandas' SettingWithCopyWarning).
data = df[['Rating', 'Writer']].dropna().copy()

# Convert the 'Writer' column to numeric codes using label encoding.
# The fitted encoder is kept in `encoder` so the same mapping can be
# applied to new writer names later in the script.
encoder = LabelEncoder()
data['Writer'] = encoder.fit_transform(data['Writer'])
|
|
|
|
|
|
|
|
# Separate the feature column ('Writer') from the target column ('Rating').
X, y = data['Writer'], data['Rating']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)
|
|
|
|
|
|
|
|
# Define the neural network: a 64-unit ReLU hidden layer fed by a single
# input feature, followed by a one-unit output layer with no activation
# specified.
model = Sequential([
    Dense(64, activation='relu', input_dim=1),
    Dense(1),
])

# Configure training: Adam optimizer minimising mean squared error.
model.compile(optimizer='adam', loss='mean_squared_error')
|
|
|
|
|
|
|
|
# Fit the model for 10 epochs in batches of 64 samples, using the held-out
# test split as validation data.
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=64,
    validation_data=(X_test, y_test),
)
|
|
|
|
|
2023-06-06 19:52:20 +02:00
|
|
|
# Persist the trained network to 'model.h5'.
model.save('model.h5')

# Persist the fitted label encoder next to it, so the writer-to-code mapping
# used for training can be restored together with the model.
with open('encoder.pkl', 'wb') as encoder_file:
    pickle.dump(encoder, encoder_file)
|
|
|
|
|
2023-05-14 22:31:23 +02:00
|
|
|
# Make a prediction for a single writer name.
new_writer = 'Jim Cash'

# The encoder can only map names it was fitted on. Check up front and fail
# with a message that names the offending writer, rather than letting
# transform() raise a generic "unseen labels" error.
if new_writer not in encoder.classes_:
    raise ValueError(
        f"Writer {new_writer!r} was not present in the training data "
        "and cannot be encoded."
    )

# Encode the name with the same mapping used for training, then predict.
new_writer_encoded = encoder.transform([new_writer])
rating_prediction = model.predict(new_writer_encoded)

# Build the message from the variable so it always matches the writer that
# was actually queried (previously the name was hard-coded a second time).
print(f"Predicted rating for the writer '{new_writer}':", rating_prediction)
|