# ium_151636/script5_2.py
# 2023-06-06 19:52:20 +02:00
#
# 49 lines
# 1.4 KiB
# Python

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense
import pickle
# Train a small neural network (mean-squared-error loss) that predicts the
# 'Rating' column from the label-encoded 'Writer' column, save the trained
# model and the fitted encoder to disk, then run one sample prediction.

# Load the dataset
df = pd.read_csv('data.csv')

# Select the relevant columns and drop rows with missing values. The explicit
# .copy() makes `data` an independent frame, so the column assignment below
# cannot trigger pandas' chained-assignment (SettingWithCopy) warning.
data = df[['Rating', 'Writer']].dropna().copy()

# Convert the 'Writer' column to integer codes using label encoding
encoder = LabelEncoder()
data['Writer'] = encoder.fit_transform(data['Writer'])

# Build the feature matrix and the target vector. The single feature is
# reshaped to (n_samples, 1) so it explicitly matches the model's input_dim=1
# instead of relying on Keras to expand a 1-D input.
X = data['Writer'].to_numpy().reshape(-1, 1)
y = data['Rating'].to_numpy()

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create the neural network model: one hidden ReLU layer, one linear output
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model; the held-out split is passed as validation data
model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_test, y_test),
)

# Save the model to a file
model.save('model.h5')

# Save the encoder to a file so the same writer -> code mapping can be reused
with open('encoder.pkl', 'wb') as f:
    pickle.dump(encoder, f)

# Make predictions on new data.
# NOTE: encoder.transform raises ValueError for a writer that was not present
# in the data the encoder was fitted on.
new_writer = 'Jim Cash'
new_writer_encoded = encoder.transform([new_writer]).reshape(-1, 1)
rating_prediction = model.predict(new_writer_encoded)
print(f"Predicted rating for the writer '{new_writer}':", rating_prediction)