tensorflow python script
This commit is contained in:
parent
4fdfd852b6
commit
33bd65fe00
8483
data/meets.csv
Normal file
8483
data/meets.csv
Normal file
File diff suppressed because it is too large
Load Diff
386415
data/openpowerlifting.csv
Normal file
386415
data/openpowerlifting.csv
Normal file
File diff suppressed because it is too large
Load Diff
BIN
data/powerlifting-database.zip
Normal file
BIN
data/powerlifting-database.zip
Normal file
Binary file not shown.
131
iumz_486867.py
Normal file
131
iumz_486867.py
Normal file
@ -0,0 +1,131 @@
|
||||
from kaggle.api.kaggle_api_extended import KaggleApi
|
||||
import zipfile
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import Sequential # Use TensorFlow's Keras module
|
||||
from tensorflow.keras.layers import Dense # Use TensorFlow's Keras module
|
||||
import matplotlib.pyplot as plt
|
||||
from keras.utils import to_categorical # Use Keras's to_categorical function
|
||||
|
||||
|
||||
# Fetch the powerlifting dataset archive through the Kaggle API client.
api = KaggleApi()
api.authenticate()
api.dataset_download_files(
    'dansbecker/powerlifting-database',
    path='./data',
)

# Unpack the archive in place so the CSV files sit directly under ./data
# (the rest of the script reads data/openpowerlifting.csv).
with zipfile.ZipFile('./data/powerlifting-database.zip', mode='r') as archive:
    archive.extractall(path='./data')
|
||||
|
||||
|
||||
def get_simplified_age(age):
    """Map an age in years to a decade bucket label in the range 0..8.

    Buckets are ``[0, 10) -> 0``, ``[10, 20) -> 1``, ..., ``[70, 80) -> 7``
    and everything from 80 upwards ``-> 8``.

    Args:
        age: Age in years (int or float). May be NaN or None when the age
            is unknown.

    Returns:
        An ``int`` bucket label between 0 and 8, or ``float('nan')`` when the
        age is missing (NaN / None) or negative, so that callers can drop the
        row with an ordinary NaN filter.
    """
    # NaN is the only value that is not equal to itself; this avoids needing
    # numpy/pandas just to detect a missing age.
    if age is None or age != age:
        return float('nan')

    # A negative age is invalid data; report it as missing instead of
    # leaking the raw number out as if it were a class label.
    if age < 0:
        return float('nan')

    # The top bucket is open-ended. Previously ages >= 100 fell through and
    # were returned unchanged, producing labels outside the 0..8 range.
    if age >= 80:
        return 8

    # Floor-divide by the bucket width; int() normalises float inputs.
    return int(age // 10)
|
||||
|
||||
|
||||
def plot_loss_tf(history):
    """Draw the training-loss curve recorded in ``history`` and show it.

    Args:
        history: Object exposing a ``history`` mapping with a ``'loss'``
            entry (one value per epoch), as returned by ``model.fit`` in
            this script.
    """
    figure, axes = plt.subplots(1, 1, figsize=(4, 3))

    # Switch off the canvas chrome flags.
    # NOTE(review): presumably only interactive widget backends honour these;
    # elsewhere they are plain attribute assignments - confirm.
    for flag in ('toolbar_visible', 'header_visible', 'footer_visible'):
        setattr(figure.canvas, flag, False)

    loss_per_epoch = history.history['loss']
    axes.plot(loss_per_epoch, label='loss')
    axes.set_xlabel('Epoch')
    axes.set_ylabel('loss (cost)')
    axes.legend()
    axes.grid(True)

    plt.show()
|
||||
|
||||
# Number of age buckets (labels 0..8). Every one-hot target must use this
# width so that it matches the 9-unit softmax output of the model.
NUM_AGE_CLASSES = 9

# Load your CSV data
powerlifters_stats = pd.read_csv('data/openpowerlifting.csv', engine='python', encoding='ISO-8859-1', sep=',')

# Drop unnecessary columns
columns_to_drop = ['MeetID', 'Name', 'Sex', 'Equipment', 'Division', 'Squat4Kg', 'BestSquatKg',
                   'Bench4Kg', 'BestBenchKg', 'Deadlift4Kg', 'BestDeadliftKg', 'TotalKg', 'Place', 'Wilks',
                   'WeightClassKg']
powerlifters_stats = powerlifters_stats.drop(columns_to_drop, axis=1)

# Apply the age simplification function
powerlifters_stats['Age'] = powerlifters_stats['Age'].apply(get_simplified_age)

# Clean the WHOLE frame before splitting, not just the training split:
#  * keep only rows whose label is a valid class index (this also removes
#    rows with a missing age, because NaN never satisfies `between`);
#  * drop rows with missing feature values, since StandardScaler passes NaN
#    through and the network cannot train on NaN inputs.
valid_label = powerlifters_stats['Age'].between(0, NUM_AGE_CLASSES - 1)
powerlifters_stats = powerlifters_stats[valid_label].dropna()

# Split your data into features (X) and target (y)
X = powerlifters_stats.drop(columns=['Age'])
y = powerlifters_stats['Age'].astype(int)

# Split the data into train, validation, and test sets (70 / 15 / 15)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=1)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=1)

# Standardize the features. The scaler is fitted on the training split only
# so that validation/test statistics do not leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_val, X_test = (
    pd.DataFrame(scaler.transform(part), columns=part.columns, index=part.index)
    for part in (X_train, X_val, X_test)
)

# Quick sanity output: which classes are present and the label dtype
unique_values = np.unique(y_train)
print(unique_values)
print(y_train.dtypes)

# Convert the target variables to categorical (same width for every split;
# the validation split previously used 8 classes and could not match the
# model's 9-unit output).
y_train = to_categorical(y_train, num_classes=NUM_AGE_CLASSES)
y_val = to_categorical(y_val, num_classes=NUM_AGE_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_AGE_CLASSES)
|
||||
|
||||
# Build the classifier layer by layer: three ReLU hidden layers followed by a
# softmax output.
model = Sequential(name="Players_model")
model.add(Dense(100, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(70, activation='relu'))
model.add(Dense(50, activation='relu'))
# 9 output units to match the number of age categories
model.add(Dense(9, activation='softmax'))

# Compile with categorical cross-entropy (targets are one-hot encoded) and
# the Adam optimizer, tracking accuracy.
loss_fn = tf.keras.losses.CategoricalCrossentropy()
adam = tf.keras.optimizers.Adam()
model.compile(loss=loss_fn, optimizer=adam, metrics=['accuracy'])

# Train, reporting validation metrics after every epoch
history = model.fit(
    X_train,
    y_train,
    epochs=500,
    validation_data=(X_val, y_val),
)

# Show the training-loss curve
plot_loss_tf(history)

# Evaluate on the held-out test split; evaluate() returns the loss followed
# by the compiled metrics, so index 1 is accuracy.
print('Evaluating...')
test_metrics = model.evaluate(X_test, y_test)
accuracy = test_metrics[1]
print(f"accuracy: {accuracy}")
|
Loading…
Reference in New Issue
Block a user