tensorflow python script

2023-09-30 00:06:47 +02:00 · 2023-09-30 00:06:47 +02:00 · 33bd65fe00
commit 33bd65fe00
parent 4fdfd852b6
4 changed files with 395029 additions and 0 deletions
--- a/data/meets.csv
+++ b/data/meets.csv
--- a/data/openpowerlifting.csv
+++ b/data/openpowerlifting.csv
--- a/data/powerlifting-database.zip
+++ b/data/powerlifting-database.zip
--- a/iumz_486867.py
+++ b/iumz_486867.py
@ -0,0 +1,131 @@
+from kaggle.api.kaggle_api_extended import KaggleApi
+import zipfile
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.models import Sequential  # Use TensorFlow's Keras module
+from tensorflow.keras.layers import Dense  # Use TensorFlow's Keras module
+import matplotlib.pyplot as plt
+from keras.utils import to_categorical  # Use Keras's to_categorical function
+
+
# Authenticate against Kaggle and fetch the powerlifting dataset archive
# into ./data (credentials are presumably picked up from the local Kaggle
# configuration -- verify against the Kaggle API docs).
api = KaggleApi()
api.authenticate()
api.dataset_download_files(
    'dansbecker/powerlifting-database',
    path='./data',
)

# Unpack the downloaded archive next to it so the CSV files can be read
# directly from ./data.
with zipfile.ZipFile('./data/powerlifting-database.zip') as archive:
    archive.extractall(path='./data')
+
+
def get_simplified_age(age):
    """Map an age in years to a decade bucket label.

    Ages in [0, 10) map to 0, [10, 20) to 1, ..., [70, 80) to 7, and every
    age of 80 or more maps to the last bucket, 8. A valid age therefore
    always yields one of the nine labels 0..8, so the result can be fed
    straight into a 9-class one-hot encoding.

    Args:
        age: Age in years (int or float). May be NaN or None when the age
            is unknown.

    Returns:
        The integer bucket label (0..8), or NaN when ``age`` is missing
        (None / NaN) or negative, so that callers can filter such rows out
        with ``pd.isna``.
    """
    # `age != age` is true only for NaN; checked before any comparison so a
    # missing value never reaches the arithmetic below.
    if age is None or age != age or age < 0:
        return float("nan")
    # The top bucket is open-ended: previously ages >= 100 fell through and
    # were returned unchanged, producing labels outside 0..8.
    if age >= 80:
        return 8
    return int(age // 10)
+
+
def plot_loss_tf(history):
    """Plot the per-epoch training loss recorded in a Keras History object.

    Args:
        history: Object whose ``history`` attribute is a dict containing a
            ``'loss'`` sequence (as returned by ``model.fit``).
    """
    figure, axes = plt.subplots(1, 1, figsize=(4, 3))

    # Hide the canvas chrome. These look like widget-backend canvas options
    # (presumably ipympl -- TODO confirm); they are simply set to False here.
    for chrome_flag in ('toolbar_visible', 'header_visible', 'footer_visible'):
        setattr(figure.canvas, chrome_flag, False)

    loss_per_epoch = history.history['loss']
    axes.plot(loss_per_epoch, label='loss')
    axes.set(xlabel='Epoch', ylabel='loss (cost)')
    axes.legend()
    axes.grid(True)
    plt.show()
+
# Columns that are removed before modelling; whatever remains besides 'Age'
# is used as the feature set further down.
columns_to_drop = [
    'MeetID', 'Name', 'Sex', 'Equipment', 'Division',
    'Squat4Kg', 'BestSquatKg',
    'Bench4Kg', 'BestBenchKg',
    'Deadlift4Kg', 'BestDeadliftKg',
    'TotalKg', 'Place', 'Wilks', 'WeightClassKg',
]

# Read the extracted CSV and discard the unused columns in one go.
powerlifters_stats = pd.read_csv(
    'data/openpowerlifting.csv',
    engine='python',
    encoding='ISO-8859-1',
    sep=',',
).drop(columns=columns_to_drop)

# Replace the raw age with its bucket label (see get_simplified_age).
powerlifters_stats['Age'] = powerlifters_stats['Age'].apply(get_simplified_age)
+
# Number of age buckets (labels 0..8). Must equal the number of units in the
# model's softmax output layer, and must be the same for every split --
# the validation targets were previously encoded with 8 classes, which does
# not match the 9-unit output.
NUM_AGE_CLASSES = 9

# Split your data into features (X) and target (y)
X = powerlifters_stats.drop(columns=['Age'])
y = powerlifters_stats['Age']

# Keep only usable rows, and do it BEFORE splitting so that the train,
# validation and test sets are all cleaned the same way (previously only the
# training split was filtered, leaving NaN labels in validation/test):
#   * the label must be a valid bucket index; `between` is False for NaN, so
#     this also drops rows with a missing age;
#   * every feature must be present, because StandardScaler passes NaN
#     through and NaN inputs would turn the loss into NaN.
valid_rows = y.between(0, NUM_AGE_CLASSES - 1) & X.notna().all(axis=1)
X = X[valid_rows]
y = y[valid_rows].astype(int)

# Split the data into train (70%), validation (15%), and test (15%) sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=1)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=1)

# Standardize the features. The scaler is fitted on the training split only
# and then applied to validation/test, so no statistics from held-out data
# leak into training.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns, index=X_train.index)
X_val = pd.DataFrame(scaler.transform(X_val), columns=X.columns, index=X_val.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns, index=X_test.index)

# Quick sanity output: which labels occur in the training set, and their dtype
unique_values = np.unique(y_train)
print(unique_values)
print(y_train.dtypes)

# Convert the target variables to one-hot vectors of identical width
y_train = to_categorical(y_train, num_classes=NUM_AGE_CLASSES)
y_val = to_categorical(y_val, num_classes=NUM_AGE_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_AGE_CLASSES)
+
# Fully connected classifier: three ReLU hidden layers followed by a
# 9-way softmax, one output unit per age category.
n_features = X_train.shape[1]
layer_stack = [
    Dense(100, input_dim=n_features, activation='relu'),
    Dense(70, activation='relu'),
    Dense(50, activation='relu'),
    Dense(9, activation='softmax'),
]
model = Sequential(layer_stack, name="Players_model")

# Targets are one-hot encoded, hence categorical cross-entropy; accuracy is
# tracked as the reporting metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)
+
# Fit on the training split, monitoring the validation split each epoch.
history = model.fit(X_train, y_train, epochs=500, validation_data=(X_val, y_val))

# Show how the training loss evolved over the epochs.
plot_loss_tf(history)

# Score the held-out test split; evaluate() returns the loss first and the
# compiled metric (accuracy) second.
print('Evaluating...')
test_metrics = model.evaluate(X_test, y_test)
accuracy = test_metrics[1]
print(f"accuracy: {accuracy}")