tensorflow python script

2023-09-30 00:06:47 +02:00 · 2023-09-30 00:06:47 +02:00 · 33bd65fe00
commit 33bd65fe00
parent 4fdfd852b6
4 changed files with 395029 additions and 0 deletions
--- a/data/meets.csv
+++ b/data/meets.csv
--- a/data/openpowerlifting.csv
+++ b/data/openpowerlifting.csv
--- a/data/powerlifting-database.zip
+++ b/data/powerlifting-database.zip
--- a/iumz_486867.py
+++ b/iumz_486867.py
@@ -0,0 +1,131 @@
 from kaggle.api.kaggle_api_extended import KaggleApi
 import zipfile
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
 import pandas as pd
 import numpy as np
 import tensorflow as tf
 from tensorflow.keras.models import Sequential  # Use TensorFlow's Keras module
 from tensorflow.keras.layers import Dense  # Use TensorFlow's Keras module
 import matplotlib.pyplot as plt
 from keras.utils import to_categorical  # Use Keras's to_categorical function
 # Authenticate with the Kaggle API and download the dataset archive into ./data
 api = KaggleApi()
 api.authenticate()
 api.dataset_download_files(
    'dansbecker/powerlifting-database',
    path='./data',
 )
 # Unpack the downloaded archive next to it so the CSV files can be read below
 with zipfile.ZipFile(file='./data/powerlifting-database.zip', mode='r') as archive:
    archive.extractall(path='./data')
 def get_simplified_age(age):
    """Map an age to its bucket index.

    Ages in [0, 80) fall into ten-year buckets 0..7, ages in [80, 100)
    share bucket 8. Any other value (negative, 100 or more, or NaN, for
    which every comparison is false) is returned unchanged.
    """
    # Guard clause: anything outside [0, 100) is passed through as-is
    if not (0 <= age < 100):
        return age
    # Upper bounds 10, 20, ..., 80 correspond to buckets 0, 1, ..., 7
    for bucket, upper_bound in enumerate(range(10, 90, 10)):
        if age < upper_bound:
            return bucket
    # Remaining ages lie in [80, 100)
    return 8
 def plot_loss_tf(history):
    """Plot the recorded training loss and display the figure.

    history: object whose ``history['loss']`` entry holds the sequence of
    loss values to draw (presumably the object returned by ``model.fit``).
    """
    figure, axis = plt.subplots(1, 1, figsize=(4, 3))
    # Switch off the canvas toolbar/header/footer flags
    # (presumably only honoured by interactive widget backends - confirm)
    for canvas_flag in ('toolbar_visible', 'header_visible', 'footer_visible'):
        setattr(figure.canvas, canvas_flag, False)
    loss_values = history.history['loss']
    axis.plot(loss_values, label='loss')
    axis.set_xlabel('Epoch')
    axis.set_ylabel('loss (cost)')
    axis.legend()
    axis.grid(True)
    plt.show()
 # Load the raw lifter records from the extracted CSV
 powerlifters_stats = pd.read_csv('data/openpowerlifting.csv', engine='python', encoding='ISO-8859-1', sep=',')
 # Drop the columns that are not used as model inputs
 columns_to_drop = ['MeetID', 'Name', 'Sex', 'Equipment', 'Division', 'Squat4Kg', 'BestSquatKg',
                    'Bench4Kg', 'BestBenchKg', 'Deadlift4Kg', 'BestDeadliftKg', 'TotalKg', 'Place', 'Wilks','WeightClassKg']
 powerlifters_stats = powerlifters_stats.drop(columns_to_drop, axis=1)
 # Replace every age with its bucket index (NaN and out-of-range ages pass through unchanged)
 powerlifters_stats['Age'] = powerlifters_stats['Age'].apply(get_simplified_age)
 # Number of age buckets (0..8); every one-hot target must use this same width,
 # which is also the width of the softmax output layer of the model
 NUM_AGE_CLASSES = 9
 # Clean the data BEFORE splitting so that train, validation and test sets are all usable:
 #  - rows with a missing value in any remaining column are removed (StandardScaler
 #    passes NaN through unchanged, so they would otherwise reach the network, and a
 #    NaN label cannot be one-hot encoded);
 #  - rows whose label is not a valid bucket index (ages passed through unchanged by
 #    get_simplified_age) are removed as well.
 powerlifters_stats = powerlifters_stats.dropna()
 valid_age = powerlifters_stats['Age'].between(0, NUM_AGE_CLASSES - 1)
 powerlifters_stats = powerlifters_stats[valid_age].reset_index(drop=True)
 # Split the data into features (X) and integer class labels (y)
 X = powerlifters_stats.drop(columns=['Age'])
 y = powerlifters_stats['Age'].astype(int)
 # Standardize the features
 # NOTE(review): the scaler is fitted on all rows before the split, so validation/test
 # statistics influence the scaling; fit on X_train only if strict isolation is required.
 scaler = StandardScaler()
 X_scaled = scaler.fit_transform(X)
 X = pd.DataFrame(X_scaled, columns=X.columns)
 # Split the data into train (70%), validation (15%) and test (15%) sets
 X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=1)
 X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=1)
 # Report which classes are present in the training labels and their dtype
 unique_values = np.unique(y_train)
 print(unique_values)
 print(y_train.dtypes)
 # One-hot encode the targets; all three splits share the same number of classes
 y_train = to_categorical(y_train, num_classes=NUM_AGE_CLASSES)
 y_val = to_categorical(y_val, num_classes=NUM_AGE_CLASSES)
 y_test = to_categorical(y_test, num_classes=NUM_AGE_CLASSES)
 # Assemble the network layer by layer: three ReLU hidden layers followed by a
 # 9-way softmax output (one unit per age category)
 feature_count = X_train.shape[1]
 network_layers = [Dense(100, input_dim=feature_count, activation='relu')]
 network_layers += [Dense(width, activation='relu') for width in (70, 50)]
 network_layers.append(Dense(9, activation='softmax'))
 model = Sequential(network_layers, name="Players_model")
 # Configure training: categorical cross-entropy on one-hot targets, Adam optimizer,
 # accuracy reported alongside the loss
 model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
 )
 # Fit the model on the training split for 500 epochs, tracking the validation split
 history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=500,
 )
 # Draw the training-loss curve from the returned history
 plot_loss_tf(history)
 # Score the model on the test split; element 1 of the result is read as the accuracy
 print('Evaluating...')
 test_scores = model.evaluate(X_test, y_test)
 accuracy = test_scores[1]
 print(f"accuracy: {accuracy}")