Compare commits

...

30 Commits

Author SHA1 Message Date
Szymon Bartanowicz
5651fb92d1 fix 2024-05-14 23:43:14 +02:00
Szymon Bartanowicz
a597ba4d2a fix 2024-05-14 23:40:55 +02:00
Szymon Bartanowicz
e1616fd376 fix 2024-05-14 23:39:54 +02:00
Szymon Bartanowicz
97ca282402 fix 2024-05-14 23:38:30 +02:00
Szymon Bartanowicz
a188435a1b fix 2024-05-14 23:35:39 +02:00
Szymon Bartanowicz
fb48e4e5f5 fix 2024-05-14 23:30:16 +02:00
Szymon Bartanowicz
a860f43d29 fix 2024-05-14 23:25:25 +02:00
Szymon Bartanowicz
a3b003c422 fix 2024-05-14 23:19:35 +02:00
Szymon Bartanowicz
a3da4f5c76 fix 2024-05-14 23:12:11 +02:00
Szymon Bartanowicz
da08a94eb5 fix 2024-05-14 23:07:05 +02:00
Szymon Bartanowicz
c94695e20b fix 2024-05-14 23:01:38 +02:00
Szymon Bartanowicz
27eb66aaed fix 2024-05-14 22:59:17 +02:00
Szymon Bartanowicz
99173104ae changed script 2024-05-14 22:55:10 +02:00
Szymon Bartanowicz
1e044d743f changed script 2024-05-14 22:52:03 +02:00
Szymon Bartanowicz
ba6c8d5dde fix 2024-05-14 22:39:09 +02:00
Szymon Bartanowicz
8942ab2122 fix 2024-05-14 22:32:48 +02:00
Szymon Bartanowicz
7e735543b9 fix 2024-05-14 22:29:02 +02:00
Szymon Bartanowicz
14fc6d1120 fix 2024-05-14 22:26:47 +02:00
Szymon Bartanowicz
d02162bc7a fix 2024-05-14 22:11:46 +02:00
Szymon Bartanowicz
c200a8f364 fix 2024-05-14 22:10:14 +02:00
Szymon Bartanowicz
36199513fa fix 2024-05-14 22:08:33 +02:00
Szymon Bartanowicz
8c6a6c593a fix 2024-05-14 22:03:36 +02:00
Szymon Bartanowicz
844157b757 fix 2024-05-14 21:55:06 +02:00
Szymon Bartanowicz
199980fdb9 fix 2024-05-14 21:50:46 +02:00
Szymon Bartanowicz
8fd726c5a7 fix 2024-05-14 21:46:15 +02:00
Szymon Bartanowicz
9c578e17b3 fix 2024-05-14 21:41:21 +02:00
Szymon Bartanowicz
40f48555c8 fix 2024-05-14 19:58:32 +02:00
Szymon Bartanowicz
ac790babc3 fix 2024-05-14 19:46:25 +02:00
Szymon Bartanowicz
6ecb5ee56a missing csv 2024-05-14 19:25:45 +02:00
Szymon Bartanowicz
af6518f064 06-training 2024-05-14 19:21:17 +02:00
4 changed files with 58 additions and 28 deletions

View File

@@ -2,7 +2,7 @@ FROM ubuntu:latest
RUN apt-get update && apt-get install -y python3-pip unzip coreutils
RUN pip install --user kaggle pandas scikit-learn tensorflow
RUN pip install --no-cache-dir wheel kaggle pandas scikit-learn tensorflow
WORKDIR /app

39
Jenkinsfile vendored
View File

@@ -1,36 +1,37 @@
pipeline {
agent any
triggers {
upstream(upstreamProjects: 'z-s464937-create-dataset', threshold: hudson.model.Result.SUCCESS)
}
parameters {
string(name: 'CUTOFF', defaultValue: '100', description: 'Ilość wierszy do odcięcia')
string(name: 'KAGGLE_USERNAME', defaultValue: '', description: 'Kaggle username')
password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
string(name: 'EPOCHS', defaultValue: '10', description: 'Epochs')
buildSelector(defaultSelector: lastSuccessful(), description: 'Build no', name: 'BUILD_SELECTOR')
}
stages {
stage('Clone repo') {
stage('Clone Repository') {
steps {
git branch: "main", url: "https://git.wmi.amu.edu.pl/s464937/ium_464937"
git branch: 'training', url: "https://git.wmi.amu.edu.pl/s464937/ium_464937.git"
}
}
stage('Download and preprocess') {
environment {
KAGGLE_USERNAME = "szymonbartanowicz"
KAGGLE_KEY = "4692239eb65f20ec79f9a59ef30e67eb"
}
stage('Copy Artifacts') {
steps {
withEnv([
"KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
"KAGGLE_KEY=${env.KAGGLE_KEY}"
]) {
sh "bash ./script1.sh ${params.CUTOFF}"
}
copyArtifacts filter: 'data/dev.csv,data/test.csv,data/train.csv', projectName: 'z-s464937-create-dataset', selector: buildParameter('BUILD_SELECTOR')
}
}
stage('Archive') {
stage("Run") {
agent {
dockerfile {
filename 'Dockerfile'
reuseNode true
}
}
steps {
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
sh "chmod +x ./model.py"
sh "python3 ./model.py ${params.EPOCHS}"
archiveArtifacts artifacts: 'powerlifting_model.h5', onlyIfSuccessful: true
}
}
}

View File

@@ -1,3 +1,4 @@
import sys
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
@@ -7,10 +8,12 @@ from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf
data = pd.read_csv('openpowerlifting.csv')
data = pd.read_csv('./data/train.csv')
data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
data['Age'] = pd.to_numeric(data['Age'], errors='coerce')
data['BodyweightKg'] = pd.to_numeric(data['BodyweightKg'], errors='coerce')
data['TotalKg'] = pd.to_numeric(data['TotalKg'], errors='coerce')
features = data[['Sex', 'Age', 'BodyweightKg']]
target = data['TotalKg']
@@ -20,13 +23,13 @@ preprocessor = ColumnTransformer(
transformers=[
('num', StandardScaler(), ['Age', 'BodyweightKg']),
('cat', OneHotEncoder(), ['Sex'])
]
],
)
pipeline = Pipeline(steps=[
('preprocessor', preprocessor),
('model', Sequential([
Dense(64, activation='relu', input_dim=4),
Dense(64, activation='relu', input_dim=5),
Dense(64, activation='relu'),
Dense(1)
]))
@@ -34,6 +37,9 @@ pipeline = Pipeline(steps=[
pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
pipeline.fit(X_train, y_train, model__epochs=10, model__validation_split=0.1)
X_train_excluded = X_train.iloc[1:]
y_train_excluded = y_train.iloc[1:]
pipeline.fit(X_train_excluded, y_train_excluded, model__epochs=int(sys.argv[1]), model__validation_split=0.1)
pipeline['model'].save('powerlifting_model.h5')

View File

@@ -1,11 +1,31 @@
##!/bin/bash
#pip install kaggle
#kaggle datasets download -d open-powerlifting/powerlifting-database
#unzip -o powerlifting-database.zip
#DATASET_FILE="openpowerlifting.csv"
#echo "Obcięte wiersze: ${1}"
#head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
#echo "Podział i wymieszanie"
#total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
#train_lines=$((total_lines * 90 / 100))
#dev_lines=$((total_lines * 10 / 100))
#test_lines=$((total_lines - train_lines - dev_lines))
#shuf cutoff_$DATASET_FILE -o shuffled.csv
#head -n $train_lines shuffled.csv > train.csv
#tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
#tail -n $test_lines shuffled.csv > test.csv
#mkdir -p data
#mv train.csv dev.csv test.csv data/
#!/bin/bash
pip install kaggle
kaggle datasets download -d open-powerlifting/powerlifting-database
unzip -o powerlifting-database.zip
DATASET_FILE="openpowerlifting.csv"
echo "Obcięte wiersze: ${1}"
column_names=$(head -n 1 $DATASET_FILE)
echo "Truncated rows: ${1}"
head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
echo "Podział i wymieszanie"
echo "$column_names" > temp && cat cutoff_$DATASET_FILE >> temp && mv temp cutoff_$DATASET_FILE
total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
train_lines=$((total_lines * 90 / 100))
dev_lines=$((total_lines * 10 / 100))
@@ -15,4 +35,7 @@ head -n $train_lines shuffled.csv > train.csv
tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
tail -n $test_lines shuffled.csv > test.csv
mkdir -p data
mv train.csv dev.csv test.csv data/
echo "$column_names" | cat - train.csv > temp && mv temp train.csv
echo "$column_names" | cat - dev.csv > temp && mv temp dev.csv
echo "$column_names" | cat - test.csv > temp && mv temp test.csv
mv train.csv dev.csv test.csv data/