This commit is contained in:
Klaudia 2023-05-11 18:27:25 +02:00
parent 055cd16bb9
commit e2bddf43e2
7 changed files with 56 additions and 41 deletions

View File

@ -3,4 +3,6 @@ FROM python:latest
RUN apt-get update && apt-get install -y RUN apt-get update && apt-get install -y
RUN pip install pandas RUN pip install pandas
RUN pip install tensorflow
RUN pip install matplotlib
RUN pip install scikit-learn RUN pip install scikit-learn

18
Jenkinsfile vendored
View File

@ -9,13 +9,13 @@ pipeline {
) )
} }
stages { stages {
stage('clear') { stage('Clear_Before') {
steps { steps {
sh 'rm -rf *' sh 'rm -rf *'
} }
} }
stage('Build') { stage('Clone_and_Build') {
steps { steps {
sh 'git clone https://git.wmi.amu.edu.pl/s444439/ium_z444439' sh 'git clone https://git.wmi.amu.edu.pl/s444439/ium_z444439'
sh 'curl -O https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data' sh 'curl -O https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
@ -38,9 +38,17 @@ pipeline {
sh 'ls -a' sh 'ls -a'
sh 'python ./ium_z444439/create-dataset.py' sh 'python ./ium_z444439/create-dataset.py'
echo 'process finish' echo 'process finish'
archiveArtifacts 'adult_test.csv' archiveArtifacts 'X_test.csv'
archiveArtifacts 'adult_dev.csv' archiveArtifacts 'X_dev.csv'
archiveArtifacts 'adult_train.csv' archiveArtifacts 'X_train.csv'
archiveArtifacts 'Y_test.csv'
archiveArtifacts 'Y_dev.csv'
archiveArtifacts 'Y_train.csv'
}
}
stage('Clear_After') {
steps {
sh 'rm -rf *'
} }
} }
} }

View File

@ -33,9 +33,9 @@ pipeline {
sh 'ls -a' sh 'ls -a'
sh 'python ./ium_z444439/stats.py' sh 'python ./ium_z444439/stats.py'
echo 'process finish' echo 'process finish'
archiveArtifacts 'adult_test_stats.csv' archiveArtifacts 'X_test_stats.csv'
archiveArtifacts 'adult_dev_stats.csv' archiveArtifacts 'X_dev_stats.csv'
archiveArtifacts 'adult_train_stats.csv' archiveArtifacts 'X_train_stats.csv'
} }
} }
stage('Goodbye!') { stage('Goodbye!') {

View File

@ -8,12 +8,15 @@ adults = adults.dropna()
adults = adults.sample(CUTOFF) adults = adults.sample(CUTOFF)
adult_X, adult_Y = adults, adults X = adults.copy()
adult_X_train, adult_X_temp, adult_Y_train, adult_Y_temp = train_test_split(adult_X, adult_Y, test_size=0.3, Y = pd.DataFrame(adults.pop('age'))
random_state=1)
adult_X_dev, adult_X_test, adult_Y_dev, adult_Y_test = train_test_split(adult_X_temp, adult_Y_temp, test_size=0.3,
random_state=1)
adult_X_train.to_csv('adult_train.csv', index=False) X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.3, random_state=1)
adult_X_dev.to_csv('adult_dev.csv', index=False) X_dev, X_test, Y_dev, Y_test = train_test_split(X_temp, Y_temp, test_size=0.3, random_state=1)
adult_X_test.to_csv('adult_test.csv', index=False)
X_train.to_csv('X_train.csv', index=False)
X_dev.to_csv('X_dev.csv', index=False)
X_test.to_csv('X_test.csv', index=False)
Y_test.to_csv('Y_test.csv', index=False)
Y_train.to_csv('Y_train.csv', index=False)
Y_dev.to_csv('Y_dev.csv', index=False)

View File

@ -2,12 +2,8 @@ import os
import urllib.request import urllib.request
from os.path import exists from os.path import exists
import pandas
from keras.layers import Dense
from keras.models import Sequential
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
@ -117,22 +113,6 @@ def train_dev_test(data):
return train_data, dev_data, test_data return train_data, dev_data, test_data
def create_model():
data = pd.read_csv('adult_train.csv')
X = data.copy()
y = data["education-num"]
X_train_encoded = pd.get_dummies(X)
y_train_cat = to_categorical(y)
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=X_train_encoded.shape[1]))
model.add(Dense(17, activation='sigmoid'))
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
model.fit(X_train_encoded, y_train_cat, epochs=10, batch_size=32, validation_data=(X_train_encoded, y_train_cat))
model.save('model.joblib')
if __name__ == '__main__': if __name__ == '__main__':
download_file() download_file()
csv_file_name = 'adult.csv' csv_file_name = 'adult.csv'
@ -141,4 +121,3 @@ if __name__ == '__main__':
get_statistics(data) get_statistics(data)
normalization(data) normalization(data)
clean(data) clean(data)
create_model()

View File

@ -1,9 +1,8 @@
import pandas import pandas
adult_dev = pandas.read_csv('adult_dev.csv', engine='python', encoding='ISO-8859-1', sep=',') adult_dev = pandas.read_csv('X_dev.csv', engine='python', encoding='ISO-8859-1', sep=',')
adult_train = pandas.read_csv('adult_train.csv', engine='python', encoding='ISO-8859-1', sep=',') adult_train = pandas.read_csv('X_train.csv', engine='python', encoding='ISO-8859-1', sep=',')
adult_test = pandas.read_csv('X_test.csv', engine='python', encoding='ISO-8859-1', sep=',')
adult_test = pandas.read_csv('adult_test.csv', engine='python', encoding='ISO-8859-1', sep=',')
adult_dev.describe(include='all').to_csv('adult_dev_stats.csv', index=True) adult_dev.describe(include='all').to_csv('adult_dev_stats.csv', index=True)
adult_train.describe(include='all').to_csv('adult_train_stats.csv', index=True) adult_train.describe(include='all').to_csv('adult_train_stats.csv', index=True)

24
train.py Normal file
View File

@ -0,0 +1,24 @@
# Train a small regression network that predicts `age` from the rows of
# ./X_train.csv and save the fitted model under the path 'model'.
import pandas as pd
import tensorflow
# Use the public Keras namespace that matches `tensorflow.keras.Sequential`
# below, rather than the `layers` name that happens to be reachable through
# `keras.applications.densenet`.
from tensorflow.keras import layers

TARGET_COLUMN = 'age'

train_data_x = pd.read_csv('./X_train.csv')

# Split the target off BEFORE taking the feature frame. `DataFrame.pop`
# removes the column in place, so copying first (as the script did before)
# left `age` inside the features and the model was trained on its own label.
adults_predict = train_data_x.pop(TARGET_COLUMN)
adults_train = train_data_x.copy()

# NOTE(review): Normalization needs numeric input; this assumes X_train.csv
# has no string-valued columns left at this point - confirm against the
# dataset-creation script.
normalize = layers.Normalization()
normalize.adapt(adults_train)

# NOTE(review): Dense(64) has no activation, so the stacked layers stay a
# purely linear map. Kept as in the original; confirm whether that is intended.
adult_model = tensorflow.keras.Sequential([
    normalize,
    layers.Dense(64),
    layers.Dense(1),
])

adult_model.compile(
    loss=tensorflow.keras.losses.MeanSquaredError(),
    optimizer=tensorflow.keras.optimizers.Adam(),
)

adult_model.fit(adults_train, adults_predict, epochs=500)
adult_model.save('model')