IUM_04 - update Jenkinsfile, update Dockerfile, add requirements.txt file
This commit is contained in:
parent
399a1b173d
commit
04acddb289
13
Dockerfile
13
Dockerfile
@ -6,15 +6,24 @@ RUN apt-get update && apt-get install -y \
|
|||||||
python3 \
|
python3 \
|
||||||
python3-pip
|
python3-pip
|
||||||
|
|
||||||
# Install the required Python packages
|
# Copy the requirements.txt file to the working directory
|
||||||
RUN pip3 install numpy pandas kaggle scikit-learn
|
COPY requirements.txt ./
|
||||||
|
|
||||||
|
# Install the required Python packages from requirements.txt
|
||||||
|
RUN pip3 install -r requirements.txt
|
||||||
|
|
||||||
# Set the working directory
|
# Set the working directory
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Copy scripts to the working directory
|
# Copy scripts to the working directory
|
||||||
|
|
||||||
|
# Python scripts
|
||||||
COPY download_dataset.py ./
|
COPY download_dataset.py ./
|
||||||
COPY get_stats.py ./
|
COPY get_stats.py ./
|
||||||
|
|
||||||
|
# Bash scripts
|
||||||
|
COPY download_dataset.sh ./
|
||||||
|
COPY get_stats.sh ./
|
||||||
|
|
||||||
# Default command
|
# Default command
|
||||||
CMD bash
|
CMD bash
|
22
Jenkinsfile
vendored
22
Jenkinsfile
vendored
@ -26,29 +26,21 @@ pipeline {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
stage('Download dataset') {
|
stage('Build Docker image') {
|
||||||
steps {
|
steps {
|
||||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
|
script {
|
||||||
sh "kaggle datasets download -d uciml/breast-cancer-wisconsin-data"
|
docker.build("create-dataset-s464863")
|
||||||
sh "unzip -o breast-cancer-wisconsin-data.zip"
|
|
||||||
sh "mkdir -p datasets"
|
|
||||||
sh "mv data.csv datasets/data.csv"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
stage('Preprocess data') {
|
stage('Download dataset and preprocess data') {
|
||||||
agent {
|
|
||||||
dockerfile {
|
|
||||||
filename 'Dockerfile'
|
|
||||||
reuseNode true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
steps {
|
steps {
|
||||||
|
docker.image('create-dataset-s464863').withRun('-e KAGGLE_USERNAME=${params.KAGGLE_USERNAME} -e KAGGLE_KEY=${params.KAGGLE_KEY} -e CUTOFF=${params.CUTOFF}') {
|
||||||
sh "chmod +x ./download_dataset.py"
|
sh "chmod +x ./download_dataset.py"
|
||||||
sh "python3 ./download_dataset.py ${params.CUTOFF}"
|
sh "python3 ./download_dataset.py ${params.CUTOFF}"
|
||||||
archiveArtifacts artifacts: 'datasets/data.csv,datasets/train.csv,datasets/val.csv,datasets/test.csv', onlyIfSuccessful: true
|
archiveArtifacts artifacts: 'datasets/*', onlyIfSuccessful: true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,14 @@
|
|||||||
# Necessary imports
|
# Necessary imports
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import kaggle
|
||||||
import sys
|
import sys
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
from sklearn.preprocessing import MinMaxScaler
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
|
||||||
|
# Download the dataset
|
||||||
|
kaggle.api.authenticate()
|
||||||
|
kaggle.api.dataset_download_files('uciml/breast-cancer-wisconsin-data', path='./datasets', unzip=True)
|
||||||
|
|
||||||
# Load the dataset
|
# Load the dataset
|
||||||
df = pd.read_csv('./datasets/data.csv', index_col='id')
|
df = pd.read_csv('./datasets/data.csv', index_col='id')
|
||||||
|
|
||||||
@ -21,6 +26,9 @@ print(df.isnull().sum())
|
|||||||
# Print the first 5 rows of the dataset
|
# Print the first 5 rows of the dataset
|
||||||
print(df.head())
|
print(df.head())
|
||||||
|
|
||||||
|
# Convert the diagnosis column to binary
|
||||||
|
df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0})
|
||||||
|
|
||||||
# Normalize the dataset
|
# Normalize the dataset
|
||||||
scaler = MinMaxScaler()
|
scaler = MinMaxScaler()
|
||||||
df[df.columns[1:]] = scaler.fit_transform(df[df.columns[1:]])
|
df[df.columns[1:]] = scaler.fit_transform(df[df.columns[1:]])
|
||||||
|
BIN
requirements.txt
Normal file
BIN
requirements.txt
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user