Dockerfile
This commit is contained in:
parent
55e68cc2cc
commit
fe38ed7a4c
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
"python.terminal.executeInFileDir": true
|
||||
}
|
26
Dockerfile
Normal file
26
Dockerfile
Normal file
@ -0,0 +1,26 @@
|
||||
# syntax=docker/dockerfile:1

# Base image: pin the Ubuntu release instead of the moving ":latest" tag
# so builds are reproducible (hadolint DL3007).
FROM ubuntu:22.04

# Install the OS-level dependencies in a single layer:
#  - use apt-get (stable CLI) instead of apt (DL3027)
#  - combine update + install so a stale cached index is never used (DL3009)
#  - "-y" answers yes to prompts; --no-install-recommends keeps the image small
#  - remove the apt lists in the same layer so they do not bloat the image (DL3015)
# (The original had a separate "apt update && apt install -y" with an empty
#  package list — it installed nothing; the real installs are merged here.)
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        unzip \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip, then install the Python packages the pipeline needs.
# --no-cache-dir keeps pip's download cache out of the image layer (DL3042).
# Installed system-wide (the original used --user while running as root, which
# put the `kaggle` CLI under /root/.local and off PATH).
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir \
        kaggle \
        pandas

# /app is the working directory for every subsequent RUN, CMD, ENTRYPOINT,
# COPY and ADD instruction (created automatically if it does not exist).
WORKDIR /app

# Copy the pipeline entry script and the Python task into the image.
COPY ./startscript1.sh ./
COPY ./src/task1python.py ./src/task1python.py

# Kaggle credentials supplied at build time by the Jenkins pipeline.
# WARNING(review): build args are visible in `docker history`; for real
# credentials prefer BuildKit secret mounts (RUN --mount=type=secret)
# or passing the values at run time instead.
ARG KAGGLE_USERNAME
ARG KAGGLE_KEY

# Default startup command — currently disabled: the CI job sets execute
# permissions and invokes the script itself.
# RUN chmod u+x ./startscript1.sh
# RUN chmod u+x ./src/task1python.py
|
6
Jenkinsfile
vendored
6
Jenkinsfile
vendored
@ -1,5 +1,4 @@
|
||||
pipeline {
|
||||
agent any
|
||||
parameters{
|
||||
string(
|
||||
defaultValue: 'mikolaj2',
|
||||
@ -17,6 +16,9 @@ pipeline {
|
||||
name: 'KAGGLE_KEY'
|
||||
)
|
||||
}
|
||||
agent {
|
||||
dockerfile {additionalBuildArgs "additionalBuildArgs '--build-arg KAGGLE_USERNAME="$KAGGLE_USERNAME" --build-arg KAGGLE_KEY="$KAGGLE_KEY" -t my-image"}
|
||||
}
|
||||
stages {
|
||||
stage("Check out from version control") {
|
||||
steps {
|
||||
@ -26,7 +28,7 @@ pipeline {
|
||||
stage("Shell Scripts") {
|
||||
steps {
|
||||
sh "chmod u+x ./startscript1.sh"
|
||||
sh "KAGGLE_USERNAME=${KAGGLE_USERNAME} KAGGLE_KEY=${env.KAGGLE_KEY} CUTOFF=${CUTOFF} ./startscript1.sh"
|
||||
sh "KAGGLE_USERNAME=${params.KAGGLE_USERNAME} KAGGLE_KEY=${params.KAGGLE_KEY} CUTOFF=${CUTOFF} ./startscript1.sh"
|
||||
archiveArtifacts 'data.txt'
|
||||
}
|
||||
}
|
||||
|
@ -78,7 +78,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -95,7 +95,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -104,15 +104,23 @@
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 29451 entries, 0 to 29450\n",
|
||||
"Data columns (total 4 columns):\n",
|
||||
"Data columns (total 12 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 TARGET(PRICE_IN_LACS) 29451 non-null float64\n",
|
||||
" 1 SQUARE_FT 29451 non-null float64\n",
|
||||
" 2 BHK_NO. 29451 non-null int64 \n",
|
||||
" 3 RESALE 29451 non-null int64 \n",
|
||||
"dtypes: float64(2), int64(2)\n",
|
||||
"memory usage: 920.5 KB\n"
|
||||
" 0 POSTED_BY 29451 non-null object \n",
|
||||
" 1 UNDER_CONSTRUCTION 29451 non-null int64 \n",
|
||||
" 2 RERA 29451 non-null int64 \n",
|
||||
" 3 BHK_NO. 29451 non-null int64 \n",
|
||||
" 4 BHK_OR_RK 29451 non-null object \n",
|
||||
" 5 SQUARE_FT 29451 non-null float64\n",
|
||||
" 6 READY_TO_MOVE 29451 non-null int64 \n",
|
||||
" 7 RESALE 29451 non-null int64 \n",
|
||||
" 8 ADDRESS 29451 non-null object \n",
|
||||
" 9 LONGITUDE 29451 non-null float64\n",
|
||||
" 10 LATITUDE 29451 non-null float64\n",
|
||||
" 11 TARGET(PRICE_IN_LACS) 29451 non-null float64\n",
|
||||
"dtypes: float64(4), int64(5), object(3)\n",
|
||||
"memory usage: 2.7+ MB\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -122,7 +130,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -177,7 +185,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -226,7 +234,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -235,7 +243,7 @@
|
||||
"<AxesSubplot:>"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
},
|
||||
@ -259,7 +267,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -362,7 +370,7 @@
|
||||
"max 1.000000 1.000000 20.000000 1.000000"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
63
src/task1python.py
Normal file
63
src/task1python.py
Normal file
@ -0,0 +1,63 @@
|
||||
import pandas as pd

# Directory holding the Kaggle "House Price Prediction" CSV files.
DATA_DIR = "../Participants_Data_HPP"

# Columns selected as the most significant features for further work.
SELECTED_COLUMNS = ['TARGET(PRICE_IN_LACS)', 'SQUARE_FT', 'BHK_NO.', 'RESALE']

# Columns to min-max normalise (price and flat area).
NORMALIZED_COLUMNS = ['TARGET(PRICE_IN_LACS)', 'SQUARE_FT']


def _split_into_dev_and_test(source_csv):
    """Split ``source_csv`` in half and write the halves to Dev.csv / Test.csv.

    The first half becomes the Dev set, the second half the Test set.
    With an odd row count the final row is dropped (floor-division halving,
    same as the original script). Files are written into DATA_DIR.
    """
    data = pd.read_csv(source_csv)
    half = len(data.index) // 2
    for i, name in enumerate(("Dev", "Test")):
        part = data[half * i:half * (i + 1)]
        part.to_csv(f"{DATA_DIR}/{name}.csv", index=False)


def _summarize_and_normalize(train_csv):
    """Print summary statistics for ``train_csv`` and for a normalised subset.

    Prints: describe() of the full frame, the numeric correlation matrix,
    then describe() and head() of the selected columns after min-max
    normalising the price and flat-area columns.
    """
    data = pd.read_csv(train_csv)

    description = data.describe(include="all")
    # numeric_only=True: the frame contains object columns (POSTED_BY,
    # ADDRESS, ...) and pandas >= 2.0 raises on corr() over non-numeric data.
    corr = data.corr(numeric_only=True)

    # .copy() detaches the subset from `data`, avoiding pandas'
    # SettingWithCopyWarning when we normalise the columns in place below.
    subset = data[SELECTED_COLUMNS].copy()

    # Min-max normalisation: rescale each column to the [0, 1] range.
    for column in NORMALIZED_COLUMNS:
        lo = subset[column].min()
        hi = subset[column].max()
        subset[column] = (subset[column] - lo) / (hi - lo)

    print(description)
    print(corr)
    print(subset.describe(include="all"))
    print(subset.head())


# Split the raw training data into Dev/Test halves, then report statistics
# on the training data. (The original script also read Test.csv into a
# variable named `dataTrain` that was never used — and whose name was
# swapped with the Train.csv one; that dead read has been dropped.)
_split_into_dev_and_test(f"{DATA_DIR}/Train.csv")
_summarize_and_normalize(f"{DATA_DIR}/Train.csv")
|
Loading…
Reference in New Issue
Block a user