Dockerfile
This commit is contained in:
parent
55e68cc2cc
commit
fe38ed7a4c
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"python.terminal.executeInFileDir": true
|
||||||
|
}
|
26
Dockerfile
Normal file
26
Dockerfile
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# Base image: a pinned Ubuntu LTS release rather than the moving "latest" tag,
# so that rebuilding the image later yields the same environment.
FROM ubuntu:22.04

# Install the OS-level dependencies. "apt-get update" and "apt-get install"
# run in the same layer so the package index can never be a stale cached
# layer, and the index files are removed in that same layer to keep the image
# small. The "-y" flag answers "yes" to every prompt (non-interactive build).
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        python3 \
        python3-pip \
        unzip \
    && rm -rf /var/lib/apt/lists/*

# Install the Python dependencies system-wide. The "--user" flag is
# deliberately not used: it would place the packages under /root/.local, which
# is neither on PATH nor readable when the container is started with a
# non-root UID (as CI docker agents commonly do).
RUN python3 -m pip install --no-cache-dir --upgrade pip \
    && python3 -m pip install --no-cache-dir \
        kaggle \
        pandas

# Create /app in the image (if it does not exist) and switch to it; every
# following RUN, CMD, ENTRYPOINT, COPY and ADD is executed relative to it.
WORKDIR /app

# Copy the start script and the Python task into /app.
COPY ./startscript1.sh ./
COPY ./src/task1python.py ./src/task1python.py

# Build arguments accepted from "docker build --build-arg ...". No instruction
# in this file consumes them; they are declared so that passing them is valid.
# NOTE(review): values passed as build args can end up in the image metadata;
# prefer supplying Kaggle credentials at run time (environment variables) or
# through BuildKit secret mounts.
ARG KAGGLE_USERNAME
ARG KAGGLE_KEY

# No default CMD/ENTRYPOINT is defined; the caller chooses what to run.
# The chmod steps below are intentionally left disabled.
# RUN chmod u+x ./startscript1.sh
# RUN chmod u+x ./src/task1python.py
|
6
Jenkinsfile
vendored
6
Jenkinsfile
vendored
@ -1,5 +1,4 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent any
|
|
||||||
parameters{
|
parameters{
|
||||||
string(
|
string(
|
||||||
defaultValue: 'mikolaj2',
|
defaultValue: 'mikolaj2',
|
||||||
@ -17,6 +16,9 @@ pipeline {
|
|||||||
name: 'KAGGLE_KEY'
|
name: 'KAGGLE_KEY'
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
agent {
    // Build the agent image from the repository's Dockerfile.
    // The value is a single-quoted Groovy string on purpose: Groovy performs no
    // interpolation, so "$KAGGLE_USERNAME" / "$KAGGLE_KEY" are expanded by the
    // shell that runs "docker build" (build parameters are exported as
    // environment variables), and the credentials are not spliced into the
    // pipeline source itself.
    dockerfile {
        additionalBuildArgs '--build-arg KAGGLE_USERNAME="$KAGGLE_USERNAME" --build-arg KAGGLE_KEY="$KAGGLE_KEY" -t my-image'
    }
}
|
||||||
stages {
|
stages {
|
||||||
stage("Check out from version control") {
|
stage("Check out from version control") {
|
||||||
steps {
|
steps {
|
||||||
@ -26,7 +28,7 @@ pipeline {
|
|||||||
stage("Shell Scripts") {
|
stage("Shell Scripts") {
|
||||||
steps {
|
steps {
|
||||||
sh "chmod u+x ./startscript1.sh"
|
sh "chmod u+x ./startscript1.sh"
|
||||||
sh "KAGGLE_USERNAME=${KAGGLE_USERNAME} KAGGLE_KEY=${env.KAGGLE_KEY} CUTOFF=${CUTOFF} ./startscript1.sh"
|
sh "KAGGLE_USERNAME=${params.KAGGLE_USERNAME} KAGGLE_KEY=${params.KAGGLE_KEY} CUTOFF=${CUTOFF} ./startscript1.sh"
|
||||||
archiveArtifacts 'data.txt'
|
archiveArtifacts 'data.txt'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -78,7 +78,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -95,7 +95,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 14,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -104,15 +104,23 @@
|
|||||||
"text": [
|
"text": [
|
||||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||||
"RangeIndex: 29451 entries, 0 to 29450\n",
|
"RangeIndex: 29451 entries, 0 to 29450\n",
|
||||||
"Data columns (total 4 columns):\n",
|
"Data columns (total 12 columns):\n",
|
||||||
" # Column Non-Null Count Dtype \n",
|
" # Column Non-Null Count Dtype \n",
|
||||||
"--- ------ -------------- ----- \n",
|
"--- ------ -------------- ----- \n",
|
||||||
" 0 TARGET(PRICE_IN_LACS) 29451 non-null float64\n",
|
" 0 POSTED_BY 29451 non-null object \n",
|
||||||
" 1 SQUARE_FT 29451 non-null float64\n",
|
" 1 UNDER_CONSTRUCTION 29451 non-null int64 \n",
|
||||||
" 2 BHK_NO. 29451 non-null int64 \n",
|
" 2 RERA 29451 non-null int64 \n",
|
||||||
" 3 RESALE 29451 non-null int64 \n",
|
" 3 BHK_NO. 29451 non-null int64 \n",
|
||||||
"dtypes: float64(2), int64(2)\n",
|
" 4 BHK_OR_RK 29451 non-null object \n",
|
||||||
"memory usage: 920.5 KB\n"
|
" 5 SQUARE_FT 29451 non-null float64\n",
|
||||||
|
" 6 READY_TO_MOVE 29451 non-null int64 \n",
|
||||||
|
" 7 RESALE 29451 non-null int64 \n",
|
||||||
|
" 8 ADDRESS 29451 non-null object \n",
|
||||||
|
" 9 LONGITUDE 29451 non-null float64\n",
|
||||||
|
" 10 LATITUDE 29451 non-null float64\n",
|
||||||
|
" 11 TARGET(PRICE_IN_LACS) 29451 non-null float64\n",
|
||||||
|
"dtypes: float64(4), int64(5), object(3)\n",
|
||||||
|
"memory usage: 2.7+ MB\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -122,7 +130,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 23,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -177,7 +185,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 22,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -226,7 +234,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -235,7 +243,7 @@
|
|||||||
"<AxesSubplot:>"
|
"<AxesSubplot:>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 11,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
},
|
},
|
||||||
@ -259,7 +267,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": 7,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -362,7 +370,7 @@
|
|||||||
"max 1.000000 1.000000 20.000000 1.000000"
|
"max 1.000000 1.000000 20.000000 1.000000"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 13,
|
"execution_count": 7,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
|
63
src/task1python.py
Normal file
63
src/task1python.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# paths
# NOTE(review): the variable names look swapped relative to the files they
# point at ("Test" -> Train.csv, "Train" -> Test.csv). The paths are kept
# exactly as they were; confirm which file is really meant to be split.
filePathTest = "../Participants_Data_HPP/Train.csv"
filePathTrain = "../Participants_Data_HPP/Test.csv"

dataTest = pd.read_csv(filePathTest)
dataTrain = pd.read_csv(filePathTrain)

number_lines = len(dataTest.index)
row_size = number_lines // 2

# Number of output csv files and the nominal number of rows in each of them.
k = 2
size = row_size

# Split the loaded frame into a "Dev" part and a "Test" part.
# NOTE: the second file written here is Participants_Data_HPP/Test.csv, i.e.
# the same file that filePathTrain was read from above; it gets overwritten.
for i in range(k):
    start = size * i
    # The last chunk runs to the end of the frame, so a row count that is not
    # divisible by k does not silently drop the trailing rows.
    stop = size * (i + 1) if i < k - 1 else number_lines
    df = dataTest.iloc[start:stop]
    name = "Dev" if i == 0 else "Test"
    df.to_csv('../Participants_Data_HPP/' + name + '.csv', index=False)

#df_1 = pd.read_csv("../Participants_Data_HPP/Dev.csv")

#df_2 = pd.read_csv("../Participants_Data_HPP/Test.csv")

#df_2 = pd.read_csv("../Participants_Data_HPP/Train.csv")

dataPath = '../Participants_Data_HPP/Train.csv'

# General information about the data set.
data = pd.read_csv(dataPath)

description = data.describe(include="all")

# Correlate the numeric columns only: the frame also holds text columns, and
# recent pandas versions raise instead of silently skipping them.
corr = data.select_dtypes(include="number").corr()

# Keep only the most significant columns. The explicit copy makes the
# assignments below modify an independent frame rather than a view of the
# original one (avoids pandas' SettingWithCopyWarning).
data = data[['TARGET(PRICE_IN_LACS)', 'SQUARE_FT', 'BHK_NO.', 'RESALE']].copy()

# Normalize the price column and the flat area column with min-max scaling.
columnName1 = 'TARGET(PRICE_IN_LACS)'
columnName2 = 'SQUARE_FT'

column1Min = data[columnName1].min()
column1Max = data[columnName1].max()
column2Min = data[columnName2].min()
column2Max = data[columnName2].max()

data[columnName1] = (data[columnName1] - column1Min) / (column1Max - column1Min)
data[columnName2] = (data[columnName2] - column2Min) / (column2Max - column2Min)

print(description)

print(corr)

print(data.describe(include="all"))

print(data.head())
|
Loading…
Reference in New Issue
Block a user