forked from s464914/ium_464914
new script
This commit is contained in:
parent
c7169904a4
commit
96945632d6
@ -11,4 +11,4 @@ RUN pip install --user geopandas
|
|||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY ./get_dataset.sh ./
|
COPY IUM_2.py ./
|
||||||
|
48
IUM_2.py
Normal file
48
IUM_2.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import pandas as pd
|
||||||
|
import kaggle
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
|
||||||
|
def download_file():
    """Download and unzip the NASA meteorite-landings dataset from Kaggle.

    Authenticates through the Kaggle API client and extracts the dataset
    archive into the current working directory.
    """
    api = kaggle.api
    api.authenticate()
    api.dataset_download_files(
        'nasa/meteorite-landings',
        path='.',
        unzip=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def split(data, *, test_size=0.2, val_size=0.25, random_state=1):
    """Split ``data`` into train, test and validation subsets.

    The test subset is carved off first; the validation subset is then
    taken from what remains. With the defaults this yields a 60/20/20
    train/test/validation split (0.25 of the remaining 80% is 20%).

    Args:
        data: Dataset accepted by ``train_test_split`` (e.g. a DataFrame).
        test_size: Share of ``data`` held out as the test subset.
        val_size: Share of the remaining (non-test) rows held out as the
            validation subset.
        random_state: Seed used for both shuffles so the split is
            reproducible.

    Returns:
        A ``(train, test, val)`` tuple -- note that the test subset comes
        before the validation subset.
    """
    remainder, meteorite_test = train_test_split(
        data, test_size=test_size, random_state=random_state
    )
    meteorite_train, meteorite_val = train_test_split(
        remainder, test_size=val_size, random_state=random_state
    )
    return meteorite_train, meteorite_test, meteorite_val
|
||||||
|
|
||||||
|
def normalization(data):
    """Return a copy of ``data`` with the ``mass`` column standardized.

    A fresh ``StandardScaler`` is fitted on the ``mass`` column of the frame
    that is passed in, so each call scales relative to that frame only.

    Args:
        data: DataFrame containing a numeric ``mass`` column.

    Returns:
        A new DataFrame; the caller's frame is left unmodified.
    """
    # Work on a copy: the input is typically a slice produced by
    # train_test_split, and assigning into it in place both mutates the
    # caller's object and triggers pandas' chained-assignment warning.
    scaled = data.copy()
    scaler = StandardScaler()
    # fit_transform returns an (n, 1) array; flatten it to a 1-D column.
    scaled['mass'] = scaler.fit_transform(scaled[['mass']]).ravel()
    return scaled
|
||||||
|
|
||||||
|
def preprocessing(data):
    """Return a cleaned copy of the meteorite landings frame.

    Steps, in order:
      1. Drop rows with a missing ``reclat``.
      2. Drop rows whose ``year`` is after 2016 or before 860.
      3. Drop rows located at exactly (0, 0) in ``reclat``/``reclong``.
      4. Set ``mass`` to 0 for rows whose mass is missing and whose ``name``
         starts with 'Österplana'.

    Rows with a missing ``year`` are kept, since both year comparisons
    evaluate to False for NaN.

    Args:
        data: DataFrame with ``name``, ``mass``, ``year``, ``reclat`` and
            ``reclong`` columns.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    cleaned = data.dropna(subset=['reclat'])

    bad_year = (cleaned['year'] > 2016) | (cleaned['year'] < 860)
    zero_location = (cleaned['reclat'] == 0) & (cleaned['reclong'] == 0)

    # Filter by position with a boolean mask instead of dropping by index
    # label: a label-based drop would also remove valid rows that happen to
    # share an index label with an invalid one. The explicit copy makes the
    # assignment below safe.
    keep = ~(bad_year | zero_location)
    cleaned = cleaned.loc[keep.to_numpy()].copy()

    # na=False keeps the mask strictly boolean when a name is missing.
    missing_mass = cleaned['mass'].isnull() & cleaned['name'].str.startswith(
        'Österplana', na=False
    )
    cleaned.loc[missing_mass.to_numpy(), 'mass'] = 0
    return cleaned
|
||||||
|
|
||||||
|
# Fetch the raw dataset from Kaggle into the working directory and load it.
download_file()
data = pd.read_csv("meteorite-landings.csv")
meteorite_train, meteorite_test, meteorite_val = split(data)

# Clean every subset before scaling. Previously normalization() was applied
# twice to each subset while preprocessing() was never called at all.
meteorite_train = preprocessing(meteorite_train)
meteorite_test = preprocessing(meteorite_test)
meteorite_val = preprocessing(meteorite_val)

# NOTE(review): each subset is scaled with its own freshly fitted scaler;
# confirm whether the test/validation subsets should instead reuse the
# statistics fitted on the training subset.
meteorite_train = normalization(meteorite_train)
meteorite_test = normalization(meteorite_test)
meteorite_val = normalization(meteorite_val)

# Persist the three subsets next to the raw CSV.
meteorite_train.to_csv('meteorite_train.csv', encoding='utf-8')
meteorite_test.to_csv('meteorite_test.csv', encoding='utf-8')
meteorite_val.to_csv('meteorite_val.csv', encoding='utf-8')
|
||||||
|
|
||||||
|
|
8
Jenkinsfile
vendored
8
Jenkinsfile
vendored
@ -6,6 +6,11 @@ pipeline {
|
|||||||
string(name: 'CUTOFF', defaultValue: '100', description: 'cut off number')
|
string(name: 'CUTOFF', defaultValue: '100', description: 'cut off number')
|
||||||
}
|
}
|
||||||
stages {
|
stages {
|
||||||
|
stage('Git Checkout') {
|
||||||
|
steps {
|
||||||
|
checkout scm
|
||||||
|
}
|
||||||
|
}
|
||||||
stage('Build') {
|
stage('Build') {
|
||||||
steps {
|
steps {
|
||||||
script {
|
script {
|
||||||
@ -13,9 +18,8 @@ pipeline {
|
|||||||
"KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
|
"KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
|
||||||
def customImage = docker.build("custom-image")
|
def customImage = docker.build("custom-image")
|
||||||
customImage.inside {
|
customImage.inside {
|
||||||
checkout scm
|
|
||||||
sh 'bash ./get_dataset.sh $CUTOFF'
|
sh 'bash ./get_dataset.sh $CUTOFF'
|
||||||
archiveArtifacts artifacts: 'artifacts/*', onlyIfSuccessful: true
|
archiveArtifacts artifacts: 'meteorite-landings.csv meteorite_train.csv meteorite_test.csv meteorite_val.csv', onlyIfSuccessful: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user