This commit is contained in:
Alicja Szulecka 2024-04-02 19:36:23 +02:00
parent dcd281495b
commit 00367909c2
2 changed files with 10 additions and 7 deletions

View File

@@ -1,13 +1,7 @@
import pandas as pd import pandas as pd
import kaggle
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
def download_file():
kaggle.api.authenticate()
kaggle.api.dataset_download_files('nasa/meteorite-landings', path='.', unzip=True)
def split(data): def split(data):
meteorite_train, meteorite_test = train_test_split(data, test_size=0.2, random_state=1) meteorite_train, meteorite_test = train_test_split(data, test_size=0.2, random_state=1)
meteorite_train, meteorite_val = train_test_split(meteorite_train, test_size=0.25, random_state=1) meteorite_train, meteorite_val = train_test_split(meteorite_train, test_size=0.25, random_state=1)
@@ -28,7 +22,6 @@ def preprocessing(data):
data.loc[(data['mass'].isnull()) & (data['name'].str.startswith('Österplana')), 'mass'] = 0 data.loc[(data['mass'].isnull()) & (data['name'].str.startswith('Österplana')), 'mass'] = 0
return data return data
download_file()
data = pd.read_csv("meteorite-landings.csv") data = pd.read_csv("meteorite-landings.csv")
meteorite_train, meteorite_test, meteorite_val = split(data) meteorite_train, meteorite_test, meteorite_val = split(data)

10
Jenkinsfile vendored
View File

@@ -10,6 +10,16 @@ pipeline {
steps { steps {
checkout scm checkout scm
} }
}
stage('Download dataset') {
steps {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
sh 'pip install kaggle'
sh 'kaggle datasets download -d nasa/meteorite-landings'
sh 'unzip -o meteorite-landings.zip'
sh 'rm meteorite-landings.zip'
}
}
} }
stage('Build') { stage('Build') {
steps { steps {