Compare commits

...

2 Commits

Author SHA1 Message Date
Alicja Szulecka
96945632d6 new script 2024-04-02 19:05:02 +02:00
Alicja Szulecka
c7169904a4 Update Jenkinsfile 2024-04-02 16:33:57 +02:00
4 changed files with 50 additions and 2 deletions

View File

@ -11,4 +11,4 @@ RUN pip install --user geopandas
WORKDIR /app
COPY ./get_dataset.sh ./
COPY IUM_2.py ./

48
IUM_2.py Normal file
View File

@ -0,0 +1,48 @@
import matplotlib.pyplot as plt
import pandas as pd
import kaggle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
def download_file():
    """Fetch the NASA meteorite-landings dataset from Kaggle.

    Authenticates through the ``kaggle`` client and then downloads the
    ``nasa/meteorite-landings`` dataset into the current directory,
    unzipping the archive.
    """
    client = kaggle.api
    client.authenticate()
    client.dataset_download_files(
        'nasa/meteorite-landings',
        path='.',
        unzip=True,
    )
def split(data):
    """Split ``data`` into train, test and validation subsets.

    The first split holds out 20% of the rows as the test set. The second
    split takes 25% of the remaining rows (about 20% of the original) as the
    validation set. Both splits use ``random_state=1`` so the result is
    reproducible.

    Args:
        data: The dataset to split.

    Returns:
        A ``(train, test, validation)`` tuple, in that order.
    """
    remainder, holdout_test = train_test_split(
        data, test_size=0.2, random_state=1
    )
    train_part, validation_part = train_test_split(
        remainder, test_size=0.25, random_state=1
    )
    return train_part, holdout_test, validation_part
def normalization(data):
    """Scale the ``mass`` column with a ``StandardScaler`` and return a new frame.

    A fresh ``StandardScaler`` is created on every call, fitted on the
    ``mass`` values of ``data`` and immediately used to transform them.

    Args:
        data: DataFrame containing a ``mass`` column.

    Returns:
        A copy of ``data`` whose ``mass`` column holds the scaled values.
        The frame passed in by the caller is left unmodified.
    """
    # Work on a copy: the frames handed in are slices produced by
    # train_test_split, and assigning a column directly on such a slice
    # mutates the caller's object and can raise SettingWithCopyWarning.
    scaled = data.copy()

    # NOTE(review): because the scaler is fitted per call, every split is
    # scaled with its own statistics rather than the training set's.
    # Confirm this is intended.
    scaler = StandardScaler()
    scaled['mass'] = scaler.fit_transform(scaled[['mass']])
    return scaled
def preprocessing(data):
    """Remove unusable meteorite records and fill a known mass gap.

    Rows are discarded when any of the following holds:

    * ``reclat`` is missing,
    * ``year`` is greater than 2016 or less than 860,
    * ``reclat`` and ``reclong`` are both exactly 0.

    For the remaining rows, a missing ``mass`` is set to 0 when ``name``
    starts with ``'Österplana'``. Other missing masses are left as they are.

    Args:
        data: DataFrame with ``name``, ``mass``, ``year``, ``reclat`` and
            ``reclong`` columns.

    Returns:
        A new, cleaned DataFrame. The input frame is not modified.
    """
    has_latitude = data['reclat'].notna()
    # Comparisons against NaN are False, so rows without a year are kept.
    bad_year = (data['year'] > 2016) | (data['year'] < 860)
    zero_coordinates = (data['reclat'] == 0) & (data['reclong'] == 0)

    # Filter with a boolean mask instead of dropping by index label: a
    # label-based drop would also remove valid rows that happen to share an
    # index label with a rejected row when the index is not unique.
    # The explicit copy makes the assignment below safe and keeps the
    # caller's frame untouched.
    cleaned = data.loc[has_latitude & ~bad_year & ~zero_coordinates].copy()

    # na=False: a missing name simply does not match the prefix.
    fill_mass = cleaned['mass'].isnull() & cleaned['name'].str.startswith(
        'Österplana', na=False
    )
    cleaned.loc[fill_mass, 'mass'] = 0
    return cleaned
# Script body: download the raw dataset, split it, scale the ``mass`` column
# of every split and write each split to its own CSV file.
download_file()
data = pd.read_csv("meteorite-landings.csv")
meteorite_train, meteorite_test, meteorite_val = split(data)

# Scale each split exactly once. The three calls were previously repeated
# verbatim, which only refitted a scaler on already-scaled values.
# NOTE(review): preprocessing() is defined in this file but never called;
# confirm whether the duplicated calls were meant to be a cleaning step.
meteorite_train = normalization(meteorite_train)
meteorite_test = normalization(meteorite_test)
meteorite_val = normalization(meteorite_val)

# The DataFrame index is written as the first CSV column (to_csv default).
meteorite_train.to_csv('meteorite_train.csv', encoding='utf-8')
meteorite_test.to_csv('meteorite_test.csv', encoding='utf-8')
meteorite_val.to_csv('meteorite_val.csv', encoding='utf-8')

2
Jenkinsfile vendored
View File

@ -19,7 +19,7 @@ pipeline {
def customImage = docker.build("custom-image")
customImage.inside {
sh 'bash ./get_dataset.sh $CUTOFF'
archiveArtifacts artifacts: 'artifacts/*', onlyIfSuccessful: true
archiveArtifacts artifacts: 'meteorite-landings.csv meteorite_train.csv meteorite_test.csv meteorite_val.csv', onlyIfSuccessful: true
}
}
}

Binary file not shown.