keras
parent fad654ca64
commit 78645d7188
.idea/vcs.xml (Normal file, 6 lines)
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
Dockerfile (24 lines changed)
@@ -4,12 +4,28 @@ RUN apt update

RUN apt install -y python3 python3-pip
RUN pip3 install kaggle
RUN apt install -y unzip
RUN pip3 install pandas
RUN pip3 install tensorflow
RUN pip3 install scikit-learn
RUN pip3 install pandas
# RUN apt install -y unzip

RUN mkdir /.kaggle
RUN chmod -R 777 /.kaggle

WORKDIR /app
COPY ./avocado-preprocessing.sh ./
RUN chmod +x avocado-preprocessing.sh
CMD ./avocado-preprocessing.sh

COPY ./requirements.txt ./
COPY ./avocado-preprocessing.py ./

RUN chmod +x ./requirements.txt
RUN chmod +x ./avocado-preprocessing.py

RUN pip3 install -r ./requirements.txt
CMD python3 avocado-preprocessing.py

# COPY ./avocado-preprocessing.sh ./
# RUN chmod +x avocado-preprocessing.sh
# CMD ./avocado-preprocessing.sh
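
A note on the /.kaggle lines above: the kaggle client looks for its API token (kaggle.json) in ~/.kaggle, or in the directory named by the KAGGLE_CONFIG_DIR environment variable. A minimal sketch of how the preprocessing script can point the client at the directory this Dockerfile prepares, assuming the token is supplied at runtime (for example bind-mounted into /.kaggle); the snippet is illustrative and not part of the commit:

import os

# Assumption: kaggle.json has been mounted or copied into /.kaggle at runtime.
# Set this before importing kaggle so the client reads the token from there.
os.environ.setdefault('KAGGLE_CONFIG_DIR', '/.kaggle')

import kaggle
kaggle.api.authenticate()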

avocado-preprocessing.py
@@ -2,42 +2,34 @@ import kaggle
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Activation, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from keras.models import Sequential

device = 'cpu'

# kaggle
kaggle.api.authenticate()
kaggle.api.dataset_download_files('timmate/avocado-prices-2020', path='.', unzip=True)

# load the data
avocado_with_year = pd.read_csv('avocado-updated-2020.csv')

# drop the redundant 'year' column and map 'type' values to 0 or 1
new = ['date', 'average_price', 'total_volume', '4046', '4225', '4770', 'total_bags', 'small_bags', 'large_bags', 'xlarge_bags', 'type', 'geography']
avocado = avocado_with_year[new]
avocado.to_csv("avocado.csv", index=False)
avocado = pd.read_csv('avocado.csv')
avocado['type'] = avocado.type.map(dict(organic=1, conventional=0))

avocado_train, avocado_validate, avocado_test = np.split(avocado.sample(frac=1), [int(.6*len(avocado)), int(.8*len(avocado))])

print("Avocado: ".ljust(20), np.size(avocado))
print("Avocado (train) : ".ljust(20), np.size(avocado_train))
print("Avocado (validate): ".ljust(20), np.size(avocado_validate))
print("Avocado (test) ".ljust(20), np.size(avocado_test))

avocado.describe(include='all')
avocado_train.describe(include='all')
avocado_validate.describe(include='all')
avocado_test.describe(include='all')

avocado.geography.value_counts()
avocado_test.geography.value_counts()
avocado_train.geography.value_counts()
pd.value_counts(avocado['type']).plot.bar()
pd.value_counts(avocado_train['type']).plot.bar()
pd.value_counts(avocado_test['type']).plot.bar()
avocado['average_price'].hist()
avocado_train['average_price'].hist()
avocado_validate['average_price'].hist()
avocado_test['average_price'].hist()
# drop rows with missing values
avocado.isnull().sum()
avocado.dropna()

# preprocessing
num_values = avocado.select_dtypes(include='float64').values
scaler = preprocessing.MinMaxScaler()
x_scaled = scaler.fit_transform(num_values)
@@ -47,5 +39,66 @@ for col in avocado.columns:
    if col in num_columns:
        avocado[col] = avocado_normalized[col]

avocado.isnull().sum()
avocado.dropna()
avocado_normalized['type'] = avocado['type']
avocado_normalized['geography'] = avocado['geography']

# train/dev/test split
avocado_train, avocado_validate, avocado_test = np.split(avocado_normalized.sample(frac=1), [int(.6*len(avocado_normalized)), int(.8*len(avocado_normalized))])

print("Avocado: ".ljust(20), np.size(avocado_normalized))
print("Avocado (train) : ".ljust(20), np.size(avocado_train))
print("Avocado (validate): ".ljust(20), np.size(avocado_validate))
print("Avocado (test) ".ljust(20), np.size(avocado_test))

# sanity-check the data
avocado_normalized.describe(include='all')
avocado_train.describe(include='all')
avocado_validate.describe(include='all')
avocado_test.describe(include='all')

avocado_normalized.geography.value_counts()
avocado_test.geography.value_counts()
avocado_train.geography.value_counts()

pd.value_counts(avocado_normalized['type']).plot.bar()
pd.value_counts(avocado_train['type']).plot.bar()
pd.value_counts(avocado_test['type']).plot.bar()

avocado_normalized['average_price'].hist()
avocado_train['average_price'].hist()
avocado_validate['average_price'].hist()
avocado_test['average_price'].hist()

# print(avocado_train[:10])
# print(avocado_test[:10])
print(avocado_normalized)

# split into X and y
X_train = avocado_train[['average_price', 'total_volume', '4046', '4225', '4770', 'total_bags', 'small_bags', 'large_bags', 'xlarge_bags']]
y_train = avocado_train[['type']]
X_test = avocado_test[['average_price', 'total_volume', '4046', '4225', '4770', 'total_bags', 'small_bags', 'large_bags', 'xlarge_bags']]
y_test = avocado_test[['type']]

print(X_train.shape[1])
# keras model
model = Sequential()
model.add(Dense(9, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)

# compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# model fit
model.fit(X_train, y_train, epochs=30, batch_size=10, validation_data=(X_test, y_test))

# predict
predictions = model.predict(X_test)
pd.DataFrame(predictions).to_csv('prediction_results.csv')

# evaluation
# _, accuracy = model.evaluate(y_test, predictions)
# print('Accuracy: %.2f' % (accuracy*100))
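
The commented-out evaluation above passes the labels and the predictions to model.evaluate, which expects the test features first and the labels second. A minimal sketch of how the evaluation could be run instead, assuming the X_test and y_test frames defined earlier in the script (illustrative, not part of the commit):

# Evaluate on the held-out test set; Keras returns the loss followed by the
# metrics listed in model.compile (here: accuracy).
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print('Accuracy: %.2f' % (accuracy * 100))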
prediction_results.csv (Normal file, 6610 lines)
File diff suppressed because it is too large
requirements.txt (Normal file, 58 lines)
@@ -0,0 +1,58 @@
appnope==0.1.0
attrs==19.1.0
backcall==0.1.0
bleach==3.1.0
click==7.1.2
csv-diff==1.0
cycler==0.10.0
decorator==4.4.0
defusedxml==0.6.0
dictdiffer==0.8.1
entrypoints==0.3
ipykernel==5.1.1
ipython==7.5.0
ipython-genutils==0.2.0
ipywidgets==7.4.2
jedi==0.13.3
jsonschema==3.0.1
jupyter==1.0.0
jupyter-client==5.2.4
jupyter-console==6.0.0
jupyter-core==4.4.0
kiwisolver==1.1.0
matplotlib==3.1.0
mistune==0.8.4
mpmath==1.1.0
nbconvert==5.5.0
nbformat==4.4.0
nose==1.3.7
notebook==5.7.8
numpy==1.16.4
pandas==0.24.2
pandocfilters==1.4.2
parso==0.4.0
pexpect==4.7.0
pickleshare==0.7.5
prometheus-client==0.7.0
prompt-toolkit==2.0.9
ptyprocess==0.6.0
pygame==1.9.4
Pygments==2.4.2
pyparsing==2.4.0
pyrsistent==0.15.2
python-dateutil==2.8.0
pytz==2019.1
pyzmq==18.0.1
qtconsole==4.5.1
scipy==1.3.0
Send2Trash==1.5.0
six==1.12.0
sympy==1.4
terminado==0.8.2
testpath==0.4.2
tornado==6.0.2
traitlets==4.3.2
wcwidth==0.1.7
webencodings==0.5.1
widgetsnbextension==3.4.2
xlrd==1.2.0