IUM_05
This commit is contained in:
parent
4cb62c64a9
commit
e575276d81
20
Dockerfile
Normal file
20
Dockerfile
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# Base image pinned to an LTS release rather than the moving `latest` tag,
# so rebuilding the image later yields the same OS and Python version.
FROM ubuntu:22.04

# Install the required tools. Recommended packages are skipped and the apt
# package lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        coreutils \
        dos2unix \
        python3-pip \
        python3-venv \
        unzip \
    && rm -rf /var/lib/apt/lists/*

# Create the virtual environment and "activate" it for every later step by
# putting its bin directory first on PATH.
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install the Python libraries into the virtual environment.
# NOTE(review): versions are not pinned; consider a requirements.txt with
# exact versions for reproducible builds.
RUN pip install --no-cache-dir numpy tensorflow scikit-learn kaggle pandas

WORKDIR /app

# Copy configuration files and scripts.
# NOTE(review): this bakes the Kaggle API token into an image layer. Prefer
# mounting kaggle.json (or passing KAGGLE_USERNAME / KAGGLE_KEY) at run time
# and never push this image to a shared registry.
COPY kaggle.json /root/.kaggle/kaggle.json
# Restrict the token to its owner; the kaggle CLI expects this.
RUN chmod 600 /root/.kaggle/kaggle.json

COPY ./create-dataset.sh ./
# Normalise Windows line endings so bash can execute the script.
RUN dos2unix ./create-dataset.sh && chmod +x ./create-dataset.sh

COPY ./model.py ./predict.py ./
|
@ -1,29 +1,35 @@
|
|||||||
#!/bin/bash
# Downloads the Kaggle vehicle-sales dataset and splits it into test, dev and
# train CSV files, each starting with the original header row.
#
# Usage: ./create-dataset.sh <rows_per_split>
#   <rows_per_split>  number of data rows in the test set and in the dev set;
#                     all remaining rows go to the training set.
#
# Output: data/car_prices.csv, data/car_prices_test.csv,
#         data/car_prices_dev.csv, data/car_prices_train.csv

# Stop on the first failing command (including inside pipelines) so a failed
# download cannot silently produce empty splits.
set -euo pipefail

rows=${1:-}
if ! [[ "$rows" =~ ^(0|[1-9][0-9]*)$ ]]; then
    echo "Usage: $0 <rows_per_split> (non-negative integer)" >&2
    exit 1
fi

# Make user-level pip scripts (such as the kaggle CLI) reachable.
export PATH="$PATH:/root/.local/bin"

pip install kaggle

kaggle datasets download -d syedanwarafridi/vehicle-sales-data

unzip -o vehicle-sales-data.zip

# Keep the header aside, then drop rows containing an empty field (",,")
# and shuffle the remaining data rows.
head -n 1 car_prices.csv > car_prices_header.csv
tail -n +2 car_prices.csv | awk -F, '!/,,/' | shuf > car_prices_no_null.csv

total_rows=$(wc -l < car_prices_no_null.csv)
test_dev_rows=$(( rows * 2 ))

if (( test_dev_rows >= total_rows )); then
    echo "Requested $test_dev_rows test+dev rows but only $total_rows rows are available; no training rows would remain." >&2
    exit 1
fi

# Split the data: first N rows -> test, next N rows -> dev, the rest -> train.
head -n "$rows" car_prices_no_null.csv > car_prices_test_temp.csv
head -n "$test_dev_rows" car_prices_no_null.csv | tail -n +"$(( rows + 1 ))" > car_prices_dev_temp.csv
tail -n +"$(( test_dev_rows + 1 ))" car_prices_no_null.csv > car_prices_train_temp.csv

# Put the header back on top of every split.
cat car_prices_header.csv car_prices_test_temp.csv > car_prices_test.csv
cat car_prices_header.csv car_prices_dev_temp.csv > car_prices_dev.csv
cat car_prices_header.csv car_prices_train_temp.csv > car_prices_train.csv

rm car_prices_test_temp.csv car_prices_dev_temp.csv car_prices_train_temp.csv car_prices_no_null.csv car_prices_header.csv

# Reported sizes include the header line of each file.
test_size=$(wc -l < car_prices_test.csv)
dev_size=$(wc -l < car_prices_dev.csv)
train_size=$(wc -l < car_prices_train.csv)
echo "Rozmiar zbioru testowego: $test_size"
echo "Rozmiar zbioru deweloperskiego: $dev_size"
echo "Rozmiar zbioru treningowego: $train_size"

# Store the artifacts.
mkdir -p data
mv car_prices.csv car_prices_test.csv car_prices_dev.csv car_prices_train.csv data/
|
||||||
|
33
model.py
Normal file
33
model.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from tensorflow.keras import Sequential
|
||||||
|
from tensorflow.keras.layers import Dense
|
||||||
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
|
||||||
|
# Train a small dense regression network that predicts `sellingprice` from
# `year`, `condition` and one-hot encoded `transmission`, then save it to
# car_prices_predict_model.h5.

train_data = pd.read_csv('./data/car_prices_train.csv')

# Discard rows that still contain missing values.
train_data = train_data.dropna()

y_train = train_data['sellingprice'].astype(np.float32)

# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of train_data (avoids pandas' SettingWithCopyWarning).
X_train = train_data[['year', 'condition', 'transmission']].copy()

# Scale `condition` into [0, 1] using the range seen in the training data.
scaler_x = MinMaxScaler()
X_train['condition'] = scaler_x.fit_transform(X_train[['condition']]).ravel()

# Scale the target into [0, 1]; predictions must be inverse-transformed with
# a scaler fitted on this same training target.
scaler_y = MinMaxScaler()
y_train = scaler_y.fit_transform(y_train.values.reshape(-1, 1))

# One-hot encode `transmission`. Recent pandas versions emit bool dummy
# columns, which together with the numeric columns would turn the frame into
# an object array when handed to Keras, so cast everything to float32.
X_train = pd.get_dummies(X_train, columns=['transmission']).astype(np.float32)

model = Sequential([
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(X_train, y_train, epochs=20, batch_size=32)

model.save('car_prices_predict_model.h5')
|
27
predict.py
Normal file
27
predict.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from tensorflow.keras import Sequential
|
||||||
|
import tensorflow as tf
|
||||||
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
|
||||||
|
# Load the saved model, predict selling prices for the test split and write
# them (in original price units) to predicted_selling_prices.csv.

test_data = pd.read_csv('./data/car_prices_test.csv')
test_data = test_data.dropna()

# The scalers and the one-hot column layout must match the ones used during
# training, so they are rebuilt from the training split rather than being
# fitted on the test data (which would scale inputs differently and map the
# predictions back onto the wrong price range).
# NOTE(review): assumes ./data/car_prices_train.csv is the same file the
# model was trained on - confirm, or persist the fitted scalers at training time.
train_data = pd.read_csv('./data/car_prices_train.csv').dropna()

y_test = test_data['sellingprice'].astype(np.float32)
# .copy() so the column assignment below does not write into a slice of test_data.
X_test = test_data[['year', 'condition', 'transmission']].copy()

scaler_y = MinMaxScaler()
scaler_y.fit(train_data['sellingprice'].astype(np.float32).values.reshape(-1, 1))

scaler_X = MinMaxScaler()
scaler_X.fit(train_data[['condition']])
X_test['condition'] = scaler_X.transform(X_test[['condition']]).ravel()

# Column layout produced by the training-time encoding.
train_columns = pd.get_dummies(
    train_data[['year', 'condition', 'transmission']],
    columns=['transmission'],
).columns

# One-hot encode, then align to the training columns: categories missing
# from the test split become all-zero columns and unseen ones are dropped.
# Cast to float32 so Keras receives a purely numeric array.
X_test = (
    pd.get_dummies(X_test, columns=['transmission'])
    .reindex(columns=train_columns, fill_value=0)
    .astype(np.float32)
)

model = tf.keras.models.load_model('car_prices_predict_model.h5')

y_pred_scaled = model.predict(X_test)

# Map the scaled predictions back to price units.
y_pred = scaler_y.inverse_transform(y_pred_scaled)

y_pred_df = pd.DataFrame(y_pred, columns=['PredictedSellingPrice'])
y_pred_df.to_csv('predicted_selling_prices.csv', index=False)
|
Loading…
Reference in New Issue
Block a user