"""Download the Kaggle sports-car price dataset and summarise its splits.

The script fetches and unpacks the dataset archive, loads the CSV, drops
rows containing missing values, partitions the remaining rows into
train / dev / test subsets and prints descriptive statistics for each.

Provenance: contents of main.py as added by commit
be3e041321aad552978fe670bf68c5cc1e44f185 ("first commit", 2023-03-25).
"""
import zipfile

import pandas as pd
from kaggle.api.kaggle_api_extended import KaggleApi
from sklearn.model_selection import train_test_split

# Widen the console output so describe() tables are not column-truncated.
pd.set_option('display.max_columns', 100)

DATASET_SLUG = 'rkiattisak/sports-car-prices-dataset'
DATA_DIR = './data'
ARCHIVE_PATH = './data/sports-car-prices-dataset.zip'
CSV_PATH = './data/Sport car price.csv'

# --- Fetch and unpack the raw data -----------------------------------------
api = KaggleApi()
api.authenticate()
api.dataset_download_files(DATASET_SLUG, path=DATA_DIR)

with zipfile.ZipFile(ARCHIVE_PATH, 'r') as archive:
    archive.extractall(DATA_DIR)

# --- Load and clean ----------------------------------------------------------
cars = pd.read_csv(CSV_PATH)
cars.dropna(inplace=True)  # discard every row that has a missing value

# --- Split -------------------------------------------------------------------
# Two-stage split with one shared seed: 20% of all rows go to the test set,
# then 25% of the remaining 80% (i.e. about 20% of all rows) go to the dev
# set, leaving roughly 60% for training.
random_seed = 42
train_dev_data, test_data = train_test_split(
    cars,
    test_size=0.2,
    random_state=random_seed,
)
train_data, dev_data = train_test_split(
    train_dev_data,
    test_size=0.25,
    random_state=random_seed,
)

# --- Report ------------------------------------------------------------------
train_stats = train_data.describe(include='all')
dev_stats = dev_data.describe(include='all')
test_stats = test_data.describe(include='all')

for split_label, split_stats in (
    ('Training', train_stats),
    ('Development', dev_stats),
    ('Test', test_stats),
):
    print(f"\n{split_label} set statistics:\n{split_stats}")