"""Download the FIFA 19 Kaggle dataset, clean it and write train/dev/test splits.

Running the script:
  1. checks that the Kaggle credentials are present in the environment,
  2. downloads ``fifa19.zip`` with the ``kaggle`` CLI unless it already exists,
  3. extracts the archive into the current directory,
  4. cleans ``data.csv`` (drops rows without a release clause, rescales
     ``Overall`` and converts ``Release Clause`` to a number),
  5. writes the cleaned frame back to ``data.csv`` and a 60/20/20 split to
     ``train.csv`` / ``dev.csv`` / ``test.csv``.
"""
import os
import subprocess
import sys
import zipfile

import pandas as pd

ARCHIVE = "fifa19.zip"
DATASET = "karangadiya/fifa19"
DATA_CSV = "data.csv"

# Multipliers for the suffixes used in the "Release Clause" column.
SUFFIX_MULTIPLIERS = {"K": 1_000, "M": 1_000_000}


def parse_release_clause(values: pd.Series) -> pd.Series:
    """Convert release-clause strings such as ``"€1.1M"`` to float amounts.

    Accepted forms are an optional ``€`` prefix, a decimal number and an
    optional ``K`` or ``M`` suffix. A value without a suffix is taken as is
    (multiplier 1). Missing values stay missing, and a column that is
    already numeric is returned as floats.

    Raises:
        ValueError: if a non-missing value does not match the expected form.
    """
    if pd.api.types.is_numeric_dtype(values):
        return values.astype(float)

    text = values.astype(str).str.replace("€", "", regex=False).str.strip()
    parts = text.str.extract(r"^([\d.]+)([KM]?)$")

    # Anything present in the input that the pattern did not capture is bad data.
    unparsed = values.notna() & parts[0].isna()
    if unparsed.any():
        examples = values[unparsed].head(5).tolist()
        raise ValueError(f"Unrecognised release clause values: {examples}")

    amount = pd.to_numeric(parts[0]).astype(float)
    # An empty suffix is not in the mapping, so it falls back to 1.
    multiplier = parts[1].map(SUFFIX_MULTIPLIERS).fillna(1).astype(float)
    return amount * multiplier


def clean(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *df*; the input frame is not modified.

    Rows with a missing ``Release Clause`` are dropped, ``Overall`` is divided
    by 100 when its mean is above 1, and ``Release Clause`` is converted to a
    numeric amount.
    """
    # Copy so the column assignments below do not write into a view of *df*.
    cleaned = df[df["Release Clause"].notna()].copy()
    if cleaned["Overall"].mean() > 1:
        cleaned["Overall"] = cleaned["Overall"] / 100
    cleaned["Release Clause"] = parse_release_clause(cleaned["Release Clause"])
    return cleaned


def ensure_archive() -> None:
    """Download the dataset archive with the ``kaggle`` CLI if it is absent.

    Raises:
        subprocess.CalledProcessError: if the ``kaggle`` command fails.
        FileNotFoundError: if the ``kaggle`` executable is not on ``PATH``.
    """
    if not os.path.isfile(ARCHIVE):
        # Argument list instead of a shell string; check=True surfaces a
        # failed download here rather than as a confusing zip error later.
        subprocess.run(
            ["kaggle", "datasets", "download", "-d", DATASET], check=True
        )


def split_and_save(df: pd.DataFrame) -> None:
    """Write a 60/20/20 train/dev/test split of *df* to CSV files.

    The first split is shuffled (unseeded); the remainder is cut in half
    in order into dev and test.
    """
    # Imported here so the cleaning helpers can be imported without scikit-learn.
    from sklearn.model_selection import train_test_split

    train, rest = train_test_split(
        df, train_size=0.6, test_size=0.4, shuffle=True
    )
    dev, test = train_test_split(
        rest, train_size=0.5, test_size=0.5, shuffle=False
    )
    test.to_csv("test.csv")
    dev.to_csv("dev.csv")
    train.to_csv("train.csv")


def main() -> None:
    """Run the download, clean and split pipeline."""
    if os.getenv("KAGGLE_KEY") is None or os.getenv("KAGGLE_USERNAME") is None:
        # sys.exit with a message prints it to stderr and exits with status 1.
        sys.exit("Brak zmiennych środowiskowych KAGGLE_KEY lub KAGGLE_USERNAME")

    ensure_archive()
    # Extracting on every run restores the raw data.csv before cleaning.
    with zipfile.ZipFile(ARCHIVE, "r") as zip_ref:
        zip_ref.extractall(".")

    df = clean(pd.read_csv(DATA_CSV))
    df.to_csv(DATA_CSV)
    split_and_save(df)


if __name__ == "__main__":
    main()