ium_434804/dvc_prepare_data.py
2021-06-12 18:15:43 +02:00

15 lines
546 B
Python

import numpy as np
import pandas as pd
import wget
from sklearn import preprocessing
# Location of the raw country-vaccinations CSV that this stage prepares.
url = 'https://git.wmi.amu.edu.pl/s434804/ium_434804/raw/branch/master/country_vaccinations.csv'

# Read from the path wget.download() returns instead of re-typing the name.
# NOTE(review): the wget package appears to save under a suffixed name
# (e.g. "name (1).csv") when the target already exists, so a hard-coded name
# could silently re-read a stale copy on re-runs -- confirm against wget docs.
# bar=None is passed so that no progress bar is drawn.
downloaded_path = wget.download(url, out='country_vaccinations.csv', bar=None)
df = pd.read_csv(downloaded_path)

# Split the data into train/validate/test (6:2:2).
# Rows are shuffled first; no seed is given, so every run produces a different
# split. The cuts are made with positional .iloc slicing rather than
# np.split(), because np.split() on a DataFrame goes through
# DataFrame.swapaxes, which pandas has deprecated.
shuffled = df.sample(frac=1)
train_end = int(.6 * len(df))
validate_end = int(.8 * len(df))
train = shuffled.iloc[:train_end]
validate = shuffled.iloc[train_end:validate_end]
test = shuffled.iloc[validate_end:]

# to_csv() is called with its defaults, so the (shuffled) row index of the
# source frame is written as the first column of each file.
train.to_csv("train.csv")
validate.to_csv("validate.csv")
test.to_csv("test.csv")