From 6f7fe404788561555520693e9e3f0848f7880a9e Mon Sep 17 00:00:00 2001
From: Kamila
Date: Sun, 3 Apr 2022 13:35:05 +0200
Subject: [PATCH] docker task 2

---
 data_expl.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/data_expl.py b/data_expl.py
index 99609eb..635b1a9 100644
--- a/data_expl.py
+++ b/data_expl.py
@@ -5,7 +5,7 @@ data = pd.read_csv('./googleplaystore.csv')
 data.dropna(subset=['Rating', 'Type','Content Rating','Current Ver','Android Ver'], inplace=True)
 data.reset_index(drop=True, inplace=True)
 
-data.drop(columns=["Size", "Android Ver", "Current Ver", "Last Updated"])
+data = data.drop(columns=["Size", "Android Ver", "Current Ver", "Last Updated"])
 
 # normalizing text
 to_lowercase = ['App', 'Category', 'Type', 'Content Rating', 'Genres']
@@ -34,14 +34,6 @@ np.random.seed(123)
 train, validate, test = np.split(data.sample(frac=1, random_state=42), [int(.6*len(data)), int(.8*len(data))])
 print(f"Data shape: {data.shape}\nTrain shape: {train.shape}\nTest shape: {test.shape}\nValidation shape:{validate.shape}")
 
-f = open("appstrain.csv", "w")
-f.write(str(train))
-f.close()
-
-f = open("appstest.csv", "w")
-f.write(str(test))
-f.close()
-
-f = open("appsvalidate.csv", "w")
-f.write(str(validate))
-f.close()
+train.to_csv('appstrain.csv')
+test.to_csv('appstest.csv')
+validate.to_csv('appsvalidate.csv')