docker task 2
This commit is contained in:
parent
5ee718059f
commit
6f7fe40478
16
data_expl.py
16
data_expl.py
@@ -5,7 +5,7 @@ data = pd.read_csv('./googleplaystore.csv')
|
||||
|
||||
data.dropna(subset=['Rating', 'Type','Content Rating','Current Ver','Android Ver'], inplace=True)
|
||||
data.reset_index(drop=True, inplace=True)
|
||||
data.drop(columns=["Size", "Android Ver", "Current Ver", "Last Updated"])
|
||||
data = data.drop(columns=["Size", "Android Ver", "Current Ver", "Last Updated"])
|
||||
|
||||
# normalizing text
|
||||
to_lowercase = ['App', 'Category', 'Type', 'Content Rating', 'Genres']
|
||||
@@ -34,14 +34,6 @@ np.random.seed(123)
|
||||
train, validate, test = np.split(data.sample(frac=1, random_state=42), [int(.6*len(data)), int(.8*len(data))])
|
||||
print(f"Data shape: {data.shape}\nTrain shape: {train.shape}\nTest shape: {test.shape}\nValidation shape:{validate.shape}")
|
||||
|
||||
f = open("appstrain.csv", "w")
|
||||
f.write(str(train))
|
||||
f.close()
|
||||
|
||||
f = open("appstest.csv", "w")
|
||||
f.write(str(test))
|
||||
f.close()
|
||||
|
||||
f = open("appsvalidate.csv", "w")
|
||||
f.write(str(validate))
|
||||
f.close()
|
||||
train.to_csv('appstrain.csv')
|
||||
test.to_csv('appstest.csv')
|
||||
validate.to_csv('appsvalidate.csv')
|
||||
|
Loading…
Reference in New Issue
Block a user