Added Data python script
This commit is contained in:
parent
10a39edee8
commit
171fd416dc
44
preprocesing_python.py
Normal file
44
preprocesing_python.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
import sys
|
||||||
|
import kaggle
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# Input file, resolved relative to the current working directory.
DATASET_PATH = 'who_suicide_statistics.csv'


def split_dataset(frame, cut_fractions=(.6, .8), random_state=42):
    """Shuffle *frame* and cut it into train / validate / test partitions.

    Args:
        frame: DataFrame to partition.
        cut_fractions: Two cumulative fractions of the row count at which
            the shuffled frame is cut. The default ``(.6, .8)`` gives a
            60 % / 20 % / 20 % split.
        random_state: Seed handed to ``DataFrame.sample`` so the shuffle
            is reproducible.

    Returns:
        A ``(train, validate, test)`` tuple of independent DataFrames.
    """
    shuffled = frame.sample(frac=1, random_state=random_state)
    total = len(shuffled)
    first_cut, second_cut = (int(fraction * total) for fraction in cut_fractions)
    # Positional slices select the same rows np.split() would, without
    # sending the DataFrame through numpy (that path relies on
    # DataFrame.swapaxes, which recent pandas releases deprecate).
    # .copy() makes each partition own its data so the later column
    # assignment and in-place dropna() act on a real frame, not a view.
    return (
        shuffled.iloc[:first_cut].copy(),
        shuffled.iloc[first_cut:second_cut].copy(),
        shuffled.iloc[second_cut:].copy(),
    )


def strip_years_suffix(ages):
    """Return *ages* with a trailing ``years`` unit removed.

    ``str.rstrip('years')`` strips any trailing run of the characters
    y/e/a/r/s rather than the literal suffix, which leaves the separating
    space behind ("15-24 years" -> "15-24 "). Anchoring a pattern at the
    end of the string removes the unit together with the whitespace in
    front of it. Missing values pass through unchanged.

    Args:
        ages: Series of strings such as ``"15-24 years"``.

    Returns:
        A new Series with the unit removed, e.g. ``"15-24"``.
    """
    return ages.str.replace(r'\s*years$', '', regex=True)


def main():
    """Load the dataset, split it, print summaries and clean each partition."""
    sc = pd.read_csv(DATASET_PATH)
    train, validate, test = split_dataset(sc)
    partitions = {'Train': train, 'Validate': validate, 'Test': test}

    # Report rows per partition; DataFrame.size would count cells
    # (rows x columns) instead of samples.
    for label, part in partitions.items():
        print(f"{label} set: ", len(part))

    for part in partitions.values():
        print(part.describe(include='all'))
        print(part['country'].value_counts())

    # Series.value_counts() replaces the deprecated top-level pd.value_counts().
    # NOTE(review): nothing visible here shows or saves these charts
    # (no plt.show() / savefig()), so they are presumably only useful in an
    # interactive session - confirm what output is expected.
    for part in partitions.values():
        part['country'].value_counts().plot.bar()

    for part in partitions.values():
        part['age'] = strip_years_suffix(part['age'])

    for part in partitions.values():
        print(part.isnull().sum())

    # Drop incomplete rows, then show what is left of each partition.
    for part in partitions.values():
        part.dropna(inplace=True)
        print(part)


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user