63 lines
1.5 KiB
Python
63 lines
1.5 KiB
Python
![]() |
import sys
|
||
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
import matplotlib.pyplot as plt
|
||
|
import os
|
||
|
import tensorflow as tf
|
||
|
from countries_map import countries
|
||
|
|
||
|
|
||
|
def mapSet(set):
    """Replace the categorical columns of a data frame with numeric codes.

    The "age" and "sex" columns are recoded with the fixed tables below.
    The "country" column is recoded with the inverse of the ``countries``
    mapping imported from ``countries_map``: each value of ``countries``
    found in the column is replaced by its key.  Values that appear in
    none of the tables are left as they are.

    Args:
        set: pandas DataFrame with "age", "sex" and "country" columns.
            It is modified in place.  (The name shadows the builtin but is
            kept so existing callers are unaffected.)

    Returns:
        The same DataFrame object that was passed in.
    """
    age = {"5-14 years": 0, "15-24 years": 1, "25-34 years": 2,
           "35-54 years": 3, "55-74 years": 4, "75+ years": 5}
    sex = {"male": 0, "female": 1}
    country = {v: k for k, v in countries.items()}

    # Assign the result back instead of calling replace(inplace=True) on
    # set[col]: the in-place form acts on a temporary Series, which leaves
    # the frame untouched under pandas Copy-on-Write and warns before that.
    set["age"] = set["age"].replace(age)
    set["sex"] = set["sex"].replace(sex)
    set["country"] = set["country"].replace(country)

    return set
|
||
|
|
||
|
|
||
|
# Columns of the data set, in the order handed to the CSV reader below.
column_names = [
    "country",
    "year",
    "sex",
    "age",
    "suicides_no",
    "population",
]
# The fifth column ("suicides_no") is the value to predict.
label_name = column_names[4]
# Every remaining column is an input feature.
feature_names = [name for name in column_names if name != label_name]
|
||
|
|
||
|
# Load the raw statistics.
sc = pd.read_csv('who_suicide_statistics.csv')

# Shuffle once with a fixed seed, then cut the rows 60% / 20% / 20% into
# train / validate / test.  Positional slicing yields the same partitions as
# np.split without relying on its deprecated DataFrame support.
shuffled = sc.sample(frac=1, random_state=42)
train_end = int(.6 * len(sc))
validate_end = int(.8 * len(sc))

# dropna() returns new frames, so the later in-place encoding in mapSet does
# not operate on slices of `shuffled`.
train = shuffled.iloc[:train_end].dropna()
validate = shuffled.iloc[train_end:validate_end].dropna()
test = shuffled.iloc[validate_end:].dropna()

# mapSet encodes in place and returns its argument, so each *_n name refers
# to the same frame as the corresponding split.
train_n = mapSet(train)
validate_n = mapSet(validate)
# Encode the test split itself (the validate split was passed here before,
# which left `test` unencoded and made test_n an alias of validate_n).
test_n = mapSet(test)
|
||
|
|
||
|
# make_csv_dataset takes a file path or pattern, not CSV text.  to_csv()
# without a path returns the text, so write the encoded training set to disk
# and hand the reader that path.
train_csv = 'who_suicide_statistics_train.csv'
train_n.to_csv(train_csv, index=False)

# One pass over the training file in batches of 1000 rows; the label column
# is split off from the features.
# NOTE(review): assumes the file's columns appear in the same order as
# column_names (the header row is skipped and these names used) -- confirm.
train_dataset = tf.data.experimental.make_csv_dataset(
    train_csv,
    1000,
    column_names=column_names,
    label_name=label_name,
    num_epochs=1)
|
||
|
|
||
|
# Pull the first batch from the dataset for a quick visual check.
features, labels = next(iter(train_dataset))
print(features)

# Year against encoded age group, coloured by the label value.  The cmap
# must be a registered matplotlib colormap name; 'sex' is not one and made
# scatter() raise ValueError.
plt.scatter(features['year'],
            features['age'],
            c=labels,
            cmap='viridis')

plt.xlabel("year")
plt.ylabel("age")
plt.show()

print(f"Features: {feature_names}")
print(f"Label: {label_name}")
|
||
|
|
||
|
# print(train)
|