# ium_434784/training.py
#
# 63 lines
# 1.5 KiB
# Python
# Raw Normal View History
#
# 2021-04-26 02:14:45 +02:00
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from countries_map import countries
def mapSet(set, country_map=None):
    """Replace the categorical ``age``, ``sex`` and ``country`` values with codes.

    The frame is modified in place (its three columns are reassigned) and the
    same object is returned, so ``mapSet(df) is df``.

    Args:
        set: DataFrame with ``age``, ``sex`` and ``country`` columns. The name
            shadows the builtin but is kept so existing callers keep working.
        country_map: Optional ``{value_in_column: replacement}`` mapping for
            the ``country`` column. When omitted, the imported ``countries``
            dict is inverted (its values become the keys to look up).

    Returns:
        The same DataFrame, with the three columns encoded. Values that have
        no entry in a mapping are left unchanged.
    """
    age = {"5-14 years": 0, "15-24 years": 1, "25-34 years": 2,
           "35-54 years": 3, "55-74 years": 4, "75+ years": 5}
    sex = {"male": 0, "female": 1}
    if country_map is None:
        # NOTE(review): presumably `countries` maps code -> country name, so
        # the inverse maps name -> code; confirm against countries_map.
        country_map = {v: k for k, v in countries.items()}

    # Assign the result back instead of `set[col].replace(..., inplace=True)`:
    # the in-place form acts on an intermediate Series and is not guaranteed
    # to update the frame (it does not under pandas copy-on-write).
    set["age"] = set["age"].replace(age)
    set["sex"] = set["sex"].replace(sex)
    set["country"] = set["country"].replace(country_map)
    return set
# Column layout of the dataset; the label is the `suicides_no` column.
column_names = ["country", "year", "sex", "age", "suicides_no", "population"]
feature_names = ["country", "year", "sex", "age", "population"]
label_name = column_names[4]

sc = pd.read_csv('who_suicide_statistics.csv')

# Shuffle with a fixed seed, then cut by position into 60% / 20% / 20%.
# Positional slices replace np.split(DataFrame, ...), which depends on the
# deprecated DataFrame.swapaxes.
shuffled = sc.sample(frac=1, random_state=42)
train_end = int(.6 * len(sc))
validate_end = int(.8 * len(sc))

# dropna() returns new frames, so the later column encoding does not operate
# on slices of `shuffled`.
train = shuffled.iloc[:train_end].dropna()
validate = shuffled.iloc[train_end:validate_end].dropna()
test = shuffled.iloc[validate_end:].dropna()

train_n = mapSet(train)
validate_n = mapSet(validate)
# Fixed: this previously encoded `validate` a second time instead of `test`.
test_n = mapSet(test)

# make_csv_dataset takes a file path/pattern, not CSV text, so the encoded
# training frame is written to disk and the path is handed over.
train_csv = 'train.csv'
train_n.to_csv(train_csv, index=False)
train_dataset = tf.data.experimental.make_csv_dataset(
    train_csv,
    1000,
    column_names=column_names,
    label_name=label_name,
    num_epochs=1)

# Pull a single batch to inspect and plot.
features, labels = next(iter(train_dataset))
print(features)

# Fixed: 'sex' is not a matplotlib colormap name; points are coloured by label.
plt.scatter(features['year'],
            features['age'],
            c=labels,
            cmap='viridis')
plt.xlabel("year")
plt.ylabel("age")
plt.show()

print(f"Features: {feature_names}")
print(f"Label: {label_name}")