59 lines
2.0 KiB
Python
59 lines
2.0 KiB
Python
|
# Sebastian Raschka 2014-2020
|
||
|
# mlxtend Machine Learning Library Extensions
|
||
|
#
|
||
|
# A function for loading the open-source Boston Housing dataset.
|
||
|
# Author: Sebastian Raschka <sebastianraschka.com>
|
||
|
#
|
||
|
# License: BSD 3 clause
|
||
|
|
||
|
import numpy as np
|
||
|
import os
|
||
|
|
||
|
# Locate the bundled CSV relative to this module's own file so the loader
# does not depend on the caller's current working directory.
this_dir = os.path.dirname(__file__)
this_filename = os.path.basename(__file__)
DATA_PATH = os.path.join(this_dir, "data", "boston_housing.csv")
|
||
|
|
||
|
|
||
|
def boston_housing_data():
    """Load the Boston Housing dataset.

    Reads the comma-separated file at ``DATA_PATH`` as floating-point
    values; every column except the last is a feature and the last
    column is the target.

    Source : https://archive.ics.uci.edu/ml/datasets/Housing
    Number of samples : 506

    Continuous target variable : MEDV
    MEDV = Median value of owner-occupied homes in $1000's

    Dataset Attributes:

        - 1) CRIM      per capita crime rate by town
        - 2) ZN        proportion of residential land zoned for lots over
                       25,000 sq.ft.
        - 3) INDUS     proportion of non-retail business acres per town
        - 4) CHAS      Charles River dummy variable (= 1 if tract bounds
                       river; 0 otherwise)
        - 5) NOX       nitric oxides concentration (parts per 10 million)
        - 6) RM        average number of rooms per dwelling
        - 7) AGE       proportion of owner-occupied units built prior to 1940
        - 8) DIS       weighted distances to five Boston employment centres
        - 9) RAD       index of accessibility to radial highways
        - 10) TAX      full-value property-tax rate per $10,000
        - 11) PTRATIO  pupil-teacher ratio by town
        - 12) B        1000(Bk - 0.63)^2 where Bk is the prop. of b. by town
        - 13) LSTAT    % lower status of the population

    Returns
    -------
    X, y : [n_samples, n_features], [n_samples]
        X is the feature matrix with 506 housing samples as rows
        and 13 feature columns.
        y is a 1-dimensional array of the continuous target variable MEDV

    Examples
    --------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/data/boston_housing_data/

    """
    dataset = np.genfromtxt(fname=DATA_PATH, delimiter=',', dtype=float)
    # The target (MEDV) is stored as the final column of the CSV.
    features = dataset[:, :-1]
    target = dataset[:, -1]
    return features, target
|