Add performance and accuracy tests for different classifiers
This commit is contained in:
parent
200f7b6319
commit
f6f82463b6
70
classifiers_test.py
Normal file
70
classifiers_test.py
Normal file
@ -0,0 +1,70 @@
|
||||
import time
|
||||
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
|
||||
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
|
||||
from sklearn.gaussian_process import GaussianProcessClassifier
|
||||
from sklearn.gaussian_process.kernels import RBF
|
||||
from sklearn.metrics import accuracy_score
|
||||
from sklearn.naive_bayes import GaussianNB
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
from sklearn.svm import SVC
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
|
||||
|
||||
def invoke_and_measure(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` and report how long the call took.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        A ``(result, elapsed_time)`` tuple: ``result`` is whatever ``func``
        returned and ``elapsed_time`` is the call duration in seconds.

    Any exception raised by ``func`` propagates to the caller unchanged.
    """
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available, so short calls are not distorted by wall-clock adjustments
    # or by the coarse granularity time.time() has on some platforms.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    return result, elapsed_time
|
||||
|
||||
# Load the table: whitespace-separated columns, first line skipped, no header
# row. sep=r'\s+' is the documented replacement for delim_whitespace=True,
# which pandas deprecated in 2.2.
data = pd.read_csv('starclusters-global-parameters2.dat', skiprows=1, sep=r'\s+', header=None)

# Shuffle reproducibly (fixed seed), then split 80% train / 20% test.
shuffled_data = data.sample(frac=1, random_state=42)
n = int(0.8 * len(shuffled_data))

data_train = shuffled_data[:n]
data_test = shuffled_data[n:]

# Features are every column except the first and the last; the last column is
# the class label. (The first column is presumably an identifier -- confirm
# against the data file.)
X_train = data_train.iloc[:, 1:-1].values
y_train = data_train.iloc[:, -1].values

# Encode labels as integers; the encoder is fitted on the training labels only.
lab = preprocessing.LabelEncoder()
y_transformed = lab.fit_transform(y_train)

X_test = data_test.iloc[:, 1:-1].values
# Encode the test labels once up front instead of once per classifier.
# NOTE(review): LabelEncoder.transform raises ValueError for labels that did
# not occur in the training split.
y_test_transformed = lab.transform(data_test.iloc[:, -1].values)

# Display names, in the same order as ``classifiers`` below.
names = [
    "Nearest Neighbors",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
    "QDA",
    "SVC",
    "Gradient Boosting",
]

classifiers = [
    KNeighborsClassifier(3),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
    SVC(),
    GradientBoostingClassifier(),
]

# Fit each classifier, predict on the held-out split, and report accuracy
# together with the measured fit and predict durations.
for name, clf in zip(names, classifiers):
    _, fit_time = invoke_and_measure(clf.fit, X_train, y_transformed)
    y_pred, pred_time = invoke_and_measure(clf.predict, X_test)
    accuracy = accuracy_score(y_test_transformed, y_pred)
    print(f"{name}: accuracy={accuracy * 100:.2f}% train={fit_time:.5f}s predict={pred_time:.5f}s")
|
Loading…
Reference in New Issue
Block a user