234 lines
7.2 KiB
Python
234 lines
7.2 KiB
Python
#Creating training and test set
|
|
import os
|
|
import numpy as np
|
|
import shutil
|
|
#Feature descriptors
|
|
import cv2
|
|
import mahotas
|
|
#saving data
|
|
import h5py
|
|
from sklearn.preprocessing import LabelEncoder
|
|
from sklearn.preprocessing import MinMaxScaler
|
|
#classifier
|
|
from sklearn.ensemble import RandomForestClassifier
|
|
#other
|
|
import random
|
|
from models.Garbagetruck import GarbageTruck
|
|
from models.garbageDump import Dump
|
|
|
|
#https://www.kaggle.com/asdasdasasdas/garbage-classification - dataset
|
|
|
|
from os import path
|
|
|
|
def createSets():
    """Split the raw dataset into per-class training and test folders.

    For each class folder under 'ClassificationGarbage', the files are
    shuffled with ``np.random.shuffle`` and copied: the first 80% into
    'ClassificationGarbage/trainSet/<class>' and the rest into
    'ClassificationGarbage/testSet/<class>'.

    The function does nothing when 'ClassificationGarbage/trainSet' already
    exists, so an interrupted earlier run is NOT completed automatically;
    delete the 'trainSet' and 'testSet' folders to force a fresh split.

    Raises:
        FileNotFoundError: if one of the class source folders is missing.
    """
    rootDir = 'ClassificationGarbage'
    trainRoot = os.path.join(rootDir, 'trainSet')
    testRoot = os.path.join(rootDir, 'testSet')

    # The split is created only once.
    if path.exists(trainRoot):
        return

    classNames = ['cardboard', 'glass', 'metal', 'paper', 'plastic']
    testRatio = 0.2

    for className in classNames:
        sourceDir = os.path.join(rootDir, className)

        # List the source first so a missing class folder fails before any
        # empty output folder is created for it. Sub-directories are ignored
        # because shutil.copy cannot copy them.
        allFileNames = [
            name for name in os.listdir(sourceDir)
            if os.path.isfile(os.path.join(sourceDir, name))
        ]
        np.random.shuffle(allFileNames)

        splitAt = int(len(allFileNames) * (1 - testRatio))
        trainingFileNames = allFileNames[:splitAt]
        testFileNames = allFileNames[splitAt:]

        trainDir = os.path.join(trainRoot, className)
        testDir = os.path.join(testRoot, className)
        # exist_ok: a leftover testSet folder must not abort the split.
        os.makedirs(trainDir, exist_ok=True)
        os.makedirs(testDir, exist_ok=True)

        print('/' + className + ':')
        print('Total images: ', len(allFileNames))
        print('Training: ', len(trainingFileNames))
        print('Testing: ', len(testFileNames))

        for name in trainingFileNames:
            shutil.copy(os.path.join(sourceDir, name), trainDir)
        for name in testFileNames:
            shutil.copy(os.path.join(sourceDir, name), testDir)

    print("Images copied.")
|
|
|
|
|
|
def processTrainData():
    """Extract features from the training images and save them as HDF5.

    Every image under 'ClassificationGarbage/trainSet/<type>/' is resized to
    500x500 and described by the concatenation of its colour histogram,
    Haralick texture and Hu moments (in that order). The feature matrix is
    MinMax-scaled to [0, 1] and written to 'output/data.h5'; the
    label-encoded type names are written to 'output/types.h5'. Both files
    use the dataset name 'dataset_1'.

    Files that OpenCV cannot decode are skipped with a message instead of
    crashing the whole run.

    NOTE(review): the fitted scaler is local to this function and is not
    persisted, so no other code can apply the same scaling to new images.
    """
    trainDir = 'ClassificationGarbage/trainSet/'
    h5Data = 'output/data.h5'
    h5Types = 'output/types.h5'
    size = (500, 500)

    # Sorted so that the label order is deterministic.
    trainTypes = sorted(os.listdir('ClassificationGarbage/trainSet'))
    features = []
    types = []

    # process data
    for typeName in trainTypes:
        typeDir = os.path.join(trainDir, typeName)
        print("Processing " + typeName + "...")
        for imageName in os.listdir(typeDir):
            imagePath = typeDir + "/" + imageName
            image = cv2.imread(imagePath)
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                print("Skipping unreadable file: " + imagePath)
                continue
            image = cv2.resize(image, size)

            # Global features
            huMoments = hu_moments(image)
            hara = haralick(image)
            histo = histogram(image)

            types.append(typeName)
            features.append(np.hstack([histo, hara, huMoments]))
        print("Done.")
    print("All processed.")
    print("Training...")

    # save data
    target = LabelEncoder().fit_transform(types)
    scaler = MinMaxScaler(feature_range=(0, 1))
    rescaledFeatures = scaler.fit_transform(features)

    # h5py cannot create missing parent folders on its own.
    os.makedirs(os.path.dirname(h5Data), exist_ok=True)
    os.makedirs(os.path.dirname(h5Types), exist_ok=True)

    # Context managers close the files even if writing fails.
    with h5py.File(h5Data, 'w') as fileData:
        fileData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
    with h5py.File(h5Types, 'w') as fileTypes:
        fileTypes.create_dataset('dataset_1', data=np.array(target))
|
|
|
|
|
|
def processTestData():
    """Extract features from the test images and save them as HDF5.

    The type names are taken from the sub-folders of
    'ClassificationGarbage/trainSet'; for each of them the images in
    'ClassificationGarbage/testSet/<type>/' are resized to 500x500 and
    described by the concatenation of colour histogram, Haralick texture and
    Hu moments (in that order). The MinMax-scaled features go to
    'output/testdata.h5' and the label-encoded types to
    'output/testtypes.h5', both under the dataset name 'dataset_1'.

    Files that OpenCV cannot decode are skipped with a message instead of
    crashing the whole run.

    NOTE(review): the scaler (and the label encoder) are fitted on the test
    data itself, so the scaling is not guaranteed to match the one applied
    to the training features. Confirm whether the training-time scaler
    should be reused here.
    """
    testDir = 'ClassificationGarbage/testSet/'
    h5TestData = 'output/testdata.h5'
    h5TestTypes = 'output/testtypes.h5'
    size = (500, 500)

    # Sorted so that the label order is deterministic.
    trainTypes = sorted(os.listdir('ClassificationGarbage/trainSet'))
    testTypes = []
    testFeatures = []
    print("Testing...")

    # process data
    for typeName in trainTypes:
        typeDir = os.path.join(testDir, typeName)
        for imageName in os.listdir(typeDir):
            imagePath = typeDir + "/" + imageName
            image = cv2.imread(imagePath)
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                print("Skipping unreadable file: " + imagePath)
                continue
            image = cv2.resize(image, size)

            # Global features
            huMoments = hu_moments(image)
            hara = haralick(image)
            histo = histogram(image)

            testTypes.append(typeName)
            testFeatures.append(np.hstack([histo, hara, huMoments]))

    # save data
    target = LabelEncoder().fit_transform(testTypes)
    scaler = MinMaxScaler(feature_range=(0, 1))
    rescaledFeatures = scaler.fit_transform(testFeatures)

    # h5py cannot create missing parent folders on its own.
    os.makedirs(os.path.dirname(h5TestData), exist_ok=True)
    os.makedirs(os.path.dirname(h5TestTypes), exist_ok=True)

    # Context managers close the files even if writing fails.
    with h5py.File(h5TestData, 'w') as fileTestData:
        fileTestData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
    with h5py.File(h5TestTypes, 'w') as fileTestTypes:
        fileTestTypes.create_dataset('dataset_1', data=np.array(target))
|
|
|
|
|
|
|
|
|
|
def _readH5Dataset(filePath, datasetName='dataset_1'):
    """Return one dataset of an HDF5 file as an in-memory NumPy array."""
    # The array is materialised inside the `with` block because the h5py
    # dataset becomes unusable once the file is closed.
    with h5py.File(filePath, 'r') as h5File:
        return np.array(h5File[datasetName])


def trainAndTest():
    """Train a random forest on the stored training data and print its test score.

    Reads features/labels from 'output/data.h5' and 'output/types.h5', fits
    a RandomForestClassifier (100 trees, random_state=9), then evaluates it
    on 'output/testdata.h5' / 'output/testtypes.h5' and prints the score
    returned by ``clf.score``.

    Returns:
        The fitted RandomForestClassifier.
    """
    # import train data
    trainFeatures = _readH5Dataset('output/data.h5')
    trainTypes = _readH5Dataset('output/types.h5')

    # create model
    clf = RandomForestClassifier(n_estimators=100, random_state=9)
    clf.fit(trainFeatures, trainTypes)

    # import test data
    testFeatures = _readH5Dataset('output/testdata.h5')
    testTypes = _readH5Dataset('output/testtypes.h5')

    # Rfc score
    print("Random Forest Classifier score:")
    print(clf.score(testFeatures, testTypes))
    return clf
|
|
|
|
|
|
#global features
|
|
|
|
def hu_moments(image):
    """Return the flattened Hu moments of a BGR image.

    The image is converted to grayscale, its image moments are computed and
    the Hu moments derived from them are returned as a 1-D array.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.HuMoments(cv2.moments(grayscale)).flatten()
|
|
|
|
def haralick(image):
    """Return the Haralick texture features of a BGR image.

    The image is converted to grayscale and the result of
    ``mahotas.features.haralick`` is averaged over its first axis.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    textureFeatures = mahotas.features.haralick(grayscale)
    return textureFeatures.mean(axis=0)
|
|
|
|
def histogram(image, mask=None):
    """Return a flattened, normalized 8x8x8 HSV colour histogram of a BGR image.

    Args:
        image: BGR image, converted to HSV before binning.
        mask: Optional mask forwarded to ``cv2.calcHist``. The default
            ``None`` uses the whole image. (Previously this argument was
            accepted but silently ignored.)

    Returns:
        1-D array with the 8 * 8 * 8 histogram bins.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # NOTE(review): every channel, hue included, is binned over [0, 256);
    # confirm this range is intended for the hue channel. Kept unchanged so
    # that existing feature vectors stay comparable.
    hist = cv2.calcHist([hsv], [0, 1, 2], mask, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)  # normalizes in place (dst is hist itself)
    return hist.flatten()
|
|
|
|
|
|
#Test one image
|
|
def classifyImage(file, clf):
    """Predict the garbage type of a single image file.

    Args:
        file: Path of the image to classify.
        clf: Fitted classifier whose ``predict`` returns an integer index
            into the sorted folder names of 'ClassificationGarbage/testSet'.

    Returns:
        The name of the predicted type folder.

    NOTE(review): the feature vector is passed to ``clf.predict`` without
    any rescaling, while the training data in this file is MinMax-scaled
    before being stored. Confirm whether the same scaling should be applied
    here.
    """
    labels = sorted(os.listdir('ClassificationGarbage/testSet'))
    resized = cv2.resize(cv2.imread(file), (500, 500))

    # Global features, computed in the same order as during training.
    shapePart = hu_moments(resized)
    texturePart = haralick(resized)
    colourPart = histogram(resized)

    # A single sample must be shaped as one row for predict().
    featureRow = np.hstack([colourPart, texturePart, shapePart]).reshape(1, -1)
    predictedIndex = clf.predict(featureRow)[0]
    return labels[predictedIndex]
|
|
|
|
|
|
#At the garbage dump
|
|
def sortDump(cardboard, glass, metal, paper, plastic, clf, GT, dump):
    """Classify random test images for each garbage type and record the results.

    The type names are the sorted entries of 'ClassificationGarbage/testSet'
    and are paired, in that order, with the counts
    (cardboard, glass, metal, paper, plastic). For every counted item a
    random image of that type is classified; the prediction is printed,
    ``GT.empty(<type>)`` is called and the prediction is passed to
    ``dump.addGarbage``.

    Args:
        cardboard, glass, metal, paper, plastic: Number of items to process
            for the corresponding type.
        clf: Fitted classifier forwarded to ``classifyImage``.
        GT: Object providing ``empty(typeName)``.
        dump: Object providing ``addGarbage(prediction)``.
    """
    testDir = 'ClassificationGarbage/testSet'
    testTypes = sorted(os.listdir(testDir))
    noGarbage = [cardboard, glass, metal, paper, plastic]

    # zip pairs types with counts and stops at the shorter sequence, so an
    # unexpected extra entry in testSet no longer raises IndexError.
    for typeName, count in zip(testTypes, noGarbage):
        print("Among " + typeName + ", we found: ")
        if count <= 0:
            # Nothing to classify; as before, the folder is not even listed.
            continue

        typeDir = testDir + '/' + typeName
        candidates = os.listdir(typeDir)  # loop-invariant, listed once per type
        for _ in range(count):
            image = random.choice(candidates)
            prediction = classifyImage(typeDir + "/" + image, clf)
            print(prediction)
            GT.empty(typeName)
            dump.addGarbage(prediction)
|