"""Garbage image classification: dataset split, feature extraction, training.

Dataset: https://www.kaggle.com/asdasdasasdas/garbage-classification
"""
# Creating training and test set
import os
import numpy as np
import shutil
# Feature descriptors
import cv2
import mahotas
# Saving data
import h5py
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
# Classifier
from sklearn.ensemble import RandomForestClassifier
# Other
import random
from models.Garbagetruck import GarbageTruck
from models.garbageDump import Dump


def createSets():
    """Copy every class folder into a shuffled train/test split.

    For each class under ``ClassificationGarbage`` the file names are
    shuffled, the first 80% are copied to ``trainSet/<class>`` and the rest to
    ``testSet/<class>``.

    Raises:
        FileExistsError: if a target folder already exists. This is kept on
            purpose: re-running would copy a *different* random split into the
            same folders and mix training and test images.
    """
    rootDir = 'ClassificationGarbage'
    typesDir = ['/cardboard', '/glass', '/metal', '/paper', '/plastic']
    testRatio = 0.2

    for cls in typesDir:
        os.makedirs(rootDir + '/trainSet' + cls)
        os.makedirs(rootDir + '/testSet' + cls)

        sourceDir = rootDir + cls
        allFileNames = os.listdir(sourceDir)
        np.random.shuffle(allFileNames)

        splitAt = int(len(allFileNames) * (1 - testRatio))
        trainingFileNames = [sourceDir + '/' + name
                             for name in allFileNames[:splitAt]]
        testFileNames = [sourceDir + '/' + name
                         for name in allFileNames[splitAt:]]

        print(cls + ':')
        print('Total images: ', len(allFileNames))
        print('Training: ', len(trainingFileNames))
        print('Testing: ', len(testFileNames))

        for name in trainingFileNames:
            shutil.copy(name, rootDir + '/trainSet' + cls)
        for name in testFileNames:
            shutil.copy(name, rootDir + '/testSet' + cls)

    print("Images copied.")


def _extractFeatures(path, size):
    """Return the global feature vector of the image at *path*.

    The vector is ``[colour histogram, Haralick texture, Hu moments]`` computed
    on the image resized to *size*. Returns ``None`` when OpenCV cannot decode
    the file (``cv2.imread`` yields ``None`` instead of raising).
    """
    image = cv2.imread(path)
    if image is None:
        return None
    image = cv2.resize(image, size)
    return np.hstack([histogram(image), haralick(image), hu_moments(image)])


def _collectFeatures(baseDir, typeNames, size, verbose=False):
    """Extract features for every image under ``baseDir/<type>``.

    Returns ``(labels, features)`` as two parallel lists. Files that cannot be
    decoded as images are reported and skipped.
    """
    labels = []
    features = []
    for typeName in typeNames:
        typeDir = os.path.join(baseDir, typeName)
        if verbose:
            print("Processing " + typeName + "...")
        for imageName in os.listdir(typeDir):
            imagePath = typeDir + "/" + imageName
            imageFeatures = _extractFeatures(imagePath, size)
            if imageFeatures is None:
                print("Skipping unreadable image: " + imagePath)
                continue
            labels.append(typeName)
            features.append(imageFeatures)
        if verbose:
            print("Done.")
    return labels, features


def _loadTrainScaling(h5Data):
    """Return ``(scale, offset)`` of the training MinMaxScaler, or ``None``.

    ``None`` is returned when *h5Data* does not exist or was written without
    the scaler datasets, so callers can fall back gracefully.
    """
    if not os.path.isfile(h5Data):
        return None
    with h5py.File(h5Data, 'r') as fileData:
        if 'scaler_scale' not in fileData or 'scaler_min' not in fileData:
            return None
        return (np.array(fileData['scaler_scale']),
                np.array(fileData['scaler_min']))


def processTrainData():
    """Extract, scale and store the training features and labels.

    Writes the MinMax-scaled features to ``output/data.h5`` and the integer
    encoded labels to ``output/types.h5`` (both under ``dataset_1``). The
    fitted scaler parameters are stored alongside the features as
    ``scaler_scale`` / ``scaler_min`` so test and single-image features can be
    scaled exactly like the training data.
    """
    trainDir = 'ClassificationGarbage/trainSet/'
    size = (500, 500)
    trainTypes = sorted(os.listdir(trainDir))

    # process data
    types, features = _collectFeatures(trainDir, trainTypes, size,
                                       verbose=True)
    print("All processed.")
    print("Training...")

    # save data
    h5Data = 'output/data.h5'
    h5Types = 'output/types.h5'
    os.makedirs('output', exist_ok=True)

    target = LabelEncoder().fit_transform(types)
    scaler = MinMaxScaler(feature_range=(0, 1))
    rescaledFeatures = scaler.fit_transform(features)

    with h5py.File(h5Data, 'w') as fileData:
        fileData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
        # MinMaxScaler.transform is X * scale_ + min_; persisting both lets
        # other stages reproduce it without refitting.
        fileData.create_dataset('scaler_scale', data=scaler.scale_)
        fileData.create_dataset('scaler_min', data=scaler.min_)
    with h5py.File(h5Types, 'w') as fileTypes:
        fileTypes.create_dataset('dataset_1', data=np.array(target))


def processTestData():
    """Extract, scale and store the test features and labels.

    Writes to ``output/testdata.h5`` and ``output/testtypes.h5`` (both under
    ``dataset_1``). Features are scaled with the parameters saved by
    ``processTrainData``; if those are not available the previous behaviour
    (a scaler fitted on the test set itself) is used and a warning is printed.
    """
    # Class folders are taken from the training set, as in the original code.
    trainTypes = sorted(os.listdir('ClassificationGarbage/trainSet'))
    testDir = 'ClassificationGarbage/testSet/'
    size = (500, 500)

    print("Testing...")
    # process data
    testTypes, testFeatures = _collectFeatures(testDir, trainTypes, size)

    # save data
    h5TestData = 'output/testdata.h5'
    h5TestTypes = 'output/testtypes.h5'
    os.makedirs('output', exist_ok=True)

    # NOTE(review): labels are encoded independently of the training run; the
    # mapping only matches if the same classes are present in both sets.
    target = LabelEncoder().fit_transform(testTypes)

    scaling = _loadTrainScaling('output/data.h5')
    if scaling is None:
        print("Warning: training scaler not found, scaling test data "
              "independently.")
        scaler = MinMaxScaler(feature_range=(0, 1))
        rescaledFeatures = scaler.fit_transform(testFeatures)
    else:
        scale, offset = scaling
        rescaledFeatures = np.array(testFeatures) * scale + offset

    with h5py.File(h5TestData, 'w') as fileTestData:
        fileTestData.create_dataset('dataset_1',
                                    data=np.array(rescaledFeatures))
    with h5py.File(h5TestTypes, 'w') as fileTestTypes:
        fileTestTypes.create_dataset('dataset_1', data=np.array(target))


def trainAndTest():
    """Train a random forest on the stored training data and score it.

    Reads the h5 files written by ``processTrainData`` and
    ``processTestData``, prints the accuracy on the test set and returns the
    fitted classifier. When the training scaler parameters are available they
    are attached to the classifier as ``featureScaling`` (a
    ``(scale, offset)`` tuple) for use by ``classifyImage``.
    """
    h5Data = 'output/data.h5'
    h5Types = 'output/types.h5'
    h5TestData = 'output/testdata.h5'
    h5TestTypes = 'output/testtypes.h5'

    # import train data
    with h5py.File(h5Data, 'r') as fileData:
        trainFeatures = np.array(fileData['dataset_1'])
    with h5py.File(h5Types, 'r') as fileTypes:
        trainLabels = np.array(fileTypes['dataset_1'])

    # create model
    clf = RandomForestClassifier(n_estimators=100, random_state=9)
    clf.fit(trainFeatures, trainLabels)
    clf.featureScaling = _loadTrainScaling(h5Data)

    # import test data
    with h5py.File(h5TestData, 'r') as fileTestData:
        testFeatures = np.array(fileTestData['dataset_1'])
    with h5py.File(h5TestTypes, 'r') as fileTestTypes:
        testLabels = np.array(fileTestTypes['dataset_1'])

    # Rfc score
    print("Random Forest Classifier score:")
    print(clf.score(testFeatures, testLabels))

    return clf


# global features
def hu_moments(image):
    """Return the 7 Hu moments of the grayscale version of a BGR image."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    moments = cv2.moments(gray)
    return cv2.HuMoments(moments).flatten()


def haralick(image):
    """Return Haralick texture features averaged over all directions."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return mahotas.features.haralick(gray).mean(axis=0)


def histogram(image, mask=None):
    """Return a flattened, normalised 8x8x8 HSV histogram of a BGR image.

    Args:
        image: BGR image as loaded by OpenCV.
        mask: optional mask forwarded to ``cv2.calcHist``; ``None`` uses the
            whole image.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # NOTE(review): OpenCV's 8-bit hue channel spans 0-179, so the upper hue
    # bins stay empty with a 0-256 range. Left unchanged so features remain
    # comparable with already-stored data.
    hist = cv2.calcHist([hsv], [0, 1, 2], mask, [8, 8, 8],
                        [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    return hist.flatten()


# Test one image
def classifyImage(file, clf):
    """Predict the garbage type of a single image file.

    Args:
        file: path of the image to classify.
        clf: fitted classifier, normally the one returned by ``trainAndTest``.

    Returns:
        The name of the predicted class folder.

    Raises:
        ValueError: if the image cannot be read.
    """
    size = (500, 500)
    types = sorted(os.listdir('ClassificationGarbage/testSet'))

    imageFeatures = _extractFeatures(file, size)
    if imageFeatures is None:
        raise ValueError("Could not read image: " + str(file))
    features = imageFeatures.reshape(1, -1)

    # The forest is trained on MinMax-scaled features, so the same scaling
    # must be applied here. Classifiers without the attribute are used as-is.
    scaling = getattr(clf, 'featureScaling', None)
    if scaling is not None:
        scale, offset = scaling
        features = features * scale + offset

    prediction = clf.predict(features)[0]
    return types[prediction]


# At the garbage dump
def sortDump(cardboard, glass, metal, paper, plastic, clf, GT, dump):
    """Classify random test images and move them from the truck to the dump.

    Args:
        cardboard, glass, metal, paper, plastic: number of items to draw for
            each type; they are paired with the sorted test-set folders.
        clf: fitted classifier passed on to ``classifyImage``.
        GT: object providing ``empty(typeName)``.
        dump: object providing ``addGarbage(prediction)``.
    """
    testDir = 'ClassificationGarbage/testSet'
    testTypes = sorted(os.listdir(testDir))
    noGarbage = [cardboard, glass, metal, paper, plastic]

    for typeName, count in zip(testTypes, noGarbage):
        print("Among " + typeName + ", we found: ")
        if count <= 0:
            continue
        typeDir = testDir + '/' + typeName
        images = os.listdir(typeDir)
        for _ in range(count):
            file = typeDir + "/" + random.choice(images)
            prediction = classifyImage(file, clf)
            print(prediction)
            # NOTE(review): the original indentation was lost; both calls are
            # assumed to run once per item since addGarbage needs the
            # per-item prediction - confirm.
            GT.empty(typeName)
            dump.addGarbage(prediction)