From 76ec19cde0147cd3edd52e753bfd21222c52c523 Mon Sep 17 00:00:00 2001
From: Kamila Bobkowska
Date: Thu, 4 Jun 2020 07:33:58 +0000
Subject: [PATCH] changes in common part

---
 garbageDumpSorting.py | 463 +++++++++++++++++++++---------------------
 1 file changed, 232 insertions(+), 231 deletions(-)

diff --git a/garbageDumpSorting.py b/garbageDumpSorting.py
index 6118604..a3d9251 100644
--- a/garbageDumpSorting.py
+++ b/garbageDumpSorting.py
@@ -1,231 +1,232 @@
-#Creating training and test set
-import os
-import numpy as np
-import shutil
-#Feature descriptors
-import cv2
-import mahotas
-#saving data
-import h5py
-from sklearn.preprocessing import LabelEncoder
-from sklearn.preprocessing import MinMaxScaler
-#classifier
-from sklearn.ensemble import RandomForestClassifier
-#other
-import random
-from models.Garbagetruck import GarbageTruck
-from models.garbageDump import Dump
-
-#https://www.kaggle.com/asdasdasasdas/garbage-classification - dataset
-
-
-def createSets():
-    rootDir = 'ClassificationGarbage'
-    typesDir = ['/cardboard', '/glass', '/metal', '/paper', '/plastic']
-    testRatio = 0.2
-
-    for cls in typesDir:
-        os.makedirs(rootDir + '/trainSet' + cls)
-        os.makedirs(rootDir + '/testSet' + cls)
-        sourceDir = rootDir + cls
-        allFileNames = os.listdir(sourceDir)
-        np.random.shuffle(allFileNames)
-        trainingFileNames, testFileNames = np.split(np.array(allFileNames), [int(len(allFileNames) * (1 - testRatio))])
-        trainingFileNames = [sourceDir +'/' + name for name in trainingFileNames.tolist()]
-        testFileNames = [sourceDir +'/' + name for name in testFileNames.tolist()]
-        print(cls + ':')
-        print('Total images: ', len(allFileNames))
-        print('Training: ', len(trainingFileNames))
-        print('Testing: ', len(testFileNames))
-
-        for name in trainingFileNames:
-            shutil.copy(name, rootDir +'/trainSet' + cls)
-        for name in testFileNames:
-            shutil.copy(name, rootDir +'/testSet' + cls)
-    print("Images copied.")
-
-
-def processTrainData():
-    trainTypes = os.listdir('ClassificationGarbage/trainSet')
-    trainTypes.sort()
-    features = []
-    types = []
-    trainDir = 'ClassificationGarbage/trainSet/'
-    size = tuple((500, 500))
-
-    #process data
-
-    for type in trainTypes:
-        dir = os.path.join(trainDir, type)
-        currentType = type
-        print("Processing " + type + "...")
-        for imagename in os.listdir(dir):
-            file = dir + "/" + imagename
-            image = cv2.imread(file)
-            image = cv2.resize(image, size)
-            #Global features
-            huMoments = hu_moments(image)
-            hara = haralick(image)
-            histo = histogram(image)
-            allFeatures = np.hstack([histo, hara, huMoments])
-            types.append(currentType)
-            features.append(allFeatures)
-        print("Done.")
-    print("All processed.")
-    print("Training...")
-
-    #save data
-
-    h5Data = 'output/data.h5'
-    h5Types = 'output/types.h5'
-
-    targetNames = np.unique(types)
-    le = LabelEncoder()
-    target = le.fit_transform(types)
-    scaler = MinMaxScaler(feature_range=(0, 1))
-    rescaledFeatures = scaler.fit_transform(features)
-
-    fileData = h5py.File(h5Data, 'w')
-    fileData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
-    fileTypes = h5py.File(h5Types, 'w')
-    fileTypes.create_dataset('dataset_1', data=np.array(target))
-    fileData.close()
-    fileTypes.close()
-
-
-def processTestData():
-    trainTypes = os.listdir('ClassificationGarbage/trainSet')
-    trainTypes.sort()
-    testDir = 'ClassificationGarbage/testSet/'
-    size = tuple((500, 500))
-    testTypes = []
-    testFeatures = []
-    print("Testing...")
-
-    #process data
-
-    for type in trainTypes:
-        dir = os.path.join(testDir, type)
-        currentType = type
-        for imagename in os.listdir(dir):
-            file = dir + "/" + imagename
-            image = cv2.imread(file)
-            image = cv2.resize(image, size)
-            #Global features
-            huMoments = hu_moments(image)
-            hara = haralick(image)
-            histo = histogram(image)
-            allFeatures = np.hstack([histo, hara, huMoments])
-            testTypes.append(currentType)
-            testFeatures.append(allFeatures)
-
-    #save data
-
-    h5TestData = 'output/testdata.h5'
-    h5TestTypes = 'output/testtypes.h5'
-
-    targetNames = np.unique(testTypes)
-    le = LabelEncoder()
-    target = le.fit_transform(testTypes)
-    scaler = MinMaxScaler(feature_range=(0, 1))
-    rescaledFeatures = scaler.fit_transform(testFeatures)
-
-    fileTestData = h5py.File(h5TestData, 'w')
-    fileTestData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
-    fileTestTypes = h5py.File(h5TestTypes, 'w')
-    fileTestTypes.create_dataset('dataset_1', data=np.array(target))
-    fileTestData.close()
-    fileTestTypes.close()
-
-
-
-
-def trainAndTest():
-    h5Data = 'output/data.h5'
-    h5Types = 'output/types.h5'
-    h5TestData = 'output/testdata.h5'
-    h5TestTypes = 'output/testtypes.h5'
-
-    #import train data
-    fileData = h5py.File(h5Data, 'r')
-    fileTypes = h5py.File(h5Types, 'r')
-    features = fileData['dataset_1']
-    types = fileTypes['dataset_1']
-    allFeatures = np.array(features)
-    allTypes = np.array(types)
-    fileData.close()
-    fileTypes.close()
-
-    # create model
-    clf = RandomForestClassifier(n_estimators=100, random_state=9)
-    clf.fit(allFeatures, allTypes)
-
-    #import test data
-    fileTestData = h5py.File(h5TestData, 'r')
-    fileTestTypes = h5py.File(h5TestTypes, 'r')
-    features = fileTestData['dataset_1']
-    types = fileTestTypes['dataset_1']
-    allFeatures = np.array(features)
-    allTypes = np.array(types)
-    fileTestData.close()
-    fileTestTypes.close()
-
-    #Rfc score
-    print("Random Forest Classifier score:")
-    print(clf.score(allFeatures, allTypes))
-    return clf
-
-
-#global features
-
-def hu_moments(image):
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    moments = cv2.moments(gray)
-    huMoments = cv2.HuMoments(moments).flatten()
-    return huMoments
-
-def haralick(image):
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    haralick = mahotas.features.haralick(gray).mean(axis=0)
-    return haralick
-
-def histogram(image, mask=None):
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
-    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
-    cv2.normalize(hist, hist)
-    histogram = hist.flatten()
-    return histogram
-
-
-#Test one image
-def classifyImage(file, clf):
-    size = tuple((500, 500))
-    types = os.listdir('ClassificationGarbage/testSet')
-    types.sort()
-    image = cv2.imread(file)
-    image = cv2.resize(image, size)
-    #Global features
-    huMoments = hu_moments(image)
-    hara = haralick(image)
-    histo = histogram(image)
-    allFeatures = np.hstack([histo, hara, huMoments])
-    features = allFeatures.reshape(1,-1)
-    prediction = clf.predict(features)[0]
-    return types[prediction]
-
-
-#At the garbage dump
-def sortDump(cardboard, glass, metal, paper, plastic, clf, GT, dump):
-    testDir = 'ClassificationGarbage/testSet'
-    testTypes = os.listdir(testDir)
-    testTypes.sort()
-    noGarbage = [cardboard, glass, metal, paper, plastic]
-    for i in range(len(testTypes)):
-        print("Among " + testTypes[i] + ", we found: ")
-        for j in range(0, noGarbage[i]):
-            image = random.choice(os.listdir(testDir + '/' + testTypes[i]))
-            file = testDir + '/' + testTypes[i] + "/"+ image
-            prediction = classifyImage(file, clf)
-            print(prediction)
-            GT.empty(testTypes[i])
-            dump.addGarbage(prediction)
+#Creating training and test set
+import os
+import numpy as np
+import shutil
+#Feature descriptors
+import cv2
+import mahotas
+#saving data
+import h5py
+from sklearn.preprocessing import LabelEncoder
+from sklearn.preprocessing import MinMaxScaler
+#classifier
+from sklearn.ensemble import RandomForestClassifier
+#other
+import random
+from models.Garbagetruck import GarbageTruck
+from models.garbageDump import Dump
+
+#https://www.kaggle.com/asdasdasasdas/garbage-classification - dataset
+
+
+def createSets():
+    if not os.path.exists('ClassificationGarbage/trainSet'):
+        rootDir = 'ClassificationGarbage'
+        typesDir = ['/cardboard', '/glass', '/metal', '/paper', '/plastic']
+        testRatio = 0.2
+
+        for cls in typesDir:
+            os.makedirs(rootDir + '/trainSet' + cls)
+            os.makedirs(rootDir + '/testSet' + cls)
+            sourceDir = rootDir + cls
+            allFileNames = os.listdir(sourceDir)
+            np.random.shuffle(allFileNames)
+            trainingFileNames, testFileNames = np.split(np.array(allFileNames), [int(len(allFileNames) * (1 - testRatio))])
+            trainingFileNames = [sourceDir +'/' + name for name in trainingFileNames.tolist()]
+            testFileNames = [sourceDir +'/' + name for name in testFileNames.tolist()]
+            print(cls + ':')
+            print('Total images: ', len(allFileNames))
+            print('Training: ', len(trainingFileNames))
+            print('Testing: ', len(testFileNames))
+
+            for name in trainingFileNames:
+                shutil.copy(name, rootDir +'/trainSet' + cls)
+            for name in testFileNames:
+                shutil.copy(name, rootDir +'/testSet' + cls)
+        print("Images copied.")
+
+
+def processTrainData():
+    trainTypes = os.listdir('ClassificationGarbage/trainSet')
+    trainTypes.sort()
+    features = []
+    types = []
+    trainDir = 'ClassificationGarbage/trainSet/'
+    size = tuple((500, 500))
+
+    #process data
+
+    for type in trainTypes:
+        dir = os.path.join(trainDir, type)
+        currentType = type
+        print("Processing " + type + "...")
+        for imagename in os.listdir(dir):
+            file = dir + "/" + imagename
+            image = cv2.imread(file)
+            image = cv2.resize(image, size)
+            #Global features
+            huMoments = hu_moments(image)
+            hara = haralick(image)
+            histo = histogram(image)
+            allFeatures = np.hstack([histo, hara, huMoments])
+            types.append(currentType)
+            features.append(allFeatures)
+        print("Done.")
+    print("All processed.")
+    print("Training...")
+
+    #save data
+
+    h5Data = 'output/data.h5'
+    h5Types = 'output/types.h5'
+
+    targetNames = np.unique(types)
+    le = LabelEncoder()
+    target = le.fit_transform(types)
+    scaler = MinMaxScaler(feature_range=(0, 1))
+    rescaledFeatures = scaler.fit_transform(features)
+
+    fileData = h5py.File(h5Data, 'w')
+    fileData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
+    fileTypes = h5py.File(h5Types, 'w')
+    fileTypes.create_dataset('dataset_1', data=np.array(target))
+    fileData.close()
+    fileTypes.close()
+
+
+def processTestData():
+    trainTypes = os.listdir('ClassificationGarbage/trainSet')
+    trainTypes.sort()
+    testDir = 'ClassificationGarbage/testSet/'
+    size = tuple((500, 500))
+    testTypes = []
+    testFeatures = []
+    print("Testing...")
+
+    #process data
+
+    for type in trainTypes:
+        dir = os.path.join(testDir, type)
+        currentType = type
+        for imagename in os.listdir(dir):
+            file = dir + "/" + imagename
+            image = cv2.imread(file)
+            image = cv2.resize(image, size)
+            #Global features
+            huMoments = hu_moments(image)
+            hara = haralick(image)
+            histo = histogram(image)
+            allFeatures = np.hstack([histo, hara, huMoments])
+            testTypes.append(currentType)
+            testFeatures.append(allFeatures)
+
+    #save data
+
+    h5TestData = 'output/testdata.h5'
+    h5TestTypes = 'output/testtypes.h5'
+
+    targetNames = np.unique(testTypes)
+    le = LabelEncoder()
+    target = le.fit_transform(testTypes)
+    scaler = MinMaxScaler(feature_range=(0, 1))
+    rescaledFeatures = scaler.fit_transform(testFeatures)
+
+    fileTestData = h5py.File(h5TestData, 'w')
+    fileTestData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
+    fileTestTypes = h5py.File(h5TestTypes, 'w')
+    fileTestTypes.create_dataset('dataset_1', data=np.array(target))
+    fileTestData.close()
+    fileTestTypes.close()
+
+
+
+
+def trainAndTest():
+    h5Data = 'output/data.h5'
+    h5Types = 'output/types.h5'
+    h5TestData = 'output/testdata.h5'
+    h5TestTypes = 'output/testtypes.h5'
+
+    #import train data
+    fileData = h5py.File(h5Data, 'r')
+    fileTypes = h5py.File(h5Types, 'r')
+    features = fileData['dataset_1']
+    types = fileTypes['dataset_1']
+    allFeatures = np.array(features)
+    allTypes = np.array(types)
+    fileData.close()
+    fileTypes.close()
+
+    # create model
+    clf = RandomForestClassifier(n_estimators=100, random_state=9)
+    clf.fit(allFeatures, allTypes)
+
+    #import test data
+    fileTestData = h5py.File(h5TestData, 'r')
+    fileTestTypes = h5py.File(h5TestTypes, 'r')
+    features = fileTestData['dataset_1']
+    types = fileTestTypes['dataset_1']
+    allFeatures = np.array(features)
+    allTypes = np.array(types)
+    fileTestData.close()
+    fileTestTypes.close()
+
+    #Rfc score
+    print("Random Forest Classifier score:")
+    print(clf.score(allFeatures, allTypes))
+    return clf
+
+
+#global features
+
+def hu_moments(image):
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    moments = cv2.moments(gray)
+    huMoments = cv2.HuMoments(moments).flatten()
+    return huMoments
+
+def haralick(image):
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    haralick = mahotas.features.haralick(gray).mean(axis=0)
+    return haralick
+
+def histogram(image, mask=None):
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
+    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
+    cv2.normalize(hist, hist)
+    histogram = hist.flatten()
+    return histogram
+
+
+#Test one image
+def classifyImage(file, clf):
+    size = tuple((500, 500))
+    types = os.listdir('ClassificationGarbage/testSet')
+    types.sort()
+    image = cv2.imread(file)
+    image = cv2.resize(image, size)
+    #Global features
+    huMoments = hu_moments(image)
+    hara = haralick(image)
+    histo = histogram(image)
+    allFeatures = np.hstack([histo, hara, huMoments])
+    features = allFeatures.reshape(1,-1)
+    prediction = clf.predict(features)[0]
+    return types[prediction]
+
+
+#At the garbage dump
+def sortDump(cardboard, glass, metal, paper, plastic, clf, GT, dump):
+    testDir = 'ClassificationGarbage/testSet'
+    testTypes = os.listdir(testDir)
+    testTypes.sort()
+    noGarbage = [cardboard, glass, metal, paper, plastic]
+    for i in range(len(testTypes)):
+        print("Among " + testTypes[i] + ", we found: ")
+        for j in range(0, noGarbage[i]):
+            image = random.choice(os.listdir(testDir + '/' + testTypes[i]))
+            file = testDir + '/' + testTypes[i] + "/"+ image
+            prediction = classifyImage(file, clf)
+            print(prediction)
+            GT.empty(testTypes[i])
+            dump.addGarbage(prediction)
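
For reference, a minimal driver sketch showing how the patched functions chain together. This is not part of the patch: GarbageTruck() and Dump() are assumed here to take no constructor arguments (their real signatures live in models/ and are not shown), and the per-category counts passed to sortDump are placeholders.

# Minimal usage sketch; assumptions are marked in the comments below.
import os
from garbageDumpSorting import createSets, processTrainData, processTestData, trainAndTest, sortDump
from models.Garbagetruck import GarbageTruck
from models.garbageDump import Dump

os.makedirs('output', exist_ok=True)  # the h5py writers expect this folder to exist

createSets()          # split the Kaggle images into train/test folders (skipped if already split)
processTrainData()    # extract Hu moments, Haralick and HSV-histogram features; write output/data.h5, output/types.h5
processTestData()     # same feature extraction for the test set; write output/testdata.h5, output/testtypes.h5
clf = trainAndTest()  # fit the random forest and print its score on the test set

# Assumed no-arg constructors; counts (2 per waste type) are placeholders.
sortDump(2, 2, 2, 2, 2, clf, GarbageTruck(), Dump())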