# AI2020_Project/garbageDumpSorting.py
#
# 233 lines
# 7.2 KiB
# Python
# Raw Normal View History
#
# 2020-06-04 09:33:58 +02:00
#Creating training and test set
import os
import numpy as np
import shutil
#Feature descriptors
import cv2
import mahotas
#saving data
import h5py
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
#classifier
from sklearn.ensemble import RandomForestClassifier
#other
import random
from models.Garbagetruck import GarbageTruck
from models.garbageDump import Dump
#https://www.kaggle.com/asdasdasasdas/garbage-classification - dataset
def createSets():
    """Split the raw dataset into per-class training and test folders.

    For every class folder under ``ClassificationGarbage`` the file names are
    shuffled, the first 80% are copied to ``trainSet/<class>`` and the
    remaining 20% to ``testSet/<class>``. Nothing happens when
    ``ClassificationGarbage/trainSet`` already exists.
    """
    rootDir = 'ClassificationGarbage'
    # The split is performed only once; an existing trainSet means it was done.
    # (The original called the never-imported name `path`; use os.path.)
    if os.path.exists(rootDir + '/trainSet'):
        return

    typesDir = ['/cardboard', '/glass', '/metal', '/paper', '/plastic']
    testRatio = 0.2
    for cls in typesDir:
        os.makedirs(rootDir + '/trainSet' + cls)
        os.makedirs(rootDir + '/testSet' + cls)

        sourceDir = rootDir + cls
        allFileNames = os.listdir(sourceDir)
        np.random.shuffle(allFileNames)

        # Everything before the split index is training data, the rest is test.
        splitAt = int(len(allFileNames) * (1 - testRatio))
        trainingFileNames = [sourceDir + '/' + name for name in allFileNames[:splitAt]]
        testFileNames = [sourceDir + '/' + name for name in allFileNames[splitAt:]]

        print(cls + ':')
        print('Total images: ', len(allFileNames))
        print('Training: ', len(trainingFileNames))
        print('Testing: ', len(testFileNames))

        for name in trainingFileNames:
            shutil.copy(name, rootDir + '/trainSet' + cls)
        for name in testFileNames:
            shutil.copy(name, rootDir + '/testSet' + cls)
    print("Images copied.")
def processTrainData():
    """Extract global features from the training images and save them as HDF5.

    Every image under ``ClassificationGarbage/trainSet/<type>`` is resized to
    500x500 and described by its colour histogram, Haralick texture and Hu
    moments (stacked in that order). The features are rescaled to [0, 1] and
    written to ``output/data.h5``; the label-encoded types are written to
    ``output/types.h5``. Both files use the dataset name ``dataset_1``.

    Files that OpenCV cannot decode are skipped with a message.
    """
    trainDir = 'ClassificationGarbage/trainSet/'
    trainTypes = sorted(os.listdir('ClassificationGarbage/trainSet'))
    size = (500, 500)
    features = []
    types = []

    # process data
    for typeName in trainTypes:
        typeDir = os.path.join(trainDir, typeName)
        print("Processing " + typeName + "...")
        for imagename in os.listdir(typeDir):
            imagePath = typeDir + "/" + imagename
            image = cv2.imread(imagePath)
            if image is None:
                # cv2.imread returns None for unreadable/non-image files;
                # resizing None would abort the whole run with an opaque error.
                print("Skipping unreadable image: " + imagePath)
                continue
            image = cv2.resize(image, size)
            # Global features
            huMoments = hu_moments(image)
            hara = haralick(image)
            histo = histogram(image)
            features.append(np.hstack([histo, hara, huMoments]))
            types.append(typeName)
        print("Done.")
    print("All processed.")
    print("Training...")

    # save data
    h5Data = 'output/data.h5'
    h5Types = 'output/types.h5'
    target = LabelEncoder().fit_transform(types)
    scaler = MinMaxScaler(feature_range=(0, 1))
    rescaledFeatures = scaler.fit_transform(features)

    # h5py cannot create the file when its parent directory is missing.
    os.makedirs('output', exist_ok=True)
    # Context managers close the files even if writing fails.
    with h5py.File(h5Data, 'w') as fileData:
        fileData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
    with h5py.File(h5Types, 'w') as fileTypes:
        fileTypes.create_dataset('dataset_1', data=np.array(target))
def processTestData():
    """Extract global features from the test images and save them as HDF5.

    The type names are taken from ``ClassificationGarbage/trainSet``; for each
    of them the images under ``ClassificationGarbage/testSet/<type>`` are
    resized to 500x500 and described by colour histogram, Haralick texture and
    Hu moments (stacked in that order). Rescaled features go to
    ``output/testdata.h5`` and label-encoded types to ``output/testtypes.h5``,
    both under the dataset name ``dataset_1``.

    Files that OpenCV cannot decode are skipped with a message.
    """
    testDir = 'ClassificationGarbage/testSet/'
    trainTypes = sorted(os.listdir('ClassificationGarbage/trainSet'))
    size = (500, 500)
    testTypes = []
    testFeatures = []
    print("Testing...")

    # process data
    for typeName in trainTypes:
        typeDir = os.path.join(testDir, typeName)
        for imagename in os.listdir(typeDir):
            imagePath = typeDir + "/" + imagename
            image = cv2.imread(imagePath)
            if image is None:
                # cv2.imread returns None for unreadable/non-image files;
                # resizing None would abort the whole run with an opaque error.
                print("Skipping unreadable image: " + imagePath)
                continue
            image = cv2.resize(image, size)
            # Global features
            huMoments = hu_moments(image)
            hara = haralick(image)
            histo = histogram(image)
            testFeatures.append(np.hstack([histo, hara, huMoments]))
            testTypes.append(typeName)

    # save data
    h5TestData = 'output/testdata.h5'
    h5TestTypes = 'output/testtypes.h5'
    target = LabelEncoder().fit_transform(testTypes)
    # NOTE(review): this scaler is fitted on the test features themselves, so
    # the test data is not scaled with the same min/max as the training data.
    # Kept as in the original; confirm whether the training scaler should be
    # reused instead.
    scaler = MinMaxScaler(feature_range=(0, 1))
    rescaledFeatures = scaler.fit_transform(testFeatures)

    # h5py cannot create the file when its parent directory is missing.
    os.makedirs('output', exist_ok=True)
    # Context managers close the files even if writing fails.
    with h5py.File(h5TestData, 'w') as fileTestData:
        fileTestData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
    with h5py.File(h5TestTypes, 'w') as fileTestTypes:
        fileTestTypes.create_dataset('dataset_1', data=np.array(target))
def trainAndTest():
    """Train a random forest on the stored training data and score it.

    Reads features and labels from ``output/data.h5`` / ``output/types.h5``,
    fits a ``RandomForestClassifier`` (100 trees, ``random_state=9``), then
    prints its mean accuracy on ``output/testdata.h5`` /
    ``output/testtypes.h5``.

    Returns:
        The fitted classifier.
    """
    h5Data = 'output/data.h5'
    h5Types = 'output/types.h5'
    h5TestData = 'output/testdata.h5'
    h5TestTypes = 'output/testtypes.h5'

    # import train data; np.array copies the datasets into memory so they stay
    # usable after the files are closed by the context managers.
    with h5py.File(h5Data, 'r') as fileData:
        trainFeatures = np.array(fileData['dataset_1'])
    with h5py.File(h5Types, 'r') as fileTypes:
        trainTypes = np.array(fileTypes['dataset_1'])

    # create model
    clf = RandomForestClassifier(n_estimators=100, random_state=9)
    clf.fit(trainFeatures, trainTypes)

    # import test data
    with h5py.File(h5TestData, 'r') as fileTestData:
        testFeatures = np.array(fileTestData['dataset_1'])
    with h5py.File(h5TestTypes, 'r') as fileTestTypes:
        testTypes = np.array(fileTestTypes['dataset_1'])

    # Rfc score
    print("Random Forest Classifier score:")
    print(clf.score(testFeatures, testTypes))
    return clf
#global features
def hu_moments(image):
    """Return the flattened Hu moments of the grayscale version of a BGR image."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.HuMoments(cv2.moments(grayscale)).flatten()
def haralick(image):
    """Return mahotas' Haralick texture features of a BGR image.

    The image is converted to grayscale first and the result of
    ``mahotas.features.haralick`` is averaged over its first axis.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    texture = mahotas.features.haralick(grayscale)
    return texture.mean(axis=0)
def histogram(image, mask=None):
    """Return a normalised, flattened 8x8x8 HSV colour histogram of a BGR image.

    Args:
        image: BGR image as loaded by OpenCV.
        mask: Optional mask forwarded to ``cv2.calcHist``; ``None`` (the
            default) uses the whole image. The original accepted this
            parameter but always passed ``None``.

    Returns:
        A flat array with 8 * 8 * 8 = 512 histogram bins.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], mask, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    # Normalises in place: `hist` is both the source and the destination.
    cv2.normalize(hist, hist)
    return hist.flatten()
#Test one image
def classifyImage(file, clf):
    """Predict the garbage type of a single image.

    Args:
        file: Path of the image to classify.
        clf: Fitted classifier whose ``predict`` returns an index into the
            sorted list of folder names in ``ClassificationGarbage/testSet``.

    Returns:
        The predicted type name.

    Raises:
        ValueError: If the image cannot be read by OpenCV.
    """
    size = (500, 500)
    types = sorted(os.listdir('ClassificationGarbage/testSet'))

    image = cv2.imread(file)
    if image is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an opaque OpenCV error from cv2.resize.
        raise ValueError("Could not read image: " + str(file))
    image = cv2.resize(image, size)

    # Global features
    huMoments = hu_moments(image)
    hara = haralick(image)
    histo = histogram(image)
    # NOTE(review): processTrainData rescales features with MinMaxScaler before
    # they are stored for training, but no rescaling is applied here, so the
    # classifier sees raw features at prediction time. Confirm and, if needed,
    # persist and reuse the training scaler.
    features = np.hstack([histo, hara, huMoments]).reshape(1, -1)
    prediction = clf.predict(features)[0]
    return types[prediction]
#At the garbage dump
def sortDump(cardboard, glass, metal, paper, plastic, clf, GT, dump):
    """Classify the garbage delivered to the dump, item by item.

    The counts ``cardboard``, ``glass``, ``metal``, ``paper`` and ``plastic``
    are matched by position with the sorted folder names in
    ``ClassificationGarbage/testSet``. For every counted item a random image
    of that type is classified with ``clf``; the prediction is printed,
    ``GT.empty`` is called with the true type and ``dump.addGarbage`` with
    the predicted one.
    """
    testDir = 'ClassificationGarbage/testSet'
    testTypes = sorted(os.listdir(testDir))
    noGarbage = [cardboard, glass, metal, paper, plastic]
    for index, typeName in enumerate(testTypes):
        print("Among " + typeName + ", we found: ")
        typeDir = testDir + '/' + typeName
        for _ in range(noGarbage[index]):
            image = random.choice(os.listdir(typeDir))
            prediction = classifyImage(typeDir + "/" + image, clf)
            print(prediction)
            GT.empty(typeName)
            dump.addGarbage(prediction)