changes in common part

This commit is contained in:
Kamila Bobkowska 2020-06-04 07:33:58 +00:00
parent 9e608db67c
commit 76ec19cde0

#Creating training and test set
import os
import numpy as np
import shutil
#Feature descriptors
import cv2
import mahotas
#Saving data
import h5py
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
#Classifier
from sklearn.ensemble import RandomForestClassifier
#Other
import random
from models.Garbagetruck import GarbageTruck
from models.garbageDump import Dump
#Dataset: https://www.kaggle.com/asdasdasasdas/garbage-classification
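
# createSets prepares the data once: for each of the five material classes it
# shuffles the class folder and copies about 80% of the images into trainSet
# and the remaining 20% into testSet (testRatio = 0.2 below).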
def createSets():
    #Skip the copy step if the split already exists from a previous run
    if not os.path.exists('ClassificationGarbage/trainSet'):
        rootDir = 'ClassificationGarbage'
        typesDir = ['/cardboard', '/glass', '/metal', '/paper', '/plastic']
        testRatio = 0.2

        for cls in typesDir:
            os.makedirs(rootDir + '/trainSet' + cls)
            os.makedirs(rootDir + '/testSet' + cls)
            sourceDir = rootDir + cls
            allFileNames = os.listdir(sourceDir)
            np.random.shuffle(allFileNames)
            trainingFileNames, testFileNames = np.split(np.array(allFileNames), [int(len(allFileNames) * (1 - testRatio))])
            trainingFileNames = [sourceDir + '/' + name for name in trainingFileNames.tolist()]
            testFileNames = [sourceDir + '/' + name for name in testFileNames.tolist()]
            print(cls + ':')
            print('Total images: ', len(allFileNames))
            print('Training: ', len(trainingFileNames))
            print('Testing: ', len(testFileNames))

            for name in trainingFileNames:
                shutil.copy(name, rootDir + '/trainSet' + cls)
            for name in testFileNames:
                shutil.copy(name, rootDir + '/testSet' + cls)
            print("Images copied.")
def processTrainData():
    trainTypes = os.listdir('ClassificationGarbage/trainSet')
    trainTypes.sort()
    features = []
    types = []
    trainDir = 'ClassificationGarbage/trainSet/'
    size = tuple((500, 500))

    #process data
    for type in trainTypes:
        dir = os.path.join(trainDir, type)
        currentType = type
        print("Processing " + type + "...")
        for imagename in os.listdir(dir):
            file = dir + "/" + imagename
            image = cv2.imread(file)
            image = cv2.resize(image, size)
            #Global features
            huMoments = hu_moments(image)
            hara = haralick(image)
            histo = histogram(image)
            allFeatures = np.hstack([histo, hara, huMoments])
            types.append(currentType)
            features.append(allFeatures)
        print("Done.")
    print("All processed.")
    print("Training...")

    #save data
    h5Data = 'output/data.h5'
    h5Types = 'output/types.h5'

    targetNames = np.unique(types)
    le = LabelEncoder()
    target = le.fit_transform(types)
    scaler = MinMaxScaler(feature_range=(0, 1))
    rescaledFeatures = scaler.fit_transform(features)

    fileData = h5py.File(h5Data, 'w')
    fileData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
    fileTypes = h5py.File(h5Types, 'w')
    fileTypes.create_dataset('dataset_1', data=np.array(target))
    fileData.close()
    fileTypes.close()
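
# processTestData repeats the same feature extraction for the testSet images.
# Note that its MinMaxScaler is fitted on the test features themselves rather
# than reusing the training scaler, so train and test vectors land on only
# approximately the same [0, 1] scale.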
def processTestData():
    trainTypes = os.listdir('ClassificationGarbage/trainSet')
    trainTypes.sort()
    testDir = 'ClassificationGarbage/testSet/'
    size = tuple((500, 500))
    testTypes = []
    testFeatures = []
    print("Testing...")

    #process data
    for type in trainTypes:
        dir = os.path.join(testDir, type)
        currentType = type
        for imagename in os.listdir(dir):
            file = dir + "/" + imagename
            image = cv2.imread(file)
            image = cv2.resize(image, size)
            #Global features
            huMoments = hu_moments(image)
            hara = haralick(image)
            histo = histogram(image)
            allFeatures = np.hstack([histo, hara, huMoments])
            testTypes.append(currentType)
            testFeatures.append(allFeatures)

    #save data
    h5TestData = 'output/testdata.h5'
    h5TestTypes = 'output/testtypes.h5'

    targetNames = np.unique(testTypes)
    le = LabelEncoder()
    target = le.fit_transform(testTypes)
    scaler = MinMaxScaler(feature_range=(0, 1))
    rescaledFeatures = scaler.fit_transform(testFeatures)

    fileTestData = h5py.File(h5TestData, 'w')
    fileTestData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
    fileTestTypes = h5py.File(h5TestTypes, 'w')
    fileTestTypes.create_dataset('dataset_1', data=np.array(target))
    fileTestData.close()
    fileTestTypes.close()
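
# trainAndTest reloads both HDF5 dumps, fits a 100-tree random forest on the
# training vectors (random_state=9 pins the result for reproducibility) and
# prints clf.score on the held-out set, i.e. mean accuracy over test images.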
def trainAndTest():
    h5Data = 'output/data.h5'
    h5Types = 'output/types.h5'
    h5TestData = 'output/testdata.h5'
    h5TestTypes = 'output/testtypes.h5'

    #import train data
    fileData = h5py.File(h5Data, 'r')
    fileTypes = h5py.File(h5Types, 'r')
    features = fileData['dataset_1']
    types = fileTypes['dataset_1']
    allFeatures = np.array(features)
    allTypes = np.array(types)
    fileData.close()
    fileTypes.close()

    #create model
    clf = RandomForestClassifier(n_estimators=100, random_state=9)
    clf.fit(allFeatures, allTypes)

    #import test data
    fileTestData = h5py.File(h5TestData, 'r')
    fileTestTypes = h5py.File(h5TestTypes, 'r')
    features = fileTestData['dataset_1']
    types = fileTestTypes['dataset_1']
    allFeatures = np.array(features)
    allTypes = np.array(types)
    fileTestData.close()
    fileTestTypes.close()

    #Random Forest Classifier score
    print("Random Forest Classifier score:")
    print(clf.score(allFeatures, allTypes))
    return clf
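
# Three hand-crafted global descriptors complement each other below:
# hu_moments captures shape (7 rotation- and scale-invariant moments of the
# grayscale image), haralick captures texture (mahotas returns 13 GLCM
# statistics per direction; mean(axis=0) averages the four directions), and
# histogram captures colour (a flattened, normalised 8x8x8 HSV histogram).
# One OpenCV caveat: 8-bit hue values only span [0, 180), so the [0, 256]
# hue range below leaves the upper hue bins mostly empty.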
#global features
def hu_moments(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    moments = cv2.moments(gray)
    huMoments = cv2.HuMoments(moments).flatten()
    return huMoments

def haralick(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    return haralick

def histogram(image, mask=None):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    histogram = hist.flatten()
    return histogram
#Test one image
def classifyImage(file, clf):
    size = tuple((500, 500))
    types = os.listdir('ClassificationGarbage/testSet')
    types.sort()
    image = cv2.imread(file)
    image = cv2.resize(image, size)
    #Global features
    huMoments = hu_moments(image)
    hara = haralick(image)
    histo = histogram(image)
    allFeatures = np.hstack([histo, hara, huMoments])
    features = allFeatures.reshape(1, -1)
    prediction = clf.predict(features)[0]
    return types[prediction]
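
# Caveat: classifyImage feeds raw, unscaled features to the forest although
# training used MinMaxScaler-rescaled vectors. Decoding the prediction through
# the sorted testSet folder list works because LabelEncoder also orders the
# classes alphabetically. A hypothetical call (the file path is illustrative):
# classifyImage('ClassificationGarbage/testSet/glass/glass10.jpg', clf)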
#At the garbage dump
def sortDump(cardboard, glass, metal, paper, plastic, clf, GT, dump):
    testDir = 'ClassificationGarbage/testSet'
    testTypes = os.listdir(testDir)
    testTypes.sort()
    noGarbage = [cardboard, glass, metal, paper, plastic]
    for i in range(len(testTypes)):
        print("Among " + testTypes[i] + ", we found: ")
        for j in range(0, noGarbage[i]):
            image = random.choice(os.listdir(testDir + '/' + testTypes[i]))
            file = testDir + '/' + testTypes[i] + "/" + image
            prediction = classifyImage(file, clf)
            print(prediction)
            GT.empty(testTypes[i])
            dump.addGarbage(prediction)
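
# sortDump draws noGarbage[i] random images from each testSet class folder and
# classifies them; judging by the method names, GT.empty clears the truck
# compartment for the true class while dump.addGarbage files the item under
# the predicted class, so misclassified items land on the wrong pile.
# Hypothetical call for a truck carrying 2 cardboard and 1 glass item:
# sortDump(2, 1, 0, 0, 0, clf, GT, dump)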