changes in common part
This commit is contained in:
parent
9e608db67c
commit
76ec19cde0
@@ -1,231 +1,232 @@
# Creating training and test sets
import os
import numpy as np
import shutil
# Feature descriptors
import cv2
import mahotas
# Saving data
import h5py
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
# Classifier
from sklearn.ensemble import RandomForestClassifier
# Other
import random
from models.Garbagetruck import GarbageTruck
from models.garbageDump import Dump

# Dataset: https://www.kaggle.com/asdasdasasdas/garbage-classification
def createSets():
    # Split the raw dataset into train/test folders once; skip if already done.
    # (Only `os` is imported, so the guard must be os.path.exists, not path.exists.)
    if not os.path.exists('ClassificationGarbage/trainSet'):
        rootDir = 'ClassificationGarbage'
        typesDir = ['/cardboard', '/glass', '/metal', '/paper', '/plastic']
        testRatio = 0.2

        for cls in typesDir:
            os.makedirs(rootDir + '/trainSet' + cls)
            os.makedirs(rootDir + '/testSet' + cls)
            sourceDir = rootDir + cls
            allFileNames = os.listdir(sourceDir)
            np.random.shuffle(allFileNames)
            trainingFileNames, testFileNames = np.split(np.array(allFileNames), [int(len(allFileNames) * (1 - testRatio))])
            trainingFileNames = [sourceDir + '/' + name for name in trainingFileNames.tolist()]
            testFileNames = [sourceDir + '/' + name for name in testFileNames.tolist()]
            print(cls + ':')
            print('Total images: ', len(allFileNames))
            print('Training: ', len(trainingFileNames))
            print('Testing: ', len(testFileNames))

            for name in trainingFileNames:
                shutil.copy(name, rootDir + '/trainSet' + cls)
            for name in testFileNames:
                shutil.copy(name, rootDir + '/testSet' + cls)
            print("Images copied.")
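
# Editor's sketch (not part of the original commit): a quick sanity check that
# createSets() produced the expected layout and roughly an 80/20 split per
# class. The helper name _checkSplit is hypothetical and is never called here.
def _checkSplit(rootDir='ClassificationGarbage'):
    for cls in ['cardboard', 'glass', 'metal', 'paper', 'plastic']:
        nTrain = len(os.listdir(rootDir + '/trainSet/' + cls))
        nTest = len(os.listdir(rootDir + '/testSet/' + cls))
        print(cls, 'train:', nTrain, 'test:', nTest,
              'test share:', round(nTest / (nTrain + nTest), 2))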
def processTrainData():
    trainTypes = os.listdir('ClassificationGarbage/trainSet')
    trainTypes.sort()
    features = []
    types = []
    trainDir = 'ClassificationGarbage/trainSet/'
    size = (500, 500)

    # Process data
    for type in trainTypes:
        dir = os.path.join(trainDir, type)
        currentType = type
        print("Processing " + type + "...")
        for imagename in os.listdir(dir):
            file = dir + "/" + imagename
            image = cv2.imread(file)
            image = cv2.resize(image, size)
            # Global features
            huMoments = hu_moments(image)
            hara = haralick(image)
            histo = histogram(image)
            allFeatures = np.hstack([histo, hara, huMoments])
            types.append(currentType)
            features.append(allFeatures)
        print("Done.")
    print("All processed.")
    print("Training...")

    # Save data
    h5Data = 'output/data.h5'
    h5Types = 'output/types.h5'

    targetNames = np.unique(types)
    le = LabelEncoder()
    target = le.fit_transform(types)
    scaler = MinMaxScaler(feature_range=(0, 1))
    rescaledFeatures = scaler.fit_transform(features)

    fileData = h5py.File(h5Data, 'w')
    fileData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
    fileTypes = h5py.File(h5Types, 'w')
    fileTypes.create_dataset('dataset_1', data=np.array(target))
    fileData.close()
    fileTypes.close()
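
# Editor's sketch (not part of the original commit): reading the arrays back
# from the HDF5 files written above; 'dataset_1' is the dataset name this
# module uses throughout. The helper name _loadTrainData is hypothetical.
def _loadTrainData(h5Data='output/data.h5', h5Types='output/types.h5'):
    with h5py.File(h5Data, 'r') as f:
        features = np.array(f['dataset_1'])
    with h5py.File(h5Types, 'r') as f:
        labels = np.array(f['dataset_1'])
    return features, labels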
def processTestData():
    trainTypes = os.listdir('ClassificationGarbage/trainSet')
    trainTypes.sort()
    testDir = 'ClassificationGarbage/testSet/'
    size = (500, 500)
    testTypes = []
    testFeatures = []
    print("Testing...")

    # Process data
    for type in trainTypes:
        dir = os.path.join(testDir, type)
        currentType = type
        for imagename in os.listdir(dir):
            file = dir + "/" + imagename
            image = cv2.imread(file)
            image = cv2.resize(image, size)
            # Global features
            huMoments = hu_moments(image)
            hara = haralick(image)
            histo = histogram(image)
            allFeatures = np.hstack([histo, hara, huMoments])
            testTypes.append(currentType)
            testFeatures.append(allFeatures)

    # Save data
    h5TestData = 'output/testdata.h5'
    h5TestTypes = 'output/testtypes.h5'

    # Note: the scaler and label encoder are fitted afresh on the test set
    # here, independently of the ones fitted on the training set.
    targetNames = np.unique(testTypes)
    le = LabelEncoder()
    target = le.fit_transform(testTypes)
    scaler = MinMaxScaler(feature_range=(0, 1))
    rescaledFeatures = scaler.fit_transform(testFeatures)

    fileTestData = h5py.File(h5TestData, 'w')
    fileTestData.create_dataset('dataset_1', data=np.array(rescaledFeatures))
    fileTestTypes = h5py.File(h5TestTypes, 'w')
    fileTestTypes.create_dataset('dataset_1', data=np.array(target))
    fileTestData.close()
    fileTestTypes.close()
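
# Editor's sketch (not part of the original commit): a common alternative is
# to persist the training-set scaler and reuse it on the test features,
# instead of fitting a new MinMaxScaler on the test set as processTestData()
# does:
#     from joblib import dump, load
#     dump(scaler, 'output/scaler.joblib')    # after fitting in processTrainData()
#     scaler = load('output/scaler.joblib')   # in processTestData()
#     rescaledFeatures = scaler.transform(testFeatures)
# The scaler.joblib path is hypothetical.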
def trainAndTest():
    h5Data = 'output/data.h5'
    h5Types = 'output/types.h5'
    h5TestData = 'output/testdata.h5'
    h5TestTypes = 'output/testtypes.h5'

    # Import train data
    fileData = h5py.File(h5Data, 'r')
    fileTypes = h5py.File(h5Types, 'r')
    features = fileData['dataset_1']
    types = fileTypes['dataset_1']
    allFeatures = np.array(features)
    allTypes = np.array(types)
    fileData.close()
    fileTypes.close()

    # Create model
    clf = RandomForestClassifier(n_estimators=100, random_state=9)
    clf.fit(allFeatures, allTypes)

    # Import test data
    fileTestData = h5py.File(h5TestData, 'r')
    fileTestTypes = h5py.File(h5TestTypes, 'r')
    features = fileTestData['dataset_1']
    types = fileTestTypes['dataset_1']
    allFeatures = np.array(features)
    allTypes = np.array(types)
    fileTestData.close()
    fileTestTypes.close()

    # RFC score
    print("Random Forest Classifier score:")
    print(clf.score(allFeatures, allTypes))
    return clf
# Global features

def hu_moments(image):
    # 7 Hu moment shape descriptors
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    moments = cv2.moments(gray)
    huMoments = cv2.HuMoments(moments).flatten()
    return huMoments


def haralick(image):
    # 13 Haralick texture features, averaged over the four GLCM directions
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    return haralick


def histogram(image, mask=None):
    # 8x8x8 HSV colour histogram, normalized and flattened to 512 values
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    histogram = hist.flatten()
    return histogram
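
# Editor's sketch (not part of the original commit): the three descriptors
# stack into a fixed 532-value vector per image (512 histogram + 13 Haralick
# + 7 Hu), which is what processTrainData() and processTestData() store per
# row. The helper name _featureLength is hypothetical.
def _featureLength():
    dummy = np.random.randint(0, 256, (500, 500, 3), dtype=np.uint8)
    vec = np.hstack([histogram(dummy), haralick(dummy), hu_moments(dummy)])
    assert vec.shape == (532,)
    return vec.shape[0]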
# Test one image
def classifyImage(file, clf):
    size = (500, 500)
    types = os.listdir('ClassificationGarbage/testSet')
    types.sort()
    image = cv2.imread(file)
    image = cv2.resize(image, size)
    # Global features
    huMoments = hu_moments(image)
    hara = haralick(image)
    histo = histogram(image)
    allFeatures = np.hstack([histo, hara, huMoments])
    features = allFeatures.reshape(1, -1)
    prediction = clf.predict(features)[0]
    # LabelEncoder assigns indices in sorted class order, so the sorted
    # folder list maps the predicted index back to a type name.
    return types[prediction]
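
# Editor's sketch (not part of the original commit): classifying one image
# with the trained model; the file path below is hypothetical.
def _demoClassify(clf):
    file = 'ClassificationGarbage/testSet/glass/glass1.jpg'  # hypothetical path
    print(classifyImage(file, clf))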
# At the garbage dump
def sortDump(cardboard, glass, metal, paper, plastic, clf, GT, dump):
    testDir = 'ClassificationGarbage/testSet'
    testTypes = os.listdir(testDir)
    testTypes.sort()
    # Number of items to pick per type, in sorted type order
    noGarbage = [cardboard, glass, metal, paper, plastic]
    for i in range(len(testTypes)):
        print("Among " + testTypes[i] + ", we found: ")
        for j in range(0, noGarbage[i]):
            image = random.choice(os.listdir(testDir + '/' + testTypes[i]))
            file = testDir + '/' + testTypes[i] + "/" + image
            prediction = classifyImage(file, clf)
            print(prediction)
            GT.empty(testTypes[i])
            dump.addGarbage(prediction)
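
# Editor's sketch (not part of the original commit): the intended end-to-end
# flow, assuming the Kaggle class folders sit under ClassificationGarbage/ and
# an output/ directory exists; sortDump() would then be driven with a
# GarbageTruck and a Dump instance from models/.
if __name__ == '__main__':
    createSets()
    processTrainData()
    processTestData()
    clf = trainAndTest()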