face_mask_detection/data_preprocessing.ipynb
2022-02-16 02:14:07 +01:00

9.1 KiB
Raw Permalink Blame History

import glob
import pandas as pd
import cv2
import os
from xml.etree import ElementTree
# Input/output locations, relative to the notebook's working directory.
ANNOTATIONS_DIR = './data/annotations'
IMAGES_DIR = './data/images'
CROPPED_DIR = './data/cropped_images'

# Bounding-box fields read from every <bndbox> element, in column order.
BOX_FIELDS = ('xmin', 'ymin', 'xmax', 'ymax')


def _local_tag(element):
    """Return the element's tag name with any '{namespace}' prefix removed."""
    return element.tag.rsplit('}', 1)[-1]


def parse_annotations(annotations_dir):
    """Collect one record per annotated object from the XML files in a directory.

    Every '*.xml' file in ``annotations_dir`` is scanned for <object> elements.
    For each object the text of its <name> child becomes the label and the
    xmin/ymin/xmax/ymax children of its <bndbox> child become the box
    coordinates (parsed as floats, rounded to the nearest int).

    A record is appended to all columns at once, and only when the object has
    a label and all four coordinates, so the columns always have equal length
    and stay aligned row by row. Incomplete objects are skipped.

    Args:
        annotations_dir: Directory containing the annotation XML files.

    Returns:
        A dict of equally long lists with the keys 'xmin', 'ymin', 'xmax',
        'ymax', 'label' and 'file'; 'file' is the annotation file name
        without its extension.

    Raises:
        xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
        ValueError: If a coordinate is not numeric.
    """
    records = {'xmin': [], 'ymin': [], 'xmax': [], 'ymax': [], 'label': [], 'file': []}

    # glob returns files in arbitrary order; sorting keeps the row numbering
    # (and therefore anything named after the row index) reproducible.
    for xml_path in sorted(glob.glob(os.path.join(annotations_dir, '*.xml'))):
        file_name = os.path.splitext(os.path.basename(xml_path))[0]

        for element in ElementTree.parse(xml_path).iter():
            if _local_tag(element) != 'object':
                continue

            label = None
            box = {}
            for child in element:
                tag = _local_tag(child)
                if tag == 'name':
                    label = child.text
                elif tag == 'bndbox':
                    box = {_local_tag(dim): dim.text for dim in child}

            coords = [box.get(field) for field in BOX_FIELDS]
            if label is None or None in coords:
                # Incomplete annotation: adding only part of it would
                # misalign the columns.
                continue

            # Convert everything before appending so a bad value cannot
            # leave a half-written row behind.
            values = [int(round(float(text))) for text in coords]
            for field, value in zip(BOX_FIELDS, values):
                records[field].append(value)
            records['label'].append(label)
            records['file'].append(file_name)

    return records


metadata = parse_annotations(ANNOTATIONS_DIR)
metadata_df = pd.DataFrame(metadata)
metadata_df
xmin ymin xmax ymax label file
0 79 105 109 142 without_mask maksssksksss0
1 185 100 226 144 with_mask maksssksksss0
2 325 90 360 141 without_mask maksssksksss0
3 321 34 354 69 with_mask maksssksksss1
4 224 38 261 73 with_mask maksssksksss1
... ... ... ... ... ... ...
4067 263 62 287 85 with_mask maksssksksss98
4068 344 80 377 106 with_mask maksssksksss98
4069 181 54 273 162 mask_weared_incorrect maksssksksss99
4070 99 87 176 165 with_mask maksssksksss99
4071 289 99 355 233 with_mask maksssksksss99

4072 rows × 6 columns

# Crop every annotated box out of its source image and save it as
# '<row index>.png' in CROPPED_DIR, then write the labels in the same row
# order so that line k of labels.txt describes '<k>.png'.

# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs(CROPPED_DIR, exist_ok=True)

# Remember the most recently decoded image so that consecutive rows that
# refer to the same file do not read it from disk again.
loaded_path = None
image = None

for i, row in enumerate(metadata_df.itertuples(index=False)):

    path = IMAGES_DIR + '/' + row.file + '.png'

    if path != loaded_path:
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising.
        # Skipping the row would shift the crop/label pairing, so stop here.
        if image is None:
            raise FileNotFoundError('Could not read image: ' + path)
        loaded_path = path

    # A negative start index would wrap around in NumPy slicing and select
    # the wrong region; upper bounds past the image edge are clipped by
    # the slice itself.
    xmin = max(row.xmin, 0)
    ymin = max(row.ymin, 0)

    cropped_image = image[ymin:row.ymax, xmin:row.xmax]

    cropped_path = CROPPED_DIR + '/' + str(i) + '.png'

    # cv2.imwrite reports some failures only through its return value.
    if not cv2.imwrite(cropped_path, cropped_image):
        raise OSError('Could not write cropped image: ' + cropped_path)

metadata_df['label'].to_csv(r'./data/labels.txt', index=False, header=False)