# 2021-06-06 22:13:05 +02:00
#
# 285 lines
# 9.8 KiB

# Sebastian Raschka 2014-2020
# contributor: Vahid Mirjalili
# mlxtend Machine Learning Library Extensions
# A class for transforming face images.
# Author: Sebastian Raschka
# License: BSD 3 clause
import os
import warnings
import numpy as np
from . import extract_face_landmarks
from .utils import read_image
from ..externals.pyprind.progbar import ProgBar
from skimage.transform import warp, AffineTransform, resize
# Row indices (into a facial-landmark array) of the six points belonging to
# each eye; `EyepadAlign` averages these rows to locate the two eye centers.
LEFT_INDEX = np.arange(36, 42)
RIGHT_INDEX = np.arange(42, 48)
class EyepadAlign(object):
    """Class to align/transform face images to facial landmarks,
    based on eye alignment.

    1. A scaling factor is computed based on distance between the
       left and right eye, such that the transformed face image will
       have the same eye distance as a reference face image.

    2. A transformation is performed based on the eyes' center point
       to align the face based on the reference eye location.

    3. Finally, the transformed image is padded with zeros to match
       the desired final image size.

    Parameters
    ----------
    verbose : int (default=0)
        Verbose level to display the progress bar and log messages.
        Setting `verbose=1` will print a progress bar upon calling
        `fit_directory`.

    Attributes
    ----------
    target_landmarks_ : target landmarks to transform new face images to.
        Depending on the chosen `fit` parameters, it can be either
        (1) assigned to pre-fit shapes,
        (2) computed from a single face image
        (3) computed as the mean of face landmarks
            from all face images in a file directory of face images.

    eye_distance_ : the distance between left and right eyes
        in the target landmarks.

    eyes_mid_point_ : the middle point between the left and right eyes
        in the target landmarks.

    target_height_ : the height of the transformed output image.

    target_width_ : the width of the transformed output image.

    Notes
    -----
    For usage examples, please see the mlxtend documentation.

    """

    def __init__(self, verbose=0):
        self.verbose = verbose

    def fit_image(self, target_image):
        """Derives facial landmarks from a target image.

        Parameters
        ----------
        target_image : `uint8` numpy.array, shape=[height, width, channels]
            NumPy array representation of the image data. The first axis
            is used as the target height and the second as the target width.

        Returns
        -------
        self : object

        """
        landmarks = extract_face_landmarks(target_image)
        self.target_landmarks_ = landmarks
        self.target_width_ = target_image.shape[1]
        self.target_height_ = target_image.shape[0]
        self.eyes_mid_point_, self.eye_distance_ = \
            self._calc_eye_properties(self.target_landmarks_)
        return self

    def fit_directory(self, target_img_dir, target_height,
                      target_width, file_extension='.jpg',
                      pre_check=True):
        """
        Calculates the average landmarks for all face images
        in a directory which will then be set as the target landmark set.

        Parameters
        ----------
        target_img_dir : str
            Directory containing the images (searched recursively).

        target_height : int
            Expected image height of the images in the directory

        target_width : int
            Expected image width of the images in the directory

        file_extension : str (default='.jpg')
            File extension of the image files.

        pre_check : bool (default=True)
            Checks that each image has the dimensions specified via
            `target_height` and `target_width` on the whole directory
            first to identify potential issues that are recommended
            to be fixed before proceeding. Raises a warning for each
            image if dimensions differ from the ones specified and
            expected.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If no file with `file_extension` is found in `target_img_dir`,
            or if no face landmarks could be collected from any image.

        """
        self.target_height_ = target_height
        self.target_width_ = target_width

        # Paths are kept relative to `target_img_dir` because `read_image`
        # below receives the directory separately via `path=`.
        file_list = [os.path.relpath(os.path.join(dirpath, f),
                                     target_img_dir)
                     for (dirpath, _, filenames)
                     in os.walk(target_img_dir)
                     for f in filenames if f.endswith(file_extension)]

        if not file_list:
            raise ValueError('No images found in %s with extension %s.'
                             % (target_img_dir, file_extension))

        if pre_check:
            if self.verbose >= 1:
                print('Pre-Checking directory for'
                      ' consistent image dimensions...')
                pbar = ProgBar(len(file_list))
            for f in file_list:
                img = read_image(filename=f, path=target_img_dir)
                if self.verbose >= 1:
                    # NOTE(review): the statement under this guard was
                    # missing from the recovered source; advancing the
                    # progress bar is the assumed intent -- confirm.
                    pbar.update()
                if (img.shape[0] != self.target_height_
                        or img.shape[1] != self.target_width_):
                    warnings.warn('Image %s has '
                                  'dimensions %d x %d '
                                  'instead of %d x %d.'
                                  % (f, img.shape[0],
                                     img.shape[1],
                                     self.target_height_,
                                     self.target_width_))

        if self.verbose >= 1:
            print("Fitting the average facial landmarks "
                  "for %d face images " % (len(file_list)))
            pbar = ProgBar(len(file_list))

        landmarks_list = []
        for f in file_list:
            img = read_image(filename=f, path=target_img_dir)
            if self.verbose >= 1:
                # NOTE(review): reconstructed, see the pre-check loop.
                pbar.update()

            # NOTE(review): only a width mismatch triggers rescaling; an
            # image with matching width but different height is used as-is.
            if self.target_width_ != img.shape[1]:
                width_ratio = self.target_width_ / img.shape[1]
                height_ratio = self.target_height_ / img.shape[0]
                if np.abs(width_ratio - height_ratio) > 0.001:  # ignore
                    # Aspect ratio differs from the target, so the image
                    # cannot be rescaled without distortion; skip it.
                    # NOTE(review): `continue` is reconstructed from the
                    # "ignore" comment -- confirm.
                    continue
                img = resize(img, output_shape=(self.target_height_,
                                                self.target_width_),
                             anti_aliasing=True, mode='reflect')
                # Convert the rescaled image back to `uint8` before
                # landmark extraction.
                img = (img*255).astype('uint8')

            landmarks = extract_face_landmarks(img)
            if landmarks is not None:  # i.e., None == no face detected
                landmarks_list.append(landmarks)
            else:
                warnings.warn('No face detected in image %s. Image ignored.'
                              % f)

        if not landmarks_list:
            # Averaging an empty list would yield NaN and fail later with
            # an unrelated indexing error, so fail early and clearly.
            raise ValueError('No facial landmarks could be extracted from'
                             ' any image in %s.' % target_img_dir)

        self.target_landmarks_ = np.mean(landmarks_list, axis=0)
        self.eyes_mid_point_, self.eye_distance_ = \
            self._calc_eye_properties(self.target_landmarks_)
        return self

    def fit_values(self, target_landmarks, target_width, target_height):
        """ Used for determining the eye location from pre-defined
        landmark arrays, eliminating the need for re-computing
        the average landmarks on a target image or image directory.

        Parameters
        ----------
        target_landmarks : np.array, shape=(n_landmarks, n_coordinates)
            NumPy array containing the locations of the facial landmarks
            as determined by `mlxtend.image.extract_face_landmarks`.
            Rows 36-47 along the first axis are read as the eye landmarks.

        target_width : int
            image width

        target_height : int
            image height

        Returns
        -------
        self : object

        """
        self.target_landmarks_ = target_landmarks
        self.target_width_ = target_width
        self.target_height_ = target_height
        self.eyes_mid_point_, self.eye_distance_ = \
            self._calc_eye_properties(self.target_landmarks_)
        return self

    def _calc_eye_properties(self, landmarks):
        """ Calculates the eye properties of a landmark array.

        The center of each eye is the mean of its landmark rows
        (`LEFT_INDEX` / `RIGHT_INDEX`).

        Parameters
        ----------
        landmarks : np.array
            Facial landmark coordinates, one landmark per row.

        Returns
        -------
        eyes_mid_point : np.array
            The middle point between the two eye centers.

        eye_distance : float
            The Euclidean distance between the two eye centers.

        """
        left_eye = np.mean(landmarks[LEFT_INDEX], axis=0)
        right_eye = np.mean(landmarks[RIGHT_INDEX], axis=0)
        eyes_mid_point = (left_eye + right_eye)/2.0
        eye_distance = np.sqrt(np.sum(np.square(left_eye - right_eye)))
        return eyes_mid_point, eye_distance

    def transform(self, img):
        """ transforms a single face image (img) to the target landmarks
        based on the location of the eyes by
        scaling, translation and cropping (if needed):

        (1) Scaling the image so that the distance of the two eyes
            in the given image (img) matches the distance of the
            two eyes in the target landmarks.

        (2) Translation is performed based on the middle point
            between the two eyes.

        Parameters
        ----------
        img : np.array, shape=(height, width, channels)
            Input image to be transformed.

        Returns
        -------
        img_tr : `uint8` np.array or None
            The transformed image with `target_height_` rows and
            `target_width_` columns, or None if no face landmarks were
            found in `img`.

        Raises
        ------
        AttributeError
            If none of the `fit*` methods has been called yet.

        """
        if not hasattr(self, 'eyes_mid_point_'):
            raise AttributeError('Not fitted, yet. Call one of the `fit*`'
                                 ' methods prior to using `transform`.')

        landmarks = extract_face_landmarks(img)
        if landmarks is None:
            # NOTE(review): the body of this branch was missing from the
            # recovered source; returning None for "no face detected" is
            # the assumed behavior -- confirm.
            return None

        eyes_mid_point, eye_distance = self._calc_eye_properties(landmarks)

        # Scale so that the eye distance matches the target eye distance.
        scale = self.eye_distance_ / eye_distance
        # Offset that moves the (scaled) eye mid point onto the target mid
        # point, truncated to whole pixels.
        tr = (self.eyes_mid_point_/scale - eyes_mid_point)
        tr = (int(tr[0]*scale), int(tr[1]*scale))

        tform = AffineTransform(scale=(scale, scale), rotation=0, shear=0,
                                translation=tr)
        h, w = self.target_height_, self.target_width_
        img_tr = warp(img, tform.inverse, output_shape=(h, w))
        # Scale the warped image by 255 and store it as `uint8`.
        return np.array(img_tr*255, dtype='uint8')