63 changed files with 42 additions and 88981 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,5 @@
 data
 .idea
 .yoloface
 # Byte-compiled / optimized / DLL files
 __pycache__/
--- a/README.md
+++ b/README.md
@ -1,40 +0,0 @@
 # wko_anime-face-similarity
 Projekt przygotowany na zajęcia z widzenia komputerowego.
 Rozpoznaje twarz na zdjęciu wejściowym i dokonując transferu stylu do anime, porównuje zdjęcie ze zbiorem postaci
 z anime i wskazuje podobieństwa według wybranych metryk.
 ## Instalacja
 1. Pobranie submodułów:
    ```shell
    $ git submodule update --init
    ```
 2. Instalacja zależności:
   * Windows/Linux
     ```shell
     $ pip install -r requirements.txt     
     ```
   * MacOS
     ```shell
     $ pip install -r requirements-osx.txt
     ```
 3. Konfiguracja DCT-Netu (anime style transfer)
    ```shell
    $ cd DCT-Net && python download.py
    ```
 4. Pobranie datasetu twarzy postaci z anime (MyAnimeList)
    ```shell
    $ python scrape_data.py
    ```
 ## Uruchomienie
 Na tę chwilę zdjęcie poddawane porównaniu to `UAM-Andre.jpg`
 ```shell
 $ python main.py
 ```
 ### Walidacja
 Do walidacji metryk na podstawie testowego datasetu z cosplayerami (`test_set`) uruchamiamy:
 ```shell
 $ python main.py --validate_only 1
 ```
--- a/comparisons.py
+++ b/comparisons.py
@ -40,42 +40,3 @@ def euclidean_distance(data_a: np.ndarray, data_b: np.ndarray) -> float:
    result += (histogram_a[i] - histogram_b[i]) ** 2
    i += 1
  return result[0] ** (1 / 2)
 def get_top_results(all_metrics: list[dict], metric='correlation', count=1):
   """Return the `count` highest-scoring entries for a single metric.

   Args:
     all_metrics: entries of the form {'name': ..., 'metrics': {metric: value}}.
     metric: key looked up in each entry's 'metrics' mapping.
     count: maximum number of entries to return.

   Returns:
     A list of {'name': ..., 'score': ...} dicts ordered by descending score.

   The input list is left untouched; ranking is done on a sorted copy so that
   callers can keep relying on their own ordering.
   """
   # NOTE(review): ranking is always descending, as before. For distance-like
   # metrics (e.g. 'euclidean-distance') lower is presumably better - confirm.
   ranked = sorted(all_metrics, key=lambda item: item['metrics'][metric], reverse=True)
   return [
     {'name': item['name'], 'score': item['metrics'][metric]}
     for item in ranked[:count]
   ]
 class AccuracyGatherer:
   """Accumulate top-k hit counts per metric and report them as percentages.

   A "hit" for a given k and metric means the expected label appears among the
   top-k results returned by get_top_results for that metric.
   """

   # Metric keys evaluated for every result set.
   all_metric_names = [
     'structural-similarity',
     'euclidean-distance',
     'chi-square',
     'correlation',
     'intersection',
     'bhattacharyya-distance'
   ]

   def __init__(self, count, top_ks=(1, 3, 5)):
     """Start with zero hits.

     Args:
       count: number of evaluated samples, used as the accuracy denominator.
       top_ks: the k values for which top-k accuracy is tracked.
     """
     self.top_ks = top_ks
     self.hits = {k: {metric: 0 for metric in AccuracyGatherer.all_metric_names} for k in top_ks}
     self.count = count

   def print(self):
     """Print the accuracy of every metric for every tracked k."""
     for k in self.top_ks:
       print(f'Top {k} matches results:')
       for metric in AccuracyGatherer.all_metric_names:
         # With no evaluated samples there is nothing to divide by; report 0%.
         ratio = self.hits[k][metric] / self.count if self.count else 0.0
         print(f'\t{metric}: {ratio * 100}%')

   def for_results(self, results, test_label):
     """Record hits for one sample given its full list of comparison results."""
     top_results_all_metrics = {
       k: {m: get_top_results(results, m, k) for m in AccuracyGatherer.all_metric_names}
       for k in self.top_ks
     }
     for metric_name in AccuracyGatherer.all_metric_names:
       self.add_if_hit(top_results_all_metrics, test_label, metric_name)

   def add_if_hit(self, results, test_label, metric_name):
     """Increment the hit counter of `metric_name` for each k whose top-k contains `test_label`.

     `results` is indexed as results[k][metric_name] and holds dicts with a 'name' key.
     """
     for k in self.top_ks:
       if any(entry['name'] == test_label for entry in results[k][metric_name]):
         self.hits[k][metric_name] += 1
--- a/face_detect.py
+++ b/face_detect.py
@ -1,54 +0,0 @@
 import cv2
 import numpy as np
 from yoloface import face_analysis
 # Module-level yoloface detector instance; find_face_bbox_yolo calls its face_detection method.
 face_detector = face_analysis()
 def equalize_image(data: np.ndarray):
   """Equalize the histogram of channel 2 of the HSV form of an image.

   The input is converted with COLOR_RGB2HSV, channel index 2 is passed through
   cv2.equalizeHist, and the result is converted back with COLOR_HSV2RGB.
   """
   hsv = cv2.cvtColor(data, cv2.COLOR_RGB2HSV)
   equalized_channel = cv2.equalizeHist(hsv[:, :, 2])
   hsv[:, :, 2] = equalized_channel
   return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
 def find_face_bbox_yolo(data: np.ndarray):
   """Run the module-level yoloface detector on `data`.

   Returns:
     (boxes, confidences) as reported by the detector, or (None, None) when
     the detector reports no boxes.
   """
   _, boxes, confidences = face_detector.face_detection(
     frame_arr=data, frame_status=True, model='full'
   )
   if len(boxes) >= 1:
     return boxes, confidences
   return None, None
 def find_face_bbox(data: np.ndarray):
   """Locate a face in an image and return its bounding box.

   The image is histogram-equalized first. The yoloface detector is tried
   first; if it reports nothing, the cascade classifiers below are tried in
   order and the largest detection (by third * fourth element, i.e. area) of
   the first cascade that finds anything is returned.

   Args:
     data: image array; it is treated as RGB by the colour conversions used.

   Returns:
     A single bounding box (the first yoloface box, or the largest cascade box).

   Raises:
     ValueError: if no detector finds a face.
   """
   classifier_files = [
     'haarcascades/haarcascade_frontalface_default.xml',
     'haarcascades/haarcascade_frontalface_alt.xml',
     'haarcascades/haarcascade_frontalface_alt2.xml',
     'haarcascades/haarcascade_profileface.xml',
     'haarcascades/haarcascade_glasses.xml',
     'lbpcascade_animeface.xml',
   ]
   data_equalized = equalize_image(data)

   # NOTE(review): confirm that the element order of yoloface boxes matches
   # the (x, y, w, h) order that crop_face unpacks.
   face_coords, _ = find_face_bbox_yolo(cv2.cvtColor(data_equalized, cv2.COLOR_RGB2BGR))
   if face_coords is not None:
     return face_coords[0]

   data_gray = cv2.cvtColor(data_equalized, cv2.COLOR_RGB2GRAY)
   for classifier in classifier_files:
     face_cascade = cv2.CascadeClassifier(classifier)
     # A cascade whose file could not be loaded is empty and cannot detect.
     if face_cascade.empty():
       continue
     face_coords = face_cascade.detectMultiScale(data_gray, 1.1, 3)
     # detectMultiScale yields an empty sequence (never None) when nothing is
     # found, so test the length to actually fall through to the next cascade.
     if len(face_coords) > 0:
       return max(face_coords, key=lambda v: v[2] * v[3])

   raise ValueError('no face found in image')
 def crop_face(data: np.ndarray, bounding_box) -> np.ndarray:
   """Crop an enlarged window around a bounding box.

   The box is grown by `factor` of its width/height on every side, then
   truncated to the image. Slicing already truncates at the bottom/right
   edges; the top/left edges are clamped explicitly.

   Args:
     data: image array indexed as [row, column, ...].
     bounding_box: (x, y, w, h) of the detected face.

   Returns:
     The cropped view of `data`.
   """
   x, y, w, h = bounding_box
   # Extending the boxes
   factor = 0.4
   left, top = round(x - factor * w), round(y - factor * h)
   width, height = round(w + factor * w * 2), round(h + factor * h * 2)
   # Fix the far corner before clamping the origin; clamping first would shift
   # the window right/down instead of truncating it at the image border.
   right, bottom = max(left + width, 0), max(top + height, 0)
   left, top = max(left, 0), max(top, 0)
   return data[top:bottom, left:right]
--- a/haarcascades/haarcascade_frontalface_alt2.xml
+++ b/haarcascades/haarcascade_frontalface_alt2.xml
--- a/haarcascades/haarcascade_glasses.xml
+++ b/haarcascades/haarcascade_glasses.xml
--- a/haarcascades/haarcascade_profileface.xml
+++ b/haarcascades/haarcascade_profileface.xml
--- a/helpers.py
+++ b/helpers.py
@ -1,12 +0,0 @@
 import os
 import sys
 def no_stdout(func):
   """Decorator that discards everything `func` writes to sys.stdout.

   sys.stdout is pointed at os.devnull for the duration of the call and is
   restored afterwards, even when `func` raises. The devnull handle is closed
   on exit. The wrapped function's return value is passed through.
   """
   from functools import wraps

   @wraps(func)
   def wrapper(*args, **kwargs):
     old_stdout = sys.stdout
     with open(os.devnull, "w") as devnull:
       sys.stdout = devnull
       try:
         return func(*args, **kwargs)
       finally:
         # Always restore, otherwise an exception would silence all later output.
         sys.stdout = old_stdout
   return wrapper
--- a/load_test_data.py
+++ b/load_test_data.py
@ -1,43 +0,0 @@
 import numpy as np
 import os
 from skimage.io import imread
 import cv2 as cv
 from pathlib import Path
 def load_source(filename: str) -> np.ndarray:
   """Read an image with OpenCV and return it with its last axis reversed.

   Reversing the last axis turns OpenCV's default BGR channel order into RGB.

   Raises:
     FileNotFoundError: if OpenCV could not read the file (imread returned None).
   """
   image = cv.imread(filename)
   if image is None:
     # imread signals failure by returning None instead of raising.
     raise FileNotFoundError(f'cannot read image: {filename}')
   return image[..., ::-1]
 def load_data(input_dir):
     """Load every file in `input_dir` as an image.

     Args:
         input_dir: directory whose entries are all read with load_source.

     Returns:
         A dict with:
           "values": np.array built from the loaded images,
           "name": unique file names in first-seen order,
           "names_count": lengths of runs of consecutive identical file names,
           "labels": the file name of each image, aligned with "values".
         An empty directory yields empty collections instead of an IndexError.
     """
     from itertools import groupby

     image_path = Path(input_dir)
     file_names = os.listdir(image_path)

     # dict.fromkeys keeps first-seen order and de-duplicates in linear time.
     categories_name = list(dict.fromkeys(file_names))
     categories_count = [sum(1 for _ in run) for _, run in groupby(file_names)]

     # load_source returns an ndarray of shape rows x cols x colour channels.
     test_img = [load_source(str(image_path / name)) for name in file_names]

     return {
         "values": np.array(test_img),
         "name": categories_name,
         "names_count": categories_count,
         "labels": list(file_names),
     }
--- a/main.py
+++ b/main.py
@ -1,16 +1,9 @@
 import argparse
 import sys
 import cv2
 import matplotlib.pyplot as plt
 import numpy as np
 import matplotlib.pyplot as plt
-from metrics import histogram_comparison, structural_similarity_index, euclidean_distance, AccuracyGatherer
+from comparisons import histogram_comparison, structural_similarity_index, euclidean_distance
 from face_detect import find_face_bbox, crop_face
 from helpers import no_stdout
 from load_test_data import load_data, load_source
 from metrics import get_top_results
 from plots import plot_two_images, plot_results
 # Allows imports from the style transfer submodule
 sys.path.append('DCT-Net')
@ -18,97 +11,50 @@ sys.path.append('DCT-Net')
 from source.cartoonize import Cartoonizer
-anime_transfer = Cartoonizer(dataroot='DCT-Net/damo/cv_unet_person-image-cartoon_compound-models')
+def load_source(filename: str) -> np.ndarray:
  return cv2.imread(filename)[..., ::-1]
-def compare_with_anime_characters(source_image: np.ndarray, anime_faces_dataset: dict, verbose=False) -> list[dict]:
+def find_and_crop_face(data: np.ndarray, classifier_file='haarcascades/haarcascade_frontalface_default.xml') -> np.ndarray:
-  all_metrics = []
+  data_gray = cv2.cvtColor(data, cv2.COLOR_BGR2GRAY)
-  for anime_image, label in zip(anime_faces_dataset['values'], anime_faces_dataset['labels']):
+  face_cascade = cv2.CascadeClassifier(classifier_file)
-    current_result = {
+  face = face_cascade.detectMultiScale(data_gray, 1.1, 3)
-      'name': label,
+  face = max(face, key=len)
-      'metrics': {}
+  x, y, w, h = face
-    }
+  face = data[y:y + h, x:x + w]
-    # TODO: Use a different face detection method for anime images
+  return face
-    # anime_face = find_and_crop_face(anime_image, 'haarcascades/lbpcascade_animeface.xml')
+
-    anime_face = anime_image
+
-    source_rescaled = cv2.resize(source_image, anime_face.shape[:2])
+def plot_two_images(a: np.ndarray, b: np.ndarray):
-    if verbose:
+  plt.figure(figsize=[10, 10])
-      plot_two_images(anime_face, source_rescaled)
+  plt.subplot(121)
-    current_result['metrics'] = histogram_comparison(source_rescaled, anime_face)
+  plt.imshow(a)
-    current_result['metrics']['structural-similarity'] = structural_similarity_index(source_rescaled, anime_face)
+  plt.title("A")
-    current_result['metrics']['euclidean-distance'] = euclidean_distance(source_rescaled, anime_face)
+  plt.subplot(122)
-    all_metrics.append(current_result)
+  plt.imshow(b)
-
+  plt.title("B")
-  return all_metrics
+  plt.show()
 def compare_with_anime_characters(data: np.ndarray) -> None:
   """Compare `data` against one example anime face and print the metrics.

   The example face is detected in 'data/images/Aisaka, Taiga.jpg'; `data` is
   resized to the example face's size, both are plotted, and the histogram,
   structural-similarity and euclidean-distance results are printed.
   Nothing is returned.
   """
   # Example will be one face from anime dataset
   example = load_source('data/images/Aisaka, Taiga.jpg')
   # TODO: Use a different face detection method for anime images
   example_face = find_and_crop_face(example, 'haarcascades/lbpcascade_animeface.xml')
   # cv2.resize expects dsize as (width, height), the reverse of ndarray.shape,
   # so passing shape[:2] directly transposes non-square targets.
   face_height, face_width = example_face.shape[:2]
   data_rescaled = cv2.resize(data, (face_width, face_height))
   plot_two_images(example_face, data_rescaled)
   print(histogram_comparison(data_rescaled, example_face))
   print(f'structural-similarity: {structural_similarity_index(data_rescaled, example_face)}')
   print(f'euclidean-distance: {euclidean_distance(data_rescaled, example_face)}')
@no_stdout
 def transfer_to_anime(img: np.ndarray):
-  model_out = anime_transfer.cartoonize(img).astype(np.uint8)
+  algo = Cartoonizer(dataroot='DCT-Net/damo/cv_unet_person-image-cartoon_compound-models')
-  return cv2.cvtColor(model_out, cv2.COLOR_BGR2RGB)
+  return algo.cartoonize(img).astype(np.uint8)
 def similarity_to_anime(source_image, anime_faces_set, debug=False):
   """Compare the anime-styled face of `source_image` with `anime_faces_set`.

   The face box is found on the original image, the whole image is passed
   through transfer_to_anime, and the styled image is cropped with that box
   before comparison. With `debug` set, the boxed source, the styled image and
   the styled crop are shown in one figure.

   Returns:
     The result of compare_with_anime_characters, or None when
     find_face_bbox raises ValueError.
   """
   try:
     face_bbox = find_face_bbox(source_image)
   except ValueError:
     return None

   anime_image = transfer_to_anime(source_image)
   anime_face = crop_face(anime_image, face_bbox)

   if debug:
     boxed = source_image.copy()
     x, y, w, h = face_bbox
     cv2.rectangle(boxed, (x, y), (x + w, y + h), (255, 0, 0), 2)
     plt.figure(figsize=[12, 4])
     for position, panel in enumerate((boxed, anime_image, anime_face), start=1):
       plt.subplot(1, 3, position)
       plt.imshow(panel)
     plt.show()

   return compare_with_anime_characters(anime_face, anime_faces_set, verbose=debug)
 def validate(test_set, anime_faces_set):
   """Run every test image through similarity_to_anime and print the accuracy.

   Images for which similarity_to_anime returns None are reported and removed
   from the accuracy denominator.
   """
   total = len(test_set['values'])
   accuracy = AccuracyGatherer(total)
   skipped = 0
   for test_image, test_label in zip(test_set['values'], test_set['labels']):
     test_results = similarity_to_anime(test_image, anime_faces_set)
     if test_results is not None:
       accuracy.for_results(test_results, test_label)
     else:
       print(f"cannot find face for {test_label}")
       skipped += 1
   accuracy.count = total - skipped
   accuracy.print()
 def main():
   """Command-line entry point.

   With -v/--validate_only set to any non-empty value, the 'test_set' directory
   is validated against the anime face dataset and the process exits.
   Otherwise a single hard-coded source image is compared and the top four
   'correlation' matches are printed and plotted.
   """
   parser = argparse.ArgumentParser()
   parser.add_argument('-v', '--validate_only')
   args = parser.parse_args()
   anime_faces_set = load_data('data/croped_anime_faces')
   if args.validate_only:
     print('Validating')
     test_set = load_data('test_set')
     validate(test_set, anime_faces_set)
     sys.exit(0)
   source_path = 'test_set/Ayanokouji, Kiyotaka.jpg'
   source = load_source(source_path)
   results = similarity_to_anime(source, anime_faces_set)
   if results is None:
     # similarity_to_anime returns None when no face is found; stop cleanly
     # instead of failing inside get_top_results.
     sys.exit(f'cannot find face for {source_path}')
   method = 'correlation'
   top_results = get_top_results(results, count=4, metric=method)
   print(top_results)
   plot_results(source, transfer_to_anime(source), top_results, anime_faces_set, method)
 if __name__ == '__main__':
-  main()
+  source = load_source('UAM-Andre.jpg')
  source_anime = transfer_to_anime(source)
  source_face_anime = find_and_crop_face(source_anime)
  print(compare_with_anime_characters(source_face_anime))
--- a/plots.py
+++ b/plots.py
@ -1,45 +0,0 @@
 import numpy as np
 from matplotlib import pyplot as plt, gridspec
 def plot_two_images(a: np.ndarray, b: np.ndarray):
   """Show two images side by side in one 10x10 figure, titled "A" and "B"."""
   plt.figure(figsize=[10, 10])
   for position, (image, title) in enumerate(((a, "A"), (b, "B")), start=1):
     plt.subplot(1, 2, position)
     plt.imshow(image)
     plt.title(title)
   plt.show()
 def plot_results(source, source_anime, results, anime_faces_set, method):
     """Plot the source image, its anime-style version and the predicted matches.

     Args:
         source: the original image (top row, left of centre).
         source_anime: the anime-styled image (top row, centre).
         results: dicts with 'name' and 'score', one per prediction (bottom row).
         anime_faces_set: dict with aligned 'values' (images) and 'labels' (names).
         method: metric name shown in the figure footer.
     """
     # The top row needs two distinct cells; with a single result the original
     # column arithmetic put both top-row images into the same cell.
     cols = max(len(results), 2)
     plt.figure(figsize=[3*cols, 7])
     gs = gridspec.GridSpec(2, cols)

     plt.subplot(gs[0, cols // 2 - 1])
     plt.imshow(source)
     plt.title('Your image')
     plt.axis('off')

     plt.subplot(gs[0, cols // 2])
     plt.imshow(source_anime)
     plt.title('Your image in Anime style')
     plt.axis('off')

     plt.figtext(0.5, 0.525, "Predictions", ha="center", va="top", fontsize=16)
     for idx, prediction in enumerate(results):
         # Look the predicted image up through its label's position.
         result_img = anime_faces_set['values'][anime_faces_set['labels'].index(prediction['name'])]
         plt.subplot(gs[1, idx])
         plt.imshow(result_img, interpolation='bicubic')
         plt.title(f'{prediction["name"].partition(".")[0]}, score={str(round(prediction["score"], 4))}')
         plt.axis('off')

     plt.tight_layout()
     plt.figtext(0.5, 0.01, f"Metric: {method}", ha="center", va="bottom", fontsize=12)
     plt.subplots_adjust(wspace=0, hspace=0.1)
     plt.show()
--- a/requirements-osx.txt
+++ b/requirements-osx.txt
@ -1,11 +0,0 @@
 tensorflow-macos==2.11.0
 easydict==1.10
 numpy==1.23.1
 modelscope==1.1.3
 requests==2.28.2
 beautifulsoup4==4.11.1
 lxml==4.9.2
 opencv-python==4.7.0.68
 torch==1.13.1
 matplotlib==3.6.3
 scikit-image==0.19.3
--- a/requirements.txt
+++ b/requirements.txt
@ -9,5 +9,3 @@ opencv-python==4.7.0.68
 torch==1.13.1
 matplotlib==3.6.3
 scikit-image==0.19.3
 yoloface==0.0.4
 ipython==8.9.0
--- a/test_set/Ackerman,
+++ b/test_set/Ackerman,
--- a/test_set/Aisaka,
+++ b/test_set/Aisaka,
--- a/test_set/Alucard.jpg
+++ b/test_set/Alucard.jpg
--- a/test_set/Araragi,
+++ b/test_set/Araragi,
--- a/test_set/Ayanokouji,
+++ b/test_set/Ayanokouji,
--- a/test_set/Dazai,
+++ b/test_set/Dazai,
--- a/test_set/Elric,
+++ b/test_set/Elric,
--- a/test_set/Emilia.jpg
+++ b/test_set/Emilia.jpg
--- a/test_set/Evergarden,
+++ b/test_set/Evergarden,
--- a/test_set/Gasai,
+++ b/test_set/Gasai,
--- a/test_set/Gojou,
+++ b/test_set/Gojou,
--- a/test_set/Guts.jpg
+++ b/test_set/Guts.jpg
--- a/test_set/Hatake,
+++ b/test_set/Hatake,
--- a/test_set/Hikigaya,
+++ b/test_set/Hikigaya,
--- a/test_set/Joestar,
+++ b/test_set/Joestar,
--- a/test_set/Kamina.jpg
+++ b/test_set/Kamina.jpg
--- a/test_set/Kaneki,
+++ b/test_set/Kaneki,
--- a/test_set/Kirigaya,
+++ b/test_set/Kirigaya,
--- a/test_set/Kurosaki,
+++ b/test_set/Kurosaki,
--- a/test_set/Lamperouge,
+++ b/test_set/Lamperouge,
--- a/test_set/Lawliet,
+++ b/test_set/Lawliet,
--- a/test_set/Levi.jpg
+++ b/test_set/Levi.jpg
--- a/test_set/Makise,
+++ b/test_set/Makise,
--- a/test_set/Megumin.jpg
+++ b/test_set/Megumin.jpg
--- a/test_set/Monkey
+++ b/test_set/Monkey
--- a/test_set/Morow,
+++ b/test_set/Morow,
--- a/test_set/Mustang,
+++ b/test_set/Mustang,
--- a/test_set/Okabe,
+++ b/test_set/Okabe,
--- a/test_set/Onizuka,
+++ b/test_set/Onizuka,
--- a/test_set/Oshino,
+++ b/test_set/Oshino,
--- a/test_set/Reigen,
+++ b/test_set/Reigen,
--- a/test_set/Rem.jpg
+++ b/test_set/Rem.jpg
--- a/test_set/Roronoa,
+++ b/test_set/Roronoa,
--- a/test_set/Saber.jpg
+++ b/test_set/Saber.jpg
--- a/test_set/Saitama.jpg
+++ b/test_set/Saitama.jpg
--- a/test_set/Sakata,
+++ b/test_set/Sakata,
--- a/test_set/Sakurajima,
+++ b/test_set/Sakurajima,
--- a/test_set/Sanji.jpg
+++ b/test_set/Sanji.jpg
--- a/test_set/Senjougahara,
+++ b/test_set/Senjougahara,
--- a/test_set/Spiegel,
+++ b/test_set/Spiegel,
--- a/test_set/Uchiha,
+++ b/test_set/Uchiha,
--- a/test_set/Uchiha,
+++ b/test_set/Uchiha,
--- a/test_set/Uzumaki,
+++ b/test_set/Uzumaki,
--- a/test_set/Yagami,
+++ b/test_set/Yagami,
--- a/test_set/Yato.jpg
+++ b/test_set/Yato.jpg
--- a/test_set/Yeager,
+++ b/test_set/Yeager,
--- a/test_set/Yuuki,
+++ b/test_set/Yuuki,
--- a/test_set/Zero
+++ b/test_set/Zero
--- a/test_set/Zoldyck,
+++ b/test_set/Zoldyck,
--- a/test_set/holo.jpg
+++ b/test_set/holo.jpg