Compare commits

..

No commits in common. "master" and "dataset_andrzej" have entirely different histories.

481 changed files with 33 additions and 1256 deletions

View File

@ -1,16 +1,9 @@
# projekt_widzenie # projekt_widzenie
## Autorzy
Mikołaj Pokrywka,
Kamil Guttmann,
Andrzej Preibisz
## Run application ## Run application
1. `pip install -r requirements.txt` 1. `pip install -r requirements.txt`
2. `sudo apt-get install ffmpeg` 2. `streamlit run main.py`
3. `streamlit run main.py` 3. On http://localhost:8501/ you should see the app
4. On http://localhost:8501/ you should see the app
@ -18,84 +11,8 @@ Andrzej Preibisz
Mamy łącznie 197784 zdjęć Mamy łącznie 197784 zdjęć
+ swój własnoręcznie zrobiony zbiór testowy: 148 zdjęć
Linki do datasetów: Linki do datasetów:
1. https://www.kaggle.com/datasets/mrgeislinger/asl-rgb-depth-fingerspelling-spelling-it-out 1. https://www.kaggle.com/datasets/mrgeislinger/asl-rgb-depth-fingerspelling-spelling-it-out
2. https://www.kaggle.com/datasets/grassknoted/asl-alphabet 2. https://www.kaggle.com/datasets/grassknoted/asl-alphabet
3. https://www.kaggle.com/datasets/lexset/synthetic-asl-alphabet 3. https://www.kaggle.com/datasets/lexset/synthetic-asl-alphabet
4. https://www.kaggle.com/datasets/kuzivakwashe/significant-asl-sign-language-alphabet-dataset 4. https://www.kaggle.com/datasets/kuzivakwashe/significant-asl-sign-language-alphabet-dataset
## Trening modelu
Do trenowania używano biblioteki Keras
### Pierwsze podejście model trenowany od zera (from scratch)
```
img_height=256
img_width=256
batch_size=128
epochs=30
```
```
layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
layers.Conv2D(16, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(32, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(64, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(29,activation='softmax')
```
Zbiór testowy własny: 22% Accuracy
Zbiór testowy mieszany z Kaggle: 80% Accuracy
---
## Drugie podejście model VGG16
Zastosowano early stopping z val_loss
```
img_height=224
img_width=224
batch_size=128
epochs=50
```
Usunięto 3 wierzchnie warstwy i dodano warstwy:
```
x = layers.Flatten()(vgg_model.output)
x = layers.Dense(len(class_names), activation='softmax')(x)
```
Zbiór testowy własny: 52% Accuracy
Zbiór testowy mieszany z Kaggle: 79% Accuracy
## Trzecie podejście model VGG16 z detekcją dłoni
Model jak powyżej, tylko datasety zostały przerobione modelem do detekcji dłoni i wycięciem odpowiedniego fragmentu ze zdjęcia
Zbiór testowy własny: 61% Accuracy
## Czwarte podejście model VGG16 z detekcją dłoni i zaznaczeniem szkieletu
Model jak powyżej, tylko datasety zostały przerobione modelem do detekcji dłoni, wycięciem odpowiedniego fragmentu ze zdjęcia, a także zaznaczeniem "szkieletu" dłoni
Zbiór testowy własny: 70% Accuracy
## Piąte podejście model VGG19 z detekcją dłoni i zaznaczeniem szkieletu
Model jak powyżej, tylko datasety zostały przerobione modelem do detekcji dłoni, wycięciem odpowiedniego fragmentu ze zdjęcia, a także zaznaczeniem "szkieletu" dłoni
Zbiór testowy własny: 67% Accuracy

View File

@ -1,39 +0,0 @@
import os
from cvzone.HandTrackingModule import HandDetector
import cv2
def crop_hand(img, detector, offset=None):
    """Crop ``img`` to the first detected hand plus a margin.

    The margin argument used to be overwritten unconditionally, so any value
    passed by a caller was ignored. It is now honoured when given; leaving it
    at ``None`` keeps the previous behaviour (10% of ``height + width``).

    Args:
        img: Image array indexed as ``img[rows, cols]``; must provide
            ``shape`` and ``copy()``.
        detector: Object whose ``findHands(image)`` returns a pair
            ``(hands, annotated_image)``; each hand is a mapping with a
            ``'bbox'`` entry ``(x, y, w, h)``.
        offset: Margin in pixels added on every side of the bounding box,
            or ``None`` to derive it from the image size.

    Returns:
        The cropped region of ``img``, or ``img`` itself when no hand was
        detected.
    """
    # The detector gets a copy so that anything it draws does not touch `img`.
    hands, _ = detector.findHands(img.copy())
    if not hands:
        return img

    height, width = img.shape[0], img.shape[1]
    if offset is None:
        offset = int((height + width) * 0.1)

    x, y, w, h = hands[0]['bbox']
    # Clamp the padded box to the image bounds.
    top = max(0, y - offset)
    bottom = min(y + h + offset, height)
    left = max(0, x - offset)
    right = min(x + w + offset, width)
    return img[top:bottom, left:right]
def main():
    """Crop every image in ``test_data/<sign>/`` into ``test_data_cropped/<sign>/``.

    Each output file is named ``cropped_<original name>``. Output directories
    (including the root) are created as needed. Entries of ``test_data`` that
    are not directories and image files that OpenCV cannot read are skipped.
    If writing the crop fails with an OpenCV error, the uncropped image is
    written instead.
    """
    input_path = "test_data"
    output_path = "test_data_cropped"
    detector = HandDetector(maxHands=1, mode=True, detectionCon=0.7, minTrackCon=0.8)

    for sign in os.listdir(input_path):
        sign_input_dir = os.path.join(input_path, sign)
        if not os.path.isdir(sign_input_dir):
            continue

        sign_output_dir = os.path.join(output_path, sign)
        # makedirs also creates `output_path` itself when it is missing.
        os.makedirs(sign_output_dir, exist_ok=True)

        for img_name in os.listdir(sign_input_dir):
            file_path = os.path.join(sign_input_dir, img_name)
            output_file_path = os.path.join(sign_output_dir, 'cropped_' + img_name)

            img = cv2.imread(file_path)
            if img is None:
                # imread signals an unreadable file by returning None.
                print(f"Skipping unreadable image: {file_path}")
                continue

            img_crop = crop_hand(img, detector)
            try:
                cv2.imwrite(output_file_path, img_crop)
            except cv2.error:
                # Fall back to the full image when the crop cannot be encoded.
                cv2.imwrite(output_file_path, img)


if __name__ == "__main__":
    main()

View File

@ -1,39 +0,0 @@
import os
from cvzone.HandTrackingModule import HandDetector
import cv2
def crop_hand(img, detector, offset=None):
    """Crop the detector's annotated image to the first hand plus a margin.

    Unlike a plain crop, the returned region is taken from the image that
    ``detector.findHands`` returns, so it carries whatever the detector drew.

    The margin argument used to be overwritten unconditionally, so any value
    passed by a caller was ignored. It is now honoured when given; leaving it
    at ``None`` keeps the previous behaviour (10% of ``height + width``).

    Args:
        img: Image array indexed as ``img[rows, cols]``; must provide
            ``shape`` and ``copy()``.
        detector: Object whose ``findHands(image)`` returns a pair
            ``(hands, annotated_image)``; each hand is a mapping with a
            ``'bbox'`` entry ``(x, y, w, h)``.
        offset: Margin in pixels added on every side of the bounding box,
            or ``None`` to derive it from the image size.

    Returns:
        The cropped region of the annotated image, or the original ``img``
        when no hand was detected.
    """
    # The detector gets a copy so that `img` itself stays unannotated.
    hands, det_img = detector.findHands(img.copy())
    if not hands:
        return img

    height, width = img.shape[0], img.shape[1]
    if offset is None:
        offset = int((height + width) * 0.1)

    x, y, w, h = hands[0]['bbox']
    # Clamp the padded box to the image bounds.
    top = max(0, y - offset)
    bottom = min(y + h + offset, height)
    left = max(0, x - offset)
    right = min(x + w + offset, width)
    return det_img[top:bottom, left:right]
def main():
    """Crop every image in ``test_data/<sign>/`` into ``test_data_cropped/<sign>/``.

    Each output file is named ``cropped_<original name>``. Output directories
    (including the root) are created as needed. Entries of ``test_data`` that
    are not directories and image files that OpenCV cannot read are skipped.
    If writing the crop fails with an OpenCV error, the uncropped image is
    written instead.
    """
    input_path = "test_data"
    output_path = "test_data_cropped"
    detector = HandDetector(maxHands=1, mode=True, detectionCon=0.7, minTrackCon=0.8)

    for sign in os.listdir(input_path):
        sign_input_dir = os.path.join(input_path, sign)
        if not os.path.isdir(sign_input_dir):
            continue

        sign_output_dir = os.path.join(output_path, sign)
        # makedirs also creates `output_path` itself when it is missing.
        os.makedirs(sign_output_dir, exist_ok=True)

        for img_name in os.listdir(sign_input_dir):
            file_path = os.path.join(sign_input_dir, img_name)
            output_file_path = os.path.join(sign_output_dir, 'cropped_' + img_name)

            img = cv2.imread(file_path)
            if img is None:
                # imread signals an unreadable file by returning None.
                print(f"Skipping unreadable image: {file_path}")
                continue

            img_crop = crop_hand(img, detector)
            try:
                cv2.imwrite(output_file_path, img_crop)
            except cv2.error:
                # Fall back to the full image when the crop cannot be encoded.
                cv2.imwrite(output_file_path, img)


if __name__ == "__main__":
    main()

View File

@ -1,18 +0,0 @@
import cv2 as cv
letters =['L', 'L', 'L', 'L', 'L', 'L', 'L', 'T', 'C', 'C', 'C', 'C', 'O', 'D', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'D', 'D', 'A', 'A', 'C', 'C', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L']
# Draw the letter at position k of `letters` onto frame{k}.jpg and store the
# 300x300 result as post/{k}.jpg.
for frame_no, letter in enumerate(letters):
    frame = cv.imread(f"frame{frame_no}.jpg", cv.IMREAD_COLOR)
    rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
    # The colour tuple is applied while the image is in RGB channel order.
    cv.putText(rgb, letter, (10, 100), cv.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 5)
    resized = cv.resize(rgb, [300, 300])
    cv.imwrite(f'post/{frame_no}.jpg', cv.cvtColor(resized, cv.COLOR_RGB2BGR))

View File

@ -1,16 +0,0 @@
import cv2
import numpy as np
import glob
# Assemble 0.jpg .. 78.jpg into a 30 fps DIVX-encoded AVI.
frameSize = (300, 300)
out = cv2.VideoWriter('2__output_video.avi', cv2.VideoWriter_fourcc(*'DIVX'), 30, frameSize)
try:
    for i in range(79):
        frame_path = f"{i}.jpg"
        img = cv2.imread(frame_path)
        print(frame_path)
        if img is None:
            # imread returns None for a missing or unreadable file; passing
            # that to the writer would abort the run.
            print(f"Skipping unreadable frame: {frame_path}")
            continue
        out.write(img)
finally:
    # Always release the writer so the output file is finalised on error too.
    out.release()

Binary file not shown.

96
main.py
View File

@ -1,95 +1,11 @@
import streamlit as st import streamlit as st
from process_video import segment_video, classify
from io import StringIO
import cv2 as cv
import tempfile
import os
import numpy as np
from PIL import Image
import tensorflow as tf
from crop_hand_skeleton import crop_hand
from cvzone.HandTrackingModule import HandDetector
if __name__ == "__main__": if __name__ == "__main__":
detector = HandDetector(maxHands=1, mode=True, detectionCon=0.7, minTrackCon=0.8)
model = tf.keras.models.load_model('model_pred/VGG16_sign_char_detection_model')
st.set_page_config( st.set_page_config(
page_title="Projekt widzenie" page_title="Projekt widzenie"
) )
st.title("Projekt rozpoznawanie liter z alfabetu znaków migowych z wideo") st.title("Projekt rozpoznawanie liter z alfabetu znaków migowych z wideo")
st.write('Załaduj film') st.write('Hello world')
# Upload an mp4, classify sampled frames, then re-render the clip with the
# recognised letters drawn on it and show the result.
# NOTE(review): indentation is not visible in this view, so block boundaries
# mentioned below are inferred from statement order — confirm against the file.
upload_movie = st.file_uploader("Wybierz film", type=["mp4"])
if upload_movie:
st.write("Film się ładuje.....")
# The upload is copied to a named temp file so OpenCV can open it by path.
# NOTE(review): created with delete=False and never flushed, closed or removed
# in the visible code — confirm the data is on disk before it is opened.
tfile = tempfile.NamedTemporaryFile(delete=False)
tfile.write(upload_movie.read())
video_cap = cv.VideoCapture(tfile.name)
font = cv.FONT_HERSHEY_SIMPLEX
# Three values are unpacked: the sampled frames, their count, and a list that
# is later searched for source-frame positions (see `frames.index(i)`).
result, num, frames = segment_video(video_cap, fps=1.5)
st.write(f"Załadowano {num} klatek")
classifications = []
# For each sampled frame: crop, resize to 224x224, classify with the last axis
# reversed, draw the label on the crop and display it.
for img in result:
img_skeleton = crop_hand(img, detector)
img2= cv.resize(img_skeleton,dsize=(224,224))
#breakpoint()
img_np = np.asarray(img2)
classification = classify(img_np[:,:,::-1], model)
classifications.append(classification)
cv.putText(img_skeleton,
classification,
(20, 50),
font, 2,
(255, 255, 255),
6,
cv.LINE_4)
st.image(img_skeleton[:,:,::-1])
# Second pass: re-read the whole clip and write a 300x300, 30 fps mp4v video.
i = 0
last_letter = ''
# NOTE(review): `text`, `width`, `height` and `layers` are not read anywhere in
# the visible code.
text = ''
font = cv.FONT_HERSHEY_SIMPLEX
width, height, layers = result[0].shape
new_video_cap = cv.VideoCapture(tfile.name)
out = cv.VideoWriter("output_video.mp4",cv.VideoWriter_fourcc(*'mp4v'), 30, (300, 300))
print(f"VIDEO CAP {result[0].shape}")
while True:
ret, frame = new_video_cap.read()
if ret == False:
break
image =cv.resize(frame, [300, 300])
# NOTE(review): frames read by cv.VideoCapture are normally BGR already, so
# this conversion presumably swaps the red and blue channels — confirm intent.
image = cv.cvtColor(image, cv.COLOR_RGB2BGR)
# The label is drawn before `last_letter` is updated for this frame index, so a
# new letter first appears on the following frame.
cv.putText(image,
last_letter,
(50, 50),
font, 2,
(255, 255, 255),
6,
cv.LINE_4)
# The frame is written to disk here and read back below before being appended
# to the video. NOTE(review): assumes frames/post/ already exists — confirm.
cv.imwrite(f'frames/post/{i}.jpg', image)
if i in frames:
print(i)
frame_index = frames.index(i)
letter = classifications[frame_index]
last_letter = letter
img = cv.imread(f"frames/post/{i}.jpg")
out.write(img)
i += 1
video_cap.release()
new_video_cap.release()
out.release()
# Re-encode with libx264 through the ffmpeg command-line tool.
# NOTE(review): the exit status is ignored and no overwrite flag is passed, so
# behaviour when output_video2.mp4 already exists should be verified.
os.system("ffmpeg -i output_video.mp4 -vcodec libx264 output_video2.mp4")
# NOTE(review): this file handle is not closed in the visible code.
video_file = open('output_video2.mp4', 'rb')
st.video(video_file, format="video/mp4")

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

BIN
mp.mp4

Binary file not shown.

View File

@ -1 +0,0 @@
q

View File

@ -9,10 +9,10 @@ import numpy as np
import tensorflow as tf import tensorflow as tf
model = tf.keras.models.load_model('VGG19_model.hdf5') model = tf.keras.models.load_model('model_pred/sign_char_detection_model')
# Get the list of all files and directories # Get the list of all files and directories
path = "test_data_own_cropped" path = "test_data"
dir_list = os.listdir(path) dir_list = os.listdir(path)
print(dir_list) print(dir_list)
@ -23,18 +23,15 @@ tf.keras.utils.load_img
class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space'] class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
img_height=224 img_height=256
img_width=224 img_width=256
actual=[] actual=[]
pred=[] pred=[]
img_size = [img_height, img_width]
for i in dir_list: for i in dir_list:
for j in os.listdir(path+'/'+i): for j in os.listdir(path+'/'+i):
file_path = path+'/'+i + '/' + j file_path = path+'/'+i + '/' + j
actual.append(i) actual.append(i)
test_image = tf.keras.utils.load_img(file_path, target_size = img_size) test_image = tf.keras.utils.load_img(file_path, target_size = (256, 256))
test_image = tf.keras.utils.img_to_array(test_image) test_image = tf.keras.utils.img_to_array(test_image)
test_image = np.expand_dims(test_image, axis = 0) test_image = np.expand_dims(test_image, axis = 0)
result = model.predict(test_image) result = model.predict(test_image)

View File

@ -1,68 +0,0 @@
import cv2
import tensorflow as tf
import numpy as np
from crop_hand_skeleton import crop_hand
from cvzone.HandTrackingModule import HandDetector
# Labels the classifier can emit; `classify` indexes this list with the argmax
# of the model's prediction, so the order must match the model's output order.
class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
def segment_video(video, fps=5):
    """Sample frames from ``video`` at roughly ``fps`` frames per second.

    Every ``int(real_fps / fps)``-th frame is kept, where ``real_fps`` is the
    frame rate reported by the capture.

    Args:
        video: Capture object providing ``get(prop_id)`` and ``read()``;
            ``read()`` must return ``(ok, frame)``.
        fps: Desired sampling rate; must be positive and no greater than the
            video's own frame rate.

    Returns:
        A tuple ``(frames, count, frame_indices)``: the kept frames, how many
        were kept, and the position of each kept frame in the source video.

    Raises:
        ValueError: If ``fps`` is not positive or exceeds the video's frame
            rate.
    """
    real_fps = video.get(cv2.CAP_PROP_FPS)
    print(f"{real_fps=}")
    if fps <= 0:
        raise ValueError("Desired FPS must be positive!")
    if real_fps < fps:
        # The old message stated the opposite of the condition being checked.
        raise ValueError("Desired FPS cannot be bigger than video FPS!")

    step = int(real_fps / fps)
    frames = []
    frame_indices = []
    index = 0
    while True:
        ok, frame = video.read()
        if not ok:
            break
        if index % step == 0:
            frames.append(frame)
            frame_indices.append(index)
        index += 1
    return frames, len(frames), frame_indices
def save_frames(frames, dir):
    """Crop each frame with ``crop_hand`` and write it to ``dir``.

    Files are named ``frame<k>.jpg`` where ``k`` is the frame's position in
    ``frames``; each position is printed as it is processed.
    """
    hand_detector = HandDetector(maxHands=1, mode=True, detectionCon=0.7, minTrackCon=0.8)
    for position, frame in enumerate(frames):
        print(position)
        target_file = f"{dir}/frame{position}.jpg"
        cropped = crop_hand(frame, hand_detector)
        cv2.imwrite(target_file, cropped)
def classify(img, model):
    """Return the label from ``class_names`` that ``model`` scores highest.

    The image is converted to an array and wrapped in a batch of one before
    prediction. It is not resized here.
    """
    pixels = tf.keras.utils.img_to_array(img)
    batch = np.expand_dims(pixels, axis=0)
    scores = model.predict(batch)
    best = np.argmax(scores)
    return class_names[best]
def read_saved_frames(dir, n):
    """Load ``frame0.jpg`` through ``frame<n-1>.jpg`` from ``dir``.

    Each image is loaded with a target size of 224x224 and the images are
    returned as a list in index order.
    """
    return [
        tf.keras.utils.load_img(f"{dir}/frame{index}.jpg", target_size = [224, 224])
        for index in range(n)
    ]
if __name__ == "__main__":
    # Demo run: sample mp.mp4, save cropped frames, reload and classify them.
    video = cv2.VideoCapture("mp.mp4")
    model = tf.keras.models.load_model('model_pred/effnet_sign_char_detection_model')
    # segment_video returns three values (frames, count, frame indices);
    # unpacking only two raised ValueError. The indices are not needed here.
    frames, num, _ = segment_video(video, 30)
    video.release()
    print(num)
    save_frames(frames, "frames")
    frames = read_saved_frames("frames", num)
    result = [classify(frame, model) for frame in frames]
    print(result)

View File

@ -2,5 +2,3 @@ streamlit
pandas pandas
tensorflow tensorflow
numpy numpy
cvzone
mediapipe

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

Some files were not shown because too many files have changed in this diff Show More