"""Streamlit app that recognises sign-language alphabet letters in an uploaded video."""
import os
import tempfile
from contextlib import suppress
from io import StringIO

import cv2 as cv
import numpy as np
import streamlit as st
import tensorflow as tf
from cvzone.HandTrackingModule import HandDetector
from PIL import Image

from crop_hand_skeleton import crop_hand
from process_video import segment_video, classify


def _save_upload_to_temp(uploaded_file):
    """Copy the uploaded video into a temporary file and return its path.

    The video is later opened by path with ``cv.VideoCapture``, so the bytes
    must be on disk first. Leaving the ``with`` block closes the handle, which
    flushes the data before anything reads the file. The file is created with
    ``delete=False``; the caller is responsible for removing it.

    Args:
        uploaded_file: Object returned by ``st.file_uploader``; only its
            ``read()`` method is used.

    Returns:
        Path of the temporary file holding the uploaded bytes.
    """
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(uploaded_file.read())
    return tmp_file.name


def _classify_key_frames(key_frames, detector, model):
    """Classify every sampled frame, show it in the app and return the labels.

    Args:
        key_frames: Frames returned by ``segment_video``.
        detector: Hand detector handed through to ``crop_hand``.
        model: Model handed through to ``classify``.

    Returns:
        List with one classification per entry of ``key_frames``, same order.
    """
    font = cv.FONT_HERSHEY_SIMPLEX
    classifications = []
    for key_frame in key_frames:
        # NOTE(review): assumes crop_hand always returns an image; confirm
        # what it returns when no hand is detected in the frame.
        hand_img = crop_hand(key_frame, detector)
        model_input = np.asarray(cv.resize(hand_img, dsize=(224, 224)))
        # The channel axis is reversed for the classifier, exactly as for the
        # preview below (presumably BGR -> RGB; confirm against crop_hand).
        classification = classify(model_input[:, :, ::-1], model)
        classifications.append(classification)

        cv.putText(hand_img, classification, (20, 50), font, 2,
                   (255, 255, 255), 6, cv.LINE_4)
        st.image(hand_img[:, :, ::-1])
    return classifications


def _write_annotated_video(source_path, frames, classifications,
                           output_path="output_video.mp4"):
    """Write a copy of the source video with the recognised letter overlaid.

    Every frame of the source is read, captioned with the most recently
    recognised letter and written to ``output_path``. The caption changes
    whenever the running frame counter reaches a value listed in ``frames``.

    Args:
        source_path: Path of the video to annotate.
        frames: Frame counters (0-based, counted while reading the source)
            for which a classification exists.
        classifications: Labels aligned index-for-index with ``frames``.
        output_path: Destination file for the annotated video.
    """
    # Build the lookup once instead of scanning the list for every frame.
    # setdefault keeps the first label of a repeated frame number, which is
    # what list.index() would have selected.
    label_by_frame = {}
    for frame_no, label in zip(frames, classifications):
        label_by_frame.setdefault(frame_no, label)

    font = cv.FONT_HERSHEY_SIMPLEX
    fourcc = cv.VideoWriter_fourcc(*'mp4v')
    capture = cv.VideoCapture(source_path)
    writer = None
    last_letter = ''
    frame_no = 0
    try:
        # Keep the source playback speed; fall back to 30 FPS when the
        # container does not report a usable value.
        source_fps = capture.get(cv.CAP_PROP_FPS)
        fps = source_fps if source_fps > 0 else 30.0

        while True:
            ok, frame = capture.read()
            if not ok:
                break

            if writer is None:
                # ndarray.shape is (rows, cols, channels) while VideoWriter
                # wants (width, height). Taking the size from the frames that
                # are actually written guarantees it matches them.
                height, width = frame.shape[:2]
                writer = cv.VideoWriter(output_path, fourcc, fps,
                                        (width, height), True)

            last_letter = label_by_frame.get(frame_no, last_letter)
            cv.putText(frame, last_letter, (50, 50), font, 1,
                       (0, 255, 255), 2, cv.LINE_4)
            # Frames come from VideoCapture in BGR, which is also what
            # VideoWriter expects, so no colour conversion is applied.
            writer.write(frame)
            frame_no += 1
    finally:
        capture.release()
        if writer is not None:
            writer.release()


def main():
    """Run the Streamlit page: upload a video, classify frames, save a copy.

    The uploaded file is stored temporarily, sampled with ``segment_video``,
    each sampled frame is classified and displayed, and an annotated copy of
    the whole video is written to ``output_video.mp4``. The temporary file is
    removed afterwards, also when processing fails.
    """
    detector = HandDetector(maxHands=1, mode=True, detectionCon=0.7,
                            minTrackCon=0.8)
    model = tf.keras.models.load_model(
        'model_pred/VGG16_sign_char_detection_model')

    st.set_page_config(page_title="Projekt widzenie")
    st.title("Projekt rozpoznawanie liter z alfabetu znaków migowych z wideo")
    st.write('Załaduj film')

    upload_movie = st.file_uploader("Wybierz film", type=["mp4"])
    if not upload_movie:
        return

    st.write("Film się ładuje.....")
    video_path = _save_upload_to_temp(upload_movie)
    video_cap = cv.VideoCapture(video_path)
    try:
        result, num, frames = segment_video(video_cap, fps=3)
        st.write(f"Załadowano {num} klatek")

        classifications = _classify_key_frames(result, detector, model)
        _write_annotated_video(video_path, frames, classifications)
    finally:
        video_cap.release()
        # Best-effort cleanup: the upload copy is no longer needed.
        with suppress(OSError):
            os.remove(video_path)


if __name__ == "__main__":
    main()