# projekt_widzenie/main.py

import streamlit as st
from process_video import segment_video, classify
from io import StringIO
import cv2 as cv
import tempfile
import os
import numpy as np
from PIL import Image
import tensorflow as tf
from crop_hand_skeleton import crop_hand
from cvzone.HandTrackingModule import HandDetector

def label_by_frame(frames, classifications):
    """Map each sampled frame number to the label predicted for it.

    Args:
        frames: Frame numbers of the sampled images, in sampling order.
        classifications: Predicted labels, parallel to ``frames``.

    Returns:
        A dict ``{frame_number: label}``. If a frame number appears more
        than once, the first label wins (same result as the
        ``frames.index(i)`` lookup this replaces). Surplus entries in the
        longer of the two sequences are ignored.
    """
    labels = {}
    for frame_no, label in zip(frames, classifications):
        labels.setdefault(frame_no, label)
    return labels


def _classify_samples(samples, detector, model):
    """Classify every sampled image and show each crop and label in the UI.

    Returns the list of labels, one per image in ``samples``.
    """
    classifications = []
    for sample in samples:
        # NOTE(review): crop_hand's behaviour when no hand is detected is not
        # visible from this file -- confirm it always returns an image.
        skeleton = crop_hand(sample, detector)
        resized = cv.resize(skeleton, dsize=(224, 224))
        # Reverse the channel axis before classification and display
        # (presumably BGR -> RGB, since the images come from OpenCV).
        label = classify(resized[:, :, ::-1], model)
        classifications.append(label)
        st.image(skeleton[:, :, ::-1])
        st.write(label)
    return classifications


def _write_annotated_video(video_path, labels, output_path="output_video.mp4"):
    """Re-encode ``video_path`` with the most recent label drawn on each frame.

    Args:
        video_path: Path of the source video.
        labels: Dict mapping a frame number to the label that starts
            being displayed at that frame.
        output_path: Destination file for the annotated video.
    """
    capture = cv.VideoCapture(video_path)
    writer = None
    try:
        # Keep the source frame rate so playback speed is unchanged; fall
        # back to 30 fps when the container does not report a usable value.
        fps = capture.get(cv.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        fourcc = cv.VideoWriter_fourcc(*'mp4v')
        last_letter = ''
        frame_no = 0
        while True:
            ret, frame = capture.read()
            if not ret:
                break
            if writer is None:
                # NumPy shape is (height, width, channels) while VideoWriter
                # takes (width, height); size the writer from the frames
                # that are actually written.
                height, width = frame.shape[:2]
                print(f"VIDEO CAP {frame.shape}")
                writer = cv.VideoWriter(
                    output_path, fourcc, fps, (width, height), True
                )
            if frame_no in labels:
                print(frame_no)
                last_letter = labels[frame_no]
                st.write(last_letter)
            cv.putText(
                frame,
                last_letter,
                (50, 50),
                cv.FONT_HERSHEY_SIMPLEX,
                1,
                (0, 255, 255),
                2,
                cv.LINE_4,
            )
            # Write the annotated frame itself; VideoWriter expects BGR,
            # which is what VideoCapture.read() returns, so no conversion.
            writer.write(frame)
            frame_no += 1
    finally:
        capture.release()
        if writer is not None:
            writer.release()


if __name__ == "__main__":
    detector = HandDetector(maxHands=1, mode=True, detectionCon=0.7, minTrackCon=0.8)
    model = tf.keras.models.load_model('model_pred/VGG16_sign_char_detection_model')

    st.set_page_config(
        page_title="Projekt widzenie"
    )
    st.title("Projekt rozpoznawanie liter z alfabetu znaków migowych z wideo")

    st.write('Załaduj film')

    upload_movie = st.file_uploader("Wybierz film", type=["mp4"])

    if upload_movie:
        st.write("Film się ładuje.....")
        # Close the temp file before OpenCV opens it by name so the upload is
        # fully flushed to disk; it is removed in the ``finally`` below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tfile:
            tfile.write(upload_movie.read())
        video_path = tfile.name
        try:
            video_cap = cv.VideoCapture(video_path)
            try:
                result, num, frames = segment_video(video_cap, fps=3)
            finally:
                video_cap.release()
            st.write(f"Załadowano {num} klatek")

            classifications = _classify_samples(result, detector, model)
            _write_annotated_video(
                video_path, label_by_frame(frames, classifications)
            )
        finally:
            os.remove(video_path)