Add progress bars

This commit is contained in:
s495728 2024-05-13 09:45:43 +02:00
parent 476673ad9d
commit ca58640f00

View File

@ -3,7 +3,8 @@ import glob
import os import os
import shutil import shutil
from pathlib import Path from pathlib import Path
from zipfile import ZipFile import zipfile
from tqdm import tqdm
import cv2 import cv2
import wget import wget
@ -29,15 +30,22 @@ args = parser.parse_args()
class DataManager: class DataManager:
def download_data(self, url=None):
    """Download the dataset archive to ``archive.zip`` unless it already exists.

    Args:
        url: Optional download URL. The built-in default is a signed URL whose
            query string carries ``X-Goog-Expires=259200``, so it stops working
            some time after it was issued; pass a fresh URL when that happens.
    """
    archive_name = "archive.zip"
    print("Downloading")
    if os.path.isfile(archive_name):
        # The archive is already on disk; do not download it again.
        return
    if url is None:
        url = "https://storage.googleapis.com/kaggle-data-sets/78313/182633/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240512%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240512T222712Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=48a59c070b4f57e2746696d7ce6c77a7efd7e2d421a7d1f66411ab3fb21f688c0a0c81907ef4b788d99767cfd2e72200e6ee42e41fc4548d7874c128beabc2ff12d38aa47dfae1c10a10659e81b8d34c23515c1d6682bcf1f3eefc4e75a1ddba65dec5a03b03eec674272e772279b723f3a2739ca9099b185cf110cc5fb98e96b92622070c8cdd521f6ea0d676e6ba5dc37b23faf919e5dbc8b631e5be8e25f8da5bc74fbb94ff72393702c7348b3adf8140e80269d571ff00dd6aa065c43492d66131f62b9e59c503e1490851748c683680dbf3f929602239c6de297d329c9f3c4b12e389007783c88526f38064afbad73dba9897e408d8e1856b013fadc480"
    # Name the output explicitly so the saved file always matches the
    # existence check above, whatever name the URL or server would suggest.
    wget.download(url, out=archive_name)
def unzip_data(self, file_name, path_to_extract): def unzip_data(self, file_name, path_to_extract):
full_path_to_extract = main_path / path_to_extract full_path_to_extract = main_path / path_to_extract
old_path = "New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)" old_path = "New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)"
if not os.path.exists(main_path): if not os.path.exists(main_path):
os.makedirs(main_path) os.makedirs(main_path)
ZipFile(file_name).extractall(full_path_to_extract)
with zipfile.ZipFile(file_name) as zf:
for member in tqdm(zf.infolist(), desc='Extracting'):
try:
zf.extract(member, full_path_to_extract)
except zipfile.error as e:
pass
# shutil.move("data/test/test", # shutil.move("data/test/test",
# full_path_to_extract, copy_function=shutil.copytree) # full_path_to_extract, copy_function=shutil.copytree)
shutil.move(full_path_to_extract / old_path / "train", shutil.move(full_path_to_extract / old_path / "train",
@ -81,24 +89,30 @@ class DataManager:
def resize_dataset(self, source_dataset_name, shape):
    """Write a resized copy of every image of a dataset into "resized_dataset".

    Nothing is done when ``main_path / "resized_dataset"`` already exists.

    Args:
        source_dataset_name: Name of the dataset to read; it is substituted
            into ``path_to_train_and_valid`` to build the glob pattern and is
            replaced by "resized_dataset" in each output path.
        shape: Target size, passed unchanged to ``cv2.resize``.
    """
    dataset_name = "resized_dataset"
    if os.path.exists(main_path / dataset_name):
        return

    pattern = str(path_to_train_and_valid) % source_dataset_name
    processed = 0
    for file in glob.glob(pattern, recursive=True):
        path_to_file = file.replace("\\", "/")
        image = cv2.imread(path_to_file)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; skip it rather than crash inside cv2.resize.
            print("Skipping unreadable image: %s" % path_to_file)
            continue
        resized = cv2.resize(image, shape)
        new_path = path_to_file.replace(source_dataset_name, dataset_name)
        self.write_image(resized, new_path)
        processed += 1
        # '\r' keeps the progress counter on a single console line.
        print("Resized %s files" % (processed), end='\r')
def sobelx(self, source_dataset_name):
    """Write an x-direction Sobel-filtered copy of a dataset into "sobel_dataset".

    Nothing is done when ``main_path / "sobel_dataset"`` already exists.

    Args:
        source_dataset_name: Name of the dataset to read; it is substituted
            into ``path_to_train_and_valid`` to build the glob pattern and is
            replaced by "sobel_dataset" in each output path.
    """
    dataset_name = "sobel_dataset"
    if os.path.exists(main_path / dataset_name):
        return

    pattern = str(path_to_train_and_valid) % source_dataset_name
    processed = 0
    for file in glob.glob(pattern, recursive=True):
        path_to_file = file.replace("\\", "/")
        image = cv2.imread(path_to_file)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; skip it rather than crash inside cv2.Sobel.
            print("Skipping unreadable image: %s" % path_to_file)
            continue
        # First derivative along x only (dx=1, dy=0) with a 5x5 kernel.
        # NOTE(review): the result is 64-bit float; confirm write_image
        # handles that depth as intended.
        sobel = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5)
        new_path = path_to_file.replace(source_dataset_name, dataset_name)
        self.write_image(sobel, new_path)
        processed += 1
        # '\r' keeps the progress counter on a single console line.
        print("Sobel processed %s files" % (processed), end='\r')
if __name__ == "__main__": if __name__ == "__main__":