widzenie-komputerowe-projekt/transform_data.ipynb
2023-02-01 10:23:06 +01:00

28 KiB

import os
import pandas as pd
import shutil
import json
# Build a folder-per-class training layout from the label CSV.
# The first CSV column holds the image file name; every further column is a
# class flag, and the image is copied into the folder of each truthy flag.
train_src = r'C:\Users\PC\Desktop\wko_test_data\train'
data = pd.read_csv(os.path.join(train_src, '_classes.csv'), encoding='utf-8')

for category in list(data.columns[1:]):
    # makedirs + exist_ok: re-running the cell, or a missing ./data/train
    # parent, must not abort the whole conversion.
    os.makedirs(f"./data/train/{category.strip().capitalize()}", exist_ok=True)

# iterrows() replaces iteration over `data.iloc`, which only worked through
# the implicit __getitem__/IndexError protocol; `.iloc[0]` replaces the
# deprecated positional `Series[0]` lookup on a label-indexed row.
for _, file in data.iterrows():
    file_name = file.iloc[0]
    for category, flag in file.iloc[1:].items():
        if flag:
            shutil.copyfile(
                os.path.join(train_src, file_name),
                f'./data/train/{category.strip().capitalize()}/{file_name}',
            )
# Copy the test images into one flat folder and record their labels.
# The first CSV column holds the image file name; every further column is a
# class flag. One {'filename', 'value'} entry is written per truthy flag.
test_src = r'C:\Users\PC\Desktop\wko_test_data\test'
data_test = pd.read_csv(os.path.join(test_src, '_classes.csv'), encoding='utf-8')
os.makedirs('./data/test', exist_ok=True)

results = []
for _, file in data_test.iterrows():
    file_name = file.iloc[0]
    # Copy once per image; the copy used to sit inside the per-column loop
    # and rewrote the same file for every class column.
    shutil.copyfile(os.path.join(test_src, file_name), f'./data/test/{file_name}')
    for category, flag in file.iloc[1:].items():
        if flag:
            results.append({'filename': file_name, "value": category.strip().capitalize()})

with open("./data/test_label.json", 'w', encoding='utf-8') as f:
    json.dump(results, f)
# Count the cropped Fish images and show 80 % of that count.
cropped_fish_listing = os.listdir(r'D:\Michal\studia\wk_project_data\Fish_Data\images\cropped')
data_count = len(cropped_fish_listing)
round(0.8 * data_count)
3362
# Assemble ./new_data from the per-class source sets.
# Training images go to ./new_data/train/<Class>/, test images go to the flat
# ./new_data/test/ folder, and every test image gets a label entry in `results`.
# NOTE(review): all classes share one flat test folder, so equal file names
# from different classes would overwrite each other - confirm names are unique.
results = []

os.makedirs('./new_data/test', exist_ok=True)

# Fish has no predefined split: the first 80 % of the listing is used for
# training and the remainder for testing.
fish_dir = r'D:\Michal\studia\wk_project_data\Fish_Data\images\cropped'
# List once and sort: os.listdir returns names in arbitrary order, so two
# separate listings are not a reliable basis for complementary slices.
fish_files = sorted(os.listdir(fish_dir))
# Derive the cut from the listing instead of a hard-coded 3362; using one
# index for both slices also stops the image at the boundary being dropped
# (the slices used to be [:3362] and [3363:]).
fish_split = round(len(fish_files) * 0.8)

os.makedirs('./new_data/train/Fish', exist_ok=True)
for fish in fish_files[:fish_split]:
    shutil.copyfile(os.path.join(fish_dir, fish), f'./new_data/train/Fish/{fish}')
for fish in fish_files[fish_split:]:
    shutil.copyfile(os.path.join(fish_dir, fish), f'./new_data/test/{fish}')
    results.append({'filename': fish, "value": 'Fish'})

# The remaining classes ship with their own folders.
# NOTE(review): the source `val` folders (not `train`) feed the training set -
# kept exactly as before; confirm this is intentional.
presplit_sources = [
    ('Jellyfish',
     r'D:\Michal\studia\wk_project_data\jellyfish\data\val\animal_jellyfish',
     r'D:\Michal\studia\wk_project_data\jellyfish\data\test\animal_jellyfish'),
    ('Lionfish',
     r'D:\Michal\studia\wk_project_data\lionfish\data\val\lionfish',
     r'D:\Michal\studia\wk_project_data\lionfish\data\test\lionfish'),
    ('Shark',
     r'D:\Michal\studia\wk_project_data\shark\data\val\animal_shark',
     r'D:\Michal\studia\wk_project_data\shark\data\test\animal_shark'),
    ('Stingray',
     r'D:\Michal\studia\wk_project_data\stingray\data\val\stingray',
     r'D:\Michal\studia\wk_project_data\stingray\data\test\stingray'),
]
for label, train_src, test_src in presplit_sources:
    os.makedirs(f'./new_data/train/{label}', exist_ok=True)
    for image_name in os.listdir(train_src):
        shutil.copyfile(os.path.join(train_src, image_name), f'./new_data/train/{label}/{image_name}')
    for image_name in os.listdir(test_src):
        shutil.copyfile(os.path.join(test_src, image_name), f'./new_data/test/{image_name}')
        results.append({'filename': image_name, "value": label})
# Show 80 % of the number of turtle images.
turtle_image_count = len(os.listdir(r'D:\Michal\studia\wk_project_data\turtle\images'))
round(0.8 * turtle_image_count)
1582
# Turtle subset: the first 600 listed images go to training, the next 100 go
# to the flat test folder with a label entry each; afterwards the accumulated
# label list is written to disk.
turtle_listing = os.listdir(r'D:\Michal\studia\wk_project_data\turtle\images')
turtle_train_names = turtle_listing[:600]
turtle_test_names = turtle_listing[600:700]

for image_name in turtle_train_names:
    shutil.copyfile(
        rf'D:\Michal\studia\wk_project_data\turtle\images\\{image_name}',
        f'./new_data/train/Turtle/{image_name}',
    )

for image_name in turtle_test_names:
    shutil.copyfile(
        rf'D:\Michal\studia\wk_project_data\turtle\images\\{image_name}',
        f'./new_data/test/{image_name}',
    )
    results.append({'filename': image_name, "value": 'Turtle'})

with open("./new_data/test_labels.json", 'w', encoding='utf-8') as labels_file:
    json.dump(results, labels_file)
import cv2 as cv
from skimage.io import imread
# Collect the path of every training image that imread does not load as a
# 3-dimensional array, then show how many were found.
results = []

for class_dir in os.listdir('./new_data/train'):
    for image_name in os.listdir(f'./new_data/train/{class_dir}'):
        image_path = f'./new_data/train/{class_dir}/{image_name}'
        pixels = imread(image_path)
        if len(pixels.shape) == 3:
            continue
        results.append(image_path)
        # Deletion is left disabled: os.remove(image_path)
len(results)
0
# Collect the path of every test image that imread does not load as a
# 3-dimensional array, then show how many were found.
res = []
for image_name in os.listdir('./new_data/test'):
    image_path = f'./new_data/test/{image_name}'
    if len(imread(image_path).shape) != 3:
        # Deletion is left disabled: os.remove(image_path)
        # Record in `res`: hits used to be appended to `results`, so the
        # count below was always 0 and the training list got polluted.
        res.append(image_path)

len(res)
0
import json
# Keep only the label entries whose image still exists in ./new_data/test and
# print the entry count before and after filtering.
new_data = []
with open('./new_data/test_labels.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

files = os.listdir("./new_data/test")
# Build the lookup set once: testing membership against the list rescanned
# the whole directory listing for every label entry.
existing_files = set(files)
new_data = [entry for entry in data if entry['filename'] in existing_files]
print(len(data), len(new_data))
1961 1902
# Write the filtered label list to a second JSON file.
with open('./new_data/test_labels2.json', 'w', encoding='utf-8') as labels_file:
    labels_file.write(json.dumps(new_data))
# Show how many files are in the Turtle training folder.
turtle_train_listing = os.listdir('./new_data/train/Turtle/')
len(turtle_train_listing)
600
# Map every class folder under ./new_data_2/train to its file count and show it.
# A comprehension keeps the loop variable local; the loop used to be written
# with `data` as its variable, which overwrote the label list loaded earlier
# in the notebook.
r = {
    class_name: len(os.listdir(f"./new_data_2/train/{class_name}"))
    for class_name in os.listdir("./new_data_2/train")
}
r
{'Fish': 300,
 'Jellyfish': 300,
 'Lionfish': 300,
 'Shark': 300,
 'Stingray': 300,
 'Turtle': 300}
# Show how many files are in the ./new_data_2 test folder.
test_listing = os.listdir("./new_data_2/test/")
len(test_listing)
306
import shutil
import json
# Build the reduced ./new_data_2 set from ./new_data/train: per class, the
# first 300 images become training data and the following 50 become test data
# (copied to the flat test folder with a label entry each).
results = []
os.makedirs("./new_data_2/test", exist_ok=True)

for class_name in os.listdir("./new_data/train"):  # `type` shadowed the builtin
    # exist_ok so the cell can be re-run without raising FileExistsError.
    os.makedirs(f"./new_data_2/train/{class_name}", exist_ok=True)

    # Sort so the selection is reproducible; os.listdir order is arbitrary.
    class_files = sorted(os.listdir(f"./new_data/train/{class_name}"))

    # Disjoint slices: the counter-based version copied the 300th image into
    # both train and test (leakage) and produced 51 test images per class.
    for file in class_files[:300]:
        shutil.copyfile(f"./new_data/train/{class_name}/{file}", f'./new_data_2/train/{class_name}/{file}')
    for file in class_files[300:350]:
        shutil.copyfile(f"./new_data/train/{class_name}/{file}", f'./new_data_2/test/{file}')
        results.append({'filename': file, "value": class_name})

with open("./new_data_2/test_labels.json", "w", encoding='utf-8') as f:
    json.dump(results, f)
# Print the number of entries stored in the test label file.
with open("./new_data_2/test_labels.json", "r", encoding='utf-8') as labels_file:
    label_entries = json.load(labels_file)
    entry_count = len(label_entries)
    print(entry_count)
306
# Show the class folder names under ./new_data_2/train in sorted order.
class_folder_names = os.listdir('./new_data_2/train/')
sorted(class_folder_names)
['Fish', 'Jellyfish', 'Lionfish', 'Shark', 'Stingray', 'Turtle']
import os, shutil, json
from skimage.io import imread

# Delete every raw Fish image that imread does not load as a 3-dimensional
# array, and show how many files were deleted.
# NOTE: this removes files from the source directory itself.
count = 0
for image_name in os.listdir("../wk_project_data/Fish_Data/images/raw_images/"):
    image_path = f'../wk_project_data/Fish_Data/images/raw_images/{image_name}'
    pixels = imread(image_path)
    if len(pixels.shape) == 3:
        continue
    os.remove(image_path)
    count += 1
count
284
# Re-populate the Fish class of ./new_data_2 from the raw Fish images:
# the first 300 listed images go to training, the next 50 to the test folder,
# and the label file is rewritten with the old Fish entries replaced.
# NOTE(review): files already present in ./new_data_2/train/Fish and Fish
# images left in ./new_data_2/test by an earlier split are NOT removed here -
# clean them up separately if they should not remain.
raw_fish_dir = '../wk_project_data/Fish_Data/images/raw_images/'
# List once and sort: os.listdir order is arbitrary, so slicing two separate
# listings is not a reliable way to get complementary subsets.
raw_fish_files = sorted(os.listdir(raw_fish_dir))

for fish in raw_fish_files[:300]:
    shutil.copyfile(f'{raw_fish_dir}{fish}', f'./new_data_2/train/Fish/{fish}')

results = []
with open("./new_data_2/test_labels.json", "r", encoding='utf-8') as f:
    a = json.load(f)

# Drop the previous Fish labels; the other classes are kept unchanged.
results = [entry for entry in a if entry['value'] != "Fish"]

# Start the test slice at 300, directly after the training slice; it used to
# be [301:351], which skipped the image at index 300.
for fish in raw_fish_files[300:350]:
    shutil.copyfile(f'{raw_fish_dir}{fish}', f'./new_data_2/test/{fish}')
    results.append({'filename': fish, "value": 'Fish'})

print(len(a), len(results))
with open('./new_data_2/test_labels2.json', 'w', encoding='utf-8') as f:
    json.dump(results, f)
255 305
# Show how many files are currently in the ./new_data_2 test folder.
current_test_listing = os.listdir("./new_data_2/test/")
len(current_test_listing)
305
# Snapshot the test folder listing, reload the original label file, print its
# size and keep only the non-Fish entries in `results`.
results = []
old = os.listdir("./new_data_2/test/")
with open("./new_data_2/test_labels.json", "r", encoding='utf-8') as labels_file:
    a = json.load(labels_file)
    print(len(a))
    results = [entry for entry in a if entry['value'] != "Fish"]
# Disabled follow-up steps: removing every file in `old` that has no entry in
# `results`, and writing `results` to ./new_data_2/test_labels2.json.
255
# Display the current value of `old`.
old
['aseraggodes_melanostictus_2.jpg',
 'aseraggodes_melanostictus_3.jpg',
 'atractoscion_aequidens_1.jpg',
 'atractoscion_aequidens_2.jpg',
 'atractoscion_aequidens_3.jpg',
 'atractoscion_aequidens_4.jpg',
 'atractoscion_aequidens_5.jpg',
 'atule_mate_1.jpg',
 'atule_mate_2.jpg',
 'atule_mate_3.jpg',
 'atule_mate_4.jpg',
 'atule_mate_5.jpg',
 'atule_mate_6.jpg',
 'auxis_rochei_1.jpg',
 'auxis_rochei_2.jpg',
 'auxis_rochei_3.jpg',
 'auxis_rochei_4.jpg',
 'auxis_rochei_5.jpg',
 'auxis_rochei_6.jpg',
 'auxis_thazard_1.jpg',
 'auxis_thazard_2.jpg',
 'auxis_thazard_3.jpg',
 'auxis_thazard_4.jpg',
 'auxis_thazard_5.jpg',
 'auxis_thazard_6.jpg',
 'auxis_thazard_7.jpg',
 'auxis_thazard_8.jpg',
 'BA472025AZL8.jpg',
 'BA5BMQ2M6X2R.jpg',
 'bathylagichthys_greyae_1.jpg',
 'bathylagichthys_greyae_2.jpg',
 'bathylagichthys_greyae_3.jpg',
 'BBUVLQAWO2J9.jpg',
 'BCNZZANYP0ZV.jpg',
 'BCTT754FXP33.jpg',
 'beryx_decadactylus_1.jpg',
 'beryx_decadactylus_2.jpg',
 'beryx_decadactylus_3.jpg',
 'beryx_decadactylus_4.jpg',
 'beryx_decadactylus_5.jpg',
 'beryx_decadactylus_6.jpg',
 'BFFL4FUMTZA5.jpg',
 'BG9GN3HB8COL.jpg',
 'BIGHPAJBP3PN.jpg',
 'BNBTC6NUO815.jpg',
 'bodianus_anthioides_1.jpg',
 'bodianus_anthioides_10.jpg',
 'bodianus_anthioides_11.jpg',
 'bodianus_anthioides_12.jpg',
 'bodianus_anthioides_13.jpg',
 'bodianus_anthioides_14.jpg',
 'bodianus_anthioides_15.jpg',
 'bodianus_anthioides_2.jpg',
 'bodianus_anthioides_3.jpg',
 'bodianus_anthioides_4.jpg',
 'bodianus_anthioides_5.jpg',
 'bodianus_anthioides_6.jpg',
 'bodianus_anthioides_7.jpg',
 'bodianus_anthioides_8.jpg',
 'BS58RK9Y7XRW.jpg',
 'BSCIMPOKX5GB.jpg',
 'BY2BTSPMY9A3.jpg',
 'BYDA62T1MQNH.jpg',
 'BYXDM7G9SR9B.jpg',
 'BZYALDSJ2FJP.jpg',
 'C1GMSNYC5W32.jpg',
 'C21MHB4ETOIW.jpg',
 'C241M7VI6HTO.jpg',
 'C4DIFKCLIEOW.jpg',
 'C4FSJT6DPTI3.jpg',
 'C5452BDK8XGB.jpg',
 'C5EQCB8LH8CB.jpg',
 'C5HH5QIO275O.jpg',
 'C614MT5ES45G.jpg',
 'C6Q2PU4WWBST.jpg',
 'C7898UC4B5X1.jpg',
 'C7KEAYQVWG1B.jpg',
 'C7TQFX1KRNP4.jpg',
 'C8YZEO6NMB0H.jpg',
 'C8ZK6PQS6WAH.jpg',
 'C9ESM28DBQU9.jpg',
 'C9F33776YZQ6.jpg',
 'CAKB3F68W4SN.jpg',
 'CALGDD519DMS.jpg',
 'CBA745E1722H.jpg',
 'CBFUPBSOTTAX.jpg',
 'CBSCA5L3BNG3.jpg',
 'CBTAU1KC5HPV.jpg',
 'CGAFDIWA20MB.jpg',
 'CGHJS5C7KPVF.jpg',
 'CHJXK4JRYF3U.jpg',
 'CHVOUMVWN6FN.jpg',
 'CHVP1FXWGEK5.jpg',
 'CK3CPRC9V4KT.jpg',
 'CK8XZ2HYTW6D.jpg',
 'CKOMZRVOPGNX.jpg',
 'CL1Y2UEBQTRJ.jpg',
 'CLDQS8GSPOP6.jpg',
 'CMZPKHVQF5SP.jpg',
 'COBI823JUKVD.jpg',
 'CQIM07U47AXF.jpg',
 'E5HM1HQN6CBH.jpg',
 'E63KPE2E5ASJ.jpg',
 'E6Q90FNYY212.jpg',
 'E6YUPML62IEJ.jpg',
 'E7XAEQMGVNDO.jpg',
 'E8XEKOMRQGD6.jpg',
 'EAC01K8CA5SG.jpg',
 'EC2WSC3HR0XC.jpg',
 'ECSBX5CG4UVS.jpg',
 'ED6JYRN3F7ON.jpg',
 'EDG8OAO8BLW8.jpg',
 'EE0QOJUM98IZ.jpg',
 'EGFB75PCMFUY.jpg',
 'EJDE4PPELA1E.jpg',
 'EK0XLC3GIAIJ.jpg',
 'EKKDYL1J6OSS.jpg',
 'EM2G1B34RKQS.jpg',
 'EN928JW7UB3Z.jpg',
 'ENYQ47TDFURY.jpg',
 'EP6WCRW28XKU.jpg',
 'EQ03JME5I8J8.jpg',
 'EQE1C6CAXPCM.jpg',
 'EQNGOJ4YK2TE.jpg',
 'EQWYK4FYCJD2.jpg',
 'ERILUU81EXXN.jpg',
 'EUAZ68FROFVM.jpg',
 'EWI1W5DSY28S.jpg',
 'EXCHOZ39L5NB.jpg',
 'EY75O5JEZN90.jpg',
 'F225V7HZIL3C.jpg',
 'F3IDO1PPX6TN.jpg',
 'F4P3GB7N94FW.jpg',
 'F515404E9D96.jpg',
 'F7Q3E7VEG6HO.jpg',
 'F8V9G1BSNK8I.jpg',
 'F92M1TSW6U5A.jpg',
 'FBYPMKFYB1Y8.jpg',
 'FD9KZBQ6S8ZE.jpg',
 'FEVFRGXYSUU6.jpg',
 'FFNAZQFD3LLQ.jpg',
 'FHD32EQTSRU0.jpg',
 'FHRAQBEA0JZ5.jpg',
 'FJ366SFC1JU2.jpg',
 'FJ3BLKK45MYN.jpg',
 'FJC6HC5R1D1R.jpg',
 'FJYSJ1FGD4MA.jpg',
 'FK3PTI29V78G.jpg',
 'FKP2F8SVMAB2.jpg',
 'FMWHH78S82D3.jpg',
 'FNSK9HQV1QY5.jpg',
 'FQENFKQ5F297.jpg',
 'Image_127.jpg',
 'Image_1270.jpg',
 'Image_1271.png',
 'Image_1272.jpg',
 'Image_1273.jpg',
 'Image_1274.jpg',
 'Image_1275.jpg',
 'Image_1276.jpg',
 'Image_1277.jpg',
 'Image_1278.png',
 'Image_1279.jpg',
 'Image_128.jpg',
 'Image_1280.jpg',
 'Image_1281.jpg',
 'Image_1282.jpg',
 'Image_1283.jpg',
 'Image_1284.jpg',
 'Image_1285.jpg',
 'Image_1286.jpg',
 'Image_1287.jpg',
 'Image_1288.jpg',
 'Image_1289.jpg',
 'Image_129.jpg',
 'Image_1290.jpg',
 'Image_1291.jpg',
 'Image_1292.jpg',
 'Image_1293.jpg',
 'Image_1294.jpg',
 'Image_1295.jpg',
 'Image_1296.jpg',
 'Image_1297.jpg',
 'Image_1298.jpg',
 'Image_1299.jpg',
 'Image_13.jpg',
 'Image_130.jpg',
 'Image_1300.jpg',
 'Image_1301.jpg',
 'Image_1302.jpg',
 'Image_1303.jpg',
 'Image_1304.jpg',
 'Image_1305.jpg',
 'Image_1306.jpg',
 'Image_1307.jpg',
 'Image_1308.jpg',
 'Image_1309.jpg',
 'Image_131.jpg',
 'Image_1310.png',
 'Image_1311.jpg',
 'Image_1312.jpg',
 'Image_1313.jpg',
 'Image_1314.jpg',
 'QPTNNR883Y71.jpg',
 'QYZKHFUZBYB5.jpg',
 'R7CE93VYTQ3C.jpg',
 'RFIF55SSBAJE.jpg',
 'RGFV4ID1FG4H.jpg',
 'RIMV8RD0O91K.jpg',
 'RP03D3135F0X.jpg',
 'RQTETBPA6LCE.jpg',
 'RUKD3M79DOW0.jpg',
 'RX52IW9OJ76K.jpg',
 'RYMXETE0HNP1.jpg',
 'S0H88RI2WWWG.jpg',
 'S4NVLRSGZ80O.jpg',
 'S5J21NY90AEU.jpg',
 'S6BGCT3174NY.jpg',
 'S7XQ2ZWU4XRQ.jpg',
 'SA6VWMCVZ0DZ.jpg',
 'SCBJ47X6E43S.jpg',
 'SCL4N9GWCRVO.jpg',
 'SDT00RQTLX26.jpg',
 'SEV5J90SU4YF.jpg',
 'SFOU9AI3AC8X.jpg',
 'SGN4LLDEANEJ.jpg',
 'SGUV3RQNZCTL.jpg',
 'SHGFRLYN2GRX.jpg',
 'SHKUM0IGCBMR.jpg',
 'SHRG1L84VXGX.jpg',
 'SHV3VYOR17UL.jpg',
 'SKGG21HRSA1Y.jpg',
 'SNFX2238RZM7.jpg',
 'SNQKPSJPS1WY.jpg',
 'SQJN7VCLB6UH.jpg',
 'SS22ZAHR1NF5.jpg',
 'SSTQDRFQCHXJ.jpg',
 'STL38E8QG8GV.jpg',
 'SVJJISHQ3G6M.jpg',
 'SWWB0WP99O6O.jpg',
 'SY4LBP84QR4G.jpg',
 'SZZSBZGDCMAE.jpg',
 'T097LZ1OKYQP.jpg',
 'T16PTKKU5CYI.jpg',
 'T2XV2BD91FUA.jpg',
 'T38TY65HTSK5.jpg',
 'TAOYE8VOUWAA.jpg',
 'TAQOKNTRRPF3.jpg',
 'TASC49VNJ1YK.jpg',
 'TBZQDRGHA3E3.jpg',
 'TCN33G4E3IOK.jpg',
 'TH8LVNHF3HNN.jpg',
 'TJ8TBWUKZT3C.jpg',
 'TM9SGFJQE6R6.jpg',
 'TNS5CQY59PKL.jpg',
 'TX1VAIF1SGJ0.jpg',
 'TYQIJ51SGBJA.jpg',
 'U03GPBVDJ3L4.jpg',
 'U14NZZCH5ODP.jpg',
 'U14YJ7AGS93Q.jpg',
 'U47XC7DHPNFB.jpg',
 'U6K0KC67TIOS.jpg',
 'U8JVPI5Z4DUZ.jpg',
 'U8Z8R0LK52QZ.jpg',
 'UBNLCHO4RAVH.jpg',
 'UCTEKRBL2KTR.jpg',
 'UHGC1FPUM4K6.jpg',
 'UJS5P0YWU2WU.jpg',
 'UJSGE2BICN35.jpg',
 'UKCCNFTGM4YW.jpg',
 'UMZYDRURKGDM.jpg',
 'UNIEYIPEDFVS.jpg',
 'UNJEX6XSSKS1.jpg',
 'UQKQLP83533C.jpg',
 'USDR2OG5BXWM.jpg',
 'UT0TPHF5Z1RJ.jpg',
 'UUGZB50IQZ2M.jpg',
 'UVQ3LHFITCH6.jpg',
 'UW58XXZKWFMF.jpg',
 'UYEYNMX0VDWL.jpg',
 'UZOYBSENZVXH.jpg',
 'V0EGKX79YBZX.jpg',
 'V3M63R4R2VFS.jpg',
 'V4JKHJU3WN7I.jpg',
 'V5OTJUPLGBB7.jpg',
 'V5XFYHOVIE5B.jpg',
 'V943JMHHK9RB.jpg',
 'VB6S7YTVATYU.jpg',
 'VCSGG6YJ2H7U.jpg',
 'VD413D5VBZ90.jpg',
 'VDFOC22A599Q.jpg',
 'VFFOLM579BOC.jpg',
 'VHJTERFVO8TJ.jpg',
 'VHPB719IW5QY.jpg',
 'VI77HMMP2J10.jpg',
 'VKC0QJ1WNZWC.jpg',
 'VPAJ21EX1D2J.jpg',
 'VQVVRECOPO5Z.jpg',
 'VWRNHRTZT1PH.jpg',
 'VWYQKJKHBLRG.jpg',
 'VY5YH6RQL5I2.jpg',
 'VZMJF0JU6BJY.jpg',
 'W0WUDCV7HORQ.jpg',
 'W1S4LFQY8ZEP.jpg',
 'W8ADO53ZUWVX.jpg']