{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset preparation\n",
    "\n",
    "Builds two image-classification folder layouts by copying files:\n",
    "\n",
    "1. `./data` from the CSV-labelled `wko_test_data` folders.\n",
    "2. `./new_data` from the per-species image folders under `wk_project_data`.\n",
    "\n",
    "Training images are copied to `<root>/train/<Label>/`, test images to a flat `<root>/test/` folder, and the test labels are written to a JSON file as a list of records with `filename` and `value` keys.\n",
    "\n",
    "The notebook is meant to be run top to bottom on a fresh kernel; every cell can be re-run without duplicating label records."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import shutil\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Source locations (machine-specific; edit these to match the local copy of the data).\n",
    "WKO_ROOT = r'C:\\Users\\PC\\Desktop\\wko_test_data'\n",
    "WK_PROJECT_ROOT = r'D:\\Michal\\studia\\wk_project_data'\n",
    "\n",
    "# Output locations, relative to the notebook's working directory.\n",
    "DATA_TRAIN_DIR = './data/train'\n",
    "DATA_TEST_DIR = './data/test'\n",
    "NEW_TRAIN_DIR = './new_data/train'\n",
    "NEW_TEST_DIR = './new_data/test'\n",
    "\n",
    "# Share of the fish images that goes to the training split.\n",
    "FISH_TRAIN_FRACTION = 0.8\n",
    "\n",
    "# The turtle folder is subsampled with fixed counts rather than split 80/20.\n",
    "TURTLE_TRAIN_COUNT = 600\n",
    "TURTLE_TEST_COUNT = 100\n",
    "\n",
    "# (label, dataset folder, class folder) for the species that come with their own\n",
    "# data/val/<class folder> and data/test/<class folder> directories.\n",
    "# NOTE(review): the `val` folders feed the training set - presumably intentional; confirm.\n",
    "PRESPLIT_SPECIES = [\n",
    "    ('Jellyfish', 'jellyfish', 'animal_jellyfish'),\n",
    "    ('Lionfish', 'lionfish', 'lionfish'),\n",
    "    ('Shark', 'shark', 'animal_shark'),\n",
    "    ('Stingray', 'stingray', 'stingray'),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def class_label(column_name):\n",
    "    \"\"\"Return the class label for a CSV column header (whitespace stripped, capitalized).\"\"\"\n",
    "    return column_name.strip().capitalize()\n",
    "\n",
    "\n",
    "def list_images(src_dir):\n",
    "    \"\"\"Return the entries of src_dir in sorted order.\n",
    "\n",
    "    os.listdir gives no ordering guarantee, so sorting keeps slice-based\n",
    "    splits reproducible between runs.\n",
    "    \"\"\"\n",
    "    return sorted(os.listdir(src_dir))\n",
    "\n",
    "\n",
    "def copy_images(src_dir, file_names, dst_dir):\n",
    "    \"\"\"Copy each named file from src_dir into dst_dir, creating dst_dir first if needed.\"\"\"\n",
    "    os.makedirs(dst_dir, exist_ok=True)\n",
    "    for file_name in file_names:\n",
    "        shutil.copyfile(os.path.join(src_dir, file_name), f'{dst_dir}/{file_name}')\n",
    "\n",
    "\n",
    "def export_test_images(src_dir, file_names, label):\n",
    "    \"\"\"Copy test images into NEW_TEST_DIR and return one label record per image.\n",
    "\n",
    "    Returns a list of dicts with 'filename' and 'value' keys.\n",
    "    \"\"\"\n",
    "    copy_images(src_dir, file_names, NEW_TEST_DIR)\n",
    "    return [{'filename': file_name, 'value': label} for file_name in file_names]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Part 1: CSV-labelled data (`./data`)\n",
    "\n",
    "Each split folder contains a `_classes.csv` file. The code treats its first column as the image file name and every remaining column as a per-class flag."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_src_dir = os.path.join(WKO_ROOT, 'train')\n",
    "data = pd.read_csv(os.path.join(train_src_dir, '_classes.csv'), encoding='utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One training folder per class column; makedirs also creates ./data/train itself\n",
    "# and does not fail when the notebook is re-run.\n",
    "train_class_columns = list(data.columns[1:])\n",
    "for category in train_class_columns:\n",
    "    os.makedirs(f'{DATA_TRAIN_DIR}/{class_label(category)}', exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# itertuples(name=None) yields plain tuples: the file name first, then one flag per class column.\n",
    "for file_name, *flags in data.itertuples(index=False, name=None):\n",
    "    for category, flag in zip(train_class_columns, flags):\n",
    "        if flag:\n",
    "            # An image flagged for several classes is copied into each matching folder.\n",
    "            shutil.copyfile(\n",
    "                os.path.join(train_src_dir, file_name),\n",
    "                f'{DATA_TRAIN_DIR}/{class_label(category)}/{file_name}',\n",
    "            )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_src_dir = os.path.join(WKO_ROOT, 'test')\n",
    "data_test = pd.read_csv(os.path.join(test_src_dir, '_classes.csv'), encoding='utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_class_columns = list(data_test.columns[1:])\n",
    "\n",
    "os.makedirs(DATA_TEST_DIR, exist_ok=True)\n",
    "results = []\n",
    "for file_name, *flags in data_test.itertuples(index=False, name=None):\n",
    "    # Copy each image exactly once, not once per class column.\n",
    "    shutil.copyfile(os.path.join(test_src_dir, file_name), f'{DATA_TEST_DIR}/{file_name}')\n",
    "    # One record per flagged class, so an image with several flags yields several records.\n",
    "    results.extend(\n",
    "        {'filename': file_name, 'value': class_label(category)}\n",
    "        for category, flag in zip(test_class_columns, flags)\n",
    "        if flag\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('./data/test_label.json', 'w', encoding='utf-8') as f:\n",
    "    json.dump(results, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Part 2: per-species folders (`./new_data`)\n",
    "\n",
    "- **Fish**: a single folder, split 80/20 by position in the sorted file list.\n",
    "- **Jellyfish, Lionfish, Shark, Stingray**: copied from their existing `val` (used as train) and `test` folders.\n",
    "- **Turtle**: a single folder, subsampled to fixed train/test counts.\n",
    "\n",
    "All test images share one flat folder, so the code assumes file names are unique across species (**TODO**: confirm)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fish_dir = os.path.join(WK_PROJECT_ROOT, 'Fish_Data', 'images', 'cropped')\n",
    "# List the folder once so the train and test slices below see the same ordering.\n",
    "fish_files = list_images(fish_dir)\n",
    "data_count = len(fish_files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of fish images in the training split.\n",
    "# The last recorded run of round(data_count * 0.8) displayed 3362.\n",
    "fish_train_count = round(data_count * FISH_TRAIN_FRACTION)\n",
    "fish_train_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Peek at a few file names instead of dumping the whole listing.\n",
    "fish_files[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fish_train_files = fish_files[:fish_train_count]\n",
    "# Start the test split exactly where the training split ends so no image is skipped.\n",
    "fish_test_files = fish_files[fish_train_count:]\n",
    "assert len(fish_train_files) + len(fish_test_files) == data_count\n",
    "\n",
    "copy_images(fish_dir, fish_train_files, f'{NEW_TRAIN_DIR}/Fish')\n",
    "fish_labels = export_test_images(fish_dir, fish_test_files, 'Fish')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rebuilt from scratch on every run, so re-running this cell never duplicates records.\n",
    "presplit_labels = []\n",
    "for label, dataset, class_folder in PRESPLIT_SPECIES:\n",
    "    train_src = os.path.join(WK_PROJECT_ROOT, dataset, 'data', 'val', class_folder)\n",
    "    test_src = os.path.join(WK_PROJECT_ROOT, dataset, 'data', 'test', class_folder)\n",
    "    copy_images(train_src, list_images(train_src), f'{NEW_TRAIN_DIR}/{label}')\n",
    "    presplit_labels += export_test_images(test_src, list_images(test_src), label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "turtle_dir = os.path.join(WK_PROJECT_ROOT, 'turtle', 'images')\n",
    "turtle_files = list_images(turtle_dir)\n",
    "\n",
    "# Size an 80% split would have, shown for reference only (the last recorded run displayed 1582);\n",
    "# the copy below uses the fixed TURTLE_* counts instead.\n",
    "round(len(turtle_files) * 0.8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "turtle_train_files = turtle_files[:TURTLE_TRAIN_COUNT]\n",
    "turtle_test_files = turtle_files[TURTLE_TRAIN_COUNT:TURTLE_TRAIN_COUNT + TURTLE_TEST_COUNT]\n",
    "\n",
    "copy_images(turtle_dir, turtle_train_files, f'{NEW_TRAIN_DIR}/Turtle')\n",
    "turtle_labels = export_test_images(turtle_dir, turtle_test_files, 'Turtle')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same record order as the copy steps: Fish, the pre-split species, then Turtle.\n",
    "results = fish_labels + presplit_labels + turtle_labels\n",
    "\n",
    "with open('./new_data/test_labels.json', 'w', encoding='utf-8') as f:\n",
    "    json.dump(results, f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "um",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 08:41:22) [MSC v.1929 64 bit (AMD64)]"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "876e189cbbe99a9a838ece62aae1013186c4bb7e0254a10cfa2f9b2381853efb"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}