306 lines
8.7 KiB
Plaintext
306 lines
8.7 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import pandas as pd\n",
|
|
"import shutil\n",
|
|
"import json"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data = pd.read_csv(r'C:\\Users\\PC\\Desktop\\wko_test_data\\train\\_classes.csv', encoding='utf-8')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for category in list(data.columns[1:]):\n",
|
|
" os.mkdir(f\"./data/train/{category.strip().capitalize()}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for file in data.iloc:\n",
|
|
" file_name = file[0]\n",
|
|
" for item in list(file.items())[1:]:\n",
|
|
" if item[1]:\n",
|
|
" shutil.copyfile(rf'C:\\Users\\PC\\Desktop\\wko_test_data\\train\\{file_name}', f'./data/train/{item[0].strip().capitalize()}/{file_name}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data_test = pd.read_csv(r'C:\\Users\\PC\\Desktop\\wko_test_data\\test\\_classes.csv', encoding='utf-8')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"results = []\n",
|
|
"for file in data_test.iloc:\n",
|
|
" file_name = file[0]\n",
|
|
" for item in list(file.items())[1:]:\n",
|
|
" shutil.copyfile(rf'C:\\Users\\PC\\Desktop\\wko_test_data\\test\\{file_name}', f'./data/test/{file_name}')\n",
|
|
" if item[1]:\n",
|
|
" results.append({'filename': file_name, \"value\": item[0].strip().capitalize()})\n",
|
|
" continue\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 40,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"with open(\"./data/test_label.json\", 'w', encoding='utf-8') as f:\n",
|
|
" json.dump(results, f)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data_count = len(os.listdir(r'D:\\Michal\\studia\\wk_project_data\\Fish_Data\\images\\cropped'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"3362"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"round(data_count *0.8)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"results = []"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"os.listdir(r'D:\\Michal\\studia\\wk_project_data\\Fish_Data\\images\\cropped')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for fish in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\Fish_Data\\images\\cropped')[:3362]:\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\Fish_Data\\images\\cropped\\{fish}', f'./new_data/train/Fish/{fish}')\n",
|
|
"for fish in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\Fish_Data\\images\\cropped')[3363:]:\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\Fish_Data\\images\\cropped\\{fish}', f'./new_data/test/{fish}')\n",
|
|
" results.append({'filename': fish, \"value\": 'Fish'})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for jellyfish in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\jellyfish\\data\\val\\animal_jellyfish'):\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\jellyfish\\data\\val\\animal_jellyfish\\{jellyfish}', f'./new_data/train/Jellyfish/{jellyfish}')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for jellyfish in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\jellyfish\\data\\test\\animal_jellyfish'):\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\jellyfish\\data\\test\\animal_jellyfish\\{jellyfish}', f'./new_data/test/{jellyfish}')\n",
|
|
" results.append({'filename': jellyfish, \"value\": 'Jellyfish'})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for lionfish in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\lionfish\\data\\val\\lionfish'):\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\lionfish\\data\\val\\lionfish\\{lionfish}', f'./new_data/train/Lionfish/{lionfish}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for lionfish in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\lionfish\\data\\test\\lionfish'):\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\lionfish\\data\\test\\lionfish\\{lionfish}', f'./new_data/test/{lionfish}')\n",
|
|
" results.append({'filename': lionfish, \"value\": 'Lionfish'})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for shark in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\shark\\data\\val\\animal_shark'):\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\shark\\data\\val\\animal_shark\\{shark}', f'./new_data/train/Shark/{shark}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for shark in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\shark\\data\\test\\animal_shark'):\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\shark\\data\\test\\animal_shark\\{shark}', f'./new_data/test/{shark}')\n",
|
|
" results.append({'filename': shark, \"value\": 'Shark'})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for stingray in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\stingray\\data\\val\\stingray'):\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\stingray\\data\\val\\stingray\\{stingray}', f'./new_data/train/Stingray/{stingray}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for stingray in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\stingray\\data\\test\\stingray'):\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\stingray\\data\\test\\stingray\\{stingray}', f'./new_data/test/{stingray}')\n",
|
|
" results.append({'filename': stingray, \"value\": 'Stingray'})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"1582"
|
|
]
|
|
},
|
|
"execution_count": 24,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"round(len(os.listdir(r'D:\\Michal\\studia\\wk_project_data\\turtle\\images')) *0.8)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for turtle in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\turtle\\images')[:600]:\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\turtle\\images\\{turtle}', f'./new_data/train/Turtle/{turtle}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for turtle in os.listdir(r'D:\\Michal\\studia\\wk_project_data\\turtle\\images')[600:700]:\n",
|
|
" shutil.copyfile(rf'D:\\Michal\\studia\\wk_project_data\\turtle\\images\\{turtle}', f'./new_data/test/{turtle}')\n",
|
|
" results.append({'filename': turtle, \"value\": 'Turtle'})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"with open(\"./new_data/test_labels.json\", 'w', encoding='utf-8') as f:\n",
|
|
" json.dump(results, f)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "um",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 08:41:22) [MSC v.1929 64 bit (AMD64)]"
|
|
},
|
|
"orig_nbformat": 4,
|
|
"vscode": {
|
|
"interpreter": {
|
|
"hash": "876e189cbbe99a9a838ece62aae1013186c4bb7e0254a10cfa2f9b2381853efb"
|
|
}
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|