widzenie-komputerowe-projekt/transform_data.ipynb

306 lines
8.7 KiB
Plaintext
Raw Normal View History

2023-01-31 19:30:04 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import shutil\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Load the class table shipped with the training images into `data`.\n",
"train_classes_csv = r'C:\\Users\\PC\\Desktop\\wko_test_data\\train\\_classes.csv'\n",
"data = pd.read_csv(train_classes_csv, encoding='utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Create one training sub-directory per class column of the table.\n",
"# Column 0 is skipped: the copy loop further down uses it as the image file name.\n",
"for category in data.columns[1:]:\n",
"    class_dir = f\"./data/train/{category.strip().capitalize()}\"\n",
"    # makedirs(..., exist_ok=True) keeps this cell re-runnable and also creates\n",
"    # missing parent folders, where os.mkdir would raise in both situations.\n",
"    os.makedirs(class_dir, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# Copy every training image into the folder of each class whose column is truthy for it.\n",
"for _, row in data.iterrows():\n",
"    # Positional lookup through .iloc: plain row[0] on a label-indexed Series\n",
"    # relies on a deprecated integer fallback.\n",
"    file_name = row.iloc[0]\n",
"    for column, flag in row.iloc[1:].items():\n",
"        if flag:\n",
"            shutil.copyfile(\n",
"                rf'C:\\Users\\PC\\Desktop\\wko_test_data\\train\\{file_name}',\n",
"                f'./data/train/{column.strip().capitalize()}/{file_name}',\n",
"            )"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# Load the class table shipped with the test images into `data_test`.\n",
"test_classes_csv = r'C:\\Users\\PC\\Desktop\\wko_test_data\\test\\_classes.csv'\n",
"data_test = pd.read_csv(test_classes_csv, encoding='utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# Copy each test image into ./data/test and collect one label record per truthy class column.\n",
"results = []\n",
"for _, row in data_test.iterrows():\n",
"    # Positional lookup through .iloc: plain row[0] on a label-indexed Series\n",
"    # relies on a deprecated integer fallback.\n",
"    file_name = row.iloc[0]\n",
"    # The copy does not depend on the class columns, so it runs once per image\n",
"    # rather than once per column.\n",
"    shutil.copyfile(rf'C:\\Users\\PC\\Desktop\\wko_test_data\\test\\{file_name}', f'./data/test/{file_name}')\n",
"    for column, flag in row.iloc[1:].items():\n",
"        if flag:\n",
"            results.append({'filename': file_name, \"value\": column.strip().capitalize()})"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# Serialise the collected label records and store them next to the ./data folders.\n",
"labels_json = json.dumps(results)\n",
"with open(\"./data/test_label.json\", 'w', encoding='utf-8') as label_file:\n",
"    label_file.write(labels_json)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Count the entries in the cropped fish image folder.\n",
"cropped_fish_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\Fish_Data\\images\\cropped')\n",
"data_count = len(cropped_fish_names)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3362"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 80% of the fish entry count, rounded to an integer.\n",
"round(0.8 * data_count)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Start a fresh, empty list of label records for the ./new_data test set.\n",
"results = list()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show the entry names found in the cropped fish image folder.\n",
"cropped_fish_dir = r'D:\\Michal\\studia\\wk_project_data\\Fish_Data\\images\\cropped'\n",
"os.listdir(cropped_fish_dir)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Split the cropped fish images 80/20 between the Fish training folder and the test folder.\n",
"fish_dir = r'D:\\Michal\\studia\\wk_project_data\\Fish_Data\\images\\cropped'\n",
"# List the folder once so both slices are cut from the same ordering\n",
"# (os.listdir returns entries in arbitrary order).\n",
"fish_files = os.listdir(fish_dir)\n",
"# Same 80% rule as the cell above, derived from the listing instead of a hard-coded index.\n",
"split_index = round(len(fish_files) * 0.8)\n",
"for fish in fish_files[:split_index]:\n",
"    shutil.copyfile(rf'{fish_dir}\\{fish}', f'./new_data/train/Fish/{fish}')\n",
"# The test slice starts exactly where the training slice ends; starting one index\n",
"# later would leave the image at split_index out of both sets.\n",
"for fish in fish_files[split_index:]:\n",
"    shutil.copyfile(rf'{fish_dir}\\{fish}', f'./new_data/test/{fish}')\n",
"    results.append({'filename': fish, \"value\": 'Fish'})"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Jellyfish: copy every file from the source `val` folder into the Jellyfish training folder.\n",
"jellyfish_train_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\jellyfish\\data\\val\\animal_jellyfish')\n",
"for image_name in jellyfish_train_names:\n",
"    source_path = rf'D:\\Michal\\studia\\wk_project_data\\jellyfish\\data\\val\\animal_jellyfish\\{image_name}'\n",
"    target_path = f'./new_data/train/Jellyfish/{image_name}'\n",
"    shutil.copyfile(source_path, target_path)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Jellyfish: copy the source `test` folder into the shared test folder and label each file.\n",
"jellyfish_test_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\jellyfish\\data\\test\\animal_jellyfish')\n",
"for image_name in jellyfish_test_names:\n",
"    source_path = rf'D:\\Michal\\studia\\wk_project_data\\jellyfish\\data\\test\\animal_jellyfish\\{image_name}'\n",
"    target_path = f'./new_data/test/{image_name}'\n",
"    shutil.copyfile(source_path, target_path)\n",
"    label_record = {'filename': image_name, \"value\": 'Jellyfish'}\n",
"    results.append(label_record)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Lionfish: copy every file from the source `val` folder into the Lionfish training folder.\n",
"lionfish_train_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\lionfish\\data\\val\\lionfish')\n",
"for image_name in lionfish_train_names:\n",
"    source_path = rf'D:\\Michal\\studia\\wk_project_data\\lionfish\\data\\val\\lionfish\\{image_name}'\n",
"    target_path = f'./new_data/train/Lionfish/{image_name}'\n",
"    shutil.copyfile(source_path, target_path)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Lionfish: copy the source `test` folder into the shared test folder and label each file.\n",
"lionfish_test_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\lionfish\\data\\test\\lionfish')\n",
"for image_name in lionfish_test_names:\n",
"    source_path = rf'D:\\Michal\\studia\\wk_project_data\\lionfish\\data\\test\\lionfish\\{image_name}'\n",
"    target_path = f'./new_data/test/{image_name}'\n",
"    shutil.copyfile(source_path, target_path)\n",
"    label_record = {'filename': image_name, \"value\": 'Lionfish'}\n",
"    results.append(label_record)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Shark: copy every file from the source `val` folder into the Shark training folder.\n",
"shark_train_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\shark\\data\\val\\animal_shark')\n",
"for image_name in shark_train_names:\n",
"    source_path = rf'D:\\Michal\\studia\\wk_project_data\\shark\\data\\val\\animal_shark\\{image_name}'\n",
"    target_path = f'./new_data/train/Shark/{image_name}'\n",
"    shutil.copyfile(source_path, target_path)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Shark: copy the source `test` folder into the shared test folder and label each file.\n",
"shark_test_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\shark\\data\\test\\animal_shark')\n",
"for image_name in shark_test_names:\n",
"    source_path = rf'D:\\Michal\\studia\\wk_project_data\\shark\\data\\test\\animal_shark\\{image_name}'\n",
"    target_path = f'./new_data/test/{image_name}'\n",
"    shutil.copyfile(source_path, target_path)\n",
"    label_record = {'filename': image_name, \"value\": 'Shark'}\n",
"    results.append(label_record)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Stingray: copy every file from the source `val` folder into the Stingray training folder.\n",
"stingray_train_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\stingray\\data\\val\\stingray')\n",
"for image_name in stingray_train_names:\n",
"    source_path = rf'D:\\Michal\\studia\\wk_project_data\\stingray\\data\\val\\stingray\\{image_name}'\n",
"    target_path = f'./new_data/train/Stingray/{image_name}'\n",
"    shutil.copyfile(source_path, target_path)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Stingray: copy the source `test` folder into the shared test folder and label each file.\n",
"stingray_test_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\stingray\\data\\test\\stingray')\n",
"for image_name in stingray_test_names:\n",
"    source_path = rf'D:\\Michal\\studia\\wk_project_data\\stingray\\data\\test\\stingray\\{image_name}'\n",
"    target_path = f'./new_data/test/{image_name}'\n",
"    shutil.copyfile(source_path, target_path)\n",
"    label_record = {'filename': image_name, \"value\": 'Stingray'}\n",
"    results.append(label_record)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1582"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 80% of the number of entries in the turtle image folder, rounded to an integer.\n",
"turtle_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\turtle\\images')\n",
"round(0.8 * len(turtle_names))"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Turtle: copy the first 600 listed entries into the Turtle training folder.\n",
"# NOTE(review): 600 is hard-coded and is not the 80% figure computed in the cell above - confirm intended.\n",
"turtle_train_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\turtle\\images')[:600]\n",
"for image_name in turtle_train_names:\n",
"    source_path = rf'D:\\Michal\\studia\\wk_project_data\\turtle\\images\\{image_name}'\n",
"    target_path = f'./new_data/train/Turtle/{image_name}'\n",
"    shutil.copyfile(source_path, target_path)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# Turtle: copy listed entries 600..699 into the shared test folder and label each file.\n",
"turtle_test_names = os.listdir(r'D:\\Michal\\studia\\wk_project_data\\turtle\\images')[600:700]\n",
"for image_name in turtle_test_names:\n",
"    source_path = rf'D:\\Michal\\studia\\wk_project_data\\turtle\\images\\{image_name}'\n",
"    target_path = f'./new_data/test/{image_name}'\n",
"    shutil.copyfile(source_path, target_path)\n",
"    label_record = {'filename': image_name, \"value\": 'Turtle'}\n",
"    results.append(label_record)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Serialise the label records gathered for ./new_data/test and store them as JSON.\n",
"labels_json = json.dumps(results)\n",
"with open(\"./new_data/test_labels.json\", 'w', encoding='utf-8') as label_file:\n",
"    label_file.write(labels_json)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "um",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 08:41:22) [MSC v.1929 64 bit (AMD64)]"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "876e189cbbe99a9a838ece62aae1013186c4bb7e0254a10cfa2f9b2381853efb"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}