WKO_PROJEKT/process_data

1 line
15 KiB
Plaintext

{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyM4q90TKm68+fCgyse7d3la"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":1,"metadata":{"id":"YgcHkg_E9b9U","executionInfo":{"status":"ok","timestamp":1675102221170,"user_tz":-60,"elapsed":14,"user":{"displayName":"marti Xooo","userId":"17000102553335328898"}}},"outputs":[],"source":["import os\n","import cv2\n","from xml.etree.ElementTree import ElementTree\n","import re\n","import random\n","import numpy as np\n","from os.path import join\n","\n"]},{"cell_type":"code","source":["!fusermount -u drive\n","!google-drive-ocamlfuse drive"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hQ_r-pBSMknR","executionInfo":{"status":"ok","timestamp":1675090876283,"user_tz":-60,"elapsed":464,"user":{"displayName":"marti Xooo","userId":"17000102553335328898"}},"outputId":"0d2877c8-d320-4d0b-e9f3-43f52fe51eae"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["shell-init: error retrieving current directory: getcwd: cannot access parent directories: Transport endpoint is not connected\n","fusermount: bad mount point drive: Transport endpoint is not connected\n","shell-init: error retrieving current directory: getcwd: cannot access parent directories: Transport endpoint is not connected\n","/bin/bash: google-drive-ocamlfuse: command not found\n"]}]},{"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/gdrive', force_remount=True)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"6Mqg8_uIEau4","executionInfo":{"status":"ok","timestamp":1675102254260,"user_tz":-60,"elapsed":33102,"user":{"displayName":"marti Xooo","userId":"17000102553335328898"}},"outputId":"d04814c8-39f1-4b53-e031-837f255d86f4"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/gdrive\n"]}]},{"cell_type":"code","source":["cd gdrive/MyDrive/WKO_PROJECT_NEW"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"yF5SmBNFEdjJ","executionInfo":{"status":"ok","timestamp":1675102254261,"user_tz":-60,"elapsed":13,"user":{"displayName":"marti Xooo","userId":"17000102553335328898"}},"outputId":"c078f765-06c8-417d-d4c9-32b83f74c610"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/gdrive/MyDrive/WKO_PROJECT_NEW\n"]}]},{"cell_type":"code","source":["# convert voc annotation format to darknet format\n","def xml_to_darknet(path):\n"," root = ElementTree().parse(path)\n","\n"," img_path = root.find('filename').text.replace('png','txt')\n"," img_size = root.find('size')\n"," width = int(img_size.find('width').text)\n"," height = int(img_size.find('height').text)\n"," with open('mask_dataset/labels/' + img_path, 'w') as f:\n"," lines = []\n"," \n"," for node in root.findall('object'):\n"," object_ = dict(class_=None, x=None, y=None, width=None, height=None)\n"," \n"," # class\n"," class_name = node.find('name').text\n","\n"," if(class_name == 'without_mask'):\n"," object_['class_'] = '0'\n"," elif(class_name == 'with_mask'):\n"," object_['class_'] = '1'\n"," else:\n"," object_['class_'] = '2'\n"," \n"," # bounding box\n"," bnd_box = node.find(\"bndbox\")\n"," x_min = float(bnd_box[0].text)\n"," y_min = float(bnd_box[1].text)\n"," x_max = float(bnd_box[2].text)\n"," y_max = float(bnd_box[3].text)\n","\n"," dw = float(1/width)\n"," dh = float(1/height)\n","\n"," w = float(x_max - x_min)\n"," h = float(y_max - y_min)\n","\n"," x = float((x_min + x_max)/2 -1)\n"," y = float((y_min + y_max)/2 -1)\n","\n"," w = float(w * dw)\n"," h = float(h * dh)\n"," x = float(x * dw)\n"," y = float(y * dh)\n"," \n"," object_['x'] = str(x)\n"," object_['y'] = str(y)\n"," object_['width'] = str(w)\n"," object_['height'] = str(h)\n","\n"," line = object_['class_'] + ' ' + object_['x'] + ' ' + object_['y'] + ' ' + object_['width'] + ' ' + object_['height']\n"," \n"," lines.append(line)\n"," lines.append('\\n')\n","\n"," for line in lines[:-1]:\n"," f.write(line) \n"," f.close"],"metadata":{"id":"wkrL0mpX9ljT"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def process_data():\n","\n"," # get the paths of all the images available\n"," img_paths = []\n"," \n"," for dirname, _, filenames in os.walk('mask_dataset/images'):\n"," for filename in filenames:\n"," \n"," img_paths.append(os.path.join('/content/gdrive/MyDrive/WKO_PROJECT_NEW/mask_dataset/images', filename)) # google colab\n","\n"," \n"," # shuffle data\n"," random.shuffle(img_paths)\n","\n"," # split\n"," train, validate, test = np.split(img_paths, [int(len(img_paths)*0.8), int(len(img_paths)*0.9)])\n"," \n"," # training images set = 80% of all images\n"," # validating images set = 10% of all images\n"," # testing images set = 10% of all images\n","\n"," # write train.txt\n"," with open('mask_dataset/train.txt', 'w') as f:\n"," lines = list('\\n'.join(train))\n"," f.writelines(lines)\n"," f.close\n","\n"," # write validate.txt\n"," with open('mask_dataset/validate.txt', 'w') as f:\n"," lines = list('\\n'.join(validate))\n"," f.writelines(lines)\n"," f.close\n","\n"," # write test.txt\n"," with open('mask_dataset/test.txt', 'w') as f:\n"," lines = list('\\n'.join(test))\n"," f.writelines(lines)\n"," f.close\n","\n"," # process annotations\n"," for dirname, _, filenames in os.walk('mask_dataset/annotations'):\n"," for filename in filenames:\n"," annotation_path = (os.path.join(dirname, filename))\n"," print(annotation_path)\n"," xml_to_darknet(annotation_path)\n"," \n"," \n","\n","\n"," \n","\n"," \n","process_data()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JXSpim7hD0VN","outputId":"126139a9-c749-4047-d562-a81007e736aa"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["mask_dataset/annotations/maksssksksss120.xml\n","mask_dataset/annotations/maksssksksss121.xml\n","mask_dataset/annotations/maksssksksss12.xml\n","mask_dataset/annotations/maksssksksss11.xml\n","mask_dataset/annotations/maksssksksss112.xml\n","mask_dataset/annotations/maksssksksss110.xml\n","mask_dataset/annotations/maksssksksss116.xml\n","mask_dataset/annotations/maksssksksss119.xml\n","mask_dataset/annotations/maksssksksss107.xml\n","mask_dataset/annotations/maksssksksss1.xml\n","mask_dataset/annotations/maksssksksss128.xml\n","mask_dataset/annotations/maksssksksss102.xml\n","mask_dataset/annotations/maksssksksss111.xml\n","mask_dataset/annotations/maksssksksss0.xml\n","mask_dataset/annotations/maksssksksss125.xml\n","mask_dataset/annotations/maksssksksss129.xml\n","mask_dataset/annotations/maksssksksss109.xml\n","mask_dataset/annotations/maksssksksss118.xml\n","mask_dataset/annotations/maksssksksss103.xml\n","mask_dataset/annotations/maksssksksss124.xml\n","mask_dataset/annotations/maksssksksss122.xml\n","mask_dataset/annotations/maksssksksss13.xml\n","mask_dataset/annotations/maksssksksss113.xml\n","mask_dataset/annotations/maksssksksss115.xml\n","mask_dataset/annotations/maksssksksss10.xml\n","mask_dataset/annotations/maksssksksss106.xml\n","mask_dataset/annotations/maksssksksss105.xml\n","mask_dataset/annotations/maksssksksss114.xml\n","mask_dataset/annotations/maksssksksss104.xml\n","mask_dataset/annotations/maksssksksss108.xml\n","mask_dataset/annotations/maksssksksss101.xml\n","mask_dataset/annotations/maksssksksss123.xml\n","mask_dataset/annotations/maksssksksss100.xml\n","mask_dataset/annotations/maksssksksss127.xml\n","mask_dataset/annotations/maksssksksss126.xml\n","mask_dataset/annotations/maksssksksss117.xml\n","mask_dataset/annotations/maksssksksss150.xml\n","mask_dataset/annotations/maksssksksss149.xml\n","mask_dataset/annotations/maksssksksss161.xml\n","mask_dataset/annotations/maksssksksss143.xml\n","mask_dataset/annotations/maksssksksss156.xml\n","mask_dataset/annotations/maksssksksss169.xml\n","mask_dataset/annotations/maksssksksss135.xml\n","mask_dataset/annotations/maksssksksss166.xml\n","mask_dataset/annotations/maksssksksss130.xml\n","mask_dataset/annotations/maksssksksss171.xml\n","mask_dataset/annotations/maksssksksss131.xml\n","mask_dataset/annotations/maksssksksss154.xml\n","mask_dataset/annotations/maksssksksss164.xml\n","mask_dataset/annotations/maksssksksss14.xml\n","mask_dataset/annotations/maksssksksss134.xml\n","mask_dataset/annotations/maksssksksss159.xml\n","mask_dataset/annotations/maksssksksss17.xml\n","mask_dataset/annotations/maksssksksss133.xml\n","mask_dataset/annotations/maksssksksss160.xml\n","mask_dataset/annotations/maksssksksss147.xml\n","mask_dataset/annotations/maksssksksss15.xml\n","mask_dataset/annotations/maksssksksss152.xml\n","mask_dataset/annotations/maksssksksss141.xml\n","mask_dataset/annotations/maksssksksss168.xml\n","mask_dataset/annotations/maksssksksss157.xml\n","mask_dataset/annotations/maksssksksss165.xml\n","mask_dataset/annotations/maksssksksss145.xml\n","mask_dataset/annotations/maksssksksss158.xml\n","mask_dataset/annotations/maksssksksss144.xml\n","mask_dataset/annotations/maksssksksss162.xml\n","mask_dataset/annotations/maksssksksss16.xml\n","mask_dataset/annotations/maksssksksss142.xml\n","mask_dataset/annotations/maksssksksss140.xml\n","mask_dataset/annotations/maksssksksss170.xml\n","mask_dataset/annotations/maksssksksss146.xml\n","mask_dataset/annotations/maksssksksss167.xml\n","mask_dataset/annotations/maksssksksss151.xml\n","mask_dataset/annotations/maksssksksss137.xml\n","mask_dataset/annotations/maksssksksss138.xml\n","mask_dataset/annotations/maksssksksss163.xml\n","mask_dataset/annotations/maksssksksss153.xml\n","mask_dataset/annotations/maksssksksss132.xml\n","mask_dataset/annotations/maksssksksss136.xml\n","mask_dataset/annotations/maksssksksss148.xml\n","mask_dataset/annotations/maksssksksss155.xml\n","mask_dataset/annotations/maksssksksss139.xml\n","mask_dataset/annotations/maksssksksss194.xml\n","mask_dataset/annotations/maksssksksss19.xml\n","mask_dataset/annotations/maksssksksss172.xml\n","mask_dataset/annotations/maksssksksss180.xml\n","mask_dataset/annotations/maksssksksss183.xml\n","mask_dataset/annotations/maksssksksss191.xml\n","mask_dataset/annotations/maksssksksss176.xml\n","mask_dataset/annotations/maksssksksss182.xml\n","mask_dataset/annotations/maksssksksss185.xml\n","mask_dataset/annotations/maksssksksss192.xml\n","mask_dataset/annotations/maksssksksss173.xml\n","mask_dataset/annotations/maksssksksss193.xml\n","mask_dataset/annotations/maksssksksss197.xml\n","mask_dataset/annotations/maksssksksss188.xml\n","mask_dataset/annotations/maksssksksss190.xml\n","mask_dataset/annotations/maksssksksss195.xml\n","mask_dataset/annotations/maksssksksss179.xml\n","mask_dataset/annotations/maksssksksss198.xml\n","mask_dataset/annotations/maksssksksss184.xml\n","mask_dataset/annotations/maksssksksss178.xml\n","mask_dataset/annotations/maksssksksss181.xml\n","mask_dataset/annotations/maksssksksss175.xml\n","mask_dataset/annotations/maksssksksss186.xml\n","mask_dataset/annotations/maksssksksss196.xml\n","mask_dataset/annotations/maksssksksss174.xml\n","mask_dataset/annotations/maksssksksss189.xml\n","mask_dataset/annotations/maksssksksss177.xml\n","mask_dataset/annotations/maksssksksss187.xml\n","mask_dataset/annotations/maksssksksss18.xml\n","mask_dataset/annotations/maksssksksss232.xml\n","mask_dataset/annotations/maksssksksss200.xml\n","mask_dataset/annotations/maksssksksss21.xml\n","mask_dataset/annotations/maksssksksss220.xml\n","mask_dataset/annotations/maksssksksss225.xml\n","mask_dataset/annotations/maksssksksss204.xml\n","mask_dataset/annotations/maksssksksss199.xml\n","mask_dataset/annotations/maksssksksss23.xml\n","mask_dataset/annotations/maksssksksss209.xml\n","mask_dataset/annotations/maksssksksss205.xml\n","mask_dataset/annotations/maksssksksss211.xml\n","mask_dataset/annotations/maksssksksss231.xml\n","mask_dataset/annotations/maksssksksss230.xml\n","mask_dataset/annotations/maksssksksss215.xml\n","mask_dataset/annotations/maksssksksss203.xml\n","mask_dataset/annotations/maksssksksss208.xml\n","mask_dataset/annotations/maksssksksss217.xml\n","mask_dataset/annotations/maksssksksss22.xml\n","mask_dataset/annotations/maksssksksss218.xml\n"]}]},{"cell_type":"markdown","source":["# Nowa sekcja"],"metadata":{"id":"RyBEbWZlHMJg"}},{"cell_type":"code","source":["import os\n","\n","_, _, files = next(os.walk(\"mask_dataset/annotations\"))\n","file_count = len(files)"],"metadata":{"id":"Y1TjRzjcKxaU","executionInfo":{"status":"ok","timestamp":1675102291297,"user_tz":-60,"elapsed":532,"user":{"displayName":"marti Xooo","userId":"17000102553335328898"}}},"execution_count":8,"outputs":[]},{"cell_type":"code","source":["file_count"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OHCRkozLK4PR","executionInfo":{"status":"ok","timestamp":1675102292520,"user_tz":-60,"elapsed":7,"user":{"displayName":"marti Xooo","userId":"17000102553335328898"}},"outputId":"d4218f85-2b68-4b41-b6c4-c67b2a8c07f8"},"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["853"]},"metadata":{},"execution_count":9}]},{"cell_type":"code","source":["_, _, files2 = next(os.walk(\"mask_dataset/labels\"))\n","file_count = len(files2)"],"metadata":{"id":"3oSH07p1K6Td","executionInfo":{"status":"ok","timestamp":1675102281750,"user_tz":-60,"elapsed":6013,"user":{"displayName":"marti Xooo","userId":"17000102553335328898"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","source":["file_count"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"29xMvMMSK_JW","executionInfo":{"status":"ok","timestamp":1675102281751,"user_tz":-60,"elapsed":8,"user":{"displayName":"marti Xooo","userId":"17000102553335328898"}},"outputId":"827b366b-11f2-40dd-9d21-d58f008732d9"},"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":["853"]},"metadata":{},"execution_count":5}]},{"cell_type":"code","source":["_, _, files = next(os.walk(\"mask_dataset/images\"))\n","file_count = len(files)"],"metadata":{"id":"T850UYjhLFIC","executionInfo":{"status":"ok","timestamp":1675102285517,"user_tz":-60,"elapsed":820,"user":{"displayName":"marti Xooo","userId":"17000102553335328898"}}},"execution_count":6,"outputs":[]},{"cell_type":"code","source":["file_count"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"To0EfxMILHrT","executionInfo":{"status":"ok","timestamp":1675102287768,"user_tz":-60,"elapsed":5,"user":{"displayName":"marti Xooo","userId":"17000102553335328898"}},"outputId":"17e46df3-1298-4c73-9ac1-7b19f88f5354"},"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["853"]},"metadata":{},"execution_count":7}]}]}