"""Small Flask web service that runs a YOLOv8 ONNX model on uploaded images.

Routes:
    GET  /        -> contents of index.html
    POST /detect  -> JSON array of detected boxes for the uploaded "image_file"
"""
import functools
import os

import numpy as np
import onnxruntime as ort
from flask import Flask, jsonify, request
from PIL import Image
from ultralytics import YOLO  # noqa: F401  (only used by the disabled alternative at the bottom)
from waitress import serve

# Relative paths below ("index.html", "best.onnx") are resolved against the
# directory containing this script, regardless of where it is launched from.
script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(script_dir)

# Class labels, indexed by the class id produced by the model.
yolo_classes = [
    "b_fully_ripened",
    "b_half_ripened",
    "b_green",
    "l_fully_ripened",
    "l_half_ripened",
    "l_green",
]

MODEL_PATH = "best.onnx"
MODEL_INPUT_SIZE = 640       # images are resized to a square of this side before inference
CONFIDENCE_THRESHOLD = 0.5   # candidates scoring below this are discarded
IOU_THRESHOLD = 0.7          # boxes overlapping a better box at/above this IoU are suppressed

app = Flask(__name__)


@app.route("/")
def root():
    """
    Site main page handler function.
    :return: Content of index.html file
    """
    with open("index.html") as file:
        return file.read()


@app.route("/detect", methods=["POST"])
def detect():
    """
    Handler of /detect POST endpoint.
    Receives an uploaded file named "image_file", passes it through the
    YOLOv8 object detection network and returns an array of bounding boxes.
    :return: a JSON array of boxes in the format
        [[x1, y1, x2, y2, object_type, probability], ..]
        with coordinates as produced by process_output()
    """
    buf = request.files["image_file"]
    boxes = detect_objects_on_image(buf.stream)
    print(boxes)
    return jsonify(boxes)


def detect_objects_on_image(buf):
    """
    Run the full pipeline (preprocess -> inference -> postprocess) on an image.
    :param buf: Input image file stream
    :return: Array of boxes [[x1, y1, x2, y2, object_type, probability], ..]
    """
    model_input, img_width, img_height = prepare_input(buf)
    output = run_model(model_input)
    return process_output(output, img_width, img_height)


def prepare_input(buf):
    """
    Convert an image stream to the tensor layout the model is fed with.
    :param buf: Input image file stream (anything PIL's Image.open accepts)
    :return: tuple (tensor, original_width, original_height) where tensor is a
        float32 array of shape (1, 3, 640, 640) with values scaled to [0, 1]
    """
    img = Image.open(buf)
    img_width, img_height = img.size
    img = img.resize((MODEL_INPUT_SIZE, MODEL_INPUT_SIZE))
    img = img.convert("RGB")
    pixels = np.array(img)               # (height, width, channel)
    pixels = pixels.transpose(2, 0, 1)   # -> (channel, height, width)
    pixels = pixels.reshape(1, 3, MODEL_INPUT_SIZE, MODEL_INPUT_SIZE) / 255.0
    return pixels.astype(np.float32), img_width, img_height


@functools.lru_cache(maxsize=1)
def _get_session():
    """Create the ONNX Runtime session on first use and reuse it afterwards."""
    return ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])


def run_model(input):
    """
    Run the ONNX model on a prepared input tensor.
    The inference session is created once and cached instead of being rebuilt
    from disk on every call.
    :param input: tensor as returned by prepare_input() (name kept for
        backward compatibility even though it shadows the builtin)
    :return: the model's "output0" array
    """
    outputs = _get_session().run(["output0"], {"images": input})
    return outputs[0]


def process_output(output, img_width, img_height):
    """
    Convert raw model output into a filtered list of boxes.
    Each candidate row is read as [xc, yc, w, h, score_0, score_1, ...] in
    640x640 model coordinates. Candidates scoring below 0.5 are dropped, the
    rest are scaled to the original image size and then reduced with
    non-maximum suppression (IoU threshold 0.7, applied across all classes).
    :param output: raw "output0" array returned by run_model()
    :param img_width: width of the original image
    :param img_height: height of the original image
    :return: Array of boxes [[x1, y1, x2, y2, object_type, probability], ..]
        sorted by descending probability
    """
    rows = output[0].astype(float).transpose()

    boxes = []
    for row in rows:
        class_scores = row[4:]
        prob = class_scores.max()
        if prob < CONFIDENCE_THRESHOLD:
            continue
        label = yolo_classes[class_scores.argmax()]
        xc, yc, w, h = row[:4]
        x1 = (xc - w / 2) / MODEL_INPUT_SIZE * img_width
        y1 = (yc - h / 2) / MODEL_INPUT_SIZE * img_height
        x2 = (xc + w / 2) / MODEL_INPUT_SIZE * img_width
        y2 = (yc + h / 2) / MODEL_INPUT_SIZE * img_height
        # The returned coordinates are deliberately remapped (x, y) -> (img_height - y, x).
        # NOTE(review): presumably compensates for how the client displays the
        # image (e.g. a 90-degree rotation) - confirm against the front end.
        boxes.append([
            float(img_height - y2),
            float(x1),
            float(img_height - y1),
            float(x2),
            label,
            float(prob),
        ])

    # Non-maximum suppression: repeatedly keep the most confident box and drop
    # every remaining box that overlaps it too much.
    boxes.sort(key=lambda box: box[5], reverse=True)
    result = []
    while boxes:
        best = boxes[0]
        result.append(best)
        # Drop `best` explicitly so the loop always terminates, even for
        # degenerate (zero-area) boxes whose IoU with themselves is 0.
        boxes = [box for box in boxes[1:] if iou(box, best) < IOU_THRESHOLD]
    return result


def _area(box):
    """Return the area of a box given as [x1, y1, x2, y2, ...]."""
    x1, y1, x2, y2 = box[:4]
    return (x2 - x1) * (y2 - y1)


def iou(box1, box2):
    """
    Intersection-over-union of two boxes.
    :param box1: box as [x1, y1, x2, y2, ...]
    :param box2: box as [x1, y1, x2, y2, ...]
    :return: IoU ratio; 0.0 when the union has no area (avoids division by zero)
    """
    overlap = intersection(box1, box2)
    total = _area(box1) + _area(box2) - overlap
    if total <= 0:
        return 0.0
    return overlap / total


def union(box1, box2):
    """
    Area covered by at least one of the two boxes.
    :param box1: box as [x1, y1, x2, y2, ...]
    :param box2: box as [x1, y1, x2, y2, ...]
    :return: area of the union
    """
    return _area(box1) + _area(box2) - intersection(box1, box2)


def intersection(box1, box2):
    """
    Area shared by the two boxes.
    The overlap extents are clamped at zero: without clamping, two boxes that
    are disjoint on both axes yield two negative extents whose product is a
    bogus positive "overlap".
    :param box1: box as [x1, y1, x2, y2, ...]
    :param box2: box as [x1, y1, x2, y2, ...]
    :return: area of the intersection, 0 when the boxes do not overlap
    """
    box1_x1, box1_y1, box1_x2, box1_y2 = box1[:4]
    box2_x1, box2_y1, box2_x2, box2_y2 = box2[:4]
    overlap_w = min(box1_x2, box2_x2) - max(box1_x1, box2_x1)
    overlap_h = min(box1_y2, box2_y2) - max(box1_y1, box2_y1)
    return max(0, overlap_w) * max(0, overlap_h)


# Disabled alternative implementation that uses the Ultralytics API directly
# instead of ONNX Runtime (kept for reference only):
#
# def detect_objects_on_image(buf):
#     """
#     Function receives an image, passes it through YOLOv8 neural network
#     and returns an array of detected objects and their bounding boxes
#     :param buf: Input image file stream
#     :return: Array of bounding boxes in format
#         [[x1,y1,x2,y2,object_type,probability],..]
#     """
#     model = YOLO("best.pt")
#     results = model.predict(Image.open(buf))
#     result = results[0]
#     output = []
#     for box in result.boxes:
#         x1, y1, x2, y2 = [round(x) for x in box.xyxy[0].tolist()]
#         class_id = box.cls[0].item()
#         prob = round(box.conf[0].item(), 2)
#         output.append([x1, y1, x2, y2, result.names[class_id], prob])
#     return output

serve(app, host='0.0.0.0', port=8080)