diff --git a/.gitignore b/.gitignore
index 0bf06dc..958d94a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 /dataset/cats
-/dataset/cats.zip
\ No newline at end of file
+/dataset/cats.zip
+/output
\ No newline at end of file
diff --git a/cat_detect.ipynb b/cat_detect.ipynb
index 0d540a7..c246f9e 100644
--- a/cat_detect.ipynb
+++ b/cat_detect.ipynb
@@ -563,7 +563,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 182,
+   "execution_count": 234,
    "metadata": {},
    "outputs": [
     {
@@ -571,8 +571,8 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "0: 640x384 (no detections), 40.5ms\n",
-      "Speed: 21.0ms preprocess, 40.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)\n"
+      "0: 640x384 (no detections), 23.0ms\n",
+      "Speed: 23.6ms preprocess, 23.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 384)\n"
      ]
     }
    ],
@@ -583,51 +583,93 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 196,
+   "execution_count": 241,
    "metadata": {},
    "outputs": [
     {
-     "ename": "AttributeError",
-     "evalue": "'Results' object has no attribute 'ascontiguousarray'. See valid attributes below.\n\n A class for storing and manipulating inference results.\n\n This class encapsulates the functionality for handling detection, segmentation, pose estimation,\n and classification results from YOLO models.\n\n Attributes:\n orig_img (numpy.ndarray): Original image as a numpy array.\n orig_shape (Tuple[int, int]): Original image shape in (height, width) format.\n boxes (Boxes | None): Object containing detection bounding boxes.\n masks (Masks | None): Object containing detection masks.\n probs (Probs | None): Object containing class probabilities for classification tasks.\n keypoints (Keypoints | None): Object containing detected keypoints for each object.\n obb (OBB | None): Object containing oriented bounding boxes.\n speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.\n names (Dict[int, str]): Dictionary mapping class IDs to class names.\n path (str): Path to the image file.\n _keys (Tuple[str, ...]): Tuple of attribute names for internal use.\n\n Methods:\n update: Updates object attributes with new detection results.\n cpu: Returns a copy of the Results object with all tensors on CPU memory.\n numpy: Returns a copy of the Results object with all tensors as numpy arrays.\n cuda: Returns a copy of the Results object with all tensors on GPU memory.\n to: Returns a copy of the Results object with tensors on a specified device and dtype.\n new: Returns a new Results object with the same image, path, and names.\n plot: Plots detection results on an input image, returning an annotated image.\n show: Shows annotated results on screen.\n save: Saves annotated results to file.\n verbose: Returns a log string for each task, detailing detections and classifications.\n save_txt: Saves detection results to a text file.\n save_crop: Saves cropped detection images.\n tojson: Converts detection results to JSON format.\n\n Examples:\n >>> results = model(\"path/to/image.jpg\")\n >>> for result in results:\n ... print(result.boxes) # Print detection boxes\n ... result.show() # Display the annotated image\n ... result.save(filename=\"result.jpg\") # Save annotated image\n ",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[196], line 7\u001b[0m\n\u001b[0;32m 5\u001b[0m probs \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mprobs \u001b[38;5;66;03m# Probs object for classification outputs\u001b[39;00m\n\u001b[0;32m 6\u001b[0m obb \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mobb \u001b[38;5;66;03m# Oriented boxes object for OBB outputs\u001b[39;00m\n\u001b[1;32m----> 7\u001b[0m \u001b[43mresult\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mascontiguousarray\u001b[49m() \u001b[38;5;66;03m# display to screen\u001b[39;00m\n\u001b[0;32m 8\u001b[0m result\u001b[38;5;241m.\u001b[39msave(filename\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresult.jpg\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;66;03m# save to disk\u001b[39;00m\n",
-      "File \u001b[1;32mc:\\Users\\danie\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\ultralytics\\utils\\__init__.py:235\u001b[0m, in \u001b[0;36mSimpleClass.__getattr__\u001b[1;34m(self, attr)\u001b[0m\n\u001b[0;32m 233\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Custom attribute access error message with helpful information.\"\"\"\u001b[39;00m\n\u001b[0;32m 234\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\n\u001b[1;32m--> 235\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mattr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. See valid attributes below.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__doc__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
-      "\u001b[1;31mAttributeError\u001b[0m: 'Results' object has no attribute 'ascontiguousarray'. See valid attributes below.\n\n A class for storing and manipulating inference results.\n\n This class encapsulates the functionality for handling detection, segmentation, pose estimation,\n and classification results from YOLO models.\n\n Attributes:\n orig_img (numpy.ndarray): Original image as a numpy array.\n orig_shape (Tuple[int, int]): Original image shape in (height, width) format.\n boxes (Boxes | None): Object containing detection bounding boxes.\n masks (Masks | None): Object containing detection masks.\n probs (Probs | None): Object containing class probabilities for classification tasks.\n keypoints (Keypoints | None): Object containing detected keypoints for each object.\n obb (OBB | None): Object containing oriented bounding boxes.\n speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.\n names (Dict[int, str]): Dictionary mapping class IDs to class names.\n path (str): Path to the image file.\n _keys (Tuple[str, ...]): Tuple of attribute names for internal use.\n\n Methods:\n update: Updates object attributes with new detection results.\n cpu: Returns a copy of the Results object with all tensors on CPU memory.\n numpy: Returns a copy of the Results object with all tensors as numpy arrays.\n cuda: Returns a copy of the Results object with all tensors on GPU memory.\n to: Returns a copy of the Results object with tensors on a specified device and dtype.\n new: Returns a new Results object with the same image, path, and names.\n plot: Plots detection results on an input image, returning an annotated image.\n show: Shows annotated results on screen.\n save: Saves annotated results to file.\n verbose: Returns a log string for each task, detailing detections and classifications.\n save_txt: Saves detection results to a text file.\n save_crop: Saves cropped detection images.\n tojson: Converts detection results to JSON format.\n\n Examples:\n >>> results = model(\"path/to/image.jpg\")\n >>> for result in results:\n ... print(result.boxes) # Print detection boxes\n ... result.show() # Display the annotated image\n ... result.save(filename=\"result.jpg\") # Save annotated image\n "
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "image 1/1 e:\\Facultate\\Master\\Anul 1\\CV\\Project\\dataset\\1818949000-IMG-20240118-WA0001.jpg: 640x384 1 cat, 1 chair, 29.5ms\n",
+      "Speed: 5.0ms preprocess, 29.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)\n"
      ]
     }
    ],
    "source": [
-    "for result in pred:\n",
-    "    boxes = result.boxes # Boxes object for bounding box outputs\n",
-    "    masks = result.masks # Masks object for segmentation masks outputs\n",
-    "    keypoints = result.keypoints # Keypoints object for pose outputs\n",
-    "    probs = result.probs # Probs object for classification outputs\n",
-    "    obb = result.obb # Oriented boxes object for OBB outputs\n",
-    "    result.show() # display to screen\n",
-    "    result.save(filename=\"result.jpg\") # save to disk"
+    "results = model.predict(source=\"dataset/1818949000-IMG-20240118-WA0001.jpg\", save=False)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 194,
+   "execution_count": 242,
    "metadata": {},
    "outputs": [
     {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Class: cat, Confidence: 0.79, Box: 1.4070484638214111, 389.7481994628906, 766.0083618164062, 1191.5501708984375\n",
+      "Class: chair, Confidence: 0.27, Box: 0.0, 255.45545959472656, 884.272216796875, 1599.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "for result in results:\n",
+    "    boxes = result.boxes # Bounding box information\n",
+    "\n",
+    "    for box in boxes:\n",
+    "        # Box coordinates\n",
+    "        x_min, y_min, x_max, y_max = box.xyxy[0] # Format: [x_min, y_min, x_max, y_max]\n",
+    "\n",
+    "        # Confidence score\n",
+    "        confidence = box.conf[0]\n",
+    "\n",
+    "        # Class ID or name\n",
+    "        class_id = box.cls[0]\n",
+    "        class_name = model.names[int(class_id)] # Convert class ID to class name\n",
+    "\n",
+    "        print(f\"Class: {class_name}, Confidence: {confidence:.2f}, Box: {x_min}, {y_min}, {x_max}, {y_max}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 251,
+   "metadata": {},
+   "outputs": [
+    {
      "data": {
       "text/plain": [
-       "tensor([], device='cuda:0', size=(0, 4))"
+       "True"
       ]
      },
-     "execution_count": 194,
+     "execution_count": 251,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "pred[0].boxes.xyxy"
+    "image = cv2.imread(\"dataset/1818949000-IMG-20240118-WA0001.jpg\")\n",
+    "for result in results:\n",
+    "    boxes = result.boxes\n",
+    "\n",
+    "    for box in boxes:\n",
+    "        x_min, y_min, x_max, y_max = map(int, box.xyxy[0])\n",
+    "        class_id = int(box.cls[0])\n",
+    "        class_name = model.names[class_id]\n",
+    "        confidence = box.conf[0]\n",
+    "\n",
+    "        if confidence > 0.7:\n",
+    "\n",
+    "            # Format the label with class name and confidence\n",
+    "            label = f\"{class_name} {confidence:.2f}\"\n",
+    "\n",
+    "            # Draw the bounding box\n",
+    "            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)\n",
+    "            cv2.putText(image, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)\n",
+    "\n",
+    "# Save or display the image\n",
+    "cv2.imwrite(\"output/output.jpg\", image)"
    ]
   },
   {
diff --git a/output.jpg b/output.jpg
deleted file mode 100644
index 4ec0f0b..0000000
Binary files a/output.jpg and /dev/null differ