test bbox

2025-01-23 02:15:48 +02:00 · 2025-01-23 02:15:48 +02:00 · 2434635399
commit 2434635399
parent 495c75a86d
3 changed files with 69 additions and 26 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,3 @@
 /dataset/cats
-/dataset/cats.zip
+/dataset/cats.zip
+/output
--- a/cat_detect.ipynb
+++ b/cat_detect.ipynb
@ -563,7 +563,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 182,
+   "execution_count": 234,
   "metadata": {},
   "outputs": [
    {
@ -571,8 +571,8 @@
     "output_type": "stream",
     "text": [
      "\n",
-      "0: 640x384 (no detections), 40.5ms\n",
-      "Speed: 21.0ms preprocess, 40.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)\n"
+      "0: 640x384 (no detections), 23.0ms\n",
+      "Speed: 23.6ms preprocess, 23.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 384)\n"
     ]
    }
   ],
@ -583,51 +583,93 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 196,
+   "execution_count": 241,
   "metadata": {},
   "outputs": [
    {
-     "ename": "AttributeError",
-     "evalue": "'Results' object has no attribute 'ascontiguousarray'. See valid attributes below.\n\n    A class for storing and manipulating inference results.\n\n    This class encapsulates the functionality for handling detection, segmentation, pose estimation,\n    and classification results from YOLO models.\n\n    Attributes:\n        orig_img (numpy.ndarray): Original image as a numpy array.\n        orig_shape (Tuple[int, int]): Original image shape in (height, width) format.\n        boxes (Boxes | None): Object containing detection bounding boxes.\n        masks (Masks | None): Object containing detection masks.\n        probs (Probs | None): Object containing class probabilities for classification tasks.\n        keypoints (Keypoints | None): Object containing detected keypoints for each object.\n        obb (OBB | None): Object containing oriented bounding boxes.\n        speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.\n        names (Dict[int, str]): Dictionary mapping class IDs to class names.\n        path (str): Path to the image file.\n        _keys (Tuple[str, ...]): Tuple of attribute names for internal use.\n\n    Methods:\n        update: Updates object attributes with new detection results.\n        cpu: Returns a copy of the Results object with all tensors on CPU memory.\n        numpy: Returns a copy of the Results object with all tensors as numpy arrays.\n        cuda: Returns a copy of the Results object with all tensors on GPU memory.\n        to: Returns a copy of the Results object with tensors on a specified device and dtype.\n        new: Returns a new Results object with the same image, path, and names.\n        plot: Plots detection results on an input image, returning an annotated image.\n        show: Shows annotated results on screen.\n        save: Saves annotated results to file.\n        verbose: Returns a log string for each task, detailing detections and classifications.\n      
  save_txt: Saves detection results to a text file.\n        save_crop: Saves cropped detection images.\n        tojson: Converts detection results to JSON format.\n\n    Examples:\n        >>> results = model(\"path/to/image.jpg\")\n        >>> for result in results:\n        ...     print(result.boxes)  # Print detection boxes\n        ...     result.show()  # Display the annotated image\n        ...     result.save(filename=\"result.jpg\")  # Save annotated image\n    ",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[196], line 7\u001b[0m\n\u001b[0;32m      5\u001b[0m probs \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mprobs  \u001b[38;5;66;03m# Probs object for classification outputs\u001b[39;00m\n\u001b[0;32m      6\u001b[0m obb \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mobb  \u001b[38;5;66;03m# Oriented boxes object for OBB outputs\u001b[39;00m\n\u001b[1;32m----> 7\u001b[0m \u001b[43mresult\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mascontiguousarray\u001b[49m()  \u001b[38;5;66;03m# display to screen\u001b[39;00m\n\u001b[0;32m      8\u001b[0m result\u001b[38;5;241m.\u001b[39msave(filename\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresult.jpg\u001b[39m\u001b[38;5;124m\"\u001b[39m)  \u001b[38;5;66;03m# save to disk\u001b[39;00m\n",
-      "File \u001b[1;32mc:\\Users\\danie\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\ultralytics\\utils\\__init__.py:235\u001b[0m, in \u001b[0;36mSimpleClass.__getattr__\u001b[1;34m(self, attr)\u001b[0m\n\u001b[0;32m    233\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Custom attribute access error message with helpful information.\"\"\"\u001b[39;00m\n\u001b[0;32m    234\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\n\u001b[1;32m--> 235\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mattr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. See valid attributes below.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__doc__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
-      "\u001b[1;31mAttributeError\u001b[0m: 'Results' object has no attribute 'ascontiguousarray'. See valid attributes below.\n\n    A class for storing and manipulating inference results.\n\n    This class encapsulates the functionality for handling detection, segmentation, pose estimation,\n    and classification results from YOLO models.\n\n    Attributes:\n        orig_img (numpy.ndarray): Original image as a numpy array.\n        orig_shape (Tuple[int, int]): Original image shape in (height, width) format.\n        boxes (Boxes | None): Object containing detection bounding boxes.\n        masks (Masks | None): Object containing detection masks.\n        probs (Probs | None): Object containing class probabilities for classification tasks.\n        keypoints (Keypoints | None): Object containing detected keypoints for each object.\n        obb (OBB | None): Object containing oriented bounding boxes.\n        speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.\n        names (Dict[int, str]): Dictionary mapping class IDs to class names.\n        path (str): Path to the image file.\n        _keys (Tuple[str, ...]): Tuple of attribute names for internal use.\n\n    Methods:\n        update: Updates object attributes with new detection results.\n        cpu: Returns a copy of the Results object with all tensors on CPU memory.\n        numpy: Returns a copy of the Results object with all tensors as numpy arrays.\n        cuda: Returns a copy of the Results object with all tensors on GPU memory.\n        to: Returns a copy of the Results object with tensors on a specified device and dtype.\n        new: Returns a new Results object with the same image, path, and names.\n        plot: Plots detection results on an input image, returning an annotated image.\n        show: Shows annotated results on screen.\n        save: Saves annotated results to file.\n        verbose: Returns a log string for each task, detailing detections 
and classifications.\n        save_txt: Saves detection results to a text file.\n        save_crop: Saves cropped detection images.\n        tojson: Converts detection results to JSON format.\n\n    Examples:\n        >>> results = model(\"path/to/image.jpg\")\n        >>> for result in results:\n        ...     print(result.boxes)  # Print detection boxes\n        ...     result.show()  # Display the annotated image\n        ...     result.save(filename=\"result.jpg\")  # Save annotated image\n    "
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "image 1/1 e:\\Facultate\\Master\\Anul 1\\CV\\Project\\dataset\\1818949000-IMG-20240118-WA0001.jpg: 640x384 1 cat, 1 chair, 29.5ms\n",
+      "Speed: 5.0ms preprocess, 29.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)\n"
     ]
    }
   ],
   "source": [
-    "for result in pred:\n",
-    "    boxes = result.boxes  # Boxes object for bounding box outputs\n",
-    "    masks = result.masks  # Masks object for segmentation masks outputs\n",
-    "    keypoints = result.keypoints  # Keypoints object for pose outputs\n",
-    "    probs = result.probs  # Probs object for classification outputs\n",
-    "    obb = result.obb  # Oriented boxes object for OBB outputs\n",
-    "    result.show()  # display to screen\n",
-    "    result.save(filename=\"result.jpg\")  # save to disk"
+    "results = model.predict(save=False, source=\"dataset/1818949000-IMG-20240118-WA0001.jpg\")  # inference on one sample image"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 194,
+   "execution_count": 242,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Class: cat, Confidence: 0.79, Box: 1.4070484638214111, 389.7481994628906, 766.0083618164062, 1191.5501708984375\n",
+      "Class: chair, Confidence: 0.27, Box: 0.0, 255.45545959472656, 884.272216796875, 1599.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Report each detection as: class label, confidence, corner coordinates.\n",
+    "for result in results:\n",
+    "    boxes = result.boxes\n",
+    "    for box in boxes:\n",
+    "        # Index 0 picks this box's entry out of each field.\n",
+    "        class_id = box.cls[0]\n",
+    "        confidence = box.conf[0]\n",
+    "        x_min, y_min, x_max, y_max = box.xyxy[0]  # corners, in this order\n",
+    "\n",
+    "        # Translate the numeric class id into its name via model.names.\n",
+    "        class_name = model.names[int(class_id)]\n",
+    "\n",
+    "        print(\n",
+    "            f\"Class: {class_name}, Confidence: {confidence:.2f}, Box: {x_min}, {y_min}, {x_max}, {y_max}\"\n",
+    "        )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 251,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "tensor([], device='cuda:0', size=(0, 4))"
+       "True"
      ]
     },
-     "execution_count": 194,
+     "execution_count": 251,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "pred[0].boxes.xyxy"
+    "import os  # local import: only this cell needs it to create the output folder\n",
+    "\n",
+    "IMAGE_PATH = \"dataset/1818949000-IMG-20240118-WA0001.jpg\"\n",
+    "CONF_THRESHOLD = 0.7  # only detections scoring above this are drawn\n",
+    "image = cv2.imread(IMAGE_PATH)\n",
+    "if image is None:  # cv2.imread signals failure with None rather than an exception\n",
+    "    raise FileNotFoundError(f\"Could not read image: {IMAGE_PATH}\")\n",
+    "\n",
+    "for result in results:\n",
+    "    for box in result.boxes:\n",
+    "        x_min, y_min, x_max, y_max = map(int, box.xyxy[0])\n",
+    "        class_name = model.names[int(box.cls[0])]\n",
+    "        confidence = box.conf[0]\n",
+    "        if confidence > CONF_THRESHOLD:\n",
+    "            label = f\"{class_name} {confidence:.2f}\"\n",
+    "            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)\n",
+    "            # Clamp the text baseline so the label stays visible when the box touches the top edge\n",
+    "            cv2.putText(image, label, (x_min, max(y_min - 10, 15)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)\n",
+    "\n",
+    "os.makedirs(\"output\", exist_ok=True)  # /output is git-ignored and may not exist; imwrite would just return False\n",
+    "cv2.imwrite(\"output/output.jpg\", image)"
   ]
  },
  {
--- a/output.jpg
+++ b/output.jpg