test bbox

This commit is contained in:
AshLynxu 2025-01-23 02:15:48 +02:00
parent 495c75a86d
commit 2434635399
3 changed files with 69 additions and 26 deletions

3
.gitignore vendored
View File

@ -1,2 +1,3 @@
/dataset/cats
/dataset/cats.zip
/dataset/cats.zip
/output

View File

@ -563,7 +563,7 @@
},
{
"cell_type": "code",
"execution_count": 182,
"execution_count": 234,
"metadata": {},
"outputs": [
{
@ -571,8 +571,8 @@
"output_type": "stream",
"text": [
"\n",
"0: 640x384 (no detections), 40.5ms\n",
"Speed: 21.0ms preprocess, 40.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)\n"
"0: 640x384 (no detections), 23.0ms\n",
"Speed: 23.6ms preprocess, 23.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 384)\n"
]
}
],
@ -583,51 +583,93 @@
},
{
"cell_type": "code",
"execution_count": 196,
"execution_count": 241,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'Results' object has no attribute 'ascontiguousarray'. See valid attributes below.\n\n A class for storing and manipulating inference results.\n\n This class encapsulates the functionality for handling detection, segmentation, pose estimation,\n and classification results from YOLO models.\n\n Attributes:\n orig_img (numpy.ndarray): Original image as a numpy array.\n orig_shape (Tuple[int, int]): Original image shape in (height, width) format.\n boxes (Boxes | None): Object containing detection bounding boxes.\n masks (Masks | None): Object containing detection masks.\n probs (Probs | None): Object containing class probabilities for classification tasks.\n keypoints (Keypoints | None): Object containing detected keypoints for each object.\n obb (OBB | None): Object containing oriented bounding boxes.\n speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.\n names (Dict[int, str]): Dictionary mapping class IDs to class names.\n path (str): Path to the image file.\n _keys (Tuple[str, ...]): Tuple of attribute names for internal use.\n\n Methods:\n update: Updates object attributes with new detection results.\n cpu: Returns a copy of the Results object with all tensors on CPU memory.\n numpy: Returns a copy of the Results object with all tensors as numpy arrays.\n cuda: Returns a copy of the Results object with all tensors on GPU memory.\n to: Returns a copy of the Results object with tensors on a specified device and dtype.\n new: Returns a new Results object with the same image, path, and names.\n plot: Plots detection results on an input image, returning an annotated image.\n show: Shows annotated results on screen.\n save: Saves annotated results to file.\n verbose: Returns a log string for each task, detailing detections and classifications.\n save_txt: Saves detection results to a text file.\n save_crop: Saves cropped detection images.\n tojson: Converts detection results to JSON format.\n\n Examples:\n >>> 
results = model(\"path/to/image.jpg\")\n >>> for result in results:\n ... print(result.boxes) # Print detection boxes\n ... result.show() # Display the annotated image\n ... result.save(filename=\"result.jpg\") # Save annotated image\n ",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[196], line 7\u001b[0m\n\u001b[0;32m 5\u001b[0m probs \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mprobs \u001b[38;5;66;03m# Probs object for classification outputs\u001b[39;00m\n\u001b[0;32m 6\u001b[0m obb \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mobb \u001b[38;5;66;03m# Oriented boxes object for OBB outputs\u001b[39;00m\n\u001b[1;32m----> 7\u001b[0m \u001b[43mresult\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mascontiguousarray\u001b[49m() \u001b[38;5;66;03m# display to screen\u001b[39;00m\n\u001b[0;32m 8\u001b[0m result\u001b[38;5;241m.\u001b[39msave(filename\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresult.jpg\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;66;03m# save to disk\u001b[39;00m\n",
"File \u001b[1;32mc:\\Users\\danie\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\ultralytics\\utils\\__init__.py:235\u001b[0m, in \u001b[0;36mSimpleClass.__getattr__\u001b[1;34m(self, attr)\u001b[0m\n\u001b[0;32m 233\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Custom attribute access error message with helpful information.\"\"\"\u001b[39;00m\n\u001b[0;32m 234\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\n\u001b[1;32m--> 235\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mattr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. See valid attributes below.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__doc__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mAttributeError\u001b[0m: 'Results' object has no attribute 'ascontiguousarray'. See valid attributes below.\n\n A class for storing and manipulating inference results.\n\n This class encapsulates the functionality for handling detection, segmentation, pose estimation,\n and classification results from YOLO models.\n\n Attributes:\n orig_img (numpy.ndarray): Original image as a numpy array.\n orig_shape (Tuple[int, int]): Original image shape in (height, width) format.\n boxes (Boxes | None): Object containing detection bounding boxes.\n masks (Masks | None): Object containing detection masks.\n probs (Probs | None): Object containing class probabilities for classification tasks.\n keypoints (Keypoints | None): Object containing detected keypoints for each object.\n obb (OBB | None): Object containing oriented bounding boxes.\n speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.\n names (Dict[int, str]): Dictionary mapping class IDs to class names.\n path (str): Path to the image file.\n _keys (Tuple[str, ...]): Tuple of attribute names for internal use.\n\n Methods:\n update: Updates object attributes with new detection results.\n cpu: Returns a copy of the Results object with all tensors on CPU memory.\n numpy: Returns a copy of the Results object with all tensors as numpy arrays.\n cuda: Returns a copy of the Results object with all tensors on GPU memory.\n to: Returns a copy of the Results object with tensors on a specified device and dtype.\n new: Returns a new Results object with the same image, path, and names.\n plot: Plots detection results on an input image, returning an annotated image.\n show: Shows annotated results on screen.\n save: Saves annotated results to file.\n verbose: Returns a log string for each task, detailing detections and classifications.\n save_txt: Saves detection results to a text file.\n save_crop: Saves cropped detection images.\n tojson: Converts detection results to JSON 
format.\n\n Examples:\n >>> results = model(\"path/to/image.jpg\")\n >>> for result in results:\n ... print(result.boxes) # Print detection boxes\n ... result.show() # Display the annotated image\n ... result.save(filename=\"result.jpg\") # Save annotated image\n "
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"image 1/1 e:\\Facultate\\Master\\Anul 1\\CV\\Project\\dataset\\1818949000-IMG-20240118-WA0001.jpg: 640x384 1 cat, 1 chair, 29.5ms\n",
"Speed: 5.0ms preprocess, 29.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)\n"
]
}
],
"source": [
"# Walk every per-image prediction, expose its output containers, then render and persist it.\n",
"for result in pred:\n",
"    boxes = result.boxes  # detection bounding boxes\n",
"    masks = result.masks  # segmentation masks\n",
"    keypoints = result.keypoints  # pose keypoints\n",
"    probs = result.probs  # classification probabilities\n",
"    obb = result.obb  # oriented bounding boxes\n",
"\n",
"    result.show()  # show the annotated result on screen\n",
"    # The filename is constant, so each iteration overwrites the previous file.\n",
"    result.save(filename=\"result.jpg\")"
"# Run the model on the sample image; save=False is passed explicitly to predict().\n",
"results = model.predict(\n",
"    source=\"dataset/1818949000-IMG-20240118-WA0001.jpg\",\n",
"    save=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 194,
"execution_count": 242,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Class: cat, Confidence: 0.79, Box: 1.4070484638214111, 389.7481994628906, 766.0083618164062, 1191.5501708984375\n",
"Class: chair, Confidence: 0.27, Box: 0.0, 255.45545959472656, 884.272216796875, 1599.0\n"
]
}
],
"source": [
"for result in results:\n",
"    boxes = result.boxes  # detections found in this image\n",
"\n",
"    for box in boxes:\n",
"        # Resolve the predicted class index to its human-readable label.\n",
"        class_id = box.cls[0]\n",
"        class_name = model.names[int(class_id)]\n",
"\n",
"        # Score reported for this detection.\n",
"        confidence = box.conf[0]\n",
"\n",
"        # Corner coordinates, unpacked in xyxy order.\n",
"        x_min, y_min, x_max, y_max = box.xyxy[0]\n",
"\n",
"        print(f\"Class: {class_name}, Confidence: {confidence:.2f}, Box: {x_min}, {y_min}, {x_max}, {y_max}\")"
]
},
{
"cell_type": "code",
"execution_count": 251,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([], device='cuda:0', size=(0, 4))"
"True"
]
},
"execution_count": 194,
"execution_count": 251,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pred[0].boxes.xyxy"
"import os\n",
"\n",
"IMAGE_PATH = \"dataset/1818949000-IMG-20240118-WA0001.jpg\"\n",
"OUTPUT_PATH = \"output/output.jpg\"\n",
"MIN_CONFIDENCE = 0.7  # detections at or below this score are not drawn\n",
"BOX_COLOR = (0, 255, 0)  # green, used for both the rectangle and its label\n",
"\n",
"# cv2.imread returns None instead of raising when the file cannot be read.\n",
"image = cv2.imread(IMAGE_PATH)\n",
"if image is None:\n",
"    raise FileNotFoundError(f\"Could not read image: {IMAGE_PATH}\")\n",
"\n",
"for result in results:\n",
"    boxes = result.boxes\n",
"\n",
"    for box in boxes:\n",
"        x_min, y_min, x_max, y_max = map(int, box.xyxy[0])\n",
"        class_id = int(box.cls[0])\n",
"        class_name = model.names[class_id]\n",
"        confidence = float(box.conf[0])  # plain float instead of a 0-d tensor\n",
"\n",
"        if confidence > MIN_CONFIDENCE:\n",
"            # Format the label with class name and confidence\n",
"            label = f\"{class_name} {confidence:.2f}\"\n",
"\n",
"            # Keep the label baseline inside the image when the box touches the top edge.\n",
"            label_y = max(y_min - 10, 15)\n",
"\n",
"            # Draw the bounding box and its label\n",
"            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), BOX_COLOR, 2)\n",
"            cv2.putText(image, label, (x_min, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, BOX_COLOR, 2)\n",
"\n",
"# cv2.imwrite reports failure through its return value, so make sure the target folder exists first.\n",
"os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)\n",
"\n",
"# Last expression of the cell: True when the annotated image was written.\n",
"cv2.imwrite(OUTPUT_PATH, image)"
]
},
{

Binary file not shown.

Before

Width:  |  Height:  |  Size: 170 KiB