diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0bf06dc --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/dataset/cats +/dataset/cats.zip \ No newline at end of file diff --git a/cat_detect.ipynb b/cat_detect.ipynb index 262ff6a..0d540a7 100644 --- a/cat_detect.ipynb +++ b/cat_detect.ipynb @@ -58,19 +58,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating new Ultralytics Settings v0.0.6 file \n", - "View Ultralytics Settings with 'yolo settings' or at 'C:\\Users\\danie\\AppData\\Roaming\\Ultralytics\\settings.json'\n", - "Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import cv2\n", @@ -85,19 +75,559 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "YOLO\n" + "Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 6.25M/6.25M [00:00<00:00, 10.3MB/s]\n" ] } ], "source": [ - "print(\"YOLO\")" + "model = YOLO(\"yolov8n.pt\").to('cuda')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cuda:0\n" + ] + } + ], + "source": [ + "print(model.device)" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [], + "source": [ + "im = Image.open(\"dataset/1818949000-IMG-20240118-WA0001.jpg\")" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [], + "source": [ + "im = transforms.ToTensor()(im)\n", + "# im = im.numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [], + "source": [ + "im = im.reshape(-1, im.shape[0], im.shape[1], im.shape[2])\n", + "\n", + "# apply resize to image 3, 640, 640\n" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1599, 899, 3)" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "im.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [], + "source": [ + "def resize_and_pad_image(im, stride=32):\n", + " # Get original dimensions\n", + " _, _, h, w = im.shape\n", + "\n", + " # Calculate the nearest divisible dimensions\n", + " new_h = int(np.ceil(h / stride) * stride)\n", + " new_w = int(np.ceil(w / stride) * stride)\n", + "\n", + " # Resize the image while maintaining aspect ratio\n", + " resize_transform = transforms.Compose([\n", + " transforms.ToPILImage(),\n", + " transforms.Resize((new_h, new_w)), # Resize to divisible dimensions\n", + " transforms.ToTensor()\n", + " ])\n", + "\n", + " # Apply transform to the tensor\n", + " im_resized = resize_transform(im.squeeze(0)) # Remove batch dimension for processing\n", + "\n", + " # Add batch dimension back\n", + " im_resized = im_resized.unsqueeze(0)\n", + " return im_resized" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "im = resize_and_pad_image(im)" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [], + "source": [ + "im = im.numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1599, 899, 3)" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "im.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [], + "source": [ + "im = im.transpose(1,2,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "YOLO(\n", + " (model): DetectionModel(\n", + " (model): Sequential(\n", + " (0): Conv(\n", + " (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (1): Conv(\n", + " (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (2): C2f(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (m): ModuleList(\n", + " (0): Bottleneck(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (3): Conv(\n", + " (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (4): C2f(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (m): ModuleList(\n", + " (0-1): 2 x Bottleneck(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (5): Conv(\n", + " (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (6): C2f(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (m): ModuleList(\n", + " (0-1): 2 x Bottleneck(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (7): Conv(\n", + " (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (8): C2f(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(384, 256, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (m): ModuleList(\n", + " (0): Bottleneck(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (9): SPPF(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (m): MaxPool2d(kernel_size=5, stride=1, padding=2, dilation=1, ceil_mode=False)\n", + " )\n", + " (10): Upsample(scale_factor=2.0, mode='nearest')\n", + " (11): Concat()\n", + " (12): C2f(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (m): ModuleList(\n", + " (0): Bottleneck(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (13): Upsample(scale_factor=2.0, mode='nearest')\n", + " (14): Concat()\n", + " (15): C2f(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(96, 64, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (m): ModuleList(\n", + " (0): Bottleneck(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (16): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (17): Concat()\n", + " (18): C2f(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (m): ModuleList(\n", + " (0): Bottleneck(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (19): Conv(\n", + " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (20): Concat()\n", + " (21): C2f(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(384, 256, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(384, 256, kernel_size=(1, 1), stride=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (m): ModuleList(\n", + " (0): Bottleneck(\n", + " (cv1): Conv(\n", + " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (cv2): Conv(\n", + " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (22): Detect(\n", + " (cv2): ModuleList(\n", + " (0): Sequential(\n", + " (0): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (1): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n", + " )\n", + " (1): Sequential(\n", + " (0): Conv(\n", + " (conv): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (1): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n", + " )\n", + " (2): Sequential(\n", + " (0): Conv(\n", + " (conv): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (1): Conv(\n", + " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n", + " )\n", + " )\n", + " (cv3): ModuleList(\n", + " (0): Sequential(\n", + " (0): Conv(\n", + " (conv): Conv2d(64, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (1): Conv(\n", + " (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (2): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1))\n", + " )\n", + " (1): Sequential(\n", + " (0): Conv(\n", + " (conv): Conv2d(128, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (1): Conv(\n", + " (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (2): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1))\n", + " )\n", + " (2): Sequential(\n", + " (0): Conv(\n", + " (conv): Conv2d(256, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (1): Conv(\n", + " (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (act): SiLU(inplace=True)\n", + " )\n", + " (2): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1))\n", + " )\n", + " )\n", + " (dfl): DFL(\n", + " (conv): Conv2d(16, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + ")" + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "0: 640x384 (no detections), 40.5ms\n", + "Speed: 21.0ms preprocess, 40.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)\n" + ] + } + ], + "source": [ + "with torch.no_grad():\n", + " pred = model(im)" + ] + }, + { + "cell_type": "code", + "execution_count": 196, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'Results' object has no attribute 'ascontiguousarray'. See valid attributes below.\n\n A class for storing and manipulating inference results.\n\n This class encapsulates the functionality for handling detection, segmentation, pose estimation,\n and classification results from YOLO models.\n\n Attributes:\n orig_img (numpy.ndarray): Original image as a numpy array.\n orig_shape (Tuple[int, int]): Original image shape in (height, width) format.\n boxes (Boxes | None): Object containing detection bounding boxes.\n masks (Masks | None): Object containing detection masks.\n probs (Probs | None): Object containing class probabilities for classification tasks.\n keypoints (Keypoints | None): Object containing detected keypoints for each object.\n obb (OBB | None): Object containing oriented bounding boxes.\n speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.\n names (Dict[int, str]): Dictionary mapping class IDs to class names.\n path (str): Path to the image file.\n _keys (Tuple[str, ...]): Tuple of attribute names for internal use.\n\n Methods:\n update: Updates object attributes with new detection results.\n cpu: Returns a copy of the Results object with all tensors on CPU memory.\n numpy: Returns a copy of the Results object with all tensors as numpy arrays.\n cuda: Returns a copy of the Results object with all tensors on GPU memory.\n to: Returns a copy of the Results object with tensors on a specified device and dtype.\n new: Returns a new Results object with the same image, path, and names.\n plot: Plots detection results on an input image, returning an annotated image.\n show: Shows annotated results on screen.\n save: Saves annotated results to file.\n verbose: Returns a log string for each task, detailing detections and classifications.\n save_txt: Saves detection results to a text file.\n save_crop: Saves cropped detection images.\n tojson: Converts detection results to JSON format.\n\n Examples:\n >>> results = model(\"path/to/image.jpg\")\n >>> for result in results:\n ... print(result.boxes) # Print detection boxes\n ... result.show() # Display the annotated image\n ... result.save(filename=\"result.jpg\") # Save annotated image\n ", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[196], line 7\u001b[0m\n\u001b[0;32m 5\u001b[0m probs \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mprobs \u001b[38;5;66;03m# Probs object for classification outputs\u001b[39;00m\n\u001b[0;32m 6\u001b[0m obb \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mobb \u001b[38;5;66;03m# Oriented boxes object for OBB outputs\u001b[39;00m\n\u001b[1;32m----> 7\u001b[0m \u001b[43mresult\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mascontiguousarray\u001b[49m() \u001b[38;5;66;03m# display to screen\u001b[39;00m\n\u001b[0;32m 8\u001b[0m result\u001b[38;5;241m.\u001b[39msave(filename\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresult.jpg\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;66;03m# save to disk\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\danie\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\ultralytics\\utils\\__init__.py:235\u001b[0m, in \u001b[0;36mSimpleClass.__getattr__\u001b[1;34m(self, attr)\u001b[0m\n\u001b[0;32m 233\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Custom attribute access error message with helpful information.\"\"\"\u001b[39;00m\n\u001b[0;32m 234\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\n\u001b[1;32m--> 235\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mattr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. See valid attributes below.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__doc__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[1;31mAttributeError\u001b[0m: 'Results' object has no attribute 'ascontiguousarray'. See valid attributes below.\n\n A class for storing and manipulating inference results.\n\n This class encapsulates the functionality for handling detection, segmentation, pose estimation,\n and classification results from YOLO models.\n\n Attributes:\n orig_img (numpy.ndarray): Original image as a numpy array.\n orig_shape (Tuple[int, int]): Original image shape in (height, width) format.\n boxes (Boxes | None): Object containing detection bounding boxes.\n masks (Masks | None): Object containing detection masks.\n probs (Probs | None): Object containing class probabilities for classification tasks.\n keypoints (Keypoints | None): Object containing detected keypoints for each object.\n obb (OBB | None): Object containing oriented bounding boxes.\n speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.\n names (Dict[int, str]): Dictionary mapping class IDs to class names.\n path (str): Path to the image file.\n _keys (Tuple[str, ...]): Tuple of attribute names for internal use.\n\n Methods:\n update: Updates object attributes with new detection results.\n cpu: Returns a copy of the Results object with all tensors on CPU memory.\n numpy: Returns a copy of the Results object with all tensors as numpy arrays.\n cuda: Returns a copy of the Results object with all tensors on GPU memory.\n to: Returns a copy of the Results object with tensors on a specified device and dtype.\n new: Returns a new Results object with the same image, path, and names.\n plot: Plots detection results on an input image, returning an annotated image.\n show: Shows annotated results on screen.\n save: Saves annotated results to file.\n verbose: Returns a log string for each task, detailing detections and classifications.\n save_txt: Saves detection results to a text file.\n save_crop: Saves cropped detection images.\n tojson: Converts detection results to JSON format.\n\n Examples:\n >>> results = model(\"path/to/image.jpg\")\n >>> for result in results:\n ... print(result.boxes) # Print detection boxes\n ... result.show() # Display the annotated image\n ... result.save(filename=\"result.jpg\") # Save annotated image\n " + ] + } + ], + "source": [ + "for result in pred:\n", + " boxes = result.boxes # Boxes object for bounding box outputs\n", + " masks = result.masks # Masks object for segmentation masks outputs\n", + " keypoints = result.keypoints # Keypoints object for pose outputs\n", + " probs = result.probs # Probs object for classification outputs\n", + " obb = result.obb # Oriented boxes object for OBB outputs\n", + " result.show() # display to screen\n", + " result.save(filename=\"result.jpg\") # save to disk" + ] + }, + { + "cell_type": "code", + "execution_count": 194, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([], device='cuda:0', size=(0, 4))" + ] + }, + "execution_count": 194, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pred[0].boxes.xyxy" ] }, { diff --git a/dataset/1818949000-IMG-20240118-WA0001.jpg b/dataset/1818949000-IMG-20240118-WA0001.jpg new file mode 100644 index 0000000..85fa3f9 Binary files /dev/null and b/dataset/1818949000-IMG-20240118-WA0001.jpg differ diff --git a/dataset_download.py b/dataset_download.py new file mode 100644 index 0000000..e69de29 diff --git a/output.jpg b/output.jpg new file mode 100644 index 0000000..4ec0f0b Binary files /dev/null and b/output.jpg differ diff --git a/yolov8n.pt b/yolov8n.pt new file mode 100644 index 0000000..0db4ca4 Binary files /dev/null and b/yolov8n.pt differ